From 6467759012a139e76dcf022a681355f7d310a30d Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Mon, 7 Aug 2023 15:38:47 -0700 Subject: feat: :construction: first work on URLContextProvider --- .../continuedev/plugins/context_providers/url.py | 58 ++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 continuedev/src/continuedev/plugins/context_providers/url.py
diff --git a/continuedev/src/continuedev/plugins/context_providers/url.py b/continuedev/src/continuedev/plugins/context_providers/url.py
new file mode 100644
index 00000000..d6a74d95
--- /dev/null
+++ b/continuedev/src/continuedev/plugins/context_providers/url.py
@@ -0,0 +1,58 @@
+from typing import List
+
+from .util import remove_meilisearch_disallowed_chars
+from ...core.main import ContextItem, ContextItemDescription, ContextItemId
+from ...core.context import ContextProvider
+
+
+class URLContextProvider(ContextProvider):
+    """Context provider that supplies the contents of a web page.
+
+    A single "URL" item is offered; ``get_item`` loads the page addressed
+    by the query into the item's content.
+    """
+
+    title = "url"
+
+    URL_CONTEXT_ITEM_ID = "url"
+
+    @property
+    def BASE_CONTEXT_ITEM(self):
+        """Build a fresh "URL" item with empty content."""
+        return ContextItem(
+            content="",
+            description=ContextItemDescription(
+                name="URL",
+                description="Reference the contents of a webpage (e.g. '@url https://www.w3schools.com/python/python_ref_functions.asp')",
+                id=ContextItemId(
+                    provider_title=self.title,
+                    item_id=self.URL_CONTEXT_ITEM_ID
+                )
+            )
+        )
+
+    def _get_url_text_contents(self, url: str):
+        import requests
+
+        response = requests.get(url)
+        return response.text
+
+    async def provide_context_items(self, workspace_dir: str) -> List[ContextItem]:
+        self.workspace_dir = workspace_dir
+        return [self.BASE_CONTEXT_ITEM]
+
+    async def get_item(self, id: ContextItemId, query: str) -> ContextItem:
+        if not id.item_id == self.URL_CONTEXT_ITEM_ID:
+            raise Exception("Invalid item id")
+
+        query = query.lstrip("url ")
+        url = query.strip()
+        content = await self._get_url_text_contents(url)
+
+        ctx_item = self.BASE_CONTEXT_ITEM.copy()
+        ctx_item.content = content
+        ctx_item.description.name = url.replace(
+            "https://", "").replace("http://", "")
+        ctx_item.description.id.item_id = remove_meilisearch_disallowed_chars(
+            query)
+        return ctx_item
-- cgit v1.2.3-70-g09d2 From 99db0da9d68c64d0b5adcab21e07c2db438c2404 Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Mon, 7 Aug 2023 16:33:22 -0700 Subject: feat: :sparkles: diff context provider --- continuedev/poetry.lock | 44 +++++++++++++++++++++- continuedev/pyproject.toml | 1 + .../continuedev/libs/constants/default_config.py | 5 ++- .../continuedev/plugins/context_providers/diff.py | 43 +++++++++++++++++++++ .../plugins/context_providers/search.py | 2 +- .../continuedev/plugins/context_providers/url.py | 7 ++-- 6 files changed, 95 insertions(+), 7 deletions(-) create mode 100644 continuedev/src/continuedev/plugins/context_providers/diff.py diff --git a/continuedev/poetry.lock b/continuedev/poetry.lock index 7c347488..3754d121 100644 --- a/continuedev/poetry.lock +++ b/continuedev/poetry.lock @@ -212,6 +212,24 @@ files = [ {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, ] +[[package]] +name = "beautifulsoup4" +version = "4.12.2" +description = 
"Screen-scraping library" +optional = false +python-versions = ">=3.6.0" +files = [ + {file = "beautifulsoup4-4.12.2-py3-none-any.whl", hash = "sha256:bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a"}, + {file = "beautifulsoup4-4.12.2.tar.gz", hash = "sha256:492bbc69dca35d12daac71c4db1bfff0c876c00ef4a2ffacce226d4638eb72da"}, +] + +[package.dependencies] +soupsieve = ">1.2" + +[package.extras] +html5lib = ["html5lib"] +lxml = ["lxml"] + [[package]] name = "boltons" version = "23.0.0" @@ -223,6 +241,19 @@ files = [ {file = "boltons-23.0.0.tar.gz", hash = "sha256:8c50a71829525835ca3c849c7ed2511610c972b4dddfcd41a4a5447222beb4b0"}, ] +[[package]] +name = "bs4" +version = "0.0.1" +description = "Dummy package for Beautiful Soup" +optional = false +python-versions = "*" +files = [ + {file = "bs4-0.0.1.tar.gz", hash = "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a"}, +] + +[package.dependencies] +beautifulsoup4 = "*" + [[package]] name = "camel-converter" version = "3.0.2" @@ -1307,6 +1338,17 @@ files = [ {file = "socksio-1.0.0.tar.gz", hash = "sha256:f88beb3da5b5c38b9890469de67d0cb0f9d494b78b106ca1845f96c10b91c4ac"}, ] +[[package]] +name = "soupsieve" +version = "2.4.1" +description = "A modern CSS selector implementation for Beautiful Soup." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "soupsieve-2.4.1-py3-none-any.whl", hash = "sha256:1c1bfee6819544a3447586c889157365a27e10d88cde3ad3da0cf0ddf646feb8"}, + {file = "soupsieve-2.4.1.tar.gz", hash = "sha256:89d12b2d5dfcd2c9e8c22326da9d9aa9cb3dfab0a83a024f05704076ee8d35ea"}, +] + [[package]] name = "starlette" version = "0.26.1" @@ -1777,4 +1819,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "aacc9ed670fbe067caccc88bf35f1a36541ed3b9446f64fdff7f36fe9b7b558a" +content-hash = "19ea56d05ab21d19f2fee6f837f7c8915ca1c597c392a539f43010dd0a4f6a78" diff --git a/continuedev/pyproject.toml b/continuedev/pyproject.toml index f640c749..a3c058bd 100644 --- a/continuedev/pyproject.toml +++ b/continuedev/pyproject.toml @@ -29,6 +29,7 @@ pygithub = "^1.59.0" meilisearch-python-async = "^1.4.8" socksio = "^1.0.0" ripgrepy = "^2.0.0" +bs4 = "^0.0.1" [tool.poetry.scripts] typegen = "src.continuedev.models.generate_json_schema:main" diff --git a/continuedev/src/continuedev/libs/constants/default_config.py b/continuedev/src/continuedev/libs/constants/default_config.py index f3b19f89..dbd2c8eb 100644 --- a/continuedev/src/continuedev/libs/constants/default_config.py +++ b/continuedev/src/continuedev/libs/constants/default_config.py @@ -22,7 +22,7 @@ from continuedev.src.continuedev.plugins.steps.feedback import FeedbackStep from continuedev.src.continuedev.plugins.steps.comment_code import CommentCodeStep from continuedev.src.continuedev.plugins.steps.main import EditHighlightedCodeStep from continuedev.src.continuedev.plugins.context_providers.search import SearchContextProvider - +from continuedev.src.continuedev.plugins.context_providers.diff import DiffContextProvider class CommitMessageStep(Step): \"\"\" @@ -123,7 +123,8 @@ config = ContinueConfig( # GoogleContextProvider( # serper_api_key="" # ) - SearchContextProvider() + 
SearchContextProvider(), + DiffContextProvider() ], # Policies hold the main logic that decides which Step to take next
diff --git a/continuedev/src/continuedev/plugins/context_providers/diff.py b/continuedev/src/continuedev/plugins/context_providers/diff.py
new file mode 100644
index 00000000..7a53e87a
--- /dev/null
+++ b/continuedev/src/continuedev/plugins/context_providers/diff.py
@@ -0,0 +1,62 @@
+import subprocess
+from typing import List
+
+from .util import remove_meilisearch_disallowed_chars
+from ...core.main import ContextItem, ContextItemDescription, ContextItemId
+from ...core.context import ContextProvider
+
+
+class DiffContextProvider(ContextProvider):
+    """Context provider exposing the workspace's ``git diff`` output.
+
+    A single "Diff" item is offered; selecting it runs ``git diff`` in the
+    workspace directory and uses the output as the item's content.
+    """
+
+    title = "diff"
+
+    DIFF_CONTEXT_ITEM_ID = "diff"
+
+    # Set by provide_context_items(); git runs in the process's current
+    # directory while this is still None.
+    workspace_dir: str = None
+
+    @property
+    def BASE_CONTEXT_ITEM(self):
+        """Build a fresh "Diff" item with empty content."""
+        return ContextItem(
+            content="",
+            description=ContextItemDescription(
+                name="Diff",
+                description="Reference the output of 'git diff' for the current workspace",
+                id=ContextItemId(
+                    provider_title=self.title,
+                    item_id=self.DIFF_CONTEXT_ITEM_ID
+                )
+            )
+        )
+
+    async def provide_context_items(self, workspace_dir: str) -> List[ContextItem]:
+        """Remember ``workspace_dir`` and return the single "Diff" item."""
+        self.workspace_dir = workspace_dir
+        return [self.BASE_CONTEXT_ITEM]
+
+    async def get_item(self, id: ContextItemId, query: str) -> ContextItem:
+        """Return the "Diff" item filled with the current ``git diff`` output.
+
+        Raises:
+            Exception: if ``id`` is not this provider's diff item.
+            subprocess.CalledProcessError: if ``git diff`` exits non-zero.
+        """
+        if not id.item_id == self.DIFF_CONTEXT_ITEM_ID:
+            raise Exception("Invalid item id")
+
+        raw_diff = subprocess.check_output(
+            ["git", "diff"], cwd=self.workspace_dir)
+        # Tracked files are not guaranteed to be UTF-8; replace undecodable
+        # bytes instead of raising UnicodeDecodeError and losing the diff.
+        diff = raw_diff.decode("utf-8", errors="replace")
+
+        ctx_item = self.BASE_CONTEXT_ITEM.copy()
+        ctx_item.content = diff
+        return ctx_item
diff --git a/continuedev/src/continuedev/plugins/context_providers/search.py b/continuedev/src/continuedev/plugins/context_providers/search.py index 17f2660c..da991a78 100644 --- a/continuedev/src/continuedev/plugins/context_providers/search.py +++ b/continuedev/src/continuedev/plugins/context_providers/search.py @@ -33,7 +33,7 @@ class SearchContextProvider(ContextProvider): rg_path = 
f"C:\\Users\\{os.getlogin()}\\AppData\\Local\\Programs\\Microsoft VS Code\\resources\\app\\node_modules.asar.unpacked\\vscode-ripgrep\\bin\\rg.exe" elif os.name == 'posix': if 'darwin' in os.sys.platform: - rg_path = "/Applications/Visual Studio Code.app/Contents/Resources/app/node_modules.asar.unpacked/vscode-ripgrep/bin/rg" + rg_path = "/Applications/Visual Studio Code.app/Contents/Resources/app/node_modules.asar.unpacked/@vscode/ripgrep/bin/rg" else: rg_path = "/usr/share/code/resources/app/node_modules.asar.unpacked/vscode-ripgrep/bin/rg" else: 
diff --git a/continuedev/src/continuedev/plugins/context_providers/url.py b/continuedev/src/continuedev/plugins/context_providers/url.py
index d6a74d95..9274d84a 100644
--- a/continuedev/src/continuedev/plugins/context_providers/url.py
+++ b/continuedev/src/continuedev/plugins/context_providers/url.py
@@ -25,22 +25,43 @@ class URLContextProvider(ContextProvider):
         )
 
     def _get_url_text_contents(self, url: str):
+        """Fetch ``url`` and return the page text with HTML markup removed.
+
+        This call blocks until the response arrives or the timeout expires.
+        """
+        from bs4 import BeautifulSoup
         import requests
 
-        response = requests.get(url)
-        return response.text
+        # requests waits forever by default; bound the wait so an
+        # unresponsive host cannot hang the provider.
+        response = requests.get(url, timeout=10)
+        soup = BeautifulSoup(response.text, 'html.parser')
+        return soup.get_text()
 
     async def provide_context_items(self, workspace_dir: str) -> List[ContextItem]:
-        self.workspace_dir = workspace_dir
+        """Return the single "URL" item; ``workspace_dir`` is not used."""
         return [self.BASE_CONTEXT_ITEM]
 
     async def get_item(self, id: ContextItemId, query: str) -> ContextItem:
+        """Fetch the page named in ``query`` and return it as a context item.
+
+        ``query`` may start with the ``url`` keyword (as in
+        ``@url https://example.com``); the rest is used as the address.
+
+        Raises:
+            Exception: if ``id`` is not this provider's URL item.
+        """
         if not id.item_id == self.URL_CONTEXT_ITEM_ID:
             raise Exception("Invalid item id")
 
-        query = query.lstrip("url ")
-        url = query.strip()
-        content = await self._get_url_text_contents(url)
+        # Drop the literal "url" keyword only. str.lstrip("url ") strips any
+        # run of the characters 'u', 'r', 'l' and ' ', which would mangle
+        # addresses such as "localhost:3000".
+        query = query.lstrip()
+        if query == "url" or query.startswith("url "):
+            query = query[len("url"):].lstrip()
+        url = query.strip()
+        content = self._get_url_text_contents(url)
 
         ctx_item = self.BASE_CONTEXT_ITEM.copy()
         ctx_item.content = content
-- cgit v1.2.3-70-g09d2