author | Nate Sesti <sestinj@gmail.com> | 2023-06-09 12:11:12 -0400
committer | Nate Sesti <sestinj@gmail.com> | 2023-06-09 12:11:12 -0400
commit | f4a8be10182efd328c615798fa9237f6e3c8fac9
tree | 05111a482fe10d64c379e08fa1b1d64fb0fedf4e /continuedev
parent | 083919c3cc8e25153af7ee7d4cd2a0321ee15a61
parent | d7fad7f55aa9fc5eee908bc1e77b7d976a506c9d
Merge branch 'main' into dlt-transform
Diffstat (limited to 'continuedev')
12 files changed, 180 insertions, 66 deletions
diff --git a/continuedev/src/continuedev/core/autopilot.py b/continuedev/src/continuedev/core/autopilot.py
index 5a6bd2e7..b82e1fef 100644
--- a/continuedev/src/continuedev/core/autopilot.py
+++ b/continuedev/src/continuedev/core/autopilot.py
@@ -1,3 +1,4 @@
+from functools import cached_property
 import traceback
 import time
 from typing import Any, Callable, Coroutine, Dict, List
@@ -26,8 +27,9 @@ class Autopilot(ContinueBaseModel):

     _main_user_input_queue: List[str] = []
     _user_input_queue = AsyncSubscriptionQueue()
+    _retry_queue = AsyncSubscriptionQueue()

-    @property
+    @cached_property
     def continue_sdk(self) -> ContinueSDK:
         return ContinueSDK(self)

@@ -83,9 +85,7 @@ class Autopilot(ContinueBaseModel):
     _step_depth: int = 0

     async def retry_at_index(self, index: int):
-        step = self.history.timeline[index].step.copy()
-        await self.update_subscribers()
-        await self._run_singular_step(step)
+        self._retry_queue.post(str(index), None)

     async def _run_singular_step(self, step: "Step", is_future_step: bool = False) -> Coroutine[Observation, None, None]:
         capture_event(
@@ -109,50 +109,62 @@
         # Try to run step and handle errors
         self._step_depth += 1

+        caught_error = False
         try:
             observation = await step(self.continue_sdk)
-        except ContinueCustomException as e:
+        except Exception as e:
+            caught_error = True
+
+            is_continue_custom_exception = issubclass(
+                e.__class__, ContinueCustomException)
+
+            error_string = e.message if is_continue_custom_exception else '\n\n'.join(
+                traceback.format_tb(e.__traceback__)) + f"\n\n{e.__repr__()}"
+            error_title = e.title if is_continue_custom_exception else e.__repr__()
+
             # Attach an InternalErrorObservation to the step and unhide it.
-            error_string = e.message
-            print(
-                f"\n{error_string}\n{e}")
+            print(f"Error while running step: \n{error_string}\n{error_title}")

             observation = InternalErrorObservation(
-                error=error_string, title=e.title)
+                error=error_string, title=error_title)

             # Reveal this step, but hide all of the following steps (its substeps)
+            step_was_hidden = step.hide
+
             step.hide = False
             i = self.history.get_current_index()
             while self.history.timeline[i].step.name != step.name:
                 self.history.timeline[i].step.hide = True
                 i -= 1

-            if e.with_step is not None:
-                await self._run_singular_step(e.with_step)
+            # i is now the index of the step that we want to show/rerun
+            self.history.timeline[i].observation = observation

-        except Exception as e:
-            # Attach an InternalErrorObservation to the step and unhide it.
-            error_string = '\n\n'.join(
-                traceback.format_tb(e.__traceback__)) + f"\n\n{e.__repr__()}"
-            print(
-                f"Error while running step: \n{error_string}\n{e}")
+            await self.update_subscribers()

-            observation = InternalErrorObservation(
-                error=error_string, title=e.__repr__())
+            # ContinueCustomException can optionally specify a step to run on the error
+            if is_continue_custom_exception and e.with_step is not None:
+                await self._run_singular_step(e.with_step)

-            # Reveal this step, but hide all of the following steps (its substeps)
-            step.hide = False
-            i = self.history.get_current_index()
-            while self.history.timeline[i].step.name != step.name:
-                self.history.timeline[i].step.hide = True
-                i -= 1
+            # Wait for a retry signal and then resume the step
+            self._active = False
+            await self._retry_queue.get(str(i))
+            self._active = True
+            # You might consider a "ignore and continue" button
+            # want it to have same step depth, so have to decrement
+            self._step_depth -= 1
+            copy_step = step.copy()
+            copy_step.hide = step_was_hidden
+            observation = await self._run_singular_step(copy_step)
+            self._step_depth += 1

         self._step_depth -= 1

-        # Add observation to history
-        self.history.get_last_at_depth(
-            self._step_depth, include_current=True).observation = observation
-        await self.update_subscribers()
+        # Add observation to history, unless already attached error observation
+        if not caught_error:
+            self.history.get_last_at_depth(
+                self._step_depth, include_current=True).observation = observation
+            await self.update_subscribers()

         # Update its description
         if step.description is None:
@@ -189,8 +201,7 @@ class Autopilot(ContinueBaseModel):
         self._active = False

         # Doing this so active can make it to the frontend after steps are done. But want better state syncing tools
-        for callback in self._on_update_callbacks:
-            await callback(None)
+        await self.update_subscribers()

     async def run_from_observation(self, observation: Observation):
         next_step = self.policy.next(self.history)
diff --git a/continuedev/src/continuedev/core/sdk.py b/continuedev/src/continuedev/core/sdk.py
index 51faadf2..ea90a13a 100644
--- a/continuedev/src/continuedev/core/sdk.py
+++ b/continuedev/src/continuedev/core/sdk.py
@@ -1,4 +1,6 @@
 from abc import ABC, abstractmethod
+import asyncio
+from functools import cached_property
 from typing import Coroutine, Union
 import os

@@ -20,30 +22,30 @@ class Autopilot:
     pass


-class ContinueSDKSteps:
-    def __init__(self, sdk: "ContinueSDK"):
-        self.sdk = sdk
-
-
 class Models:
     def __init__(self, sdk: "ContinueSDK"):
         self.sdk = sdk

-    async def starcoder(self):
-        api_key = await self.sdk.get_user_secret(
-            'HUGGING_FACE_TOKEN', 'Please add your Hugging Face token to the .env file')
-        return HuggingFaceInferenceAPI(api_key=api_key)
+    @cached_property
+    def starcoder(self):
+        async def load_starcoder():
+            api_key = await self.sdk.get_user_secret(
+                'HUGGING_FACE_TOKEN', 'Please add your Hugging Face token to the .env file')
+            return HuggingFaceInferenceAPI(api_key=api_key)
+        return asyncio.get_event_loop().run_until_complete(load_starcoder())

-    async def gpt35(self):
-        api_key = await self.sdk.get_user_secret(
-            'OPENAI_API_KEY', 'Please add your OpenAI API key to the .env file')
-        return OpenAI(api_key=api_key, default_model="gpt-3.5-turbo")
+    @cached_property
+    def gpt35(self):
+        async def load_gpt35():
+            api_key = await self.sdk.get_user_secret(
+                'OPENAI_API_KEY', 'Please add your OpenAI API key to the .env file')
+            return OpenAI(api_key=api_key, default_model="gpt-3.5-turbo")
+        return asyncio.get_event_loop().run_until_complete(load_gpt35())


 class ContinueSDK(AbstractContinueSDK):
     """The SDK provided as parameters to a step"""
     ide: AbstractIdeProtocolServer
-    steps: ContinueSDKSteps
     models: Models
     context: Context
     __autopilot: Autopilot
@@ -51,7 +53,6 @@ class ContinueSDK(AbstractContinueSDK):
     def __init__(self, autopilot: Autopilot):
         self.ide = autopilot.ide
         self.__autopilot = autopilot
-        self.steps = ContinueSDKSteps(self)
         self.models = Models(self)
         self.context = autopilot.context

@@ -86,7 +87,7 @@ class ContinueSDK(AbstractContinueSDK):
         await self.ide.setFileOpen(filepath)
         contents = await self.ide.readFile(filepath)
         await self.run_step(Gpt35EditCodeStep(
-            range_in_files=[RangeInFile(filepath=filename, range=range) if range is not None else RangeInFile.from_entire_file(
+            range_in_files=[RangeInFile(filepath=filepath, range=range) if range is not None else RangeInFile.from_entire_file(
                 filepath, contents)],
             user_input=prompt,
             description=description,
diff --git a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/main.py b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/main.py
index 428ac9cc..39e1ba42 100644
--- a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/main.py
+++ b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/main.py
@@ -4,7 +4,7 @@ from ...core.main import Step
 from ...core.sdk import ContinueSDK
 from ...steps.core.core import WaitForUserInputStep
 from ...steps.main import MessageStep
-from .steps import SetupPipelineStep, ValidatePipelineStep
+from .steps import SetupPipelineStep, ValidatePipelineStep, RunQueryStep


 class CreatePipelineRecipe(Step):
@@ -26,5 +26,6 @@ class CreatePipelineRecipe(Step):
         )
         await sdk.run_step(
             SetupPipelineStep(api_description=text_observation.text) >>
-            ValidatePipelineStep()
+            ValidatePipelineStep() >>
+            RunQueryStep()
         )
diff --git a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py
index 511abd1f..3b9a8c85 100644
--- a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py
+++ b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py
@@ -30,7 +30,7 @@ class SetupPipelineStep(Step):

     async def run(self, sdk: ContinueSDK):
         sdk.context.set("api_description", self.api_description)
-        source_name = (await sdk.models.gpt35()).complete(
+        source_name = sdk.models.gpt35.complete(
             f"Write a snake_case name for the data source described by {self.api_description}: ").strip()
         filename = f'{source_name}.py'

@@ -89,10 +89,10 @@ class ValidatePipelineStep(Step):
         output = await sdk.run(f'python3 {filename}', name="Test the pipeline", description=f"Running `python3 {filename}` to test loading data from the API")

         # If it fails, return the error
-        if "Traceback" in output:
+        if "Traceback" in output or "SyntaxError" in output:
             output = "Traceback" + output.split("Traceback")[-1]
             file_content = await sdk.ide.readFile(os.path.join(workspace_dir, filename))
-            suggestion = (await sdk.models.gpt35()).complete(dedent(f"""\
+            suggestion = sdk.models.gpt35.complete(dedent(f"""\
                 ```python
                 {file_content}
                 ```
@@ -104,7 +104,7 @@
                 This is a brief summary of the error followed by a suggestion on how it can be fixed by editing the resource function:"""))

-            api_documentation_url = (await sdk.models.gpt35()).complete(dedent(f"""\
+            api_documentation_url = sdk.models.gpt35.complete(dedent(f"""\
                 The API I am trying to call is the '{sdk.context.get('api_description')}'. I tried calling it in the @resource function like this:
                 ```python
                 {file_content}
                 ```
@@ -134,15 +134,16 @@ class ValidatePipelineStep(Step):
         # load the data into the DuckDB instance
         await sdk.run(f'python3 {filename}', name="Load data into DuckDB", description=f"Running python3 {filename} to load data into DuckDB")

-        table_name = f"{source_name}.{source_name}_resource"
         tables_query_code = dedent(f'''\
             import duckdb

             # connect to DuckDB instance
             conn = duckdb.connect(database="{source_name}.duckdb")

+            conn.execute("SET search_path = '{source_name}_data';")
+
             # get table names
-            rows = conn.execute("SELECT * FROM {table_name};").fetchall()
+            rows = conn.execute("SELECT * FROM _dlt_loads;").fetchall()

             # print table names
             for row in rows:
@@ -150,4 +151,27 @@ class ValidatePipelineStep(Step):

         query_filename = os.path.join(workspace_dir, "query.py")
         await sdk.apply_filesystem_edit(AddFile(filepath=query_filename, content=tables_query_code), name="Add query.py file", description="Adding a file called `query.py` to the workspace that will run a test query on the DuckDB instance")
-        await sdk.run('env/bin/python3 query.py', name="Run test query", description="Running `env/bin/python3 query.py` to test that the data was loaded into DuckDB as expected")
+
+
+class RunQueryStep(Step):
+    hide: bool = True
+
+    async def run(self, sdk: ContinueSDK):
+        output = await sdk.run('env/bin/python3 query.py', name="Run test query", description="Running `env/bin/python3 query.py` to test that the data was loaded into DuckDB as expected")
+
+        if "Traceback" in output or "SyntaxError" in output:
+            suggestion = sdk.models.gpt35.complete(dedent(f"""\
+                ```python
+                {await sdk.ide.readFile(os.path.join(sdk.ide.workspace_directory, "query.py"))}
+                ```
+                This above code is a query that runs on the DuckDB instance. While attempting to run the query, the following error occurred:
+
+                ```ascii
+                {output}
+                ```
+
+                This is a brief summary of the error followed by a suggestion on how it can be fixed:"""))
+
+            sdk.raise_exception(
+                title="Error while running query", message=output, with_step=MessageStep(name=f"Suggestion to solve error {AI_ASSISTED_STRING}", message=suggestion)
+            )
diff --git a/continuedev/src/continuedev/recipes/WritePytestsRecipe/main.py b/continuedev/src/continuedev/recipes/WritePytestsRecipe/main.py
index 82876a08..5994aa89 100644
--- a/continuedev/src/continuedev/recipes/WritePytestsRecipe/main.py
+++ b/continuedev/src/continuedev/recipes/WritePytestsRecipe/main.py
@@ -38,7 +38,7 @@ class WritePytestsRecipe(Step):
             "{self.instructions}"

             Here is a complete set of pytest unit tests:""")
-        tests = (await sdk.models.gpt35()).complete(prompt)
+        tests = sdk.models.gpt35.complete(prompt)

         await sdk.apply_filesystem_edit(AddFile(filepath=path, content=tests))

diff --git a/continuedev/src/continuedev/server/ide.py b/continuedev/src/continuedev/server/ide.py
index 007eb2b4..5826f15f 100644
--- a/continuedev/src/continuedev/server/ide.py
+++ b/continuedev/src/continuedev/server/ide.py
@@ -1,5 +1,6 @@
 # This is a separate server from server/main.py
 import asyncio
+from functools import cached_property
 import json
 import os
 from typing import Any, Dict, List, Type, TypeVar, Union
@@ -199,6 +200,10 @@ class IdeProtocolServer(AbstractIdeProtocolServer):
         resp = await self._send_and_receive_json({}, WorkspaceDirectoryResponse, "workspaceDirectory")
         return resp.workspaceDirectory

+    @cached_property
+    def workspace_directory(self) -> str:
+        return asyncio.run(self.getWorkspaceDirectory())
+
     async def getHighlightedCode(self) -> List[RangeInFile]:
         resp = await self._send_and_receive_json({}, HighlightedCodeResponse, "highlightedCode")
         return resp.highlightedCode
diff --git a/continuedev/src/continuedev/server/ide_protocol.py b/continuedev/src/continuedev/server/ide_protocol.py
index 4622d6ff..a937ad75 100644
--- a/continuedev/src/continuedev/server/ide_protocol.py
+++ b/continuedev/src/continuedev/server/ide_protocol.py
@@ -1,5 +1,5 @@
 from typing import Any, List
-from abc import ABC, abstractmethod
+from abc import ABC, abstractmethod, abstractproperty

 from ..models.main import Traceback
 from ..models.filesystem_edit import FileEdit, FileSystemEdit, EditDiff
@@ -90,3 +90,7 @@ class AbstractIdeProtocolServer(ABC):
     @abstractmethod
     async def runCommand(self, command: str) -> str:
         """Run a command"""
+
+    @abstractproperty
+    def workspace_directory(self) -> str:
+        """Get the workspace directory"""
diff --git a/continuedev/src/continuedev/steps/chroma.py b/continuedev/src/continuedev/steps/chroma.py
index 7bb9389e..058455b2 100644
--- a/continuedev/src/continuedev/steps/chroma.py
+++ b/continuedev/src/continuedev/steps/chroma.py
@@ -56,7 +56,7 @@ class AnswerQuestionChroma(Step):

             Here is the answer:""")

-        answer = (await sdk.models.gpt35()).complete(prompt)
+        answer = sdk.models.gpt35.complete(prompt)

         # Make paths relative to the workspace directory
         answer = answer.replace(await sdk.ide.getWorkspaceDirectory(), "")
diff --git a/continuedev/src/continuedev/steps/core/core.py b/continuedev/src/continuedev/steps/core/core.py
index 413bc195..dfd765eb 100644
--- a/continuedev/src/continuedev/steps/core/core.py
+++ b/continuedev/src/continuedev/steps/core/core.py
@@ -100,7 +100,7 @@ class Gpt35EditCodeStep(Step):
             return a + b
         <|endoftext|>

-        Now complete the real thing:
+        Now complete the real thing. Do NOT rewrite the prefix or suffix.

         <file_prefix>
         {file_prefix}
@@ -110,7 +110,8 @@ class Gpt35EditCodeStep(Step):
         {code}
         <commit_msg>
         {user_request}
-        <commit_after>""")
+        <commit_after>
+        """)

     _prompt_and_completion: str = ""

@@ -134,7 +135,7 @@ class Gpt35EditCodeStep(Step):
         prompt = self._prompt.format(
             code=rif.contents, user_request=self.user_input, file_prefix=segs[0], file_suffix=segs[1])

-        completion = str((await sdk.models.gpt35()).complete(prompt))
+        completion = str(sdk.models.gpt35.complete(prompt))
         eot_token = "<|endoftext|>"
         completion = completion.removesuffix(eot_token)

@@ -242,5 +243,4 @@ class WaitForUserConfirmationStep(Step):
     async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]:
         self.description = self.prompt
         resp = await sdk.wait_for_user_input()
-        self.hide = True
         return TextObservation(text=resp)
diff --git a/continuedev/src/continuedev/steps/draft/migration.py b/continuedev/src/continuedev/steps/draft/migration.py
index b386bed8..f3b36b5e 100644
--- a/continuedev/src/continuedev/steps/draft/migration.py
+++ b/continuedev/src/continuedev/steps/draft/migration.py
@@ -13,7 +13,7 @@ class MigrationStep(Step):
         recent_edits = await sdk.ide.get_recent_edits(self.edited_file)
         recent_edits_string = "\n\n".join(
             map(lambda x: x.to_string(), recent_edits))
-        description = await (await sdk.models.gpt35()).complete(f"{recent_edits_string}\n\nGenerate a short description of the migration made in the above changes:\n")
+        description = await sdk.models.gpt35.complete(f"{recent_edits_string}\n\nGenerate a short description of the migration made in the above changes:\n")
         await sdk.run([
             "cd libs",
             "poetry run alembic revision --autogenerate -m " + description,
diff --git a/continuedev/src/continuedev/steps/main.py b/continuedev/src/continuedev/steps/main.py
index 69c98bd4..81a1e3a9 100644
--- a/continuedev/src/continuedev/steps/main.py
+++ b/continuedev/src/continuedev/steps/main.py
@@ -144,7 +144,7 @@ class FasterEditHighlightedCodeStep(Step):
         for rif in rif_with_contents:
             rif_dict[rif.filepath] = rif.contents

-        completion = (await sdk.models.gpt35()).complete(prompt)
+        completion = sdk.models.gpt35.complete(prompt)

         # Temporarily doing this to generate description.
         self._prompt = prompt
@@ -239,7 +239,7 @@ class StarCoderEditHighlightedCodeStep(Step):
         for rif in rif_with_contents:
             prompt = self._prompt.format(
                 code=rif.contents, user_request=self.user_input)
-            completion = str((await sdk.models.starcoder()).complete(prompt))
+            completion = str(sdk.models.starcoder.complete(prompt))
             eot_token = "<|endoftext|>"
             if completion.endswith(eot_token):
                 completion = completion[:completion.rindex(eot_token)]
diff --git a/continuedev/src/continuedev/steps/search_directory.py b/continuedev/src/continuedev/steps/search_directory.py
new file mode 100644
index 00000000..9f4594b9
--- /dev/null
+++ b/continuedev/src/continuedev/steps/search_directory.py
@@ -0,0 +1,68 @@
+import asyncio
+from textwrap import dedent
+from typing import List
+
+from ..models.filesystem import RangeInFile
+from ..models.main import Range
+from ..core.main import Step
+from ..core.sdk import ContinueSDK
+import os
+import re
+
+# Already have some code for this somewhere
+IGNORE_DIRS = ["env", "venv", ".venv"]
+IGNORE_FILES = [".env"]
+
+
+def find_all_matches_in_dir(pattern: str, dirpath: str) -> List[RangeInFile]:
+    range_in_files = []
+    for root, dirs, files in os.walk(dirpath):
+        dirname = os.path.basename(root)
+        if dirname.startswith(".") or dirname in IGNORE_DIRS:
+            continue
+        for file in files:
+            if file in IGNORE_FILES:
+                continue
+            with open(os.path.join(root, file), "r") as f:
+                # Find the index of all occurences of the pattern in the file. Use re.
+                file_content = f.read()
+                results = re.finditer(pattern, file_content)
+                range_in_files += [
+                    RangeInFile(filepath=os.path.join(root, file), range=Range.from_indices(
+                        file_content, result.start(), result.end()))
+                    for result in results
+                ]
+
+    return range_in_files
+
+
+class WriteRegexPatternStep(Step):
+    user_request: str
+
+    async def run(self, sdk: ContinueSDK):
+        # Ask the user for a regex pattern
+        pattern = sdk.models.gpt35.complete(dedent(f"""\
+            This is the user request:
+
+            {self.user_request}
+
+            Please write either a regex pattern or just a string that be used with python's re module to find all matches requested by the user. It will be used as `re.findall(<PATTERN_YOU_WILL_WRITE>, file_content)`. Your output should be only the regex or string, nothing else:"""))
+
+        return pattern
+
+
+class EditAllMatchesStep(Step):
+    pattern: str
+    user_request: str
+    directory: str | None = None
+
+    async def run(self, sdk: ContinueSDK):
+        # Search all files for a given string
+        range_in_files = find_all_matches_in_dir(self.pattern, self.directory or await sdk.ide.getWorkspaceDirectory())
+
+        tasks = [asyncio.create_task(sdk.edit_file(
+            range=range_in_file.range,
+            filename=range_in_file.filepath,
+            prompt=self.user_request
+        )) for range_in_file in range_in_files]
+        await asyncio.gather(*tasks)
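
For context on the autopilot.py hunks above: `retry_at_index` no longer re-runs the step directly. A step that raises now attaches an `InternalErrorObservation`, sets `_active = False`, and blocks on `_retry_queue` under its history index; `retry_at_index` simply posts to that key, after which a copy of the step is re-run at the same depth. The sketch below is a minimal model of that handshake using a small keyed async queue; `KeyedQueue` and `run_step` are illustrative stand-ins, not names from the Continue codebase.

```python
import asyncio
from typing import Any, Dict


class KeyedQueue:
    """Minimal stand-in for AsyncSubscriptionQueue: one awaitable slot per key."""

    def __init__(self) -> None:
        self._events: Dict[str, asyncio.Event] = {}
        self._values: Dict[str, Any] = {}

    def post(self, key: str, value: Any) -> None:
        # Wake up whoever is waiting on this key
        self._values[key] = value
        self._events.setdefault(key, asyncio.Event()).set()

    async def get(self, key: str) -> Any:
        # Block until post() is called for the same key
        event = self._events.setdefault(key, asyncio.Event())
        await event.wait()
        event.clear()
        return self._values.pop(key, None)


retry_queue = KeyedQueue()


async def run_step(index: int) -> str:
    try:
        raise RuntimeError("step failed")  # simulate a failing step
    except RuntimeError:
        # Attach the error to history here, then wait for a retry signal
        print(f"step {index} errored; waiting for retry")
        await retry_queue.get(str(index))
        print(f"step {index} retried")
        return "ok"


def retry_at_index(index: int) -> None:
    # Models the new retry_at_index: post to the queue and return immediately
    retry_queue.post(str(index), None)


async def main() -> None:
    step = asyncio.create_task(run_step(3))
    await asyncio.sleep(0.1)  # the user clicks "retry" some time later
    retry_at_index(3)
    print(await step)


asyncio.run(main())
```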
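The sdk.py hunk replaces the `async def gpt35()` / `async def starcoder()` getters with `cached_property` attributes that drive an async loader to completion on first access, which is why call sites elsewhere in this diff change from `(await sdk.models.gpt35()).complete(...)` to `sdk.models.gpt35.complete(...)`. Below is a minimal sketch of that lazy-loading pattern with made-up names and a placeholder loader; note that `run_until_complete()` cannot be called while an event loop is already running in the same thread, so the sketch assumes the property is first touched from synchronous code.

```python
import asyncio
from functools import cached_property


class LazyModels:
    """Illustrative stand-in: load a client once, on first attribute access."""

    def __init__(self, api_key: str) -> None:
        self._api_key = api_key

    @cached_property
    def gpt35(self) -> dict:
        async def load() -> dict:
            await asyncio.sleep(0)  # stands in for awaiting secrets, handshakes, etc.
            return {"default_model": "gpt-3.5-turbo", "api_key": self._api_key}

        # Same pattern as the diff; this raises RuntimeError if a loop is already
        # running in this thread, and newer Python versions prefer asyncio.run().
        return asyncio.get_event_loop().run_until_complete(load())


models = LazyModels("dummy-key")
print(models.gpt35)  # loader runs exactly once here
print(models.gpt35)  # cached result; no second load
```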
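The new steps/search_directory.py file converts every `re.finditer` match into a `RangeInFile` via `Range.from_indices`, i.e. it maps flat character offsets back to line/column positions. `Range.from_indices` itself is not shown in this diff, so the helper below is a hypothetical equivalent that illustrates how the offset-to-position mapping can work.

```python
import re
from typing import Tuple


def index_to_position(content: str, index: int) -> Tuple[int, int]:
    """Map a flat character offset to a 0-based (line, column) pair."""
    before = content[:index]
    line = before.count("\n")
    column = index - (before.rfind("\n") + 1)
    return line, column


def find_matches(pattern: str, content: str):
    """Yield ((start_line, start_col), (end_line, end_col)) for each regex match."""
    for match in re.finditer(pattern, content):
        yield index_to_position(content, match.start()), index_to_position(content, match.end())


sample = "def foo():\n    return foo()\n"
print(list(find_matches(r"foo", sample)))
# [((0, 4), (0, 7)), ((1, 11), (1, 14))]
```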