author    Nate Sesti <sestinj@gmail.com>  2023-06-11 23:38:17 -0700
committer Nate Sesti <sestinj@gmail.com>  2023-06-11 23:38:17 -0700
commit e61fffea48256030c01ca02b5a33d8f3125c733d (patch)
tree   88870aed8b4936d12506cc4eec0f9954ed3e1831
parent bb44ad69a91be1d678baa04acb07777b8cd325ed (diff)
parent 695051efc31165f70c9e776158cff53066968795 (diff)
Merge branch 'main' into deploy-airflow
-rw-r--r--  continuedev/src/continuedev/core/abstract_sdk.py | 12
-rw-r--r--  continuedev/src/continuedev/core/autopilot.py | 74
-rw-r--r--  continuedev/src/continuedev/core/main.py | 24
-rw-r--r--  continuedev/src/continuedev/core/policy.py | 6
-rw-r--r--  continuedev/src/continuedev/core/sdk.py | 47
-rw-r--r--  continuedev/src/continuedev/libs/llm/__init__.py | 6
-rw-r--r--  continuedev/src/continuedev/libs/llm/hf_inference_api.py | 4
-rw-r--r--  continuedev/src/continuedev/libs/llm/openai.py | 15
-rw-r--r--  continuedev/src/continuedev/recipes/AddTransformRecipe/README.md | 8
-rw-r--r--  continuedev/src/continuedev/recipes/AddTransformRecipe/dlt_transform_docs.md | 135
-rw-r--r--  continuedev/src/continuedev/recipes/AddTransformRecipe/main.py | 27
-rw-r--r--  continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py | 89
-rw-r--r--  continuedev/src/continuedev/recipes/CreatePipelineRecipe/main.py | 7
-rw-r--r--  continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py | 44
-rw-r--r--  continuedev/src/continuedev/recipes/WritePytestsRecipe/main.py | 2
-rw-r--r--  continuedev/src/continuedev/server/ide.py | 7
-rw-r--r--  continuedev/src/continuedev/server/ide_protocol.py | 6
-rw-r--r--  continuedev/src/continuedev/steps/chroma.py | 2
-rw-r--r--  continuedev/src/continuedev/steps/core/core.py | 45
-rw-r--r--  continuedev/src/continuedev/steps/draft/migration.py | 2
-rw-r--r--  continuedev/src/continuedev/steps/main.py | 17
-rw-r--r--  continuedev/src/continuedev/steps/search_directory.py | 68
-rw-r--r--  continuedev/src/continuedev/steps/steps_on_startup.py | 4
-rw-r--r--  docs/docs/walkthroughs/create-a-recipe.md | 4
-rw-r--r--  extension/package-lock.json | 4
-rw-r--r--  extension/package.json | 2
-rw-r--r--  extension/scripts/continuedev-0.1.1-py3-none-any.whl | bin 59915 -> 59335 bytes
-rw-r--r--  netlify.toml | 4
28 files changed, 556 insertions(+), 109 deletions(-)
diff --git a/continuedev/src/continuedev/core/abstract_sdk.py b/continuedev/src/continuedev/core/abstract_sdk.py
index 1c800875..417971cd 100644
--- a/continuedev/src/continuedev/core/abstract_sdk.py
+++ b/continuedev/src/continuedev/core/abstract_sdk.py
@@ -1,10 +1,10 @@
-from abc import ABC, abstractmethod
+from abc import ABC, abstractmethod, abstractproperty
from typing import Coroutine, List, Union
from .config import ContinueConfig
from ..models.filesystem_edit import FileSystemEdit
from .observation import Observation
-from .main import History, Step
+from .main import ChatMessage, History, Step, ChatMessageRole
"""
@@ -83,3 +83,11 @@ class AbstractContinueSDK(ABC):
@abstractmethod
def set_loading_message(self, message: str):
pass
+
+ @abstractmethod
+    def add_chat_context(self, content: str, role: ChatMessageRole = "assistant"):
+ pass
+
+ @abstractproperty
+ def chat_context(self) -> List[ChatMessage]:
+ pass
diff --git a/continuedev/src/continuedev/core/autopilot.py b/continuedev/src/continuedev/core/autopilot.py
index 5a6bd2e7..c979d53a 100644
--- a/continuedev/src/continuedev/core/autopilot.py
+++ b/continuedev/src/continuedev/core/autopilot.py
@@ -1,3 +1,4 @@
+from functools import cached_property
import traceback
import time
from typing import Any, Callable, Coroutine, Dict, List
@@ -26,13 +27,15 @@ class Autopilot(ContinueBaseModel):
_main_user_input_queue: List[str] = []
_user_input_queue = AsyncSubscriptionQueue()
+ _retry_queue = AsyncSubscriptionQueue()
- @property
+ @cached_property
def continue_sdk(self) -> ContinueSDK:
return ContinueSDK(self)
class Config:
arbitrary_types_allowed = True
+ keep_untouched = (cached_property,)
def get_full_state(self) -> FullState:
return FullState(history=self.history, active=self._active, user_input_queue=self._main_user_input_queue)
@@ -83,9 +86,7 @@ class Autopilot(ContinueBaseModel):
_step_depth: int = 0
async def retry_at_index(self, index: int):
- step = self.history.timeline[index].step.copy()
- await self.update_subscribers()
- await self._run_singular_step(step)
+ self._retry_queue.post(str(index), None)
async def _run_singular_step(self, step: "Step", is_future_step: bool = False) -> Coroutine[Observation, None, None]:
capture_event(
@@ -109,50 +110,62 @@ class Autopilot(ContinueBaseModel):
# Try to run step and handle errors
self._step_depth += 1
+ caught_error = False
try:
observation = await step(self.continue_sdk)
- except ContinueCustomException as e:
+ except Exception as e:
+ caught_error = True
+
+ is_continue_custom_exception = issubclass(
+ e.__class__, ContinueCustomException)
+
+ error_string = e.message if is_continue_custom_exception else '\n\n'.join(
+ traceback.format_tb(e.__traceback__)) + f"\n\n{e.__repr__()}"
+ error_title = e.title if is_continue_custom_exception else e.__repr__()
+
# Attach an InternalErrorObservation to the step and unhide it.
- error_string = e.message
- print(
- f"\n{error_string}\n{e}")
+ print(f"Error while running step: \n{error_string}\n{error_title}")
observation = InternalErrorObservation(
- error=error_string, title=e.title)
+ error=error_string, title=error_title)
# Reveal this step, but hide all of the following steps (its substeps)
+ step_was_hidden = step.hide
+
step.hide = False
i = self.history.get_current_index()
while self.history.timeline[i].step.name != step.name:
self.history.timeline[i].step.hide = True
i -= 1
- if e.with_step is not None:
- await self._run_singular_step(e.with_step)
+ # i is now the index of the step that we want to show/rerun
+ self.history.timeline[i].observation = observation
- except Exception as e:
- # Attach an InternalErrorObservation to the step and unhide it.
- error_string = '\n\n'.join(
- traceback.format_tb(e.__traceback__)) + f"\n\n{e.__repr__()}"
- print(
- f"Error while running step: \n{error_string}\n{e}")
+ await self.update_subscribers()
- observation = InternalErrorObservation(
- error=error_string, title=e.__repr__())
+ # ContinueCustomException can optionally specify a step to run on the error
+ if is_continue_custom_exception and e.with_step is not None:
+ await self._run_singular_step(e.with_step)
- # Reveal this step, but hide all of the following steps (its substeps)
- step.hide = False
- i = self.history.get_current_index()
- while self.history.timeline[i].step.name != step.name:
- self.history.timeline[i].step.hide = True
- i -= 1
+ # Wait for a retry signal and then resume the step
+ self._active = False
+ await self._retry_queue.get(str(i))
+ self._active = True
+            # You might consider adding an "ignore and continue" button here
+            # We want the retried step to have the same step depth, so decrement before rerunning
+ self._step_depth -= 1
+ copy_step = step.copy()
+ copy_step.hide = step_was_hidden
+ observation = await self._run_singular_step(copy_step)
+ self._step_depth += 1
self._step_depth -= 1
- # Add observation to history
- self.history.get_last_at_depth(
- self._step_depth, include_current=True).observation = observation
- await self.update_subscribers()
+        # Add observation to history, unless an error observation was already attached
+ if not caught_error:
+ self.history.get_last_at_depth(
+ self._step_depth, include_current=True).observation = observation
+ await self.update_subscribers()
# Update its description
if step.description is None:
@@ -189,8 +202,7 @@ class Autopilot(ContinueBaseModel):
self._active = False
# Doing this so active can make it to the frontend after steps are done. But want better state syncing tools
- for callback in self._on_update_callbacks:
- await callback(None)
+ await self.update_subscribers()
async def run_from_observation(self, observation: Observation):
next_step = self.policy.next(self.history)
diff --git a/continuedev/src/continuedev/core/main.py b/continuedev/src/continuedev/core/main.py
index 37d80de3..3053e5a1 100644
--- a/continuedev/src/continuedev/core/main.py
+++ b/continuedev/src/continuedev/core/main.py
@@ -1,10 +1,17 @@
-from typing import Callable, Coroutine, Dict, Generator, List, Tuple, Union
+from textwrap import dedent
+from typing import Callable, Coroutine, Dict, Generator, List, Literal, Tuple, Union
from ..models.main import ContinueBaseModel
from pydantic import validator
-from ..libs.llm import LLM
from .observation import Observation
+ChatMessageRole = Literal["assistant", "user", "system"]
+
+
+class ChatMessage(ContinueBaseModel):
+ role: ChatMessageRole
+ content: str
+
class HistoryNode(ContinueBaseModel):
"""A point in history, a list of which make up History"""
@@ -12,12 +19,24 @@ class HistoryNode(ContinueBaseModel):
observation: Union[Observation, None]
depth: int
+ def to_chat_messages(self) -> List[ChatMessage]:
+ if self.step.description is None:
+ return self.step.chat_context
+ return self.step.chat_context + [ChatMessage(role="assistant", content=self.step.description)]
+
class History(ContinueBaseModel):
"""A history of steps taken and their results"""
timeline: List[HistoryNode]
current_index: int
+ def to_chat_history(self) -> List[ChatMessage]:
+ msgs = []
+ for node in self.timeline:
+ if not node.step.hide:
+ msgs += node.to_chat_messages()
+ return msgs
+
def add_node(self, node: HistoryNode):
self.timeline.insert(self.current_index + 1, node)
self.current_index += 1
@@ -113,6 +132,7 @@ class Step(ContinueBaseModel):
description: Union[str, None] = None
system_message: Union[str, None] = None
+ chat_context: List[ChatMessage] = []
class Config:
copy_on_model_validation = False
diff --git a/continuedev/src/continuedev/core/policy.py b/continuedev/src/continuedev/core/policy.py
index d87a3582..a4c8d60f 100644
--- a/continuedev/src/continuedev/core/policy.py
+++ b/continuedev/src/continuedev/core/policy.py
@@ -4,12 +4,14 @@ from ..steps.chroma import AnswerQuestionChroma, EditFileChroma, CreateCodebaseI
from ..steps.steps_on_startup import StepsOnStartupStep
from ..recipes.CreatePipelineRecipe.main import CreatePipelineRecipe
from ..recipes.DeployPipelineAirflowRecipe.main import DeployPipelineAirflowRecipe
+from ..recipes.AddTransformRecipe.main import AddTransformRecipe
from .main import Step, Validator, History, Policy
from .observation import Observation, TracebackObservation, UserInputObservation
-from ..steps.main import EditHighlightedCodeStep, SolveTracebackStep, RunCodeStep, FasterEditHighlightedCodeStep, StarCoderEditHighlightedCodeStep, MessageStep, EmptyStep, SetupContinueWorkspaceStep
+from ..steps.main import EditHighlightedCodeStep, SolveTracebackStep, RunCodeStep, FasterEditHighlightedCodeStep, StarCoderEditHighlightedCodeStep, EmptyStep, SetupContinueWorkspaceStep
from ..recipes.WritePytestsRecipe.main import WritePytestsRecipe
from ..recipes.ContinueRecipeRecipe.main import ContinueStepStep
from ..steps.comment_code import CommentCodeStep
+from ..steps.core.core import MessageStep
class DemoPolicy(Policy):
@@ -33,6 +35,8 @@ class DemoPolicy(Policy):
return CreatePipelineRecipe()
elif "/airflow" in observation.user_input.lower():
return DeployPipelineAirflowRecipe()
+ elif "/transform" in observation.user_input.lower():
+ return AddTransformRecipe()
elif "/comment" in observation.user_input.lower():
return CommentCodeStep()
elif "/ask" in observation.user_input:
diff --git a/continuedev/src/continuedev/core/sdk.py b/continuedev/src/continuedev/core/sdk.py
index 51faadf2..59bfc0f2 100644
--- a/continuedev/src/continuedev/core/sdk.py
+++ b/continuedev/src/continuedev/core/sdk.py
@@ -1,4 +1,6 @@
from abc import ABC, abstractmethod
+import asyncio
+from functools import cached_property
from typing import Coroutine, Union
import os
@@ -12,7 +14,7 @@ from ..libs.llm.hf_inference_api import HuggingFaceInferenceAPI
from ..libs.llm.openai import OpenAI
from .observation import Observation
from ..server.ide_protocol import AbstractIdeProtocolServer
-from .main import Context, ContinueCustomException, History, Step
+from .main import Context, ContinueCustomException, History, Step, ChatMessage, ChatMessageRole
from ..steps.core.core import *
@@ -20,30 +22,30 @@ class Autopilot:
pass
-class ContinueSDKSteps:
- def __init__(self, sdk: "ContinueSDK"):
- self.sdk = sdk
-
-
class Models:
def __init__(self, sdk: "ContinueSDK"):
self.sdk = sdk
- async def starcoder(self):
- api_key = await self.sdk.get_user_secret(
- 'HUGGING_FACE_TOKEN', 'Please add your Hugging Face token to the .env file')
- return HuggingFaceInferenceAPI(api_key=api_key)
+ @cached_property
+ def starcoder(self):
+ async def load_starcoder():
+ api_key = await self.sdk.get_user_secret(
+ 'HUGGING_FACE_TOKEN', 'Please add your Hugging Face token to the .env file')
+ return HuggingFaceInferenceAPI(api_key=api_key)
+ return asyncio.get_event_loop().run_until_complete(load_starcoder())
- async def gpt35(self):
- api_key = await self.sdk.get_user_secret(
- 'OPENAI_API_KEY', 'Please add your OpenAI API key to the .env file')
- return OpenAI(api_key=api_key, default_model="gpt-3.5-turbo")
+ @cached_property
+ def gpt35(self):
+ async def load_gpt35():
+ api_key = await self.sdk.get_user_secret(
+ 'OPENAI_API_KEY', 'Please add your OpenAI API key to the .env file')
+ return OpenAI(api_key=api_key, default_model="gpt-3.5-turbo")
+ return asyncio.get_event_loop().run_until_complete(load_gpt35())
class ContinueSDK(AbstractContinueSDK):
"""The SDK provided as parameters to a step"""
ide: AbstractIdeProtocolServer
- steps: ContinueSDKSteps
models: Models
context: Context
__autopilot: Autopilot
@@ -51,7 +53,6 @@ class ContinueSDK(AbstractContinueSDK):
def __init__(self, autopilot: Autopilot):
self.ide = autopilot.ide
self.__autopilot = autopilot
- self.steps = ContinueSDKSteps(self)
self.models = Models(self)
self.context = autopilot.context
@@ -76,9 +77,9 @@ class ContinueSDK(AbstractContinueSDK):
async def wait_for_user_confirmation(self, prompt: str):
return await self.run_step(WaitForUserConfirmationStep(prompt=prompt))
- async def run(self, commands: Union[List[str], str], cwd: str = None, name: str = None, description: str = None) -> Coroutine[str, None, None]:
+ async def run(self, commands: Union[List[str], str], cwd: str = None, name: str = None, description: str = None, handle_error: bool = True) -> Coroutine[str, None, None]:
commands = commands if isinstance(commands, List) else [commands]
- return (await self.run_step(ShellCommandsStep(cmds=commands, cwd=cwd, description=description, **({'name': name} if name else {})))).text
+ return (await self.run_step(ShellCommandsStep(cmds=commands, cwd=cwd, description=description, handle_error=handle_error, **({'name': name} if name else {})))).text
async def edit_file(self, filename: str, prompt: str, name: str = None, description: str = None, range: Range = None):
filepath = await self._ensure_absolute_path(filename)
@@ -86,7 +87,7 @@ class ContinueSDK(AbstractContinueSDK):
await self.ide.setFileOpen(filepath)
contents = await self.ide.readFile(filepath)
await self.run_step(Gpt35EditCodeStep(
- range_in_files=[RangeInFile(filepath=filename, range=range) if range is not None else RangeInFile.from_entire_file(
+ range_in_files=[RangeInFile(filepath=filepath, range=range) if range is not None else RangeInFile.from_entire_file(
filepath, contents)],
user_input=prompt,
description=description,
@@ -135,3 +136,11 @@ class ContinueSDK(AbstractContinueSDK):
def raise_exception(self, message: str, title: str, with_step: Union[Step, None] = None):
raise ContinueCustomException(message, title, with_step)
+
+    def add_chat_context(self, content: str, role: ChatMessageRole = "assistant"):
+ self.history.timeline[self.history.current_index].step.chat_context.append(
+ ChatMessage(content=content, role=role))
+
+ @property
+ def chat_context(self) -> List[ChatMessage]:
+ return self.history.to_chat_history()
diff --git a/continuedev/src/continuedev/libs/llm/__init__.py b/continuedev/src/continuedev/libs/llm/__init__.py
index 6bae2222..24fd34be 100644
--- a/continuedev/src/continuedev/libs/llm/__init__.py
+++ b/continuedev/src/continuedev/libs/llm/__init__.py
@@ -1,4 +1,6 @@
-from typing import Union
+from typing import List, Union
+
+from ...core.main import ChatMessage
from ...models.main import AbstractModel
from pydantic import BaseModel
@@ -6,7 +8,7 @@ from pydantic import BaseModel
class LLM(BaseModel):
system_message: Union[str, None] = None
- def complete(self, prompt: str, **kwargs):
+ def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs):
"""Return the completion of the text with the given temperature."""
raise
diff --git a/continuedev/src/continuedev/libs/llm/hf_inference_api.py b/continuedev/src/continuedev/libs/llm/hf_inference_api.py
index 734da160..1586c620 100644
--- a/continuedev/src/continuedev/libs/llm/hf_inference_api.py
+++ b/continuedev/src/continuedev/libs/llm/hf_inference_api.py
@@ -1,3 +1,5 @@
+from typing import List
+from ...core.main import ChatMessage
from ..llm import LLM
import requests
@@ -9,7 +11,7 @@ class HuggingFaceInferenceAPI(LLM):
api_key: str
model: str = "bigcode/starcoder"
- def complete(self, prompt: str, **kwargs):
+ def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs):
"""Return the completion of the text with the given temperature."""
API_URL = f"https://api-inference.huggingface.co/models/{self.model}"
headers = {
diff --git a/continuedev/src/continuedev/libs/llm/openai.py b/continuedev/src/continuedev/libs/llm/openai.py
index 10801465..6a537afd 100644
--- a/continuedev/src/continuedev/libs/llm/openai.py
+++ b/continuedev/src/continuedev/libs/llm/openai.py
@@ -1,6 +1,7 @@
import asyncio
import time
from typing import Any, Dict, Generator, List, Union
+from ...core.main import ChatMessage
import openai
import aiohttp
from ..llm import LLM
@@ -62,7 +63,7 @@ class OpenAI(LLM):
for chunk in generator:
yield chunk.choices[0].text
- def complete(self, prompt: str, **kwargs) -> str:
+ def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs) -> str:
t1 = time.time()
self.completion_count += 1
@@ -70,15 +71,17 @@ class OpenAI(LLM):
"frequency_penalty": 0, "presence_penalty": 0, "stream": False} | kwargs
if args["model"] == "gpt-3.5-turbo":
- messages = [{
- "role": "user",
- "content": prompt
- }]
+ messages = []
if self.system_message:
- messages.insert(0, {
+ messages.append({
"role": "system",
"content": self.system_message
})
+ messages += [msg.dict() for msg in with_history]
+ messages.append({
+ "role": "user",
+ "content": prompt
+ })
resp = openai.ChatCompletion.create(
messages=messages,
**args,
diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/README.md b/continuedev/src/continuedev/recipes/AddTransformRecipe/README.md
new file mode 100644
index 00000000..d735e0cd
--- /dev/null
+++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/README.md
@@ -0,0 +1,8 @@
+# AddTransformRecipe
+
+Uses the Chess.com API example to show how to add map and filter Python transforms to a dlt pipeline.
+
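+A minimal sketch of the idea (the `chess_games` resource and its fields here are hypothetical, for illustration only):
+
+```python
+import dlt
+
+@dlt.resource(write_disposition='replace')
+def chess_games():
+    # stand-in for the real chess.com API resource
+    yield {'white': 'hikaru', 'black': 'magnus', 'moves': 'e4 e5 Nf3'}
+
+# drop unwanted games with a filter, then derive a field with a map
+games = chess_games() \
+    .add_filter(lambda g: g['white'] != 'me') \
+    .add_map(lambda g: {**g, 'n_moves': len(g['moves'].split())})
+
+dlt.pipeline(pipeline_name='chess', destination='duckdb').run(games)
+```
+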
+Background
+- https://dlthub.com/docs/general-usage/resource#filter-transform-and-pivot-data
+- https://dlthub.com/docs/customizations/customizing-pipelines/renaming_columns
+- https://dlthub.com/docs/customizations/customizing-pipelines/pseudonymizing_columns
\ No newline at end of file
diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/dlt_transform_docs.md b/continuedev/src/continuedev/recipes/AddTransformRecipe/dlt_transform_docs.md
new file mode 100644
index 00000000..658b285f
--- /dev/null
+++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/dlt_transform_docs.md
@@ -0,0 +1,135 @@
+# Customize resources
+## Filter, transform and pivot data
+
+You can attach any number of transformations to your resource; they are evaluated on an item-per-item basis. The available transformation types:
+- map - transform the data item (resource.add_map)
+- filter - filter the data item (resource.add_filter)
+- yield map - a map that returns an iterator, so a single row may generate many rows (resource.add_yield_map; see the sketch after the example below)
+
+Example: We have a resource that loads a list of users from an API endpoint. We want to customize it so that:
+- we remove users with user_id == 'me'
+- we anonymize user data
+Here's our resource:
+```python
+import dlt
+
+@dlt.resource(write_disposition='replace')
+def users():
+ ...
+ users = requests.get(...)
+ ...
+ yield users
+```
+
+Here's our script that defines transformations and loads the data.
+```python
+from pipedrive import users
+
+def anonymize_user(user_data):
+ user_data['user_id'] = hash_str(user_data['user_id'])
+ user_data['user_email'] = hash_str(user_data['user_email'])
+ return user_data
+
+# add the filter and anonymize functions to the users resource and enumerate
+for user in users().add_filter(lambda user: user['user_id'] != 'me').add_map(anonymize_user):
+    print(user)
+```
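+
+A minimal sketch of `add_yield_map`, assuming each user dict carries a `roles` list (a hypothetical field, for illustration): the mapped function is a generator, so one user record can fan out into many rows.
+```python
+def expand_roles(user):
+    # yield one output row per role attached to the user
+    for role in user.get('roles', []):
+        yield {'user_id': user['user_id'], 'role': role}
+
+for row in users().add_yield_map(expand_roles):
+    print(row)
+```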
+
+Here is a more complex example of a map transformation that renames columns:
+
+ # Renaming columns
+ ## Renaming columns by replacing the special characters
+
+ In the example below, we create a dummy source with special characters in the name. We then write a function that we intend to apply to the resource to modify its output (i.e. replacing the German umlaut): replace_umlauts_in_dict_keys.
+ ```python
+ import dlt
+
+    # create a dummy source with umlauts (special characters) in key names
+ @dlt.source
+ def dummy_source(prefix: str = None):
+ @dlt.resource
+ def dummy_data():
+ for _ in range(100):
+ yield {f'Objekt_{_}':{'Größe':_, 'Äquivalenzprüfung':True}}
+ return dummy_data(),
+
+ def replace_umlauts_in_dict_keys(d):
+ # Replaces umlauts in dictionary keys with standard characters.
+ umlaut_map = {'ä': 'ae', 'ö': 'oe', 'ü': 'ue', 'ß': 'ss', 'Ä': 'Ae', 'Ö': 'Oe', 'Ü': 'Ue'}
+ result = {}
+ for k, v in d.items():
+ new_key = ''.join(umlaut_map.get(c, c) for c in k)
+ if isinstance(v, dict):
+ result[new_key] = replace_umlauts_in_dict_keys(v)
+ else:
+ result[new_key] = v
+ return result
+
+ # We can add the map function to the resource
+
+ # 1. Create an instance of the source so you can edit it.
+ data_source = dummy_source()
+
+ # 2. Modify this source instance's resource
+ data_source = data_source.dummy_data().add_map(replace_umlauts_in_dict_keys)
+
+ # 3. Inspect your result
+ for row in data_source:
+ print(row)
+
+ # {'Objekt_0': {'Groesse': 0, 'Aequivalenzpruefung': True}}
+ # ...
+ ```
+
+Here is a more complex example of a map transformation that pseudonymizes columns:
+
+# Pseudonymizing columns
+## Pseudonymizing (or anonymizing) columns by replacing values with deterministic hashes
+Pseudonymization is a deterministic way to hide personally identifiable info (PII), enabling us to consistently achieve the same mapping. If instead you wish to anonymize, you can delete the data, or replace it with a constant. In the example below, we create a dummy source with a PII column called 'name', which we replace with deterministic hashes.
+
+```python
+import dlt
+import hashlib
+
+@dlt.source
+def dummy_source(prefix: str = None):
+ @dlt.resource
+ def dummy_data():
+ for _ in range(3):
+ yield {'id':_, 'name': f'Jane Washington {_}'}
+ return dummy_data(),
+
+def pseudonymize_name(doc):
+    # Pseudonymization is a deterministic type of PII-obscuring.
+    # Its role is to allow identifying users by their hash, without revealing the underlying info.
+
+    # add a constant salt to generate a deterministic hash
+ salt = 'WI@N57%zZrmk#88c'
+ salted_string = doc['name'] + salt
+ sh = hashlib.sha256()
+ sh.update(salted_string.encode())
+ hashed_string = sh.digest().hex()
+ doc['name'] = hashed_string
+ return doc
+
+ # run it as is
+ for row in dummy_source().dummy_data().add_map(pseudonymize_name):
+ print(row)
+
+ #{'id': 0, 'name': '96259edb2b28b48bebce8278c550e99fbdc4a3fac8189e6b90f183ecff01c442'}
+ #{'id': 1, 'name': '92d3972b625cbd21f28782fb5c89552ce1aa09281892a2ab32aee8feeb3544a1'}
+ #{'id': 2, 'name': '443679926a7cff506a3b5d5d094dc7734861352b9e0791af5d39db5a7356d11a'}
+
+ # Or create an instance of the data source, modify the resource and run the source.
+
+ # 1. Create an instance of the source so you can edit it.
+ data_source = dummy_source()
+ # 2. Modify this source instance's resource
+    data_source = data_source.dummy_data().add_map(pseudonymize_name)
+ # 3. Inspect your result
+ for row in data_source:
+ print(row)
+
+ pipeline = dlt.pipeline(pipeline_name='example', destination='bigquery', dataset_name='normalized_data')
+ load_info = pipeline.run(data_source)
+```
\ No newline at end of file
diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py b/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py
new file mode 100644
index 00000000..e9a998e3
--- /dev/null
+++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/main.py
@@ -0,0 +1,27 @@
+from textwrap import dedent
+
+from ...core.main import Step
+from ...core.sdk import ContinueSDK
+from ...steps.core.core import WaitForUserInputStep
+from ...steps.core.core import MessageStep
+from .steps import SetUpChessPipelineStep, AddTransformStep
+
+
+class AddTransformRecipe(Step):
+ hide: bool = True
+
+ async def run(self, sdk: ContinueSDK):
+ text_observation = await sdk.run_step(
+ MessageStep(message=dedent("""\
+ This recipe will walk you through the process of adding a transform to a dlt pipeline that uses the chess.com API source. With the help of Continue, you will:
+ - Set up a dlt pipeline for the chess.com API
+ - Add a filter or map transform to the pipeline
+ - Run the pipeline and view the transformed data in a Streamlit app"""), name="Add transformation to a dlt pipeline") >>
+ SetUpChessPipelineStep() >>
+ WaitForUserInputStep(
+ prompt="How do you want to transform the Chess.com API data before loading it? For example, you could use the `python-chess` library to decode the moves or filter out certain games")
+ )
+ await sdk.run_step(
+ AddTransformStep(
+ transform_description=text_observation.text)
+ )
diff --git a/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py b/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py
new file mode 100644
index 00000000..7bb0fc23
--- /dev/null
+++ b/continuedev/src/continuedev/recipes/AddTransformRecipe/steps.py
@@ -0,0 +1,89 @@
+import os
+from textwrap import dedent
+
+from ...models.main import Range
+from ...models.filesystem import RangeInFile
+from ...steps.core.core import MessageStep
+from ...core.sdk import Models
+from ...core.observation import DictObservation
+from ...models.filesystem_edit import AddFile
+from ...core.main import Step
+from ...core.sdk import ContinueSDK
+
+AI_ASSISTED_STRING = "(✨ AI-Assisted ✨)"
+
+
+class SetUpChessPipelineStep(Step):
+ hide: bool = True
+ name: str = "Setup Chess.com API dlt Pipeline"
+
+ async def describe(self, models: Models):
+ return "This step will create a new dlt pipeline that loads data from the chess.com API."
+
+ async def run(self, sdk: ContinueSDK):
+
+ # running commands to get started when creating a new dlt pipeline
+ await sdk.run([
+ 'python3 -m venv env',
+ 'source env/bin/activate',
+ 'pip install dlt',
+ 'dlt --non-interactive init chess duckdb',
+ 'pip install -r requirements.txt',
+ 'pip install pandas streamlit' # Needed for the pipeline show step later
+ ], name="Set up Python environment", description=dedent(f"""\
+ Running the following commands:
+ - `python3 -m venv env`: Create a Python virtual environment
+ - `source env/bin/activate`: Activate the virtual environment
+ - `pip install dlt`: Install dlt
+            - `dlt --non-interactive init chess duckdb`: Create a new dlt pipeline called "chess" that loads data into a local DuckDB instance
+            - `pip install -r requirements.txt`: Install the Python dependencies for the pipeline
+            - `pip install pandas streamlit`: Install pandas and Streamlit, used later to show the data"""))
+
+
+class AddTransformStep(Step):
+ hide: bool = True
+
+ # e.g. "Use the `python-chess` library to decode the moves in the game data"
+ transform_description: str
+
+ async def run(self, sdk: ContinueSDK):
+ source_name = 'chess'
+ filename = f'{source_name}_pipeline.py'
+ abs_filepath = os.path.join(sdk.ide.workspace_directory, filename)
+
+ await sdk.run_step(MessageStep(message=dedent("""\
+ This step will customize your resource function with a transform of your choice:
+ - Add a filter or map transformation depending on your request
+ - Load the data into a local DuckDB instance
+ - Open up a Streamlit app for you to view the data"""), name="Write transformation function"))
+
+ # Open the file and highlight the function to be edited
+ await sdk.ide.setFileOpen(abs_filepath)
+ await sdk.ide.highlightCode(range_in_file=RangeInFile(
+ filepath=abs_filepath,
+ range=Range.from_shorthand(47, 0, 51, 0)
+ ))
+
+ with open(os.path.join(os.path.dirname(__file__), 'dlt_transform_docs.md')) as f:
+ dlt_transform_docs = f.read()
+
+ prompt = dedent(f"""\
+            Task: Write a transform function using the description below and then use `add_map` or `add_filter` from the `dlt` library to attach it to a resource.
+
+ Description: {self.transform_description}
+
+ Here are some docs pages that will help you better understand how to use `dlt`.
+
+ {dlt_transform_docs}""")
+
+        # edit the pipeline to add a transform function and attach it to a resource
+ await sdk.edit_file(
+ filename=filename,
+ prompt=prompt,
+ name=f"Writing transform function {AI_ASSISTED_STRING}"
+ )
+
+ # run the pipeline and load the data
+ await sdk.run(f'python3 {filename}', name="Run the pipeline", description=f"Running `python3 {filename}` to load the data into a local DuckDB instance")
+
+ # run a streamlit app to show the data
+        await sdk.run(f'dlt pipeline {source_name}_pipeline show', name="Show data in a Streamlit app", description=f"Running `dlt pipeline {source_name}_pipeline show` to show the data in a Streamlit app, where you can view and play with the data.")
diff --git a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/main.py b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/main.py
index 428ac9cc..818168ba 100644
--- a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/main.py
+++ b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/main.py
@@ -3,8 +3,8 @@ from textwrap import dedent
from ...core.main import Step
from ...core.sdk import ContinueSDK
from ...steps.core.core import WaitForUserInputStep
-from ...steps.main import MessageStep
-from .steps import SetupPipelineStep, ValidatePipelineStep
+from ...steps.core.core import MessageStep
+from .steps import SetupPipelineStep, ValidatePipelineStep, RunQueryStep
class CreatePipelineRecipe(Step):
@@ -26,5 +26,6 @@ class CreatePipelineRecipe(Step):
)
await sdk.run_step(
SetupPipelineStep(api_description=text_observation.text) >>
- ValidatePipelineStep()
+ ValidatePipelineStep() >>
+ RunQueryStep()
)
diff --git a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py
index 511abd1f..e59cc51c 100644
--- a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py
+++ b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py
@@ -5,7 +5,7 @@ import time
from ...models.main import Range
from ...models.filesystem import RangeInFile
-from ...steps.main import MessageStep
+from ...steps.core.core import MessageStep
from ...core.sdk import Models
from ...core.observation import DictObservation, InternalErrorObservation
from ...models.filesystem_edit import AddFile, FileEdit
@@ -30,7 +30,7 @@ class SetupPipelineStep(Step):
async def run(self, sdk: ContinueSDK):
sdk.context.set("api_description", self.api_description)
- source_name = (await sdk.models.gpt35()).complete(
+ source_name = sdk.models.gpt35.complete(
f"Write a snake_case name for the data source described by {self.api_description}: ").strip()
filename = f'{source_name}.py'
@@ -51,7 +51,7 @@ class SetupPipelineStep(Step):
# editing the resource function to call the requested API
resource_function_range = Range.from_shorthand(15, 0, 29, 0)
- await sdk.ide.highlightCode(RangeInFile(filepath=os.path.join(await sdk.ide.getWorkspaceDirectory(), filename), range=resource_function_range), "#00ff0022")
+ await sdk.ide.highlightCode(RangeInFile(filepath=os.path.join(await sdk.ide.getWorkspaceDirectory(), filename), range=resource_function_range))
# sdk.set_loading_message("Writing code to call the API...")
await sdk.edit_file(
@@ -86,13 +86,13 @@ class ValidatePipelineStep(Step):
# """)))
# test that the API call works
- output = await sdk.run(f'python3 {filename}', name="Test the pipeline", description=f"Running `python3 {filename}` to test loading data from the API")
+ output = await sdk.run(f'python3 {filename}', name="Test the pipeline", description=f"Running `python3 {filename}` to test loading data from the API", handle_error=False)
# If it fails, return the error
- if "Traceback" in output:
+ if "Traceback" in output or "SyntaxError" in output:
output = "Traceback" + output.split("Traceback")[-1]
file_content = await sdk.ide.readFile(os.path.join(workspace_dir, filename))
- suggestion = (await sdk.models.gpt35()).complete(dedent(f"""\
+ suggestion = sdk.models.gpt35.complete(dedent(f"""\
```python
{file_content}
```
@@ -104,7 +104,7 @@ class ValidatePipelineStep(Step):
This is a brief summary of the error followed by a suggestion on how it can be fixed by editing the resource function:"""))
- api_documentation_url = (await sdk.models.gpt35()).complete(dedent(f"""\
+ api_documentation_url = sdk.models.gpt35.complete(dedent(f"""\
The API I am trying to call is the '{sdk.context.get('api_description')}'. I tried calling it in the @resource function like this:
```python
{file_content}
@@ -134,15 +134,16 @@ class ValidatePipelineStep(Step):
# load the data into the DuckDB instance
await sdk.run(f'python3 {filename}', name="Load data into DuckDB", description=f"Running python3 {filename} to load data into DuckDB")
- table_name = f"{source_name}.{source_name}_resource"
tables_query_code = dedent(f'''\
import duckdb
# connect to DuckDB instance
conn = duckdb.connect(database="{source_name}.duckdb")
+ conn.execute("SET search_path = '{source_name}_data';")
+
# get table names
- rows = conn.execute("SELECT * FROM {table_name};").fetchall()
+ rows = conn.execute("SELECT * FROM _dlt_loads;").fetchall()
# print table names
for row in rows:
@@ -150,4 +151,27 @@ class ValidatePipelineStep(Step):
query_filename = os.path.join(workspace_dir, "query.py")
await sdk.apply_filesystem_edit(AddFile(filepath=query_filename, content=tables_query_code), name="Add query.py file", description="Adding a file called `query.py` to the workspace that will run a test query on the DuckDB instance")
- await sdk.run('env/bin/python3 query.py', name="Run test query", description="Running `env/bin/python3 query.py` to test that the data was loaded into DuckDB as expected")
+
+
+class RunQueryStep(Step):
+ hide: bool = True
+
+ async def run(self, sdk: ContinueSDK):
+ output = await sdk.run('env/bin/python3 query.py', name="Run test query", description="Running `env/bin/python3 query.py` to test that the data was loaded into DuckDB as expected", handle_error=False)
+
+ if "Traceback" in output or "SyntaxError" in output:
+ suggestion = sdk.models.gpt35.complete(dedent(f"""\
+ ```python
+ {await sdk.ide.readFile(os.path.join(sdk.ide.workspace_directory, "query.py"))}
+ ```
+ This above code is a query that runs on the DuckDB instance. While attempting to run the query, the following error occurred:
+
+ ```ascii
+ {output}
+ ```
+
+ This is a brief summary of the error followed by a suggestion on how it can be fixed:"""))
+
+ sdk.raise_exception(
+ title="Error while running query", message=output, with_step=MessageStep(name=f"Suggestion to solve error {AI_ASSISTED_STRING}", message=suggestion)
+ )
diff --git a/continuedev/src/continuedev/recipes/WritePytestsRecipe/main.py b/continuedev/src/continuedev/recipes/WritePytestsRecipe/main.py
index 82876a08..5994aa89 100644
--- a/continuedev/src/continuedev/recipes/WritePytestsRecipe/main.py
+++ b/continuedev/src/continuedev/recipes/WritePytestsRecipe/main.py
@@ -38,7 +38,7 @@ class WritePytestsRecipe(Step):
"{self.instructions}"
Here is a complete set of pytest unit tests:""")
- tests = (await sdk.models.gpt35()).complete(prompt)
+ tests = sdk.models.gpt35.complete(prompt)
await sdk.apply_filesystem_edit(AddFile(filepath=path, content=tests))
diff --git a/continuedev/src/continuedev/server/ide.py b/continuedev/src/continuedev/server/ide.py
index 007eb2b4..f4ea1071 100644
--- a/continuedev/src/continuedev/server/ide.py
+++ b/continuedev/src/continuedev/server/ide.py
@@ -1,5 +1,6 @@
# This is a separate server from server/main.py
import asyncio
+from functools import cached_property
import json
import os
from typing import Any, Dict, List, Type, TypeVar, Union
@@ -137,7 +138,7 @@ class IdeProtocolServer(AbstractIdeProtocolServer):
"sessionId": session_id
})
- async def highlightCode(self, range_in_file: RangeInFile, color: str):
+ async def highlightCode(self, range_in_file: RangeInFile, color: str = "#00ff0022"):
await self._send_json("highlightCode", {
"rangeInFile": range_in_file.dict(),
"color": color
@@ -199,6 +200,10 @@ class IdeProtocolServer(AbstractIdeProtocolServer):
resp = await self._send_and_receive_json({}, WorkspaceDirectoryResponse, "workspaceDirectory")
return resp.workspaceDirectory
+ @cached_property
+ def workspace_directory(self) -> str:
+ return asyncio.run(self.getWorkspaceDirectory())
+
async def getHighlightedCode(self) -> List[RangeInFile]:
resp = await self._send_and_receive_json({}, HighlightedCodeResponse, "highlightedCode")
return resp.highlightedCode
diff --git a/continuedev/src/continuedev/server/ide_protocol.py b/continuedev/src/continuedev/server/ide_protocol.py
index 4622d6ff..a937ad75 100644
--- a/continuedev/src/continuedev/server/ide_protocol.py
+++ b/continuedev/src/continuedev/server/ide_protocol.py
@@ -1,5 +1,5 @@
from typing import Any, List
-from abc import ABC, abstractmethod
+from abc import ABC, abstractmethod, abstractproperty
from ..models.main import Traceback
from ..models.filesystem_edit import FileEdit, FileSystemEdit, EditDiff
@@ -90,3 +90,7 @@ class AbstractIdeProtocolServer(ABC):
@abstractmethod
async def runCommand(self, command: str) -> str:
"""Run a command"""
+
+ @abstractproperty
+ def workspace_directory(self) -> str:
+ """Get the workspace directory"""
diff --git a/continuedev/src/continuedev/steps/chroma.py b/continuedev/src/continuedev/steps/chroma.py
index 7bb9389e..058455b2 100644
--- a/continuedev/src/continuedev/steps/chroma.py
+++ b/continuedev/src/continuedev/steps/chroma.py
@@ -56,7 +56,7 @@ class AnswerQuestionChroma(Step):
Here is the answer:""")
- answer = (await sdk.models.gpt35()).complete(prompt)
+ answer = sdk.models.gpt35.complete(prompt)
# Make paths relative to the workspace directory
answer = answer.replace(await sdk.ide.getWorkspaceDirectory(), "")
diff --git a/continuedev/src/continuedev/steps/core/core.py b/continuedev/src/continuedev/steps/core/core.py
index 413bc195..40e992e7 100644
--- a/continuedev/src/continuedev/steps/core/core.py
+++ b/continuedev/src/continuedev/steps/core/core.py
@@ -1,4 +1,5 @@
# These steps are depended upon by ContinueSDK
+import os
import subprocess
from textwrap import dedent
from typing import Coroutine, List, Union
@@ -23,6 +24,17 @@ class ReversibleStep(Step):
raise NotImplementedError
+class MessageStep(Step):
+ name: str = "Message"
+ message: str
+
+ async def describe(self, models: Models) -> Coroutine[str, None, None]:
+ return self.message
+
+ async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]:
+ return TextObservation(text=self.message)
+
+
class FileSystemEditStep(ReversibleStep):
edit: FileSystemEdit
_diff: Union[EditDiff, None] = None
@@ -38,10 +50,18 @@ class FileSystemEditStep(ReversibleStep):
# Where and when should file saves happen?
+def output_contains_error(output: str) -> bool:
+ return "Traceback" in output or "SyntaxError" in output
+
+
+AI_ASSISTED_STRING = "(✨ AI-Assisted ✨)"
+
+
class ShellCommandsStep(Step):
cmds: List[str]
cwd: Union[str, None] = None
name: str = "Run Shell Commands"
+ handle_error: bool = True
_err_text: Union[str, None] = None
@@ -50,13 +70,26 @@ class ShellCommandsStep(Step):
return f"Error when running shell commands:\n```\n{self._err_text}\n```"
cmds_str = "\n".join(self.cmds)
- return (await models.gpt35()).complete(f"{cmds_str}\n\nSummarize what was done in these shell commands, using markdown bullet points:")
+ return models.gpt35.complete(f"{cmds_str}\n\nSummarize what was done in these shell commands, using markdown bullet points:")
async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]:
cwd = await sdk.ide.getWorkspaceDirectory() if self.cwd is None else self.cwd
for cmd in self.cmds:
output = await sdk.ide.runCommand(cmd)
+ if self.handle_error and output is not None and output_contains_error(output):
+ suggestion = sdk.models.gpt35.complete(dedent(f"""\
+ While running the command `{cmd}`, the following error occurred:
+
+ ```ascii
+ {output}
+ ```
+
+                    This is a brief summary of the error followed by a suggestion on how it can be fixed:"""), with_history=sdk.chat_context)
+
+ sdk.raise_exception(
+                    title="Error while running command", message=output, with_step=MessageStep(name=f"Suggestion to solve error {AI_ASSISTED_STRING}", message=suggestion)
+ )
return TextObservation(text=output)
@@ -100,7 +133,7 @@ class Gpt35EditCodeStep(Step):
return a + b
<|endoftext|>
- Now complete the real thing:
+ Now complete the real thing. Do NOT rewrite the prefix or suffix.
<file_prefix>
{file_prefix}
@@ -110,12 +143,13 @@ class Gpt35EditCodeStep(Step):
{code}
<commit_msg>
{user_request}
- <commit_after>""")
+ <commit_after>
+ """)
_prompt_and_completion: str = ""
async def describe(self, models: Models) -> Coroutine[str, None, None]:
- return (await models.gpt35()).complete(f"{self._prompt_and_completion}\n\nPlease give brief a description of the changes made above using markdown bullet points:")
+        return models.gpt35.complete(f"{self._prompt_and_completion}\n\nPlease give a brief description of the changes made above using markdown bullet points:")
async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]:
rif_with_contents = []
@@ -134,7 +168,7 @@ class Gpt35EditCodeStep(Step):
prompt = self._prompt.format(
code=rif.contents, user_request=self.user_input, file_prefix=segs[0], file_suffix=segs[1])
- completion = str((await sdk.models.gpt35()).complete(prompt))
+ completion = str(sdk.models.gpt35.complete(prompt))
eot_token = "<|endoftext|>"
completion = completion.removesuffix(eot_token)
@@ -242,5 +276,4 @@ class WaitForUserConfirmationStep(Step):
async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]:
self.description = self.prompt
resp = await sdk.wait_for_user_input()
- self.hide = True
return TextObservation(text=resp)
diff --git a/continuedev/src/continuedev/steps/draft/migration.py b/continuedev/src/continuedev/steps/draft/migration.py
index b386bed8..f3b36b5e 100644
--- a/continuedev/src/continuedev/steps/draft/migration.py
+++ b/continuedev/src/continuedev/steps/draft/migration.py
@@ -13,7 +13,7 @@ class MigrationStep(Step):
recent_edits = await sdk.ide.get_recent_edits(self.edited_file)
recent_edits_string = "\n\n".join(
map(lambda x: x.to_string(), recent_edits))
- description = await (await sdk.models.gpt35()).complete(f"{recent_edits_string}\n\nGenerate a short description of the migration made in the above changes:\n")
+ description = await sdk.models.gpt35.complete(f"{recent_edits_string}\n\nGenerate a short description of the migration made in the above changes:\n")
await sdk.run([
"cd libs",
"poetry run alembic revision --autogenerate -m " + description,
diff --git a/continuedev/src/continuedev/steps/main.py b/continuedev/src/continuedev/steps/main.py
index 69c98bd4..24335b4f 100644
--- a/continuedev/src/continuedev/steps/main.py
+++ b/continuedev/src/continuedev/steps/main.py
@@ -144,7 +144,7 @@ class FasterEditHighlightedCodeStep(Step):
for rif in rif_with_contents:
rif_dict[rif.filepath] = rif.contents
- completion = (await sdk.models.gpt35()).complete(prompt)
+ completion = sdk.models.gpt35.complete(prompt)
# Temporarily doing this to generate description.
self._prompt = prompt
@@ -212,7 +212,7 @@ class StarCoderEditHighlightedCodeStep(Step):
_prompt_and_completion: str = ""
async def describe(self, models: Models) -> Coroutine[str, None, None]:
- return (await models.gpt35()).complete(f"{self._prompt_and_completion}\n\nPlease give brief a description of the changes made above using markdown bullet points:")
+        return models.gpt35.complete(f"{self._prompt_and_completion}\n\nPlease give a brief description of the changes made above using markdown bullet points:")
async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]:
range_in_files = await sdk.ide.getHighlightedCode()
@@ -239,7 +239,7 @@ class StarCoderEditHighlightedCodeStep(Step):
for rif in rif_with_contents:
prompt = self._prompt.format(
code=rif.contents, user_request=self.user_input)
- completion = str((await sdk.models.starcoder()).complete(prompt))
+ completion = str(sdk.models.starcoder.complete(prompt))
eot_token = "<|endoftext|>"
if completion.endswith(eot_token):
completion = completion[:completion.rindex(eot_token)]
@@ -317,17 +317,6 @@ class SolveTracebackStep(Step):
return None
-class MessageStep(Step):
- name: str = "Message"
- message: str
-
- async def describe(self, models: Models) -> Coroutine[str, None, None]:
- return self.message
-
- async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]:
- return TextObservation(text=self.message)
-
-
class EmptyStep(Step):
hide: bool = True
diff --git a/continuedev/src/continuedev/steps/search_directory.py b/continuedev/src/continuedev/steps/search_directory.py
new file mode 100644
index 00000000..9f4594b9
--- /dev/null
+++ b/continuedev/src/continuedev/steps/search_directory.py
@@ -0,0 +1,68 @@
+import asyncio
+from textwrap import dedent
+from typing import List, Union
+
+from ..models.filesystem import RangeInFile
+from ..models.main import Range
+from ..core.main import Step
+from ..core.sdk import ContinueSDK
+import os
+import re
+
+# Already have some code for this somewhere
+IGNORE_DIRS = ["env", "venv", ".venv"]
+IGNORE_FILES = [".env"]
+
+
+def find_all_matches_in_dir(pattern: str, dirpath: str) -> List[RangeInFile]:
+ range_in_files = []
+ for root, dirs, files in os.walk(dirpath):
+ dirname = os.path.basename(root)
+ if dirname.startswith(".") or dirname in IGNORE_DIRS:
+ continue
+ for file in files:
+ if file in IGNORE_FILES:
+ continue
+ with open(os.path.join(root, file), "r") as f:
+                # Find the indices of all occurrences of the pattern in the file, using re.
+ file_content = f.read()
+ results = re.finditer(pattern, file_content)
+ range_in_files += [
+ RangeInFile(filepath=os.path.join(root, file), range=Range.from_indices(
+ file_content, result.start(), result.end()))
+ for result in results
+ ]
+
+ return range_in_files
+
+
+class WriteRegexPatternStep(Step):
+ user_request: str
+
+ async def run(self, sdk: ContinueSDK):
+        # Have the model write a regex pattern for the user's request
+ pattern = sdk.models.gpt35.complete(dedent(f"""\
+ This is the user request:
+
+ {self.user_request}
+
+            Please write either a regex pattern or just a string that can be used with Python's re module to find all matches requested by the user. It will be used as `re.findall(<PATTERN_YOU_WILL_WRITE>, file_content)`. Your output should be only the regex or string, nothing else:"""))
+
+ return pattern
+
+
+class EditAllMatchesStep(Step):
+ pattern: str
+ user_request: str
+    directory: Union[str, None] = None
+
+ async def run(self, sdk: ContinueSDK):
+ # Search all files for a given string
+ range_in_files = find_all_matches_in_dir(self.pattern, self.directory or await sdk.ide.getWorkspaceDirectory())
+
+ tasks = [asyncio.create_task(sdk.edit_file(
+ range=range_in_file.range,
+ filename=range_in_file.filepath,
+ prompt=self.user_request
+ )) for range_in_file in range_in_files]
+ await asyncio.gather(*tasks)
diff --git a/continuedev/src/continuedev/steps/steps_on_startup.py b/continuedev/src/continuedev/steps/steps_on_startup.py
index b1376e8a..fbdbbcff 100644
--- a/continuedev/src/continuedev/steps/steps_on_startup.py
+++ b/continuedev/src/continuedev/steps/steps_on_startup.py
@@ -3,11 +3,13 @@ from .main import UserInputStep
from ..recipes.CreatePipelineRecipe.main import CreatePipelineRecipe
from ..recipes.DeployPipelineAirflowRecipe.main import DeployPipelineAirflowRecipe
+from ..recipes.AddTransformRecipe.main import AddTransformRecipe
step_name_to_step_class = {
"UserInputStep": UserInputStep,
"CreatePipelineRecipe": CreatePipelineRecipe,
- "DeployPipelineAirflowRecipe": DeployPipelineAirflowRecipe
+ "DeployPipelineAirflowRecipe": DeployPipelineAirflowRecipe,
+ "AddTransformRecipe": AddTransformRecipe
}
diff --git a/docs/docs/walkthroughs/create-a-recipe.md b/docs/docs/walkthroughs/create-a-recipe.md
index 60bfe9a8..3b80df8a 100644
--- a/docs/docs/walkthroughs/create-a-recipe.md
+++ b/docs/docs/walkthroughs/create-a-recipe.md
@@ -17,8 +17,6 @@ continue/continuedev/src/continuedev/recipes
## 1. Create a step
-
-
### a. Start by creating a subclass of Step
You should first consider what will be the parameters of your recipe. These are defined as attributes in the step, as with `input_file_path: str` below
@@ -33,7 +31,7 @@ If you'd like to override the default description of your steps, which is just t
- Return a static string
- Store state in a class attribute (prepend with a double underscore, which signifies (through Pydantic) that this is not a parameter for the Step, just internal state) during the run method, and then grab this in the describe method.
-- Use state in conjunction with the `models` parameter of the describe method to autogenerate a description with a language model. For example, if you'd used an attribute called `__code_written` to store a string representing some code that was written, you could implement describe as `return (await models.gpt35()).complete(f"{self.\_\_code_written}\n\nSummarize the changes made in the above code.")`.
+- Use state in conjunction with the `models` parameter of the describe method to autogenerate a description with a language model. For example, if you'd used an attribute called `__code_written` to store a string representing some code that was written, you could implement describe as `return models.gpt35.complete(f"{self.\_\_code_written}\n\nSummarize the changes made in the above code.")`.
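+
+For example, a minimal sketch (the step and attribute names are illustrative, not from the codebase):
+
+```python
+class WriteCodeStep(Step):
+    input_file_path: str  # a parameter of the step
+    __code_written: str = ""  # internal state, not a parameter
+
+    async def run(self, sdk: ContinueSDK):
+        # ... generate some code, then stash it for describe ...
+        self.__code_written = "def add(a, b):\n    return a + b"
+
+    async def describe(self, models: Models) -> str:
+        return models.gpt35.complete(
+            f"{self.__code_written}\n\nSummarize the changes made in the above code.")
+```
+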
## 2. Compose steps together into a complete recipe
diff --git a/extension/package-lock.json b/extension/package-lock.json
index 0b0e063b..061b6342 100644
--- a/extension/package-lock.json
+++ b/extension/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "continue",
- "version": "0.0.23",
+ "version": "0.0.25",
"lockfileVersion": 2,
"requires": true,
"packages": {
"": {
"name": "continue",
- "version": "0.0.23",
+ "version": "0.0.25",
"license": "Apache-2.0",
"dependencies": {
"@electron/rebuild": "^3.2.10",
diff --git a/extension/package.json b/extension/package.json
index c979a435..66ade224 100644
--- a/extension/package.json
+++ b/extension/package.json
@@ -14,7 +14,7 @@
"displayName": "Continue",
"pricing": "Free",
"description": "Refine code 10x faster",
- "version": "0.0.23",
+ "version": "0.0.25",
"publisher": "Continue",
"engines": {
"vscode": "^1.74.0"
diff --git a/extension/scripts/continuedev-0.1.1-py3-none-any.whl b/extension/scripts/continuedev-0.1.1-py3-none-any.whl
index e9d03c6e..4c89d23e 100644
--- a/extension/scripts/continuedev-0.1.1-py3-none-any.whl
+++ b/extension/scripts/continuedev-0.1.1-py3-none-any.whl
Binary files differ
diff --git a/netlify.toml b/netlify.toml
new file mode 100644
index 00000000..9a888e2a
--- /dev/null
+++ b/netlify.toml
@@ -0,0 +1,4 @@
+[[redirects]]
+ from = "/docs/*"
+ to = "/:splat"
+ status = 200