Diffstat (limited to 'continuedev')
-rw-r--r--  continuedev/poetry.lock                                              86
-rw-r--r--  continuedev/pyproject.toml                                            3
-rw-r--r--  continuedev/src/continuedev/core/abstract_sdk.py                      4
-rw-r--r--  continuedev/src/continuedev/core/autopilot.py                        39
-rw-r--r--  continuedev/src/continuedev/core/config.py                           46
-rw-r--r--  continuedev/src/continuedev/core/main.py                             97
-rw-r--r--  continuedev/src/continuedev/core/policy.py                            5
-rw-r--r--  continuedev/src/continuedev/core/sdk.py                              71
-rw-r--r--  continuedev/src/continuedev/libs/llm/__init__.py                      8
-rw-r--r--  continuedev/src/continuedev/libs/llm/openai.py                      215
-rw-r--r--  continuedev/src/continuedev/libs/llm/proxy_server.py                 96
-rw-r--r--  continuedev/src/continuedev/libs/util/count_tokens.py               101
-rw-r--r--  continuedev/src/continuedev/libs/util/telemetry.py                   16
-rw-r--r--  continuedev/src/continuedev/models/filesystem.py                      2
-rw-r--r--  continuedev/src/continuedev/models/main.py                            6
-rw-r--r--  continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py    10
-rw-r--r--  continuedev/src/continuedev/server/gui.py                             5
-rw-r--r--  continuedev/src/continuedev/server/gui_protocol.py                    4
-rw-r--r--  continuedev/src/continuedev/steps/chat.py                           217
-rw-r--r--  continuedev/src/continuedev/steps/core/core.py                      269
-rw-r--r--  continuedev/src/continuedev/steps/main.py                             6
-rw-r--r--  continuedev/src/continuedev/steps/on_traceback.py                     8
22 files changed, 927 insertions, 387 deletions
diff --git a/continuedev/poetry.lock b/continuedev/poetry.lock
index 93aaf82b..a49a570f 100644
--- a/continuedev/poetry.lock
+++ b/continuedev/poetry.lock
@@ -360,6 +360,21 @@ files = [
dev = ["attribution (==1.6.2)", "black (==23.3.0)", "flit (==3.8.0)", "mypy (==1.2.0)", "ufmt (==2.1.0)", "usort (==1.0.6)"]
[[package]]
+name = "directory-tree"
+version = "0.0.3.1"
+description = "Utility Package that Displays out the Tree Structure of a Particular Directory."
+category = "main"
+optional = false
+python-versions = "*"
+files = [
+ {file = "directory_tree-0.0.3.1-py3-none-any.whl", hash = "sha256:72411e4f1534afaaccadb21fc082c727a680b6a74e8d21a1406ffbe51389cd85"},
+ {file = "directory_tree-0.0.3.1.tar.gz", hash = "sha256:e4f40d60a45c4cdc0bc8e9ee29311f554dee6c969241c0eef8bcd92b4d4bcd4a"},
+]
+
+[package.extras]
+dev = ["pytest (>=3.7)"]
+
+[[package]]
name = "fastapi"
version = "0.95.1"
description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
@@ -586,6 +601,38 @@ files = [
]
[[package]]
+name = "jsonref"
+version = "1.1.0"
+description = "jsonref is a library for automatic dereferencing of JSON Reference objects for Python."
+category = "main"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "jsonref-1.1.0-py3-none-any.whl", hash = "sha256:590dc7773df6c21cbf948b5dac07a72a251db28b0238ceecce0a2abfa8ec30a9"},
+ {file = "jsonref-1.1.0.tar.gz", hash = "sha256:32fe8e1d85af0fdefbebce950af85590b22b60f9e95443176adbde4e1ecea552"},
+]
+
+[[package]]
+name = "jsonschema"
+version = "4.17.3"
+description = "An implementation of JSON Schema validation for Python"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "jsonschema-4.17.3-py3-none-any.whl", hash = "sha256:a870ad254da1a8ca84b6a2905cac29d265f805acc57af304784962a2aa6508f6"},
+ {file = "jsonschema-4.17.3.tar.gz", hash = "sha256:0f864437ab8b6076ba6707453ef8f98a6a0d512a80e93f8abdb676f737ecb60d"},
+]
+
+[package.dependencies]
+attrs = ">=17.4.0"
+pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2"
+
+[package.extras]
+format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
+format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"]
+
+[[package]]
name = "langchain"
version = "0.0.171"
description = "Building applications with LLMs through composability"
@@ -1055,6 +1102,43 @@ dotenv = ["python-dotenv (>=0.10.4)"]
email = ["email-validator (>=1.0.3)"]
[[package]]
+name = "pyrsistent"
+version = "0.19.3"
+description = "Persistent/Functional/Immutable data structures"
+category = "main"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "pyrsistent-0.19.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:20460ac0ea439a3e79caa1dbd560344b64ed75e85d8703943e0b66c2a6150e4a"},
+ {file = "pyrsistent-0.19.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4c18264cb84b5e68e7085a43723f9e4c1fd1d935ab240ce02c0324a8e01ccb64"},
+ {file = "pyrsistent-0.19.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4b774f9288dda8d425adb6544e5903f1fb6c273ab3128a355c6b972b7df39dcf"},
+ {file = "pyrsistent-0.19.3-cp310-cp310-win32.whl", hash = "sha256:5a474fb80f5e0d6c9394d8db0fc19e90fa540b82ee52dba7d246a7791712f74a"},
+ {file = "pyrsistent-0.19.3-cp310-cp310-win_amd64.whl", hash = "sha256:49c32f216c17148695ca0e02a5c521e28a4ee6c5089f97e34fe24163113722da"},
+ {file = "pyrsistent-0.19.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f0774bf48631f3a20471dd7c5989657b639fd2d285b861237ea9e82c36a415a9"},
+ {file = "pyrsistent-0.19.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3ab2204234c0ecd8b9368dbd6a53e83c3d4f3cab10ecaf6d0e772f456c442393"},
+ {file = "pyrsistent-0.19.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e42296a09e83028b3476f7073fcb69ffebac0e66dbbfd1bd847d61f74db30f19"},
+ {file = "pyrsistent-0.19.3-cp311-cp311-win32.whl", hash = "sha256:64220c429e42a7150f4bfd280f6f4bb2850f95956bde93c6fda1b70507af6ef3"},
+ {file = "pyrsistent-0.19.3-cp311-cp311-win_amd64.whl", hash = "sha256:016ad1afadf318eb7911baa24b049909f7f3bb2c5b1ed7b6a8f21db21ea3faa8"},
+ {file = "pyrsistent-0.19.3-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:c4db1bd596fefd66b296a3d5d943c94f4fac5bcd13e99bffe2ba6a759d959a28"},
+ {file = "pyrsistent-0.19.3-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aeda827381f5e5d65cced3024126529ddc4289d944f75e090572c77ceb19adbf"},
+ {file = "pyrsistent-0.19.3-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:42ac0b2f44607eb92ae88609eda931a4f0dfa03038c44c772e07f43e738bcac9"},
+ {file = "pyrsistent-0.19.3-cp37-cp37m-win32.whl", hash = "sha256:e8f2b814a3dc6225964fa03d8582c6e0b6650d68a232df41e3cc1b66a5d2f8d1"},
+ {file = "pyrsistent-0.19.3-cp37-cp37m-win_amd64.whl", hash = "sha256:c9bb60a40a0ab9aba40a59f68214eed5a29c6274c83b2cc206a359c4a89fa41b"},
+ {file = "pyrsistent-0.19.3-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:a2471f3f8693101975b1ff85ffd19bb7ca7dd7c38f8a81701f67d6b4f97b87d8"},
+ {file = "pyrsistent-0.19.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cc5d149f31706762c1f8bda2e8c4f8fead6e80312e3692619a75301d3dbb819a"},
+ {file = "pyrsistent-0.19.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3311cb4237a341aa52ab8448c27e3a9931e2ee09561ad150ba94e4cfd3fc888c"},
+ {file = "pyrsistent-0.19.3-cp38-cp38-win32.whl", hash = "sha256:f0e7c4b2f77593871e918be000b96c8107da48444d57005b6a6bc61fb4331b2c"},
+ {file = "pyrsistent-0.19.3-cp38-cp38-win_amd64.whl", hash = "sha256:c147257a92374fde8498491f53ffa8f4822cd70c0d85037e09028e478cababb7"},
+ {file = "pyrsistent-0.19.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b735e538f74ec31378f5a1e3886a26d2ca6351106b4dfde376a26fc32a044edc"},
+ {file = "pyrsistent-0.19.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:99abb85579e2165bd8522f0c0138864da97847875ecbd45f3e7e2af569bfc6f2"},
+ {file = "pyrsistent-0.19.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3a8cb235fa6d3fd7aae6a4f1429bbb1fec1577d978098da1252f0489937786f3"},
+ {file = "pyrsistent-0.19.3-cp39-cp39-win32.whl", hash = "sha256:c74bed51f9b41c48366a286395c67f4e894374306b197e62810e0fdaf2364da2"},
+ {file = "pyrsistent-0.19.3-cp39-cp39-win_amd64.whl", hash = "sha256:878433581fc23e906d947a6814336eee031a00e6defba224234169ae3d3d6a98"},
+ {file = "pyrsistent-0.19.3-py3-none-any.whl", hash = "sha256:ccf0d6bd208f8111179f0c26fdf84ed7c3891982f2edaeae7422575f47e66b64"},
+ {file = "pyrsistent-0.19.3.tar.gz", hash = "sha256:1a2994773706bbb4995c31a97bc94f1418314923bd1048c6d964837040376440"},
+]
+
+[[package]]
name = "python-dateutil"
version = "2.8.2"
description = "Extensions to the standard Python datetime module"
@@ -1737,4 +1821,4 @@ multidict = ">=4.0"
[metadata]
lock-version = "2.0"
python-versions = "^3.9"
-content-hash = "17910714e3ad780ae7222b62c98539489d198aea67e5c7e4a9fc7672207f500f"
+content-hash = "3ba2a7278fda36a059d76e227be94b0cb5e2efc9396b47a9642b916680214d9f"
diff --git a/continuedev/pyproject.toml b/continuedev/pyproject.toml
index af6ff045..64d88b8c 100644
--- a/continuedev/pyproject.toml
+++ b/continuedev/pyproject.toml
@@ -21,6 +21,9 @@ urllib3 = "1.26.15"
gpt-index = "^0.6.8"
posthog = "^3.0.1"
tiktoken = "^0.4.0"
+jsonref = "^1.1.0"
+jsonschema = "^4.17.3"
+directory-tree = "^0.0.3.1"
[tool.poetry.scripts]
typegen = "src.continuedev.models.generate_json_schema:main"
diff --git a/continuedev/src/continuedev/core/abstract_sdk.py b/continuedev/src/continuedev/core/abstract_sdk.py
index 017e75ef..7bd3da6c 100644
--- a/continuedev/src/continuedev/core/abstract_sdk.py
+++ b/continuedev/src/continuedev/core/abstract_sdk.py
@@ -85,9 +85,5 @@ class AbstractContinueSDK(ABC):
pass
@abstractmethod
- def add_chat_context(self, content: str, role: ChatMessageRole = "assistant"):
- pass
-
- @abstractmethod
async def get_chat_context(self) -> List[ChatMessage]:
pass
diff --git a/continuedev/src/continuedev/core/autopilot.py b/continuedev/src/continuedev/core/autopilot.py
index ee249c0b..b9308409 100644
--- a/continuedev/src/continuedev/core/autopilot.py
+++ b/continuedev/src/continuedev/core/autopilot.py
@@ -2,6 +2,8 @@ from functools import cached_property
import traceback
import time
from typing import Any, Callable, Coroutine, Dict, List
+
+from aiohttp import ClientPayloadError
from ..models.filesystem_edit import FileEditWithFullContents
from ..libs.llm import LLM
from .observation import Observation, InternalErrorObservation
@@ -15,6 +17,21 @@ from .sdk import ContinueSDK
import asyncio
from ..libs.util.step_name_to_steps import get_step_from_name
from ..libs.util.traceback_parsers import get_python_traceback, get_javascript_traceback
+from openai import error as openai_errors
+
+
+def get_error_title(e: Exception) -> str:
+ if isinstance(e, openai_errors.APIError):
+ return "OpenAI is overloaded with requests. Please try again."
+ elif isinstance(e, openai_errors.RateLimitError):
+ return "This OpenAI API key has been rate limited. Please try again."
+ elif isinstance(e, openai_errors.Timeout):
+ return "OpenAI timed out. Please try again."
+ elif isinstance(e, openai_errors.InvalidRequestError) and e.code == "context_length_exceeded":
+ return e._message
+ elif isinstance(e, ClientPayloadError):
+ return "The request to OpenAI failed. Please try again."
+ return e.__repr__()
class Autopilot(ContinueBaseModel):
@@ -40,11 +57,14 @@ class Autopilot(ContinueBaseModel):
keep_untouched = (cached_property,)
def get_full_state(self) -> FullState:
- return FullState(history=self.history, active=self._active, user_input_queue=self._main_user_input_queue)
+ return FullState(history=self.history, active=self._active, user_input_queue=self._main_user_input_queue, default_model=self.continue_sdk.config.default_model)
async def get_available_slash_commands(self) -> List[Dict]:
return list(map(lambda x: {"name": x.name, "description": x.description}, self.continue_sdk.config.slash_commands)) or []
+ async def change_default_model(self, model: str):
+ self.continue_sdk.update_default_model(model)
+
async def clear_history(self):
self.history = History.from_empty()
self._main_user_input_queue = []
@@ -105,6 +125,7 @@ class Autopilot(ContinueBaseModel):
_step_depth: int = 0
async def retry_at_index(self, index: int):
+ self.history.timeline[index].step.hide = True
self._retry_queue.post(str(index), None)
async def delete_at_index(self, index: int):
@@ -162,7 +183,8 @@ class Autopilot(ContinueBaseModel):
error_string = e.message if is_continue_custom_exception else '\n\n'.join(
traceback.format_tb(e.__traceback__)) + f"\n\n{e.__repr__()}"
- error_title = e.title if is_continue_custom_exception else e.__repr__()
+ error_title = e.title if is_continue_custom_exception else get_error_title(
+ e)
# Attach an InternalErrorObservation to the step and unhide it.
print(f"Error while running step: \n{error_string}\n{error_title}")
@@ -181,6 +203,7 @@ class Autopilot(ContinueBaseModel):
# i is now the index of the step that we want to show/rerun
self.history.timeline[i].observation = observation
+ self.history.timeline[i].active = False
await self.update_subscribers()
@@ -205,16 +228,16 @@ class Autopilot(ContinueBaseModel):
# Add observation to history, unless already attached error observation
if not caught_error:
self.history.timeline[index_of_history_node].observation = observation
+ self.history.timeline[index_of_history_node].active = False
await self.update_subscribers()
# Update its description
- if step.description is None:
- async def update_description():
- step.description = await step.describe(self.continue_sdk.models)
- # Update subscribers with new description
- await self.update_subscribers()
+ async def update_description():
+ step.description = await step.describe(self.continue_sdk.models)
+ # Update subscribers with new description
+ await self.update_subscribers()
- asyncio.create_task(update_description())
+ asyncio.create_task(update_description())
return observation
diff --git a/continuedev/src/continuedev/core/config.py b/continuedev/src/continuedev/core/config.py
index 652320fb..ed5d785a 100644
--- a/continuedev/src/continuedev/core/config.py
+++ b/continuedev/src/continuedev/core/config.py
@@ -86,3 +86,49 @@ def load_config(config_file: str) -> ContinueConfig:
else:
raise ValueError(f'Unknown config file extension: {ext}')
return ContinueConfig(**config_dict)
+
+
+def load_global_config() -> ContinueConfig:
+ """
+ Load the global config file and return a ContinueConfig object.
+ """
+ global_dir = os.path.expanduser('~/.continue')
+ if not os.path.exists(global_dir):
+ os.mkdir(global_dir)
+
+ yaml_path = os.path.join(global_dir, 'config.yaml')
+ if os.path.exists(yaml_path):
+ with open(yaml_path, 'r') as f:
+ try:
+ config_dict = yaml.safe_load(f)
+ except:
+ return ContinueConfig()
+ else:
+ config_path = os.path.join(global_dir, 'config.json')
+ if not os.path.exists(config_path):
+ with open(config_path, 'w') as f:
+ json.dump(dict(ContinueConfig()), f)
+ with open(config_path, 'r') as f:
+ try:
+ config_dict = json.load(f)
+ except:
+ return ContinueConfig()
+ return ContinueConfig(**config_dict)
+
+
+def update_global_config(config: ContinueConfig):
+ """
+ Update the config file with the given ContinueConfig object.
+ """
+ global_dir = os.path.expanduser('~/.continue')
+ if not os.path.exists(global_dir):
+ os.mkdir(global_dir)
+
+ yaml_path = os.path.join(global_dir, 'config.yaml')
+ if os.path.exists(yaml_path):
+ with open(yaml_path, 'w') as f:
+ yaml.dump(config.dict(), f)
+ else:
+ config_path = os.path.join(global_dir, 'config.json')
+ with open(config_path, 'w') as f:
+ json.dump(config.dict(), f)
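
A minimal sketch of the round trip these helpers are meant to support, assuming ContinueConfig exposes a default_model field (as referenced elsewhere in this diff); the import path assumes the repository layout shown above and is illustrative only.

    from continuedev.src.continuedev.core.config import load_global_config, update_global_config

    config = load_global_config()      # reads ~/.continue/config.yaml or config.json, creating defaults if missing
    config.default_model = "gpt-4"     # mutate the in-memory ContinueConfig
    update_global_config(config)       # persist the change back to the same global file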
diff --git a/continuedev/src/continuedev/core/main.py b/continuedev/src/continuedev/core/main.py
index 0c7ec67f..1d2b0cad 100644
--- a/continuedev/src/continuedev/core/main.py
+++ b/continuedev/src/continuedev/core/main.py
@@ -1,18 +1,96 @@
+import json
from textwrap import dedent
from typing import Callable, Coroutine, Dict, Generator, List, Literal, Tuple, Union
from ..models.main import ContinueBaseModel
from pydantic import validator
from .observation import Observation
+from pydantic.schema import schema
-ChatMessageRole = Literal["assistant", "user", "system"]
+ChatMessageRole = Literal["assistant", "user", "system", "function"]
+
+
+class FunctionCall(ContinueBaseModel):
+ name: str
+ arguments: str
class ChatMessage(ContinueBaseModel):
role: ChatMessageRole
- content: str
+ content: Union[str, None] = None
+ name: Union[str, None] = None
# A summary for pruning chat context to fit context window. Often the Step name.
summary: str
+ function_call: Union[FunctionCall, None] = None
+
+ def to_dict(self, with_functions: bool) -> Dict:
+ d = self.dict()
+ del d["summary"]
+ if d["function_call"] is not None:
+ d["function_call"]["name"] = d["function_call"]["name"].replace(
+ " ", "")
+
+ if d["content"] is None:
+ d["content"] = ""
+ for key, value in list(d.items()):
+ if value is None:
+ del d[key]
+
+ if not with_functions:
+ d["role"] = "assistant"
+ if "name" in d:
+ del d["name"]
+ if "function_call" in d:
+ del d["function_call"]
+ return d
+
+
+def resolve_refs(schema_data):
+ def traverse(obj):
+ if isinstance(obj, dict):
+ if '$ref' in obj:
+ ref = obj['$ref']
+ parts = ref.split('/')
+ ref_obj = schema_data
+ for part in parts[1:]:
+ ref_obj = ref_obj[part]
+ return traverse(ref_obj)
+ else:
+ for key, value in obj.items():
+ obj[key] = traverse(value)
+ elif isinstance(obj, list):
+ for i in range(len(obj)):
+ obj[i] = traverse(obj[i])
+ return obj
+
+ return traverse(schema_data)
+
+
+unincluded_parameters = ["system_message", "chat_context",
+ "manage_own_chat_context", "hide", "name", "description"]
+
+
+def step_to_json_schema(step) -> str:
+ pydantic_class = step.__class__
+ schema_data = schema([pydantic_class])
+ resolved_schema = resolve_refs(schema_data)
+ parameters = resolved_schema["definitions"][pydantic_class.__name__]
+ for parameter in unincluded_parameters:
+ if parameter in parameters["properties"]:
+ del parameters["properties"][parameter]
+ return {
+ "name": step.name.replace(" ", ""),
+ "description": step.description or "",
+ "parameters": parameters
+ }
+
+
+def step_to_fn_call_arguments(step: "Step") -> str:
+ args = step.dict()
+ for parameter in unincluded_parameters:
+ if parameter in args:
+ del args[parameter]
+ return json.dumps(args)
class HistoryNode(ContinueBaseModel):
@@ -21,11 +99,20 @@ class HistoryNode(ContinueBaseModel):
observation: Union[Observation, None]
depth: int
deleted: bool = False
+ active: bool = True
def to_chat_messages(self) -> List[ChatMessage]:
- if self.step.description is None:
+ if self.step.description is None or self.step.manage_own_chat_context:
return self.step.chat_context
- return self.step.chat_context + [ChatMessage(role="assistant", content=self.step.description, summary=self.step.name)]
+ return self.step.chat_context + [
+ ChatMessage(
+ role="function",
+ name=self.step.__class__.__name__,
+ content=json.dumps({
+ "description": self.step.description or "Function complete",
+ }),
+ summary=f"Ran function {self.step.name}"
+ )]
class History(ContinueBaseModel):
@@ -113,6 +200,7 @@ class FullState(ContinueBaseModel):
history: History
active: bool
user_input_queue: List[str]
+ default_model: str
class ContinueSDK:
@@ -142,6 +230,7 @@ class Step(ContinueBaseModel):
system_message: Union[str, None] = None
chat_context: List[ChatMessage] = []
+ manage_own_chat_context: bool = False
class Config:
copy_on_model_validation = False
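
A hedged sketch of what step_to_json_schema produces: it turns a Step's pydantic fields into an OpenAI function schema, stripping internal fields. The CreateFileStep class below is illustrative and not part of the codebase; the import path assumes the repository layout.

    from continuedev.src.continuedev.core.main import Step, step_to_json_schema

    class CreateFileStep(Step):            # illustrative Step subclass, not from the repo
        name: str = "Create File"
        filename: str = ""

    print(step_to_json_schema(CreateFileStep(filename="main.py")))
    # -> {"name": "CreateFile", "description": "",
    #     "parameters": {... JSON Schema containing a "filename" property ...}}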
diff --git a/continuedev/src/continuedev/core/policy.py b/continuedev/src/continuedev/core/policy.py
index fb13dd27..0b417959 100644
--- a/continuedev/src/continuedev/core/policy.py
+++ b/continuedev/src/continuedev/core/policy.py
@@ -15,7 +15,7 @@ from ..recipes.WritePytestsRecipe.main import WritePytestsRecipe
from ..recipes.ContinueRecipeRecipe.main import ContinueStepStep
from ..steps.comment_code import CommentCodeStep
from ..steps.react import NLDecisionStep
-from ..steps.chat import SimpleChatStep
+from ..steps.chat import SimpleChatStep, ChatWithFunctions, EditFileStep, AddFileStep
from ..recipes.DDtoBQRecipe.main import DDtoBQRecipe
from ..steps.core.core import MessageStep
from ..libs.util.step_name_to_steps import get_step_from_name
@@ -28,7 +28,7 @@ class DemoPolicy(Policy):
# At the very start, run initial Steps specified in the config
if history.get_current() is None:
return (
- MessageStep(name="Welcome to Continue!", message=dedent("""\
+ MessageStep(name="Welcome to Continue", message=dedent("""\
Type '/' to see the list of available slash commands. If you highlight code, edits and explanations will be localized to the highlighted range. Otherwise, the currently open file is used. In both cases, the code is combined with the previous steps to construct the context.""")) >>
WelcomeStep() >>
# SetupContinueWorkspaceStep() >>
@@ -50,6 +50,7 @@ class DemoPolicy(Policy):
return get_step_from_name(slash_command.step_name, params)
# return EditHighlightedCodeStep(user_input=user_input)
+ return ChatWithFunctions(user_input=user_input)
return NLDecisionStep(user_input=user_input, steps=[
(EditHighlightedCodeStep(user_input=user_input),
"Edit the highlighted code"),
diff --git a/continuedev/src/continuedev/core/sdk.py b/continuedev/src/continuedev/core/sdk.py
index d6acc404..62361250 100644
--- a/continuedev/src/continuedev/core/sdk.py
+++ b/continuedev/src/continuedev/core/sdk.py
@@ -6,7 +6,7 @@ import os
from ..steps.core.core import DefaultModelEditCodeStep
from ..models.main import Range
from .abstract_sdk import AbstractContinueSDK
-from .config import ContinueConfig, load_config
+from .config import ContinueConfig, load_config, load_global_config, update_global_config
from ..models.filesystem_edit import FileEdit, FileSystemEdit, AddFile, DeleteFile, AddDirectory, DeleteDirectory
from ..models.filesystem import RangeInFile
from ..libs.llm.hf_inference_api import HuggingFaceInferenceAPI
@@ -26,6 +26,15 @@ class Models:
def __init__(self, sdk: "ContinueSDK"):
self.sdk = sdk
+ def __load_openai_model(self, model: str) -> OpenAI:
+ async def load_openai_model():
+ api_key = await self.sdk.get_user_secret(
+ 'OPENAI_API_KEY', 'Enter your OpenAI API key, OR press enter to try for free')
+ if api_key == "":
+ return ProxyServer(self.sdk.ide.unique_id, model)
+ return OpenAI(api_key=api_key, default_model=model)
+ return asyncio.get_event_loop().run_until_complete(load_openai_model())
+
@cached_property
def starcoder(self):
async def load_starcoder():
@@ -36,33 +45,19 @@ class Models:
@cached_property
def gpt35(self):
- async def load_gpt35():
- api_key = await self.sdk.get_user_secret(
- 'OPENAI_API_KEY', 'Enter your OpenAI API key, OR press enter to try for free')
- if api_key == "":
- return ProxyServer(self.sdk.ide.unique_id, "gpt-3.5-turbo")
- return OpenAI(api_key=api_key, default_model="gpt-3.5-turbo")
- return asyncio.get_event_loop().run_until_complete(load_gpt35())
+ return self.__load_openai_model("gpt-3.5-turbo")
+
+ @cached_property
+ def gpt350613(self):
+ return self.__load_openai_model("gpt-3.5-turbo-0613")
@cached_property
def gpt3516k(self):
- async def load_gpt3516k():
- api_key = await self.sdk.get_user_secret(
- 'OPENAI_API_KEY', 'Enter your OpenAI API key, OR press enter to try for free')
- if api_key == "":
- return ProxyServer(self.sdk.ide.unique_id, "gpt-3.5-turbo-16k")
- return OpenAI(api_key=api_key, default_model="gpt-3.5-turbo-16k")
- return asyncio.get_event_loop().run_until_complete(load_gpt3516k())
+ return self.__load_openai_model("gpt-3.5-turbo-16k")
@cached_property
def gpt4(self):
- async def load_gpt4():
- api_key = await self.sdk.get_user_secret(
- 'OPENAI_API_KEY', 'Enter your OpenAI API key, OR press enter to try for free')
- if api_key == "":
- return ProxyServer(self.sdk.ide.unique_id, "gpt-4")
- return OpenAI(api_key=api_key, default_model="gpt-4")
- return asyncio.get_event_loop().run_until_complete(load_gpt4())
+ return self.__load_openai_model("gpt-4")
def __model_from_name(self, model_name: str):
if model_name == "starcoder":
@@ -76,7 +71,7 @@ class Models:
else:
raise Exception(f"Unknown model {model_name}")
- @cached_property
+ @property
def default(self):
default_model = self.sdk.config.default_model
return self.__model_from_name(default_model) if default_model is not None else self.gpt35
@@ -102,7 +97,7 @@ class ContinueSDK(AbstractContinueSDK):
async def _ensure_absolute_path(self, path: str) -> str:
if os.path.isabs(path):
return path
- return os.path.join(await self.ide.getWorkspaceDirectory(), path)
+ return os.path.join(self.ide.workspace_directory, path)
async def run_step(self, step: Step) -> Coroutine[Observation, None, None]:
return await self.__autopilot._run_singular_step(step)
@@ -144,15 +139,15 @@ class ContinueSDK(AbstractContinueSDK):
return await self.run_step(FileSystemEditStep(edit=AddFile(filepath=filepath, content=content)))
async def delete_file(self, filename: str):
- filepath = await self._ensure_absolute_path(filename)
+ filename = await self._ensure_absolute_path(filename)
return await self.run_step(FileSystemEditStep(edit=DeleteFile(filepath=filename)))
async def add_directory(self, path: str):
- filepath = await self._ensure_absolute_path(path)
+ path = await self._ensure_absolute_path(path)
return await self.run_step(FileSystemEditStep(edit=AddDirectory(path=path)))
async def delete_directory(self, path: str):
- filepath = await self._ensure_absolute_path(path)
+ path = await self._ensure_absolute_path(path)
return await self.run_step(FileSystemEditStep(edit=DeleteDirectory(path=path)))
async def get_user_secret(self, env_var: str, prompt: str) -> str:
@@ -168,7 +163,12 @@ class ContinueSDK(AbstractContinueSDK):
elif os.path.exists(json_path):
return load_config(json_path)
else:
- return ContinueConfig()
+ return load_global_config()
+
+ def update_default_model(self, model: str):
+ config = self.config
+ config.default_model = model
+ update_global_config(config)
def set_loading_message(self, message: str):
# self.__autopilot.set_loading_message(message)
@@ -177,10 +177,6 @@ class ContinueSDK(AbstractContinueSDK):
def raise_exception(self, message: str, title: str, with_step: Union[Step, None] = None):
raise ContinueCustomException(message, title, with_step)
- def add_chat_context(self, content: str, summary: Union[str, None] = None, role: ChatMessageRole = "assistant"):
- self.history.timeline[self.history.current_index].step.chat_context.append(
- ChatMessage(content=content, role=role, summary=summary))
-
async def get_chat_context(self) -> List[ChatMessage]:
history_context = self.history.to_chat_history()
highlighted_code = await self.ide.getHighlightedCode()
@@ -198,8 +194,15 @@ class ContinueSDK(AbstractContinueSDK):
for rif in highlighted_code:
code = await self.ide.readRangeInFile(rif)
- history_context.append(ChatMessage(
- content=f"{preface} ({rif.filepath}):\n```\n{code}\n```", role="user", summary=f"{preface}: {rif.filepath}"))
+ msg = ChatMessage(content=f"{preface} ({rif.filepath}):\n```\n{code}\n```",
+ role="user", summary=f"{preface}: {rif.filepath}")
+
+ # Don't insert after latest user message or function call
+ i = -1
+ if history_context[i].role == "user" or history_context[i].role == "function":
+ i -= 1
+ history_context.insert(i, msg)
+
return history_context
async def update_ui(self):
diff --git a/continuedev/src/continuedev/libs/llm/__init__.py b/continuedev/src/continuedev/libs/llm/__init__.py
index 108eedf1..4c4de213 100644
--- a/continuedev/src/continuedev/libs/llm/__init__.py
+++ b/continuedev/src/continuedev/libs/llm/__init__.py
@@ -13,12 +13,12 @@ class LLM(ABC):
"""Return the completion of the text with the given temperature."""
raise NotImplementedError
- def stream_chat(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]:
- """Yield a stream of chat messages."""
+ def stream_complete(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]:
+ """Stream the completion through generator."""
raise NotImplementedError
- def with_system_message(self, system_message: Union[str, None]):
- """Return a new model with the given system message."""
+ async def stream_chat(self, messages: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]:
+ """Stream the chat through generator."""
raise NotImplementedError
def count_tokens(self, text: str):
diff --git a/continuedev/src/continuedev/libs/llm/openai.py b/continuedev/src/continuedev/libs/llm/openai.py
index 5d65eb22..a3ca5c80 100644
--- a/continuedev/src/continuedev/libs/llm/openai.py
+++ b/continuedev/src/continuedev/libs/llm/openai.py
@@ -1,28 +1,14 @@
-import asyncio
from functools import cached_property
import time
from typing import Any, Coroutine, Dict, Generator, List, Union
from ...core.main import ChatMessage
import openai
-import aiohttp
from ..llm import LLM
-from pydantic import BaseModel, validator
-import tiktoken
-
-DEFAULT_MAX_TOKENS = 2048
-MAX_TOKENS_FOR_MODEL = {
- "gpt-3.5-turbo": 4096 - DEFAULT_MAX_TOKENS,
- "gpt-3.5-turbo-16k": 16384 - DEFAULT_MAX_TOKENS,
- "gpt-4": 8192 - DEFAULT_MAX_TOKENS
-}
-CHAT_MODELS = {
- "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4"
-}
+from ..util.count_tokens import DEFAULT_MAX_TOKENS, compile_chat_messages, CHAT_MODELS, DEFAULT_ARGS, count_tokens, prune_raw_prompt_from_top
class OpenAI(LLM):
api_key: str
- completion_count: int = 0
default_model: str
def __init__(self, api_key: str, default_model: str, system_message: str = None):
@@ -36,192 +22,59 @@ class OpenAI(LLM):
def name(self):
return self.default_model
- @cached_property
- def __encoding_for_model(self):
- aliases = {
- "gpt-3.5-turbo": "gpt3"
- }
- return tiktoken.encoding_for_model(self.default_model)
+ @property
+ def default_args(self):
+ return DEFAULT_ARGS | {"model": self.default_model}
def count_tokens(self, text: str):
- return len(self.__encoding_for_model.encode(text, disallowed_special=()))
-
- def __prune_chat_history(self, chat_history: List[ChatMessage], max_tokens: int, tokens_for_completion: int):
- total_tokens = tokens_for_completion + \
- sum(self.count_tokens(message.content) for message in chat_history)
-
- # 1. Replace beyond last 5 messages with summary
- i = 0
- while total_tokens > max_tokens and i < len(chat_history) - 5:
- message = chat_history[0]
- total_tokens -= self.count_tokens(message.content)
- total_tokens += self.count_tokens(message.summary)
- message.content = message.summary
- i += 1
-
- # 2. Remove entire messages until the last 5
- while len(chat_history) > 5 and total_tokens > max_tokens:
- message = chat_history.pop(0)
- total_tokens -= self.count_tokens(message.content)
+ return count_tokens(self.default_model, text)
- # 3. Truncate message in the last 5
- i = 0
- while total_tokens > max_tokens:
- message = chat_history[0]
- total_tokens -= self.count_tokens(message.content)
- total_tokens += self.count_tokens(message.summary)
- message.content = message.summary
- i += 1
-
- # 4. Remove entire messages in the last 5
- while total_tokens > max_tokens and len(chat_history) > 0:
- message = chat_history.pop(0)
- total_tokens -= self.count_tokens(message.content)
-
- return chat_history
+ async def stream_complete(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]:
+ args = self.default_args | kwargs
+ args["stream"] = True
- def with_system_message(self, system_message: Union[str, None]):
- return OpenAI(api_key=self.api_key, default_model=self.default_model, system_message=system_message)
+ if args["model"] in CHAT_MODELS:
+ async for chunk in await openai.ChatCompletion.acreate(
+ messages=compile_chat_messages(
+ args["model"], with_history, prompt, functions=None),
+ **args,
+ ):
+ if "content" in chunk.choices[0].delta:
+ yield chunk.choices[0].delta.content
+ else:
+ continue
+ else:
+ async for chunk in await openai.Completion.acreate(prompt=prompt, **args):
+ yield chunk.choices[0].text
- async def stream_chat(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]:
- self.completion_count += 1
- args = {"max_tokens": DEFAULT_MAX_TOKENS, "temperature": 0.5, "top_p": 1,
- "frequency_penalty": 0, "presence_penalty": 0} | kwargs
+ async def stream_chat(self, messages: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]:
+ args = self.default_args | kwargs
args["stream"] = True
- args["model"] = "gpt-3.5-turbo"
+ args["model"] = self.default_model if self.default_model in CHAT_MODELS else "gpt-3.5-turbo-0613"
+ if not args["model"].endswith("0613") and "functions" in args:
+ del args["functions"]
async for chunk in await openai.ChatCompletion.acreate(
- messages=self.compile_chat_messages(with_history, prompt),
+ messages=compile_chat_messages(
+ args["model"], messages, functions=args.get("functions", None)),
**args,
):
- if "content" in chunk.choices[0].delta:
- yield chunk.choices[0].delta.content
- else:
- continue
-
- def compile_chat_messages(self, msgs: List[ChatMessage], prompt: str) -> List[Dict]:
- msgs = self.__prune_chat_history(msgs, MAX_TOKENS_FOR_MODEL[self.default_model], self.count_tokens(
- prompt) + 1000 + self.count_tokens(self.system_message or ""))
- history = []
- if self.system_message:
- history.append({
- "role": "system",
- "content": self.system_message
- })
- history += [{"role": msg.role, "content": msg.content} for msg in msgs]
- history.append({
- "role": "user",
- "content": prompt
- })
-
- return history
-
- def stream_complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]:
- self.completion_count += 1
- args = {"model": self.default_model, "max_tokens": DEFAULT_MAX_TOKENS, "temperature": 0.5,
- "top_p": 1, "frequency_penalty": 0, "presence_penalty": 0, "suffix": None} | kwargs
- args["stream"] = True
-
- if args["model"] in CHAT_MODELS:
- generator = openai.ChatCompletion.create(
- messages=self.compile_chat_messages(with_history, prompt),
- **args,
- )
- for chunk in generator:
- yield chunk.choices[0].message.content
- else:
- generator = openai.Completion.create(
- prompt=prompt,
- **args,
- )
- for chunk in generator:
- yield chunk.choices[0].text
+ yield chunk.choices[0].delta
async def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs) -> Coroutine[Any, Any, str]:
- t1 = time.time()
-
- self.completion_count += 1
- args = {"model": self.default_model, "max_tokens": DEFAULT_MAX_TOKENS, "temperature": 0.5, "top_p": 1,
- "frequency_penalty": 0, "presence_penalty": 0, "stream": False} | kwargs
+ args = self.default_args | kwargs
if args["model"] in CHAT_MODELS:
resp = (await openai.ChatCompletion.acreate(
- messages=self.compile_chat_messages(with_history, prompt),
+ messages=compile_chat_messages(
+ args["model"], with_history, prompt, functions=None),
**args,
)).choices[0].message.content
else:
resp = (await openai.Completion.acreate(
- prompt=prompt,
+ prompt=prune_raw_prompt_from_top(
+ args["model"], prompt, args["max_tokens"]),
**args,
)).choices[0].text
- t2 = time.time()
- print("Completion time:", t2 - t1)
return resp
-
- def edit(self, inp: str, instruction: str) -> str:
- try:
- resp = openai.Edit.create(
- input=inp,
- instruction=instruction,
- model='text-davinci-edit-001'
- ).choices[0].text
- return resp
- except Exception as e:
- print("OpenAI error:", e)
- raise e
-
- def parallel_edit(self, inputs: list[str], instructions: Union[List[str], str], **kwargs) -> list[str]:
- args = {"temperature": 0.5, "top_p": 1} | kwargs
- args['model'] = 'text-davinci-edit-001'
-
- async def fn():
- async with aiohttp.ClientSession() as session:
- tasks = []
-
- async def get(input, instruction):
- async with session.post("https://api.openai.com/v1/edits", headers={
- "Content-Type": "application/json",
- "Authorization": "Bearer " + self.api_key
- }, json={"model": args["model"], "input": input, "instruction": instruction, "temperature": args["temperature"], "max_tokens": args["max_tokens"], "suffix": args["suffix"]}) as resp:
- json = await resp.json()
- if "error" in json:
- print("ERROR IN GPT-3 RESPONSE: ", json)
- return None
- return json["choices"][0]["text"]
-
- for i in range(len(inputs)):
- tasks.append(get(inputs[i], instructions[i] if isinstance(
- instructions, list) else instructions))
-
- return await asyncio.gather(*tasks)
-
- return asyncio.run(fn())
-
- def parallel_complete(self, prompts: list[str], suffixes: Union[list[str], None] = None, **kwargs) -> list[str]:
- self.completion_count += len(prompts)
- args = {"model": self.default_model, "max_tokens": DEFAULT_MAX_TOKENS, "temperature": 0.5,
- "top_p": 1, "frequency_penalty": 0, "presence_penalty": 0} | kwargs
-
- async def fn():
- async with aiohttp.ClientSession() as session:
- tasks = []
-
- async def get(prompt, suffix):
- async with session.post("https://api.openai.com/v1/completions", headers={
- "Content-Type": "application/json",
- "Authorization": "Bearer " + self.api_key
- }, json={"model": args["model"], "prompt": prompt, "temperature": args["temperature"], "max_tokens": args["max_tokens"], "suffix": suffix}) as resp:
- json = await resp.json()
- if "error" in json:
- print("ERROR IN GPT-3 RESPONSE: ", json)
- return None
- return json["choices"][0]["text"]
-
- for i in range(len(prompts)):
- tasks.append(asyncio.ensure_future(
- get(prompts[i], suffixes[i] if suffixes else None)))
-
- return await asyncio.gather(*tasks)
-
- return asyncio.run(fn())
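
A rough usage sketch of the reworked streaming interface above; the API key is a placeholder and the message content is made up. Each yielded value is the raw choices[0].delta object from the OpenAI stream.

    import asyncio
    from continuedev.src.continuedev.core.main import ChatMessage
    from continuedev.src.continuedev.libs.llm.openai import OpenAI

    async def demo():
        llm = OpenAI(api_key="sk-...", default_model="gpt-3.5-turbo")   # placeholder key
        messages = [ChatMessage(role="user", content="Say hello", summary="greeting")]
        async for delta in llm.stream_chat(messages):
            # each delta is the raw `choices[0].delta` object from the OpenAI stream
            print(delta.get("content", ""), end="")

    asyncio.run(demo())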
diff --git a/continuedev/src/continuedev/libs/llm/proxy_server.py b/continuedev/src/continuedev/libs/llm/proxy_server.py
index 84c94d62..ccdb2002 100644
--- a/continuedev/src/continuedev/libs/llm/proxy_server.py
+++ b/continuedev/src/continuedev/libs/llm/proxy_server.py
@@ -1,21 +1,10 @@
from functools import cached_property
import json
from typing import Any, Coroutine, Dict, Generator, List, Literal, Union
-import requests
-import tiktoken
import aiohttp
-
from ...core.main import ChatMessage
from ..llm import LLM
-
-MAX_TOKENS_FOR_MODEL = {
- "gpt-3.5-turbo": 4097,
- "gpt-4": 4097,
-}
-DEFAULT_MAX_TOKENS = 2048
-CHAT_MODELS = {
- "gpt-3.5-turbo", "gpt-4"
-}
+from ..util.count_tokens import DEFAULT_ARGS, DEFAULT_MAX_TOKENS, compile_chat_messages, CHAT_MODELS, count_tokens
# SERVER_URL = "http://127.0.0.1:8080"
SERVER_URL = "https://proxy-server-l6vsfbzhba-uw.a.run.app"
@@ -32,64 +21,65 @@ class ProxyServer(LLM):
self.system_message = system_message
self.name = default_model
- @cached_property
- def __encoding_for_model(self):
- aliases = {
- "gpt-3.5-turbo": "gpt3"
- }
- return tiktoken.encoding_for_model(self.default_model)
+ @property
+ def default_args(self):
+ return DEFAULT_ARGS | {"model": self.default_model}
def count_tokens(self, text: str):
- return len(self.__encoding_for_model.encode(text, disallowed_special=()))
-
- def __prune_chat_history(self, chat_history: List[ChatMessage], max_tokens: int, tokens_for_completion: int):
- tokens = tokens_for_completion
- for i in range(len(chat_history) - 1, -1, -1):
- message = chat_history[i]
- tokens += self.count_tokens(message.content)
- if tokens > max_tokens:
- return chat_history[i + 1:]
- return chat_history
-
- def compile_chat_messages(self, msgs: List[ChatMessage], prompt: str) -> List[Dict]:
- msgs = self.__prune_chat_history(msgs, MAX_TOKENS_FOR_MODEL[self.default_model], self.count_tokens(
- prompt) + 1000 + self.count_tokens(self.system_message or ""))
- history = []
- if self.system_message:
- history.append({
- "role": "system",
- "content": self.system_message
- })
- history += [{"role": msg.role, "content": msg.content} for msg in msgs]
- history.append({
- "role": "user",
- "content": prompt
- })
-
- return history
+ return count_tokens(self.default_model, text)
async def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs) -> Coroutine[Any, Any, str]:
+ args = self.default_args | kwargs
+
async with aiohttp.ClientSession() as session:
async with session.post(f"{SERVER_URL}/complete", json={
- "chat_history": self.compile_chat_messages(with_history, prompt),
- "model": self.default_model,
+ "messages": compile_chat_messages(args["model"], with_history, prompt, functions=None),
"unique_id": self.unique_id,
+ **args
}) as resp:
try:
return json.loads(await resp.text())
- except json.JSONDecodeError:
+ except:
raise Exception(await resp.text())
- async def stream_chat(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]:
+ async def stream_chat(self, messages: List[ChatMessage] = [], **kwargs) -> Coroutine[Any, Any, Generator[Union[Any, List, Dict], None, None]]:
+ args = self.default_args | kwargs
+ messages = compile_chat_messages(
+ self.default_model, messages, None, functions=args.get("functions", None))
+
+ async with aiohttp.ClientSession() as session:
+ async with session.post(f"{SERVER_URL}/stream_chat", json={
+ "messages": messages,
+ "unique_id": self.unique_id,
+ **args
+ }) as resp:
+ # This is streaming application/json instead of text/event-stream
+ async for line in resp.content.iter_chunks():
+ if line[1]:
+ try:
+ json_chunk = line[0].decode("utf-8")
+ json_chunk = "{}" if json_chunk == "" else json_chunk
+ chunks = json_chunk.split("\n")
+ for chunk in chunks:
+ if chunk.strip() != "":
+ yield json.loads(chunk)
+ except:
+ raise Exception(str(line[0]))
+
+ async def stream_complete(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]:
+ args = self.default_args | kwargs
+ messages = compile_chat_messages(
+ self.default_model, with_history, prompt, functions=args.get("functions", None))
+
async with aiohttp.ClientSession() as session:
async with session.post(f"{SERVER_URL}/stream_complete", json={
- "chat_history": self.compile_chat_messages(with_history, prompt),
- "model": self.default_model,
+ "messages": messages,
"unique_id": self.unique_id,
+ **args
}) as resp:
- async for line in resp.content:
+ async for line in resp.content.iter_any():
if line:
try:
yield line.decode("utf-8")
- except json.JSONDecodeError:
+ except:
raise Exception(str(line))
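
Because the proxy streams application/json rather than text/event-stream, each received chunk may contain several newline-delimited JSON objects. A minimal illustration of the parsing convention used above, with a made-up payload:

    import json

    raw_chunk = '{"content": "Hel"}\n{"content": "lo"}\n'   # made-up example payload
    for part in raw_chunk.split("\n"):
        if part.strip() != "":
            print(json.loads(part))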
diff --git a/continuedev/src/continuedev/libs/util/count_tokens.py b/continuedev/src/continuedev/libs/util/count_tokens.py
new file mode 100644
index 00000000..047a47e4
--- /dev/null
+++ b/continuedev/src/continuedev/libs/util/count_tokens.py
@@ -0,0 +1,101 @@
+import json
+from typing import Dict, List, Union
+from ...core.main import ChatMessage
+import tiktoken
+
+aliases = {}
+DEFAULT_MAX_TOKENS = 2048
+MAX_TOKENS_FOR_MODEL = {
+ "gpt-3.5-turbo": 4096,
+ "gpt-3.5-turbo-0613": 4096,
+ "gpt-3.5-turbo-16k": 16384,
+ "gpt-4": 8192
+}
+CHAT_MODELS = {
+ "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-3.5-turbo-0613"
+}
+DEFAULT_ARGS = {"max_tokens": DEFAULT_MAX_TOKENS, "temperature": 0.5, "top_p": 1,
+ "frequency_penalty": 0, "presence_penalty": 0}
+
+
+def encoding_for_model(model: str):
+ return tiktoken.encoding_for_model(aliases.get(model, model))
+
+
+def count_tokens(model: str, text: Union[str, None]):
+ if text is None:
+ return 0
+ encoding = encoding_for_model(model)
+ return len(encoding.encode(text, disallowed_special=()))
+
+
+def prune_raw_prompt_from_top(model: str, prompt: str, tokens_for_completion: int):
+ max_tokens = MAX_TOKENS_FOR_MODEL.get(
+ model, DEFAULT_MAX_TOKENS) - tokens_for_completion
+ encoding = encoding_for_model(model)
+ tokens = encoding.encode(prompt, disallowed_special=())
+ if len(tokens) <= max_tokens:
+ return prompt
+ else:
+ return encoding.decode(tokens[-max_tokens:])
+
+
+def prune_chat_history(model: str, chat_history: List[ChatMessage], max_tokens: int, tokens_for_completion: int):
+ total_tokens = tokens_for_completion + \
+ sum(count_tokens(model, message.content)
+ for message in chat_history)
+
+ # 1. Replace beyond last 5 messages with summary
+ i = 0
+ while total_tokens > max_tokens and i < len(chat_history) - 5:
+ message = chat_history[0]
+ total_tokens -= count_tokens(model, message.content)
+ total_tokens += count_tokens(model, message.summary)
+ message.content = message.summary
+ i += 1
+
+ # 2. Remove entire messages until the last 5
+ while len(chat_history) > 5 and total_tokens > max_tokens and len(chat_history) > 0:
+ message = chat_history.pop(0)
+ total_tokens -= count_tokens(model, message.content)
+
+ # 3. Truncate message in the last 5
+ i = 0
+ while total_tokens > max_tokens and len(chat_history) > 0 and i < len(chat_history):
+ message = chat_history[i]
+ total_tokens -= count_tokens(model, message.content)
+ total_tokens += count_tokens(model, message.summary)
+ message.content = message.summary
+ i += 1
+
+ # 4. Remove entire messages in the last 5
+ while total_tokens > max_tokens and len(chat_history) > 0:
+ message = chat_history.pop(0)
+ total_tokens -= count_tokens(model, message.content)
+
+ return chat_history
+
+
+def compile_chat_messages(model: str, msgs: List[ChatMessage], prompt: Union[str, None] = None, functions: Union[List, None] = None, system_message: Union[str, None] = None) -> List[Dict]:
+ prompt_tokens = count_tokens(model, prompt)
+ if functions is not None:
+ for function in functions:
+ prompt_tokens += count_tokens(model, json.dumps(function))
+
+ msgs = prune_chat_history(model,
+ msgs, MAX_TOKENS_FOR_MODEL[model], prompt_tokens + 1000 + count_tokens(model, system_message))
+ history = []
+ if system_message:
+ history.append({
+ "role": "system",
+ "content": system_message
+ })
+ history += [msg.to_dict(with_functions=functions is not None)
+ for msg in msgs]
+ if prompt:
+ history.append({
+ "role": "user",
+ "content": prompt
+ })
+
+ return history
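
An illustrative use of the new helpers (message contents are made up; import paths assume the repository layout). compile_chat_messages returns plain dicts ready for the OpenAI API, pruned so that system message, history, and prompt leave room for the completion.

    from continuedev.src.continuedev.core.main import ChatMessage
    from continuedev.src.continuedev.libs.util.count_tokens import (
        DEFAULT_MAX_TOKENS, compile_chat_messages, prune_raw_prompt_from_top)

    history = [ChatMessage(role="user", content="Explain this traceback", summary="traceback question")]
    msgs = compile_chat_messages("gpt-3.5-turbo", history,
                                 prompt="Now suggest a fix", system_message="You are Continue.")

    # Raw (non-chat) prompts are instead truncated from the top to fit the context window:
    long_prompt = "x" * 100_000
    truncated = prune_raw_prompt_from_top("gpt-3.5-turbo", long_prompt, DEFAULT_MAX_TOKENS)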
diff --git a/continuedev/src/continuedev/libs/util/telemetry.py b/continuedev/src/continuedev/libs/util/telemetry.py
index 03ec93c6..bd9fde9d 100644
--- a/continuedev/src/continuedev/libs/util/telemetry.py
+++ b/continuedev/src/continuedev/libs/util/telemetry.py
@@ -1,12 +1,22 @@
+from typing import Any
from posthog import Posthog
from ...core.config import load_config
+import os
+from dotenv import load_dotenv
+
+load_dotenv()
+in_codespaces = os.getenv("CODESPACES") == "true"
# The personal API key is necessary only if you want to use local evaluation of feature flags.
posthog = Posthog('phc_JS6XFROuNbhJtVCEdTSYk6gl5ArRrTNMpCcguAXlSPs',
host='https://app.posthog.com')
-def capture_event(unique_id: str, event_name, event_properties):
+def capture_event(unique_id: str, event_name: str, event_properties: Any):
config = load_config('.continue/config.json')
- if config.allow_anonymous_telemetry:
- posthog.capture(unique_id, event_name, event_properties)
+ if not config.allow_anonymous_telemetry:
+ return
+
+ if in_codespaces:
+ event_properties['codespaces'] = True
+ posthog.capture(unique_id, event_name, event_properties)
diff --git a/continuedev/src/continuedev/models/filesystem.py b/continuedev/src/continuedev/models/filesystem.py
index ede636c5..b709dd21 100644
--- a/continuedev/src/continuedev/models/filesystem.py
+++ b/continuedev/src/continuedev/models/filesystem.py
@@ -100,7 +100,7 @@ class FileSystem(AbstractModel):
@classmethod
def read_range_in_str(self, s: str, r: Range) -> str:
- lines = s.splitlines()[r.start.line:r.end.line + 1]
+ lines = s.split("\n")[r.start.line:r.end.line + 1]
if len(lines) == 0:
return ""
diff --git a/continuedev/src/continuedev/models/main.py b/continuedev/src/continuedev/models/main.py
index fceba284..c9011b29 100644
--- a/continuedev/src/continuedev/models/main.py
+++ b/continuedev/src/continuedev/models/main.py
@@ -76,6 +76,12 @@ class Range(BaseModel):
def overlaps_with(self, other: "Range") -> bool:
return not (self.end < other.start or self.start > other.end)
+ def to_full_lines(self) -> "Range":
+ return Range(
+ start=Position(line=self.start.line, character=0),
+ end=Position(line=self.end.line + 1, character=0)
+ )
+
@staticmethod
def from_indices(string: str, start_index: int, end_index: int) -> "Range":
return Range(
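
A quick illustration of the new to_full_lines helper (positions are made up): it widens a range to whole lines by zeroing the start character and moving the end to the start of the following line.

    from continuedev.src.continuedev.models.main import Position, Range  # assumed import path

    r = Range(start=Position(line=2, character=5), end=Position(line=4, character=3))
    r.to_full_lines()
    # -> Range(start=Position(line=2, character=0), end=Position(line=5, character=0))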
diff --git a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py
index 91515dc2..60218ef9 100644
--- a/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py
+++ b/continuedev/src/continuedev/recipes/CreatePipelineRecipe/steps.py
@@ -139,14 +139,8 @@ class ValidatePipelineStep(Step):
# connect to DuckDB instance
conn = duckdb.connect(database="{source_name}.duckdb")
- conn.execute("SET search_path = '{source_name}_data';")
-
- # get table names
- rows = conn.execute("SELECT * FROM _dlt_loads;").fetchall()
-
- # print table names
- for row in rows:
- print(row)''')
+ # list all tables
+ print(conn.sql("DESCRIBE"))''')
query_filename = os.path.join(workspace_dir, "query.py")
await sdk.apply_filesystem_edit(AddFile(filepath=query_filename, content=tables_query_code), name="Add query.py file", description="Adding a file called `query.py` to the workspace that will run a test query on the DuckDB instance")
diff --git a/continuedev/src/continuedev/server/gui.py b/continuedev/src/continuedev/server/gui.py
index cf046734..cc6235e9 100644
--- a/continuedev/src/continuedev/server/gui.py
+++ b/continuedev/src/continuedev/server/gui.py
@@ -77,6 +77,8 @@ class GUIProtocolServer(AbstractGUIProtocolServer):
self.on_reverse_to_index(data["index"])
elif message_type == "retry_at_index":
self.on_retry_at_index(data["index"])
+ elif message_type == "change_default_model":
+ self.on_change_default_model(data["model"])
elif message_type == "clear_history":
self.on_clear_history()
elif message_type == "delete_at_index":
@@ -116,6 +118,9 @@ class GUIProtocolServer(AbstractGUIProtocolServer):
asyncio.create_task(
self.session.autopilot.retry_at_index(index))
+ def on_change_default_model(self, model: str):
+ asyncio.create_task(self.session.autopilot.change_default_model(model))
+
def on_clear_history(self):
asyncio.create_task(self.session.autopilot.clear_history())
diff --git a/continuedev/src/continuedev/server/gui_protocol.py b/continuedev/src/continuedev/server/gui_protocol.py
index d9506c6f..66839d9b 100644
--- a/continuedev/src/continuedev/server/gui_protocol.py
+++ b/continuedev/src/continuedev/server/gui_protocol.py
@@ -36,6 +36,10 @@ class AbstractGUIProtocolServer(ABC):
"""Called when the user requests a retry at a previous index"""
@abstractmethod
+ def on_change_default_model(self):
+ """Called when the user requests to change the default model"""
+
+ @abstractmethod
def on_clear_history(self):
"""Called when the user requests to clear the history"""
diff --git a/continuedev/src/continuedev/steps/chat.py b/continuedev/src/continuedev/steps/chat.py
index fd7457d9..54d9c657 100644
--- a/continuedev/src/continuedev/steps/chat.py
+++ b/continuedev/src/continuedev/steps/chat.py
@@ -1,8 +1,19 @@
-from textwrap import dedent
-from typing import List
-from ..core.main import Step
-from ..core.sdk import ContinueSDK
+import json
+from typing import Any, Coroutine, List
+
+from .main import EditHighlightedCodeStep
from .core.core import MessageStep
+from ..core.main import FunctionCall, Models
+from ..core.main import ChatMessage, Step, step_to_json_schema
+from ..core.sdk import ContinueSDK
+import openai
+import os
+from dotenv import load_dotenv
+from directory_tree import display_tree
+
+load_dotenv()
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+openai.api_key = OPENAI_API_KEY
class SimpleChatStep(Step):
@@ -13,9 +24,205 @@ class SimpleChatStep(Step):
self.description = f"```{self.user_input}```\n\n"
await sdk.update_ui()
- async for chunk in sdk.models.default.stream_chat(self.user_input, with_history=await sdk.get_chat_context()):
+ async for chunk in sdk.models.default.stream_complete(self.user_input, with_history=await sdk.get_chat_context()):
self.description += chunk
await sdk.update_ui()
self.name = (await sdk.models.gpt35.complete(
f"Write a short title for the following chat message: {self.description}")).strip()
+
+
+class AddFileStep(Step):
+ name: str = "Add File"
+ description = "Add a file to the workspace."
+ filename: str
+ file_contents: str
+
+ async def describe(self, models: Models) -> Coroutine[Any, Any, Coroutine[str, None, None]]:
+ return f"Added a file named `{self.filename}` to the workspace."
+
+ async def run(self, sdk: ContinueSDK):
+ try:
+ await sdk.add_file(self.filename, self.file_contents)
+ except FileNotFoundError:
+ self.description = f"File {self.filename} does not exist."
+ return
+ currently_open_file = (await sdk.ide.getOpenFiles())[0]
+ await sdk.ide.setFileOpen(os.path.join(sdk.ide.workspace_directory, self.filename))
+ await sdk.ide.setFileOpen(currently_open_file)
+
+
+class DeleteFileStep(Step):
+ name: str = "Delete File"
+ description = "Delete a file from the workspace."
+ filename: str
+
+ async def describe(self, models: Models) -> Coroutine[Any, Any, Coroutine[str, None, None]]:
+ return f"Deleted a file named `{self.filename}` from the workspace."
+
+ async def run(self, sdk: ContinueSDK):
+ await sdk.delete_file(self.filename)
+
+
+class AddDirectoryStep(Step):
+ name: str = "Add Directory"
+ description = "Add a directory to the workspace."
+ directory_name: str
+
+ async def describe(self, models: Models) -> Coroutine[Any, Any, Coroutine[str, None, None]]:
+ return f"Added a directory named `{self.directory_name}` to the workspace."
+
+ async def run(self, sdk: ContinueSDK):
+ try:
+ await sdk.add_directory(self.directory_name)
+ except FileExistsError:
+ self.description = f"Directory {self.directory_name} already exists."
+
+
+class RunTerminalCommandStep(Step):
+ name: str = "Run Terminal Command"
+ description: str = "Run a terminal command."
+ command: str
+
+ async def describe(self, models: Models) -> Coroutine[Any, Any, Coroutine[str, None, None]]:
+ return f"Ran the terminal command `{self.command}`."
+
+ async def run(self, sdk: ContinueSDK):
+ await sdk.wait_for_user_confirmation(f"Run the following terminal command?\n\n```bash\n{self.command}\n```")
+ await sdk.run(self.command)
+
+
+class ViewDirectoryTreeStep(Step):
+ name: str = "View Directory Tree"
+ description: str = "View the directory tree to learn which folder and files exist."
+
+ async def describe(self, models: Models) -> Coroutine[Any, Any, Coroutine[str, None, None]]:
+ return f"Viewed the directory tree."
+
+ async def run(self, sdk: ContinueSDK):
+ self.description = f"```\n{display_tree(sdk.ide.workspace_directory, True)}\n```"
+
+
+class EditFileStep(Step):
+ name: str = "Edit File"
+ description: str = "Edit a file in the workspace that is not currently open."
+ filename: str
+ instructions: str
+ hide: bool = True
+
+ async def run(self, sdk: ContinueSDK):
+ await sdk.edit_file(self.filename, self.instructions)
+
+
+class ChatWithFunctions(Step):
+ user_input: str
+ functions: List[Step] = [AddFileStep(filename="", file_contents=""),
+ EditFileStep(filename="", instructions=""),
+ EditHighlightedCodeStep(user_input=""),
+ ViewDirectoryTreeStep(), AddDirectoryStep(directory_name=""),
+ DeleteFileStep(filename=""), RunTerminalCommandStep(command="")]
+ name: str = "Chat"
+ manage_own_chat_context: bool = True
+
+ async def run(self, sdk: ContinueSDK):
+ self.description = f"```{self.user_input}```\n\nDeciding next steps...\n\n"
+ await sdk.update_ui()
+
+ step_name_step_class_map = {
+ step.name.replace(" ", ""): step.__class__ for step in self.functions}
+
+ functions = [step_to_json_schema(
+ function) for function in self.functions]
+
+ self.chat_context.append(ChatMessage(
+ role="user",
+ content=self.user_input,
+ summary=self.user_input
+ ))
+
+ last_function_called_index_in_history = None
+ while True:
+ was_function_called = False
+ func_args = ""
+ func_name = ""
+ msg_content = ""
+ msg_step = None
+
+ async for msg_chunk in sdk.models.gpt350613.stream_chat(await sdk.get_chat_context(), functions=functions):
+ if "content" in msg_chunk and msg_chunk["content"] is not None:
+ msg_content += msg_chunk["content"]
+ # if last_function_called_index_in_history is not None:
+ # while sdk.history.timeline[last_function_called_index].step.hide:
+ # last_function_called_index += 1
+ # sdk.history.timeline[last_function_called_index_in_history].step.description = msg_content
+ if msg_step is None:
+ msg_step = MessageStep(
+ name="Chat",
+ message=msg_chunk["content"]
+ )
+ await sdk.run_step(msg_step)
+ else:
+ msg_step.description = msg_content
+ await sdk.update_ui()
+ elif "function_call" in msg_chunk or func_name != "":
+ was_function_called = True
+ if "function_call" in msg_chunk:
+ if "arguments" in msg_chunk["function_call"]:
+ func_args += msg_chunk["function_call"]["arguments"]
+ if "name" in msg_chunk["function_call"]:
+ func_name += msg_chunk["function_call"]["name"]
+
+ if not was_function_called:
+ self.chat_context.append(ChatMessage(
+ role="assistant",
+ content=msg_content,
+ summary=msg_content
+ ))
+ break
+ else:
+ if func_name == "python" and "python" not in step_name_step_class_map:
+                    # The model sometimes hallucinates a "python" function (likely an artifact of its fine-tuning); fall back to editing the highlighted code instead
+ func_name = "EditHighlightedCodeStep"
+ func_args = json.dumps({"user_input": self.user_input})
+ # self.chat_context.append(ChatMessage(
+ # role="assistant",
+ # content=None,
+ # function_call=FunctionCall(
+ # name=func_name,
+ # arguments=func_args
+ # ),
+ # summary=f"Ran function {func_name}"
+ # ))
+ # self.chat_context.append(ChatMessage(
+ # role="user",
+ # content="The 'python' function does not exist. Don't call it. Try again to call another function.",
+ # summary="'python' function does not exist."
+ # ))
+ # msg_step.hide = True
+ # continue
+ # Call the function, then continue to chat
+ func_args = "{}" if func_args == "" else func_args
+ fn_call_params = json.loads(func_args)
+ self.chat_context.append(ChatMessage(
+ role="assistant",
+ content=None,
+ function_call=FunctionCall(
+ name=func_name,
+ arguments=func_args
+ ),
+ summary=f"Ran function {func_name}"
+ ))
+ last_function_called_index_in_history = sdk.history.current_index + 1
+ step_to_run = step_name_step_class_map[func_name](
+ **fn_call_params)
+
+ if func_name == "AddFileStep":
+ step_to_run.hide = True
+                    self.description += f"\nAdded file `{fn_call_params['filename']}`"
+ elif func_name == "AddDirectoryStep":
+ step_to_run.hide = True
+                    self.description += f"\nAdded directory `{fn_call_params['directory_name']}`"
+ else:
+ self.description += f"\n`Running function {func_name}`\n\n"
+ await sdk.run_step(step_to_run)
+ await sdk.update_ui()
diff --git a/continuedev/src/continuedev/steps/core/core.py b/continuedev/src/continuedev/steps/core/core.py
index 417398b7..0d82b228 100644
--- a/continuedev/src/continuedev/steps/core/core.py
+++ b/continuedev/src/continuedev/steps/core/core.py
@@ -10,7 +10,7 @@ from ...models.filesystem_edit import EditDiff, FileEdit, FileEditWithFullConten
from ...models.filesystem import FileSystem, RangeInFile, RangeInFileWithContents
from ...core.observation import Observation, TextObservation, TracebackObservation, UserInputObservation
from ...core.main import Step, SequentialStep
-from ...libs.llm.openai import MAX_TOKENS_FOR_MODEL
+from ...libs.util.count_tokens import MAX_TOKENS_FOR_MODEL, DEFAULT_MAX_TOKENS
import difflib
@@ -116,37 +116,39 @@ class DefaultModelEditCodeStep(Step):
name: str = "Editing Code"
hide = False
_prompt: str = dedent("""\
- Take the file prefix and suffix into account, but only rewrite the commit before as specified in the commit message. Here's an example:
+ Take the file prefix and suffix into account, but only rewrite the code_to_edit as specified in the user_request. The code you write in modified_code_to_edit will replace the code between the code_to_edit tags. Do NOT preface your answer or write anything other than code. The </modified_code_to_edit> tag should be written to indicate the end of the modified code section. Do not ever use nested tags.
- <file_prefix>
- a = 5
- b = 4
-
- <file_suffix>
-
- def mul(a, b):
- return a * b
- <commit_before>
- def sum():
- return a + b
- <commit_msg>
- Make a and b parameters of sum
- <commit_after>
- def sum(a, b):
- return a + b
- <|endoftext|>
-
- Now complete the real thing. Do NOT rewrite the prefix or suffix. You are only to write the code that goes in "commit_after".
+ Example:
<file_prefix>
- {file_prefix}
+ class Database:
+ def __init__(self):
+ self._data = {{}}
+
+ def get(self, key):
+ return self._data[key]
+
+ </file_prefix>
+ <code_to_edit>
+ def set(self, key, value):
+ self._data[key] = value
+ </code_to_edit>
<file_suffix>
- {file_suffix}
- <commit_before>
- {code}
- <commit_msg>
- {user_request}
- <commit_after>
+
+ def clear_all():
+ self._data = {{}}
+ </file_suffix>
+ <user_request>
+ Raise an error if the key already exists.
+ </user_request>
+ <modified_code_to_edit>
+ def set(self, key, value):
+ if key in self._data:
+ raise KeyError(f"Key {{key}} already exists")
+ self._data[key] = value
+ </modified_code_to_edit>
+
+ Main task:
""")
_prompt_and_completion: str = ""
@@ -154,14 +156,19 @@ class DefaultModelEditCodeStep(Step):
async def describe(self, models: Models) -> Coroutine[str, None, None]:
description = await models.gpt35.complete(
f"{self._prompt_and_completion}\n\nPlease give brief a description of the changes made above using markdown bullet points. Be concise and only mention changes made to the commit before, not prefix or suffix:")
- return description
+ self.name = await models.gpt35.complete(f"Write a very short title to describe this requested change: '{self.user_input}'. This is the title:")
+ return f"`{self.user_input}`\n\n" + description
async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]:
- self.name = self.user_input
+        self.description = self.user_input
await sdk.update_ui()
rif_with_contents = []
- for range_in_file in self.range_in_files:
+ for range_in_file in map(lambda x: RangeInFile(
+ filepath=x.filepath,
+                # Expand the range to cover full lines. (We may later skip this when only a single line is highlighted.)
+ range=x.range.to_full_lines()
+ ), self.range_in_files):
file_contents = await sdk.ide.readRangeInFile(range_in_file)
rif_with_contents.append(
RangeInFileWithContents.from_range_in_file(range_in_file, file_contents))
@@ -174,7 +181,7 @@ class DefaultModelEditCodeStep(Step):
await sdk.ide.setFileOpen(rif.filepath)
model_to_use = sdk.models.default
-
+
full_file_contents = await sdk.ide.readFile(rif.filepath)
full_file_contents_lst = full_file_contents.split("\n")
@@ -185,60 +192,178 @@ class DefaultModelEditCodeStep(Step):
cur_end_line = len(full_file_contents_lst) - 1
def cut_context(model_to_use, total_tokens, cur_start_line, cur_end_line):
-
+
if total_tokens > MAX_TOKENS_FOR_MODEL[model_to_use.name]:
while cur_end_line > min_end_line:
- total_tokens -= model_to_use.count_tokens(full_file_contents_lst[cur_end_line])
+ total_tokens -= model_to_use.count_tokens(
+ full_file_contents_lst[cur_end_line])
cur_end_line -= 1
if total_tokens < MAX_TOKENS_FOR_MODEL[model_to_use.name]:
return cur_start_line, cur_end_line
-
+
if total_tokens > MAX_TOKENS_FOR_MODEL[model_to_use.name]:
while cur_start_line < max_start_line:
cur_start_line += 1
- total_tokens -= model_to_use.count_tokens(full_file_contents_lst[cur_end_line])
+                    # Subtract the tokens of the line just dropped from the start of the context
+                    total_tokens -= model_to_use.count_tokens(
+                        full_file_contents_lst[cur_start_line - 1])
if total_tokens < MAX_TOKENS_FOR_MODEL[model_to_use.name]:
return cur_start_line, cur_end_line
-
- return cur_start_line, cur_end_line
-
- if model_to_use.name == "gpt-4":
-
- total_tokens = model_to_use.count_tokens(full_file_contents)
- cur_start_line, cur_end_line = cut_context(model_to_use, total_tokens, cur_start_line, cur_end_line)
- elif model_to_use.name == "gpt-3.5-turbo" or model_to_use.name == "gpt-3.5-turbo-16k":
+ return cur_start_line, cur_end_line
- if sdk.models.gpt35.count_tokens(full_file_contents) > MAX_TOKENS_FOR_MODEL["gpt-3.5-turbo"]:
+            # We don't know the exact size of the functions that will be sent along with this prompt.
+            # If the prompt itself exceeds the limit, prune_chat_messages in count_tokens.py will drop
+            # the entire message rather than letting us trim only as many file lines as needed,
+            # so leave a buffer for the functions and the completion.
+ BUFFER_FOR_FUNCTIONS = 200
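+            # Estimate the full request: file contents + prompt template + user input, plus the room reserved for the completion and functions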
+ total_tokens = model_to_use.count_tokens(
+ full_file_contents + self._prompt + self.user_input) + DEFAULT_MAX_TOKENS + BUFFER_FOR_FUNCTIONS
+ model_to_use = sdk.models.default
+ if model_to_use.name == "gpt-3.5-turbo":
+ if total_tokens > MAX_TOKENS_FOR_MODEL["gpt-3.5-turbo"]:
model_to_use = sdk.models.gpt3516k
- total_tokens = model_to_use.count_tokens(full_file_contents)
- cur_start_line, cur_end_line = cut_context(model_to_use, total_tokens, cur_start_line, cur_end_line)
- else:
+ cur_start_line, cur_end_line = cut_context(
+ model_to_use, total_tokens, cur_start_line, cur_end_line)
- raise Exception("Unknown default model")
-
- code_before = "".join(full_file_contents_lst[cur_start_line:max_start_line])
- code_after = "".join(full_file_contents_lst[min_end_line:cur_end_line])
+ code_before = "\n".join(
+ full_file_contents_lst[cur_start_line:max_start_line])
+ code_after = "\n".join(
+ full_file_contents_lst[min_end_line:cur_end_line - 1])
segs = [code_before, code_after]
-
- prompt = self._prompt.format(
- code=rif.contents, user_request=self.user_input, file_prefix=segs[0], file_suffix=segs[1])
-
- completion = str(await model_to_use.complete(prompt, with_history=await sdk.get_chat_context()))
-
- eot_token = "<|endoftext|>"
- completion = completion.removesuffix(eot_token)
-
- # Remove tags and If it accidentally includes prefix or suffix, remove it
- if completion.strip().startswith("```"):
- completion = completion.strip().removeprefix("```").removesuffix("```")
- completion = completion.replace("<file_prefix>", "").replace("<file_suffix>", "").replace(
- "<commit_before>", "").replace("<commit_msg>", "").replace("<commit_after>", "")
- completion = completion.removeprefix(segs[0])
- completion = completion.removesuffix(segs[1])
+ if segs[0].strip() == "":
+ segs[0] = segs[0].strip()
+ if segs[1].strip() == "":
+ segs[1] = segs[1].strip()
+
+ # Move any surrounding blank line in rif.contents to the prefix/suffix
+ if len(rif.contents) > 0:
+ first_line = rif.contents.splitlines(keepends=True)[0]
+ while first_line.strip() == "":
+ segs[0] += first_line
+ rif.contents = rif.contents[len(first_line):]
+ first_line = rif.contents.splitlines(keepends=True)[0]
+
+ last_line = rif.contents.splitlines(keepends=True)[-1]
+ while last_line.strip() == "":
+ segs[1] = last_line + segs[1]
+ rif.contents = rif.contents[:len(
+ rif.contents) - len(last_line)]
+ last_line = rif.contents.splitlines(keepends=True)[-1]
+
+ while rif.contents.startswith("\n"):
+ segs[0] += "\n"
+ rif.contents = rif.contents[1:]
+ while rif.contents.endswith("\n"):
+ segs[1] = "\n" + segs[1]
+ rif.contents = rif.contents[:-1]
+
+ # .format(code=rif.contents, user_request=self.user_input, file_prefix=segs[0], file_suffix=segs[1])
+ prompt = self._prompt
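+            # Append only the non-empty sections, so the model never sees empty
+            # <file_prefix>/<file_suffix> tags.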
+ if segs[0].strip() != "":
+ prompt += dedent(f"""
+<file_prefix>
+{segs[0]}
+</file_prefix>""")
+ prompt += dedent(f"""
+<code_to_edit>
+{rif.contents}
+</code_to_edit>""")
+ if segs[1].strip() != "":
+ prompt += dedent(f"""
+<file_suffix>
+{segs[1]}
+</file_suffix>""")
+ prompt += dedent(f"""
+<user_request>
+{self.user_input}
+</user_request>
+<modified_code_to_edit>
+""")
+
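+            # Stream the completion and apply it line by line, diffing against the original lines as we go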
+ lines = []
+ unfinished_line = ""
+ i = 0
+ original_lines = rif.contents.split("\n")
+
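+            # Apply one generated line to the editor: replace the corresponding original line
+            # if one exists, otherwise insert a new line.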
+ async def add_line(i: int, line: str):
+ if i == 0:
+                    # Prepend the original first line's indentation; the model assumes it is replacing the line from the highlighted start character, so it omits that indentation
+ line = original_lines[0].replace(
+ original_lines[0].strip(), "") + line
+
+ if i < len(original_lines):
+ # Replace original line
+ range = Range.from_shorthand(
+ rif.range.start.line + i, rif.range.start.character if i == 0 else 0, rif.range.start.line + i + 1, 0)
+ else:
+ # Insert a line
+ range = Range.from_shorthand(
+ rif.range.start.line + i, 0, rif.range.start.line + i, 0)
+
+ await sdk.ide.applyFileSystemEdit(FileEdit(
+ filepath=rif.filepath,
+ range=range,
+ replacement=line + "\n"
+ ))
+
+ lines_of_prefix_copied = 0
+ line_below_highlighted_range = segs[1].lstrip().split("\n")[0]
+ should_stop = False
+ async for chunk in model_to_use.stream_complete(prompt, with_history=await sdk.get_chat_context(), temperature=0):
+ if should_stop:
+ break
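+                # Reassemble complete lines across chunk boundaries; a trailing partial line is carried in unfinished_line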
+ chunk_lines = chunk.split("\n")
+ chunk_lines[0] = unfinished_line + chunk_lines[0]
+ if chunk.endswith("\n"):
+ unfinished_line = ""
+ chunk_lines.pop() # because this will be an empty string
+ else:
+ unfinished_line = chunk_lines.pop()
+ lines.extend(chunk_lines)
+
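+                # Skip echoed prompt tags and lines repeated from the prefix or the original code,
+                # and stop once the model starts reproducing the line below the highlighted range.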
+ for line in chunk_lines:
+ if "</modified_code_to_edit>" in line:
+ break
+                    elif any(tag in line for tag in ("```", "<modified_code_to_edit>", "</modified_code_to_edit>", "<file_prefix>", "</file_prefix>", "<file_suffix>", "</file_suffix>", "<user_request>", "</user_request>", "<code_to_edit>", "</code_to_edit>")):
+                        continue
+ elif (lines_of_prefix_copied > 0 or i == 0) and lines_of_prefix_copied < len(segs[0].splitlines()) and line == full_file_contents_lst[lines_of_prefix_copied]:
+                        # Sketchy way to stop the model from repeating the file_prefix; this misfires if the output legitimately contains a matching line
+ lines_of_prefix_copied += 1
+ continue
+ elif i < len(original_lines) and line == original_lines[i]:
+ i += 1
+ continue
+                    # Heuristic: only stop on the line below the highlighted range if it is reasonably long, since very short lines are often legitimately repeated
+ elif line.strip() == line_below_highlighted_range.strip() and len(line.strip()) > 4:
+ should_stop = True
+ break
+ await add_line(i, line)
+ i += 1
+
+ # Add the unfinished line
+ if unfinished_line != "":
+                # Strip any prompt tags the model may have echoed into the final partial line
+                for tag in ("<modified_code_to_edit>", "</modified_code_to_edit>", "<code_to_edit>", "</code_to_edit>",
+                            "<file_prefix>", "</file_prefix>", "<file_suffix>", "</file_suffix>", "```"):
+                    unfinished_line = unfinished_line.replace(tag, "")
+                if i >= len(original_lines) or unfinished_line != original_lines[i]:
+ await add_line(i, unfinished_line)
+ lines.append(unfinished_line)
+ i += 1
+
+ # Remove the leftover original lines
+ while i < len(original_lines):
+ range = Range.from_shorthand(
+ rif.range.start.line + i, rif.range.start.character, rif.range.start.line + i, len(original_lines[i]) + 1)
+ await sdk.ide.applyFileSystemEdit(FileEdit(
+ filepath=rif.filepath,
+ range=range,
+ replacement=""
+ ))
+ i += 1
+
+ completion = "\n".join(lines)
self._prompt_and_completion += prompt + completion
@@ -256,16 +381,10 @@ class DefaultModelEditCodeStep(Step):
elif line.startswith(" "):
index += 1
- await sdk.ide.applyFileSystemEdit(FileEdit(
- filepath=rif.filepath,
- range=rif.range,
- replacement=completion
- ))
-
current_hl_start = None
last_hl = None
rifs_to_highlight = []
- for line in sorted(list(lines_to_highlight)):
+ for line in lines_to_highlight:
if current_hl_start is None:
current_hl_start = line
elif line != last_hl + 1:
diff --git a/continuedev/src/continuedev/steps/main.py b/continuedev/src/continuedev/steps/main.py
index 5ba86c53..5caac180 100644
--- a/continuedev/src/continuedev/steps/main.py
+++ b/continuedev/src/continuedev/steps/main.py
@@ -1,7 +1,7 @@
import os
from typing import Coroutine, List, Union
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
from ..libs.llm import LLM
from ..models.main import Traceback, Range
@@ -246,8 +246,10 @@ class StarCoderEditHighlightedCodeStep(Step):
class EditHighlightedCodeStep(Step):
- user_input: str
+ user_input: str = Field(
+ ..., title="User Input", description="The natural language request describing how to edit the code")
hide = True
+ description: str = "Change the contents of the currently highlighted code or open file"
async def describe(self, models: Models) -> Coroutine[str, None, None]:
return "Editing code"
diff --git a/continuedev/src/continuedev/steps/on_traceback.py b/continuedev/src/continuedev/steps/on_traceback.py
index 053b4ef4..3f8c5a76 100644
--- a/continuedev/src/continuedev/steps/on_traceback.py
+++ b/continuedev/src/continuedev/steps/on_traceback.py
@@ -1,5 +1,5 @@
import os
-from ..core.main import Step
+from ..core.main import ChatMessage, Step
from ..core.sdk import ContinueSDK
from .chat import SimpleChatStep
@@ -16,7 +16,11 @@ class DefaultOnTracebackStep(Step):
for seg in segs:
if seg.startswith(os.path.sep) and os.path.exists(seg) and os.path.commonprefix([seg, sdk.ide.workspace_directory]) == sdk.ide.workspace_directory:
file_contents = await sdk.ide.readFile(seg)
- await sdk.add_chat_context(f"The contents of {seg}:\n```\n{file_contents}\n```", "", "user")
+ self.chat_context.append(ChatMessage(
+ role="user",
+ content=f"The contents of {seg}:\n```\n{file_contents}\n```",
+ summary=""
+ ))
await sdk.run_step(SimpleChatStep(
name="Help With Traceback",