From 3ded151331933c9a1352cc46c3cc67c5733d1c86 Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Sun, 16 Jul 2023 16:16:41 -0700 Subject: ggml --- continuedev/src/continuedev/core/sdk.py | 6 ++ continuedev/src/continuedev/libs/llm/ggml.py | 99 ++++++++++++++++++++++ .../src/continuedev/libs/util/count_tokens.py | 7 +- continuedev/src/continuedev/steps/chat.py | 2 +- continuedev/src/continuedev/steps/core/core.py | 8 +- 5 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 continuedev/src/continuedev/libs/llm/ggml.py (limited to 'continuedev') diff --git a/continuedev/src/continuedev/core/sdk.py b/continuedev/src/continuedev/core/sdk.py index 8649cd58..22393746 100644 --- a/continuedev/src/continuedev/core/sdk.py +++ b/continuedev/src/continuedev/core/sdk.py @@ -11,6 +11,7 @@ from ..models.filesystem_edit import FileEdit, FileSystemEdit, AddFile, DeleteFi from ..models.filesystem import RangeInFile from ..libs.llm.hf_inference_api import HuggingFaceInferenceAPI from ..libs.llm.openai import OpenAI +from ..libs.llm.ggml import GGML from .observation import Observation from ..server.ide_protocol import AbstractIdeProtocolServer from .main import Context, ContinueCustomException, HighlightedRangeContext, History, Step, ChatMessage, ChatMessageRole @@ -59,6 +60,10 @@ class Models: def gpt4(self): return self.__load_openai_model("gpt-4") + @cached_property + def ggml(self): + return GGML("", "ggml") + def __model_from_name(self, model_name: str): if model_name == "starcoder": return self.starcoder @@ -73,6 +78,7 @@ class Models: @property def default(self): + return self.ggml default_model = self.sdk.config.default_model return self.__model_from_name(default_model) if default_model is not None else self.gpt35 diff --git a/continuedev/src/continuedev/libs/llm/ggml.py b/continuedev/src/continuedev/libs/llm/ggml.py new file mode 100644 index 00000000..bef0d993 --- /dev/null +++ b/continuedev/src/continuedev/libs/llm/ggml.py @@ -0,0 +1,99 @@ +from functools import cached_property +import json +from typing import Any, Coroutine, Dict, Generator, List, Union + +import aiohttp +from ...core.main import ChatMessage +import openai +from ..llm import LLM +from ..util.count_tokens import DEFAULT_MAX_TOKENS, compile_chat_messages, CHAT_MODELS, DEFAULT_ARGS, count_tokens, prune_raw_prompt_from_top +import certifi +import ssl + +ca_bundle_path = certifi.where() +ssl_context = ssl.create_default_context(cafile=ca_bundle_path) + +SERVER_URL = "http://localhost:8000" + + +class GGML(LLM): + api_key: str + default_model: str + + def __init__(self, api_key: str, default_model: str, system_message: str = None): + self.api_key = api_key + self.default_model = default_model + self.system_message = system_message + + openai.api_key = api_key + + @cached_property + def name(self): + return self.default_model + + @property + def default_args(self): + return {**DEFAULT_ARGS, "model": self.default_model} + + def count_tokens(self, text: str): + return count_tokens(self.default_model, text) + + async def stream_complete(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: + args = self.default_args.copy() + args.update(kwargs) + args["stream"] = True + + args = {**self.default_args, **kwargs} + messages = compile_chat_messages( + self.default_model, with_history, args["max_tokens"], prompt, functions=args.get("functions", None)) + + async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl_context=ssl_context)) as session: + async with 
session.post(f"{SERVER_URL}/v1/completions", json={ + "messages": messages, + **args + }) as resp: + async for line in resp.content.iter_any(): + if line: + try: + yield line.decode("utf-8") + except: + raise Exception(str(line)) + + async def stream_chat(self, messages: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: + args = {**self.default_args, **kwargs} + messages = compile_chat_messages( + self.default_model, messages, args["max_tokens"], None, functions=args.get("functions", None)) + args["stream"] = True + + async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl_context=ssl_context)) as session: + async with session.post(f"{SERVER_URL}/v1/chat/completions", json={ + "messages": messages, + **args + }) as resp: + # This is streaming application/json instaed of text/event-stream + async for line in resp.content.iter_chunks(): + if line[1]: + try: + json_chunk = line[0].decode("utf-8") + if json_chunk.startswith(": ping - ") or json_chunk.startswith("data: [DONE]"): + continue + json_chunk = "{}" if json_chunk == "" else json_chunk + chunks = json_chunk.split("\n") + for chunk in chunks: + if chunk.strip() != "": + yield json.loads(chunk[6:])["choices"][0]["delta"] + except: + raise Exception(str(line[0])) + + async def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs) -> Coroutine[Any, Any, str]: + args = {**self.default_args, **kwargs} + + async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl_context=ssl_context)) as session: + async with session.post(f"{SERVER_URL}/v1/completions", json={ + "messages": compile_chat_messages(args["model"], with_history, args["max_tokens"], prompt, functions=None), + **args + }) as resp: + try: + return await resp.text() + except: + raise Exception(await resp.text()) diff --git a/continuedev/src/continuedev/libs/util/count_tokens.py b/continuedev/src/continuedev/libs/util/count_tokens.py index 73be0717..e1baeca1 100644 --- a/continuedev/src/continuedev/libs/util/count_tokens.py +++ b/continuedev/src/continuedev/libs/util/count_tokens.py @@ -3,13 +3,16 @@ from typing import Dict, List, Union from ...core.main import ChatMessage import tiktoken -aliases = {} +aliases = { + "ggml": "gpt-3.5-turbo", +} DEFAULT_MAX_TOKENS = 2048 MAX_TOKENS_FOR_MODEL = { "gpt-3.5-turbo": 4096, "gpt-3.5-turbo-0613": 4096, "gpt-3.5-turbo-16k": 16384, - "gpt-4": 8192 + "gpt-4": 8192, + "ggml": 2048 } CHAT_MODELS = { "gpt-3.5-turbo", "gpt-3.5-turbo-16k", "gpt-4", "gpt-3.5-turbo-0613" diff --git a/continuedev/src/continuedev/steps/chat.py b/continuedev/src/continuedev/steps/chat.py index a10319d8..1df1e0bf 100644 --- a/continuedev/src/continuedev/steps/chat.py +++ b/continuedev/src/continuedev/steps/chat.py @@ -27,7 +27,7 @@ class SimpleChatStep(Step): async def run(self, sdk: ContinueSDK): completion = "" messages = self.messages or await sdk.get_chat_context() - async for chunk in sdk.models.gpt4.stream_chat(messages, temperature=0.5): + async for chunk in sdk.models.default.stream_chat(messages, temperature=0.5): if sdk.current_step_was_deleted(): return diff --git a/continuedev/src/continuedev/steps/core/core.py b/continuedev/src/continuedev/steps/core/core.py index 4b35a758..0b067d7d 100644 --- a/continuedev/src/continuedev/steps/core/core.py +++ b/continuedev/src/continuedev/steps/core/core.py @@ -4,6 +4,7 @@ import subprocess from textwrap import dedent from typing import Coroutine, List, Literal, Union +from ...libs.llm.ggml import GGML from ...models.main import Range from 
...libs.llm.prompt_utils import MarkdownStyleEncoderDecoder from ...models.filesystem_edit import EditDiff, FileEdit, FileEditWithFullContents, FileSystemEdit @@ -180,7 +181,7 @@ class DefaultModelEditCodeStep(Step): # We don't know here all of the functions being passed in. # We care because if this prompt itself goes over the limit, then the entire message will have to be cut from the completion. # Overflow won't happen, but prune_chat_messages in count_tokens.py will cut out this whole thing, instead of us cutting out only as many lines as we need. - model_to_use = sdk.models.gpt4 + model_to_use = sdk.models.default max_tokens = DEFAULT_MAX_TOKENS TOKENS_TO_BE_CONSIDERED_LARGE_RANGE = 1200 @@ -442,6 +443,11 @@ class DefaultModelEditCodeStep(Step): completion_lines_covered = 0 repeating_file_suffix = False line_below_highlighted_range = file_suffix.lstrip().split("\n")[0] + + if isinstance(model_to_use, GGML): + messages = [ChatMessage( + role="user", content=f"```\n{rif.contents}\n```\n{self.user_input}\n```\n", summary=self.user_input)] + async for chunk in model_to_use.stream_chat(messages, temperature=0, max_tokens=max_tokens): # Stop early if it is repeating the file_suffix or the step was deleted if repeating_file_suffix: -- cgit v1.2.3-70-g09d2 From a3f4a2a59d6785499f3ce0c4af80b57b02de1b1f Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Sun, 16 Jul 2023 16:55:24 -0700 Subject: better prompt for editing --- continuedev/src/continuedev/core/config.py | 2 +- continuedev/src/continuedev/core/sdk.py | 5 ++-- continuedev/src/continuedev/libs/llm/ggml.py | 33 ++++++++------------------ continuedev/src/continuedev/steps/core/core.py | 5 ++-- 4 files changed, 17 insertions(+), 28 deletions(-) (limited to 'continuedev') diff --git a/continuedev/src/continuedev/core/config.py b/continuedev/src/continuedev/core/config.py index 6e430c04..957609c5 100644 --- a/continuedev/src/continuedev/core/config.py +++ b/continuedev/src/continuedev/core/config.py @@ -76,7 +76,7 @@ class ContinueConfig(BaseModel): server_url: Optional[str] = None allow_anonymous_telemetry: Optional[bool] = True default_model: Literal["gpt-3.5-turbo", "gpt-3.5-turbo-16k", - "gpt-4"] = 'gpt-4' + "gpt-4", "ggml"] = 'gpt-4' custom_commands: Optional[List[CustomCommand]] = [CustomCommand( name="test", description="This is an example custom command. 
Use /config to edit it and create more", diff --git a/continuedev/src/continuedev/core/sdk.py b/continuedev/src/continuedev/core/sdk.py index 9389e1e9..eb60109c 100644 --- a/continuedev/src/continuedev/core/sdk.py +++ b/continuedev/src/continuedev/core/sdk.py @@ -82,7 +82,7 @@ class Models: @cached_property def ggml(self): - return GGML("", "ggml") + return GGML() def __model_from_name(self, model_name: str): if model_name == "starcoder": @@ -93,12 +93,13 @@ class Models: return self.gpt3516k elif model_name == "gpt-4": return self.gpt4 + elif model_name == "ggml": + return self.ggml else: raise Exception(f"Unknown model {model_name}") @property def default(self): - return self.ggml default_model = self.sdk.config.default_model return self.__model_from_name(default_model) if default_model is not None else self.gpt4 diff --git a/continuedev/src/continuedev/libs/llm/ggml.py b/continuedev/src/continuedev/libs/llm/ggml.py index bef0d993..d3589b70 100644 --- a/continuedev/src/continuedev/libs/llm/ggml.py +++ b/continuedev/src/continuedev/libs/llm/ggml.py @@ -4,39 +4,27 @@ from typing import Any, Coroutine, Dict, Generator, List, Union import aiohttp from ...core.main import ChatMessage -import openai from ..llm import LLM -from ..util.count_tokens import DEFAULT_MAX_TOKENS, compile_chat_messages, CHAT_MODELS, DEFAULT_ARGS, count_tokens, prune_raw_prompt_from_top -import certifi -import ssl - -ca_bundle_path = certifi.where() -ssl_context = ssl.create_default_context(cafile=ca_bundle_path) +from ..util.count_tokens import compile_chat_messages, DEFAULT_ARGS, count_tokens SERVER_URL = "http://localhost:8000" class GGML(LLM): - api_key: str - default_model: str - def __init__(self, api_key: str, default_model: str, system_message: str = None): - self.api_key = api_key - self.default_model = default_model + def __init__(self, system_message: str = None): self.system_message = system_message - openai.api_key = api_key - @cached_property def name(self): - return self.default_model + return "ggml" @property def default_args(self): - return {**DEFAULT_ARGS, "model": self.default_model} + return {**DEFAULT_ARGS, "model": self.name, "max_tokens": 1024} def count_tokens(self, text: str): - return count_tokens(self.default_model, text) + return count_tokens(self.name, text) async def stream_complete(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: args = self.default_args.copy() @@ -45,9 +33,9 @@ class GGML(LLM): args = {**self.default_args, **kwargs} messages = compile_chat_messages( - self.default_model, with_history, args["max_tokens"], prompt, functions=args.get("functions", None)) + self.name, with_history, args["max_tokens"], prompt, functions=args.get("functions", None)) - async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl_context=ssl_context)) as session: + async with aiohttp.ClientSession() as session: async with session.post(f"{SERVER_URL}/v1/completions", json={ "messages": messages, **args @@ -62,10 +50,10 @@ class GGML(LLM): async def stream_chat(self, messages: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: args = {**self.default_args, **kwargs} messages = compile_chat_messages( - self.default_model, messages, args["max_tokens"], None, functions=args.get("functions", None)) + self.name, messages, args["max_tokens"], None, functions=args.get("functions", None)) args["stream"] = True - async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl_context=ssl_context)) as 
session: + async with aiohttp.ClientSession() as session: async with session.post(f"{SERVER_URL}/v1/chat/completions", json={ "messages": messages, **args @@ -77,7 +65,6 @@ class GGML(LLM): json_chunk = line[0].decode("utf-8") if json_chunk.startswith(": ping - ") or json_chunk.startswith("data: [DONE]"): continue - json_chunk = "{}" if json_chunk == "" else json_chunk chunks = json_chunk.split("\n") for chunk in chunks: if chunk.strip() != "": @@ -88,7 +75,7 @@ class GGML(LLM): async def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs) -> Coroutine[Any, Any, str]: args = {**self.default_args, **kwargs} - async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl_context=ssl_context)) as session: + async with aiohttp.ClientSession() as session: async with session.post(f"{SERVER_URL}/v1/completions", json={ "messages": compile_chat_messages(args["model"], with_history, args["max_tokens"], prompt, functions=None), **args diff --git a/continuedev/src/continuedev/steps/core/core.py b/continuedev/src/continuedev/steps/core/core.py index 2c9d8c01..d5a7cd9a 100644 --- a/continuedev/src/continuedev/steps/core/core.py +++ b/continuedev/src/continuedev/steps/core/core.py @@ -192,7 +192,8 @@ class DefaultModelEditCodeStep(Step): # We care because if this prompt itself goes over the limit, then the entire message will have to be cut from the completion. # Overflow won't happen, but prune_chat_messages in count_tokens.py will cut out this whole thing, instead of us cutting out only as many lines as we need. model_to_use = sdk.models.default - max_tokens = DEFAULT_MAX_TOKENS + max_tokens = MAX_TOKENS_FOR_MODEL.get( + model_to_use.name, DEFAULT_MAX_TOKENS) / 2 TOKENS_TO_BE_CONSIDERED_LARGE_RANGE = 1200 if model_to_use.count_tokens(rif.contents) > TOKENS_TO_BE_CONSIDERED_LARGE_RANGE: @@ -498,7 +499,7 @@ Please output the code to be inserted at the cursor in order to fulfill the user if isinstance(model_to_use, GGML): messages = [ChatMessage( - role="user", content=f"```\n{rif.contents}\n```\n{self.user_input}\n```\n", summary=self.user_input)] + role="user", content=f"```\n{rif.contents}\n```\n\nUser request: \"{self.user_input}\"\n\nThis is the code after changing to perfectly comply with the user request. 
It does not include any placeholder code, only real implementations:\n\n```\n", summary=self.user_input)] generator = model_to_use.stream_chat( messages, temperature=0, max_tokens=max_tokens) -- cgit v1.2.3-70-g09d2 From 2c27204336a2ca112c3c84058544d3b8656ac70f Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Sun, 16 Jul 2023 22:12:44 -0700 Subject: templated system messages --- continuedev/poetry.lock | 65 +++++++++++++++++++++- continuedev/pyproject.toml | 3 +- continuedev/src/continuedev/core/config.py | 1 + continuedev/src/continuedev/core/sdk.py | 10 ++-- continuedev/src/continuedev/libs/llm/ggml.py | 6 +- .../src/continuedev/libs/llm/hf_inference_api.py | 3 +- continuedev/src/continuedev/libs/llm/openai.py | 6 +- .../src/continuedev/libs/llm/proxy_server.py | 6 +- .../src/continuedev/libs/util/count_tokens.py | 7 ++- .../src/continuedev/libs/util/templating.py | 39 +++++++++++++ 10 files changed, 127 insertions(+), 19 deletions(-) create mode 100644 continuedev/src/continuedev/libs/util/templating.py (limited to 'continuedev') diff --git a/continuedev/poetry.lock b/continuedev/poetry.lock index a49a570f..625aabc9 100644 --- a/continuedev/poetry.lock +++ b/continuedev/poetry.lock @@ -297,6 +297,18 @@ files = [ {file = "charset_normalizer-3.1.0-py3-none-any.whl", hash = "sha256:3d9098b479e78c85080c98e1e35ff40b4a31d8953102bb0fd7d1b6f8a2111a3d"}, ] +[[package]] +name = "chevron" +version = "0.14.0" +description = "Mustache templating language renderer" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "chevron-0.14.0-py3-none-any.whl", hash = "sha256:fbf996a709f8da2e745ef763f482ce2d311aa817d287593a5b990d6d6e4f0443"}, + {file = "chevron-0.14.0.tar.gz", hash = "sha256:87613aafdf6d77b6a90ff073165a61ae5086e21ad49057aa0e53681601800ebf"}, +] + [[package]] name = "click" version = "8.1.3" @@ -600,6 +612,25 @@ files = [ {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, ] +[[package]] +name = "importlib-resources" +version = "6.0.0" +description = "Read resources from Python packages" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "importlib_resources-6.0.0-py3-none-any.whl", hash = "sha256:d952faee11004c045f785bb5636e8f885bed30dc3c940d5d42798a2a4541c185"}, + {file = "importlib_resources-6.0.0.tar.gz", hash = "sha256:4cf94875a8368bd89531a756df9a9ebe1f150e0f885030b461237bc7f2d905f2"}, +] + +[package.dependencies] +zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] + [[package]] name = "jsonref" version = "1.1.0" @@ -626,6 +657,8 @@ files = [ [package.dependencies] attrs = ">=17.4.0" +importlib-resources = {version = ">=1.4.0", markers = "python_version < \"3.9\""} +pkgutil-resolve-name = {version = ">=1.3.10", markers = "python_version < \"3.9\""} pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2" [package.extras] @@ -1024,6 +1057,18 @@ sql-other = ["SQLAlchemy (>=1.4.16)"] test = ["hypothesis (>=6.34.2)", "pytest (>=7.0.0)", "pytest-asyncio (>=0.17.0)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.6.3)"] +[[package]] +name = "pkgutil-resolve-name" +version = "1.3.10" +description = "Resolve a name to an object." 
+category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pkgutil_resolve_name-1.3.10-py3-none-any.whl", hash = "sha256:ca27cc078d25c5ad71a9de0a7a330146c4e014c2462d9af19c6b828280649c5e"}, + {file = "pkgutil_resolve_name-1.3.10.tar.gz", hash = "sha256:357d6c9e6a755653cfd78893817c0853af365dd51ec97f3d358a819373bbd174"}, +] + [[package]] name = "posthog" version = "3.0.1" @@ -1818,7 +1863,23 @@ files = [ idna = ">=2.0" multidict = ">=4.0" +[[package]] +name = "zipp" +version = "3.16.2" +description = "Backport of pathlib-compatible object wrapper for zip files" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "zipp-3.16.2-py3-none-any.whl", hash = "sha256:679e51dd4403591b2d6838a48de3d283f3d188412a9782faadf845f298736ba0"}, + {file = "zipp-3.16.2.tar.gz", hash = "sha256:ebc15946aa78bd63458992fc81ec3b6f7b1e92d51c35e6de1c3804e73b799147"}, +] + +[package.extras] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] + [metadata] lock-version = "2.0" -python-versions = "^3.9" -content-hash = "3ba2a7278fda36a059d76e227be94b0cb5e2efc9396b47a9642b916680214d9f" +python-versions = "^3.8.1" +content-hash = "82510deb9f4afb5bc38db0dfd88ad88005fa0b6221c24e8c1700c006360f3f88" diff --git a/continuedev/pyproject.toml b/continuedev/pyproject.toml index 6727e29a..3077de1c 100644 --- a/continuedev/pyproject.toml +++ b/continuedev/pyproject.toml @@ -6,7 +6,7 @@ authors = ["Nate Sesti "] readme = "README.md" [tool.poetry.dependencies] -python = "^3.8" +python = "^3.8.1" diff-match-patch = "^20230430" fastapi = "^0.95.1" typer = "^0.7.0" @@ -24,6 +24,7 @@ tiktoken = "^0.4.0" jsonref = "^1.1.0" jsonschema = "^4.17.3" directory-tree = "^0.0.3.1" +chevron = "^0.14.0" [tool.poetry.scripts] typegen = "src.continuedev.models.generate_json_schema:main" diff --git a/continuedev/src/continuedev/core/config.py b/continuedev/src/continuedev/core/config.py index 957609c5..91a47c8e 100644 --- a/continuedev/src/continuedev/core/config.py +++ b/continuedev/src/continuedev/core/config.py @@ -85,6 +85,7 @@ class ContinueConfig(BaseModel): slash_commands: Optional[List[SlashCommand]] = DEFAULT_SLASH_COMMANDS on_traceback: Optional[List[OnTracebackSteps]] = [ OnTracebackSteps(step_name="DefaultOnTracebackStep")] + system_message: Optional[str] = None # Want to force these to be the slash commands for now @validator('slash_commands', pre=True) diff --git a/continuedev/src/continuedev/core/sdk.py b/continuedev/src/continuedev/core/sdk.py index eb60109c..ac57c122 100644 --- a/continuedev/src/continuedev/core/sdk.py +++ b/continuedev/src/continuedev/core/sdk.py @@ -34,10 +34,12 @@ MODEL_PROVIDER_TO_ENV_VAR = { class Models: provider_keys: Dict[ModelProvider, str] = {} model_providers: List[ModelProvider] + system_message: str def __init__(self, sdk: "ContinueSDK", model_providers: List[ModelProvider]): self.sdk = sdk self.model_providers = model_providers + self.system_message = sdk.config.system_message @classmethod async def create(cls, sdk: "ContinueSDK", with_providers: List[ModelProvider] = ["openai"]) -> "Models": @@ -53,12 +55,12 @@ class Models: def __load_openai_model(self, model: str) -> OpenAI: api_key = self.provider_keys["openai"] if api_key == 
"": - return ProxyServer(self.sdk.ide.unique_id, model) - return OpenAI(api_key=api_key, default_model=model) + return ProxyServer(self.sdk.ide.unique_id, model, system_message=self.system_message) + return OpenAI(api_key=api_key, default_model=model, system_message=self.system_message) def __load_hf_inference_api_model(self, model: str) -> HuggingFaceInferenceAPI: api_key = self.provider_keys["hf_inference_api"] - return HuggingFaceInferenceAPI(api_key=api_key, model=model) + return HuggingFaceInferenceAPI(api_key=api_key, model=model, system_message=self.system_message) @cached_property def starcoder(self): @@ -82,7 +84,7 @@ class Models: @cached_property def ggml(self): - return GGML() + return GGML(system_message=self.system_message) def __model_from_name(self, model_name: str): if model_name == "starcoder": diff --git a/continuedev/src/continuedev/libs/llm/ggml.py b/continuedev/src/continuedev/libs/llm/ggml.py index d3589b70..6007fdb4 100644 --- a/continuedev/src/continuedev/libs/llm/ggml.py +++ b/continuedev/src/continuedev/libs/llm/ggml.py @@ -33,7 +33,7 @@ class GGML(LLM): args = {**self.default_args, **kwargs} messages = compile_chat_messages( - self.name, with_history, args["max_tokens"], prompt, functions=args.get("functions", None)) + self.name, with_history, args["max_tokens"], prompt, functions=args.get("functions", None), system_message=self.system_message) async with aiohttp.ClientSession() as session: async with session.post(f"{SERVER_URL}/v1/completions", json={ @@ -50,7 +50,7 @@ class GGML(LLM): async def stream_chat(self, messages: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: args = {**self.default_args, **kwargs} messages = compile_chat_messages( - self.name, messages, args["max_tokens"], None, functions=args.get("functions", None)) + self.name, messages, args["max_tokens"], None, functions=args.get("functions", None), system_message=self.system_message) args["stream"] = True async with aiohttp.ClientSession() as session: @@ -77,7 +77,7 @@ class GGML(LLM): async with aiohttp.ClientSession() as session: async with session.post(f"{SERVER_URL}/v1/completions", json={ - "messages": compile_chat_messages(args["model"], with_history, args["max_tokens"], prompt, functions=None), + "messages": compile_chat_messages(args["model"], with_history, args["max_tokens"], prompt, functions=None, system_message=self.system_message), **args }) as resp: try: diff --git a/continuedev/src/continuedev/libs/llm/hf_inference_api.py b/continuedev/src/continuedev/libs/llm/hf_inference_api.py index 803ba122..7e11fbbe 100644 --- a/continuedev/src/continuedev/libs/llm/hf_inference_api.py +++ b/continuedev/src/continuedev/libs/llm/hf_inference_api.py @@ -11,9 +11,10 @@ class HuggingFaceInferenceAPI(LLM): api_key: str model: str - def __init__(self, api_key: str, model: str): + def __init__(self, api_key: str, model: str, system_message: str = None): self.api_key = api_key self.model = model + self.system_message = system_message # TODO: Nothing being done with this def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs): """Return the completion of the text with the given temperature.""" diff --git a/continuedev/src/continuedev/libs/llm/openai.py b/continuedev/src/continuedev/libs/llm/openai.py index f0877d90..d973f19e 100644 --- a/continuedev/src/continuedev/libs/llm/openai.py +++ b/continuedev/src/continuedev/libs/llm/openai.py @@ -37,7 +37,7 @@ class OpenAI(LLM): if args["model"] in CHAT_MODELS: async for chunk in await 
openai.ChatCompletion.acreate( messages=compile_chat_messages( - args["model"], with_history, args["max_tokens"], prompt, functions=None), + args["model"], with_history, args["max_tokens"], prompt, functions=None, system_message=self.system_message), **args, ): if "content" in chunk.choices[0].delta: @@ -58,7 +58,7 @@ class OpenAI(LLM): async for chunk in await openai.ChatCompletion.acreate( messages=compile_chat_messages( - args["model"], messages, args["max_tokens"], functions=args.get("functions", None)), + args["model"], messages, args["max_tokens"], functions=args.get("functions", None), system_message=self.system_message), **args, ): yield chunk.choices[0].delta @@ -69,7 +69,7 @@ class OpenAI(LLM): if args["model"] in CHAT_MODELS: resp = (await openai.ChatCompletion.acreate( messages=compile_chat_messages( - args["model"], with_history, args["max_tokens"], prompt, functions=None), + args["model"], with_history, args["max_tokens"], prompt, functions=None, system_message=self.system_message), **args, )).choices[0].message.content else: diff --git a/continuedev/src/continuedev/libs/llm/proxy_server.py b/continuedev/src/continuedev/libs/llm/proxy_server.py index eab6e441..3ec492f3 100644 --- a/continuedev/src/continuedev/libs/llm/proxy_server.py +++ b/continuedev/src/continuedev/libs/llm/proxy_server.py @@ -38,7 +38,7 @@ class ProxyServer(LLM): async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl_context=ssl_context)) as session: async with session.post(f"{SERVER_URL}/complete", json={ - "messages": compile_chat_messages(args["model"], with_history, args["max_tokens"], prompt, functions=None), + "messages": compile_chat_messages(args["model"], with_history, args["max_tokens"], prompt, functions=None, system_message=self.system_message), "unique_id": self.unique_id, **args }) as resp: @@ -50,7 +50,7 @@ class ProxyServer(LLM): async def stream_chat(self, messages: List[ChatMessage] = [], **kwargs) -> Coroutine[Any, Any, Generator[Union[Any, List, Dict], None, None]]: args = {**self.default_args, **kwargs} messages = compile_chat_messages( - self.default_model, messages, args["max_tokens"], None, functions=args.get("functions", None)) + self.default_model, messages, args["max_tokens"], None, functions=args.get("functions", None), system_message=self.system_message) async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl_context=ssl_context)) as session: async with session.post(f"{SERVER_URL}/stream_chat", json={ @@ -74,7 +74,7 @@ class ProxyServer(LLM): async def stream_complete(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: args = {**self.default_args, **kwargs} messages = compile_chat_messages( - self.default_model, with_history, args["max_tokens"], prompt, functions=args.get("functions", None)) + self.default_model, with_history, args["max_tokens"], prompt, functions=args.get("functions", None), system_message=self.system_message) async with aiohttp.ClientSession(connector=aiohttp.TCPConnector(ssl_context=ssl_context)) as session: async with session.post(f"{SERVER_URL}/stream_complete", json={ diff --git a/continuedev/src/continuedev/libs/util/count_tokens.py b/continuedev/src/continuedev/libs/util/count_tokens.py index e1baeca1..1ca98fe6 100644 --- a/continuedev/src/continuedev/libs/util/count_tokens.py +++ b/continuedev/src/continuedev/libs/util/count_tokens.py @@ -1,6 +1,7 @@ import json from typing import Dict, List, Union from ...core.main import ChatMessage +from .templating import 
render_system_message import tiktoken aliases = { @@ -85,13 +86,15 @@ def compile_chat_messages(model: str, msgs: List[ChatMessage], max_tokens: int, for function in functions: prompt_tokens += count_tokens(model, json.dumps(function)) + rendered_system_message = render_system_message(system_message) + msgs = prune_chat_history(model, - msgs, MAX_TOKENS_FOR_MODEL[model], prompt_tokens + max_tokens + count_tokens(model, system_message)) + msgs, MAX_TOKENS_FOR_MODEL[model], prompt_tokens + max_tokens + count_tokens(model, rendered_system_message)) history = [] if system_message: history.append({ "role": "system", - "content": system_message + "content": rendered_system_message }) history += [msg.to_dict(with_functions=functions is not None) for msg in msgs] diff --git a/continuedev/src/continuedev/libs/util/templating.py b/continuedev/src/continuedev/libs/util/templating.py new file mode 100644 index 00000000..ebfc2e31 --- /dev/null +++ b/continuedev/src/continuedev/libs/util/templating.py @@ -0,0 +1,39 @@ +import os +import chevron + + +def get_vars_in_template(template): + """ + Get the variables in a template + """ + return [token[1] for token in chevron.tokenizer.tokenize(template) if token[0] == 'variable'] + + +def escape_var(var: str) -> str: + """ + Escape a variable so it can be used in a template + """ + return var.replace(os.path.sep, '').replace('.', '') + + +def render_system_message(system_message: str) -> str: + """ + Render system message with mustache syntax. + Right now it only supports rendering absolute file paths as their contents. + """ + vars = get_vars_in_template(system_message) + + args = {} + for var in vars: + if var.startswith(os.path.sep): + # Escape vars which are filenames, because mustache doesn't allow / in variable names + escaped_var = escape_var(var) + system_message = system_message.replace( + var, escaped_var) + + if os.path.exists(var): + args[escaped_var] = open(var, 'r').read() + else: + args[escaped_var] = '' + + return chevron.render(system_message, args) -- cgit v1.2.3-70-g09d2 From bfea0307075308eabc1a91283e56ab3b52ea880c Mon Sep 17 00:00:00 2001 From: Ty Dunn Date: Mon, 17 Jul 2023 10:19:54 -0500 Subject: use difflib to give edit change description --- continuedev/src/continuedev/steps/core/core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'continuedev') diff --git a/continuedev/src/continuedev/steps/core/core.py b/continuedev/src/continuedev/steps/core/core.py index d5a7cd9a..41988000 100644 --- a/continuedev/src/continuedev/steps/core/core.py +++ b/continuedev/src/continuedev/steps/core/core.py @@ -1,6 +1,7 @@ # These steps are depended upon by ContinueSDK import os import subprocess +import difflib from textwrap import dedent from typing import Coroutine, List, Literal, Union @@ -172,13 +173,12 @@ class DefaultModelEditCodeStep(Step): if self._previous_contents.strip() == self._new_contents.strip(): description = "No edits were made" else: + changes = '\n'.join(difflib.ndiff(self._previous_contents.splitlines(), self._new_contents.splitlines())) description = await models.gpt3516k.complete(dedent(f"""\ - ```original - {self._previous_contents} - ``` + Diff summary: "{self.user_input}" - ```new - {self._new_contents} + ```diff + {changes} ``` Please give brief a description of the changes made above using markdown bullet points. 
Be concise:""")) -- cgit v1.2.3-70-g09d2 From f509a2b30d8bee581d1bfd91586acc54e9209599 Mon Sep 17 00:00:00 2001 From: Ty Dunn Date: Mon, 17 Jul 2023 11:36:21 -0500 Subject: align on `code section` --- continuedev/src/continuedev/core/policy.py | 2 +- extension/react-app/src/components/ComboBox.tsx | 6 +++--- extension/react-app/src/components/PillButton.tsx | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'continuedev') diff --git a/continuedev/src/continuedev/core/policy.py b/continuedev/src/continuedev/core/policy.py index bc897357..d007c92b 100644 --- a/continuedev/src/continuedev/core/policy.py +++ b/continuedev/src/continuedev/core/policy.py @@ -58,7 +58,7 @@ class DemoPolicy(Policy): if history.get_current() is None: return ( MessageStep(name="Welcome to Continue", message=dedent("""\ - - Highlight code and ask a question or give instructions + - Highlight code section and ask a question or give instructions - Use `cmd+m` (Mac) / `ctrl+m` (Windows) to open Continue - Use `/help` to ask questions about how to use Continue""")) >> WelcomeStep() >> diff --git a/extension/react-app/src/components/ComboBox.tsx b/extension/react-app/src/components/ComboBox.tsx index dbebd534..8136399a 100644 --- a/extension/react-app/src/components/ComboBox.tsx +++ b/extension/react-app/src/components/ComboBox.tsx @@ -245,11 +245,11 @@ const ComboBox = React.forwardRef((props: ComboBoxProps, ref) => { props.onToggleAddContext(); }} > - Highlight to Add Context + Highlight code section ) : ( { props.onToggleAddContext(); }} @@ -261,7 +261,7 @@ const ComboBox = React.forwardRef((props: ComboBoxProps, ref) => {
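
Taken together, these patches add a "ggml" model option that talks to an OpenAI-compatible server on http://localhost:8000, threads an optional templated system message through every LLM backend, and switches the edit and chat steps to `sdk.models.default`. The sketch below shows one way the new `GGML` class might be driven directly, outside the Continue SDK. It is a minimal illustration inferred from the diffs above, not documented usage: the import paths assume the repository layout shown in the patch is importable as a package, and it assumes an OpenAI-compatible server is already listening on localhost:8000.

```python
# Hypothetical usage sketch for the GGML backend added in this patch series.
# Assumptions (not confirmed by the patches): the continuedev package is on
# PYTHONPATH with this module layout, and a local OpenAI-compatible server
# is serving /v1/chat/completions on http://localhost:8000.
import asyncio

from continuedev.src.continuedev.core.main import ChatMessage
from continuedev.src.continuedev.libs.llm.ggml import GGML


async def main() -> None:
    # After the "templated system messages" commit, GGML only takes an
    # optional system message; the model name is fixed to "ggml".
    llm = GGML(system_message="Reply concisely.")

    messages = [
        ChatMessage(
            role="user",
            content="Write a haiku about local LLMs.",
            summary="haiku request",
        )
    ]

    # stream_chat yields delta dicts parsed from the server's "data: {...}"
    # lines; concatenating the "content" fields rebuilds the reply, mirroring
    # what SimpleChatStep does with sdk.models.default.
    completion = ""
    async for delta in llm.stream_chat(messages, temperature=0.5):
        completion += delta.get("content", "")

    print(completion)


asyncio.run(main())
```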