From dc90631c443db710e1c92a556497e403d9f9b8be Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Thu, 20 Jul 2023 12:19:56 -0700 Subject: fix mutable default arg with_history bug --- continuedev/src/continuedev/libs/llm/__init__.py | 6 +++--- continuedev/src/continuedev/libs/llm/anthropic.py | 6 +++--- continuedev/src/continuedev/libs/llm/ggml.py | 6 +++--- continuedev/src/continuedev/libs/llm/hf_inference_api.py | 2 +- continuedev/src/continuedev/libs/llm/openai.py | 6 +++--- continuedev/src/continuedev/libs/llm/proxy_server.py | 6 +++--- continuedev/src/continuedev/libs/util/count_tokens.py | 14 ++++++++------ 7 files changed, 24 insertions(+), 22 deletions(-) (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/libs/llm/__init__.py b/continuedev/src/continuedev/libs/llm/__init__.py index 4c4de213..2766db4b 100644 --- a/continuedev/src/continuedev/libs/llm/__init__.py +++ b/continuedev/src/continuedev/libs/llm/__init__.py @@ -9,15 +9,15 @@ from pydantic import BaseModel class LLM(ABC): system_message: Union[str, None] = None - async def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs) -> Coroutine[Any, Any, str]: + async def complete(self, prompt: str, with_history: List[ChatMessage] = None, **kwargs) -> Coroutine[Any, Any, str]: """Return the completion of the text with the given temperature.""" raise NotImplementedError - def stream_complete(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: + def stream_complete(self, prompt, with_history: List[ChatMessage] = None, **kwargs) -> Generator[Union[Any, List, Dict], None, None]: """Stream the completion through generator.""" raise NotImplementedError - async def stream_chat(self, messages: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: + async def stream_chat(self, messages: List[ChatMessage] = None, **kwargs) -> Generator[Union[Any, List, Dict], None, None]: """Stream the chat through generator.""" raise NotImplementedError diff --git a/continuedev/src/continuedev/libs/llm/anthropic.py b/continuedev/src/continuedev/libs/llm/anthropic.py index c82895c6..625d4e57 100644 --- a/continuedev/src/continuedev/libs/llm/anthropic.py +++ b/continuedev/src/continuedev/libs/llm/anthropic.py @@ -54,7 +54,7 @@ class AnthropicLLM(LLM): prompt += AI_PROMPT return prompt - async def stream_complete(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: + async def stream_complete(self, prompt, with_history: List[ChatMessage] = None, **kwargs) -> Generator[Union[Any, List, Dict], None, None]: args = self.default_args.copy() args.update(kwargs) args["stream"] = True @@ -66,7 +66,7 @@ class AnthropicLLM(LLM): ): yield chunk.completion - async def stream_chat(self, messages: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: + async def stream_chat(self, messages: List[ChatMessage] = None, **kwargs) -> Generator[Union[Any, List, Dict], None, None]: args = self.default_args.copy() args.update(kwargs) args["stream"] = True @@ -83,7 +83,7 @@ class AnthropicLLM(LLM): "content": chunk.completion } - async def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs) -> Coroutine[Any, Any, str]: + async def complete(self, prompt: str, with_history: List[ChatMessage] = None, **kwargs) -> Coroutine[Any, Any, str]: args = {**self.default_args, **kwargs} args = self._transform_args(args) diff --git 
a/continuedev/src/continuedev/libs/llm/ggml.py b/continuedev/src/continuedev/libs/llm/ggml.py index 6007fdb4..4889a556 100644 --- a/continuedev/src/continuedev/libs/llm/ggml.py +++ b/continuedev/src/continuedev/libs/llm/ggml.py @@ -26,7 +26,7 @@ class GGML(LLM): def count_tokens(self, text: str): return count_tokens(self.name, text) - async def stream_complete(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: + async def stream_complete(self, prompt, with_history: List[ChatMessage] = None, **kwargs) -> Generator[Union[Any, List, Dict], None, None]: args = self.default_args.copy() args.update(kwargs) args["stream"] = True @@ -47,7 +47,7 @@ class GGML(LLM): except: raise Exception(str(line)) - async def stream_chat(self, messages: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: + async def stream_chat(self, messages: List[ChatMessage] = None, **kwargs) -> Generator[Union[Any, List, Dict], None, None]: args = {**self.default_args, **kwargs} messages = compile_chat_messages( self.name, messages, args["max_tokens"], None, functions=args.get("functions", None), system_message=self.system_message) @@ -72,7 +72,7 @@ class GGML(LLM): except: raise Exception(str(line[0])) - async def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs) -> Coroutine[Any, Any, str]: + async def complete(self, prompt: str, with_history: List[ChatMessage] = None, **kwargs) -> Coroutine[Any, Any, str]: args = {**self.default_args, **kwargs} async with aiohttp.ClientSession() as session: diff --git a/continuedev/src/continuedev/libs/llm/hf_inference_api.py b/continuedev/src/continuedev/libs/llm/hf_inference_api.py index 7e11fbbe..36f03270 100644 --- a/continuedev/src/continuedev/libs/llm/hf_inference_api.py +++ b/continuedev/src/continuedev/libs/llm/hf_inference_api.py @@ -16,7 +16,7 @@ class HuggingFaceInferenceAPI(LLM): self.model = model self.system_message = system_message # TODO: Nothing being done with this - def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs): + def complete(self, prompt: str, with_history: List[ChatMessage] = None, **kwargs): """Return the completion of the text with the given temperature.""" API_URL = f"https://api-inference.huggingface.co/models/{self.model}" headers = { diff --git a/continuedev/src/continuedev/libs/llm/openai.py b/continuedev/src/continuedev/libs/llm/openai.py index 64bb39a2..96a4ab71 100644 --- a/continuedev/src/continuedev/libs/llm/openai.py +++ b/continuedev/src/continuedev/libs/llm/openai.py @@ -42,7 +42,7 @@ class OpenAI(LLM): def count_tokens(self, text: str): return count_tokens(self.default_model, text) - async def stream_complete(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: + async def stream_complete(self, prompt, with_history: List[ChatMessage] = None, **kwargs) -> Generator[Union[Any, List, Dict], None, None]: args = self.default_args.copy() args.update(kwargs) args["stream"] = True @@ -72,7 +72,7 @@ class OpenAI(LLM): self.write_log(f"Completion:\n\n{completion}") - async def stream_chat(self, messages: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: + async def stream_chat(self, messages: List[ChatMessage] = None, **kwargs) -> Generator[Union[Any, List, Dict], None, None]: args = self.default_args.copy() args.update(kwargs) args["stream"] = True @@ -93,7 +93,7 @@ class OpenAI(LLM): completion += 
chunk.choices[0].delta.content self.write_log(f"Completion: \n\n{completion}") - async def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs) -> Coroutine[Any, Any, str]: + async def complete(self, prompt: str, with_history: List[ChatMessage] = None, **kwargs) -> Coroutine[Any, Any, str]: args = {**self.default_args, **kwargs} if args["model"] in CHAT_MODELS: diff --git a/continuedev/src/continuedev/libs/llm/proxy_server.py b/continuedev/src/continuedev/libs/llm/proxy_server.py index bd50fe02..b1bb8f06 100644 --- a/continuedev/src/continuedev/libs/llm/proxy_server.py +++ b/continuedev/src/continuedev/libs/llm/proxy_server.py @@ -38,7 +38,7 @@ class ProxyServer(LLM): def count_tokens(self, text: str): return count_tokens(self.default_model, text) - async def complete(self, prompt: str, with_history: List[ChatMessage] = [], **kwargs) -> Coroutine[Any, Any, str]: + async def complete(self, prompt: str, with_history: List[ChatMessage] = None, **kwargs) -> Coroutine[Any, Any, str]: args = {**self.default_args, **kwargs} messages = compile_chat_messages( @@ -57,7 +57,7 @@ class ProxyServer(LLM): except: raise Exception(await resp.text()) - async def stream_chat(self, messages: List[ChatMessage] = [], **kwargs) -> Coroutine[Any, Any, Generator[Union[Any, List, Dict], None, None]]: + async def stream_chat(self, messages: List[ChatMessage] = None, **kwargs) -> Coroutine[Any, Any, Generator[Union[Any, List, Dict], None, None]]: args = {**self.default_args, **kwargs} messages = compile_chat_messages( args["model"], messages, args["max_tokens"], None, functions=args.get("functions", None), system_message=self.system_message) @@ -89,7 +89,7 @@ class ProxyServer(LLM): self.write_log(f"Completion: \n\n{completion}") - async def stream_complete(self, prompt, with_history: List[ChatMessage] = [], **kwargs) -> Generator[Union[Any, List, Dict], None, None]: + async def stream_complete(self, prompt, with_history: List[ChatMessage] = None, **kwargs) -> Generator[Union[Any, List, Dict], None, None]: args = {**self.default_args, **kwargs} messages = compile_chat_messages( self.default_model, with_history, args["max_tokens"], prompt, functions=args.get("functions", None), system_message=self.system_message) diff --git a/continuedev/src/continuedev/libs/util/count_tokens.py b/continuedev/src/continuedev/libs/util/count_tokens.py index 987aa722..6e0a3b88 100644 --- a/continuedev/src/continuedev/libs/util/count_tokens.py +++ b/continuedev/src/continuedev/libs/util/count_tokens.py @@ -101,13 +101,15 @@ def prune_chat_history(model: str, chat_history: List[ChatMessage], max_tokens: TOKEN_BUFFER_FOR_SAFETY = 100 -def compile_chat_messages(model: str, msgs: List[ChatMessage], max_tokens: int, prompt: Union[str, None] = None, functions: Union[List, None] = None, system_message: Union[str, None] = None) -> List[Dict]: +def compile_chat_messages(model: str, msgs: Union[List[ChatMessage], None], max_tokens: int, prompt: Union[str, None] = None, functions: Union[List, None] = None, system_message: Union[str, None] = None) -> List[Dict]: """ The total number of tokens is system_message + sum(msgs) + functions + prompt after it is converted to a message """ + msgs_copy = msgs.copy() if msgs is not None else [] + if prompt is not None: prompt_msg = ChatMessage(role="user", content=prompt, summary=prompt) - msgs += [prompt_msg] + msgs_copy += [prompt_msg] if system_message is not None: # NOTE: System message takes second precedence to user prompt, so it is placed just before @@ -116,7 +118,7 @@ def 
compile_chat_messages(model: str, msgs: List[ChatMessage], max_tokens: int, system_chat_msg = ChatMessage( role="system", content=rendered_system_message, summary=rendered_system_message) # insert at second-to-last position - msgs.insert(-1, system_chat_msg) + msgs_copy.insert(-1, system_chat_msg) # Add tokens from functions function_tokens = 0 @@ -124,11 +126,11 @@ def compile_chat_messages(model: str, msgs: List[ChatMessage], max_tokens: int, for function in functions: function_tokens += count_tokens(model, json.dumps(function)) - msgs = prune_chat_history( - model, msgs, MAX_TOKENS_FOR_MODEL[model], function_tokens + max_tokens + TOKEN_BUFFER_FOR_SAFETY) + msgs_copy = prune_chat_history( + model, msgs_copy, MAX_TOKENS_FOR_MODEL[model], function_tokens + max_tokens + TOKEN_BUFFER_FOR_SAFETY) history = [msg.to_dict(with_functions=functions is not None) - for msg in msgs] + for msg in msgs_copy] # Move system message back to start if system_message is not None and len(history) >= 2 and history[-2]["role"] == "system": -- cgit v1.2.3-70-g09d2 From 00efacfc3df025f359a8aac86dad8b273d5fd350 Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Thu, 20 Jul 2023 16:30:30 -0700 Subject: deep copy --- continuedev/src/continuedev/libs/llm/openai.py | 2 +- continuedev/src/continuedev/libs/util/count_tokens.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/libs/llm/openai.py b/continuedev/src/continuedev/libs/llm/openai.py index 96a4ab71..a0773c1d 100644 --- a/continuedev/src/continuedev/libs/llm/openai.py +++ b/continuedev/src/continuedev/libs/llm/openai.py @@ -81,7 +81,7 @@ class OpenAI(LLM): del args["functions"] messages = compile_chat_messages( - args["model"], messages, args["max_tokens"], functions=args.get("functions", None), system_message=self.system_message) + args["model"], messages, args["max_tokens"], None, functions=args.get("functions", None), system_message=self.system_message) self.write_log(f"Prompt: \n\n{format_chat_messages(messages)}") completion = "" async for chunk in await openai.ChatCompletion.acreate( diff --git a/continuedev/src/continuedev/libs/util/count_tokens.py b/continuedev/src/continuedev/libs/util/count_tokens.py index 6e0a3b88..cea91470 100644 --- a/continuedev/src/continuedev/libs/util/count_tokens.py +++ b/continuedev/src/continuedev/libs/util/count_tokens.py @@ -105,7 +105,8 @@ def compile_chat_messages(model: str, msgs: Union[List[ChatMessage], None], max_ """ The total number of tokens is system_message + sum(msgs) + functions + prompt after it is converted to a message """ - msgs_copy = msgs.copy() if msgs is not None else [] + msgs_copy = [msg.copy(deep=True) + for msg in msgs] if msgs is not None else [] if prompt is not None: prompt_msg = ChatMessage(role="user", content=prompt, summary=prompt) -- cgit v1.2.3-70-g09d2 From 20b399a20ed36815e40fda292f0bfb1e1b30aed8 Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Thu, 20 Jul 2023 16:40:40 -0700 Subject: don't summarize last user message --- continuedev/src/continuedev/libs/util/count_tokens.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/libs/util/count_tokens.py b/continuedev/src/continuedev/libs/util/count_tokens.py index cea91470..c58ae499 100644 --- a/continuedev/src/continuedev/libs/util/count_tokens.py +++ b/continuedev/src/continuedev/libs/util/count_tokens.py @@ -73,9 +73,9 @@ def prune_chat_history(model: str, chat_history: List[ChatMessage], 
max_tokens: message = chat_history.pop(0) total_tokens -= count_tokens(model, message.content) - # 3. Truncate message in the last 5 + # 3. Truncate message in the last 5, except last 1 i = 0 - while total_tokens > max_tokens and len(chat_history) > 0 and i < len(chat_history): + while total_tokens > max_tokens and len(chat_history) > 0 and i < len(chat_history) - 1: message = chat_history[i] total_tokens -= count_tokens(model, message.content) total_tokens += count_tokens(model, message.summary) -- cgit v1.2.3-70-g09d2 From 0cd32ba813f5506c0871159658728b8ce31825e1 Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Fri, 21 Jul 2023 01:49:28 -0700 Subject: fix for top-of-file pruning in default edit step --- continuedev/src/continuedev/steps/core/core.py | 14 +++++++------- extension/package-lock.json | 4 ++-- extension/package.json | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/steps/core/core.py b/continuedev/src/continuedev/steps/core/core.py index 4afc36e8..98600f8b 100644 --- a/continuedev/src/continuedev/steps/core/core.py +++ b/continuedev/src/continuedev/steps/core/core.py @@ -220,13 +220,13 @@ class DefaultModelEditCodeStep(Step): if total_tokens < MAX_TOKENS_FOR_MODEL[model_to_use.name]: break - if total_tokens > MAX_TOKENS_FOR_MODEL[model_to_use.name]: - while cur_start_line < max_start_line: - cur_start_line += 1 - total_tokens -= model_to_use.count_tokens( - full_file_contents_lst[cur_end_line]) - if total_tokens < MAX_TOKENS_FOR_MODEL[model_to_use.name]: - break + if total_tokens > MAX_TOKENS_FOR_MODEL[model_to_use.name]: + while cur_start_line < max_start_line: + cur_start_line += 1 + total_tokens -= model_to_use.count_tokens( + full_file_contents_lst[cur_start_line]) + if total_tokens < MAX_TOKENS_FOR_MODEL[model_to_use.name]: + break # Now use the found start/end lines to get the prefix and suffix strings file_prefix = "\n".join( diff --git a/extension/package-lock.json b/extension/package-lock.json index 7ca62d4a..d44b84c4 100644 --- a/extension/package-lock.json +++ b/extension/package-lock.json @@ -1,12 +1,12 @@ { "name": "continue", - "version": "0.0.186", + "version": "0.0.187", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "continue", - "version": "0.0.186", + "version": "0.0.187", "license": "Apache-2.0", "dependencies": { "@electron/rebuild": "^3.2.10", diff --git a/extension/package.json b/extension/package.json index 76b80ed7..34bc8bc4 100644 --- a/extension/package.json +++ b/extension/package.json @@ -14,7 +14,7 @@ "displayName": "Continue", "pricing": "Free", "description": "The open-source coding autopilot", - "version": "0.0.186", + "version": "0.0.187", "publisher": "Continue", "engines": { "vscode": "^1.67.0" -- cgit v1.2.3-70-g09d2 From a87e66758731a9e76c9c394dc2190b9882ddbceb Mon Sep 17 00:00:00 2001 From: Nate Sesti Date: Fri, 21 Jul 2023 19:51:23 -0700 Subject: clean pii from telemetry --- .../src/continuedev/libs/util/commonregex.py | 138 +++++++++++++++++++++ continuedev/src/continuedev/libs/util/telemetry.py | 7 +- 2 files changed, 144 insertions(+), 1 deletion(-) create mode 100644 continuedev/src/continuedev/libs/util/commonregex.py (limited to 'continuedev/src') diff --git a/continuedev/src/continuedev/libs/util/commonregex.py b/continuedev/src/continuedev/libs/util/commonregex.py new file mode 100644 index 00000000..55da7fc0 --- /dev/null +++ b/continuedev/src/continuedev/libs/util/commonregex.py @@ -0,0 +1,138 @@ +# coding: utf-8 +import json 
+import re
+from typing import Any, Dict
+
+date = re.compile(
+    '(?:(?]+[^\s`!()\[\]{};:\'".,<>?\xab\xbb\u201c\u201d\u2018\u2019])?)', re.IGNORECASE)
+email = re.compile(
+    "([a-z0-9!#$%&'*+\/=?^_`{|.}~-]+@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)", re.IGNORECASE)
+ip = re.compile('(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)', re.IGNORECASE)
+ipv6 = re.compile(
+    '\s*(?!.*::.*::)(?:(?!:)|:(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?",
+    "unix_absolute_filepath": "",
+    "dates": "",
+    "times": "
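Note on the first commit ("fix mutable default arg with_history bug"): Python evaluates default argument values once, when the `def` statement runs, so a default like `with_history: List[ChatMessage] = []` is a single list shared by every call that omits the argument, and anything appended to it leaks into later calls. A minimal, self-contained sketch of the failure mode and of the `None`-sentinel idiom the patch switches to (the names here are illustrative, not taken from the Continue codebase):

```python
from typing import List, Optional


def remember_buggy(item: str, history: List[str] = []) -> List[str]:
    # The [] default is created once, at definition time, and reused for
    # every call that omits `history`, so earlier items leak into later calls.
    history.append(item)
    return history


def remember_fixed(item: str, history: Optional[List[str]] = None) -> List[str]:
    # None is an immutable sentinel; allocating the list inside the body
    # gives each call its own fresh history.
    if history is None:
        history = []
    history.append(item)
    return history


print(remember_buggy("a"))  # ['a']
print(remember_buggy("b"))  # ['a', 'b']  <- state shared across calls
print(remember_fixed("a"))  # ['a']
print(remember_fixed("b"))  # ['b']
```

The patch keeps the annotations as `List[ChatMessage]` even though the default is now `None`; `Optional[List[ChatMessage]]` would be the stricter annotation, but the runtime sharing bug is what these commits address.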
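The `compile_chat_messages` change goes further than swapping the default: it builds a `msgs_copy` so the prompt and system messages are no longer appended to the caller's list, and the follow-up "deep copy" commit replaces `msgs.copy()` with a per-message `msg.copy(deep=True)`. A shallow `list.copy()` gives a new list that still aliases the same `ChatMessage` objects, so any in-place edit of a message during pruning would still be visible to the caller; pydantic's `copy(deep=True)` clones the messages themselves. A rough illustration, with assumed `ChatMessage` fields and a trivial stand-in for the pruning logic:

```python
from typing import List

from pydantic import BaseModel


class ChatMessage(BaseModel):
    # Field names are assumed for this sketch; the real model lives in
    # continuedev's core types.
    role: str
    content: str
    summary: str


def prune_in_place(msgs: List[ChatMessage]) -> None:
    # Stand-in for pruning logic that rewrites message content in place.
    for msg in msgs:
        msg.content = msg.summary


original = [ChatMessage(role="user", content="a long question ...", summary="question")]

shallow = original.copy()                          # new list, same message objects
prune_in_place(shallow)
print(original[0].content)                         # "question" -- the caller was mutated

original = [ChatMessage(role="user", content="a long question ...", summary="question")]
deep = [msg.copy(deep=True) for msg in original]   # pydantic v1 deep copy, as in the diff
prune_in_place(deep)
print(original[0].content)                         # "a long question ..." -- caller untouched
```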
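The "don't summarize last user message" commit tightens the loop bound in `prune_chat_history` from `i < len(chat_history)` to `i < len(chat_history) - 1`, so when messages are collapsed to their summaries to fit the token budget, the most recent message, normally the user's current request, is left verbatim. A simplified sketch of that pass (the token counter is a placeholder; the real code counts with the model's tokenizer):

```python
from dataclasses import dataclass
from typing import List


@dataclass
class Message:
    content: str
    summary: str


def count_tokens(text: str) -> int:
    # Placeholder tokenizer for the sketch only.
    return len(text.split())


def summarize_older_messages(history: List[Message], max_tokens: int) -> None:
    """Collapse messages to their summaries, oldest first, but never touch the
    last message so the user's current request is sent verbatim."""
    total = sum(count_tokens(m.content) for m in history)
    i = 0
    # The fix: stop at len(history) - 1 instead of len(history).
    while total > max_tokens and i < len(history) - 1:
        msg = history[i]
        total -= count_tokens(msg.content)
        total += count_tokens(msg.summary)
        msg.content = msg.summary
        i += 1


history = [
    Message("a very long first answer " * 10, "first answer"),
    Message("a long follow-up exchange " * 10, "follow-up"),
    Message("the user's current request, kept verbatim", "current request"),
]
summarize_older_messages(history, max_tokens=30)
print(history[-1].content)  # still the full request, never its summary
```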
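The "fix for top-of-file pruning in default edit step" commit corrects which line's tokens are subtracted while trimming from the top of the file: the loop advances `cur_start_line` but was charging the budget for `full_file_contents_lst[cur_end_line]`, so the running total never shrank by the lines actually being dropped. A reduced sketch of the corrected loop, with the surrounding step simplified away and a placeholder `count_tokens`:

```python
from typing import List


def count_tokens(text: str) -> int:
    # Placeholder; the real step asks the LLM wrapper to count tokens.
    return max(1, len(text) // 4)


def trim_top_of_file(full_file_contents_lst: List[str], cur_start_line: int,
                     max_start_line: int, total_tokens: int, max_tokens: int) -> int:
    """Drop lines from the top of the file until the prompt fits the context
    window, charging the budget for the line at cur_start_line (the fix),
    not for a line at the end of the range."""
    if total_tokens > max_tokens:
        while cur_start_line < max_start_line:
            cur_start_line += 1
            total_tokens -= count_tokens(full_file_contents_lst[cur_start_line])
            if total_tokens < max_tokens:
                break
    return cur_start_line
```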
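The last commit ("clean pii from telemetry") adds `commonregex.py`, a set of PII-matching regular expressions, and touches `libs/util/telemetry.py` so identifying strings can be replaced with placeholders before telemetry events are sent. The sketch below shows the general shape of such a scrubber; the patterns, placeholder strings, and the recursive `clean_pii` helper are illustrative, not the exact definitions in the new file:

```python
import re
from typing import Any, Dict

# Illustrative patterns and placeholder names; the real file defines many more
# (dates, times, phone numbers, links, credit cards, ...) with stricter regexes.
PII_PATTERNS: Dict[str, re.Pattern] = {
    "email": re.compile(
        r"[a-z0-9!#$%&'*+/=?^_`{|.}~-]+@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z]{2,}",
        re.IGNORECASE),
    "ip_address": re.compile(
        r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}"),
    "unix_absolute_filepath": re.compile(r"(?:/[\w.-]+)+/?"),
}

PLACEHOLDERS: Dict[str, str] = {
    "email": "<EMAIL>",
    "ip_address": "<IP_ADDRESS>",
    "unix_absolute_filepath": "<PATH>",
}


def clean_pii(value: Any) -> Any:
    """Recursively redact PII from strings, lists, and dicts before they are
    attached to a telemetry event."""
    if isinstance(value, str):
        for name, pattern in PII_PATTERNS.items():
            value = pattern.sub(PLACEHOLDERS[name], value)
        return value
    if isinstance(value, list):
        return [clean_pii(item) for item in value]
    if isinstance(value, dict):
        return {key: clean_pii(val) for key, val in value.items()}
    return value


# Example: event properties get scrubbed once, at the point of capture.
event = {"error": "FileNotFoundError: /Users/alice/secret/config.json (alice@example.com)"}
print(clean_pii(event))  # {'error': 'FileNotFoundError: <PATH> (<EMAIL>)'}
```

Scrubbing at the single point where events are captured keeps the redaction policy in one place instead of spreading it across every call site.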