From 94341653cae5b9af6e33f480847dfb562aa7578c Mon Sep 17 00:00:00 2001
From: Nate Sesti
Date: Wed, 21 Jun 2023 14:02:56 -0700
Subject: first slightly working version of suggestions

---
 continuedev/src/continuedev/server/ide.py          |   6 +-
 continuedev/src/continuedev/server/ide_protocol.py |   2 +-
 continuedev/src/continuedev/steps/core/core.py     | 448 ++++++++++-----------
 3 files changed, 226 insertions(+), 230 deletions(-)

diff --git a/continuedev/src/continuedev/server/ide.py b/continuedev/src/continuedev/server/ide.py
index c83fbc8a..c2ebaccf 100644
--- a/continuedev/src/continuedev/server/ide.py
+++ b/continuedev/src/continuedev/server/ide.py
@@ -146,8 +146,10 @@ class IdeProtocolServer(AbstractIdeProtocolServer):
     # ------------------------------- #
     # Request actions in IDE, doesn't matter which Session
 
-    def showSuggestion():
-        pass
+    async def showSuggestion(self, file_edit: FileEdit):
+        await self._send_json("showSuggestion", {
+            "edit": file_edit.dict()
+        })
 
     async def setFileOpen(self, filepath: str, open: bool = True):
         # Autopilot needs access to this.

diff --git a/continuedev/src/continuedev/server/ide_protocol.py b/continuedev/src/continuedev/server/ide_protocol.py
index 2dcedc30..79820c36 100644
--- a/continuedev/src/continuedev/server/ide_protocol.py
+++ b/continuedev/src/continuedev/server/ide_protocol.py
@@ -12,7 +12,7 @@ class AbstractIdeProtocolServer(ABC):
         """Handle a json message"""
 
     @abstractmethod
-    def showSuggestion():
+    def showSuggestion(self, file_edit: FileEdit):
        """Show a suggestion to the user"""
 
     @abstractmethod

diff --git a/continuedev/src/continuedev/steps/core/core.py b/continuedev/src/continuedev/steps/core/core.py
index 71a5b5b2..eb6a00c6 100644
--- a/continuedev/src/continuedev/steps/core/core.py
+++ b/continuedev/src/continuedev/steps/core/core.py
@@ -159,253 +159,247 @@ class DefaultModelEditCodeStep(Step):
         self.name = await models.gpt35.complete(f"Write a very short title to describe this requested change: '{self.user_input}'. This is the title:")
         return f"`{self.user_input}`\n\n" + description
 
-    async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]:
-        self.description = f"`{self.user_input}`"
-        await sdk.update_ui()
-
-        rif_with_contents = []
-        for range_in_file in map(lambda x: RangeInFile(
-            filepath=x.filepath,
-            # Only consider the range line-by-line. Maybe later don't if it's only a single line.
-            range=x.range.to_full_lines()
-        ), self.range_in_files):
-            file_contents = await sdk.ide.readRangeInFile(range_in_file)
-            rif_with_contents.append(
-                RangeInFileWithContents.from_range_in_file(range_in_file, file_contents))
-
-        rif_dict = {}
-        for rif in rif_with_contents:
-            rif_dict[rif.filepath] = rif.contents
-
-        for rif in rif_with_contents:
-            await sdk.ide.setFileOpen(rif.filepath)
-
-            model_to_use = sdk.models.default
-
-            full_file_contents = await sdk.ide.readFile(rif.filepath)
-
-            full_file_contents_lst = full_file_contents.split("\n")
-
-            max_start_line = rif.range.start.line
-            min_end_line = rif.range.end.line
-            cur_start_line = 0
-            cur_end_line = len(full_file_contents_lst) - 1
-
-            def cut_context(model_to_use, total_tokens, cur_start_line, cur_end_line):
-
-                if total_tokens > MAX_TOKENS_FOR_MODEL[model_to_use.name]:
-                    while cur_end_line > min_end_line:
-                        total_tokens -= model_to_use.count_tokens(
-                            full_file_contents_lst[cur_end_line])
-                        cur_end_line -= 1
-                        if total_tokens < MAX_TOKENS_FOR_MODEL[model_to_use.name]:
-                            return cur_start_line, cur_end_line
-
-                if total_tokens > MAX_TOKENS_FOR_MODEL[model_to_use.name]:
-                    while cur_start_line < max_start_line:
-                        cur_start_line += 1
-                        total_tokens -= model_to_use.count_tokens(
-                            full_file_contents_lst[cur_end_line])
-                        if total_tokens < MAX_TOKENS_FOR_MODEL[model_to_use.name]:
-                            return cur_start_line, cur_end_line
-
-                return cur_start_line, cur_end_line
-
-            if model_to_use.name == "gpt-4":
-
-                total_tokens = model_to_use.count_tokens(full_file_contents + self._prompt)
-                cur_start_line, cur_end_line = cut_context(
-                    model_to_use, total_tokens, cur_start_line, cur_end_line)
-
-            elif model_to_use.name == "gpt-3.5-turbo" or model_to_use.name == "gpt-3.5-turbo-16k":
-
-                if sdk.models.gpt35.count_tokens(full_file_contents) > MAX_TOKENS_FOR_MODEL["gpt-3.5-turbo"]:
-
-                    model_to_use = sdk.models.gpt3516k
-                    total_tokens = model_to_use.count_tokens(
-                        full_file_contents + self._prompt)
-                    cur_start_line, cur_end_line = cut_context(
-                        model_to_use, total_tokens, cur_start_line, cur_end_line)
-
-            else:
-
-                raise Exception("Unknown default model")
+    async def get_prompt_parts(self, rif: RangeInFileWithContents, sdk: ContinueSDK, full_file_contents: str):
+        # If using 3.5 and overflows, upgrade to 3.5.16k
+        model_to_use = sdk.models.default
+        if model_to_use.name == "gpt-3.5-turbo":
+            if sdk.models.gpt35.count_tokens(full_file_contents) > MAX_TOKENS_FOR_MODEL["gpt-3.5-turbo"]:
+                model_to_use = sdk.models.gpt3516k
+
+        # Remove tokens from the end first, and then the start to clear space
+        # This part finds the start and end lines
+        full_file_contents_lst = full_file_contents.split("\n")
+        max_start_line = rif.range.start.line
+        min_end_line = rif.range.end.line
+        cur_start_line = 0
+        cur_end_line = len(full_file_contents_lst) - 1
+
+        total_tokens = model_to_use.count_tokens(
+            full_file_contents + self._prompt)
+
+        if total_tokens > MAX_TOKENS_FOR_MODEL[model_to_use.name]:
+            while cur_end_line > min_end_line:
+                total_tokens -= model_to_use.count_tokens(
+                    full_file_contents_lst[cur_end_line])
+                cur_end_line -= 1
+                if total_tokens < MAX_TOKENS_FOR_MODEL[model_to_use.name]:
+                    return cur_start_line, cur_end_line
+
+        if total_tokens > MAX_TOKENS_FOR_MODEL[model_to_use.name]:
+            while cur_start_line < max_start_line:
+                cur_start_line += 1
+                total_tokens -= model_to_use.count_tokens(
+                    full_file_contents_lst[cur_end_line])
+                if total_tokens < MAX_TOKENS_FOR_MODEL[model_to_use.name]:
+                    return cur_start_line, cur_end_line
+
+        # Now use the found start/end lines to get the prefix and suffix strings
+        file_prefix = "\n".join(
+            full_file_contents_lst[cur_start_line:max_start_line])
+        file_suffix = "\n".join(
+            full_file_contents_lst[min_end_line:cur_end_line - 1])
+
+        # Move any surrounding blank line in rif.contents to the prefix/suffix
+        # TODO: Keep track of start line of the range, because it's needed below for offset stuff
+        rif_start_line = rif.range.start.line
+        if len(rif.contents) > 0:
+            first_line = rif.contents.splitlines(keepends=True)[0]
+            while first_line.strip() == "":
+                file_prefix += first_line
+                rif.contents = rif.contents[len(first_line):]
+                first_line = rif.contents.splitlines(keepends=True)[0]
-            code_before = "\n".join(
-                full_file_contents_lst[cur_start_line:max_start_line])
-            code_after = "\n".join(
-                full_file_contents_lst[min_end_line:cur_end_line - 1])
+            last_line = rif.contents.splitlines(keepends=True)[-1]
+            while last_line.strip() == "":
+                file_suffix = last_line + file_suffix
+                rif.contents = rif.contents[:len(
+                    rif.contents) - len(last_line)]
+                last_line = rif.contents.splitlines(keepends=True)[-1]
-            segs = [code_before, code_after]
-            if segs[0].strip() == "":
-                segs[0] = segs[0].strip()
-            if segs[1].strip() == "":
-                segs[1] = segs[1].strip()
+            while rif.contents.startswith("\n"):
+                file_prefix += "\n"
+                rif.contents = rif.contents[1:]
+            while rif.contents.endswith("\n"):
+                file_suffix = "\n" + file_suffix
+                rif.contents = rif.contents[:-1]
-            # Move any surrounding blank line in rif.contents to the prefix/suffix
-            if len(rif.contents) > 0:
-                first_line = rif.contents.splitlines(keepends=True)[0]
-                while first_line.strip() == "":
-                    segs[0] += first_line
-                    rif.contents = rif.contents[len(first_line):]
-                    first_line = rif.contents.splitlines(keepends=True)[0]
+        return file_prefix, rif.contents, file_suffix, model_to_use
-                last_line = rif.contents.splitlines(keepends=True)[-1]
-                while last_line.strip() == "":
-                    segs[1] = last_line + segs[1]
-                    rif.contents = rif.contents[:len(
-                        rif.contents) - len(last_line)]
-                    last_line = rif.contents.splitlines(keepends=True)[-1]
-
-                while rif.contents.startswith("\n"):
-                    segs[0] += "\n"
-                    rif.contents = rif.contents[1:]
-                while rif.contents.endswith("\n"):
-                    segs[1] = "\n" + segs[1]
-                    rif.contents = rif.contents[:-1]
-
-            # .format(code=rif.contents, user_request=self.user_input, file_prefix=segs[0], file_suffix=segs[1])
-            prompt = self._prompt
-            if segs[0].strip() != "":
-                prompt += dedent(f"""
+    def compile_prompt(self, file_prefix: str, contents: str, file_suffix: str, sdk: ContinueSDK) -> str:
+        prompt = self._prompt
+        if file_prefix.strip() != "":
+            prompt += dedent(f"""
 <file_prefix>
-{segs[0]}
+{file_prefix}
 </file_prefix>""")
+        prompt += dedent(f"""
 <code_to_edit>
-{rif.contents}
+{contents}
 </code_to_edit>""")
-            if segs[1].strip() != "":
+        if file_suffix.strip() != "":
+            prompt += dedent(f"""
 <file_suffix>
-{segs[1]}
+{file_suffix}
 </file_suffix>""")
+        prompt += dedent(f"""
 <user_request>
 {self.user_input}
 </user_request>
 <modified_code_to_edit>
 """)
-            lines = []
-            unfinished_line = ""
-            i = 0
-            original_lines = rif.contents.split("\n")
-
-            async def add_line(i: int, line: str):
-                if i == 0:
-                    # First line indentation, because the model will assume that it is replacing in this way
-                    line = original_lines[0].replace(
-                        original_lines[0].strip(), "") + line
-
-                if i < len(original_lines):
-                    # Replace original line
-                    range = Range.from_shorthand(
-                        rif.range.start.line + i, rif.range.start.character if i == 0 else 0, rif.range.start.line + i + 1, 0)
-                else:
-                    # Insert a line
-                    range = Range.from_shorthand(
-                        rif.range.start.line + i, 0, rif.range.start.line + i, 0)
-
-                await sdk.ide.applyFileSystemEdit(FileEdit(
-                    filepath=rif.filepath,
-                    range=range,
-                    replacement=line + "\n"
-                ))
-
-            lines_of_prefix_copied = 0
-            line_below_highlighted_range = segs[1].lstrip().split("\n")[0]
-            should_stop = False
-            async for chunk in model_to_use.stream_chat(prompt, with_history=await sdk.get_chat_context(), temperature=0):
-                if should_stop:
+        return prompt
+
+    def is_end_line(self, line: str) -> bool:
+        return "</modified_code_to_edit>" in line
+
+    def line_to_be_ignored(self, line: str) -> bool:
+        return "```" in line or "<modified_code_to_edit>" in line or "<file_prefix>" in line or "</file_prefix>" in line or "<file_suffix>" in line or "</file_suffix>" in line or "<user_request>" in line or "</user_request>" in line or "<code_to_edit>" in line or "</code_to_edit>" in line
+
+    async def stream_rif(self, rif: RangeInFileWithContents, sdk: ContinueSDK):
+        full_file_contents = await sdk.ide.readFile(rif.filepath)
+
+        file_prefix, contents, file_suffix, model_to_use = await self.get_prompt_parts(
+            rif, sdk, full_file_contents)
+        prompt = self.compile_prompt(file_prefix, contents, file_suffix, sdk)
+
+        full_file_contents_lines = full_file_contents.split("\n")
+        original_lines = rif.contents.split("\n")
+        i = 0
+        lines = []
+        unfinished_line = ""
+
+        current_block = []
+        offset_from_blocks = 0
+
+        async def insert_line(line: str, line_no: int):
+            nonlocal current_block
+            # Insert line, highlight green, highlight corresponding line red
+            range = Range.from_shorthand(
+                line_no, 0, line_no, 0)
+            red_range = Range.from_shorthand(
+                line_no + len(current_block), 0, line_no + len(current_block), 0)
+
+            await sdk.ide.applyFileSystemEdit(FileEdit(
+                filepath=rif.filepath,
+                range=range,
+                replacement=line + "\n"
+            ))
+            await sdk.ide.highlightCode(RangeInFile(filepath=rif.filepath, range=range), "#00FF0022")
+            await sdk.ide.highlightCode(RangeInFile(filepath=rif.filepath, range=red_range), "#FF000022")
+
+        async def show_block_as_suggestion():
+            nonlocal i, offset_from_blocks, current_block
+            await sdk.ide.showSuggestion(FileEdit(
+                filepath=rif.filepath,
+                range=Range.from_shorthand(
+                    i + offset_from_blocks - len(current_block) + rif.range.start.line, 0, i + offset_from_blocks + rif.range.start.line, 0),
+                replacement="\n".join(current_block) + "\n"
+            ))
+            offset_from_blocks += len(current_block)
+            current_block.clear()
+
+        async def add_to_block(line: str):
+            current_block.append(line)
+            # TODO: This start line might have changed above
+            # await insert_line(line, i + offset_from_blocks +
+            #                   rif.range.start.line)
+
+        async def handle_generated_line(line: str):
+            nonlocal i, lines, current_block, offset_from_blocks, original_lines
+            # diff = list(difflib.ndiff(rif.contents.splitlines(
+            #     keepends=True), completion.splitlines(keepends=True)))
+            if i < len(original_lines) and line == original_lines[i]:
+                # Line is the same as the original. Start a new block
+                await show_block_as_suggestion()
+            else:
+                # Add to the current block
+                await add_to_block(line)
+
+        lines_of_prefix_copied = 0
+        repeating_file_suffix = False
+        line_below_highlighted_range = file_suffix.lstrip().split("\n")[0]
+        async for chunk in model_to_use.stream_chat(prompt, with_history=await sdk.get_chat_context(), temperature=0):
+            # Stop early if it is repeating the file_suffix
+            if repeating_file_suffix:
+                break
+
+            # Accumulate lines
+            chunk_lines = chunk.split("\n")
+            chunk_lines[0] = unfinished_line + chunk_lines[0]
+            if chunk.endswith("\n"):
+                unfinished_line = ""
+                chunk_lines.pop()  # because this will be an empty string
+            else:
+                unfinished_line = chunk_lines.pop()
+            lines.extend(chunk_lines)
+
+            # Deal with newly accumulated lines
+            for line in chunk_lines:
+                # Lines that should signify the end of generation
+                if self.is_end_line(line):
+                    break
+                # Lines that should be ignored, like the <> tags
+                elif self.line_to_be_ignored(line):
+                    continue
+                # Check if we are currently just copying the prefix
+                elif (lines_of_prefix_copied > 0 or i == 0) and lines_of_prefix_copied < len(file_prefix.splitlines()) and line == full_file_contents_lines[lines_of_prefix_copied]:
+                    # This is a sketchy way of stopping it from repeating the file_prefix. Is a bug if output happens to have a matching line
+                    lines_of_prefix_copied += 1
+                    continue
+                # Because really short lines might be expected to be repeated, this is only a !heuristic!
+                # Stop when it starts copying the file_suffix
+                elif line.strip() == line_below_highlighted_range.strip() and len(line.strip()) > 4:
+                    repeating_file_suffix = True
+                    break
-                    chunk_lines = chunk.split("\n")
-                    chunk_lines[0] = unfinished_line + chunk_lines[0]
-                    if chunk.endswith("\n"):
-                        unfinished_line = ""
-                        chunk_lines.pop()  # because this will be an empty string
-                    else:
-                        unfinished_line = chunk_lines.pop()
-                    lines.extend(chunk_lines)
-
-                    for line in chunk_lines:
-                        if "</modified_code_to_edit>" in line:
-                            break
-                        elif "```" in line or "<modified_code_to_edit>" in line or "<file_prefix>" in line or "</file_prefix>" in line or "<file_suffix>" in line or "</file_suffix>" in line or "<user_request>" in line or "</user_request>" in line or "<code_to_edit>" in line or "</code_to_edit>" in line:
-                            continue
-                        elif (lines_of_prefix_copied > 0 or i == 0) and lines_of_prefix_copied < len(segs[0].splitlines()) and line == full_file_contents_lst[lines_of_prefix_copied]:
-                            # This is a sketchy way of stopping it from repeating the file_prefix. Is a bug if output happens to have a matching line
-                            lines_of_prefix_copied += 1
-                            continue
-                        elif i < len(original_lines) and line == original_lines[i]:
-                            i += 1
-                            continue
-                        # Because really short lines might be expected to be repeated !heuristic!
-                        elif line.strip() == line_below_highlighted_range.strip() and len(line.strip()) > 4:
-                            should_stop = True
-                            break
-                        await add_line(i, line)
-                        i += 1
-
-            # Add the unfinished line
-            if unfinished_line != "":
-                unfinished_line = unfinished_line.replace(
-                    "<modified_code_to_edit>", "").replace("</modified_code_to_edit>", "").replace("```", "").replace("<file_prefix>", "").replace("</file_prefix>", "").replace("<file_suffix>", "").replace("</file_suffix>", "").replace("<user_request>", "").replace("</user_request>", "").replace("<code_to_edit>", "").replace("</code_to_edit>", "")
+        if len(current_block) > 0:
+            await show_block_as_suggestion()
+
+        # Record the completion
+        completion = "\n".join(lines)
+        self._prompt_and_completion += prompt + completion
+
+    async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]:
+        self.description = f"`{self.user_input}`"
+        await sdk.update_ui()
+
+        rif_with_contents = []
+        for range_in_file in map(lambda x: RangeInFile(
+            filepath=x.filepath,
+            # Only consider the range line-by-line. Maybe later don't if it's only a single line.
+            range=x.range.to_full_lines()
+        ), self.range_in_files):
+            file_contents = await sdk.ide.readRangeInFile(range_in_file)
+            rif_with_contents.append(
+                RangeInFileWithContents.from_range_in_file(range_in_file, file_contents))
+
+        rif_dict = {}
+        for rif in rif_with_contents:
+            rif_dict[rif.filepath] = rif.contents
+
+        for rif in rif_with_contents:
+            await sdk.ide.setFileOpen(rif.filepath)
+            await self.stream_rif(rif, sdk)
+            # await sdk.ide.saveFile(rif.filepath)
 
 
 class EditFileStep(Step):
-- 
cgit v1.2.3-70-g09d2
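
Note (illustrative, not part of the patch): the goal of the current_block / offset_from_blocks bookkeeping in stream_rif is to collapse each contiguous run of generated lines that differ from the original into a single FileEdit passed to ide.showSuggestion. Done offline instead of during streaming, the same grouping can be sketched with difflib, which handle_generated_line already hints at in a commented-out line. The names and the BlockSuggestion container below are hypothetical stand-ins for the real FileEdit/Range models, not code from the repository.

    import difflib
    from dataclasses import dataclass
    from typing import List

    @dataclass
    class BlockSuggestion:
        start_line: int   # first file line the block replaces (0-indexed, inclusive)
        end_line: int     # line after the last replaced file line (exclusive)
        replacement: str

    def group_into_suggestions(original_lines: List[str], generated_lines: List[str],
                               range_start_line: int) -> List[BlockSuggestion]:
        # Each non-"equal" opcode is one contiguous changed block, i.e. one suggestion.
        suggestions = []
        matcher = difflib.SequenceMatcher(None, original_lines, generated_lines)
        for tag, i1, i2, j1, j2 in matcher.get_opcodes():
            if tag == "equal":
                continue
            replacement = "\n".join(generated_lines[j1:j2])
            if replacement:
                replacement += "\n"
            suggestions.append(BlockSuggestion(
                start_line=range_start_line + i1,
                end_line=range_start_line + i2,
                replacement=replacement))
        return suggestions

    # Example: two rewritten middle lines become a single block suggestion.
    # group_into_suggestions(["a", "b", "c", "d"], ["a", "B", "C", "d"], 10)
    # -> [BlockSuggestion(start_line=11, end_line=13, replacement="B\nC\n")]

The streaming version in the patch has to do this incrementally, which is why it flushes the accumulated block whenever a generated line matches the original and keeps offset_from_blocks to translate between generated-line and original-line coordinates.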