diff options
Diffstat (limited to 'server/continuedev/libs/util/calculate_diff.py')
-rw-r--r-- | server/continuedev/libs/util/calculate_diff.py | 154 |
1 files changed, 154 insertions, 0 deletions
# Recovered from the diff adding server/continuedev/libs/util/calculate_diff.py
"""Helpers that express the difference between two strings as FileEdits."""

import difflib
from typing import List

from ...models.filesystem import FileEdit
from ...models.main import Position, Range


def _edit_from_opcode(
    filepath: str, text: str, tag: str, start: int, end: int, replacement: str
) -> FileEdit:
    """Build the FileEdit for one non-equal opcode, positioned inside ``text``.

    Args:
        filepath: Path recorded on the edit.
        text: The text that ``start`` and ``end`` are indices into.
        tag: Opcode tag reported by difflib.SequenceMatcher.
        start: Index in ``text`` where the change begins.
        end: Index in ``text`` where the changed span ends (unused for inserts).
        replacement: Text taken from the updated string for this opcode.

    Raises:
        Exception: If ``tag`` is not "delete", "insert" or "replace".
    """
    if tag == "delete":
        return FileEdit.from_deletion(filepath, Range.from_indices(text, start, end))
    if tag == "insert":
        return FileEdit.from_insertion(
            filepath, Position.from_index(text, start), replacement
        )
    if tag == "replace":
        return FileEdit(
            filepath=filepath,
            range=Range.from_indices(text, start, end),
            replacement=replacement,
        )
    raise Exception("Unexpected difflib.SequenceMatcher tag: " + tag)


def calculate_diff(filepath: str, original: str, updated: str) -> List[FileEdit]:
    """Compute, in a single pass, the edits that turn ``original`` into ``updated``.

    The edits are meant to be applied in order: each one is positioned in the
    text as it looks once all earlier edits of the list have been applied.

    Args:
        filepath: Path recorded on every returned edit.
        original: Text before the change.
        updated: Text after the change.

    Returns:
        One FileEdit per non-equal SequenceMatcher opcode, in opcode order.

    Raises:
        Exception: If SequenceMatcher reports an unknown opcode tag.
    """
    matcher = difflib.SequenceMatcher(None, original, updated)
    edits: List[FileEdit] = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            continue

        # j1/j2 already index ``updated``; shifting them by the accumulated
        # offset (as was done before) picked the wrong replacement text.
        replacement = updated[j1:j2]

        # Opcodes tile both strings contiguously, so the earlier edits have
        # turned original[:i1] into updated[:j1]. This is the text the edit is
        # applied to, and the span original[i1:i2] now starts at index j1.
        # Positions must be computed against this text, not against
        # ``original``, or line/character drift after any earlier change.
        current = updated[:j1] + original[i1:]
        edits.append(
            _edit_from_opcode(
                filepath, current, tag, j1, j1 + (i2 - i1), replacement
            )
        )

    return edits


def calculate_diff2(filepath: str, original: str, updated: str) -> List[FileEdit]:
    """Compute the edits that turn ``original`` into ``updated`` one at a time.

    On every round the two texts are diffed, only the first change is turned
    into a FileEdit, and that edit is applied to ``original`` with
    apply_edit_to_str before diffing again.

    Args:
        filepath: Path recorded on every returned edit.
        original: Text before the change.
        updated: Text after the change.

    Returns:
        The edits in the order they have to be applied.

    Raises:
        Exception: If an unknown opcode tag is reported, or if the texts still
            differ after more than 1000 rounds.
    """
    edits: List[FileEdit] = []
    max_iterations = 1000
    iterations = 0
    while original != updated:
        # TODO - For some reason it can't handle a single newline at the end of the file?
        matcher = difflib.SequenceMatcher(None, original, updated)
        # The texts differ, so there is always at least one non-equal opcode.
        tag, i1, i2, j1, j2 = next(
            opcode for opcode in matcher.get_opcodes() if opcode[0] != "equal"
        )
        edits.append(
            _edit_from_opcode(filepath, original, tag, i1, i2, updated[j1:j2])
        )

        original = apply_edit_to_str(original, edits[-1])

        # Guard against an edit that never makes the texts converge.
        iterations += 1
        if iterations > max_iterations:
            raise Exception("Max iterations reached")

    return edits


def read_range_in_str(s: str, r: Range) -> str:
    """Return the part of ``s`` covered by ``r``.

    The last line is cut at ``r.end.character + 1``, i.e. the end character is
    treated as part of the range.

    Args:
        s: Text to read from.
        r: Range whose start/end expose ``line`` and ``character``.

    Returns:
        The selected lines joined with "\\n", or "" if the range selects no line.
    """
    lines = s.splitlines()[r.start.line : r.end.line + 1]
    if not lines:
        return ""

    # Trim the end before the start: for a single-line range both slices hit
    # the same string, and the end column refers to the untrimmed line.
    lines[-1] = lines[-1][: r.end.character + 1]
    lines[0] = lines[0][r.start.character :]
    return "\n".join(lines)


def apply_edit_to_str(s: str, edit: FileEdit) -> str:
    """Return ``s`` with ``edit.range`` replaced by ``edit.replacement``.

    Args:
        s: Text the edit is applied to.
        edit: Edit to apply; only its ``range`` and ``replacement`` are read.

    Returns:
        The edited text, with lines joined by "\\n".
    """
    lines = s.splitlines()
    # splitlines() drops the empty line that follows a trailing newline, so put
    # it back. A leading newline already yields a leading "" entry; inserting
    # another one (as was done before) shifted every line index by one.
    if s.endswith("\n"):
        lines.append("")
    if not lines:
        lines = [""]

    last_index = len(lines) - 1
    start = edit.range.start
    end = edit.range.end
    # An end placed at column 0 of the line just past the text is pulled back
    # to the end of the last existing line.
    if end.line == len(lines) and end.character == 0:
        end = Position(line=last_index, character=len(lines[last_index]))

    before_lines = lines[: start.line]
    after_lines = lines[end.line + 1 :]
    # The end character is treated as inclusive here (suffix starts at
    # end.character + 1). Line indices are clamped to the last line.
    between_str = (
        lines[min(last_index, start.line)][: start.character]
        + edit.replacement
        + lines[min(last_index, end.line)][end.character + 1 :]
    )

    # NOTE(review): splitlines() yields no entry for an empty ``between_str``
    # and drops a trailing newline, which may be what the TODO in
    # calculate_diff2 is about - confirm before changing.
    return "\n".join(before_lines + between_str.splitlines() + after_lines)