Diffstat (limited to 'server/continuedev/libs/util')
-rw-r--r--  server/continuedev/libs/util/calculate_diff.py               | 154
-rw-r--r--  server/continuedev/libs/util/commonregex.py                  | 144
-rw-r--r--  server/continuedev/libs/util/copy_codebase.py                | 121
-rw-r--r--  server/continuedev/libs/util/count_tokens.py                 | 206
-rw-r--r--  server/continuedev/libs/util/create_async_task.py            |  38
-rw-r--r--  server/continuedev/libs/util/devdata.py                      |  67
-rw-r--r--  server/continuedev/libs/util/edit_config.py                  | 149
-rw-r--r--  server/continuedev/libs/util/errors.py                       |   2
-rw-r--r--  server/continuedev/libs/util/filter_files.py                 |  33
-rw-r--r--  server/continuedev/libs/util/logging.py                      |  47
-rw-r--r--  server/continuedev/libs/util/map_path.py                     |  16
-rw-r--r--  server/continuedev/libs/util/paths.py                        | 148
-rw-r--r--  server/continuedev/libs/util/queue.py                        |  17
-rw-r--r--  server/continuedev/libs/util/ripgrep.py                      |  25
-rw-r--r--  server/continuedev/libs/util/step_name_to_steps.py           |  47
-rw-r--r--  server/continuedev/libs/util/strings.py                      |  64
-rw-r--r--  server/continuedev/libs/util/telemetry.py                    | 108
-rw-r--r--  server/continuedev/libs/util/traceback/traceback_parsers.py  |  56
19 files changed, 1518 insertions(+), 0 deletions(-)
diff --git a/server/continuedev/libs/util/calculate_diff.py b/server/continuedev/libs/util/calculate_diff.py
new file mode 100644
index 00000000..99301ae7
--- /dev/null
+++ b/server/continuedev/libs/util/calculate_diff.py
@@ -0,0 +1,154 @@
+import difflib
+from typing import List
+
+from ...models.filesystem import FileEdit
+from ...models.main import Position, Range
+
+
+def calculate_diff(filepath: str, original: str, updated: str) -> List[FileEdit]:
+ s = difflib.SequenceMatcher(None, original, updated)
+ offset = 0 # The indices are offset by previous deletions/insertions
+ edits = []
+ for tag, i1, i2, j1, j2 in s.get_opcodes():
+ i1, i2, j1, j2 = i1 + offset, i2 + offset, j1 + offset, j2 + offset
+ replacement = updated[j1:j2]
+ if tag == "equal":
+ pass
+ elif tag == "delete":
+ edits.append(
+ FileEdit.from_deletion(filepath, Range.from_indices(original, i1, i2))
+ )
+ offset -= i2 - i1
+ elif tag == "insert":
+ edits.append(
+ FileEdit.from_insertion(
+ filepath, Position.from_index(original, i1), replacement
+ )
+ )
+ offset += j2 - j1
+ elif tag == "replace":
+ edits.append(
+ FileEdit(
+ filepath=filepath,
+ range=Range.from_indices(original, i1, i2),
+ replacement=replacement,
+ )
+ )
+ offset += (j2 - j1) - (i2 - i1)
+ else:
+ raise Exception("Unexpected difflib.SequenceMatcher tag: " + tag)
+
+ return edits
+
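+# Example (illustrative): diffing "hello" against "hello!" yields a single
+# insertion edit at the end of the string:
+#
+#   edits = calculate_diff("a.txt", "hello", "hello!")
+#   # -> [FileEdit.from_insertion("a.txt", <position after "hello">, "!")]
+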
+
+def calculate_diff2(filepath: str, original: str, updated: str) -> List[FileEdit]:
+ # original_lines = original.splitlines()
+ # updated_lines = updated.splitlines()
+ # offset = 0
+ # while len(original_lines) and len(updated_lines) and original_lines[0] == updated_lines[0]:
+ # original_lines = original_lines[1:]
+ # updated_lines = updated_lines[1:]
+
+ # while len(original_lines) and len(updated_lines) and original_lines[-1] == updated_lines[-1]:
+ # original_lines = original_lines[:-1]
+ # updated_lines = updated_lines[:-1]
+
+ # original = "\n".join(original_lines)
+ # updated = "\n".join(updated_lines)
+
+ edits = []
+ max_iterations = 1000
+ i = 0
+    while original != updated:
+ # TODO - For some reason it can't handle a single newline at the end of the file?
+ s = difflib.SequenceMatcher(None, original, updated)
+ opcodes = s.get_opcodes()
+ for edit_index in range(len(opcodes)):
+            tag, i1, i2, j1, j2 = opcodes[edit_index]
+ replacement = updated[j1:j2]
+ if tag == "equal":
+ continue # ;)
+ elif tag == "delete":
+ edits.append(
+ FileEdit.from_deletion(
+ filepath, Range.from_indices(original, i1, i2)
+ )
+ )
+ elif tag == "insert":
+ edits.append(
+ FileEdit.from_insertion(
+ filepath, Position.from_index(original, i1), replacement
+ )
+ )
+ elif tag == "replace":
+ edits.append(
+ FileEdit(
+ filepath=filepath,
+ range=Range.from_indices(original, i1, i2),
+ replacement=replacement,
+ )
+ )
+ else:
+ raise Exception("Unexpected difflib.SequenceMatcher tag: " + tag)
+ break
+
+ original = apply_edit_to_str(original, edits[-1])
+
+ i += 1
+ if i > max_iterations:
+ raise Exception("Max iterations reached")
+
+ return edits
+
+
+def read_range_in_str(s: str, r: Range) -> str:
+ lines = s.splitlines()[r.start.line : r.end.line + 1]
+ if len(lines) == 0:
+ return ""
+
+ lines[0] = lines[0][r.start.character :]
+ lines[-1] = lines[-1][: r.end.character + 1]
+ return "\n".join(lines)
+
+
+def apply_edit_to_str(s: str, edit: FileEdit) -> str:
+ # Split lines and deal with some edge cases (could obviously be nicer)
+ lines = s.splitlines()
+ if s.startswith("\n"):
+ lines.insert(0, "")
+ if s.endswith("\n"):
+ lines.append("")
+
+ if len(lines) == 0:
+ lines = [""]
+
+ end = Position(line=edit.range.end.line, character=edit.range.end.character)
+ if edit.range.end.line == len(lines) and edit.range.end.character == 0:
+ end = Position(
+ line=edit.range.end.line - 1,
+ character=len(lines[min(len(lines) - 1, edit.range.end.line - 1)]),
+ )
+
+ before_lines = lines[: edit.range.start.line]
+ after_lines = lines[end.line + 1 :]
+ between_str = (
+ lines[min(len(lines) - 1, edit.range.start.line)][: edit.range.start.character]
+ + edit.replacement
+ + lines[min(len(lines) - 1, end.line)][end.character + 1 :]
+ )
+
+
+ lines = before_lines + between_str.splitlines() + after_lines
+ return "\n".join(lines)
diff --git a/server/continuedev/libs/util/commonregex.py b/server/continuedev/libs/util/commonregex.py
new file mode 100644
index 00000000..c2f6bb82
--- /dev/null
+++ b/server/continuedev/libs/util/commonregex.py
@@ -0,0 +1,144 @@
+# coding: utf-8
+import re
+from typing import Any
+
+date = re.compile(
+    r"(?:(?<!\:)(?<!\:\d)[0-3]?\d(?:st|nd|rd|th)?\s+(?:of\s+)?(?:jan\.?|january|feb\.?|february|mar\.?|march|apr\.?|april|may|jun\.?|june|jul\.?|july|aug\.?|august|sep\.?|september|oct\.?|october|nov\.?|november|dec\.?|december)|(?:jan\.?|january|feb\.?|february|mar\.?|march|apr\.?|april|may|jun\.?|june|jul\.?|july|aug\.?|august|sep\.?|september|oct\.?|october|nov\.?|november|dec\.?|december)\s+(?<!\:)(?<!\:\d)[0-3]?\d(?:st|nd|rd|th)?)(?:\,)?\s*(?:\d{4})?|[0-3]?\d[-\./][0-3]?\d[-\./]\d{2,4}",
+    re.IGNORECASE,
+)
+time = re.compile(r"\d{1,2}:\d{2} ?(?:[ap]\.?m\.?)?|\d[ap]\.?m\.?", re.IGNORECASE)
+phone = re.compile(
+    r"""((?:(?<![\d-])(?:\+?\d{1,3}[-.\s*]?)?(?:\(?\d{3}\)?[-.\s*]?)?\d{3}[-.\s*]?\d{4}(?![\d-]))|(?:(?<![\d-])(?:(?:\(\+?\d{2}\))|(?:\+?\d{2}))\s*\d{2}\s*\d{3}\s*\d{4}(?![\d-])))"""
+)
+phones_with_exts = re.compile(
+    r"((?:(?:\+?1\s*(?:[.-]\s*)?)?(?:\(\s*(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s*\)|(?:[2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s*(?:[.-]\s*)?)?(?:[2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s*(?:[.-]\s*)?(?:[0-9]{4})(?:\s*(?:#|x\.?|ext\.?|extension)\s*(?:\d+)?))",
+    re.IGNORECASE,
+)
+link = re.compile(
+    r"(?i)((?:https?://|www\d{0,3}[.])?[a-z0-9.\-]+[.](?:(?:international)|(?:construction)|(?:contractors)|(?:enterprises)|(?:photography)|(?:immobilien)|(?:management)|(?:technology)|(?:directory)|(?:education)|(?:equipment)|(?:institute)|(?:marketing)|(?:solutions)|(?:builders)|(?:clothing)|(?:computer)|(?:democrat)|(?:diamonds)|(?:graphics)|(?:holdings)|(?:lighting)|(?:plumbing)|(?:training)|(?:ventures)|(?:academy)|(?:careers)|(?:company)|(?:domains)|(?:florist)|(?:gallery)|(?:guitars)|(?:holiday)|(?:kitchen)|(?:recipes)|(?:shiksha)|(?:singles)|(?:support)|(?:systems)|(?:agency)|(?:berlin)|(?:camera)|(?:center)|(?:coffee)|(?:estate)|(?:kaufen)|(?:luxury)|(?:monash)|(?:museum)|(?:photos)|(?:repair)|(?:social)|(?:tattoo)|(?:travel)|(?:viajes)|(?:voyage)|(?:build)|(?:cheap)|(?:codes)|(?:dance)|(?:email)|(?:glass)|(?:house)|(?:ninja)|(?:photo)|(?:shoes)|(?:solar)|(?:today)|(?:aero)|(?:arpa)|(?:asia)|(?:bike)|(?:buzz)|(?:camp)|(?:club)|(?:coop)|(?:farm)|(?:gift)|(?:guru)|(?:info)|(?:jobs)|(?:kiwi)|(?:land)|(?:limo)|(?:link)|(?:menu)|(?:mobi)|(?:moda)|(?:name)|(?:pics)|(?:pink)|(?:post)|(?:rich)|(?:ruhr)|(?:sexy)|(?:tips)|(?:wang)|(?:wien)|(?:zone)|(?:biz)|(?:cab)|(?:cat)|(?:ceo)|(?:com)|(?:edu)|(?:gov)|(?:int)|(?:mil)|(?:net)|(?:onl)|(?:org)|(?:pro)|(?:red)|(?:tel)|(?:uno)|(?:xxx)|(?:ac)|(?:ad)|(?:ae)|(?:af)|(?:ag)|(?:ai)|(?:al)|(?:am)|(?:an)|(?:ao)|(?:aq)|(?:ar)|(?:as)|(?:at)|(?:au)|(?:aw)|(?:ax)|(?:az)|(?:ba)|(?:bb)|(?:bd)|(?:be)|(?:bf)|(?:bg)|(?:bh)|(?:bi)|(?:bj)|(?:bm)|(?:bn)|(?:bo)|(?:br)|(?:bs)|(?:bt)|(?:bv)|(?:bw)|(?:by)|(?:bz)|(?:ca)|(?:cc)|(?:cd)|(?:cf)|(?:cg)|(?:ch)|(?:ci)|(?:ck)|(?:cl)|(?:cm)|(?:cn)|(?:co)|(?:cr)|(?:cu)|(?:cv)|(?:cw)|(?:cx)|(?:cy)|(?:cz)|(?:de)|(?:dj)|(?:dk)|(?:dm)|(?:do)|(?:dz)|(?:ec)|(?:ee)|(?:eg)|(?:er)|(?:es)|(?:et)|(?:eu)|(?:fi)|(?:fj)|(?:fk)|(?:fm)|(?:fo)|(?:fr)|(?:ga)|(?:gb)|(?:gd)|(?:ge)|(?:gf)|(?:gg)|(?:gh)|(?:gi)|(?:gl)|(?:gm)|(?:gn)|(?:gp)|(?:gq)|(?:gr)|(?:gs)|(?:gt)|(?:gu)|(?:gw)|(?:gy)|(?:hk)|(?:hm)|(?:hn)|(?:hr)|(?:ht)|(?:hu)|(?:id)|(?:ie)|(?:il)|(?:im)|(?:in)|(?:io)|(?:iq)|(?:ir)|(?:is)|(?:it)|(?:je)|(?:jm)|(?:jo)|(?:jp)|(?:ke)|(?:kg)|(?:kh)|(?:ki)|(?:km)|(?:kn)|(?:kp)|(?:kr)|(?:kw)|(?:ky)|(?:kz)|(?:la)|(?:lb)|(?:lc)|(?:li)|(?:lk)|(?:lr)|(?:ls)|(?:lt)|(?:lu)|(?:lv)|(?:ly)|(?:ma)|(?:mc)|(?:md)|(?:me)|(?:mg)|(?:mh)|(?:mk)|(?:ml)|(?:mm)|(?:mn)|(?:mo)|(?:mp)|(?:mq)|(?:mr)|(?:ms)|(?:mt)|(?:mu)|(?:mv)|(?:mw)|(?:mx)|(?:my)|(?:mz)|(?:na)|(?:nc)|(?:ne)|(?:nf)|(?:ng)|(?:ni)|(?:nl)|(?:no)|(?:np)|(?:nr)|(?:nu)|(?:nz)|(?:om)|(?:pa)|(?:pe)|(?:pf)|(?:pg)|(?:ph)|(?:pk)|(?:pl)|(?:pm)|(?:pn)|(?:pr)|(?:ps)|(?:pt)|(?:pw)|(?:py)|(?:qa)|(?:re)|(?:ro)|(?:rs)|(?:ru)|(?:rw)|(?:sa)|(?:sb)|(?:sc)|(?:sd)|(?:se)|(?:sg)|(?:sh)|(?:si)|(?:sj)|(?:sk)|(?:sl)|(?:sm)|(?:sn)|(?:so)|(?:sr)|(?:st)|(?:su)|(?:sv)|(?:sx)|(?:sy)|(?:sz)|(?:tc)|(?:td)|(?:tf)|(?:tg)|(?:th)|(?:tj)|(?:tk)|(?:tl)|(?:tm)|(?:tn)|(?:to)|(?:tp)|(?:tr)|(?:tt)|(?:tv)|(?:tw)|(?:tz)|(?:ua)|(?:ug)|(?:uk)|(?:us)|(?:uy)|(?:uz)|(?:va)|(?:vc)|(?:ve)|(?:vg)|(?:vi)|(?:vn)|(?:vu)|(?:wf)|(?:ws)|(?:ye)|(?:yt)|(?:za)|(?:zm)|(?:zw))(?:/[^\s()<>]+[^\s`!()\[\]{};:'\".,<>?\xab\xbb\u201c\u201d\u2018\u2019])?)",
+    re.IGNORECASE,
+)
+email = re.compile(
+    r"([a-z0-9!#$%&'*+\/=?^_`{|.}~-]+@(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?)",
+    re.IGNORECASE,
+)
+ip = re.compile(
+    r"(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)",
+    re.IGNORECASE,
+)
+ipv6 = re.compile(
+    r"\s*(?!.*::.*::)(?:(?!:)|:(?=:))(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)){6}(?:[0-9a-f]{0,4}(?:(?<=::)|(?<!::):)[0-9a-f]{0,4}(?:(?<=::)|(?<!:)|(?<=:)(?<!::):)|(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)(?:\.(?:25[0-4]|2[0-4]\d|1\d\d|[1-9]?\d)){3})\s*",
+    re.VERBOSE | re.IGNORECASE | re.DOTALL,
+)
+price = re.compile(r"[$]\s?[+-]?[0-9]{1,3}(?:(?:,?[0-9]{3}))*(?:\.[0-9]{1,2})?")
+hex_color = re.compile("(#(?:[0-9a-fA-F]{8})|#(?:[0-9a-fA-F]{3}){1,2})\\b")
+credit_card = re.compile("((?:(?:\\d{4}[- ]?){3}\\d{4}|\\d{15,16}))(?![\\d])")
+btc_address = re.compile(
+ "(?<![a-km-zA-HJ-NP-Z0-9])[13][a-km-zA-HJ-NP-Z0-9]{26,33}(?![a-km-zA-HJ-NP-Z0-9])"
+)
+street_address = re.compile(
+    r"\d{1,4} [\w\s]{1,20}(?:street|st|avenue|ave|road|rd|highway|hwy|square|sq|trail|trl|drive|dr|court|ct|park|parkway|pkwy|circle|cir|boulevard|blvd)\W?(?=\s|$)",
+    re.IGNORECASE,
+)
+zip_code = re.compile(r"\b\d{5}(?:[-\s]\d{4})?\b")
+po_box = re.compile(r"P\.? ?O\.? Box \d+", re.IGNORECASE)
+ssn = re.compile(
+    r"(?!000|666|333)0*(?:[0-6][0-9][0-9]|[0-7][0-6][0-9]|[0-7][0-7][0-2])[- ](?!00)[0-9]{2}[- ](?!0000)[0-9]{4}"
+)
+
+regexes = {
+ "dates": date,
+ "times": time,
+ "phones": phone,
+ "phones_with_exts": phones_with_exts,
+ "emails": email,
+ "ips": ip,
+ "ipv6s": ipv6,
+ "prices": price,
+ "hex_colors": hex_color,
+ "credit_cards": credit_card,
+ "btc_addresses": btc_address,
+ "street_addresses": street_address,
+ "zip_codes": zip_code,
+ "po_boxes": po_box,
+ "ssn_number": ssn,
+}
+
+placeholders = {
+ "dates": "<DATE>",
+ "times": "<TIME>",
+ "phones": "<PHONE>",
+ "phones_with_exts": "<PHONE_WITH_EXT>",
+ "emails": "<EMAIL>",
+ "ips": "<IP>",
+ "ipv6s": "<IPV6>",
+ "prices": "<PRICE>",
+ "hex_colors": "<HEX_COLOR>",
+ "credit_cards": "<CREDIT_CARD>",
+ "btc_addresses": "<BTC_ADDRESS>",
+ "street_addresses": "<STREET_ADDRESS>",
+ "zip_codes": "<ZIP_CODE>",
+ "po_boxes": "<PO_BOX>",
+ "ssn_number": "<SSN>",
+}
+
+
+class regex:
+ def __init__(self, obj, regex):
+ self.obj = obj
+ self.regex = regex
+
+ def __call__(self, *args):
+ def regex_method(text=None):
+ return [x.strip() for x in self.regex.findall(text or self.obj.text)]
+
+ return regex_method
+
+
+class CommonRegex(object):
+ def __init__(self, text=""):
+ self.text = text
+
+ for k, v in list(regexes.items()):
+ setattr(self, k, regex(self, v)(self))
+
+ if text:
+ for key in list(regexes.keys()):
+ method = getattr(self, key)
+ setattr(self, key, method())
+
+
+pii_parser = CommonRegex()
+
+
+def clean_pii_from_str(text: str):
+ """Replace personally identifiable information (PII) with placeholders."""
+ for regex_name, regex in list(regexes.items()):
+ placeholder = placeholders[regex_name]
+ text = regex.sub(placeholder, text)
+
+ return text
+
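+# Example (illustrative):
+#
+#   clean_pii_from_str("Contact me at jane@example.com or 192.168.0.1")
+#   # -> "Contact me at <EMAIL> or <IP>"
+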
+
+def clean_pii_from_any(v: Any) -> Any:
+ """Replace personally identifiable information (PII) with placeholders. Not guaranteed to return same type as input."""
+ if isinstance(v, str):
+ return clean_pii_from_str(v)
+ elif isinstance(v, dict):
+ cleaned_dict = {}
+ for key, value in v.items():
+ cleaned_dict[key] = clean_pii_from_any(value)
+ return cleaned_dict
+ elif isinstance(v, list):
+ return [clean_pii_from_any(x) for x in v]
+ else:
+ # Try to convert to string
+ try:
+ orig_text = str(v)
+ cleaned_text = clean_pii_from_str(orig_text)
+ if orig_text != cleaned_text:
+ return cleaned_text
+ else:
+ return v
+        except Exception:
+ return v
diff --git a/server/continuedev/libs/util/copy_codebase.py b/server/continuedev/libs/util/copy_codebase.py
new file mode 100644
index 00000000..78f38148
--- /dev/null
+++ b/server/continuedev/libs/util/copy_codebase.py
@@ -0,0 +1,121 @@
+import os
+import shutil
+from pathlib import Path
+from typing import Iterable, List, Union
+
+from watchdog.events import PatternMatchingEventHandler
+from watchdog.observers import Observer
+
+from ...core.autopilot import Autopilot
+from ...models.filesystem import FileSystem
+from ...models.main import (
+ AddDirectory,
+ AddFile,
+ DeleteDirectory,
+ DeleteFile,
+ FileSystemEdit,
+ RenameDirectory,
+ RenameFile,
+ SequentialFileSystemEdit,
+)
+from .calculate_diff import calculate_diff
+from .map_path import map_path
+
+
+def create_copy(orig_root: str, copy_root: str = None, ignore: Iterable[str] = []):
+    # TODO: Make ignore a spec, like .gitignore
+    if copy_root is None:
+        copy_root = Path(orig_root) / ".continue-copy"
+    ignore = set(ignore)
+
+    os.mkdir(copy_root)
+    for child in os.listdir(orig_root):
+        # Use absolute paths so recursion and copying work from any cwd
+        orig_child = str(Path(orig_root) / child)
+        copy_child = str(map_path(orig_child, orig_root, copy_root))
+        if orig_child == str(copy_root):
+            # Never copy the copy into itself
+            continue
+        if os.path.isdir(orig_child):
+            if child not in ignore:
+                os.mkdir(copy_child)
+                create_copy(orig_child, copy_child, ignore)
+            else:
+                os.symlink(orig_child, copy_child)
+        else:
+            if child not in ignore:
+                shutil.copyfile(orig_child, copy_child)
+            else:
+                os.symlink(orig_child, copy_child)
+
+
+# The whole usage of watchdog here should only be specific to RealFileSystem, you want to have a different "Observer" class for VirtualFileSystem, which would depend on being sent notifications
+class CopyCodebaseEventHandler(PatternMatchingEventHandler):
+ def __init__(
+ self,
+ ignore_directories: List[str],
+ ignore_patterns: List[str],
+ autopilot: Autopilot,
+ orig_root: str,
+ copy_root: str,
+ filesystem: FileSystem,
+ ):
+ super().__init__(
+ ignore_directories=ignore_directories, ignore_patterns=ignore_patterns
+ )
+ self.autopilot = autopilot
+ self.orig_root = orig_root
+ self.copy_root = copy_root
+ self.filesystem = filesystem
+
+ # For now, we'll just make the update immediately, but eventually need to sync with autopilot.
+ # It should be the autopilot that makes the update right? It's just another action, everything comes from a single stream.
+
+ def _event_to_edit(self, event) -> Union[FileSystemEdit, None]:
+ # NOTE: You'll need to map paths to create both an action within the copy filesystem (the one you take) and one in the original filesystem (the one you'll record and allow the user to accept). Basically just need a converter built in to the FileSystemEdit class
+        src = event.src_path  # src_path is an attribute, not a method
+ if event.is_directory:
+ if event.event_type == "moved":
+                return RenameDirectory(src, event.dest_path)
+ elif event.event_type == "deleted":
+ return DeleteDirectory(src)
+ elif event.event_type == "created":
+ return AddDirectory(src)
+ else:
+ if event.event_type == "moved":
+                return RenameFile(src, event.dest_path)
+ elif event.event_type == "deleted":
+ return DeleteFile(src)
+ elif event.event_type == "created":
+ contents = self.filesystem.read(src)
+ # Unclear whether it will always pass a "modified" event right after if something like echo "abc" > newfile.txt happens
+ return AddFile(src, contents)
+ elif event.event_type == "modified":
+ # Watchdog doesn't pass the contents or edit, so have to get it myself and diff
+ updated = self.filesystem.read(src)
+ copy_filepath = map_path(src, self.orig_root, self.copy_root)
+ old = self.filesystem.read(copy_filepath)
+
+                # Edits take the copy's old contents to the new contents
+                edits = calculate_diff(src, old, updated)
+ return SequentialFileSystemEdit(edits)
+ return None
+
+ def on_any_event(self, event):
+ edit = self._event_to_edit(event)
+ if edit is None:
+ return
+ edit = edit.with_mapped_paths(self.orig_root, self.copy_root)
+ action = ManualEditAction(edit)
+ self.autopilot.act(action)
+
+
+def maintain_copy_workspace(
+ autopilot: Autopilot, filesystem: FileSystem, orig_root: str, copy_root: str
+):
+ observer = Observer()
+ event_handler = CopyCodebaseEventHandler(
+ [".git"], [], autopilot, orig_root, copy_root, filesystem
+ )
+ observer.schedule(event_handler, orig_root, recursive=True)
+ observer.start()
+ try:
+        while observer.is_alive():
+ observer.join(1)
+ finally:
+ observer.stop()
+ observer.join()
diff --git a/server/continuedev/libs/util/count_tokens.py b/server/continuedev/libs/util/count_tokens.py
new file mode 100644
index 00000000..d895a2cf
--- /dev/null
+++ b/server/continuedev/libs/util/count_tokens.py
@@ -0,0 +1,206 @@
+import json
+from typing import Dict, List, Union
+
+from ...core.main import ChatMessage
+from .templating import render_templated_string
+
+# TODO: move many of these into a specific LLM.properties() function that
+# contains max tokens, whether it's a chat model or not, and default args (not
+# all models want to be run at 0.5 temp). This also lets custom models made
+# for long contexts (like LLongMA) live here.
+aliases = {
+ "ggml": "gpt-3.5-turbo",
+ "claude-2": "gpt-3.5-turbo",
+}
+DEFAULT_MAX_TOKENS = 1024
+DEFAULT_ARGS = {
+ "max_tokens": DEFAULT_MAX_TOKENS,
+ "temperature": 0.5,
+}
+
+already_saw_import_err = False
+
+
+def encoding_for_model(model_name: str):
+ global already_saw_import_err
+ if already_saw_import_err:
+ return None
+
+ try:
+ import tiktoken
+ from tiktoken_ext import openai_public # noqa: F401
+
+ try:
+ return tiktoken.encoding_for_model(aliases.get(model_name, model_name))
+ except Exception as _:
+ return tiktoken.encoding_for_model("gpt-3.5-turbo")
+ except Exception as e:
+ print("Error importing tiktoken", e)
+ already_saw_import_err = True
+ return None
+
+
+def count_tokens(model_name: str, text: Union[str, None]):
+ if text is None:
+ return 0
+ encoding = encoding_for_model(model_name)
+ if encoding is None:
+        # No tokenizer available; tokens average ~4 characters, so dividing
+        # the character count by 2 gives a safe overestimate
+ return len(text) // 2
+ return len(encoding.encode(text, disallowed_special=()))
+
+
+def count_chat_message_tokens(model_name: str, chat_message: ChatMessage) -> int:
+ # Doing simpler, safer version of what is here:
+ # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
+ # every message follows <|start|>{role/name}\n{content}<|end|>\n
+ TOKENS_PER_MESSAGE = 4
+ return count_tokens(model_name, chat_message.content) + TOKENS_PER_MESSAGE
+
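+# Example (illustrative): with the 4-token per-message overhead above, a
+# message whose content encodes to 10 tokens is counted as 10 + 4 = 14.
+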
+
+def prune_raw_prompt_from_top(
+ model_name: str, context_length: int, prompt: str, tokens_for_completion: int
+):
+ max_tokens = context_length - tokens_for_completion
+ encoding = encoding_for_model(model_name)
+
+ if encoding is None:
+ desired_length_in_chars = max_tokens * 2
+ return prompt[-desired_length_in_chars:]
+
+ tokens = encoding.encode(prompt, disallowed_special=())
+ if len(tokens) <= max_tokens:
+ return prompt
+ else:
+ return encoding.decode(tokens[-max_tokens:])
+
+
+def prune_chat_history(
+ model_name: str,
+ chat_history: List[ChatMessage],
+ context_length: int,
+ tokens_for_completion: int,
+):
+ total_tokens = tokens_for_completion + sum(
+ count_chat_message_tokens(model_name, message) for message in chat_history
+ )
+
+    # 1. Replace messages beyond the last 5 with their summaries
+    i = 0
+    while total_tokens > context_length and i < len(chat_history) - 5:
+        message = chat_history[i]
+ total_tokens -= count_tokens(model_name, message.content)
+ total_tokens += count_tokens(model_name, message.summary)
+ message.content = message.summary
+ i += 1
+
+    # 2. Remove entire messages until only the last 5 remain
+    while len(chat_history) > 5 and total_tokens > context_length:
+ message = chat_history.pop(0)
+ total_tokens -= count_tokens(model_name, message.content)
+
+ # 3. Truncate message in the last 5, except last 1
+ i = 0
+ while (
+ total_tokens > context_length
+ and len(chat_history) > 0
+ and i < len(chat_history) - 1
+ ):
+ message = chat_history[i]
+ total_tokens -= count_tokens(model_name, message.content)
+ total_tokens += count_tokens(model_name, message.summary)
+ message.content = message.summary
+ i += 1
+
+ # 4. Remove entire messages in the last 5, except last 1
+ while total_tokens > context_length and len(chat_history) > 1:
+ message = chat_history.pop(0)
+ total_tokens -= count_tokens(model_name, message.content)
+
+ # 5. Truncate last message
+ if total_tokens > context_length and len(chat_history) > 0:
+ message = chat_history[0]
+ message.content = prune_raw_prompt_from_top(
+ model_name, context_length, message.content, tokens_for_completion
+ )
+ total_tokens = context_length
+
+ return chat_history
+
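+# Illustrative walk-through of prune_chat_history: with 8 messages over
+# budget, messages beyond the last 5 are first summarized (1), then dropped
+# (2); then messages within the last 5 (except the final one) are summarized
+# (3) and dropped (4); as a last resort the final message itself is truncated
+# from the top (5).
+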
+
+# In case we've missed weird edge cases
+TOKEN_BUFFER_FOR_SAFETY = 100
+
+
+def compile_chat_messages(
+ model_name: str,
+ msgs: Union[List[ChatMessage], None],
+ context_length: int,
+ max_tokens: int,
+ prompt: Union[str, None] = None,
+ functions: Union[List, None] = None,
+ system_message: Union[str, None] = None,
+) -> List[Dict]:
+ """
+ The total number of tokens is system_message + sum(msgs) + functions + prompt after it is converted to a message
+ """
+
+ msgs_copy = [msg.copy(deep=True) for msg in msgs] if msgs is not None else []
+
+ if prompt is not None:
+ prompt_msg = ChatMessage(role="user", content=prompt, summary=prompt)
+ msgs_copy += [prompt_msg]
+
+ if system_message is not None and system_message.strip() != "":
+ # NOTE: System message takes second precedence to user prompt, so it is placed just before
+ # but move back to start after processing
+ rendered_system_message = render_templated_string(system_message)
+ system_chat_msg = ChatMessage(
+ role="system",
+ content=rendered_system_message,
+ summary=rendered_system_message,
+ )
+ # insert at second-to-last position
+ msgs_copy.insert(-1, system_chat_msg)
+
+ # Add tokens from functions
+ function_tokens = 0
+ if functions is not None:
+ for function in functions:
+ function_tokens += count_tokens(model_name, json.dumps(function))
+
+ if max_tokens + function_tokens + TOKEN_BUFFER_FOR_SAFETY >= context_length:
+ raise ValueError(
+ f"max_tokens ({max_tokens}) is too close to context_length ({context_length}), which doesn't leave room for chat history. This would cause incoherent responses. Try increasing the context_length parameter of the model in your config file."
+ )
+
+ msgs_copy = prune_chat_history(
+ model_name,
+ msgs_copy,
+ context_length,
+ function_tokens + max_tokens + TOKEN_BUFFER_FOR_SAFETY,
+ )
+
+ history = [msg.to_dict(with_functions=functions is not None) for msg in msgs_copy]
+
+ # Move system message back to start
+ if (
+ system_message is not None
+ and len(history) >= 2
+ and history[-2]["role"] == "system"
+ ):
+ system_message_dict = history.pop(-2)
+ history.insert(0, system_message_dict)
+
+ return history
+
+
+def format_chat_messages(messages: List[Dict[str, str]]) -> str:
+ formatted = ""
+ for msg in messages:
+ formatted += f"<{msg['role'].capitalize()}>\n{msg['content']}\n\n"
+ return formatted
diff --git a/server/continuedev/libs/util/create_async_task.py b/server/continuedev/libs/util/create_async_task.py
new file mode 100644
index 00000000..232d3fa1
--- /dev/null
+++ b/server/continuedev/libs/util/create_async_task.py
@@ -0,0 +1,38 @@
+import asyncio
+import traceback
+from typing import Any, Callable, Coroutine, Optional
+
+import nest_asyncio
+
+from .logging import logger
+from .telemetry import posthog_logger
+
+nest_asyncio.apply()
+
+
+def create_async_task(
+    coro: Coroutine, on_error: Optional[Callable[[Exception], Any]] = None
+):
+ """asyncio.create_task and log errors by adding a callback"""
+ task = asyncio.create_task(coro)
+
+ def callback(future: asyncio.Future):
+ try:
+ future.result()
+ except Exception as e:
+ formatted_tb = "\n".join(traceback.format_exception(e))
+ logger.critical(f"Exception caught from async task: {formatted_tb}")
+ posthog_logger.capture_event(
+ "async_task_error",
+ {
+ "error_title": e.__str__() or e.__repr__(),
+ "error_message": "\n".join(traceback.format_exception(e)),
+ },
+ )
+
+            # Log the error to the GUI. on_error may be a plain callback or a
+            # coroutine function; only schedule its result if it is a coroutine.
+            if on_error is not None:
+                result = on_error(e)
+                if asyncio.iscoroutine(result):
+                    asyncio.create_task(result)
+
+ task.add_done_callback(callback)
+ return task
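+
+
+# Example usage (illustrative; refresh_index and report are hypothetical):
+#
+#   async def refresh_index():
+#       ...
+#
+#   async def report(e: Exception):
+#       logger.warning(f"Index refresh failed: {e}")
+#
+#   create_async_task(refresh_index(), report)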
diff --git a/server/continuedev/libs/util/devdata.py b/server/continuedev/libs/util/devdata.py
new file mode 100644
index 00000000..61b4351d
--- /dev/null
+++ b/server/continuedev/libs/util/devdata.py
@@ -0,0 +1,67 @@
+"""
+This file contains mechanisms for logging development data to files, SQL databases, and other formats.
+"""
+
+
+import json
+from datetime import datetime
+from typing import Any, Dict
+
+import aiohttp
+
+from .create_async_task import create_async_task
+from .logging import logger
+from .paths import getDevDataFilePath
+
+
+class DevDataLogger:
+ user_token: str = None
+ data_server_url: str = None
+
+ def setup(self, user_token: str = None, data_server_url: str = None):
+ self.user_token = user_token
+ self.data_server_url = data_server_url
+
+ def _to_data_server(self, table_name: str, data: Dict[str, Any]):
+ async def _async_helper(self, table_name: str, data: Dict[str, Any]):
+ if self.user_token is None or self.data_server_url is None:
+ return
+
+ async with aiohttp.ClientSession() as session:
+ await session.post(
+ f"{self.data_server_url}/event",
+ headers={"Authorization": f"Bearer {self.user_token}"},
+ json={
+ "table_name": table_name,
+ "data": data,
+ "user_token": self.user_token,
+ },
+ )
+
+ create_async_task(
+ _async_helper(self, table_name, data),
+ lambda e: logger.warning(f"Failed to send dev data: {e}"),
+ )
+
+ def _static_columns(self):
+ return {
+ "user_token": self.user_token or "NO_USER_TOKEN",
+ "timestamp": datetime.now().isoformat(),
+ }
+
+ def _to_local(self, table_name: str, data: Dict[str, Any]):
+ filepath = getDevDataFilePath(table_name)
+ with open(filepath, "a") as f:
+ json_line = json.dumps(data)
+ f.write(f"{json_line}\n")
+
+ def capture(self, table_name: str, data: Dict[str, Any]):
+ try:
+ data = {**self._static_columns(), **data}
+ self._to_data_server(table_name, data)
+ self._to_local(table_name, data)
+ except Exception as e:
+ logger.warning(f"Failed to capture dev data: {e}")
+
+
+dev_data_logger = DevDataLogger()
diff --git a/server/continuedev/libs/util/edit_config.py b/server/continuedev/libs/util/edit_config.py
new file mode 100644
index 00000000..4dc427d2
--- /dev/null
+++ b/server/continuedev/libs/util/edit_config.py
@@ -0,0 +1,149 @@
+import threading
+from typing import Any, Dict, List
+
+import redbaron
+
+from .paths import getConfigFilePath
+
+
+def get_config_source():
+ config_file_path = getConfigFilePath()
+ with open(config_file_path, "r") as file:
+ source_code = file.read()
+ return source_code
+
+
+def load_red():
+ source_code = get_config_source()
+
+ red = redbaron.RedBaron(source_code)
+ return red
+
+
+def get_config_node(red):
+ for node in red:
+ if node.type == "assignment" and node.target.value == "config":
+ return node
+ else:
+ raise Exception("Config file appears to be improperly formatted")
+
+
+def edit_property(
+ args: redbaron.RedBaron, key_path: List[str], value: redbaron.RedBaron
+):
+ for i in range(len(args)):
+ node = args[i]
+ if node.type != "call_argument":
+ continue
+
+ if node.target.value == key_path[0]:
+ if len(key_path) > 1:
+ edit_property(node.value.value[1].value, key_path[1:], value)
+ else:
+ args[i].value = value
+ return
+
+
+edit_lock = threading.Lock()
+
+
+def edit_config_property(key_path: List[str], value: redbaron.RedBaron):
+ with edit_lock:
+ red = load_red()
+ config = get_config_node(red)
+ config_args = config.value.value[1].value
+ edit_property(config_args, key_path, value)
+
+ with open(getConfigFilePath(), "w") as file:
+ file.write(red.dumps())
+
+
+def add_config_import(line: str):
+ # check if the import already exists
+ source = get_config_source()
+ if line in source:
+ return
+
+ with edit_lock:
+ red = load_red()
+ # if it doesn't exist, add it
+ red.insert(1, line)
+
+ with open(getConfigFilePath(), "w") as file:
+ file.write(red.dumps())
+
+
+filtered_attrs = {
+ "class_name",
+ "name",
+ "llm",
+}
+
+filtered_attrs_when_new = {"timeout", "prompt_templates"}
+
+
+def escape_string(string: str) -> str:
+ return string.replace('"', '\\"').replace("'", "\\'")
+
+
+def display_val(v: Any, k: str = None):
+ if k == "template_messages":
+ return v
+
+ if isinstance(v, str):
+ return f'"{escape_string(v)}"'
+ return str(v)
+
+
+def is_default(llm, k, v):
+ if k == "template_messages" and llm.__fields__[k].default is not None:
+ return llm.__fields__[k].default.__name__ == v
+ return v == llm.__fields__[k].default
+
+
+def display_llm_class(llm, new: bool = False):
+ sep = ",\n\t\t\t"
+ args = sep.join(
+ [
+ f"{k}={display_val(v, k)}"
+ for k, v in llm.dict().items()
+ if k not in filtered_attrs and v is not None and not is_default(llm, k, v)
+ ]
+ )
+ return f"{llm.__class__.__name__}(\n\t\t\t{args}\n\t\t)"
+
+
+def create_obj_node(
+ class_name: str, args: Dict[str, str], tabs: int = 1
+) -> redbaron.RedBaron:
+ args = [f"{key}={value}" for key, value in args.items()]
+ t = "\t" * tabs
+ new_line = "\n\t" + t
+ sep = "," + new_line
+
+ return redbaron.RedBaron(f"{class_name}({new_line}{sep.join(args)}\n{t})")[0]
+
+
+def create_string_node(string: str) -> redbaron.RedBaron:
+ string = escape_string(string)
+ if "\n" in string:
+ return redbaron.RedBaron(f'"""{string}"""')[0]
+ return redbaron.RedBaron(f'"{string}"')[0]
+
+
+def create_literal_node(literal: str) -> redbaron.RedBaron:
+ return redbaron.RedBaron(literal)[0]
+
+
+def create_float_node(value: float) -> redbaron.RedBaron:
+    return redbaron.RedBaron(f"{value}")[0]
+
+
+# Example:
+# edit_config_property(
+# [
+# "models",
+# "default",
+# ],
+# create_obj_node("OpenAI", {"api_key": '""', "model": '"gpt-4"'}),
+# )
diff --git a/server/continuedev/libs/util/errors.py b/server/continuedev/libs/util/errors.py
new file mode 100644
index 00000000..46074cfc
--- /dev/null
+++ b/server/continuedev/libs/util/errors.py
@@ -0,0 +1,2 @@
+class SessionNotFound(Exception):
+ pass
diff --git a/server/continuedev/libs/util/filter_files.py b/server/continuedev/libs/util/filter_files.py
new file mode 100644
index 00000000..6ebaa274
--- /dev/null
+++ b/server/continuedev/libs/util/filter_files.py
@@ -0,0 +1,33 @@
+import fnmatch
+from typing import List
+
+DEFAULT_IGNORE_DIRS = [
+ ".git",
+ ".vscode",
+ ".idea",
+ ".vs",
+ ".venv",
+ "env",
+ ".env",
+ "node_modules",
+ "dist",
+ "build",
+ "target",
+ "out",
+ "bin",
+ ".pytest_cache",
+ ".vscode-test",
+ ".continue",
+ "__pycache__",
+]
+
+DEFAULT_IGNORE_PATTERNS = DEFAULT_IGNORE_DIRS + list(
+    map(lambda d: f"**/{d}", DEFAULT_IGNORE_DIRS)
+)
+
+
+def should_filter_path(
+ path: str, ignore_patterns: List[str] = DEFAULT_IGNORE_PATTERNS
+) -> bool:
+ """Returns whether a file should be filtered"""
+ return any(fnmatch.fnmatch(path, pattern) for pattern in ignore_patterns)
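+
+
+# Examples (illustrative):
+#
+#   should_filter_path("node_modules")          # True (matches the bare name)
+#   should_filter_path("src/app/node_modules")  # True (matches "**/node_modules")
+#   should_filter_path("src/main.py")           # False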
diff --git a/server/continuedev/libs/util/logging.py b/server/continuedev/libs/util/logging.py
new file mode 100644
index 00000000..a4dc3562
--- /dev/null
+++ b/server/continuedev/libs/util/logging.py
@@ -0,0 +1,47 @@
+import logging
+import os
+
+from .paths import getLogFilePath
+
+logfile_path = getLogFilePath()
+
+try:
+    # Truncate the log file, keeping only the most recent 32KB
+ if os.path.exists(logfile_path) and os.path.getsize(logfile_path) > 32 * 1024:
+ tail = None
+ with open(logfile_path, "rb") as f:
+ f.seek(-32 * 1024, os.SEEK_END)
+ tail = f.read().decode("utf-8")
+
+ if tail is not None:
+ with open(logfile_path, "w") as f:
+ f.write(tail)
+
+except Exception as e:
+ print("Error truncating log file: {}".format(e))
+
+# Create a logger
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+
+# Create a file handler
+file_handler = logging.FileHandler(logfile_path)
+file_handler.setLevel(logging.DEBUG)
+
+# Create a console handler
+console_handler = logging.StreamHandler()
+console_handler.setLevel(logging.DEBUG)
+
+# Create a formatter
+formatter = logging.Formatter("[%(asctime)s] [%(levelname)s] %(message)s")
+
+# Add the formatter to the handlers
+file_handler.setFormatter(formatter)
+console_handler.setFormatter(formatter)
+
+# Add the handlers to the logger
+logger.addHandler(file_handler)
+logger.addHandler(console_handler)
+
+# Log a test message
+logger.debug("------ Begin Logs ------")
diff --git a/server/continuedev/libs/util/map_path.py b/server/continuedev/libs/util/map_path.py
new file mode 100644
index 00000000..1dddc2e9
--- /dev/null
+++ b/server/continuedev/libs/util/map_path.py
@@ -0,0 +1,16 @@
+from pathlib import Path
+
+
+def map_path(path: str, orig_root: str, copy_root: str) -> Path:
+ path = Path(path)
+ if path.is_relative_to(orig_root):
+ if path.is_absolute():
+ return Path(copy_root) / path.relative_to(orig_root)
+ else:
+ return path
+ else:
+ if path.is_absolute():
+ return path
+ else:
+ # For this one, you need to know the directory from which the relative path is being used.
+ return Path(orig_root) / path
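+
+
+# Example (illustrative):
+#
+#   map_path("/proj/src/a.py", "/proj", "/proj/.continue-copy")
+#   # -> Path("/proj/.continue-copy/src/a.py")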
diff --git a/server/continuedev/libs/util/paths.py b/server/continuedev/libs/util/paths.py
new file mode 100644
index 00000000..22e4b5b9
--- /dev/null
+++ b/server/continuedev/libs/util/paths.py
@@ -0,0 +1,148 @@
+import os
+import re
+from typing import Optional
+
+from ..constants.default_config import default_config
+from ..constants.main import (
+ CONTINUE_GLOBAL_FOLDER,
+ CONTINUE_SERVER_FOLDER,
+ CONTINUE_SESSIONS_FOLDER,
+)
+
+
+def find_data_file(filename):
+ datadir = os.path.dirname(__file__)
+ return os.path.abspath(os.path.join(datadir, filename))
+
+
+def getGlobalFolderPath():
+ path = os.path.join(os.path.expanduser("~"), CONTINUE_GLOBAL_FOLDER)
+ os.makedirs(path, exist_ok=True)
+ return path
+
+
+def getSessionsFolderPath():
+ path = os.path.join(getGlobalFolderPath(), CONTINUE_SESSIONS_FOLDER)
+ os.makedirs(path, exist_ok=True)
+ return path
+
+
+def getServerFolderPath():
+ path = os.path.join(getGlobalFolderPath(), CONTINUE_SERVER_FOLDER)
+ os.makedirs(path, exist_ok=True)
+ return path
+
+
+def getDevDataFolderPath():
+ path = os.path.join(getGlobalFolderPath(), "dev_data")
+ os.makedirs(path, exist_ok=True)
+ return path
+
+
+def getDiffsFolderPath():
+ path = os.path.join(getGlobalFolderPath(), "diffs")
+ os.makedirs(path, exist_ok=True)
+ return path
+
+
+def getDevDataFilePath(table_name: str):
+ filepath = os.path.join(getDevDataFolderPath(), f"{table_name}.jsonl")
+ if not os.path.exists(filepath):
+ with open(filepath, "w") as f:
+ f.write("")
+
+ return filepath
+
+
+def getMeilisearchExePath():
+ binary_name = "meilisearch.exe" if os.name == "nt" else "meilisearch"
+ path = os.path.join(getServerFolderPath(), binary_name)
+ return path
+
+
+def getSessionFilePath(session_id: str):
+ path = os.path.join(getSessionsFolderPath(), f"{session_id}.json")
+ os.makedirs(os.path.dirname(path), exist_ok=True)
+ return path
+
+
+def getSessionsListFilePath():
+ path = os.path.join(getSessionsFolderPath(), "sessions.json")
+ os.makedirs(os.path.dirname(path), exist_ok=True)
+ if not os.path.exists(path):
+ with open(path, "w") as f:
+ f.write("[]")
+ return path
+
+
+def migrateConfigFile(existing: str) -> Optional[str]:
+ if existing.strip() == "":
+ return default_config
+
+ migrated = (
+ existing.replace("MaybeProxyOpenAI", "OpenAIFreeTrial")
+ .replace("maybe_proxy_openai", "openai_free_trial")
+ .replace("unused=", "saved=")
+ .replace("medium=", "summarize=")
+ )
+ if migrated != existing:
+ return migrated
+
+ return None
+
+
+def getConfigFilePath() -> str:
+ path = os.path.join(getGlobalFolderPath(), "config.py")
+ os.makedirs(os.path.dirname(path), exist_ok=True)
+
+ if not os.path.exists(path):
+ with open(path, "w") as f:
+ f.write(default_config)
+ else:
+ # Make any necessary migrations
+ with open(path, "r") as f:
+ existing_content = f.read()
+
+ migrated = migrateConfigFile(existing_content)
+
+ if migrated is not None:
+ with open(path, "w") as f:
+ f.write(migrated)
+
+ return path
+
+
+def convertConfigImports(shorten: bool) -> None:
+ path = getConfigFilePath()
+ # Make any necessary migrations
+ with open(path, "r") as f:
+ existing_content = f.read()
+
+ if shorten:
+ migrated = existing_content.replace(
+ "from continuedev.src.continuedev.", "from continuedev."
+ )
+ else:
+        # Inverse of the shortening above: re-qualify bare continuedev.*
+        # imports (the lookarounds prevent applying this twice)
+        migrated = re.sub(
+            r"(?<!src\.)continuedev\.(?!src)",
+            "continuedev.src.continuedev.",
+            existing_content,
+        )
+
+ with open(path, "w") as f:
+ f.write(migrated)
+
+
+def getLogFilePath():
+ path = os.path.join(getGlobalFolderPath(), "continue.log")
+ os.makedirs(os.path.dirname(path), exist_ok=True)
+ return path
+
+
+def getSavedContextGroupsPath():
+ path = os.path.join(getGlobalFolderPath(), "saved_context_groups.json")
+ os.makedirs(os.path.dirname(path), exist_ok=True)
+ if not os.path.exists(path):
+ with open(path, "w") as f:
+            f.write("{}")
+ return path
diff --git a/server/continuedev/libs/util/queue.py b/server/continuedev/libs/util/queue.py
new file mode 100644
index 00000000..e1f98cc6
--- /dev/null
+++ b/server/continuedev/libs/util/queue.py
@@ -0,0 +1,17 @@
+import asyncio
+from typing import Any, Dict
+
+
+class AsyncSubscriptionQueue:
+ # The correct way to do this is probably to keep request IDs
+ queues: Dict[str, asyncio.Queue] = {}
+
+    def post(self, message_type: str, data: Any):
+        if message_type not in self.queues:
+            self.queues.update({message_type: asyncio.Queue()})
+        self.queues[message_type].put_nowait(data)
+
+    async def get(self, message_type: str) -> Any:
+ if message_type not in self.queues:
+ self.queues.update({message_type: asyncio.Queue()})
+ return await self.queues[message_type].get()
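+
+
+# Example usage (illustrative), from inside a coroutine:
+#
+#   queue = AsyncSubscriptionQueue()
+#   queue.post("highlightedCode", {"filepath": "a.py"})
+#   data = await queue.get("highlightedCode")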
diff --git a/server/continuedev/libs/util/ripgrep.py b/server/continuedev/libs/util/ripgrep.py
new file mode 100644
index 00000000..f7e0af9a
--- /dev/null
+++ b/server/continuedev/libs/util/ripgrep.py
@@ -0,0 +1,25 @@
+import os
+import sys
+
+
+def get_rg_path():
+    rg_path = None
+    if os.name == "nt":
+        paths_to_try = [
+            f"C:\\Users\\{os.getlogin()}\\AppData\\Local\\Programs\\Microsoft VS Code\\resources\\app\\node_modules.asar.unpacked\\@vscode\\ripgrep\\bin\\rg.exe",
+            f"C:\\Users\\{os.getlogin()}\\AppData\\Local\\Programs\\Microsoft VS Code\\resources\\app\\node_modules.asar.unpacked\\vscode-ripgrep\\bin\\rg.exe",
+        ]
+        for path in paths_to_try:
+            if os.path.exists(path):
+                rg_path = path
+                break
+    elif os.name == "posix":
+        if "darwin" in sys.platform:
+            rg_path = "/Applications/Visual Studio Code.app/Contents/Resources/app/node_modules.asar.unpacked/@vscode/ripgrep/bin/rg"
+        else:
+            rg_path = "/usr/share/code/resources/app/node_modules.asar.unpacked/vscode-ripgrep/bin/rg"
+
+    # Fall back to a ripgrep binary on the PATH if none of the known VS Code
+    # bundle locations exist (or the platform is unrecognized)
+    if rg_path is None or not os.path.exists(rg_path):
+        rg_path = "rg"
+
+    return rg_path
diff --git a/server/continuedev/libs/util/step_name_to_steps.py b/server/continuedev/libs/util/step_name_to_steps.py
new file mode 100644
index 00000000..25fd8ba3
--- /dev/null
+++ b/server/continuedev/libs/util/step_name_to_steps.py
@@ -0,0 +1,47 @@
+from typing import Dict
+
+from ...core.main import Step
+from ...core.steps import UserInputStep
+from ...libs.util.logging import logger
+from ...plugins.recipes.AddTransformRecipe.main import AddTransformRecipe
+from ...plugins.recipes.CreatePipelineRecipe.main import CreatePipelineRecipe
+from ...plugins.recipes.DDtoBQRecipe.main import DDtoBQRecipe
+from ...plugins.recipes.DeployPipelineAirflowRecipe.main import (
+ DeployPipelineAirflowRecipe,
+)
+from ...plugins.steps.chat import SimpleChatStep
+from ...plugins.steps.clear_history import ClearHistoryStep
+from ...plugins.steps.comment_code import CommentCodeStep
+from ...plugins.steps.feedback import FeedbackStep
+from ...plugins.steps.help import HelpStep
+from ...plugins.steps.main import EditHighlightedCodeStep
+from ...plugins.steps.on_traceback import DefaultOnTracebackStep
+from ...plugins.steps.open_config import OpenConfigStep
+
+# This mapping is used to convert from string in ContinueConfig json to corresponding Step class.
+# Used for example in slash_commands and steps_on_startup
+step_name_to_step_class = {
+ "UserInputStep": UserInputStep,
+ "EditHighlightedCodeStep": EditHighlightedCodeStep,
+ "SimpleChatStep": SimpleChatStep,
+ "CommentCodeStep": CommentCodeStep,
+ "FeedbackStep": FeedbackStep,
+ "AddTransformRecipe": AddTransformRecipe,
+ "CreatePipelineRecipe": CreatePipelineRecipe,
+ "DDtoBQRecipe": DDtoBQRecipe,
+ "DeployPipelineAirflowRecipe": DeployPipelineAirflowRecipe,
+ "DefaultOnTracebackStep": DefaultOnTracebackStep,
+ "ClearHistoryStep": ClearHistoryStep,
+ "OpenConfigStep": OpenConfigStep,
+ "HelpStep": HelpStep,
+}
+
+
+def get_step_from_name(step_name: str, params: Dict) -> Step:
+ try:
+ return step_name_to_step_class[step_name](**params)
+    except Exception:
+ logger.error(
+ f"Incorrect parameters for step {step_name}. Parameters provided were: {params}"
+ )
+ raise
diff --git a/server/continuedev/libs/util/strings.py b/server/continuedev/libs/util/strings.py
new file mode 100644
index 00000000..f2b6035f
--- /dev/null
+++ b/server/continuedev/libs/util/strings.py
@@ -0,0 +1,64 @@
+from typing import Tuple
+
+
+def dedent_and_get_common_whitespace(s: str) -> Tuple[str, str]:
+ lines = s.splitlines()
+ if len(lines) == 0:
+ return "", ""
+
+    # Longest common whitespace prefix, starting from the first line's
+    # leading whitespace (safe even when a line is empty or all whitespace)
+    lcp = lines[0][: len(lines[0]) - len(lines[0].lstrip())]
+ # Iterate through the lines
+ for i in range(1, len(lines)):
+ # Empty lines are wildcards
+ if lines[i].strip() == "":
+ continue # hey that's us!
+ # Iterate through the leading whitespace characters of the current line
+ for j in range(0, len(lcp)):
+ # If it doesn't have the same whitespace as lcp, then update lcp
+ if j >= len(lines[i]) or lcp[j] != lines[i][j]:
+ lcp = lcp[:j]
+ if lcp == "":
+ return s, ""
+ break
+
+ return "\n".join(map(lambda x: x.lstrip(lcp), lines)), lcp
+
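+# Example (illustrative):
+#
+#   dedent_and_get_common_whitespace("    a\n      b")
+#   # -> ("a\n  b", "    ")
+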
+
+def strip_code_block(s: str) -> str:
+ """
+ Strips the code block from a string, if it has one.
+ """
+ if s.startswith("```\n") and s.endswith("\n```"):
+ return s[4:-4]
+ elif s.startswith("```") and s.endswith("```"):
+ return s[3:-3]
+ elif s.startswith("`") and s.endswith("`"):
+ return s[1:-1]
+ return s
+
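+# Example (illustrative):
+#
+#   strip_code_block("```\nprint('hi')\n```")  # -> "print('hi')"
+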
+
+def remove_quotes_and_escapes(output: str) -> str:
+ """
+ Clean up the output of the completion API, removing unnecessary escapes and quotes
+ """
+ output = output.strip()
+
+ # Replace smart quotes
+ output = output.replace("“", '"')
+ output = output.replace("”", '"')
+ output = output.replace("‘", "'")
+ output = output.replace("’", "'")
+
+ # Remove escapes
+ output = output.replace('\\"', '"')
+ output = output.replace("\\'", "'")
+ output = output.replace("\\n", "\n")
+ output = output.replace("\\t", "\t")
+ output = output.replace("\\\\", "\\")
+ if (output.startswith('"') and output.endswith('"')) or (
+ output.startswith("'") and output.endswith("'")
+ ):
+ output = output[1:-1]
+
+ return output
diff --git a/server/continuedev/libs/util/telemetry.py b/server/continuedev/libs/util/telemetry.py
new file mode 100644
index 00000000..1772fe20
--- /dev/null
+++ b/server/continuedev/libs/util/telemetry.py
@@ -0,0 +1,108 @@
+import os
+import socket
+from typing import Any, Dict, Optional
+
+from dotenv import load_dotenv
+
+from ..constants.main import CONTINUE_SERVER_VERSION_FILE
+from .commonregex import clean_pii_from_any
+from .paths import getServerFolderPath
+
+load_dotenv()
+in_codespaces = os.getenv("CODESPACES") == "true"
+POSTHOG_API_KEY = "phc_JS6XFROuNbhJtVCEdTSYk6gl5ArRrTNMpCcguAXlSPs"
+
+
+def is_connected():
+ try:
+ # connect to the host -- tells us if the host is actually reachable
+ socket.create_connection(("www.google.com", 80))
+ return True
+ except OSError:
+ pass
+ return False
+
+
+class PostHogLogger:
+ unique_id: str = "NO_UNIQUE_ID"
+ allow_anonymous_telemetry: bool = False
+ ide_info: Optional[Dict] = None
+ posthog = None
+
+ def __init__(self, api_key: str):
+ self.api_key = api_key
+
+ def setup(
+ self, unique_id: str, allow_anonymous_telemetry: bool, ide_info: Optional[Dict]
+ ):
+ self.unique_id = unique_id or "NO_UNIQUE_ID"
+ self.allow_anonymous_telemetry = allow_anonymous_telemetry or False
+ self.ide_info = ide_info
+
+ # Capture initial event
+ self.capture_event("session_start", {"os": os.name})
+
+ def capture_event(self, event_name: str, event_properties: Any):
+ """Safely capture event. Telemetry should never be the reason Continue doesn't work"""
+ try:
+ self._capture_event(event_name, event_properties)
+        except Exception as e:
+            print(f"Failed to capture event: {e}")
+
+ _found_disconnected: bool = False
+
+ def _capture_event(self, event_name: str, event_properties: Any):
+ # logger.debug(
+ # f"Logging to PostHog: {event_name} ({self.unique_id}, {self.allow_anonymous_telemetry}): {event_properties}")
+ telemetry_path = os.path.expanduser("~/.continue/telemetry.log")
+
+ # Make sure the telemetry file exists
+ if not os.path.exists(telemetry_path):
+ os.makedirs(os.path.dirname(telemetry_path), exist_ok=True)
+ open(telemetry_path, "w").close()
+
+ with open(telemetry_path, "a") as f:
+ str_to_write = f"{event_name}: {event_properties}\n{self.unique_id}\n{self.allow_anonymous_telemetry}\n\n"
+ f.write(str_to_write)
+
+ if not self.allow_anonymous_telemetry:
+ return
+
+ # Clean PII from event properties
+ event_properties = clean_pii_from_any(event_properties)
+
+ # Add additional properties that are on every event
+ if in_codespaces:
+ event_properties["codespaces"] = True
+
+ server_version_file = os.path.join(
+ getServerFolderPath(), CONTINUE_SERVER_VERSION_FILE
+ )
+ if os.path.exists(server_version_file):
+ with open(server_version_file, "r") as f:
+ event_properties["server_version"] = f.read()
+
+ # Add operating system
+ event_properties["os"] = os.name
+ if self.ide_info:
+ event_properties["ide_name"] = self.ide_info.get("name", None)
+ event_properties["ide_version"] = self.ide_info.get("version", None)
+ event_properties["ide_remote_name"] = self.ide_info.get("remoteName", None)
+
+ # Send event to PostHog
+ if self.posthog is None:
+ from posthog import Posthog
+
+ # The personal API key is necessary only if you want to use local evaluation of feature flags.
+ self.posthog = Posthog(self.api_key, host="https://app.posthog.com")
+
+ if is_connected():
+ self.posthog.capture(self.unique_id, event_name, event_properties)
+ else:
+ if not self._found_disconnected:
+ self._found_disconnected = True
+ raise ConnectionError("No internet connection")
+
+
+posthog_logger = PostHogLogger(api_key=POSTHOG_API_KEY)
diff --git a/server/continuedev/libs/util/templating.py b/server/continuedev/libs/util/templating.py
new file mode 100644
index 00000000..8d6a32fc
--- /dev/null
+++ b/server/continuedev/libs/util/templating.py
@@ -0,0 +1,76 @@
+import os
+from typing import Callable, Dict, List, Union
+
+import chevron
+
+from ...core.main import ChatMessage
+
+
+def get_vars_in_template(template):
+ """
+ Get the variables in a template
+ """
+ return [
+ token[1]
+ for token in chevron.tokenizer.tokenize(template)
+ if token[0] == "variable"
+ ]
+
+
+def escape_var(var: str) -> str:
+ """
+ Escape a variable so it can be used in a template
+ """
+ return var.replace(os.path.sep, "").replace(".", "")
+
+
+def render_templated_string(template: str) -> str:
+ """
+ Render system message or other templated string with mustache syntax.
+ Right now it only supports rendering absolute file paths as their contents.
+ """
+ vars = get_vars_in_template(template)
+
+ args = {}
+ for var in vars:
+ if var.startswith(os.path.sep):
+ # Escape vars which are filenames, because mustache doesn't allow / in variable names
+ escaped_var = escape_var(var)
+ template = template.replace(var, escaped_var)
+
+            if os.path.exists(var):
+                with open(var, "r") as f:
+                    args[escaped_var] = f.read()
+            else:
+                args[escaped_var] = ""
+
+ return chevron.render(template, args)
+
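+# Example (illustrative): if the template references an absolute file path,
+# it is replaced with that file's contents (or "" if the file doesn't exist):
+#
+#   render_templated_string("Here is my config: {{/home/user/.continue/config.py}}")
+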
+
+"""
+A PromptTemplate can either be a template string (mustache syntax, e.g. {{user_input}}) or
+a function which takes the history and a dictionary of additional key-value pairs and returns
+either a string or a list of ChatMessages.
+If a string is returned, it will be assumed that the chat history should be ignored
+"""
+PromptTemplate = Union[
+    str, Callable[[List[ChatMessage], Dict[str, str]], Union[str, List[ChatMessage]]]
+]
+
+
+def render_prompt_template(
+ template: PromptTemplate, history: List[ChatMessage], other_data: Dict[str, str]
+) -> str:
+ """
+ Render a prompt template.
+ """
+ if isinstance(template, str):
+ data = {
+ "history": history,
+ **other_data,
+ }
+ if len(history) > 0 and history[0].role == "system":
+ data["system_message"] = history.pop(0).content
+
+ return chevron.render(template, data)
+ else:
+ return template(history, other_data)
diff --git a/server/continuedev/libs/util/traceback/traceback_parsers.py b/server/continuedev/libs/util/traceback/traceback_parsers.py
new file mode 100644
index 00000000..58a4f728
--- /dev/null
+++ b/server/continuedev/libs/util/traceback/traceback_parsers.py
@@ -0,0 +1,56 @@
+from boltons import tbutils
+
+from ....models.main import Traceback
+
+PYTHON_TRACEBACK_PREFIX = "Traceback (most recent call last):"
+
+
+def get_python_traceback(output: str) -> str:
+ if PYTHON_TRACEBACK_PREFIX in output:
+ tb_string = output.split(PYTHON_TRACEBACK_PREFIX)[-1]
+
+        # Then we need to remove any lines below the traceback. Do this by noticing that
+        # the last line of the traceback is the first (other than the prefix) that doesn't begin with whitespace
+ lines = list(filter(lambda x: x.strip() != "", tb_string.splitlines()))
+ for i in range(len(lines) - 1):
+ if not lines[i].startswith(" "):
+ tb_string = "\n".join(lines[: i + 1])
+ break
+
+ return PYTHON_TRACEBACK_PREFIX + "\n" + tb_string
+ elif "SyntaxError" in output:
+ return "SyntaxError" + output.split("SyntaxError")[-1]
+ else:
+ return None
+
+
+def get_javascript_traceback(output: str) -> str:
+ lines = output.splitlines()
+ first_line = None
+ for i in range(len(lines) - 1):
+ segs = lines[i].split(":")
+ if (
+ len(segs) > 1
+ and segs[0] != ""
+ and segs[1].startswith(" ")
+ and lines[i + 1].strip().startswith("at")
+ ):
+ first_line = lines[i]
+ break
+
+ if first_line is not None:
+ return "\n".join(lines[lines.index(first_line) :])
+ else:
+ return None
+
+
+def parse_python_traceback(tb_string: str) -> Traceback:
+ # Remove anchor lines - tbutils doesn't always get them right
+ tb_string = "\n".join(
+ filter(
+ lambda x: x.strip().replace("~", "").replace("^", "") != "",
+ tb_string.splitlines(),
+ )
+ )
+ exc = tbutils.ParsedException.from_string(tb_string)
+ return Traceback.from_tbutil_parsed_exc(exc)