diff options
author | Nate Sesti <sestinj@gmail.com> | 2023-06-03 20:30:57 -0400 |
---|---|---|
committer | Nate Sesti <sestinj@gmail.com> | 2023-06-03 20:30:57 -0400 |
commit | 3759065cd387ad3d188b76923936ca93a8212178 (patch) | |
tree | b828ed158201a007ce641bb51eff6d347d685701 | |
parent | 552c429d11bd693a1a7d9b5d55a7c84c2383d1e7 (diff) | |
download | sncontinue-3759065cd387ad3d188b76923936ca93a8212178.tar.gz sncontinue-3759065cd387ad3d188b76923936ca93a8212178.tar.bz2 sncontinue-3759065cd387ad3d188b76923936ca93a8212178.zip |
polishing embeddings steps
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | continuedev/src/continuedev/core/policy.py | 2 | ||||
-rw-r--r-- | continuedev/src/continuedev/libs/chroma/query.py | 17 | ||||
-rw-r--r-- | continuedev/src/continuedev/steps/chroma.py | 10 |
4 files changed, 21 insertions, 10 deletions
@@ -136,3 +136,5 @@ notes.txt cached_embeddings.pkl .ruff_cache codeql + +**/.continue
\ No newline at end of file diff --git a/continuedev/src/continuedev/core/policy.py b/continuedev/src/continuedev/core/policy.py index 6f357ba0..926cc624 100644 --- a/continuedev/src/continuedev/core/policy.py +++ b/continuedev/src/continuedev/core/policy.py @@ -17,7 +17,7 @@ class DemoPolicy(Policy): def next(self, history: History) -> Step: # At the very start, run initial Steps spcecified in the config if history.get_current() is None: - return CreateCodebaseIndexChroma() >> MessageStep(message="Welcome to Continue!") >> StepsOnStartupStep() + return MessageStep(message="Welcome to Continue!") >> CreateCodebaseIndexChroma() >> StepsOnStartupStep() observation = history.get_current().observation if observation is not None and isinstance(observation, UserInputObservation): diff --git a/continuedev/src/continuedev/libs/chroma/query.py b/continuedev/src/continuedev/libs/chroma/query.py index 1cb178cc..c27329f0 100644 --- a/continuedev/src/continuedev/libs/chroma/query.py +++ b/continuedev/src/continuedev/libs/chroma/query.py @@ -38,21 +38,26 @@ class ChromaIndexManager: return None def check_index_exists(self): - return os.path.exists(self.index_dir) + return os.path.exists(os.path.join(self.index_dir, "metadata.json")) def create_codebase_index(self): """Create a new index for the current branch.""" if not self.check_index_exists(): os.makedirs(self.index_dir) + else: + return - print("ROOT DIRECTORY: ", self.git_root_dir) - documents = load_gpt_index_documents(self.git_root_dir) + documents = load_gpt_index_documents(self.workspace_dir) chunks = {} doc_chunks = [] for doc in documents: text_splitter = TokenTextSplitter() - text_chunks = text_splitter.split_text(doc.text) + try: + text_chunks = text_splitter.split_text(doc.text) + except: + print("ERROR (probably found special token): ", doc.text) + continue filename = doc.extra_info["filename"] chunks[filename] = len(text_chunks) for i, text in enumerate(text_chunks): @@ -88,9 +93,9 @@ class ChromaIndexManager: modified_deleted_files = [f for f in modified_deleted_files if f] deleted_files = [ - f for f in modified_deleted_files if not os.path.exists(self.git_root_dir + "/" + f)] + f for f in modified_deleted_files if not os.path.exists(os.path.join(self.workspace_dir, f))] modified_files = [ - f for f in modified_deleted_files if os.path.exists(self.git_root_dir + "/" + f)] + f for f in modified_deleted_files if os.path.exists(os.path.join(self.workspace_dir, f))] return filter_ignored_files(modified_files, self.index_dir), filter_ignored_files(deleted_files, self.index_dir) diff --git a/continuedev/src/continuedev/steps/chroma.py b/continuedev/src/continuedev/steps/chroma.py index 556f7252..7bb9389e 100644 --- a/continuedev/src/continuedev/steps/chroma.py +++ b/continuedev/src/continuedev/steps/chroma.py @@ -13,10 +13,12 @@ class CreateCodebaseIndexChroma(Step): hide: bool = True async def describe(self, llm) -> Coroutine[str, None, None]: - return "Creating a codebase index for the current branch." + return "Indexing the codebase..." async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]: index = ChromaIndexManager(await sdk.ide.getWorkspaceDirectory()) + if not index.check_index_exists(): + self.hide = False index.create_codebase_index() @@ -55,7 +57,9 @@ class AnswerQuestionChroma(Step): Here is the answer:""") answer = (await sdk.models.gpt35()).complete(prompt) - print(answer) + # Make paths relative to the workspace directory + answer = answer.replace(await sdk.ide.getWorkspaceDirectory(), "") + self._answer = answer await sdk.ide.setFileOpen(files[0]) @@ -67,7 +71,7 @@ class EditFileChroma(Step): async def run(self, sdk: ContinueSDK) -> Coroutine[Observation, None, None]: index = ChromaIndexManager(await sdk.ide.getWorkspaceDirectory()) - results = index.query_codebase_index(self.question) + results = index.query_codebase_index(self.request) resource_name = list( results.source_nodes[0].node.relationships.values())[0] |