author     Tuowen Zhao <ztuowen@gmail.com>    2023-10-19 00:04:44 -0700
committer  Tuowen Zhao <ztuowen@gmail.com>    2023-10-19 00:04:44 -0700
commit     2128f5fe9386dcf2f0597c8035f951c5b60d7562 (patch)
tree       ac3ab65a87bd4971275ae91d7b61176eced13774
parent     08f38574fa2633bbf709d24e1c79417d4285ba61 (diff)
cleanup server

Remove the Anthropic, GGML, Google PaLM, Hugging Face (Inference API and TGI), OpenAI, OpenAI free-trial, Replicate, text-generation-webui, and Together LLM integrations from the server, and switch the default and summarize models to Ollama (codellama:7b-instruct).
-rw-r--r--  server/config.py                                       12
-rw-r--r--  server/continuedev/core/config.py                      12
-rw-r--r--  server/continuedev/core/models.py                      31
-rw-r--r--  server/continuedev/core/steps.py                        7
-rw-r--r--  server/continuedev/libs/constants/default_config.py    12
-rw-r--r--  server/continuedev/libs/llm/__init__.py                 10
-rw-r--r--  server/continuedev/libs/llm/anthropic.py                74
-rw-r--r--  server/continuedev/libs/llm/ggml.py                    226
-rw-r--r--  server/continuedev/libs/llm/google_palm_api.py          50
-rw-r--r--  server/continuedev/libs/llm/hf_inference_api.py         78
-rw-r--r--  server/continuedev/libs/llm/hf_tgi.py                   65
-rw-r--r--  server/continuedev/libs/llm/hugging_face.py             19
-rw-r--r--  server/continuedev/libs/llm/openai.py                  156
-rw-r--r--  server/continuedev/libs/llm/openai_free_trial.py        83
-rw-r--r--  server/continuedev/libs/llm/replicate.py                78
-rw-r--r--  server/continuedev/libs/llm/text_gen_interface.py      114
-rw-r--r--  server/continuedev/libs/llm/together.py                125
-rw-r--r--  server/continuedev/models/reference/generate.py         12
-rw-r--r--  server/continuedev/plugins/steps/chat.py               198
-rw-r--r--  server/continuedev/plugins/steps/setup_model.py         10
-rw-r--r--  server/tests/util/config.py                             15
21 files changed, 41 insertions(+), 1346 deletions(-)
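
For orientation, this is the configuration the hunks below converge on: server/config.py, the default_config.py template, and tests/util/config.py all end up with the same Ollama defaults. A minimal sketch assembled only from the added lines in this diff (no fields beyond those shown in the hunks):

```python
from continuedev.core.config import ContinueConfig
from continuedev.core.models import Models
from continuedev.libs.llm import Ollama

# Both the default and summarize roles now point at a local Ollama model.
config = ContinueConfig(
    allow_anonymous_telemetry=True,
    models=Models(
        default=Ollama(title="CodeLlama-7b-Instruct", model="codellama:7b-instruct"),
        summarize=Ollama(title="CodeLlama-7b-Instruct", model="codellama:7b-instruct"),
    ),
    system_message=None,
    temperature=0.5,
)
```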
diff --git a/server/config.py b/server/config.py
index 29c05a6c..0ac22cb4 100644
--- a/server/config.py
+++ b/server/config.py
@@ -6,7 +6,7 @@ See https://continue.dev/docs/customization to for documentation of the availabl
from continuedev.core.models import Models
from continuedev.core.config import CustomCommand, SlashCommand, ContinueConfig
-from continuedev.libs.llm import OpenAIFreeTrial
+from continuedev.libs.llm import Ollama
from continuedev.plugins.context_providers import (
DiffContextProvider,
@@ -26,8 +26,14 @@ from continuedev.plugins.steps.share_session import ShareSessionStep
config = ContinueConfig(
allow_anonymous_telemetry=True,
models=Models(
- default=OpenAIFreeTrial(api_key="", model="gpt-4"),
- summarize=OpenAIFreeTrial(api_key="", model="gpt-3.5-turbo"),
+ default=Ollama(
+ title="CodeLlama-7b-Instruct",
+ model="codellama:7b-instruct"
+ ),
+ summarize=Ollama(
+ title="CodeLlama-7b-Instruct",
+ model="codellama:7b-instruct"
+ )
),
system_message=None,
temperature=0.5,
diff --git a/server/continuedev/core/config.py b/server/continuedev/core/config.py
index 2bbb42cc..bf555b59 100644
--- a/server/continuedev/core/config.py
+++ b/server/continuedev/core/config.py
@@ -2,7 +2,7 @@ from typing import Dict, List, Optional, Type
from pydantic import BaseModel, Field, validator
-from ..libs.llm.openai_free_trial import OpenAIFreeTrial
+from ..libs.llm import Ollama
from .context import ContextProvider
from .main import Policy, Step
from .models import Models
@@ -48,8 +48,14 @@ class ContinueConfig(BaseModel):
)
models: Models = Field(
Models(
- default=OpenAIFreeTrial(model="gpt-4"),
- summarize=OpenAIFreeTrial(model="gpt-3.5-turbo"),
+ default=Ollama(
+ title="CodeLlama-7b-Instruct",
+ model="codellama:7b-instruct"
+ ),
+ summarize=Ollama(
+ title="CodeLlama-7b-Instruct",
+ model="codellama:7b-instruct"
+ )
),
description="Configuration for the models used by Continue. Read more about how to configure models in the documentation.",
)
diff --git a/server/continuedev/core/models.py b/server/continuedev/core/models.py
index 21ebd8f6..c31177b9 100644
--- a/server/continuedev/core/models.py
+++ b/server/continuedev/core/models.py
@@ -2,18 +2,9 @@ from typing import List, Optional
from pydantic import BaseModel
-from ..libs.llm.anthropic import AnthropicLLM
from ..libs.llm.base import LLM
-from ..libs.llm.ggml import GGML
-from ..libs.llm.google_palm_api import GooglePaLMAPI
-from ..libs.llm.hf_inference_api import HuggingFaceInferenceAPI
-from ..libs.llm.hf_tgi import HuggingFaceTGI
from ..libs.llm.llamacpp import LlamaCpp
from ..libs.llm.ollama import Ollama
-from ..libs.llm.openai import OpenAI
-from ..libs.llm.openai_free_trial import OpenAIFreeTrial
-from ..libs.llm.replicate import ReplicateLLM
-from ..libs.llm.together import TogetherLLM
class ContinueSDK(BaseModel):
@@ -30,32 +21,14 @@ ALL_MODEL_ROLES = [
MODEL_CLASSES = {
cls.__name__: cls
for cls in [
- OpenAI,
- OpenAIFreeTrial,
- GGML,
- TogetherLLM,
- AnthropicLLM,
- ReplicateLLM,
Ollama,
- LlamaCpp,
- HuggingFaceInferenceAPI,
- HuggingFaceTGI,
- GooglePaLMAPI,
+ LlamaCpp
]
}
MODEL_MODULE_NAMES = {
- "OpenAI": "openai",
- "OpenAIFreeTrial": "openai_free_trial",
- "GGML": "ggml",
- "TogetherLLM": "together",
- "AnthropicLLM": "anthropic",
- "ReplicateLLM": "replicate",
"Ollama": "ollama",
- "LlamaCpp": "llamacpp",
- "HuggingFaceInferenceAPI": "hf_inference_api",
- "HuggingFaceTGI": "hf_tgi",
- "GooglePaLMAPI": "google_palm_api",
+ "LlamaCpp": "llamacpp"
}
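
With the registries above trimmed, only Ollama and LlamaCpp can be looked up by name. A minimal sketch of how a name-keyed registry like this is typically consumed when rebuilding a model from saved settings; the `model_from_name` helper is hypothetical and not part of this diff:

```python
from continuedev.core.models import MODEL_CLASSES  # now only {"Ollama": ..., "LlamaCpp": ...}

def model_from_name(class_name: str, **fields):
    # Hypothetical helper: resolve the pydantic class from its serialized name.
    if class_name not in MODEL_CLASSES:
        raise ValueError(f"Unknown model class: {class_name}")
    return MODEL_CLASSES[class_name](**fields)

default = model_from_name(
    "Ollama", title="CodeLlama-7b-Instruct", model="codellama:7b-instruct"
)
```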
diff --git a/server/continuedev/core/steps.py b/server/continuedev/core/steps.py
index 5c20dd15..110a4457 100644
--- a/server/continuedev/core/steps.py
+++ b/server/continuedev/core/steps.py
@@ -5,7 +5,6 @@ from textwrap import dedent
from typing import Coroutine, List, Optional, Union
from ..libs.llm.base import LLM
-from ..libs.llm.openai_free_trial import OpenAIFreeTrial
from ..libs.util.count_tokens import DEFAULT_MAX_TOKENS
from ..libs.util.devdata import dev_data_logger
from ..libs.util.strings import (
@@ -229,12 +228,6 @@ class DefaultModelEditCodeStep(Step):
+ max_tokens
)
- # If using 3.5 and overflows, upgrade to 3.5.16k
- if model_to_use.model == "gpt-3.5-turbo":
- if total_tokens > model_to_use.context_length:
- model_to_use = OpenAIFreeTrial(model="gpt-3.5-turbo-0613")
- await sdk.start_model(model_to_use)
-
# Remove tokens from the end first, and then the start to clear space
# This part finds the start and end lines
full_file_contents_lst = full_file_contents.split("\n")
diff --git a/server/continuedev/libs/constants/default_config.py b/server/continuedev/libs/constants/default_config.py
index a007eef1..7cffacbc 100644
--- a/server/continuedev/libs/constants/default_config.py
+++ b/server/continuedev/libs/constants/default_config.py
@@ -7,7 +7,7 @@ See https://continue.dev/docs/customization to for documentation of the availabl
from continuedev.core.models import Models
from continuedev.core.config import CustomCommand, SlashCommand, ContinueConfig
-from continuedev.libs.llm import OpenAIFreeTrial
+from continuedev.libs.llm import Ollama
from continuedev.plugins.context_providers import (
DiffContextProvider,
@@ -27,8 +27,14 @@ from continuedev.plugins.steps.share_session import ShareSessionStep
config = ContinueConfig(
allow_anonymous_telemetry=True,
models=Models(
- default=OpenAIFreeTrial(api_key="", model="gpt-4"),
- summarize=OpenAIFreeTrial(api_key="", model="gpt-3.5-turbo")
+ default=Ollama(
+ title="CodeLlama-7b-Instruct",
+ model="codellama:7b-instruct"
+ ),
+ summarize=Ollama(
+ title="CodeLlama-7b-Instruct",
+ model="codellama:7b-instruct"
+ )
),
system_message=None,
temperature=0.5,
diff --git a/server/continuedev/libs/llm/__init__.py b/server/continuedev/libs/llm/__init__.py
index 829ffede..7ac92059 100644
--- a/server/continuedev/libs/llm/__init__.py
+++ b/server/continuedev/libs/llm/__init__.py
@@ -1,14 +1,4 @@
-from .anthropic import AnthropicLLM # noqa: F401
-from .ggml import GGML # noqa: F401
-from .google_palm_api import GooglePaLMAPI # noqa: F401
-from .hf_inference_api import HuggingFaceInferenceAPI # noqa: F401
-from .hf_tgi import HuggingFaceTGI # noqa: F401
from .llamacpp import LlamaCpp # noqa: F401
from .ollama import Ollama # noqa: F401
-from .openai import OpenAI # noqa: F401
-from .openai_free_trial import OpenAIFreeTrial # noqa: F401
from .proxy_server import ProxyServer # noqa: F401
from .queued import QueuedLLM # noqa: F401
-from .replicate import ReplicateLLM # noqa: F401
-from .text_gen_interface import TextGenUI # noqa: F401
-from .together import TogetherLLM # noqa: F401
diff --git a/server/continuedev/libs/llm/anthropic.py b/server/continuedev/libs/llm/anthropic.py
deleted file mode 100644
index 7d0708f1..00000000
--- a/server/continuedev/libs/llm/anthropic.py
+++ /dev/null
@@ -1,74 +0,0 @@
-from typing import Any, Callable, Coroutine
-
-from anthropic import AI_PROMPT, HUMAN_PROMPT, AsyncAnthropic
-
-from .base import LLM, CompletionOptions
-from .prompts.chat import anthropic_template_messages
-
-
-class AnthropicLLM(LLM):
- """
- Import the `AnthropicLLM` class and set it as the default model:
-
- ```python title="~/.continue/config.py"
- from continuedev.libs.llm.anthropic import AnthropicLLM
-
- config = ContinueConfig(
- ...
- models=Models(
- default=AnthropicLLM(api_key="<API_KEY>", model="claude-2")
- )
- )
- ```
-
- Claude 2 is not yet publicly released. You can request early access [here](https://www.anthropic.com/earlyaccess).
-
- """
-
- api_key: str
- "Anthropic API key"
-
- model: str = "claude-2"
-
- _async_client: AsyncAnthropic = None
-
- template_messages: Callable = anthropic_template_messages
-
- class Config:
- arbitrary_types_allowed = True
-
- async def start(self, **kwargs):
- await super().start(**kwargs)
- self._async_client = AsyncAnthropic(api_key=self.api_key)
-
- if self.model == "claude-2":
- self.context_length = 100_000
-
- def collect_args(self, options: CompletionOptions):
- options.stop = None
- args = super().collect_args(options)
-
- if "max_tokens" in args:
- args["max_tokens_to_sample"] = args["max_tokens"]
- del args["max_tokens"]
- if "frequency_penalty" in args:
- del args["frequency_penalty"]
- if "presence_penalty" in args:
- del args["presence_penalty"]
- return args
-
- async def _stream_complete(self, prompt: str, options):
- args = self.collect_args(options)
- prompt = f"{HUMAN_PROMPT} {prompt} {AI_PROMPT}"
-
- async for chunk in await self._async_client.completions.create(
- prompt=prompt, stream=True, **args
- ):
- yield chunk.completion
-
- async def _complete(self, prompt: str, options) -> Coroutine[Any, Any, str]:
- args = self.collect_args(options)
- prompt = f"{HUMAN_PROMPT} {prompt} {AI_PROMPT}"
- return (
- await self._async_client.completions.create(prompt=prompt, **args)
- ).completion
diff --git a/server/continuedev/libs/llm/ggml.py b/server/continuedev/libs/llm/ggml.py
deleted file mode 100644
index 55d580a8..00000000
--- a/server/continuedev/libs/llm/ggml.py
+++ /dev/null
@@ -1,226 +0,0 @@
-import json
-from typing import Any, Callable, Coroutine, Dict, List, Literal, Optional
-
-from pydantic import Field
-
-from ...core.main import ChatMessage
-from ..util.logging import logger
-from .base import LLM, CompletionOptions
-from .openai import CHAT_MODELS
-from .prompts.chat import llama2_template_messages
-from .prompts.edit import simplified_edit_prompt
-
-
-class GGML(LLM):
- """
- See our [5 minute quickstart](https://github.com/continuedev/ggml-server-example) to run any model locally with ggml. While these models don't yet perform as well, they are free, entirely private, and run offline.
-
- Once the model is running on localhost:8000, change `~/.continue/config.py` to look like this:
-
- ```python title="~/.continue/config.py"
- from continuedev.libs.llm.ggml import GGML
-
- config = ContinueConfig(
- ...
- models=Models(
- default=GGML(
- max_context_length=2048,
- server_url="http://localhost:8000")
- )
- )
- ```
- """
-
- server_url: str = Field(
- "http://localhost:8000",
- description="URL of the OpenAI-compatible server where the model is being served",
- )
- model: str = Field(
- "ggml", description="The name of the model to use (optional for the GGML class)"
- )
-
- api_base: Optional[str] = Field(None, description="OpenAI API base URL.")
-
- api_type: Optional[Literal["azure", "openai"]] = Field(
- None, description="OpenAI API type."
- )
-
- api_version: Optional[str] = Field(
- None, description="OpenAI API version. For use with Azure OpenAI Service."
- )
-
- engine: Optional[str] = Field(
- None, description="OpenAI engine. For use with Azure OpenAI Service."
- )
-
- template_messages: Optional[
- Callable[[List[Dict[str, str]]], str]
- ] = llama2_template_messages
-
- prompt_templates = {
- "edit": simplified_edit_prompt,
- }
-
- class Config:
- arbitrary_types_allowed = True
-
- def get_headers(self):
- headers = {
- "Content-Type": "application/json",
- }
- if self.api_key is not None:
- if self.api_type == "azure":
- headers["api-key"] = self.api_key
- else:
- headers["Authorization"] = f"Bearer {self.api_key}"
-
- return headers
-
- def get_full_server_url(self, endpoint: str):
- endpoint = endpoint.lstrip("/").rstrip("/")
-
- if self.api_type == "azure":
- if self.engine is None or self.api_version is None or self.api_base is None:
- raise Exception(
- "For Azure OpenAI Service, you must specify engine, api_version, and api_base."
- )
-
- return f"{self.api_base}/openai/deployments/{self.engine}/{endpoint}?api-version={self.api_version}"
- else:
- return f"{self.server_url}/v1/{endpoint}"
-
- async def _raw_stream_complete(self, prompt, options):
- args = self.collect_args(options)
-
- async with self.create_client_session() as client_session:
- async with client_session.post(
- self.get_full_server_url(endpoint="completions"),
- json={
- "prompt": prompt,
- "stream": True,
- **args,
- },
- headers=self.get_headers(),
- proxy=self.proxy,
- ) as resp:
- if resp.status != 200:
- raise Exception(
- f"Error calling /chat/completions endpoint: {resp.status}"
- )
-
- async for line in resp.content.iter_any():
- if line:
- chunks = line.decode("utf-8")
- for chunk in chunks.split("\n"):
- if (
- chunk.startswith(": ping - ")
- or chunk.startswith("data: [DONE]")
- or chunk.strip() == ""
- ):
- continue
- elif chunk.startswith("data: "):
- chunk = chunk[6:]
- try:
- j = json.loads(chunk)
- except Exception:
- continue
- if (
- "choices" in j
- and len(j["choices"]) > 0
- and "text" in j["choices"][0]
- ):
- yield j["choices"][0]["text"]
-
- async def _stream_chat(self, messages: List[ChatMessage], options):
- args = self.collect_args(options)
-
- async def generator():
- async with self.create_client_session() as client_session:
- async with client_session.post(
- self.get_full_server_url(endpoint="chat/completions"),
- json={"messages": messages, "stream": True, **args},
- headers=self.get_headers(),
- proxy=self.proxy,
- ) as resp:
- if resp.status != 200:
- raise Exception(
- f"Error calling /chat/completions endpoint: {resp.status}"
- )
-
- async for line, end in resp.content.iter_chunks():
- json_chunk = line.decode("utf-8")
- chunks = json_chunk.split("\n")
- for chunk in chunks:
- if (
- chunk.strip() == ""
- or json_chunk.startswith(": ping - ")
- or json_chunk.startswith("data: [DONE]")
- ):
- continue
- try:
- yield json.loads(chunk[6:])["choices"][0]["delta"]
- except:
- pass
-
- # Because quite often the first attempt fails, and it works thereafter
- try:
- async for chunk in generator():
- yield chunk
- except Exception as e:
- logger.warning(f"Error calling /chat/completions endpoint: {e}")
- async for chunk in generator():
- yield chunk
-
- async def _raw_complete(self, prompt: str, options) -> Coroutine[Any, Any, str]:
- args = self.collect_args(options)
-
- async with self.create_client_session() as client_session:
- async with client_session.post(
- self.get_full_server_url(endpoint="completions"),
- json={
- "prompt": prompt,
- **args,
- },
- headers=self.get_headers(),
- proxy=self.proxy,
- ) as resp:
- if resp.status != 200:
- raise Exception(
- f"Error calling /chat/completions endpoint: {resp.status}"
- )
-
- text = await resp.text()
- try:
- completion = json.loads(text)["choices"][0]["text"]
- return completion
- except Exception as e:
- raise Exception(
- f"Error calling /completion endpoint: {e}\n\nResponse text: {text}"
- )
-
- async def _complete(self, prompt: str, options: CompletionOptions):
- completion = ""
- if self.model in CHAT_MODELS:
- async for chunk in self._stream_chat(
- [{"role": "user", "content": prompt}], options
- ):
- if "content" in chunk:
- completion += chunk["content"]
-
- else:
- async for chunk in self._raw_stream_complete(prompt, options):
- completion += chunk
-
- return completion
-
- async def _stream_complete(self, prompt, options: CompletionOptions):
- if self.model in CHAT_MODELS:
- async for chunk in self._stream_chat(
- [{"role": "user", "content": prompt}], options
- ):
- if "content" in chunk:
- yield chunk["content"]
-
- else:
- async for chunk in self._raw_stream_complete(prompt, options):
- yield chunk
diff --git a/server/continuedev/libs/llm/google_palm_api.py b/server/continuedev/libs/llm/google_palm_api.py
deleted file mode 100644
index 3379fefe..00000000
--- a/server/continuedev/libs/llm/google_palm_api.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from typing import List
-
-import requests
-from pydantic import Field
-
-from ...core.main import ChatMessage
-from .base import LLM
-
-
-class GooglePaLMAPI(LLM):
- """
- The Google PaLM API is currently in public preview, so production applications are not supported yet. However, you can [create an API key in Google MakerSuite](https://makersuite.google.com/u/2/app/apikey) and begin trying out the `chat-bison-001` model. Change `~/.continue/config.py` to look like this:
-
- ```python title="~/.continue/config.py"
- from continuedev.core.models import Models
- from continuedev.libs.llm.hf_inference_api import GooglePaLMAPI
-
- config = ContinueConfig(
- ...
- models=Models(
- default=GooglePaLMAPI(
- model="chat-bison-001"
- api_key="<MAKERSUITE_API_KEY>",
- )
- )
- ```
- """
-
- api_key: str = Field(..., description="Google PaLM API key")
-
- model: str = "chat-bison-001"
-
- async def _stream_complete(self, prompt, options):
- api_url = f"https://generativelanguage.googleapis.com/v1beta2/models/{self.model}:generateMessage?key={self.api_key}"
- body = {"prompt": {"messages": [{"content": prompt}]}}
- response = requests.post(api_url, json=body)
- yield response.json()["candidates"][0]["content"]
-
- async def _stream_chat(self, messages: List[ChatMessage], options):
- msg_lst = []
- for message in messages:
- msg_lst.append({"content": message["content"]})
-
- api_url = f"https://generativelanguage.googleapis.com/v1beta2/models/{self.model}:generateMessage?key={self.api_key}"
- body = {"prompt": {"messages": msg_lst}}
- response = requests.post(api_url, json=body)
- yield {
- "content": response.json()["candidates"][0]["content"],
- "role": "assistant",
- }
diff --git a/server/continuedev/libs/llm/hf_inference_api.py b/server/continuedev/libs/llm/hf_inference_api.py
deleted file mode 100644
index 990ec7c8..00000000
--- a/server/continuedev/libs/llm/hf_inference_api.py
+++ /dev/null
@@ -1,78 +0,0 @@
-from typing import Callable, Dict, List, Union
-
-from huggingface_hub import InferenceClient
-from pydantic import Field
-
-from .base import LLM, CompletionOptions
-from .prompts.chat import llama2_template_messages
-from .prompts.edit import simplified_edit_prompt
-
-
-class HuggingFaceInferenceAPI(LLM):
- """
- Hugging Face Inference API is a great option for newly released language models. Sign up for an account and add billing [here](https://huggingface.co/settings/billing), access the Inference Endpoints [here](https://ui.endpoints.huggingface.co), click on “New endpoint”, and fill out the form (e.g. select a model like [WizardCoder-Python-34B-V1.0](https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0)), and then deploy your model by clicking “Create Endpoint”. Change `~/.continue/config.py` to look like this:
-
- ```python title="~/.continue/config.py"
- from continuedev.core.models import Models
- from continuedev.libs.llm.hf_inference_api import HuggingFaceInferenceAPI
-
- config = ContinueConfig(
- ...
- models=Models(
- default=HuggingFaceInferenceAPI(
- endpoint_url="<INFERENCE_API_ENDPOINT_URL>",
- hf_token="<HUGGING_FACE_TOKEN>",
- )
- )
- ```
- """
-
- model: str = Field(
- "Hugging Face Inference API",
- description="The name of the model to use (optional for the HuggingFaceInferenceAPI class)",
- )
- hf_token: str = Field(..., description="Your Hugging Face API token")
- endpoint_url: str = Field(
- None, description="Your Hugging Face Inference API endpoint URL"
- )
-
- template_messages: Union[
- Callable[[List[Dict[str, str]]], str], None
- ] = llama2_template_messages
-
- prompt_templates = {
- "edit": simplified_edit_prompt,
- }
-
- class Config:
- arbitrary_types_allowed = True
-
- def collect_args(self, options: CompletionOptions):
- options.stop = None
- args = super().collect_args(options)
-
- if "max_tokens" in args:
- args["max_new_tokens"] = args["max_tokens"]
- del args["max_tokens"]
- if "stop" in args:
- args["stop_sequences"] = args["stop"]
- del args["stop"]
-
- return args
-
- async def _stream_complete(self, prompt, options):
- args = self.collect_args(options)
-
- client = InferenceClient(self.endpoint_url, token=self.hf_token)
-
- stream = client.text_generation(prompt, stream=True, details=True, **args)
-
- for r in stream:
- # skip special tokens
- if r.token.special:
- continue
- # stop if we encounter a stop sequence
- if options.stop is not None:
- if r.token.text in options.stop:
- break
- yield r.token.text
diff --git a/server/continuedev/libs/llm/hf_tgi.py b/server/continuedev/libs/llm/hf_tgi.py
deleted file mode 100644
index 62458db4..00000000
--- a/server/continuedev/libs/llm/hf_tgi.py
+++ /dev/null
@@ -1,65 +0,0 @@
-import json
-from typing import Any, Callable, List
-
-from pydantic import Field
-
-from ...core.main import ChatMessage
-from .base import LLM, CompletionOptions
-from .prompts.chat import llama2_template_messages
-from .prompts.edit import simplified_edit_prompt
-
-
-class HuggingFaceTGI(LLM):
- model: str = "huggingface-tgi"
- server_url: str = Field(
- "http://localhost:8080", description="URL of your TGI server"
- )
-
- template_messages: Callable[[List[ChatMessage]], str] = llama2_template_messages
-
- prompt_templates = {
- "edit": simplified_edit_prompt,
- }
-
- class Config:
- arbitrary_types_allowed = True
-
- def collect_args(self, options: CompletionOptions) -> Any:
- args = super().collect_args(options)
- args = {**args, "max_new_tokens": args.get("max_tokens", 1024), "best_of": 1}
- args.pop("max_tokens", None)
- args.pop("model", None)
- args.pop("functions", None)
- return args
-
- async def _stream_complete(self, prompt, options):
- args = self.collect_args(options)
-
- async with self.create_client_session() as client_session:
- async with client_session.post(
- f"{self.server_url}/generate_stream",
- json={"inputs": prompt, "parameters": args},
- headers={"Content-Type": "application/json"},
- proxy=self.proxy,
- ) as resp:
- async for line in resp.content.iter_any():
- if line:
- text = line.decode("utf-8")
- chunks = text.split("\n")
-
- for chunk in chunks:
- if chunk.startswith("data: "):
- chunk = chunk[len("data: ") :]
- elif chunk.startswith("data:"):
- chunk = chunk[len("data:") :]
-
- if chunk.strip() == "":
- continue
-
- try:
- json_chunk = json.loads(chunk)
- except Exception as e:
- print(f"Error parsing JSON: {e}")
- continue
-
- yield json_chunk["token"]["text"]
diff --git a/server/continuedev/libs/llm/hugging_face.py b/server/continuedev/libs/llm/hugging_face.py
deleted file mode 100644
index c2e934c0..00000000
--- a/server/continuedev/libs/llm/hugging_face.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# TODO: This class is far out of date
-
-from transformers import AutoModelForCausalLM, AutoTokenizer
-
-from .llm import LLM
-
-
-class HuggingFace(LLM):
- def __init__(self, model_path: str = "Salesforce/codegen-2B-mono"):
- self.model_path = model_path
- self.tokenizer = AutoTokenizer.from_pretrained(model_path)
- self.model = AutoModelForCausalLM.from_pretrained(model_path)
-
- def complete(self, prompt: str, **kwargs):
- args = {"max_tokens": 100}
- args.update(kwargs)
- input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids
- generated_ids = self.model.generate(input_ids, max_length=args["max_tokens"])
- return self.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
diff --git a/server/continuedev/libs/llm/openai.py b/server/continuedev/libs/llm/openai.py
deleted file mode 100644
index ba29279b..00000000
--- a/server/continuedev/libs/llm/openai.py
+++ /dev/null
@@ -1,156 +0,0 @@
-from typing import Callable, List, Literal, Optional
-
-import certifi
-import openai
-from pydantic import Field
-
-from ...core.main import ChatMessage
-from .base import LLM
-
-CHAT_MODELS = {
- "gpt-3.5-turbo",
- "gpt-3.5-turbo-16k",
- "gpt-4",
- "gpt-3.5-turbo-0613",
- "gpt-4-32k",
-}
-MAX_TOKENS_FOR_MODEL = {
- "gpt-3.5-turbo": 4096,
- "gpt-3.5-turbo-0613": 4096,
- "gpt-3.5-turbo-16k": 16_384,
- "gpt-4": 8192,
- "gpt-35-turbo-16k": 16_384,
- "gpt-35-turbo-0613": 4096,
- "gpt-35-turbo": 4096,
- "gpt-4-32k": 32_768,
-}
-
-
-class OpenAI(LLM):
- """
- The OpenAI class can be used to access OpenAI models like gpt-4 and gpt-3.5-turbo.
-
- If you are locally serving a model that uses an OpenAI-compatible server, you can simply change the `api_base` in the `OpenAI` class like this:
-
- ```python title="~/.continue/config.py"
- from continuedev.libs.llm.openai import OpenAI
-
- config = ContinueConfig(
- ...
- models=Models(
- default=OpenAI(
- api_key="EMPTY",
- model="<MODEL_NAME>",
- api_base="http://localhost:8000", # change to your server
- )
- )
- )
- ```
-
- Options for serving models locally with an OpenAI-compatible server include:
-
- - [text-gen-webui](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/openai#setup--installation)
- - [FastChat](https://github.com/lm-sys/FastChat/blob/main/docs/openai_api.md)
- - [LocalAI](https://localai.io/basics/getting_started/)
- - [llama-cpp-python](https://github.com/abetlen/llama-cpp-python#web-server)
- """
-
- api_key: str = Field(
- ...,
- description="OpenAI API key",
- )
-
- proxy: Optional[str] = Field(None, description="Proxy URL to use for requests.")
-
- api_base: Optional[str] = Field(None, description="OpenAI API base URL.")
-
- api_type: Optional[Literal["azure", "openai"]] = Field(
- None, description="OpenAI API type."
- )
-
- api_version: Optional[str] = Field(
- None, description="OpenAI API version. For use with Azure OpenAI Service."
- )
-
- engine: Optional[str] = Field(
- None, description="OpenAI engine. For use with Azure OpenAI Service."
- )
-
- async def start(
- self, unique_id: Optional[str] = None, write_log: Callable[[str], None] = None
- ):
- await super().start(write_log=write_log, unique_id=unique_id)
-
- if self.context_length is None:
- self.context_length = MAX_TOKENS_FOR_MODEL.get(self.model, 4096)
-
- openai.api_key = self.api_key
- if self.api_type is not None:
- openai.api_type = self.api_type
- if self.api_base is not None:
- openai.api_base = self.api_base
- if self.api_version is not None:
- openai.api_version = self.api_version
-
- if self.verify_ssl is not None and self.verify_ssl is False:
- openai.verify_ssl_certs = False
-
- if self.proxy is not None:
- openai.proxy = self.proxy
-
- openai.ca_bundle_path = self.ca_bundle_path or certifi.where()
-
- def collect_args(self, options):
- args = super().collect_args(options)
- if self.engine is not None:
- args["engine"] = self.engine
-
- if not args["model"].endswith("0613") and "functions" in args:
- del args["functions"]
-
- return args
-
- async def _stream_complete(self, prompt, options):
- args = self.collect_args(options)
- args["stream"] = True
-
- if args["model"] in CHAT_MODELS:
- async for chunk in await openai.ChatCompletion.acreate(
- messages=[{"role": "user", "content": prompt}],
- **args,
- headers=self.headers,
- ):
- if len(chunk.choices) > 0 and "content" in chunk.choices[0].delta:
- yield chunk.choices[0].delta.content
- else:
- async for chunk in await openai.Completion.acreate(prompt=prompt, **args, headers=self.headers):
- if len(chunk.choices) > 0:
- yield chunk.choices[0].text
-
- async def _stream_chat(self, messages: List[ChatMessage], options):
- args = self.collect_args(options)
-
- async for chunk in await openai.ChatCompletion.acreate(
- messages=messages,
- stream=True,
- **args,
- headers=self.headers,
- ):
- if not hasattr(chunk, "choices") or len(chunk.choices) == 0:
- continue
- yield chunk.choices[0].delta
-
- async def _complete(self, prompt: str, options):
- args = self.collect_args(options)
-
- if args["model"] in CHAT_MODELS:
- resp = await openai.ChatCompletion.acreate(
- messages=[{"role": "user", "content": prompt}],
- **args,
- headers=self.headers,
- )
- return resp.choices[0].message.content
- else:
- return (
- (await openai.Completion.acreate(prompt=prompt, **args, headers=self.headers)).choices[0].text
- )
diff --git a/server/continuedev/libs/llm/openai_free_trial.py b/server/continuedev/libs/llm/openai_free_trial.py
deleted file mode 100644
index b6e707f9..00000000
--- a/server/continuedev/libs/llm/openai_free_trial.py
+++ /dev/null
@@ -1,83 +0,0 @@
-from typing import Callable, List, Optional
-
-from ...core.main import ChatMessage
-from .base import LLM
-from .openai import OpenAI
-from .proxy_server import ProxyServer
-
-
-class OpenAIFreeTrial(LLM):
- """
- With the `OpenAIFreeTrial` `LLM`, new users can try out Continue with GPT-4 using a proxy server that securely makes calls to OpenAI using our API key. Continue should just work the first time you install the extension in VS Code.
-
- Once you are using Continue regularly though, you will need to add an OpenAI API key that has access to GPT-4 by following these steps:
-
- 1. Copy your API key from https://platform.openai.com/account/api-keys
- 2. Open `~/.continue/config.py`. You can do this by using the '/config' command in Continue
- 3. Change the default LLMs to look like this:
-
- ```python title="~/.continue/config.py"
- API_KEY = "<API_KEY>"
- config = ContinueConfig(
- ...
- models=Models(
- default=OpenAIFreeTrial(model="gpt-4", api_key=API_KEY),
- summarize=OpenAIFreeTrial(model="gpt-3.5-turbo", api_key=API_KEY)
- )
- )
- ```
-
- The `OpenAIFreeTrial` class will automatically switch to using your API key instead of ours. If you'd like to explicitly use one or the other, you can use the `ProxyServer` or `OpenAI` classes instead.
-
- These classes support any models available through the OpenAI API, assuming your API key has access, including "gpt-4", "gpt-3.5-turbo", "gpt-3.5-turbo-16k", and "gpt-4-32k".
- """
-
- api_key: Optional[str] = None
-
- llm: Optional[LLM] = None
-
- def update_llm_properties(self):
- if self.llm is not None:
- self.llm.system_message = self.system_message
-
- async def start(
- self, write_log: Callable[[str], None] = None, unique_id: Optional[str] = None
- ):
- await super().start(write_log=write_log, unique_id=unique_id)
- if self.api_key is None or self.api_key.strip() == "":
- self.llm = ProxyServer(
- model=self.model,
- verify_ssl=self.verify_ssl,
- ca_bundle_path=self.ca_bundle_path,
- )
- else:
- self.llm = OpenAI(
- api_key=self.api_key,
- model=self.model,
- verify_ssl=self.verify_ssl,
- ca_bundle_path=self.ca_bundle_path,
- )
-
- await self.llm.start(write_log=write_log, unique_id=unique_id)
-
- async def stop(self):
- await self.llm.stop()
-
- async def _complete(self, prompt: str, options):
- self.update_llm_properties()
- return await self.llm._complete(prompt, options)
-
- async def _stream_complete(self, prompt, options):
- self.update_llm_properties()
- resp = self.llm._stream_complete(prompt, options)
- async for item in resp:
- yield item
-
- async def _stream_chat(self, messages: List[ChatMessage], options):
- self.update_llm_properties()
- resp = self.llm._stream_chat(messages=messages, options=options)
- async for item in resp:
- yield item
-
- def count_tokens(self, text: str):
- return self.llm.count_tokens(text)
diff --git a/server/continuedev/libs/llm/replicate.py b/server/continuedev/libs/llm/replicate.py
deleted file mode 100644
index 3423193b..00000000
--- a/server/continuedev/libs/llm/replicate.py
+++ /dev/null
@@ -1,78 +0,0 @@
-import concurrent.futures
-from typing import List
-
-import replicate
-from pydantic import Field
-
-from ...core.main import ChatMessage
-from .base import LLM
-from .prompts.edit import simplified_edit_prompt
-
-
-class ReplicateLLM(LLM):
- """
- Replicate is a great option for newly released language models or models that you've deployed through their platform. Sign up for an account [here](https://replicate.ai/), copy your API key, and then select any model from the [Replicate Streaming List](https://replicate.com/collections/streaming-language-models). Change `~/.continue/config.py` to look like this:
-
- ```python title="~/.continue/config.py"
- from continuedev.core.models import Models
- from continuedev.libs.llm.replicate import ReplicateLLM
-
- config = ContinueConfig(
- ...
- models=Models(
- default=ReplicateLLM(
- model="replicate/codellama-13b-instruct:da5676342de1a5a335b848383af297f592b816b950a43d251a0a9edd0113604b",
- api_key="my-replicate-api-key")
- )
- )
- ```
-
- If you don't specify the `model` parameter, it will default to `replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781`.
- """
-
- api_key: str = Field(..., description="Replicate API key")
-
- model: str = "replicate/llama-2-70b-chat:58d078176e02c219e11eb4da5a02a7830a283b14cf8f94537af893ccff5ee781"
-
- _client: replicate.Client = None
-
- prompt_templates = {
- "edit": simplified_edit_prompt,
- }
-
- async def start(self, **kwargs):
- await super().start(**kwargs)
- self._client = replicate.Client(api_token=self.api_key)
-
- async def _complete(self, prompt: str, options):
- def helper():
- output = self._client.run(
- self.model, input={"message": prompt, "prompt": prompt}
- )
- completion = ""
- for item in output:
- completion += item
-
- return completion
-
- with concurrent.futures.ThreadPoolExecutor() as executor:
- future = executor.submit(helper)
- completion = future.result()
-
- return completion
-
- async def _stream_complete(self, prompt, options):
- for item in self._client.run(
- self.model, input={"message": prompt, "prompt": prompt}
- ):
- yield item
-
- async def _stream_chat(self, messages: List[ChatMessage], options):
- for item in self._client.run(
- self.model,
- input={
- "message": messages[-1]["content"],
- "prompt": messages[-1]["content"],
- },
- ):
- yield {"content": item, "role": "assistant"}
diff --git a/server/continuedev/libs/llm/text_gen_interface.py b/server/continuedev/libs/llm/text_gen_interface.py
deleted file mode 100644
index 225fd3b6..00000000
--- a/server/continuedev/libs/llm/text_gen_interface.py
+++ /dev/null
@@ -1,114 +0,0 @@
-import json
-from typing import Any, Callable, Dict, List, Union
-
-import websockets
-from pydantic import Field
-
-from ...core.main import ChatMessage
-from .base import LLM
-from .prompts.chat import llama2_template_messages
-from .prompts.edit import simplest_edit_prompt
-
-
-class TextGenUI(LLM):
- """
- TextGenUI is a comprehensive, open-source language model UI and local server. You can set it up with an OpenAI-compatible server plugin, but if for some reason that doesn't work, you can use this class like so:
-
- ```python title="~/.continue/config.py"
- from continuedev.libs.llm.text_gen_interface import TextGenUI
-
- config = ContinueConfig(
- ...
- models=Models(
- default=TextGenUI(
- model="<MODEL_NAME>",
- )
- )
- )
- ```
- """
-
- model: str = "text-gen-ui"
- server_url: str = Field(
- "http://localhost:5000", description="URL of your TextGenUI server"
- )
- streaming_url: str = Field(
- "http://localhost:5005",
- description="URL of your TextGenUI streaming server (separate from main server URL)",
- )
-
- prompt_templates = {
- "edit": simplest_edit_prompt,
- }
-
- template_messages: Union[
- Callable[[List[Dict[str, str]]], str], None
- ] = llama2_template_messages
-
- class Config:
- arbitrary_types_allowed = True
-
- def collect_args(self, options) -> Any:
- args = super().collect_args(options)
- args = {**args, "max_new_tokens": options.max_tokens}
- args.pop("max_tokens", None)
- return args
-
- async def _stream_complete(self, prompt, options):
- args = self.collect_args(options)
-
- ws_url = f"{self.streaming_url.replace('http://', 'ws://').replace('https://', 'wss://')}"
- payload = json.dumps({"prompt": prompt, "stream": True, **args})
- async with websockets.connect(
- f"{ws_url}/api/v1/stream", ping_interval=None
- ) as websocket:
- await websocket.send(payload)
-
- while True:
- incoming_data = await websocket.recv()
- incoming_data = json.loads(incoming_data)
-
- match incoming_data["event"]:
- case "text_stream":
- yield incoming_data["text"]
- case "stream_end":
- break
-
- async def _stream_chat(self, messages: List[ChatMessage], options):
- args = self.collect_args(options)
-
- async def generator():
- ws_url = f"{self.streaming_url.replace('http://', 'ws://').replace('https://', 'wss://')}"
- history = list(map(lambda x: x["content"], messages))
- payload = json.dumps(
- {
- "user_input": messages[-1]["content"],
- "history": {"internal": [history], "visible": [history]},
- "stream": True,
- **args,
- }
- )
- async with websockets.connect(
- f"{ws_url}/api/v1/chat-stream", ping_interval=None
- ) as websocket:
- await websocket.send(payload)
-
- prev = ""
- while True:
- incoming_data = await websocket.recv()
- incoming_data = json.loads(incoming_data)
-
- match incoming_data["event"]:
- case "text_stream":
- visible = incoming_data["history"]["visible"][-1]
- if len(visible) > 0:
- yield {
- "role": "assistant",
- "content": visible[-1].replace(prev, ""),
- }
- prev = visible[-1]
- case "stream_end":
- break
-
- async for chunk in generator():
- yield chunk
diff --git a/server/continuedev/libs/llm/together.py b/server/continuedev/libs/llm/together.py
deleted file mode 100644
index 35b3a424..00000000
--- a/server/continuedev/libs/llm/together.py
+++ /dev/null
@@ -1,125 +0,0 @@
-import json
-from typing import Callable
-
-import aiohttp
-from pydantic import Field
-
-from ...core.main import ContinueCustomException
-from ..util.logging import logger
-from .base import LLM
-from .prompts.chat import llama2_template_messages
-from .prompts.edit import simplified_edit_prompt
-
-
-class TogetherLLM(LLM):
- """
- The Together API is a cloud platform for running large AI models. You can sign up [here](https://api.together.xyz/signup), copy your API key on the initial welcome screen, and then hit the play button on any model from the [Together Models list](https://docs.together.ai/docs/models-inference). Change `~/.continue/config.py` to look like this:
-
- ```python title="~/.continue/config.py"
- from continuedev.core.models import Models
- from continuedev.libs.llm.together import TogetherLLM
-
- config = ContinueConfig(
- ...
- models=Models(
- default=TogetherLLM(
- api_key="<API_KEY>",
- model="togethercomputer/llama-2-13b-chat"
- )
- )
- )
- ```
- """
-
- api_key: str = Field(..., description="Together API key")
-
- model: str = "togethercomputer/RedPajama-INCITE-7B-Instruct"
- base_url: str = Field(
- "https://api.together.xyz",
- description="The base URL for your Together API instance",
- )
-
- _client_session: aiohttp.ClientSession = None
-
- template_messages: Callable = llama2_template_messages
-
- prompt_templates = {
- "edit": simplified_edit_prompt,
- }
-
- async def start(self, **kwargs):
- await super().start(**kwargs)
- self._client_session = aiohttp.ClientSession(
- connector=aiohttp.TCPConnector(verify_ssl=self.verify_ssl),
- timeout=aiohttp.ClientTimeout(total=self.timeout),
- )
-
- async def stop(self):
- await self._client_session.close()
-
- async def _stream_complete(self, prompt, options):
- args = self.collect_args(options)
-
- async with self._client_session.post(
- f"{self.base_url}/inference",
- json={
- "prompt": prompt,
- "stream_tokens": True,
- **args,
- },
- headers={"Authorization": f"Bearer {self.api_key}"},
- proxy=self.proxy,
- ) as resp:
- async for line in resp.content.iter_chunks():
- if line[1]:
- json_chunk = line[0].decode("utf-8")
- if json_chunk.startswith(": ping - ") or json_chunk.startswith(
- "data: [DONE]"
- ):
- continue
-
- chunks = json_chunk.split("\n")
- for chunk in chunks:
- if chunk.strip() != "":
- if chunk.startswith("data: "):
- chunk = chunk[6:]
- if chunk == "[DONE]":
- break
- try:
- json_chunk = json.loads(chunk)
- except Exception as e:
- logger.warning(f"Invalid JSON chunk: {chunk}\n\n{e}")
- continue
- if "choices" in json_chunk:
- yield json_chunk["choices"][0]["text"]
-
- async def _complete(self, prompt: str, options):
- args = self.collect_args(options)
-
- async with self._client_session.post(
- f"{self.base_url}/inference",
- json={"prompt": prompt, **args},
- headers={"Authorization": f"Bearer {self.api_key}"},
- proxy=self.proxy,
- ) as resp:
- text = await resp.text()
- j = json.loads(text)
- try:
- if "choices" not in j["output"]:
- raise Exception(text)
- if "output" in j:
- return j["output"]["choices"][0]["text"]
- except Exception as e:
- j = await resp.json()
- if "error" in j:
- if j["error"].startswith("invalid hexlify value"):
- raise ContinueCustomException(
- message=f"Invalid Together API key:\n\n{j['error']}",
- title="Together API Error",
- )
- else:
- raise ContinueCustomException(
- message=j["error"], title="Together API Error"
- )
-
- raise e
diff --git a/server/continuedev/models/reference/generate.py b/server/continuedev/models/reference/generate.py
index b17df3b2..43e88750 100644
--- a/server/continuedev/models/reference/generate.py
+++ b/server/continuedev/models/reference/generate.py
@@ -4,19 +4,9 @@ import json
from textwrap import dedent
LLM_MODULES = [
- ("openai", "OpenAI"),
- ("anthropic", "AnthropicLLM"),
- ("ggml", "GGML"),
("llamacpp", "LlamaCpp"),
- ("text_gen_interface", "TextGenUI"),
("ollama", "Ollama"),
- ("replicate", "ReplicateLLM"),
- ("together", "TogetherLLM"),
- ("hf_inference_api", "HuggingFaceInferenceAPI"),
- ("hf_tgi", "HuggingFaceTGI"),
- ("openai_free_trial", "OpenAIFreeTrial"),
- ("google_palm_api", "GooglePaLMAPI"),
- ("queued", "QueuedLLM"),
+ ("queued", "QueuedLLM")
]
CONTEXT_PROVIDER_MODULES = [
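
`LLM_MODULES` in the reference-doc generator now names only the three remaining backends. The loop below is an illustrative sketch of how such `(module, class)` pairs are usually consumed; it is not code from `generate.py` and assumes only that the classes are pydantic models (they subclass the pydantic-based `LLM`):

```python
import importlib

LLM_MODULES = [("llamacpp", "LlamaCpp"), ("ollama", "Ollama"), ("queued", "QueuedLLM")]

for module_name, class_name in LLM_MODULES:
    # Import each remaining backend and list its configurable fields.
    module = importlib.import_module(f"continuedev.libs.llm.{module_name}")
    cls = getattr(module, class_name)
    print(class_name, "->", sorted(cls.schema().get("properties", {}).keys()))
```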
diff --git a/server/continuedev/plugins/steps/chat.py b/server/continuedev/plugins/steps/chat.py
index 1b0f76f9..919d939e 100644
--- a/server/continuedev/plugins/steps/chat.py
+++ b/server/continuedev/plugins/steps/chat.py
@@ -4,26 +4,17 @@ import os
from textwrap import dedent
from typing import Any, Coroutine, List
-import openai
from directory_tree import display_tree
from dotenv import load_dotenv
from pydantic import Field
-from ...core.main import ChatMessage, FunctionCall, Models, Step, step_to_json_schema
+from ...core.main import ChatMessage, Models, Step, step_to_json_schema
from ...core.sdk import ContinueSDK
-from ...core.steps import MessageStep
-from ...libs.llm.openai import OpenAI
-from ...libs.llm.openai_free_trial import OpenAIFreeTrial
from ...libs.util.devdata import dev_data_logger
from ...libs.util.strings import remove_quotes_and_escapes
from ...libs.util.telemetry import posthog_logger
-from .main import EditHighlightedCodeStep
load_dotenv()
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-openai.api_key = OPENAI_API_KEY
-
-FREE_USAGE_STEP_NAME = "Please enter OpenAI API key"
def add_ellipsis(text: str, max_length: int = 200) -> str:
@@ -40,48 +31,6 @@ class SimpleChatStep(Step):
async def run(self, sdk: ContinueSDK):
# Check if proxy server API key
- if (
- isinstance(sdk.models.default, OpenAIFreeTrial)
- and (
- sdk.models.default.api_key is None
- or sdk.models.default.api_key.strip() == ""
- )
- and len(list(filter(lambda x: not x.step.hide, sdk.history.timeline))) >= 10
- and len(
- list(
- filter(
- lambda x: x.step.name == FREE_USAGE_STEP_NAME,
- sdk.history.timeline,
- )
- )
- )
- == 0
- ):
- await sdk.run_step(
- MessageStep(
- name=FREE_USAGE_STEP_NAME,
- message=dedent(
- """\
- To make it easier to use Continue, you're getting limited free usage. When you have the chance, please enter your own OpenAI key in `~/.continue/config.py`. You can open the file by using the '/config' slash command in the text box below.
-
- Here's an example of how to edit the file:
- ```python
- ...
- config=ContinueConfig(
- ...
- models=Models(
- default=OpenAIFreeTrial(api_key="<API_KEY>", model="gpt-4"),
- summarize=OpenAIFreeTrial(api_key="<API_KEY>", model="gpt-3.5-turbo")
- )
- )
- ```
-
- You can also learn more about customizations [here](https://continue.dev/docs/customization).
- """
- ),
- )
- )
-
messages = self.messages or await sdk.get_chat_context()
generator = sdk.models.chat.stream_chat(
@@ -232,148 +181,3 @@ class EditFileStep(Step):
async def run(self, sdk: ContinueSDK):
await sdk.edit_file(self.filename, self.instructions)
-
-class ChatWithFunctions(Step):
- user_input: str
- functions: List[Step] = [
- AddFileStep(filename="", file_contents=""),
- EditFileStep(filename="", instructions=""),
- EditHighlightedCodeStep(user_input=""),
- ViewDirectoryTreeStep(),
- AddDirectoryStep(directory_name=""),
- DeleteFileStep(filename=""),
- RunTerminalCommandStep(command=""),
- ]
- name: str = "Input"
- manage_own_chat_context: bool = True
- description: str = ""
- hide: bool = True
-
- async def run(self, sdk: ContinueSDK):
- await sdk.update_ui()
-
- step_name_step_class_map = {
- step.name.replace(" ", ""): step.__class__ for step in self.functions
- }
-
- functions = [step_to_json_schema(function) for function in self.functions]
-
- self.chat_context.append(
- ChatMessage(role="user", content=self.user_input, summary=self.user_input)
- )
-
- last_function_called_name = None
- last_function_called_params = None
- while True:
- was_function_called = False
- func_args = ""
- func_name = ""
- msg_content = ""
- msg_step = None
-
- gpt350613 = OpenAI(model="gpt-3.5-turbo-0613")
- await sdk.start_model(gpt350613)
-
- async for msg_chunk in gpt350613.stream_chat(
- await sdk.get_chat_context(), functions=functions
- ):
- if sdk.current_step_was_deleted():
- return
-
- if "content" in msg_chunk and msg_chunk["content"] is not None:
- msg_content += msg_chunk["content"]
- # if last_function_called_index_in_history is not None:
- # while sdk.history.timeline[last_function_called_index].step.hide:
- # last_function_called_index += 1
- # sdk.history.timeline[last_function_called_index_in_history].step.description = msg_content
- if msg_step is None:
- msg_step = MessageStep(
- name="Chat", message=msg_chunk["content"]
- )
- await sdk.run_step(msg_step)
- else:
- msg_step.description = msg_content
- await sdk.update_ui()
- elif "function_call" in msg_chunk or func_name != "":
- was_function_called = True
- if "function_call" in msg_chunk:
- if "arguments" in msg_chunk["function_call"]:
- func_args += msg_chunk["function_call"]["arguments"]
- if "name" in msg_chunk["function_call"]:
- func_name += msg_chunk["function_call"]["name"]
-
- if not was_function_called:
- self.chat_context.append(
- ChatMessage(
- role="assistant", content=msg_content, summary=msg_content
- )
- )
- break
- else:
- if func_name == "python" and "python" not in step_name_step_class_map:
- # GPT must be fine-tuned to believe this exists, but it doesn't always
- func_name = "EditHighlightedCodeStep"
- func_args = json.dumps({"user_input": self.user_input})
- # self.chat_context.append(ChatMessage(
- # role="assistant",
- # content=None,
- # function_call=FunctionCall(
- # name=func_name,
- # arguments=func_args
- # ),
- # summary=f"Called function {func_name}"
- # ))
- # self.chat_context.append(ChatMessage(
- # role="user",
- # content="The 'python' function does not exist. Don't call it. Try again to call another function.",
- # summary="'python' function does not exist."
- # ))
- # msg_step.hide = True
- # continue
- # Call the function, then continue to chat
- func_args = "{}" if func_args == "" else func_args
- try:
- fn_call_params = json.loads(func_args)
- except json.JSONDecodeError:
- raise Exception("The model returned invalid JSON. Please try again")
- self.chat_context.append(
- ChatMessage(
- role="assistant",
- content=None,
- function_call=FunctionCall(name=func_name, arguments=func_args),
- summary=f"Called function {func_name}",
- )
- )
- sdk.history.current_index + 1
- if func_name not in step_name_step_class_map:
- raise Exception(
- f"The model tried to call a function ({func_name}) that does not exist. Please try again."
- )
-
- # if func_name == "AddFileStep":
- # step_to_run.hide = True
- # self.description += f"\nAdded file `{func_args['filename']}`"
- # elif func_name == "AddDirectoryStep":
- # step_to_run.hide = True
- # self.description += f"\nAdded directory `{func_args['directory_name']}`"
- # else:
- # self.description += f"\n`Running function {func_name}`\n\n"
- if func_name == "EditHighlightedCodeStep":
- fn_call_params["user_input"] = self.user_input
- elif func_name == "EditFile":
- fn_call_params["instructions"] = self.user_input
-
- step_to_run = step_name_step_class_map[func_name](**fn_call_params)
- if (
- last_function_called_name is not None
- and last_function_called_name == func_name
- and last_function_called_params is not None
- and last_function_called_params == fn_call_params
- ):
- # If it's calling the same function more than once in a row, it's probably looping and confused
- return
- last_function_called_name = func_name
- last_function_called_params = fn_call_params
-
- await sdk.run_step(step_to_run)
- await sdk.update_ui()
diff --git a/server/continuedev/plugins/steps/setup_model.py b/server/continuedev/plugins/steps/setup_model.py
index 87e52f1b..e7249594 100644
--- a/server/continuedev/plugins/steps/setup_model.py
+++ b/server/continuedev/plugins/steps/setup_model.py
@@ -5,16 +5,8 @@ from ...models.filesystem import RangeInFile
from ...models.main import Range
MODEL_CLASS_TO_MESSAGE = {
- "OpenAI": "Obtain your OpenAI API key from [here](https://platform.openai.com/account/api-keys) and paste it into the `api_key` field at config.models.default.api_key in `config.py`. Then reload the VS Code window for changes to take effect.",
- "OpenAIFreeTrial": "To get started with OpenAI models, obtain your OpenAI API key from [here](https://platform.openai.com/account/api-keys) and paste it into the `api_key` field at config.models.default.api_key in `config.py`. Then reload the VS Code window for changes to take effect.",
- "AnthropicLLM": "To get started with Anthropic, you first need to sign up for the beta [here](https://claude.ai/login) to obtain an API key. Once you have the key, paste it into the `api_key` field at config.models.default.api_key in `config.py`. Then reload the VS Code window for changes to take effect.",
- "ReplicateLLM": "To get started with Replicate, sign up to obtain an API key [here](https://replicate.ai/), then paste it into the `api_key` field at config.models.default.api_key in `config.py`.",
"Ollama": "To get started with Ollama, download the app from [ollama.ai](https://ollama.ai/). Once it is downloaded, be sure to pull at least one model and use its name in the model field in config.py (e.g. `model='codellama'`).",
- "GGML": "GGML models can be run locally using the `llama-cpp-python` library. To learn how to set up a local llama-cpp-python server, read [here](https://github.com/continuedev/ggml-server-example). Once it is started on port 8000, you're all set!",
- "TogetherLLM": "To get started using models from Together, first obtain your Together API key from [here](https://together.ai). Paste it into the `api_key` field at config.models.default.api_key in `config.py`. Then, on their models page, press 'start' on the model of your choice and make sure the `model=` parameter in the config file for the `TogetherLLM` class reflects the name of this model. Finally, reload the VS Code window for changes to take effect.",
- "LlamaCpp": "To get started with this model, clone the [`llama.cpp` repo](https://github.com/ggerganov/llama.cpp) and follow the instructions to set up the server [here](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md#build). Any of the parameters described in the README can be passed to the `llama_cpp_args` field in the `LlamaCpp` class in `config.py`.",
- "HuggingFaceInferenceAPI": "To get started with the HuggingFace Inference API, first deploy a model and obtain your API key from [here](https://huggingface.co/inference-api). Paste it into the `hf_token` field at config.models.default.hf_token in `config.py`. Finally, reload the VS Code window for changes to take effect.",
- "GooglePaLMAPI": "To get started with the Google PaLM API, create an API key in Makersuite [here](https://makersuite.google.com/u/2/app/apikey), then paste it into the `api_key` field at config.models.default.api_key in `config.py`.",
+ "LlamaCpp": "To get started with this model, clone the [`llama.cpp` repo](https://github.com/ggerganov/llama.cpp) and follow the instructions to set up the server [here](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md#build). Any of the parameters described in the README can be passed to the `llama_cpp_args` field in the `LlamaCpp` class in `config.py`."
}
diff --git a/server/tests/util/config.py b/server/tests/util/config.py
index 370933a0..82811bc1 100644
--- a/server/tests/util/config.py
+++ b/server/tests/util/config.py
@@ -1,15 +1,18 @@
from continuedev.core.config import ContinueConfig
from continuedev.core.models import Models
-from continuedev.libs.llm.openai_free_trial import OpenAIFreeTrial
+from continuedev.libs.llm import Ollama
config = ContinueConfig(
allow_anonymous_telemetry=False,
models=Models(
- default=OpenAIFreeTrial(api_key="", model="gpt-4"),
- summarize=OpenAIFreeTrial(
- api_key="",
- model="gpt-3.5-turbo",
- ),
+ default=Ollama(
+ title="CodeLlama-7b-Instruct",
+ model="codellama:7b-instruct"
+ ),
+ summarize=Ollama(
+ title="CodeLlama-7b-Instruct",
+ model="codellama:7b-instruct"
+ )
),
system_message=None,
temperature=0.5,