From a16ba7a0166dbf9062ee4616e3ccfbff377e9f4b Mon Sep 17 00:00:00 2001
From: Nate Sesti
Date: Fri, 15 Sep 2023 23:22:37 -0700
Subject: feat: :sparkles: add stop_tokens option to LLM

---
 continuedev/src/continuedev/libs/llm/__init__.py | 10 +++++++---
 docs/docs/reference/Models/anthropic.md          |  1 +
 docs/docs/reference/Models/ggml.md               |  1 +
 docs/docs/reference/Models/hf_inference_api.md   |  1 +
 docs/docs/reference/Models/hf_tgi.md             |  1 +
 docs/docs/reference/Models/llamacpp.md           |  1 +
 docs/docs/reference/Models/maybe_proxy_openai.md |  1 +
 docs/docs/reference/Models/ollama.md             |  1 +
 docs/docs/reference/Models/openai.md             |  1 +
 docs/docs/reference/Models/queued.md             |  1 +
 docs/docs/reference/Models/replicate.md          |  1 +
 docs/docs/reference/Models/text_gen_interface.md |  1 +
 docs/docs/reference/Models/together.md           |  1 +
 docs/docs/reference/config.md                    |  2 +-
 14 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/continuedev/src/continuedev/libs/llm/__init__.py b/continuedev/src/continuedev/libs/llm/__init__.py
index baeb9d1a..b2eecab6 100644
--- a/continuedev/src/continuedev/libs/llm/__init__.py
+++ b/continuedev/src/continuedev/libs/llm/__init__.py
@@ -68,6 +68,10 @@ class LLM(ContinueBaseModel):
         ..., description="The name of the model to be used (e.g. gpt-4, codellama)"
     )
 
+    stop_tokens: Optional[List[str]] = Field(
+        None, description="Tokens that will stop the completion."
+    )
+
     timeout: Optional[int] = Field(
         300,
         description="Set the timeout for each request to the LLM. If you are running a local LLM that takes a while to respond, you might want to set this to avoid timeouts.",
@@ -204,7 +208,7 @@ class LLM(ContinueBaseModel):
             top_k=top_k,
             presence_penalty=presence_penalty,
             frequency_penalty=frequency_penalty,
-            stop=stop,
+            stop=stop or self.stop_tokens,
             max_tokens=max_tokens,
             functions=functions,
         )
@@ -251,7 +255,7 @@ class LLM(ContinueBaseModel):
             top_k=top_k,
             presence_penalty=presence_penalty,
             frequency_penalty=frequency_penalty,
-            stop=stop,
+            stop=stop or self.stop_tokens,
             max_tokens=max_tokens,
             functions=functions,
         )
@@ -296,7 +300,7 @@ class LLM(ContinueBaseModel):
             top_k=top_k,
             presence_penalty=presence_penalty,
             frequency_penalty=frequency_penalty,
-            stop=stop,
+            stop=stop or self.stop_tokens,
             max_tokens=max_tokens,
             functions=functions,
         )
diff --git a/docs/docs/reference/Models/anthropic.md b/docs/docs/reference/Models/anthropic.md
index 3f405d39..e2c6f683 100644
--- a/docs/docs/reference/Models/anthropic.md
+++ b/docs/docs/reference/Models/anthropic.md
@@ -31,6 +31,7 @@ Claude 2 is not yet publicly released. You can request early access [here](https
+
diff --git a/docs/docs/reference/Models/ggml.md b/docs/docs/reference/Models/ggml.md
index 3369df6f..d02f6d05 100644
--- a/docs/docs/reference/Models/ggml.md
+++ b/docs/docs/reference/Models/ggml.md
@@ -34,6 +34,7 @@ config = ContinueConfig(
+
diff --git a/docs/docs/reference/Models/hf_inference_api.md b/docs/docs/reference/Models/hf_inference_api.md
index 6e5aeeca..e7857b21 100644
--- a/docs/docs/reference/Models/hf_inference_api.md
+++ b/docs/docs/reference/Models/hf_inference_api.md
@@ -33,6 +33,7 @@ config = ContinueConfig(
+
diff --git a/docs/docs/reference/Models/hf_tgi.md b/docs/docs/reference/Models/hf_tgi.md
index 3ea28730..ab3f4d61 100644
--- a/docs/docs/reference/Models/hf_tgi.md
+++ b/docs/docs/reference/Models/hf_tgi.md
@@ -18,6 +18,7 @@ import ClassPropertyRef from '@site/src/components/ClassPropertyRef.tsx';
+
diff --git a/docs/docs/reference/Models/llamacpp.md b/docs/docs/reference/Models/llamacpp.md
index c1c5e4f9..ae4b6e62 100644
--- a/docs/docs/reference/Models/llamacpp.md
+++ b/docs/docs/reference/Models/llamacpp.md
@@ -38,6 +38,7 @@ config = ContinueConfig(
+
diff --git a/docs/docs/reference/Models/maybe_proxy_openai.md b/docs/docs/reference/Models/maybe_proxy_openai.md
index 651cbdba..c080b54d 100644
--- a/docs/docs/reference/Models/maybe_proxy_openai.md
+++ b/docs/docs/reference/Models/maybe_proxy_openai.md
@@ -39,6 +39,7 @@ These classes support any models available through the OpenAI API, assuming your
+
diff --git a/docs/docs/reference/Models/ollama.md b/docs/docs/reference/Models/ollama.md
index 9f92c850..f0370b45 100644
--- a/docs/docs/reference/Models/ollama.md
+++ b/docs/docs/reference/Models/ollama.md
@@ -29,6 +29,7 @@ config = ContinueConfig(
+
diff --git a/docs/docs/reference/Models/openai.md b/docs/docs/reference/Models/openai.md
index d9c440b7..f28e0598 100644
--- a/docs/docs/reference/Models/openai.md
+++ b/docs/docs/reference/Models/openai.md
@@ -47,6 +47,7 @@ Options for serving models locally with an OpenAI-compatible server include:
+
diff --git a/docs/docs/reference/Models/queued.md b/docs/docs/reference/Models/queued.md
index 2499e03b..231aa4dc 100644
--- a/docs/docs/reference/Models/queued.md
+++ b/docs/docs/reference/Models/queued.md
@@ -31,6 +31,7 @@ config = ContinueConfig(
+
diff --git a/docs/docs/reference/Models/replicate.md b/docs/docs/reference/Models/replicate.md
index 041f4c2b..83bfd383 100644
--- a/docs/docs/reference/Models/replicate.md
+++ b/docs/docs/reference/Models/replicate.md
@@ -34,6 +34,7 @@ If you don't specify the `model` parameter, it will default to `replicate/llama-
+
diff --git a/docs/docs/reference/Models/text_gen_interface.md b/docs/docs/reference/Models/text_gen_interface.md
index 7ca38b36..d910bee2 100644
--- a/docs/docs/reference/Models/text_gen_interface.md
+++ b/docs/docs/reference/Models/text_gen_interface.md
@@ -32,6 +32,7 @@ config = ContinueConfig(
+
diff --git a/docs/docs/reference/Models/together.md b/docs/docs/reference/Models/together.md
index 1a13cbbc..6838ba36 100644
--- a/docs/docs/reference/Models/together.md
+++ b/docs/docs/reference/Models/together.md
@@ -34,6 +34,7 @@ config = ContinueConfig(
+
diff --git a/docs/docs/reference/config.md b/docs/docs/reference/config.md
index a96dc2ac..f867ee1e 100644
--- a/docs/docs/reference/config.md
+++ b/docs/docs/reference/config.md
@@ -11,7 +11,7 @@ Continue can be deeply customized by editing the `ContinueConfig` object in `~/.
-
+
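For reference, the change above adds a model-level default: `stop_tokens` is declared once on the shared `LLM` base class, and each completion path now passes `stop=stop or self.stop_tokens`, so an explicit per-request `stop` list still takes precedence over the configured default. Below is a minimal sketch of how a user might set the new option in `~/.continue/config.py`; the import paths mirror the repo layout shown in this diff and the Continue docs, but the model name and stop strings are illustrative assumptions, not defaults.

```python
# Sketch of ~/.continue/config.py using the new `stop_tokens` field.
# Import paths follow the repo layout in this patch; the model and the
# stop strings below are example values (assumptions), not defaults.
from continuedev.src.continuedev.core.config import ContinueConfig
from continuedev.src.continuedev.core.models import Models
from continuedev.src.continuedev.libs.llm.ollama import Ollama

config = ContinueConfig(
    models=Models(
        default=Ollama(
            model="codellama",
            # Generation halts as soon as the model emits any of these
            # strings, unless a caller passes its own `stop` list for
            # that request (per-request `stop` overrides this default).
            stop_tokens=["</s>", "[INST]"],
        )
    )
)
```

Because the field lives on the base class rather than on any single provider, the same option applies uniformly to every backend documented above (Anthropic, GGML, llama.cpp, Ollama, Together, and so on), which is why each model reference page gains the same one-line property entry.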