Diffstat (limited to 'server/tests')
-rw-r--r--   server/tests/__init__.py            0
-rw-r--r--   server/tests/llm_test.py          179
-rw-r--r--   server/tests/step_test.py          68
-rw-r--r--   server/tests/util/__init__.py       0
-rw-r--r--   server/tests/util/config.py        19
-rw-r--r--   server/tests/util/openai_mock.py  139
-rw-r--r--   server/tests/util/prompts.py        2
7 files changed, 407 insertions, 0 deletions
diff --git a/server/tests/__init__.py b/server/tests/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/server/tests/__init__.py
diff --git a/server/tests/llm_test.py b/server/tests/llm_test.py
new file mode 100644
index 00000000..a016b464
--- /dev/null
+++ b/server/tests/llm_test.py
@@ -0,0 +1,179 @@
+import asyncio
+import os
+from functools import wraps
+
+import pytest
+from continuedev.core.main import ChatMessage
+from continuedev.libs.llm.anthropic import AnthropicLLM
+from continuedev.libs.llm.base import LLM, CompletionOptions
+from continuedev.libs.llm.ggml import GGML
+from continuedev.libs.llm.openai import OpenAI
+from continuedev.libs.llm.together import TogetherLLM
+from continuedev.libs.util.count_tokens import DEFAULT_ARGS
+from dotenv import load_dotenv
+from util.prompts import tokyo_test_pair
+
+load_dotenv()
+
+
+SPEND_MONEY = True
+
+
+def start_model(model):
+    def write_log(msg: str):
+        pass
+
+    asyncio.run(model.start(write_log=write_log, unique_id="test_unique_id"))
+
+
+def async_test(func):
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        return asyncio.run(func(*args, **kwargs))
+
+    return wrapper
+
+
+class TestBaseLLM:
+    model = "gpt-3.5-turbo"
+    context_length = 4096
+    system_message = "test_system_message"
+
+    def setup_class(cls):
+        cls.llm = LLM(
+            model=cls.model,
+            context_length=cls.context_length,
+            system_message=cls.system_message,
+        )
+
+        start_model(cls.llm)
+
+    def test_llm_is_instance(self):
+        assert isinstance(self.llm, LLM)
+
+    def test_llm_collect_args(self):
+        options = CompletionOptions(model=self.model)
+        assert self.llm.collect_args(options) == {
+            **DEFAULT_ARGS,
+            "model": self.model,
+        }
+
+    @pytest.mark.skipif(SPEND_MONEY is False, reason="Costs money")
+    @async_test
+    async def test_completion(self):
+        if self.llm.__class__.__name__ == "LLM":
+            pytest.skip("Skipping abstract LLM")
+
+        resp = await self.llm.complete(tokyo_test_pair[0], temperature=0.0)
+        assert isinstance(resp, str)
+        assert resp.strip().lower() == tokyo_test_pair[1]
+
+    @pytest.mark.skipif(SPEND_MONEY is False, reason="Costs money")
+    @async_test
+    async def test_stream_chat(self):
+        if self.llm.__class__.__name__ == "LLM":
+            pytest.skip("Skipping abstract LLM")
+
+        completion = ""
+        role = None
+        async for chunk in self.llm.stream_chat(
+            messages=[
+                ChatMessage(
+                    role="user", content=tokyo_test_pair[0], summary=tokyo_test_pair[0]
+                )
+            ],
+            temperature=0.0,
+        ):
+            assert isinstance(chunk, dict)
+            if "content" in chunk:
+                completion += chunk["content"]
+            if "role" in chunk:
+                role = chunk["role"]
+
+        assert role == "assistant"
+        assert completion.strip().lower() == tokyo_test_pair[1]
+
+    @pytest.mark.skipif(SPEND_MONEY is False, reason="Costs money")
+    @async_test
+    async def test_stream_complete(self):
+        if self.llm.__class__.__name__ == "LLM":
+            pytest.skip("Skipping abstract LLM")
+
+        completion = ""
+        async for chunk in self.llm.stream_complete(
+            tokyo_test_pair[0], temperature=0.0
+        ):
+            assert isinstance(chunk, str)
+            completion += chunk
+
+        assert completion.strip().lower() == tokyo_test_pair[1]
+
+
+class TestOpenAI(TestBaseLLM):
+    def setup_class(cls):
+        super().setup_class(cls)
+        cls.llm = OpenAI(
+            model=cls.model,
+            context_length=cls.context_length,
+            system_message=cls.system_message,
+            api_key=os.environ["OPENAI_API_KEY"],
+            # api_base=f"http://localhost:{port}",
+        )
+        start_model(cls.llm)
+        # cls.server = start_openai(port=port)

+    # def teardown_class(cls):
+    #     cls.server.terminate()
+
+    @pytest.mark.asyncio
+    @pytest.mark.skipif(SPEND_MONEY is False, reason="Costs money")
+    async def test_completion(self):
+        resp = await self.llm.complete(
+            "Output a single word, that being the capital of Japan:"
+        )
+        assert isinstance(resp, str)
+        assert resp.strip().lower() == tokyo_test_pair[1]
+
+
+class TestGGML(TestBaseLLM):
+    def setup_class(cls):
+        super().setup_class(cls)
+        cls.llm = GGML(
+            model="gpt-3.5-turbo",
+            context_length=cls.context_length,
+            system_message=cls.system_message,
+            server_url="https://api.openai.com",
+            api_key=os.environ["OPENAI_API_KEY"],
+        )
+        start_model(cls.llm)
+
+
+@pytest.mark.skipif(True, reason="Together is not working")
+class TestTogetherLLM(TestBaseLLM):
+    def setup_class(cls):
+        super().setup_class(cls)
+        cls.llm = TogetherLLM(
+            api_key=os.environ["TOGETHER_API_KEY"],
+        )
+        start_model(cls.llm)
+
+
+class TestAnthropicLLM(TestBaseLLM):
+    def setup_class(cls):
+        super().setup_class(cls)
+        cls.llm = AnthropicLLM(api_key=os.environ["ANTHROPIC_API_KEY"])
+        start_model(cls.llm)
+
+    def test_llm_collect_args(self):
+        options = CompletionOptions(model=self.model)
+        assert self.llm.collect_args(options) == {
+            "max_tokens_to_sample": DEFAULT_ARGS["max_tokens"],
+            "temperature": DEFAULT_ARGS["temperature"],
+            "model": self.model,
+        }
+
+
+if __name__ == "__main__":
+    import pytest
+
+    pytest.main()
diff --git a/server/tests/step_test.py b/server/tests/step_test.py
new file mode 100644
index 00000000..a9132dd3
--- /dev/null
+++ b/server/tests/step_test.py
@@ -0,0 +1,68 @@
+import pytest
+from continuedev.core.config import ContinueConfig
+from continuedev.core.steps import UserInputStep
+from continuedev.headless import start_headless_session
+from continuedev.models.filesystem import Range, RangeInFileWithContents
+from continuedev.plugins.steps.chat import SimpleChatStep
+from continuedev.plugins.steps.main import EditHighlightedCodeStep
+from continuedev.plugins.steps.on_traceback import DefaultOnTracebackStep
+from util.prompts import dotenv_test_pair, tokyo_test_pair
+
+TEST_CONFIG = ContinueConfig()
+
+
+@pytest.mark.asyncio
+async def test_step():
+    pytest.skip("TODO: too slow")
+    session = await start_headless_session(config=TEST_CONFIG)
+
+    await session.autopilot.run_from_step(UserInputStep(user_input=tokyo_test_pair[0]))
+
+    full_state = await session.autopilot.get_full_state()
+
+    assert isinstance(full_state.history.timeline[-1].step, SimpleChatStep)
+
+    assert not full_state.history.timeline[-1].step.hide
+
+    assert (
+        full_state.history.timeline[-1].step.description.strip().lower()
+        == tokyo_test_pair[1]
+    )
+
+    await session.autopilot.cleanup()
+
+
+@pytest.mark.asyncio
+async def test_traceback_step():
+    pytest.skip("TODO: too slow")
+    session = await start_headless_session(config=TEST_CONFIG)
+
+    await session.autopilot.run_from_step(
+        DefaultOnTracebackStep(output=dotenv_test_pair[0])
+    )
+
+    full_state = await session.autopilot.get_full_state()
+    assert dotenv_test_pair[1] in full_state.history.timeline[-1].step.description
+
+    await session.autopilot.cleanup()
+
+
+@pytest.mark.asyncio
+async def test_edit_step():
+    pytest.skip("TODO: too slow")
+    session = await start_headless_session(config=TEST_CONFIG)
+
+    range_in_file = RangeInFileWithContents(
+        filepath=__file__, range=Range.from_shorthand(0, 0, 0, 0), contents=""
+    )
+
+    await session.autopilot.handle_highlighted_code(range_in_files=[range_in_file])
+
+    await session.autopilot.run_from_step(
+        EditHighlightedCodeStep(user_input="Don't edit this code")
+    )
+
+    full_state = await session.autopilot.get_full_state()
+    assert isinstance(full_state.history.timeline[-1].step.description, str)
+
+    await session.autopilot.cleanup()
diff --git a/server/tests/util/__init__.py b/server/tests/util/__init__.py
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/server/tests/util/__init__.py
diff --git a/server/tests/util/config.py b/server/tests/util/config.py
new file mode 100644
index 00000000..370933a0
--- /dev/null
+++ b/server/tests/util/config.py
@@ -0,0 +1,19 @@
+from continuedev.core.config import ContinueConfig
+from continuedev.core.models import Models
+from continuedev.libs.llm.openai_free_trial import OpenAIFreeTrial
+
+config = ContinueConfig(
+    allow_anonymous_telemetry=False,
+    models=Models(
+        default=OpenAIFreeTrial(api_key="", model="gpt-4"),
+        summarize=OpenAIFreeTrial(
+            api_key="",
+            model="gpt-3.5-turbo",
+        ),
+    ),
+    system_message=None,
+    temperature=0.5,
+    custom_commands=[],
+    slash_commands=[],
+    context_providers=[],
+)
diff --git a/server/tests/util/openai_mock.py b/server/tests/util/openai_mock.py
new file mode 100644
index 00000000..763c5647
--- /dev/null
+++ b/server/tests/util/openai_mock.py
@@ -0,0 +1,139 @@
+import asyncio
+import os
+import random
+import subprocess
+from typing import Dict, List, Optional
+
+from fastapi import FastAPI
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+
+openai = FastAPI()
+
+
+class CompletionBody(BaseModel):
+    prompt: str
+    max_tokens: Optional[int] = 60
+    stream: Optional[bool] = False
+
+    class Config:
+        extra = "allow"
+
+
+@openai.post("/completions")
+@openai.post("/v1/completions")
+async def mock_completion(item: CompletionBody):
+    prompt = item.prompt
+
+    text = "This is a fake completion."
+
+    if item.stream:
+
+        async def stream_text():
+            for i in range(len(text)):
+                word = random.choice(prompt.split())
+                yield {
+                    "choices": [
+                        {
+                            "delta": {"role": "assistant", "content": word},
+                            "finish_reason": None,
+                            "index": 0,
+                        }
+                    ],
+                    "created": 1677825464,
+                    "id": "chatcmpl-6ptKyqKOGXZT6iQnqiXAH8adNLUzD",
+                    "model": "gpt-3.5-turbo-0301",
+                    "object": "chat.completion.chunk",
+                }
+                await asyncio.sleep(0.1)
+
+        return StreamingResponse(stream_text(), media_type="text/plain")
+
+    return {
+        "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
+        "object": "text_completion",
+        "created": 1589478378,
+        "model": "gpt-3.5-turbo",
+        "choices": [
+            {
+                "text": text,
+                "index": 0,
+                "logprobs": None,
+                "finish_reason": "length",
+            }
+        ],
+        "usage": {"prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12},
+    }
+
+
+class ChatBody(BaseModel):
+    messages: List[Dict[str, str]]
+    max_tokens: Optional[int] = None
+    stream: Optional[bool] = False
+
+    class Config:
+        extra = "allow"
+
+
+@openai.post("/v1/chat/completions")
+async def mock_chat_completion(item: ChatBody):
+    text = "This is a fake completion."
+
+    if item.stream:
+
+        async def stream_text():
+            for i in range(len(text)):
+                word = text[i]
+                yield {
+                    "choices": [
+                        {
+                            "delta": {"role": "assistant", "content": word},
+                            "finish_reason": None,
+                            "index": 0,
+                        }
+                    ],
+                    "created": 1677825464,
+                    "id": "chatcmpl-6ptKyqKOGXZT6iQnqiXAH8adNLUzD",
+                    "model": "gpt-3.5-turbo-0301",
+                    "object": "chat.completion.chunk",
+                }
+                await asyncio.sleep(0.1)
+
+        return StreamingResponse(stream_text(), media_type="text/plain")
+
+    return {
+        "id": "chatcmpl-123",
+        "object": "chat.completion",
+        "created": 1677652288,
+        "model": "gpt-3.5-turbo-0613",
+        "choices": [
+            {
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": text,
+                },
+                "finish_reason": "stop",
+            }
+        ],
+        "usage": {"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21},
+    }
+
+
+def start_openai(port: int = 8000):
+    server = subprocess.Popen(
+        [
+            "uvicorn",
+            "openai_mock:openai",
+            "--host",
+            "127.0.0.1",
+            "--port",
+            str(port),
+        ],
+        cwd=os.path.dirname(__file__),
+    )
+    return server
+
+
+if __name__ == "__main__":
+    start_openai()
diff --git a/server/tests/util/prompts.py b/server/tests/util/prompts.py
new file mode 100644
index 00000000..e84ddc82
--- /dev/null
+++ b/server/tests/util/prompts.py
@@ -0,0 +1,2 @@
+tokyo_test_pair = ("Output a single word, that being the capital of Japan:", "tokyo")
+dotenv_test_pair = ("ModuleNotFoundError: No module named 'dotenv'", "python-dotenv")
\ No newline at end of file