path: root/server/continuedev/libs/llm/ollama.py
blob: 82cbc8527661acd802e701d5ace66f20afcfcb54 (plain)
import json
from typing import Callable, Optional

import aiohttp
from pydantic import Field

from ...core.main import ContinueCustomException
from ..util.logging import logger
from .base import LLM
from .prompts.chat import llama2_template_messages
from .prompts.edit import simplified_edit_prompt


class Ollama(LLM):
    """
    [Ollama](https://ollama.ai/) is an application for Mac and Linux that makes it easy to run open-source models locally, including Llama 2. Download the app from the website, and it will walk you through setup in a couple of minutes. You can also read more in their [README](https://github.com/jmorganca/ollama). Continue can then be configured to use the `Ollama` LLM class:

    ```python title="~/.continue/config.py"
    from continuedev.libs.llm.ollama import Ollama

    config = ContinueConfig(
        ...
        models=Models(
            default=Ollama(model="llama2")
        )
    )
    ```
    """

    model: str = "llama2"
    server_url: str = Field(
        "http://localhost:11434", description="URL of the Ollama server"
    )

    _client_session: Optional[aiohttp.ClientSession] = None

    template_messages: Callable = llama2_template_messages

    prompt_templates = {
        "edit": simplified_edit_prompt,
    }

    class Config:
        arbitrary_types_allowed = True

    async def start(self, **kwargs):
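        """Start the LLM: open the client session and send an empty prompt so Ollama pre-loads the model before the first real request."""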
        await super().start(**kwargs)
        self._client_session = self.create_client_session()
        try:
            async with self._client_session.post(
                f"{self.server_url}/api/generate",
                proxy=self.proxy,
                json={
                    "prompt": "",
                    "model": self.model,
                },
            ) as _:
                pass
        except Exception as e:
            logger.warning(f"Error pre-loading Ollama model: {e}")

    async def stop(self):
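        """Close the aiohttp client session."""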
        await self._client_session.close()

    async def get_downloaded_models(self):
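        """Return the names of the models available locally, as reported by Ollama's /api/tags endpoint."""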
        async with self._client_session.get(
            f"{self.server_url}/api/tags",
            proxy=self.proxy,
        ) as resp:
            js_data = await resp.json()
            return [tag["name"] for tag in js_data["models"]]

    async def _stream_complete(self, prompt, options):
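        """Stream a completion for `prompt` from /api/generate, yielding text fragments as they arrive."""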
        async with self._client_session.post(
            f"{self.server_url}/api/generate",
            json={
                "template": prompt,
                "model": self.model,
                "system": self.system_message,
                "options": {"temperature": options.temperature},
            },
            proxy=self.proxy,
        ) as resp:
            if resp.status == 400:
                txt = await resp.text()
                extra_msg = ""
                if "no such file" in txt:
                    extra_msg = f"\n\nThis means that the model '{self.model}' is not downloaded.\n\nYou have the following models downloaded: {', '.join(await self.get_downloaded_models())}.\n\nTo download this model, run `ollama run {self.model}` in your terminal."
                raise ContinueCustomException(
                    f"Ollama returned an error: {txt}{extra_msg}",
                    "Invalid request to Ollama",
                )
            elif resp.status != 200:
                raise ContinueCustomException(
                    f"Ollama returned an error: {await resp.text()}",
                    "Invalid request to Ollama",
                )
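            # The response body is a stream of newline-delimited JSON objects;
            # each object's "response" field holds the next generated text fragment.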
            async for line in resp.content.iter_any():
                if line:
                    json_chunk = line.decode("utf-8")
                    chunks = json_chunk.split("\n")
                    for chunk in chunks:
                        if chunk.strip() != "":
                            j = json.loads(chunk)
                            if "response" in j:
                                yield j["response"]
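

# A minimal sketch, assuming an Ollama server is listening at the default URL:
# an illustrative helper (not used by the class above) showing the raw HTTP
# exchange that _stream_complete performs, handy for checking the server
# independently of Continue.
async def _example_stream(prompt: str, model: str = "llama2") -> None:
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://localhost:11434/api/generate",
            json={"prompt": prompt, "model": model},
        ) as resp:
            # Ollama streams newline-delimited JSON objects; each one carries
            # the next piece of generated text in its "response" field.
            async for line in resp.content.iter_any():
                for chunk in line.decode("utf-8").split("\n"):
                    if chunk.strip():
                        print(json.loads(chunk).get("response", ""), end="", flush=True)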