import asyncio
from typing import Any, List, Union

from pydantic import Field

from ...core.main import ChatMessage
from .base import LLM, CompletionOptions


class QueuedLLM(LLM):
    """
    QueuedLLM exists to make up for LLM servers that cannot handle multiple requests at once. It uses a lock to ensure that only one request is being processed at a time.

    If you are already using another LLM class and are experiencing this problem, you can just wrap it with the QueuedLLM class like this:

    ```python title="~/.continue/config.py"
    from continuedev.libs.llm.queued import QueuedLLM

    config = ContinueConfig(
        ...
        models=Models(
            default=QueuedLLM(llm=<OTHER_LLM_CLASS>)
        )
    )
    ```
    """

    llm: LLM = Field(..., description="The LLM to wrap with a lock")
    _lock: asyncio.Lock

    model: str = "queued"

    def dict(self, **kwargs):
        return self.llm.dict(**kwargs)
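
    # start() initializes the wrapped LLM, creates the asyncio.Lock used to
    # serialize requests, and mirrors the wrapped model's settings (model name,
    # templates, context length) onto this wrapper.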
    async def start(self, *args, **kwargs):
        await super().start(*args, **kwargs)
        await self.llm.start(*args, **kwargs)
        self._lock = asyncio.Lock()
        self.model = self.llm.model
        self.template_messages = self.llm.template_messages
        self.prompt_templates = self.llm.prompt_templates
        self.context_length = self.llm.context_length

    async def stop(self):
        await self.llm.stop()

    def collect_args(self, options: CompletionOptions):
        return self.llm.collect_args(options)

    def compile_chat_messages(
        self,
        options: CompletionOptions,
        msgs: List[ChatMessage],
        functions: Union[List[Any], None] = None,
    ):
        return self.llm.compile_chat_messages(options, msgs, functions)

    def template_prompt_like_messages(self, prompt: str) -> str:
        return self.llm.template_prompt_like_messages(prompt)
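
    # The completion methods below hold the shared lock while calling the
    # wrapped LLM, so the underlying server only ever sees one request at a
    # time.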
    async def _complete(self, prompt: str, options: CompletionOptions):
        async with self._lock:
            resp = await self.llm._complete(prompt, options)
            return resp

    async def _stream_complete(self, prompt: str, options: CompletionOptions):
        async with self._lock:
            async for chunk in self.llm._stream_complete(prompt, options):
                yield chunk

    async def _stream_chat(
        self, messages: List[ChatMessage], options: CompletionOptions
    ):
        async with self._lock:
            async for chunk in self.llm._stream_chat(messages, options):
                yield chunk