import asyncio
import json
import os
import random
import subprocess
from typing import Dict, List, Optional

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

# Minimal mock of the OpenAI HTTP API, useful for testing clients offline.
openai = FastAPI()


class CompletionBody(BaseModel):
    prompt: str
    max_tokens: Optional[int] = 60
    stream: Optional[bool] = False

    class Config:
        # Accept and ignore any extra fields a client sends (temperature, etc.).
        extra = "allow"


@openai.post("/completions")
@openai.post("/v1/completions")
async def mock_completion(item: CompletionBody):
    prompt = item.prompt
    text = "This is a fake completion."

    if item.stream:

        async def stream_text():
            # Emit one chunk per character of `text`, each echoing a random
            # word from the prompt. This mock reuses the chat-style chunk
            # shape on the completions route as well.
            words = prompt.split() or [text]  # guard against an empty prompt
            for _ in range(len(text)):
                word = random.choice(words)
                chunk = {
                    "choices": [
                        {
                            "delta": {"role": "assistant", "content": word},
                            "finish_reason": None,
                            "index": 0,
                        }
                    ],
                    "created": 1677825464,
                    "id": "chatcmpl-6ptKyqKOGXZT6iQnqiXAH8adNLUzD",
                    "model": "gpt-3.5-turbo-0301",
                    "object": "chat.completion.chunk",
                }
                # StreamingResponse requires str/bytes chunks, so serialize
                # each dict as a server-sent-events data line.
                yield f"data: {json.dumps(chunk)}\n\n"
                await asyncio.sleep(0.1)

        return StreamingResponse(stream_text(), media_type="text/event-stream")

    return {
        "id": "cmpl-uqkvlQyYK7bGYrRHQ0eXlWi7",
        "object": "text_completion",
        "created": 1589478378,
        "model": "gpt-3.5-turbo",
        "choices": [
            {
                "text": text,
                "index": 0,
                "logprobs": None,
                "finish_reason": "length",
            }
        ],
        "usage": {"prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12},
    }


class ChatBody(BaseModel):
    messages: List[Dict[str, str]]
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False

    class Config:
        extra = "allow"


@openai.post("/v1/chat/completions")
async def mock_chat_completion(item: ChatBody):
    text = "This is a fake completion."

    if item.stream:

        async def stream_text():
            # Stream the canned reply one character at a time.
            for char in text:
                chunk = {
                    "choices": [
                        {
                            "delta": {"role": "assistant", "content": char},
                            "finish_reason": None,
                            "index": 0,
                        }
                    ],
                    "created": 1677825464,
                    "id": "chatcmpl-6ptKyqKOGXZT6iQnqiXAH8adNLUzD",
                    "model": "gpt-3.5-turbo-0301",
                    "object": "chat.completion.chunk",
                }
                yield f"data: {json.dumps(chunk)}\n\n"
                await asyncio.sleep(0.1)

        return StreamingResponse(stream_text(), media_type="text/event-stream")

    return {
        "id": "chatcmpl-123",
        "object": "chat.completion",
        "created": 1677652288,
        "model": "gpt-3.5-turbo-0613",
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": text,
                },
                "finish_reason": "stop",
            }
        ],
        "usage": {"prompt_tokens": 9, "completion_tokens": 12, "total_tokens": 21},
    }


def start_openai(port: int = 8000):
    # Launch the mock server as a child process via uvicorn; callers are
    # responsible for terminating the returned Popen handle.
    server = subprocess.Popen(
        [
            "uvicorn",
            "openai_mock:openai",
            "--host",
            "127.0.0.1",
            "--port",
            str(port),
        ],
        cwd=os.path.dirname(__file__),
    )
    return server


if __name__ == "__main__":
    # Block on the child process so the server keeps running when this file
    # is executed directly.
    start_openai().wait()
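
# Example usage (a minimal sketch, kept in comments so importing this module
# stays side-effect free): with the mock running on port 8000, a client can
# exercise the chat endpoint as below. The `requests` dependency and the port
# are assumptions for illustration, not requirements of this module.
#
#     import requests
#
#     resp = requests.post(
#         "http://127.0.0.1:8000/v1/chat/completions",
#         json={"messages": [{"role": "user", "content": "hi"}]},
#     )
#     print(resp.json()["choices"][0]["message"]["content"])
#     # -> "This is a fake completion."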