
I'm working on an OpenAI chatbot, and this is my FastAPI backend code.

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse
from dotenv import load_dotenv
from pydantic import BaseModel
from typing import AsyncIterable
import openai
import os
import uuid
import asyncio

load_dotenv()

openai.api_key = os.getenv("OPENAI_API_KEY")

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

messages = {}
subject = "wine"
instructor = f"""You're a assistant helping humans. Please answer questions as detail as possible.
                And please format them in a user-friendly way, easy to read.
                If human's asking about something that is not related with ${subject}, just tell him you only answer about ${subject}.
            """
remember_cnt = 10


def initializeMemory(token: str):
    # Mutating the module-level dict doesn't require a global statement.
    messages[token] = []


class Message(BaseModel):
    message: str
    token: str


class Token(BaseModel):
    token: str


async def send_message(message: str, token: str) -> AsyncIterable[str]:

    if token not in messages:
        messages[token] = []
    messages[token].append({'role': 'user', 'content': message})
    response = openai.ChatCompletion.create(
        model='gpt-4',
        messages=[{'role': 'system', 'content': instructor}] +
        messages[token][-remember_cnt:],
        temperature=0,
        stream=True
    )

    final = ""
    for chunk in response:
        if 'content' in chunk.choices[0].delta:
            string = chunk.choices[0].delta.content
            final += string
            if string.isspace():
                yield '\n'
            else:
                yield string

    messages[token].append({'role': 'assistant', 'content': final})


@app.post("/memory-clear")
async def memory_clear(token: Token):
    if token.token != "":
        initializeMemory(token.token)
        return ""
    return uuid.uuid4()

@app.post("/chat")
async def get_answer(message: Message):
    return StreamingResponse(send_message(message.message, message.token), media_type='text/event-stream')

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(host="0.0.0.0", port=8080, app=app)

Everything's working fine, except that when two users call the /chat API at the same time, the requests don't execute in parallel. They execute one by one, so the second user has to wait until the first user's request is fulfilled. How can I fix this?
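
The behaviour is easy to reproduce with a small client script. This is just a hypothetical repro sketch, assuming the `httpx` package and the server running locally on port 8080 as above:

import asyncio
import time

import httpx


async def ask(client: httpx.AsyncClient, token: str):
    start = time.perf_counter()
    # Stream the response to completion, discarding the chunks.
    async with client.stream(
        "POST", "http://localhost:8080/chat",
        json={"message": "Hi", "token": token},
        timeout=None,
    ) as response:
        async for _ in response.aiter_text():
            pass
    print(f"{token} finished after {time.perf_counter() - start:.1f}s")


async def main():
    # If /chat handled requests concurrently, both calls would finish at
    # roughly the same time; instead the second waits for the first.
    async with httpx.AsyncClient() as client:
        await asyncio.gather(ask(client, "user-1"), ask(client, "user-2"))

asyncio.run(main())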

I've heard we can use asyncio, but I don't know how to use it here.

  • The issue is that, while your code uses `asyncio`, your call to `openai.ChatCompletion` does not, and that's where your code is spending almost all of its time. Luckily, the OpenAI library also has an [async interface](https://github.com/openai/openai-python#async-api), so I would recommend reading up on async Python and switching to using `response = await openai.ChatCompletion.acreate(...)`. – M.O. Jun 20 '23 at 10:00
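
A minimal sketch of that change, assuming the pre-1.0 `openai` package used in the question, where `ChatCompletion.acreate` is the async counterpart of `create` and a streaming response is consumed with `async for`:

async def send_message(message: str, token: str) -> AsyncIterable[str]:
    if token not in messages:
        messages[token] = []
    messages[token].append({'role': 'user', 'content': message})

    # acreate() awaits the request instead of blocking the event loop,
    # so other requests can be served while this one is in flight.
    response = await openai.ChatCompletion.acreate(
        model='gpt-4',
        messages=[{'role': 'system', 'content': instructor}] +
        messages[token][-remember_cnt:],
        temperature=0,
        stream=True
    )

    final = ""
    # Async iteration hands control back to the event loop between chunks,
    # which is what lets concurrent /chat requests interleave.
    async for chunk in response:
        if 'content' in chunk.choices[0].delta:
            string = chunk.choices[0].delta.content
            final += string
            yield '\n' if string.isspace() else string

    messages[token].append({'role': 'assistant', 'content': final})

The synchronous `create` call holds the event loop for the entire streamed response, which is why requests were served one by one; with `await` and `async for`, the loop is released at every chunk.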

0 Answers