# erowid-bot/app/llm.py — streaming LLM adapters (Ollama / Claude / OpenAI).

import json
import logging
from typing import AsyncGenerator
import httpx
from app.config import settings
logger = logging.getLogger(__name__)
async def stream_ollama(messages: list[dict], system: str = "") -> AsyncGenerator[str, None]:
    """Stream a chat completion from Ollama.

    Args:
        messages: Chat history as ``{"role", "content"}`` dicts.
        system: Optional system prompt, prepended as a system-role message.

    Yields:
        Incremental content strings as the model produces them.

    Raises:
        httpx.HTTPStatusError: If Ollama responds with an error status.
    """
    all_messages = []
    if system:
        all_messages.append({"role": "system", "content": system})
    all_messages.extend(messages)
    payload = {
        "model": settings.ollama_chat_model,
        "messages": all_messages,
        "stream": True,
        "options": {
            "num_ctx": 2048,
            "num_predict": 512,
        },
    }
    # Generous read timeout: local models may be slow to emit tokens.
    timeout = httpx.Timeout(connect=30, read=600, write=30, pool=30)
    async with httpx.AsyncClient(timeout=timeout) as client:
        async with client.stream(
            "POST",
            f"{settings.ollama_base_url}/api/chat",
            json=payload,
        ) as resp:
            resp.raise_for_status()
            buffer = b""
            async for chunk in resp.aiter_bytes():
                buffer += chunk
                # Ollama streams newline-delimited JSON; handle complete lines.
                while b"\n" in buffer:
                    line, buffer = buffer.split(b"\n", 1)
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        data = json.loads(line)
                        if "message" in data and "content" in data["message"]:
                            content = data["message"]["content"]
                            if content:
                                yield content
                        if data.get("done"):
                            return
                    except json.JSONDecodeError:
                        continue
            # Flush a final JSON line that arrived without a trailing newline,
            # which the loop above would otherwise silently drop.
            leftover = buffer.strip()
            if leftover:
                try:
                    data = json.loads(leftover)
                    content = data.get("message", {}).get("content")
                    if content:
                        yield content
                except json.JSONDecodeError:
                    pass
async def stream_claude(messages: list[dict], system: str = "") -> AsyncGenerator[str, None]:
    """Stream a chat completion from the Anthropic Claude API.

    Args:
        messages: Chat history as ``{"role", "content"}`` dicts.
        system: Optional system prompt (Claude takes it as a separate field).

    Yields:
        Incremental text fragments from the streamed response.

    Raises:
        RuntimeError: If the ``anthropic`` package is not installed.
    """
    try:
        from anthropic import AsyncAnthropic
    except ImportError as exc:
        # Chain the cause so the missing-dependency origin stays visible.
        raise RuntimeError("anthropic package not installed") from exc
    client = AsyncAnthropic(api_key=settings.anthropic_api_key)
    async with client.messages.stream(
        model="claude-sonnet-4-5-20250929",
        max_tokens=2048,
        system=system,
        messages=messages,
    ) as stream:
        async for text in stream.text_stream:
            yield text
async def stream_openai(messages: list[dict], system: str = "") -> AsyncGenerator[str, None]:
    """Stream a chat completion from the OpenAI API.

    Args:
        messages: Chat history as ``{"role", "content"}`` dicts.
        system: Optional system prompt, prepended as a system-role message.

    Yields:
        Incremental content deltas from the streamed response.

    Raises:
        RuntimeError: If the ``openai`` package is not installed.
    """
    try:
        from openai import AsyncOpenAI
    except ImportError as exc:
        # Chain the cause so the missing-dependency origin stays visible.
        raise RuntimeError("openai package not installed") from exc
    client = AsyncOpenAI(api_key=settings.openai_api_key)
    all_messages = []
    if system:
        all_messages.append({"role": "system", "content": system})
    all_messages.extend(messages)
    stream = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=all_messages,
        max_tokens=2048,
        stream=True,
    )
    async for chunk in stream:
        # choices can be empty on keep-alive chunks; delta.content can be None.
        if chunk.choices and chunk.choices[0].delta.content:
            yield chunk.choices[0].delta.content
async def stream_chat(messages: list[dict], system: str = "") -> AsyncGenerator[str, None]:
    """Route a chat request to the configured LLM provider.

    Reads ``settings.llm_provider`` (case-insensitive) and delegates to the
    matching provider streamer. Configuration problems are surfaced as yielded
    error strings rather than exceptions, so callers can display them inline.

    Args:
        messages: Chat history as ``{"role", "content"}`` dicts.
        system: Optional system prompt forwarded to the provider.

    Yields:
        Streamed content tokens, or a single human-readable error message.
    """
    provider = settings.llm_provider.lower()
    if provider == "ollama":
        async for token in stream_ollama(messages, system):
            yield token
    elif provider == "claude":
        if not settings.anthropic_api_key:
            yield "Error: ANTHROPIC_API_KEY not configured. Set it in .env or switch LLM_PROVIDER to ollama."
            return
        async for token in stream_claude(messages, system):
            yield token
    elif provider == "openai":
        if not settings.openai_api_key:
            yield "Error: OPENAI_API_KEY not configured. Set it in .env or switch LLM_PROVIDER to ollama."
            return
        async for token in stream_openai(messages, system):
            yield token
    else:
        yield f"Error: Unknown LLM_PROVIDER '{provider}'. Use ollama, claude, or openai."