From 7f590521b763e7a1b89a7feb76427ae3b9f71962 Mon Sep 17 00:00:00 2001 From: Jeff Emmett Date: Tue, 24 Mar 2026 14:11:37 -0700 Subject: [PATCH] refactor: route AI calls through LiteLLM proxy instead of direct Gemini Both ai-planner.ts and gemini.ts now use LiteLLM's OpenAI-compatible API (http://litellm:4000) which proxies to Gemini Flash. Falls back to direct Gemini API if LITELLM_API_KEY not set. - docker-compose joins ai-internal network for LiteLLM access - LITELLM_URL and LITELLM_API_KEY env vars added - GEMINI_API_KEY kept as optional fallback Co-Authored-By: Claude Opus 4.6 --- .env.example | 6 +- docker-compose.yml | 7 +- src/lib/ai-planner.ts | 270 +++++++++++++++++++++++------------------- src/lib/gemini.ts | 36 ++++-- 4 files changed, 182 insertions(+), 137 deletions(-) diff --git a/.env.example b/.env.example index 63ca28e..57dae36 100644 --- a/.env.example +++ b/.env.example @@ -1,8 +1,10 @@ # Database DATABASE_URL="postgresql://rtrips:changeme@localhost:5432/rtrips" -# AI - Gemini 2.0 Flash for NL parsing -GEMINI_API_KEY="your-gemini-api-key" +# AI — LiteLLM proxy (preferred) or direct Gemini fallback +LITELLM_URL="http://litellm:4000" +LITELLM_API_KEY="your-litellm-master-key" +GEMINI_API_KEY="" # rSpace integration NEXT_PUBLIC_RSPACE_URL="https://rspace.online" diff --git a/docker-compose.yml b/docker-compose.yml index 84b4a95..dfdbd11 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -7,7 +7,9 @@ services: restart: unless-stopped environment: - DATABASE_URL=postgresql://rtrips:${DB_PASSWORD}@rtrips-postgres:5432/rtrips - - GEMINI_API_KEY=${GEMINI_API_KEY} + - LITELLM_URL=${LITELLM_URL:-http://litellm:4000} + - LITELLM_API_KEY=${LITELLM_API_KEY} + - GEMINI_API_KEY=${GEMINI_API_KEY:-} - NEXT_PUBLIC_RSPACE_URL=${NEXT_PUBLIC_RSPACE_URL:-https://rspace.online} - RSPACE_INTERNAL_URL=${RSPACE_INTERNAL_URL:-http://rspace-online:3000} - NEXT_PUBLIC_ENCRYPTID_SERVER_URL=${NEXT_PUBLIC_ENCRYPTID_SERVER_URL:-https://auth.ridentity.online} @@ -35,6 
+37,7 @@ services: networks: - traefik-public - rtrips-internal + - ai-internal depends_on: rtrips-postgres: condition: service_healthy @@ -77,6 +80,8 @@ services: networks: traefik-public: external: true + ai-internal: + external: true rtrips-internal: internal: true diff --git a/src/lib/ai-planner.ts b/src/lib/ai-planner.ts index 30ffb43..ca70f7d 100644 --- a/src/lib/ai-planner.ts +++ b/src/lib/ai-planner.ts @@ -1,7 +1,7 @@ -// AI Trip Planner — Gemini 2.0 Flash function-calling agent loop +// AI Trip Planner — LiteLLM-proxied Gemini Flash with function calling // -// Sends user's trip description + tool declarations to Gemini. -// Gemini calls tools (geocode, flights, accommodation, routing) in a loop. +// Uses LiteLLM's OpenAI-compatible API to call gemini-flash with tools. +// LiteLLM routes to the actual Gemini API and manages keys/rate limits. // Returns an EnrichedTrip with real prices, coordinates, and routes. import { @@ -12,9 +12,24 @@ import { } from './ai-tools'; import type { EnrichedTrip } from './types'; -const GEMINI_URL = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent'; const MAX_ITERATIONS = 8; +function getLLMConfig() { + const url = process.env.LITELLM_URL || 'http://litellm:4000'; + const key = process.env.LITELLM_API_KEY; + if (!key) { + // Fallback to direct Gemini if LiteLLM not configured + const geminiKey = process.env.GEMINI_API_KEY; + if (!geminiKey) throw new Error('Neither LITELLM_API_KEY nor GEMINI_API_KEY configured'); + return { + url: 'https://generativelanguage.googleapis.com/v1beta/openai', + key: geminiKey, + model: 'gemini-2.0-flash', + }; + } + return { url, key, model: 'gemini-flash' }; +} + // ─── System prompt ────────────────────────────────────────────── const SYSTEM_PROMPT = `You are a trip planning assistant with access to real search tools. 
Given a natural language trip description: @@ -50,70 +65,78 @@ When you have gathered all data, return ONLY a JSON object matching this schema: "routes": [{ "fromIndex": number, "toIndex": number, "distanceMeters": number, "durationSeconds": number }] }`; -// ─── Tool declarations (Gemini function calling format) ───────── +// ─── Tool declarations (OpenAI format) ────────────────────────── -const TOOL_DECLARATIONS = { - tools: [{ - function_declarations: [ - { - name: 'geocodeLocation', - description: 'Geocode a location name to lat/lng coordinates using Nominatim', - parameters: { - type: 'object', - properties: { - name: { type: 'string', description: 'Location name (city, region, or address)' }, - }, - required: ['name'], +const TOOLS = [ + { + type: 'function' as const, + function: { + name: 'geocodeLocation', + description: 'Geocode a location name to lat/lng coordinates using Nominatim', + parameters: { + type: 'object', + properties: { + name: { type: 'string', description: 'Location name (city, region, or address)' }, }, + required: ['name'], }, - { - name: 'searchFlights', - description: 'Search for real flights between two cities using Kiwi Tequila API. Returns top 5 cheapest flights.', - parameters: { - type: 'object', - properties: { - from: { type: 'string', description: 'Departure city or airport code' }, - to: { type: 'string', description: 'Arrival city or airport code' }, - dateFrom: { type: 'string', description: 'Departure date (ISO format YYYY-MM-DD)' }, - dateTo: { type: 'string', description: 'Latest departure date (ISO format YYYY-MM-DD), can be same as dateFrom for one-way' }, - adults: { type: 'number', description: 'Number of adult passengers (default 1)' }, - }, - required: ['from', 'to', 'dateFrom', 'dateTo'], + }, + }, + { + type: 'function' as const, + function: { + name: 'searchFlights', + description: 'Search for real flights between two cities. 
Returns top 5 cheapest flights.', + parameters: { + type: 'object', + properties: { + from: { type: 'string', description: 'Departure city or airport code' }, + to: { type: 'string', description: 'Arrival city or airport code' }, + dateFrom: { type: 'string', description: 'Departure date (YYYY-MM-DD)' }, + dateTo: { type: 'string', description: 'Latest departure date (YYYY-MM-DD)' }, + adults: { type: 'number', description: 'Number of adult passengers (default 1)' }, }, + required: ['from', 'to', 'dateFrom', 'dateTo'], }, - { - name: 'searchAccommodation', - description: 'Search for real Airbnb listings at a location. Returns top 5 listings with prices and ratings.', - parameters: { - type: 'object', - properties: { - location: { type: 'string', description: 'Location to search (city name, optionally with country)' }, - checkin: { type: 'string', description: 'Check-in date (YYYY-MM-DD)' }, - checkout: { type: 'string', description: 'Check-out date (YYYY-MM-DD)' }, - guests: { type: 'number', description: 'Number of guests (default 2)' }, - maxPrice: { type: 'number', description: 'Maximum price per night in USD (optional)' }, - }, - required: ['location'], + }, + }, + { + type: 'function' as const, + function: { + name: 'searchAccommodation', + description: 'Search for real Airbnb listings at a location. Returns top 5 listings.', + parameters: { + type: 'object', + properties: { + location: { type: 'string', description: 'Location to search' }, + checkin: { type: 'string', description: 'Check-in date (YYYY-MM-DD)' }, + checkout: { type: 'string', description: 'Check-out date (YYYY-MM-DD)' }, + guests: { type: 'number', description: 'Number of guests (default 2)' }, + maxPrice: { type: 'number', description: 'Max price per night in USD (optional)' }, }, + required: ['location'], }, - { - name: 'computeRoute', - description: 'Compute driving route between two points. 
Only use for driveable distances (same continent, reasonable distance).', - parameters: { - type: 'object', - properties: { - fromLng: { type: 'number', description: 'Departure longitude' }, - fromLat: { type: 'number', description: 'Departure latitude' }, - toLng: { type: 'number', description: 'Arrival longitude' }, - toLat: { type: 'number', description: 'Arrival latitude' }, - profile: { type: 'string', description: 'Routing profile: driving-car, cycling-regular, or foot-walking' }, - }, - required: ['fromLng', 'fromLat', 'toLng', 'toLat'], + }, + }, + { + type: 'function' as const, + function: { + name: 'computeRoute', + description: 'Compute driving route between two points. Only for driveable distances.', + parameters: { + type: 'object', + properties: { + fromLng: { type: 'number', description: 'Departure longitude' }, + fromLat: { type: 'number', description: 'Departure latitude' }, + toLng: { type: 'number', description: 'Arrival longitude' }, + toLat: { type: 'number', description: 'Arrival latitude' }, + profile: { type: 'string', description: 'Routing profile: driving-car, cycling-regular, or foot-walking' }, }, + required: ['fromLng', 'fromLat', 'toLng', 'toLat'], }, - ], - }], -}; + }, + }, +]; // ─── Tool executor ────────────────────────────────────────────── @@ -158,108 +181,109 @@ async function executeTool(name: string, args: ToolCallArgs): Promise { export type PlannerStatus = (message: string) => void; +// ─── OpenAI-compatible message types ──────────────────────────── + +interface ChatMessage { + role: 'system' | 'user' | 'assistant' | 'tool'; + content?: string | null; + tool_calls?: Array<{ + id: string; + type: 'function'; + function: { name: string; arguments: string }; + }>; + tool_call_id?: string; + name?: string; +} + // ─── Main agent loop ──────────────────────────────────────────── export async function planTrip( text: string, onStatus?: PlannerStatus ): Promise { - const apiKey = process.env.GEMINI_API_KEY; - if (!apiKey) throw 
new Error('GEMINI_API_KEY not configured'); + const config = getLLMConfig(); - // Build initial conversation - const contents: GeminiContent[] = [ - { role: 'user', parts: [{ text }] }, + const messages: ChatMessage[] = [ + { role: 'system', content: SYSTEM_PROMPT }, + { role: 'user', content: text }, ]; for (let i = 0; i < MAX_ITERATIONS; i++) { onStatus?.(`AI planning iteration ${i + 1}...`); - const response = await callGemini(apiKey, contents); - const candidate = response.candidates?.[0]; - if (!candidate?.content?.parts) { - throw new Error('No response from Gemini'); + const response = await callLLM(config, messages); + const choice = response.choices?.[0]; + if (!choice?.message) { + throw new Error('No response from LLM'); } - const parts = candidate.content.parts; - // Add assistant response to conversation - contents.push({ role: 'model', parts }); + const assistantMsg = choice.message; + // Add assistant message to conversation + messages.push(assistantMsg); - // Check for function calls - const functionCalls = parts.filter( - (p: GeminiPart) => p.functionCall - ); - - if (functionCalls.length === 0) { - // No more tool calls — extract final JSON from text parts - const textParts = parts - .filter((p: GeminiPart) => p.text) - .map((p: GeminiPart) => p.text) - .join(''); - - return parseEnrichedTrip(textParts); + // Check for tool calls + const toolCalls = assistantMsg.tool_calls; + if (!toolCalls || toolCalls.length === 0) { + // No more tool calls — extract final JSON + return parseEnrichedTrip(assistantMsg.content || ''); } - // Execute tool calls (in parallel where possible) - onStatus?.(describeToolCalls(functionCalls)); + // Execute tool calls in parallel + onStatus?.(describeToolCalls(toolCalls)); const toolResults = await Promise.all( - functionCalls.map(async (part: GeminiPart) => { - const { name, args } = part.functionCall!; - const result = await executeTool(name, args); + toolCalls.map(async (tc) => { + const args = 
JSON.parse(tc.function.arguments); + const result = await executeTool(tc.function.name, args); return { - functionResponse: { - name, - response: { result: result ?? null }, - }, + role: 'tool' as const, + tool_call_id: tc.id, + content: JSON.stringify(result ?? null), }; }) ); - // Add tool results to conversation - contents.push({ role: 'user', parts: toolResults }); + // Add all tool results to conversation + messages.push(...toolResults); } throw new Error('AI planner exceeded maximum iterations'); } -// ─── Gemini API call ──────────────────────────────────────────── +// ─── LLM API call (OpenAI-compatible via LiteLLM) ────────── -interface GeminiPart { - text?: string; - functionCall?: { name: string; args: ToolCallArgs }; - functionResponse?: { name: string; response: unknown }; +interface LLMConfig { + url: string; + key: string; + model: string; } -interface GeminiContent { - role: 'user' | 'model'; - parts: GeminiPart[]; -} - -interface GeminiResponse { - candidates?: Array<{ - content?: { parts: GeminiPart[] }; - finishReason?: string; +interface LLMResponse { + choices?: Array<{ + message: ChatMessage; + finish_reason?: string; }>; } -async function callGemini(apiKey: string, contents: GeminiContent[]): Promise { - const res = await fetch(`${GEMINI_URL}?key=${apiKey}`, { +async function callLLM(config: LLMConfig, messages: ChatMessage[]): Promise { + const res = await fetch(`${config.url}/chat/completions`, { method: 'POST', - headers: { 'Content-Type': 'application/json' }, + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${config.key}`, + }, body: JSON.stringify({ - system_instruction: { parts: [{ text: SYSTEM_PROMPT }] }, - contents, - ...TOOL_DECLARATIONS, - generationConfig: { - temperature: 0.1, - }, + model: config.model, + messages, + tools: TOOLS, + tool_choice: 'auto', + temperature: 0.1, }), }); if (!res.ok) { const err = await res.text(); - throw new Error(`Gemini API error (${res.status}): ${err}`); + 
throw new Error(`LLM API error (${res.status}): ${err}`); } return res.json(); @@ -271,7 +295,6 @@ function parseEnrichedTrip(text: string): EnrichedTrip { const jsonStr = extractJSON(text); const parsed = JSON.parse(jsonStr); - // Ensure all required fields with defaults return { title: parsed.title || 'Untitled Trip', destinations: (parsed.destinations || []).map((d: Record) => ({ @@ -305,10 +328,11 @@ function extractJSON(text: string): string { // ─── Status description helpers ───────────────────────────────── -function describeToolCalls(calls: GeminiPart[]): string { +function describeToolCalls(calls: Array<{ function: { name: string; arguments: string } }>): string { const descriptions = calls.map((c) => { - const name = c.functionCall!.name; - const args = c.functionCall!.args; + const name = c.function.name; + let args: ToolCallArgs = {}; + try { args = JSON.parse(c.function.arguments); } catch { /* ignore */ } switch (name) { case 'geocodeLocation': return `Geocoding "${args.name}"`; diff --git a/src/lib/gemini.ts b/src/lib/gemini.ts index 7445e2a..07ab5cd 100644 --- a/src/lib/gemini.ts +++ b/src/lib/gemini.ts @@ -1,6 +1,23 @@ import { ParsedTrip } from './types'; -const GEMINI_URL = 'https://generativelanguage.googleapis.com/v1beta/openai/chat/completions'; +// Use LiteLLM proxy if available, fall back to direct Gemini +function getLLMConfig() { + const litellmKey = process.env.LITELLM_API_KEY; + if (litellmKey) { + return { + url: `${process.env.LITELLM_URL || 'http://litellm:4000'}/v1/chat/completions`, + key: litellmKey, + model: 'gemini-flash', + }; + } + const geminiKey = process.env.GEMINI_API_KEY; + if (!geminiKey) throw new Error('Neither LITELLM_API_KEY nor GEMINI_API_KEY configured'); + return { + url: 'https://generativelanguage.googleapis.com/v1beta/openai/chat/completions', + key: geminiKey, + model: 'gemini-2.0-flash', + }; +} const TRIP_PARSER_SYSTEM_PROMPT = `You are a trip planning assistant. 
Parse the user's trip description into structured JSON. @@ -46,7 +63,7 @@ Rules: - Always return valid JSON matching the schema above.`; function extractJSON(text: string): string { - // Handle Gemini's tendency to wrap JSON in ```json fences + // Handle tendency to wrap JSON in ```json fences const fenceMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/); if (fenceMatch) return fenceMatch[1].trim(); // Try to find raw JSON object @@ -56,19 +73,16 @@ function extractJSON(text: string): string { } export async function parseTrip(naturalLanguage: string): Promise { - const apiKey = process.env.GEMINI_API_KEY; - if (!apiKey) { - throw new Error('GEMINI_API_KEY not configured'); - } + const config = getLLMConfig(); - const response = await fetch(GEMINI_URL, { + const response = await fetch(config.url, { method: 'POST', headers: { 'Content-Type': 'application/json', - 'Authorization': `Bearer ${apiKey}`, + 'Authorization': `Bearer ${config.key}`, }, body: JSON.stringify({ - model: 'gemini-2.0-flash', + model: config.model, messages: [ { role: 'system', content: TRIP_PARSER_SYSTEM_PROMPT }, { role: 'user', content: naturalLanguage }, @@ -79,13 +93,13 @@ export async function parseTrip(naturalLanguage: string): Promise { if (!response.ok) { const err = await response.text(); - throw new Error(`Gemini API error: ${response.status} ${err}`); + throw new Error(`LLM API error: ${response.status} ${err}`); } const data = await response.json(); const content = data.choices?.[0]?.message?.content; if (!content) { - throw new Error('No content in Gemini response'); + throw new Error('No content in LLM response'); } const jsonStr = extractJSON(content);