refactor: route AI calls through LiteLLM proxy instead of direct Gemini

Both ai-planner.ts and gemini.ts now use LiteLLM's OpenAI-compatible
API (http://litellm:4000) which proxies to Gemini Flash. Falls back
to direct Gemini API if LITELLM_API_KEY not set.

- docker-compose joins ai-internal network for LiteLLM access
- LITELLM_URL and LITELLM_API_KEY env vars added
- GEMINI_API_KEY kept as optional fallback

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jeff Emmett 2026-03-24 14:11:37 -07:00
parent 36a7829c7a
commit 7f590521b7
4 changed files with 182 additions and 137 deletions

View File

@@ -1,8 +1,10 @@
# Database # Database
DATABASE_URL="postgresql://rtrips:changeme@localhost:5432/rtrips" DATABASE_URL="postgresql://rtrips:changeme@localhost:5432/rtrips"
# AI - Gemini 2.0 Flash for NL parsing # AI — LiteLLM proxy (preferred) or direct Gemini fallback
GEMINI_API_KEY="your-gemini-api-key" LITELLM_URL="http://litellm:4000"
LITELLM_API_KEY="your-litellm-master-key"
GEMINI_API_KEY=""
# rSpace integration # rSpace integration
NEXT_PUBLIC_RSPACE_URL="https://rspace.online" NEXT_PUBLIC_RSPACE_URL="https://rspace.online"

View File

@@ -7,7 +7,9 @@ services:
restart: unless-stopped restart: unless-stopped
environment: environment:
- DATABASE_URL=postgresql://rtrips:${DB_PASSWORD}@rtrips-postgres:5432/rtrips - DATABASE_URL=postgresql://rtrips:${DB_PASSWORD}@rtrips-postgres:5432/rtrips
- GEMINI_API_KEY=${GEMINI_API_KEY} - LITELLM_URL=${LITELLM_URL:-http://litellm:4000}
- LITELLM_API_KEY=${LITELLM_API_KEY}
- GEMINI_API_KEY=${GEMINI_API_KEY:-}
- NEXT_PUBLIC_RSPACE_URL=${NEXT_PUBLIC_RSPACE_URL:-https://rspace.online} - NEXT_PUBLIC_RSPACE_URL=${NEXT_PUBLIC_RSPACE_URL:-https://rspace.online}
- RSPACE_INTERNAL_URL=${RSPACE_INTERNAL_URL:-http://rspace-online:3000} - RSPACE_INTERNAL_URL=${RSPACE_INTERNAL_URL:-http://rspace-online:3000}
- NEXT_PUBLIC_ENCRYPTID_SERVER_URL=${NEXT_PUBLIC_ENCRYPTID_SERVER_URL:-https://auth.ridentity.online} - NEXT_PUBLIC_ENCRYPTID_SERVER_URL=${NEXT_PUBLIC_ENCRYPTID_SERVER_URL:-https://auth.ridentity.online}
@@ -35,6 +37,7 @@ services:
networks: networks:
- traefik-public - traefik-public
- rtrips-internal - rtrips-internal
- ai-internal
depends_on: depends_on:
rtrips-postgres: rtrips-postgres:
condition: service_healthy condition: service_healthy
@@ -77,6 +80,8 @@ services:
networks: networks:
traefik-public: traefik-public:
external: true external: true
ai-internal:
external: true
rtrips-internal: rtrips-internal:
internal: true internal: true

View File

@@ -1,7 +1,7 @@
// AI Trip Planner — Gemini 2.0 Flash function-calling agent loop // AI Trip Planner — LiteLLM-proxied Gemini Flash with function calling
// //
// Sends user's trip description + tool declarations to Gemini. // Uses LiteLLM's OpenAI-compatible API to call gemini-flash with tools.
// Gemini calls tools (geocode, flights, accommodation, routing) in a loop. // LiteLLM routes to the actual Gemini API and manages keys/rate limits.
// Returns an EnrichedTrip with real prices, coordinates, and routes. // Returns an EnrichedTrip with real prices, coordinates, and routes.
import { import {
@@ -12,9 +12,24 @@ import {
} from './ai-tools'; } from './ai-tools';
import type { EnrichedTrip } from './types'; import type { EnrichedTrip } from './types';
const GEMINI_URL = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent';
const MAX_ITERATIONS = 8; const MAX_ITERATIONS = 8;
// Pick the LLM backend: the LiteLLM proxy when LITELLM_API_KEY is set,
// otherwise fall back to Gemini's OpenAI-compatible API directly.
// The returned `url` is a BASE url — callLLM appends '/v1/chat/completions'.
function getLLMConfig() {
  const url = process.env.LITELLM_URL || 'http://litellm:4000';
  const key = process.env.LITELLM_API_KEY;
  if (!key) {
    // Fallback to direct Gemini if LiteLLM not configured
    const geminiKey = process.env.GEMINI_API_KEY;
    if (!geminiKey) throw new Error('Neither LITELLM_API_KEY nor GEMINI_API_KEY configured');
    return {
      // NOTE(review): callLLM appends '/v1/chat/completions' to this base,
      // yielding '.../v1beta/openai/v1/chat/completions'. Gemini's
      // OpenAI-compatible endpoint is '.../v1beta/openai/chat/completions'
      // (no '/v1' segment) — gemini.ts uses that form — so this direct-Gemini
      // fallback path looks broken. TODO confirm and align with callLLM.
      url: 'https://generativelanguage.googleapis.com/v1beta/openai',
      key: geminiKey,
      model: 'gemini-2.0-flash',
    };
  }
  // LiteLLM path: base url + proxy master key; LiteLLM maps the
  // 'gemini-flash' model alias to the real upstream model.
  return { url, key, model: 'gemini-flash' };
}
// ─── System prompt ────────────────────────────────────────────── // ─── System prompt ──────────────────────────────────────────────
const SYSTEM_PROMPT = `You are a trip planning assistant with access to real search tools. Given a natural language trip description: const SYSTEM_PROMPT = `You are a trip planning assistant with access to real search tools. Given a natural language trip description:
@@ -50,70 +65,78 @@ When you have gathered all data, return ONLY a JSON object matching this schema:
"routes": [{ "fromIndex": number, "toIndex": number, "distanceMeters": number, "durationSeconds": number }] "routes": [{ "fromIndex": number, "toIndex": number, "distanceMeters": number, "durationSeconds": number }]
}`; }`;
// ─── Tool declarations (Gemini function calling format) ───────── // ─── Tool declarations (OpenAI format) ──────────────────────────
const TOOL_DECLARATIONS = { const TOOLS = [
tools: [{ {
function_declarations: [ type: 'function' as const,
{ function: {
name: 'geocodeLocation', name: 'geocodeLocation',
description: 'Geocode a location name to lat/lng coordinates using Nominatim', description: 'Geocode a location name to lat/lng coordinates using Nominatim',
parameters: { parameters: {
type: 'object', type: 'object',
properties: { properties: {
name: { type: 'string', description: 'Location name (city, region, or address)' }, name: { type: 'string', description: 'Location name (city, region, or address)' },
},
required: ['name'],
}, },
required: ['name'],
}, },
{ },
name: 'searchFlights', },
description: 'Search for real flights between two cities using Kiwi Tequila API. Returns top 5 cheapest flights.', {
parameters: { type: 'function' as const,
type: 'object', function: {
properties: { name: 'searchFlights',
from: { type: 'string', description: 'Departure city or airport code' }, description: 'Search for real flights between two cities. Returns top 5 cheapest flights.',
to: { type: 'string', description: 'Arrival city or airport code' }, parameters: {
dateFrom: { type: 'string', description: 'Departure date (ISO format YYYY-MM-DD)' }, type: 'object',
dateTo: { type: 'string', description: 'Latest departure date (ISO format YYYY-MM-DD), can be same as dateFrom for one-way' }, properties: {
adults: { type: 'number', description: 'Number of adult passengers (default 1)' }, from: { type: 'string', description: 'Departure city or airport code' },
}, to: { type: 'string', description: 'Arrival city or airport code' },
required: ['from', 'to', 'dateFrom', 'dateTo'], dateFrom: { type: 'string', description: 'Departure date (YYYY-MM-DD)' },
dateTo: { type: 'string', description: 'Latest departure date (YYYY-MM-DD)' },
adults: { type: 'number', description: 'Number of adult passengers (default 1)' },
}, },
required: ['from', 'to', 'dateFrom', 'dateTo'],
}, },
{ },
name: 'searchAccommodation', },
description: 'Search for real Airbnb listings at a location. Returns top 5 listings with prices and ratings.', {
parameters: { type: 'function' as const,
type: 'object', function: {
properties: { name: 'searchAccommodation',
location: { type: 'string', description: 'Location to search (city name, optionally with country)' }, description: 'Search for real Airbnb listings at a location. Returns top 5 listings.',
checkin: { type: 'string', description: 'Check-in date (YYYY-MM-DD)' }, parameters: {
checkout: { type: 'string', description: 'Check-out date (YYYY-MM-DD)' }, type: 'object',
guests: { type: 'number', description: 'Number of guests (default 2)' }, properties: {
maxPrice: { type: 'number', description: 'Maximum price per night in USD (optional)' }, location: { type: 'string', description: 'Location to search' },
}, checkin: { type: 'string', description: 'Check-in date (YYYY-MM-DD)' },
required: ['location'], checkout: { type: 'string', description: 'Check-out date (YYYY-MM-DD)' },
guests: { type: 'number', description: 'Number of guests (default 2)' },
maxPrice: { type: 'number', description: 'Max price per night in USD (optional)' },
}, },
required: ['location'],
}, },
{ },
name: 'computeRoute', },
description: 'Compute driving route between two points. Only use for driveable distances (same continent, reasonable distance).', {
parameters: { type: 'function' as const,
type: 'object', function: {
properties: { name: 'computeRoute',
fromLng: { type: 'number', description: 'Departure longitude' }, description: 'Compute driving route between two points. Only for driveable distances.',
fromLat: { type: 'number', description: 'Departure latitude' }, parameters: {
toLng: { type: 'number', description: 'Arrival longitude' }, type: 'object',
toLat: { type: 'number', description: 'Arrival latitude' }, properties: {
profile: { type: 'string', description: 'Routing profile: driving-car, cycling-regular, or foot-walking' }, fromLng: { type: 'number', description: 'Departure longitude' },
}, fromLat: { type: 'number', description: 'Departure latitude' },
required: ['fromLng', 'fromLat', 'toLng', 'toLat'], toLng: { type: 'number', description: 'Arrival longitude' },
toLat: { type: 'number', description: 'Arrival latitude' },
profile: { type: 'string', description: 'Routing profile: driving-car, cycling-regular, or foot-walking' },
}, },
required: ['fromLng', 'fromLat', 'toLng', 'toLat'],
}, },
], },
}], },
}; ];
// ─── Tool executor ────────────────────────────────────────────── // ─── Tool executor ──────────────────────────────────────────────
@@ -158,108 +181,109 @@ async function executeTool(name: string, args: ToolCallArgs): Promise<unknown> {
export type PlannerStatus = (message: string) => void; export type PlannerStatus = (message: string) => void;
// ─── OpenAI-compatible message types ────────────────────────────
// One message in an OpenAI-style chat transcript, covering all four roles:
// system/user text turns, assistant turns (which may carry tool calls
// instead of text), and 'tool' turns returning a call's serialized result.
interface ChatMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  // Text content; typed nullable because assistant turns that only make
  // tool calls can arrive with content null.
  content?: string | null;
  // Present on assistant messages that invoke functions; `arguments` is a
  // JSON-encoded string, not a parsed object.
  tool_calls?: Array<{
    id: string;
    type: 'function';
    function: { name: string; arguments: string };
  }>;
  // Present on role:'tool' replies to link the result to the originating call.
  tool_call_id?: string;
  name?: string;
}
// ─── Main agent loop ──────────────────────────────────────────── // ─── Main agent loop ────────────────────────────────────────────
export async function planTrip( export async function planTrip(
text: string, text: string,
onStatus?: PlannerStatus onStatus?: PlannerStatus
): Promise<EnrichedTrip> { ): Promise<EnrichedTrip> {
const apiKey = process.env.GEMINI_API_KEY; const config = getLLMConfig();
if (!apiKey) throw new Error('GEMINI_API_KEY not configured');
// Build initial conversation const messages: ChatMessage[] = [
const contents: GeminiContent[] = [ { role: 'system', content: SYSTEM_PROMPT },
{ role: 'user', parts: [{ text }] }, { role: 'user', content: text },
]; ];
for (let i = 0; i < MAX_ITERATIONS; i++) { for (let i = 0; i < MAX_ITERATIONS; i++) {
onStatus?.(`AI planning iteration ${i + 1}...`); onStatus?.(`AI planning iteration ${i + 1}...`);
const response = await callGemini(apiKey, contents); const response = await callLLM(config, messages);
const candidate = response.candidates?.[0]; const choice = response.choices?.[0];
if (!candidate?.content?.parts) { if (!choice?.message) {
throw new Error('No response from Gemini'); throw new Error('No response from LLM');
} }
const parts = candidate.content.parts; const assistantMsg = choice.message;
// Add assistant response to conversation // Add assistant message to conversation
contents.push({ role: 'model', parts }); messages.push(assistantMsg);
// Check for function calls // Check for tool calls
const functionCalls = parts.filter( const toolCalls = assistantMsg.tool_calls;
(p: GeminiPart) => p.functionCall if (!toolCalls || toolCalls.length === 0) {
); // No more tool calls — extract final JSON
return parseEnrichedTrip(assistantMsg.content || '');
if (functionCalls.length === 0) {
// No more tool calls — extract final JSON from text parts
const textParts = parts
.filter((p: GeminiPart) => p.text)
.map((p: GeminiPart) => p.text)
.join('');
return parseEnrichedTrip(textParts);
} }
// Execute tool calls (in parallel where possible) // Execute tool calls in parallel
onStatus?.(describeToolCalls(functionCalls)); onStatus?.(describeToolCalls(toolCalls));
const toolResults = await Promise.all( const toolResults = await Promise.all(
functionCalls.map(async (part: GeminiPart) => { toolCalls.map(async (tc) => {
const { name, args } = part.functionCall!; const args = JSON.parse(tc.function.arguments);
const result = await executeTool(name, args); const result = await executeTool(tc.function.name, args);
return { return {
functionResponse: { role: 'tool' as const,
name, tool_call_id: tc.id,
response: { result: result ?? null }, content: JSON.stringify(result ?? null),
},
}; };
}) })
); );
// Add tool results to conversation // Add all tool results to conversation
contents.push({ role: 'user', parts: toolResults }); messages.push(...toolResults);
} }
throw new Error('AI planner exceeded maximum iterations'); throw new Error('AI planner exceeded maximum iterations');
} }
// ─── Gemini API call ──────────────────────────────────────────── // ─── LLM API call (OpenAI-compatible via LiteLLM) ──────────────
interface GeminiPart { interface LLMConfig {
text?: string; url: string;
functionCall?: { name: string; args: ToolCallArgs }; key: string;
functionResponse?: { name: string; response: unknown }; model: string;
} }
interface GeminiContent { interface LLMResponse {
role: 'user' | 'model'; choices?: Array<{
parts: GeminiPart[]; message: ChatMessage;
} finish_reason?: string;
interface GeminiResponse {
candidates?: Array<{
content?: { parts: GeminiPart[] };
finishReason?: string;
}>; }>;
} }
async function callGemini(apiKey: string, contents: GeminiContent[]): Promise<GeminiResponse> { async function callLLM(config: LLMConfig, messages: ChatMessage[]): Promise<LLMResponse> {
const res = await fetch(`${GEMINI_URL}?key=${apiKey}`, { const res = await fetch(`${config.url}/v1/chat/completions`, {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${config.key}`,
},
body: JSON.stringify({ body: JSON.stringify({
system_instruction: { parts: [{ text: SYSTEM_PROMPT }] }, model: config.model,
contents, messages,
...TOOL_DECLARATIONS, tools: TOOLS,
generationConfig: { tool_choice: 'auto',
temperature: 0.1, temperature: 0.1,
},
}), }),
}); });
if (!res.ok) { if (!res.ok) {
const err = await res.text(); const err = await res.text();
throw new Error(`Gemini API error (${res.status}): ${err}`); throw new Error(`LLM API error (${res.status}): ${err}`);
} }
return res.json(); return res.json();
@@ -271,7 +295,6 @@ function parseEnrichedTrip(text: string): EnrichedTrip {
const jsonStr = extractJSON(text); const jsonStr = extractJSON(text);
const parsed = JSON.parse(jsonStr); const parsed = JSON.parse(jsonStr);
// Ensure all required fields with defaults
return { return {
title: parsed.title || 'Untitled Trip', title: parsed.title || 'Untitled Trip',
destinations: (parsed.destinations || []).map((d: Record<string, unknown>) => ({ destinations: (parsed.destinations || []).map((d: Record<string, unknown>) => ({
@@ -305,10 +328,11 @@ function extractJSON(text: string): string {
// ─── Status description helpers ───────────────────────────────── // ─── Status description helpers ─────────────────────────────────
function describeToolCalls(calls: GeminiPart[]): string { function describeToolCalls(calls: Array<{ function: { name: string; arguments: string } }>): string {
const descriptions = calls.map((c) => { const descriptions = calls.map((c) => {
const name = c.functionCall!.name; const name = c.function.name;
const args = c.functionCall!.args; let args: ToolCallArgs = {};
try { args = JSON.parse(c.function.arguments); } catch { /* ignore */ }
switch (name) { switch (name) {
case 'geocodeLocation': case 'geocodeLocation':
return `Geocoding "${args.name}"`; return `Geocoding "${args.name}"`;

View File

@@ -1,6 +1,23 @@
import { ParsedTrip } from './types'; import { ParsedTrip } from './types';
const GEMINI_URL = 'https://generativelanguage.googleapis.com/v1beta/openai/chat/completions'; // Use LiteLLM proxy if available, fall back to direct Gemini
// Resolve which OpenAI-compatible backend to use: the LiteLLM proxy when
// LITELLM_API_KEY is configured, otherwise Google's direct Gemini endpoint.
// Unlike the planner's variant, the returned url is the FULL completions path.
function getLLMConfig() {
  const { LITELLM_API_KEY, LITELLM_URL, GEMINI_API_KEY } = process.env;

  if (LITELLM_API_KEY) {
    // Preferred path: route through LiteLLM, which holds the real keys.
    const base = LITELLM_URL || 'http://litellm:4000';
    return {
      url: `${base}/v1/chat/completions`,
      key: LITELLM_API_KEY,
      model: 'gemini-flash',
    };
  }

  if (!GEMINI_API_KEY) {
    throw new Error('Neither LITELLM_API_KEY nor GEMINI_API_KEY configured');
  }

  // Fallback: talk to Gemini's OpenAI-compatible endpoint directly.
  return {
    url: 'https://generativelanguage.googleapis.com/v1beta/openai/chat/completions',
    key: GEMINI_API_KEY,
    model: 'gemini-2.0-flash',
  };
}
const TRIP_PARSER_SYSTEM_PROMPT = `You are a trip planning assistant. Parse the user's trip description into structured JSON. const TRIP_PARSER_SYSTEM_PROMPT = `You are a trip planning assistant. Parse the user's trip description into structured JSON.
@@ -46,7 +63,7 @@ Rules:
- Always return valid JSON matching the schema above.`; - Always return valid JSON matching the schema above.`;
function extractJSON(text: string): string { function extractJSON(text: string): string {
// Handle Gemini's tendency to wrap JSON in ```json fences // Handle tendency to wrap JSON in ```json fences
const fenceMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/); const fenceMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
if (fenceMatch) return fenceMatch[1].trim(); if (fenceMatch) return fenceMatch[1].trim();
// Try to find raw JSON object // Try to find raw JSON object
@@ -56,19 +73,16 @@ function extractJSON(text: string): string {
} }
export async function parseTrip(naturalLanguage: string): Promise<ParsedTrip> { export async function parseTrip(naturalLanguage: string): Promise<ParsedTrip> {
const apiKey = process.env.GEMINI_API_KEY; const config = getLLMConfig();
if (!apiKey) {
throw new Error('GEMINI_API_KEY not configured');
}
const response = await fetch(GEMINI_URL, { const response = await fetch(config.url, {
method: 'POST', method: 'POST',
headers: { headers: {
'Content-Type': 'application/json', 'Content-Type': 'application/json',
'Authorization': `Bearer ${apiKey}`, 'Authorization': `Bearer ${config.key}`,
}, },
body: JSON.stringify({ body: JSON.stringify({
model: 'gemini-2.0-flash', model: config.model,
messages: [ messages: [
{ role: 'system', content: TRIP_PARSER_SYSTEM_PROMPT }, { role: 'system', content: TRIP_PARSER_SYSTEM_PROMPT },
{ role: 'user', content: naturalLanguage }, { role: 'user', content: naturalLanguage },
@@ -79,13 +93,13 @@ export async function parseTrip(naturalLanguage: string): Promise<ParsedTrip> {
if (!response.ok) { if (!response.ok) {
const err = await response.text(); const err = await response.text();
throw new Error(`Gemini API error: ${response.status} ${err}`); throw new Error(`LLM API error: ${response.status} ${err}`);
} }
const data = await response.json(); const data = await response.json();
const content = data.choices?.[0]?.message?.content; const content = data.choices?.[0]?.message?.content;
if (!content) { if (!content) {
throw new Error('No content in Gemini response'); throw new Error('No content in LLM response');
} }
const jsonStr = extractJSON(content); const jsonStr = extractJSON(content);