refactor: route AI calls through LiteLLM proxy instead of direct Gemini

Both ai-planner.ts and gemini.ts now use LiteLLM's OpenAI-compatible
API (http://litellm:4000) which proxies to Gemini Flash. Falls back
to direct Gemini API if LITELLM_API_KEY not set.

- docker-compose joins ai-internal network for LiteLLM access
- LITELLM_URL and LITELLM_API_KEY env vars added
- GEMINI_API_KEY kept as optional fallback

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jeff Emmett 2026-03-24 14:11:37 -07:00
parent 36a7829c7a
commit 7f590521b7
4 changed files with 182 additions and 137 deletions

View File

@@ -1,8 +1,10 @@
# Database
DATABASE_URL="postgresql://rtrips:changeme@localhost:5432/rtrips"
# AI - Gemini 2.0 Flash for NL parsing
GEMINI_API_KEY="your-gemini-api-key"
# AI — LiteLLM proxy (preferred) or direct Gemini fallback
LITELLM_URL="http://litellm:4000"
LITELLM_API_KEY="your-litellm-master-key"
GEMINI_API_KEY=""
# rSpace integration
NEXT_PUBLIC_RSPACE_URL="https://rspace.online"

View File

@@ -7,7 +7,9 @@ services:
restart: unless-stopped
environment:
- DATABASE_URL=postgresql://rtrips:${DB_PASSWORD}@rtrips-postgres:5432/rtrips
- GEMINI_API_KEY=${GEMINI_API_KEY}
- LITELLM_URL=${LITELLM_URL:-http://litellm:4000}
- LITELLM_API_KEY=${LITELLM_API_KEY}
- GEMINI_API_KEY=${GEMINI_API_KEY:-}
- NEXT_PUBLIC_RSPACE_URL=${NEXT_PUBLIC_RSPACE_URL:-https://rspace.online}
- RSPACE_INTERNAL_URL=${RSPACE_INTERNAL_URL:-http://rspace-online:3000}
- NEXT_PUBLIC_ENCRYPTID_SERVER_URL=${NEXT_PUBLIC_ENCRYPTID_SERVER_URL:-https://auth.ridentity.online}
@@ -35,6 +37,7 @@ services:
networks:
- traefik-public
- rtrips-internal
- ai-internal
depends_on:
rtrips-postgres:
condition: service_healthy
@@ -77,6 +80,8 @@ services:
networks:
traefik-public:
external: true
ai-internal:
external: true
rtrips-internal:
internal: true

View File

@@ -1,7 +1,7 @@
// AI Trip Planner — Gemini 2.0 Flash function-calling agent loop
// AI Trip Planner — LiteLLM-proxied Gemini Flash with function calling
//
// Sends user's trip description + tool declarations to Gemini.
// Gemini calls tools (geocode, flights, accommodation, routing) in a loop.
// Uses LiteLLM's OpenAI-compatible API to call gemini-flash with tools.
// LiteLLM routes to the actual Gemini API and manages keys/rate limits.
// Returns an EnrichedTrip with real prices, coordinates, and routes.
import {
@@ -12,9 +12,24 @@ import {
} from './ai-tools';
import type { EnrichedTrip } from './types';
const GEMINI_URL = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent';
const MAX_ITERATIONS = 8;
// Resolve which OpenAI-compatible backend to call: the LiteLLM proxy when a
// master key is configured (preferred — it manages Gemini keys and rate
// limits), otherwise Gemini's own OpenAI-compat endpoint as a direct fallback.
function getLLMConfig() {
  const litellmKey = process.env.LITELLM_API_KEY;
  if (litellmKey) {
    return {
      url: process.env.LITELLM_URL || 'http://litellm:4000',
      key: litellmKey,
      model: 'gemini-flash',
    };
  }
  // Fallback: direct Gemini via its OpenAI-compatible API.
  // NOTE(review): callLLM appends a chat-completions path to this base —
  // confirm the composed URL matches Gemini's documented
  // /v1beta/openai/chat/completions endpoint.
  const geminiKey = process.env.GEMINI_API_KEY;
  if (!geminiKey) {
    throw new Error('Neither LITELLM_API_KEY nor GEMINI_API_KEY configured');
  }
  return {
    url: 'https://generativelanguage.googleapis.com/v1beta/openai',
    key: geminiKey,
    model: 'gemini-2.0-flash',
  };
}
// ─── System prompt ──────────────────────────────────────────────
const SYSTEM_PROMPT = `You are a trip planning assistant with access to real search tools. Given a natural language trip description:
@@ -50,70 +65,78 @@ When you have gathered all data, return ONLY a JSON object matching this schema:
"routes": [{ "fromIndex": number, "toIndex": number, "distanceMeters": number, "durationSeconds": number }]
}`;
// ─── Tool declarations (Gemini function calling format) ─────────
// ─── Tool declarations (OpenAI format) ──────────────────────────
const TOOL_DECLARATIONS = {
tools: [{
function_declarations: [
{
name: 'geocodeLocation',
description: 'Geocode a location name to lat/lng coordinates using Nominatim',
parameters: {
type: 'object',
properties: {
name: { type: 'string', description: 'Location name (city, region, or address)' },
},
required: ['name'],
const TOOLS = [
{
type: 'function' as const,
function: {
name: 'geocodeLocation',
description: 'Geocode a location name to lat/lng coordinates using Nominatim',
parameters: {
type: 'object',
properties: {
name: { type: 'string', description: 'Location name (city, region, or address)' },
},
required: ['name'],
},
{
name: 'searchFlights',
description: 'Search for real flights between two cities using Kiwi Tequila API. Returns top 5 cheapest flights.',
parameters: {
type: 'object',
properties: {
from: { type: 'string', description: 'Departure city or airport code' },
to: { type: 'string', description: 'Arrival city or airport code' },
dateFrom: { type: 'string', description: 'Departure date (ISO format YYYY-MM-DD)' },
dateTo: { type: 'string', description: 'Latest departure date (ISO format YYYY-MM-DD), can be same as dateFrom for one-way' },
adults: { type: 'number', description: 'Number of adult passengers (default 1)' },
},
required: ['from', 'to', 'dateFrom', 'dateTo'],
},
},
{
type: 'function' as const,
function: {
name: 'searchFlights',
description: 'Search for real flights between two cities. Returns top 5 cheapest flights.',
parameters: {
type: 'object',
properties: {
from: { type: 'string', description: 'Departure city or airport code' },
to: { type: 'string', description: 'Arrival city or airport code' },
dateFrom: { type: 'string', description: 'Departure date (YYYY-MM-DD)' },
dateTo: { type: 'string', description: 'Latest departure date (YYYY-MM-DD)' },
adults: { type: 'number', description: 'Number of adult passengers (default 1)' },
},
required: ['from', 'to', 'dateFrom', 'dateTo'],
},
{
name: 'searchAccommodation',
description: 'Search for real Airbnb listings at a location. Returns top 5 listings with prices and ratings.',
parameters: {
type: 'object',
properties: {
location: { type: 'string', description: 'Location to search (city name, optionally with country)' },
checkin: { type: 'string', description: 'Check-in date (YYYY-MM-DD)' },
checkout: { type: 'string', description: 'Check-out date (YYYY-MM-DD)' },
guests: { type: 'number', description: 'Number of guests (default 2)' },
maxPrice: { type: 'number', description: 'Maximum price per night in USD (optional)' },
},
required: ['location'],
},
},
{
type: 'function' as const,
function: {
name: 'searchAccommodation',
description: 'Search for real Airbnb listings at a location. Returns top 5 listings.',
parameters: {
type: 'object',
properties: {
location: { type: 'string', description: 'Location to search' },
checkin: { type: 'string', description: 'Check-in date (YYYY-MM-DD)' },
checkout: { type: 'string', description: 'Check-out date (YYYY-MM-DD)' },
guests: { type: 'number', description: 'Number of guests (default 2)' },
maxPrice: { type: 'number', description: 'Max price per night in USD (optional)' },
},
required: ['location'],
},
{
name: 'computeRoute',
description: 'Compute driving route between two points. Only use for driveable distances (same continent, reasonable distance).',
parameters: {
type: 'object',
properties: {
fromLng: { type: 'number', description: 'Departure longitude' },
fromLat: { type: 'number', description: 'Departure latitude' },
toLng: { type: 'number', description: 'Arrival longitude' },
toLat: { type: 'number', description: 'Arrival latitude' },
profile: { type: 'string', description: 'Routing profile: driving-car, cycling-regular, or foot-walking' },
},
required: ['fromLng', 'fromLat', 'toLng', 'toLat'],
},
},
{
type: 'function' as const,
function: {
name: 'computeRoute',
description: 'Compute driving route between two points. Only for driveable distances.',
parameters: {
type: 'object',
properties: {
fromLng: { type: 'number', description: 'Departure longitude' },
fromLat: { type: 'number', description: 'Departure latitude' },
toLng: { type: 'number', description: 'Arrival longitude' },
toLat: { type: 'number', description: 'Arrival latitude' },
profile: { type: 'string', description: 'Routing profile: driving-car, cycling-regular, or foot-walking' },
},
required: ['fromLng', 'fromLat', 'toLng', 'toLat'],
},
],
}],
};
},
},
];
// ─── Tool executor ──────────────────────────────────────────────
@@ -158,108 +181,109 @@ async function executeTool(name: string, args: ToolCallArgs): Promise<unknown> {
export type PlannerStatus = (message: string) => void;
// ─── OpenAI-compatible message types ────────────────────────────
// One turn in an OpenAI-style chat conversation (system / user / assistant /
// tool), as sent to and received from the chat-completions endpoint.
interface ChatMessage {
  role: 'system' | 'user' | 'assistant' | 'tool';
  // Text content; an assistant turn that only carries tool_calls may be null.
  content?: string | null;
  // Tool invocations requested by the assistant (OpenAI function-calling
  // shape); `arguments` is a JSON-encoded string, not a parsed object.
  tool_calls?: Array<{
    id: string;
    type: 'function';
    function: { name: string; arguments: string };
  }>;
  // On a role:'tool' message: id of the tool_call this result answers.
  tool_call_id?: string;
  // Optional function name — presumably the legacy OpenAI field; not read here.
  name?: string;
}
// ─── Main agent loop ────────────────────────────────────────────
export async function planTrip(
text: string,
onStatus?: PlannerStatus
): Promise<EnrichedTrip> {
const apiKey = process.env.GEMINI_API_KEY;
if (!apiKey) throw new Error('GEMINI_API_KEY not configured');
const config = getLLMConfig();
// Build initial conversation
const contents: GeminiContent[] = [
{ role: 'user', parts: [{ text }] },
const messages: ChatMessage[] = [
{ role: 'system', content: SYSTEM_PROMPT },
{ role: 'user', content: text },
];
for (let i = 0; i < MAX_ITERATIONS; i++) {
onStatus?.(`AI planning iteration ${i + 1}...`);
const response = await callGemini(apiKey, contents);
const candidate = response.candidates?.[0];
if (!candidate?.content?.parts) {
throw new Error('No response from Gemini');
const response = await callLLM(config, messages);
const choice = response.choices?.[0];
if (!choice?.message) {
throw new Error('No response from LLM');
}
const parts = candidate.content.parts;
// Add assistant response to conversation
contents.push({ role: 'model', parts });
const assistantMsg = choice.message;
// Add assistant message to conversation
messages.push(assistantMsg);
// Check for function calls
const functionCalls = parts.filter(
(p: GeminiPart) => p.functionCall
);
if (functionCalls.length === 0) {
// No more tool calls — extract final JSON from text parts
const textParts = parts
.filter((p: GeminiPart) => p.text)
.map((p: GeminiPart) => p.text)
.join('');
return parseEnrichedTrip(textParts);
// Check for tool calls
const toolCalls = assistantMsg.tool_calls;
if (!toolCalls || toolCalls.length === 0) {
// No more tool calls — extract final JSON
return parseEnrichedTrip(assistantMsg.content || '');
}
// Execute tool calls (in parallel where possible)
onStatus?.(describeToolCalls(functionCalls));
// Execute tool calls in parallel
onStatus?.(describeToolCalls(toolCalls));
const toolResults = await Promise.all(
functionCalls.map(async (part: GeminiPart) => {
const { name, args } = part.functionCall!;
const result = await executeTool(name, args);
toolCalls.map(async (tc) => {
const args = JSON.parse(tc.function.arguments);
const result = await executeTool(tc.function.name, args);
return {
functionResponse: {
name,
response: { result: result ?? null },
},
role: 'tool' as const,
tool_call_id: tc.id,
content: JSON.stringify(result ?? null),
};
})
);
// Add tool results to conversation
contents.push({ role: 'user', parts: toolResults });
// Add all tool results to conversation
messages.push(...toolResults);
}
throw new Error('AI planner exceeded maximum iterations');
}
// ─── Gemini API call ────────────────────────────────────────────
// ─── LLM API call (OpenAI-compatible via LiteLLM) ──────────────
interface GeminiPart {
text?: string;
functionCall?: { name: string; args: ToolCallArgs };
functionResponse?: { name: string; response: unknown };
// Resolved backend configuration produced by getLLMConfig().
interface LLMConfig {
  url: string;   // base URL of the OpenAI-compatible endpoint
  key: string;   // bearer token sent in the Authorization header
  model: string; // model identifier passed in the request body
}
interface GeminiContent {
role: 'user' | 'model';
parts: GeminiPart[];
}
interface GeminiResponse {
candidates?: Array<{
content?: { parts: GeminiPart[] };
finishReason?: string;
// Minimal shape of a chat-completions response — only the fields we read.
interface LLMResponse {
  choices?: Array<{
    message: ChatMessage;
    finish_reason?: string;
  }>;
}
async function callGemini(apiKey: string, contents: GeminiContent[]): Promise<GeminiResponse> {
const res = await fetch(`${GEMINI_URL}?key=${apiKey}`, {
async function callLLM(config: LLMConfig, messages: ChatMessage[]): Promise<LLMResponse> {
const res = await fetch(`${config.url}/v1/chat/completions`, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${config.key}`,
},
body: JSON.stringify({
system_instruction: { parts: [{ text: SYSTEM_PROMPT }] },
contents,
...TOOL_DECLARATIONS,
generationConfig: {
temperature: 0.1,
},
model: config.model,
messages,
tools: TOOLS,
tool_choice: 'auto',
temperature: 0.1,
}),
});
if (!res.ok) {
const err = await res.text();
throw new Error(`Gemini API error (${res.status}): ${err}`);
throw new Error(`LLM API error (${res.status}): ${err}`);
}
return res.json();
@@ -271,7 +295,6 @@ function parseEnrichedTrip(text: string): EnrichedTrip {
const jsonStr = extractJSON(text);
const parsed = JSON.parse(jsonStr);
// Ensure all required fields with defaults
return {
title: parsed.title || 'Untitled Trip',
destinations: (parsed.destinations || []).map((d: Record<string, unknown>) => ({
@@ -305,10 +328,11 @@ function extractJSON(text: string): string {
// ─── Status description helpers ─────────────────────────────────
function describeToolCalls(calls: GeminiPart[]): string {
function describeToolCalls(calls: Array<{ function: { name: string; arguments: string } }>): string {
const descriptions = calls.map((c) => {
const name = c.functionCall!.name;
const args = c.functionCall!.args;
const name = c.function.name;
let args: ToolCallArgs = {};
try { args = JSON.parse(c.function.arguments); } catch { /* ignore */ }
switch (name) {
case 'geocodeLocation':
return `Geocoding "${args.name}"`;

View File

@@ -1,6 +1,23 @@
import { ParsedTrip } from './types';
const GEMINI_URL = 'https://generativelanguage.googleapis.com/v1beta/openai/chat/completions';
// Pick the chat-completions endpoint: the LiteLLM proxy when a master key is
// configured, otherwise Gemini's own OpenAI-compatible API as a direct
// fallback. Throws when neither key is present.
function getLLMConfig() {
  const proxyKey = process.env.LITELLM_API_KEY;
  if (!proxyKey) {
    // No proxy configured — call Gemini's compat endpoint directly.
    const directKey = process.env.GEMINI_API_KEY;
    if (!directKey) {
      throw new Error('Neither LITELLM_API_KEY nor GEMINI_API_KEY configured');
    }
    return {
      url: 'https://generativelanguage.googleapis.com/v1beta/openai/chat/completions',
      key: directKey,
      model: 'gemini-2.0-flash',
    };
  }
  const proxyBase = process.env.LITELLM_URL || 'http://litellm:4000';
  return {
    url: `${proxyBase}/v1/chat/completions`,
    key: proxyKey,
    model: 'gemini-flash',
  };
}
const TRIP_PARSER_SYSTEM_PROMPT = `You are a trip planning assistant. Parse the user's trip description into structured JSON.
@@ -46,7 +63,7 @@ Rules:
- Always return valid JSON matching the schema above.`;
function extractJSON(text: string): string {
// Handle Gemini's tendency to wrap JSON in ```json fences
// Handle tendency to wrap JSON in ```json fences
const fenceMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
if (fenceMatch) return fenceMatch[1].trim();
// Try to find raw JSON object
@@ -56,19 +73,16 @@ function extractJSON(text: string): string {
}
export async function parseTrip(naturalLanguage: string): Promise<ParsedTrip> {
const apiKey = process.env.GEMINI_API_KEY;
if (!apiKey) {
throw new Error('GEMINI_API_KEY not configured');
}
const config = getLLMConfig();
const response = await fetch(GEMINI_URL, {
const response = await fetch(config.url, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${apiKey}`,
'Authorization': `Bearer ${config.key}`,
},
body: JSON.stringify({
model: 'gemini-2.0-flash',
model: config.model,
messages: [
{ role: 'system', content: TRIP_PARSER_SYSTEM_PROMPT },
{ role: 'user', content: naturalLanguage },
@@ -79,13 +93,13 @@ export async function parseTrip(naturalLanguage: string): Promise<ParsedTrip> {
if (!response.ok) {
const err = await response.text();
throw new Error(`Gemini API error: ${response.status} ${err}`);
throw new Error(`LLM API error: ${response.status} ${err}`);
}
const data = await response.json();
const content = data.choices?.[0]?.message?.content;
if (!content) {
throw new Error('No content in Gemini response');
throw new Error('No content in LLM response');
}
const jsonStr = extractJSON(content);