/**
 * Design Agent Route — Gemini tool-calling loop that drives Scribus via the bridge.
 *
 * POST /api/design-agent { brief, space, model? }
 * Returns SSE stream of agent steps.
 */

import { Hono } from "hono";
import { streamSSE } from "hono/streaming";
import { DESIGN_TOOL_DECLARATIONS, DESIGN_SYSTEM_PROMPT } from "../../lib/design-tool-declarations";

/** Base URL of the Scribus bridge; overridable through the environment. */
const SCRIBUS_BRIDGE_URL = process.env.SCRIBUS_BRIDGE_URL || "http://scribus-novnc:8765";
/** Shared secret for the bridge; an empty string means no secret is configured. */
const BRIDGE_SECRET = process.env.SCRIBUS_BRIDGE_SECRET || "";
/** Upper bound on Gemini round-trips for a single design request. */
const MAX_TURNS = 10;

export const designAgentRoutes = new Hono();

/**
 * Render a tool argument as display text.
 *
 * Tool arguments are produced by the model, so a field expected to be a string
 * may arrive as a number or object; this never throws on such values.
 * Falsy values become the empty string.
 */
function displayText(value: unknown): string {
	if (typeof value === "string") return value;
	if (!value) return "";
	return typeof value === "object" ? JSON.stringify(value) : String(value);
}

/** Clip `value` to `max` characters, appending an ellipsis when something was cut. */
function clipForDisplay(value: unknown, max: number): string {
	const text = displayText(value);
	return text.slice(0, max) + (text.length > max ? "\u2026" : "");
}

/**
 * Human-readable description of a tool call.
 *
 * @param name - Tool name as emitted by the model.
 * @param args - Tool arguments as emitted by the model (not validated).
 * @returns A one-line status string; unknown tools yield `Executing <name>`.
 */
function describeToolCall(name: string, args: Record<string, any>): string {
	switch (name) {
		case "new_document": {
			const pages = args.pages || 1;
			return `Creating ${args.width || 210}\u00d7${args.height || 297}mm document (${pages} page${pages > 1 ? "s" : ""})`;
		}
		case "add_text_frame":
			return `Adding text frame: "${clipForDisplay(args.text, 40)}" at ${args.x},${args.y}mm (${args.width}\u00d7${args.height})`;
		case "add_image_frame":
			return `Adding image frame at ${args.x},${args.y}mm (${args.width}\u00d7${args.height})`;
		case "add_shape":
			return `Adding ${args.shapeType || "rect"} shape at ${args.x},${args.y}mm (${args.width}\u00d7${args.height}${args.fill ? ", fill:" + args.fill : ""})`;
		case "set_background_color":
			return `Setting background color to ${args.color}`;
		case "get_state":
			return "Checking current document state";
		case "save_document":
			return `Saving document as ${args.filename || "design.sla"}`;
		case "generate_image":
			return `Generating AI image: "${clipForDisplay(args.prompt, 50)}"`;
		default:
			return `Executing ${name}`;
	}
}

/** Forward a command to the Scribus bridge.
*
 * POSTs `{ action, args }` as JSON to `/api/scribus/command` with a 30-second timeout.
 *
 * @param action - Bridge action name.
 * @param args - Arguments forwarded verbatim under the `args` key.
 * @returns The bridge's parsed JSON reply, or an `{ error }` object on failure. Never rejects.
 */
async function bridgeCommand(action: string, args: Record<string, any> = {}): Promise<any> {
	return bridgeRequest("/api/scribus/command", {
		method: "POST",
		body: { action, args },
		timeoutMs: 30_000,
	});
}

/**
 * Shared transport for every bridge endpoint.
 *
 * Adds the `X-Bridge-Secret` header when a secret is configured, applies the
 * timeout, and converts every failure mode into an `{ error }` object so callers
 * can branch on `result.error` instead of catching.
 *
 * @param path - Path appended to `SCRIBUS_BRIDGE_URL`.
 * @param opts - HTTP method, optional JSON body, and timeout in milliseconds.
 * @returns Parsed JSON from the bridge, or `{ error }`.
 */
async function bridgeRequest(
	path: string,
	opts: { method: "GET" | "POST"; body?: unknown; timeoutMs: number },
): Promise<any> {
	const headers: Record<string, string> = {};
	if (opts.method === "POST") headers["Content-Type"] = "application/json";
	if (BRIDGE_SECRET) headers["X-Bridge-Secret"] = BRIDGE_SECRET;

	try {
		const res = await fetch(`${SCRIBUS_BRIDGE_URL}${path}`, {
			method: opts.method,
			headers,
			body: opts.body === undefined ? undefined : JSON.stringify(opts.body),
			signal: AbortSignal.timeout(opts.timeoutMs),
		});

		// Read as text first so a non-JSON body (e.g. a proxy error page) is reported
		// as such rather than being mislabelled "unreachable" by the outer catch.
		const raw = await res.text();
		let payload: any;
		try {
			payload = JSON.parse(raw);
		} catch {
			return { error: `Bridge returned a non-JSON response (HTTP ${res.status})` };
		}

		// A non-2xx reply must carry an `error` field, otherwise callers that only
		// check `result.error` would treat the failure as success.
		const isRecord = payload !== null && typeof payload === "object" && !Array.isArray(payload);
		if (!res.ok && !(isRecord && payload.error)) {
			return { ...(isRecord ? payload : {}), error: `Bridge request failed (HTTP ${res.status})` };
		}
		return payload;
	} catch (e: unknown) {
		const reason = e instanceof Error ? e.message : String(e);
		return { error: `Bridge unreachable: ${reason}` };
	}
}

/**
 * Get bridge state.
 *
 * @returns The parsed JSON from `GET /api/scribus/state` (10-second timeout), or `{ error }`.
 */
async function bridgeState(): Promise<any> {
	return bridgeRequest("/api/scribus/state", { method: "GET", timeoutMs: 10_000 });
}

/**
 * Start Scribus if not running.
 *
 * @returns The parsed JSON from `POST /api/scribus/start` (20-second timeout), or `{ error }`.
 */
async function ensureScribusRunning(): Promise<any> {
	return bridgeRequest("/api/scribus/start", { method: "POST", timeoutMs: 20_000 });
}

/** Translate a Gemini tool call into a bridge command.
*
 * Most tools map one-to-one onto a bridge action of the same name. The exceptions:
 * `get_state` reads the state endpoint, `save_document` becomes `save_as_sla` with
 * the caller's `space` added, and `generate_image` runs the image pipeline.
 *
 * @param name - Tool name emitted by the model.
 * @param args - Tool arguments emitted by the model.
 * @param space - Space identifier attached to save requests.
 * @returns The bridge (or image pipeline) result; `{ error }` for unknown tools.
 */
async function executeToolCall(name: string, args: Record<string, any>, space: string): Promise<any> {
	switch (name) {
		case "new_document":
		case "add_text_frame":
		case "add_image_frame":
		case "add_shape":
		case "set_background_color":
			// These tool names are identical to the bridge action names.
			return bridgeCommand(name, args);
		case "get_state":
			return bridgeState();
		case "save_document":
			return bridgeCommand("save_as_sla", { ...args, space });
		case "generate_image":
			// Generate image via fal.ai, then place it
			return generateAndPlaceImage(args);
		default:
			return { error: `Unknown tool: ${name}` };
	}
}

/**
 * Generate an image via the rSpace /api/image-gen endpoint and place an image frame for it.
 *
 * @param args - Expects `prompt` plus the frame geometry `x`, `y`, `width`, `height`.
 * @returns The bridge's `add_image_frame` result merged with `imageUrl` and `imagePath`,
 *   or an `{ error }` object. Never rejects.
 */
async function generateAndPlaceImage(args: Record<string, any>): Promise<any> {
	try {
		// Call internal image gen API
		const res = await fetch(`http://localhost:${process.env.PORT || 3000}/api/image-gen`, {
			method: "POST",
			headers: { "Content-Type": "application/json" },
			body: JSON.stringify({ prompt: args.prompt, provider: "fal", model: "flux-pro" }),
			signal: AbortSignal.timeout(60_000),
		});
		const data = (await res.json()) as any;
		if (!data?.url) return { error: "Image generation failed", details: data };

		// Confirm the generated image is actually retrievable.
		const imageUrl = data.url;
		const downloadRes = await fetch(imageUrl, { signal: AbortSignal.timeout(30_000) });
		// The bytes are not persisted anywhere yet, so release the body instead of
		// leaving an unread stream holding the connection. Best-effort: a failed
		// cancel does not affect the result.
		await downloadRes.body?.cancel().catch(() => {});
		if (!downloadRes.ok) return { error: "Failed to download generated image" };

		// One timestamp for both names so the file name and frame name correlate.
		const stamp = Date.now();
		const imageName = `gen_${stamp}.png`;
		const imagePath = `/data/designs/_generated/${imageName}`;

		// NOTE(review): nothing in this file writes the image to `imagePath`; the frame
		// is placed referencing a path that presumably has to be populated on the
		// bridge side — confirm before relying on the image rendering.
		const placeResult = await bridgeCommand("add_image_frame", {
			x: args.x,
			y: args.y,
			width: args.width,
			height: args.height,
			imagePath,
			name: `gen_image_${stamp}`,
		});

		return { ...placeResult, imageUrl, imagePath };
	} catch (e: unknown) {
		const reason = e instanceof Error ? e.message : String(e);
		return { error: `Image generation failed: ${reason}` };
	}
}

/**
 * Call Gemini with tool declarations.
 *
 * @param messages - Conversation so far, in Gemini `contents` format.
 * @param model - Model id; falls back to `gemini-2.0-flash` when empty.
 * @returns The SDK response, or `{ error }` when `GEMINI_API_KEY` is not set.
 *   Errors thrown by the SDK call itself propagate to the caller.
 */
async function callGemini(messages: any[], model: string): Promise<any> {
	// Check configuration before paying for the dynamic import.
	const apiKey = process.env.GEMINI_API_KEY;
	if (!apiKey) return { error: "GEMINI_API_KEY not configured" };

	// Use the Gemini SDK from the AI services
	const { GoogleGenAI } = await import("@google/genai");
	const genai = new GoogleGenAI({ apiKey });

	const tools: any[] = [{
		functionDeclarations: DESIGN_TOOL_DECLARATIONS.map(d => ({
			name: d.name,
			description: d.description,
			parameters: d.parameters,
		})),
	}];

	const response = await genai.models.generateContent({
		model: model || "gemini-2.0-flash",
		contents: messages,
		config: {
			tools,
			systemInstruction: DESIGN_SYSTEM_PROMPT,
		},
	} as any);

	return response;
}

/**
 * POST /api/design-agent — run the agent loop and stream each step as an SSE `step` event.
 *
 * Body: `{ brief, space = "demo", model = "gemini-2.0-flash" }`. Responds 400 when
 * `brief` is missing or the body is not valid JSON.
 */
designAgentRoutes.post("/api/design-agent", async (c) => {
	const body = await c.req.json().catch(() => null);
	if (!body?.brief) return c.json({ error: "Missing 'brief' in request body" }, 400);

	const { brief, space = "demo", model = "gemini-2.0-flash" } = body;

	return streamSSE(c, async (stream) => {
		let eventId = 0;

		// Once the client disconnects there is nobody to stream to; stop spending
		// Gemini calls and stop mutating the Scribus document.
		let clientGone = false;
		stream.onAbort(() => {
			clientGone = true;
		});

		const sendEvent = async (data: any) => {
			await stream.writeSSE({ data: JSON.stringify(data), event: "step", id: String(++eventId) });
		};

		// Keepalive to prevent Cloudflare QUIC/HTTP2 timeout (drops idle streams ~100s)
		const keepalive = setInterval(() => {
			stream.writeSSE({ data: "", event: "keepalive", id: String(++eventId) }).catch(() => {});
		}, 15_000);

		try {
			// Step 1: Ensure Scribus is running
			await sendEvent({ step: 1, action: "starting_scribus", status: "Starting Scribus..." });
			const startResult = await ensureScribusRunning();
			if (startResult.error) {
				await sendEvent({ step: 1, action: "error", error: startResult.error });
				return;
			}
			await sendEvent({ step: 1, action: "scribus_ready", result: startResult });

			// Step 2: Agentic loop
			const messages: any[] = [
				{ role: "user", parts: [{ text: `Design brief: ${brief}` }] },
			];
			let failed = false;

			for (let turn = 0; turn < MAX_TURNS && !clientGone; turn++) {
				const step = turn + 2;
				const response = await callGemini(messages, model);
				const candidate = response?.candidates?.[0];

				if (!candidate) {
					// Surface a specific error (e.g. missing API key) when callGemini
					// provided one instead of the generic message.
					await sendEvent({ step, action: "error", error: response?.error || "No response from Gemini" });
					failed = true;
					break;
				}

				const parts = candidate.content?.parts || [];
				const textParts = parts.filter((p: any) => p.text);
				const toolCalls = parts.filter((p: any) => p.functionCall);

				// Send thinking event with Gemini's reasoning text (if any)
				const thinkingText = textParts.map((p: any) => p.text).join("\n").trim();
				await sendEvent({
					step,
					action: "thinking",
					status: thinkingText || `Planning turn ${turn + 1}...`,
					text: thinkingText || null,
				});

				// No tool calls means the model has nothing further to do. This also
				// covers an empty reply, which would otherwise append empty `parts`
				// to the conversation and trigger another request.
				if (toolCalls.length === 0) {
					await sendEvent({ step, action: "complete", message: thinkingText });
					break;
				}

				// Execute tool calls
				const toolResults: any[] = [];
				for (const part of toolCalls) {
					if (clientGone) break;
					const { name, args } = part.functionCall;
					const description = describeToolCall(name, args || {});

					await sendEvent({
						step,
						action: "executing",
						tool: name,
						args,
						description,
						status: description,
					});

					const result = await executeToolCall(name, args || {}, space);

					await sendEvent({ step, action: "tool_result", tool: name, result });

					toolResults.push({ functionResponse: { name, response: result } });
				}

				// Add assistant response + tool results to conversation
				messages.push({ role: "model", parts });
				messages.push({ role: "user", parts: toolResults });
			}

			if (clientGone) return;

			// Final state check
			await sendEvent({ step: MAX_TURNS + 2, action: "verifying", status: "Getting final state..." });
			const finalState = await bridgeState();
			await sendEvent({
				step: MAX_TURNS + 2,
				action: "done",
				state: finalState,
				// Do not claim success when the loop stopped on an error.
				status: failed ? "Design incomplete (agent stopped after an error)" : "Design complete!",
			});
		} catch (e: unknown) {
			const reason = e instanceof Error ? e.message : String(e);
			await sendEvent({ step: 0, action: "error", error: reason });
		} finally {
			clearInterval(keepalive);
		}
	});
});

/** GET /api/design-agent/health — static liveness reply that echoes the configured bridge URL. */
designAgentRoutes.get("/api/design-agent/health", (c) => {
	return c.json({ ok: true, bridge: SCRIBUS_BRIDGE_URL });
});