/**
 * Design Agent Route — Gemini tool-calling loop that drives Scribus via the bridge.
 *
 * POST /api/design-agent { brief, space, model? }
 * Returns an SSE stream of agent steps.
 */
|
|
|
|
import { Hono } from "hono";
|
|
import { streamSSE } from "hono/streaming";
|
|
import { DESIGN_TOOL_DECLARATIONS, DESIGN_SYSTEM_PROMPT } from "../../lib/design-tool-declarations";
|
|
|
|
/** Base URL of the Scribus bridge; taken from SCRIBUS_BRIDGE_URL when that is set and non-empty. */
const SCRIBUS_BRIDGE_URL = process.env.SCRIBUS_BRIDGE_URL || "http://scribus-novnc:8765";

/** Value sent as the X-Bridge-Secret header; an empty string means the header is omitted. */
const BRIDGE_SECRET = process.env.SCRIBUS_BRIDGE_SECRET || "";

/** Upper bound on model round-trips in a single agent run. */
const MAX_TURNS = 10;

/** Router exposing the design-agent endpoints. */
export const designAgentRoutes = new Hono();
|
|
|
|
/**
 * Render a model-supplied value as display text of at most `max` characters,
 * appending an ellipsis when it had to be cut.
 *
 * Tool arguments come from the model and are not guaranteed to be strings, so the
 * value is coerced with `String()`; falsy values become the empty string.
 */
function truncateForDisplay(value: unknown, max: number): string {
  const text = value ? String(value) : "";
  return text.length > max ? `${text.slice(0, max)}\u2026` : text;
}

/**
 * Human-readable description of a tool call, used for progress events.
 *
 * @param name Tool name as emitted by the model.
 * @param args Tool arguments; missing fields fall back to the defaults shown below.
 * @returns A one-line summary. Unknown tool names yield `Executing <name>`.
 */
function describeToolCall(name: string, args: Record<string, any>): string {
  switch (name) {
    case "new_document": {
      const pages = args.pages || 1;
      return `Creating ${args.width || 210}\u00d7${args.height || 297}mm document (${pages} page${pages > 1 ? "s" : ""})`;
    }
    case "add_text_frame":
      return `Adding text frame: "${truncateForDisplay(args.text, 40)}" at ${args.x},${args.y}mm (${args.width}\u00d7${args.height})`;
    case "add_image_frame":
      return `Adding image frame at ${args.x},${args.y}mm (${args.width}\u00d7${args.height})`;
    case "add_shape":
      return `Adding ${args.shapeType || "rect"} shape at ${args.x},${args.y}mm (${args.width}\u00d7${args.height}${args.fill ? ", fill:" + args.fill : ""})`;
    case "set_background_color":
      return `Setting background color to ${args.color}`;
    case "get_state":
      return "Checking current document state";
    case "save_document":
      return `Saving document as ${args.filename || "design.sla"}`;
    case "generate_image":
      return `Generating AI image: "${truncateForDisplay(args.prompt, 50)}"`;
    default:
      return `Executing ${name}`;
  }
}
|
|
|
|
/**
 * Forward a command to the Scribus bridge.
 *
 * POSTs `{ action, args }` as JSON to `/api/scribus/command` with a 30 s timeout and
 * returns the parsed JSON reply. This function does not throw; failures are returned
 * as `{ error: string }`:
 *  - the request itself failed or timed out → `Bridge unreachable: …`
 *  - the bridge answered with a body that is not JSON → `Bridge returned a non-JSON response (HTTP <status>)`
 *
 * @param action Bridge action name.
 * @param args   Action arguments, sent verbatim.
 */
async function bridgeCommand(action: string, args: Record<string, any> = {}): Promise<any> {
  const headers: Record<string, string> = { "Content-Type": "application/json" };
  if (BRIDGE_SECRET) headers["X-Bridge-Secret"] = BRIDGE_SECRET;

  // Remembered so a JSON parse failure can be told apart from a transport failure.
  let status: number | undefined;
  try {
    const res = await fetch(`${SCRIBUS_BRIDGE_URL}/api/scribus/command`, {
      method: "POST",
      headers,
      body: JSON.stringify({ action, args }),
      signal: AbortSignal.timeout(30_000),
    });
    status = res.status;
    return await res.json();
  } catch (e: unknown) {
    // Response.json() rejects with SyntaxError when the body is not valid JSON.
    if (e instanceof SyntaxError && status !== undefined) {
      return { error: `Bridge returned a non-JSON response (HTTP ${status})` };
    }
    const reason = e instanceof Error ? e.message : String(e);
    return { error: `Bridge unreachable: ${reason}` };
  }
}
|
|
|
|
/**
 * Get bridge state.
 *
 * GETs `/api/scribus/state` with a 10 s timeout and returns the parsed JSON reply.
 * This function does not throw; failures are returned as `{ error: string }`:
 *  - the request itself failed or timed out → `Bridge unreachable: …`
 *  - the bridge answered with a body that is not JSON → `Bridge returned a non-JSON response (HTTP <status>)`
 */
async function bridgeState(): Promise<any> {
  const headers: Record<string, string> = {};
  if (BRIDGE_SECRET) headers["X-Bridge-Secret"] = BRIDGE_SECRET;

  // Remembered so a JSON parse failure can be told apart from a transport failure.
  let status: number | undefined;
  try {
    const res = await fetch(`${SCRIBUS_BRIDGE_URL}/api/scribus/state`, {
      headers,
      signal: AbortSignal.timeout(10_000),
    });
    status = res.status;
    return await res.json();
  } catch (e: unknown) {
    // Response.json() rejects with SyntaxError when the body is not valid JSON.
    if (e instanceof SyntaxError && status !== undefined) {
      return { error: `Bridge returned a non-JSON response (HTTP ${status})` };
    }
    const reason = e instanceof Error ? e.message : String(e);
    return { error: `Bridge unreachable: ${reason}` };
  }
}
|
|
|
|
/**
 * Ask the bridge to start Scribus and confirm the runner answers a state query.
 *
 * Sequence: POST /start → GET /state → (only if the state reply carries `error`)
 * POST /start a second time. Returns the JSON of the start call that was used, or
 * `{ error }` if any request throws.
 */
async function ensureScribusRunning(): Promise<any> {
  const headers: Record<string, string> = { "Content-Type": "application/json" };
  if (BRIDGE_SECRET) {
    headers["X-Bridge-Secret"] = BRIDGE_SECRET;
  }

  // One start request; used for both the initial attempt and the forced retry.
  const requestStart = async (): Promise<any> => {
    const reply = await fetch(`${SCRIBUS_BRIDGE_URL}/api/scribus/start`, {
      method: "POST",
      headers,
      signal: AbortSignal.timeout(20_000),
    });
    return reply.json();
  };

  try {
    const started = await requestStart();
    if (started.error) {
      return started;
    }

    // Verify the runner is actually connected by checking state.
    const probe = await fetch(`${SCRIBUS_BRIDGE_URL}/api/scribus/state`, {
      headers,
      signal: AbortSignal.timeout(10_000),
    });
    const probed = await probe.json();

    if (probed.error) {
      // Runner not connected — force a restart by calling start again
      // (server.py now kills zombie Scribus when the socket is missing).
      return await requestStart();
    }
    return started;
  } catch (e: any) {
    return { error: `Bridge unreachable: ${e.message}` };
  }
}
|
|
|
|
/**
 * Route a Gemini tool call to the matching bridge operation.
 *
 * @param name  Tool name chosen by the model.
 * @param args  Tool arguments.
 * @param space Space identifier, added to the arguments of `save_document` only.
 * @returns The bridge/helper result, or `{ error }` for an unrecognised tool.
 */
async function executeToolCall(name: string, args: Record<string, any>, space: string): Promise<any> {
  // Tools whose name and arguments are forwarded to the bridge unchanged.
  const forwardedAsIs = [
    "new_document",
    "add_text_frame",
    "add_image_frame",
    "add_shape",
    "set_background_color",
  ];
  if (forwardedAsIs.includes(name)) {
    return bridgeCommand(name, args);
  }

  if (name === "get_state") {
    return bridgeState();
  }

  if (name === "save_document") {
    // The bridge action is named differently from the tool.
    return bridgeCommand("save_as_sla", { ...args, space });
  }

  if (name === "generate_image") {
    // Generate image via fal.ai, then place it.
    return generateAndPlaceImage(args);
  }

  return { error: `Unknown tool: ${name}` };
}
|
|
|
|
/**
 * Generate an image via the rSpace /api/image-gen endpoint and place an image frame for it.
 *
 * Steps: request an image from the local image-gen API, confirm the returned URL can
 * be fetched, then ask the bridge to add an image frame that references a generated
 * path under `/data/designs/_generated/`.
 *
 * NOTE(review): this function does not write the image bytes anywhere; the frame is
 * created with `imagePath` only. Presumably the file is expected to be put at that
 * path by another component — confirm.
 *
 * @param args Expects `prompt` plus frame geometry `x`, `y`, `width`, `height`.
 * @returns The bridge reply merged with `imageUrl` and `imagePath`, or `{ error }`.
 */
async function generateAndPlaceImage(args: Record<string, any>): Promise<any> {
  try {
    // Call internal image gen API
    const res = await fetch(`http://localhost:${process.env.PORT || 3000}/api/image-gen`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ prompt: args.prompt, provider: "fal", model: "flux-pro" }),
      signal: AbortSignal.timeout(60_000),
    });
    const data = (await res.json()) as any;
    if (!data?.url) return { error: "Image generation failed", details: data };

    // Confirm the generated image is reachable.
    const imageUrl = data.url;
    const downloadRes = await fetch(imageUrl, { signal: AbortSignal.timeout(30_000) });
    const reachable = downloadRes.ok;
    // The body is not used, so release it instead of leaving the connection
    // pinned until garbage collection. Best effort: a failed cancel is ignored.
    try {
      await downloadRes.body?.cancel();
    } catch {
      /* nothing to release */
    }
    if (!reachable) return { error: "Failed to download generated image" };

    // One timestamp so the file name and the frame name correlate.
    const stamp = Date.now();
    const imageName = `gen_${stamp}.png`;
    const imagePath = `/data/designs/_generated/${imageName}`;

    const placeResult = await bridgeCommand("add_image_frame", {
      x: args.x,
      y: args.y,
      width: args.width,
      height: args.height,
      imagePath,
      name: `gen_image_${stamp}`,
    });

    return { ...placeResult, imageUrl, imagePath };
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    return { error: `Image generation failed: ${reason}` };
  }
}
|
|
|
|
/**
 * Send the conversation to Gemini with the design tool declarations attached.
 *
 * @param messages Conversation so far, in Gemini `contents` form.
 * @param model    Model id; an empty value falls back to "gemini-2.0-flash".
 * @returns The SDK response, or `{ error }` when GEMINI_API_KEY is not set.
 */
async function callGemini(messages: any[], model: string): Promise<any> {
  // The SDK is loaded lazily on each call.
  const sdk = await import("@google/genai");

  const key = process.env.GEMINI_API_KEY;
  if (!key) {
    return { error: "GEMINI_API_KEY not configured" };
  }

  const client = new sdk.GoogleGenAI({ apiKey: key });

  // Only name/description/parameters are forwarded from each declaration.
  const functionDeclarations = DESIGN_TOOL_DECLARATIONS.map(({ name, description, parameters }) => ({
    name,
    description,
    parameters,
  }));
  const toolset: any[] = [{ functionDeclarations }];

  const request = {
    model: model || "gemini-2.0-flash",
    contents: messages,
    config: {
      tools: toolset,
      systemInstruction: DESIGN_SYSTEM_PROMPT,
    },
  };

  return client.models.generateContent(request as any);
}
|
|
|
|
/**
 * POST /api/design-agent — run the design agent and stream its progress as SSE.
 *
 * Request body: `{ brief, space = "demo", model = "gemini-2.0-flash" }`; a missing
 * `brief` yields HTTP 400. Every progress message is a `step` event whose JSON payload
 * carries an `action` field (`starting_scribus`, `scribus_ready`, `thinking`,
 * `executing`, `tool_result`, `complete`, `verifying`, `done`, `error`). An `error`
 * event ends the run: no `done` event follows it.
 */
designAgentRoutes.post("/api/design-agent", async (c) => {
  const body = await c.req.json().catch(() => null);
  if (!body?.brief) return c.json({ error: "Missing 'brief' in request body" }, 400);

  const { brief, space = "demo", model = "gemini-2.0-flash" } = body;

  return streamSSE(c, async (stream) => {
    let eventId = 0;
    const sendEvent = async (data: any) => {
      await stream.writeSSE({ data: JSON.stringify(data), event: "step", id: String(++eventId) });
    };

    // Keepalive to prevent Cloudflare QUIC/HTTP2 timeout (drops idle streams ~100s)
    const keepalive = setInterval(() => {
      stream.writeSSE({ data: "", event: "keepalive", id: String(++eventId) }).catch(() => {});
    }, 15_000);

    try {
      // Step 1: Ensure Scribus is running
      await sendEvent({ step: 1, action: "starting_scribus", status: "Starting Scribus..." });
      const startResult = await ensureScribusRunning();
      if (startResult.error) {
        await sendEvent({ step: 1, action: "error", error: startResult.error });
        return;
      }
      await sendEvent({ step: 1, action: "scribus_ready", result: startResult });

      // Step 2: Agentic loop
      const messages: any[] = [
        { role: "user", parts: [{ text: `Design brief: ${brief}` }] },
      ];

      for (let turn = 0; turn < MAX_TURNS; turn++) {
        const step = turn + 2;
        const response = await callGemini(messages, model);
        const candidate = response?.candidates?.[0];
        if (!candidate) {
          // callGemini reports configuration problems as { error }; surface that
          // message instead of the generic one, and stop without claiming success.
          const reason = typeof response?.error === "string" ? response.error : "No response from Gemini";
          await sendEvent({ step, action: "error", error: reason });
          return;
        }

        const parts = candidate.content?.parts || [];
        const textParts = parts.filter((p: any) => p.text);
        const toolCalls = parts.filter((p: any) => p.functionCall);

        // Send thinking event with Gemini's reasoning text (if any)
        const thinkingText = textParts.map((p: any) => p.text).join("\n").trim();
        await sendEvent({
          step,
          action: "thinking",
          status: thinkingText || `Planning turn ${turn + 1}...`,
          text: thinkingText || null,
        });

        // No tool calls means the model has nothing further to do. This also covers
        // an empty reply, which must not be appended to the conversation as an
        // empty turn and re-sent.
        if (toolCalls.length === 0) {
          await sendEvent({ step, action: "complete", message: thinkingText });
          break;
        }

        // Execute tool calls
        const toolResults: any[] = [];
        for (const part of toolCalls) {
          const { name, args } = part.functionCall;
          const description = describeToolCall(name, args || {});
          await sendEvent({
            step,
            action: "executing",
            tool: name,
            args,
            description,
            status: description,
          });

          const result = await executeToolCall(name, args || {}, space);
          await sendEvent({ step, action: "tool_result", tool: name, result });

          toolResults.push({ functionResponse: { name, response: result } });
        }

        // Add assistant response + tool results to conversation
        messages.push({ role: "model", parts });
        messages.push({ role: "user", parts: toolResults });
      }

      // Final state check
      await sendEvent({ step: MAX_TURNS + 2, action: "verifying", status: "Getting final state..." });
      const finalState = await bridgeState();
      await sendEvent({
        step: MAX_TURNS + 2,
        action: "done",
        state: finalState,
        status: "Design complete!",
      });
    } catch (e: unknown) {
      const reason = e instanceof Error ? e.message : String(e);
      await sendEvent({ step: 0, action: "error", error: reason });
    } finally {
      clearInterval(keepalive);
    }
  });
});
|
|
|
|
/** GET /api/design-agent/health — reports the configured bridge URL without contacting the bridge. */
designAgentRoutes.get("/api/design-agent/health", (c) =>
  c.json({ ok: true, bridge: SCRIBUS_BRIDGE_URL }),
);
|