// rspace-online/modules/rdesign/design-agent-route.ts
//
// 297 lines
// 10 KiB
// TypeScript

/**
* Design Agent Route — Gemini tool-calling loop that drives Scribus via the bridge.
*
* POST /api/design-agent { brief, space, model? }
* Returns SSE stream of agent steps.
*/
import { Hono } from "hono";
import { streamSSE } from "hono/streaming";
import { DESIGN_TOOL_DECLARATIONS, DESIGN_SYSTEM_PROMPT } from "../../lib/design-tool-declarations";
/** Base URL of the Scribus bridge; overridable via the SCRIBUS_BRIDGE_URL environment variable. */
const SCRIBUS_BRIDGE_URL = process.env.SCRIBUS_BRIDGE_URL || "http://scribus-novnc:8765";
/** Value sent as the X-Bridge-Secret header on bridge requests; when empty, the header is omitted. */
const BRIDGE_SECRET = process.env.SCRIBUS_BRIDGE_SECRET || "";
/** Upper bound on the number of Gemini request / tool-execution rounds in one agent run. */
const MAX_TURNS = 10;
/** Hono app instance on which the design-agent endpoints are registered. */
export const designAgentRoutes = new Hono();
/**
 * Human-readable description of a tool call, used as the status line of
 * "executing" events.
 *
 * Tool arguments are produced by the model and are not validated here, so
 * free-text fields (`text`, `prompt`) are coerced to strings before being
 * truncated; a numeric or object value must not crash the agent run.
 *
 * @param name - Tool name as declared to the model.
 * @param args - Arguments of the tool call (may be incomplete or mistyped).
 * @returns A one-line description; unknown tools yield `Executing <name>`.
 */
function describeToolCall(name: string, args: Record<string, any>): string {
  // Truncate to `max` characters, appending an ellipsis only when text was cut.
  const preview = (value: unknown, max: number): string => {
    const text = typeof value === "string" ? value : value ? String(value) : "";
    return text.length > max ? `${text.slice(0, max)}\u2026` : text;
  };

  switch (name) {
    case "new_document": {
      const pages = args.pages || 1;
      return `Creating ${args.width || 210}\u00d7${args.height || 297}mm document (${pages} page${pages > 1 ? "s" : ""})`;
    }
    case "add_text_frame":
      return `Adding text frame: "${preview(args.text, 40)}" at ${args.x},${args.y}mm (${args.width}\u00d7${args.height})`;
    case "add_image_frame":
      return `Adding image frame at ${args.x},${args.y}mm (${args.width}\u00d7${args.height})`;
    case "add_shape":
      return `Adding ${args.shapeType || "rect"} shape at ${args.x},${args.y}mm (${args.width}\u00d7${args.height}${args.fill ? ", fill:" + args.fill : ""})`;
    case "set_background_color":
      return `Setting background color to ${args.color}`;
    case "get_state":
      return "Checking current document state";
    case "save_document":
      return `Saving document as ${args.filename || "design.sla"}`;
    case "generate_image":
      return `Generating AI image: "${preview(args.prompt, 50)}"`;
    default:
      return `Executing ${name}`;
  }
}
/**
 * Forward a command to the Scribus bridge.
 *
 * POSTs `{ action, args }` as JSON to `/api/scribus/command` with a 30 s
 * timeout, adding the `X-Bridge-Secret` header when a secret is configured.
 * Never throws: every failure is reported as an `{ error }` object so the
 * result can be handed back to the model as a tool response.
 *
 * @param action - Bridge action name.
 * @param args - Action arguments, serialized as-is.
 * @returns The bridge's parsed JSON reply (whatever its HTTP status), or
 *   `{ error }` when the bridge cannot be reached or replies with non-JSON.
 */
async function bridgeCommand(action: string, args: Record<string, any> = {}): Promise<any> {
  const headers: Record<string, string> = { "Content-Type": "application/json" };
  if (BRIDGE_SECRET) headers["X-Bridge-Secret"] = BRIDGE_SECRET;
  try {
    const res = await fetch(`${SCRIBUS_BRIDGE_URL}/api/scribus/command`, {
      method: "POST",
      headers,
      body: JSON.stringify({ action, args }),
      signal: AbortSignal.timeout(30_000),
    });
    try {
      return await res.json();
    } catch (e: unknown) {
      // The bridge answered, but not with JSON (e.g. a proxy error page or an
      // empty body). Report that instead of claiming it is unreachable.
      if (e instanceof SyntaxError) {
        return { error: `Bridge returned HTTP ${res.status} with a non-JSON response` };
      }
      throw e;
    }
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    return { error: `Bridge unreachable: ${reason}` };
  }
}
/**
 * Get bridge state.
 *
 * GETs `/api/scribus/state` with a 10 s timeout, adding the `X-Bridge-Secret`
 * header when a secret is configured. Never throws.
 *
 * @returns The bridge's parsed JSON reply, or `{ error }` when the bridge
 *   cannot be reached or replies with a non-JSON body.
 */
async function bridgeState(): Promise<any> {
  const headers: Record<string, string> = {};
  if (BRIDGE_SECRET) headers["X-Bridge-Secret"] = BRIDGE_SECRET;
  try {
    const res = await fetch(`${SCRIBUS_BRIDGE_URL}/api/scribus/state`, {
      headers,
      signal: AbortSignal.timeout(10_000),
    });
    try {
      return await res.json();
    } catch (e: unknown) {
      // A reachable bridge that sends non-JSON is a different failure from an
      // unreachable one; keep the two distinguishable.
      if (e instanceof SyntaxError) {
        return { error: `Bridge returned HTTP ${res.status} with a non-JSON response` };
      }
      throw e;
    }
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    return { error: `Bridge unreachable: ${reason}` };
  }
}
/**
 * Verify the bridge runner is connected and ready.
 *
 * POSTs (without a body) to `/api/scribus/start` with a 10 s timeout, adding
 * the `X-Bridge-Secret` header when a secret is configured. Never throws.
 *
 * @returns The bridge's parsed JSON reply, or `{ error }` when the bridge
 *   cannot be reached or replies with a non-JSON body. Callers treat a truthy
 *   `error` property as "not ready".
 */
async function ensureScribusRunning(): Promise<any> {
  const headers: Record<string, string> = { "Content-Type": "application/json" };
  if (BRIDGE_SECRET) headers["X-Bridge-Secret"] = BRIDGE_SECRET;
  try {
    const res = await fetch(`${SCRIBUS_BRIDGE_URL}/api/scribus/start`, {
      method: "POST",
      headers,
      signal: AbortSignal.timeout(10_000),
    });
    try {
      return await res.json();
    } catch (e: unknown) {
      // The bridge responded but not with JSON; say so rather than reporting
      // it as unreachable.
      if (e instanceof SyntaxError) {
        return { error: `Bridge returned HTTP ${res.status} with a non-JSON response` };
      }
      throw e;
    }
  } catch (e: unknown) {
    const reason = e instanceof Error ? e.message : String(e);
    return { error: `Bridge unreachable: ${reason}` };
  }
}
/** Tools whose name and arguments are forwarded to the bridge unchanged. */
const PASSTHROUGH_TOOLS = new Set<string>([
  "new_document",
  "add_text_frame",
  "add_image_frame",
  "add_shape",
  "set_background_color",
]);

/**
 * Translate a Gemini tool call into a bridge command.
 *
 * @param name - Tool name chosen by the model.
 * @param args - Arguments of the tool call.
 * @param space - Space identifier, attached to save requests.
 * @returns The result of the bridge call, or `{ error }` for an unknown tool.
 */
async function executeToolCall(name: string, args: Record<string, any>, space: string): Promise<any> {
  if (PASSTHROUGH_TOOLS.has(name)) {
    return bridgeCommand(name, args);
  }
  if (name === "get_state") {
    return bridgeState();
  }
  if (name === "save_document") {
    // The bridge action is named differently from the tool and also needs the space.
    return bridgeCommand("save_as_sla", { ...args, space });
  }
  if (name === "generate_image") {
    // Generates the image, stores it, then places it as an image frame.
    return generateAndPlaceImage(args);
  }
  return { error: `Unknown tool: ${name}` };
}
/**
 * Generate an image via the rSpace /api/image-gen endpoint, store it on disk,
 * and place it in the Scribus document.
 *
 * Steps: request an image for `args.prompt`, download the returned URL, write
 * the bytes under /data/files/generated, then issue an `add_image_frame`
 * bridge command at `args.x`/`args.y` with `args.width`/`args.height`.
 *
 * @param args - Tool-call arguments (`prompt`, `x`, `y`, `width`, `height`).
 * @returns The bridge's placement result extended with `imageUrl` and
 *   `imagePath`, or an `{ error }` object if any step fails. Never throws.
 */
async function generateAndPlaceImage(args: Record<string, any>): Promise<any> {
  try {
    // Ask the local image-generation API for a picture matching the prompt.
    const port = process.env.PORT || 3000;
    const genResponse = await fetch(`http://localhost:${port}/api/image-gen`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ prompt: args.prompt, provider: "fal", model: "flux-pro" }),
      signal: AbortSignal.timeout(60_000),
    });
    const generated = (await genResponse.json()) as any;
    if (!generated.url) {
      return { error: "Image generation failed", details: generated };
    }

    // Fetch the generated file so it can be written to local storage.
    const imageUrl = generated.url;
    const download = await fetch(imageUrl, { signal: AbortSignal.timeout(30_000) });
    if (!download.ok) {
      return { error: "Failed to download generated image" };
    }

    const fs = await import("node:fs/promises");
    const imageDir = "/data/files/generated";
    const imagePath = `${imageDir}/gen_${Date.now()}.png`;
    await fs.mkdir(imageDir, { recursive: true });
    await fs.writeFile(imagePath, Buffer.from(await download.arrayBuffer()));

    // Hand the stored file's path to Scribus as a new image frame.
    const { x, y, width, height } = args;
    const placeResult = await bridgeCommand("add_image_frame", {
      x,
      y,
      width,
      height,
      imagePath,
      name: `gen_image_${Date.now()}`,
    });
    return { ...placeResult, imageUrl, imagePath };
  } catch (e: any) {
    return { error: `Image generation failed: ${e.message}` };
  }
}
/**
 * Call Gemini with the design tool declarations and system prompt.
 *
 * @param messages - Conversation so far, passed as the request `contents`.
 * @param model - Model id; an empty value falls back to "gemini-2.0-flash".
 * @returns The SDK's generateContent response, or `{ error }` when
 *   GEMINI_API_KEY is not set. SDK failures propagate as rejections.
 */
async function callGemini(messages: any[], model: string): Promise<any> {
  // The SDK is loaded lazily, on each call.
  const sdk = await import("@google/genai");

  const apiKey = process.env.GEMINI_API_KEY;
  if (!apiKey) {
    return { error: "GEMINI_API_KEY not configured" };
  }

  const client = new sdk.GoogleGenAI({ apiKey });

  // Only name, description and parameters of each declaration are forwarded.
  const functionDeclarations = DESIGN_TOOL_DECLARATIONS.map((decl) => {
    const { name, description, parameters } = decl;
    return { name, description, parameters };
  });
  const tools: any[] = [{ functionDeclarations }];

  const request = {
    model: model || "gemini-2.0-flash",
    contents: messages,
    config: {
      tools,
      systemInstruction: DESIGN_SYSTEM_PROMPT,
    },
  };
  return await client.models.generateContent(request as any);
}
/**
 * POST /api/design-agent — run the design agent and stream its progress.
 *
 * Request body: `{ brief, space?, model? }` (`space` defaults to "demo",
 * `model` to "gemini-2.0-flash"). Responds 400 when `brief` is missing.
 * Otherwise responds with an SSE stream of `step` events whose JSON payload
 * carries an `action` of starting_scribus, scribus_ready, thinking, executing,
 * tool_result, complete, verifying, done or error, plus an empty `keepalive`
 * event every 15 s.
 *
 * The agent loop runs at most MAX_TURNS rounds: each round asks Gemini for the
 * next move, executes any requested tool calls, and feeds the results back.
 * It stops early when Gemini requests no tool calls or when Gemini fails.
 * Unless Scribus could not be started, the stream ends with a `done` event
 * carrying the final bridge state.
 */
designAgentRoutes.post("/api/design-agent", async (c) => {
  const body = await c.req.json().catch(() => null);
  if (!body?.brief) return c.json({ error: "Missing 'brief' in request body" }, 400);
  const { brief, space = "demo", model = "gemini-2.0-flash" } = body;

  return streamSSE(c, async (stream) => {
    let eventId = 0;
    const sendEvent = async (data: any) => {
      await stream.writeSSE({ data: JSON.stringify(data), event: "step", id: String(++eventId) });
    };

    // Keepalive to prevent Cloudflare QUIC/HTTP2 timeout (drops idle streams ~100s)
    const keepalive = setInterval(() => {
      stream.writeSSE({ data: "", event: "keepalive", id: String(++eventId) }).catch(() => {});
    }, 15_000);

    try {
      // Step 1: Ensure Scribus is running
      await sendEvent({ step: 1, action: "starting_scribus", status: "Starting Scribus..." });
      const startResult = await ensureScribusRunning();
      if (startResult.error) {
        await sendEvent({ step: 1, action: "error", error: startResult.error });
        return;
      }
      await sendEvent({ step: 1, action: "scribus_ready", result: startResult });

      // Step 2: Agentic loop
      const messages: any[] = [
        { role: "user", parts: [{ text: `Design brief: ${brief}` }] },
      ];
      // Set when the loop stops because Gemini failed, so the final event
      // does not announce success.
      let failed = false;

      for (let turn = 0; turn < MAX_TURNS; turn++) {
        const step = turn + 2;
        const response = await callGemini(messages, model);

        // callGemini reports configuration problems as `{ error }`; surface the
        // real cause instead of a generic "no response".
        if (response?.error) {
          await sendEvent({ step, action: "error", error: String(response.error) });
          failed = true;
          break;
        }

        const candidate = response?.candidates?.[0];
        if (!candidate) {
          await sendEvent({ step, action: "error", error: "No response from Gemini" });
          failed = true;
          break;
        }

        const parts = candidate.content?.parts || [];
        const textParts = parts.filter((p: any) => p.text);
        const toolCalls = parts.filter((p: any) => p.functionCall);

        // Send thinking event with Gemini's reasoning text (if any)
        const thinkingText = textParts.map((p: any) => p.text).join("\n").trim();
        await sendEvent({
          step,
          action: "thinking",
          status: thinkingText || `Planning turn ${turn + 1}...`,
          text: thinkingText || null,
        });

        // No tool calls means there is nothing left to execute. This also
        // covers an entirely empty reply, which must not be appended to the
        // conversation as empty `parts`.
        if (toolCalls.length === 0) {
          await sendEvent({
            step,
            action: "complete",
            message: thinkingText || "Agent finished without requesting further actions",
          });
          break;
        }

        // Execute tool calls sequentially; later calls may depend on earlier ones.
        const toolResults: any[] = [];
        for (const part of toolCalls) {
          const { name, args } = part.functionCall;
          const description = describeToolCall(name, args || {});
          await sendEvent({
            step,
            action: "executing",
            tool: name,
            args,
            description,
            status: description,
          });
          const result = await executeToolCall(name, args || {}, space);
          await sendEvent({
            step,
            action: "tool_result",
            tool: name,
            result,
          });
          toolResults.push({
            functionResponse: {
              name,
              response: result,
            },
          });
        }

        // Add assistant response + tool results to conversation
        messages.push({ role: "model", parts });
        messages.push({ role: "user", parts: toolResults });
      }

      // Final state check (also after a failure, so the client can show what
      // was built before the run stopped)
      await sendEvent({ step: MAX_TURNS + 2, action: "verifying", status: "Getting final state..." });
      const finalState = await bridgeState();
      await sendEvent({
        step: MAX_TURNS + 2,
        action: "done",
        state: finalState,
        status: failed ? "Design stopped after an error" : "Design complete!",
      });
    } catch (e: unknown) {
      const message = e instanceof Error ? e.message : String(e);
      await sendEvent({ step: 0, action: "error", error: message });
    } finally {
      clearInterval(keepalive);
    }
  });
});
/**
 * GET /api/design-agent/health — always answers `{ ok: true }` together with
 * the configured bridge URL; the bridge itself is not contacted.
 */
designAgentRoutes.get("/api/design-agent/health", (c) =>
  c.json({ ok: true, bridge: SCRIBUS_BRIDGE_URL }),
);