// rspace-online/modules/rdesign/design-agent-route.ts

/**
 * Design Agent Route — Gemini tool-calling loop that drives Scribus via the bridge.
 *
 * POST /api/design-agent  { brief, space, model? }
 * Returns SSE stream of agent steps.
 */

import { Hono } from "hono";
import { streamSSE } from "hono/streaming";
import { DESIGN_TOOL_DECLARATIONS, DESIGN_SYSTEM_PROMPT } from "../../lib/design-tool-declarations";

// Base URL of the Scribus bridge HTTP service; taken from SCRIBUS_BRIDGE_URL when set.
const SCRIBUS_BRIDGE_URL = process.env.SCRIBUS_BRIDGE_URL || "http://scribus-novnc:8765";
// Shared secret sent as the X-Bridge-Secret header on bridge requests; when empty the header is omitted.
const BRIDGE_SECRET = process.env.SCRIBUS_BRIDGE_SECRET || "";
// Upper bound on Gemini request/response turns in a single agent run.
const MAX_TURNS = 10;

// Router holding the design-agent endpoints registered in this file.
export const designAgentRoutes = new Hono();

/**
 * Build a short, human-readable status line for a tool call.
 *
 * The arguments come straight from the model, so free-text fields are coerced
 * to strings before being clipped; a non-string `text`/`prompt` must not throw.
 *
 * @param name Tool name as declared to Gemini.
 * @param args Arguments the model supplied for the call.
 * @returns A one-line description; unknown tools yield `Executing <name>`.
 */
function describeToolCall(name: string, args: Record<string, any>): string {
	// Falsy values render as an empty preview; longer text is cut at `limit` and marked with an ellipsis.
	const preview = (value: unknown, limit: number): string => {
		const text = value ? String(value) : "";
		return text.length > limit ? `${text.slice(0, limit)}\u2026` : text;
	};
	// Shared "at x,y mm (w×h" fragment used by every frame/shape description.
	const placement = `at ${args.x},${args.y}mm (${args.width}\u00d7${args.height}`;

	switch (name) {
		case "new_document": {
			const pages = args.pages || 1;
			return `Creating ${args.width || 210}\u00d7${args.height || 297}mm document (${pages} page${pages > 1 ? "s" : ""})`;
		}
		case "add_text_frame": return `Adding text frame: "${preview(args.text, 40)}" ${placement})`;
		case "add_image_frame": return `Adding image frame ${placement})`;
		case "add_shape": return `Adding ${args.shapeType || "rect"} shape ${placement}${args.fill ? ", fill:" + args.fill : ""})`;
		case "set_background_color": return `Setting background color to ${args.color}`;
		case "get_state": return "Checking current document state";
		case "save_document": return `Saving document as ${args.filename || "design.sla"}`;
		case "generate_image": return `Generating AI image: "${preview(args.prompt, 50)}"`;
		default: return `Executing ${name}`;
	}
}

/**
 * Forward a command to the Scribus bridge.
 *
 * POSTs `{ action, args }` as JSON to `/api/scribus/command` on the bridge with a
 * 30 s timeout and returns the parsed JSON reply. Never throws: failures are
 * returned as `{ error }` so the caller can hand them back to the model.
 *
 * @param action Bridge action name.
 * @param args Arguments forwarded verbatim to the bridge.
 * @returns The bridge's JSON reply, or `{ error: string }` on failure.
 */
async function bridgeCommand(action: string, args: Record<string, any> = {}): Promise<any> {
	const headers: Record<string, string> = { "Content-Type": "application/json" };
	if (BRIDGE_SECRET) headers["X-Bridge-Secret"] = BRIDGE_SECRET;

	// Remembered so a body-parse failure can report which HTTP status carried the bad body.
	let status: number | undefined;
	try {
		const res = await fetch(`${SCRIBUS_BRIDGE_URL}/api/scribus/command`, {
			method: "POST",
			headers,
			body: JSON.stringify({ action, args }),
			signal: AbortSignal.timeout(30_000),
		});
		status = res.status;
		return await res.json();
	} catch (e: unknown) {
		// A SyntaxError means the bridge answered, but not with JSON (e.g. a proxy error page).
		if (e instanceof SyntaxError) {
			return { error: `Bridge returned a non-JSON response (HTTP ${status ?? "unknown"})` };
		}
		const reason = e instanceof Error ? e.message : String(e);
		return { error: `Bridge unreachable: ${reason}` };
	}
}

/**
 * Fetch the current state from the Scribus bridge.
 *
 * GETs `/api/scribus/state` on the bridge with a 10 s timeout and returns the
 * parsed JSON reply. Never throws: failures are returned as `{ error }`.
 *
 * @returns The bridge's JSON state, or `{ error: string }` on failure.
 */
async function bridgeState(): Promise<any> {
	const headers: Record<string, string> = {};
	if (BRIDGE_SECRET) headers["X-Bridge-Secret"] = BRIDGE_SECRET;

	// Remembered so a body-parse failure can report which HTTP status carried the bad body.
	let status: number | undefined;
	try {
		const res = await fetch(`${SCRIBUS_BRIDGE_URL}/api/scribus/state`, { headers, signal: AbortSignal.timeout(10_000) });
		status = res.status;
		return await res.json();
	} catch (e: unknown) {
		// A SyntaxError means the bridge answered, but not with JSON (e.g. a proxy error page).
		if (e instanceof SyntaxError) {
			return { error: `Bridge returned a non-JSON response (HTTP ${status ?? "unknown"})` };
		}
		const reason = e instanceof Error ? e.message : String(e);
		return { error: `Bridge unreachable: ${reason}` };
	}
}

/**
 * Call the bridge's start endpoint before the agent begins issuing commands.
 *
 * POSTs (without a body) to `/api/scribus/start` on the bridge with a 10 s
 * timeout and returns the parsed JSON reply. Never throws: failures are
 * returned as `{ error }`, which the route handler treats as fatal.
 *
 * @returns The bridge's JSON reply, or `{ error: string }` on failure.
 */
async function ensureScribusRunning(): Promise<any> {
	const headers: Record<string, string> = { "Content-Type": "application/json" };
	if (BRIDGE_SECRET) headers["X-Bridge-Secret"] = BRIDGE_SECRET;

	// Remembered so a body-parse failure can report which HTTP status carried the bad body.
	let status: number | undefined;
	try {
		const res = await fetch(`${SCRIBUS_BRIDGE_URL}/api/scribus/start`, {
			method: "POST",
			headers,
			signal: AbortSignal.timeout(10_000),
		});
		status = res.status;
		return await res.json();
	} catch (e: unknown) {
		// A SyntaxError means the bridge answered, but not with JSON (e.g. a proxy error page).
		if (e instanceof SyntaxError) {
			return { error: `Bridge returned a non-JSON response (HTTP ${status ?? "unknown"})` };
		}
		const reason = e instanceof Error ? e.message : String(e);
		return { error: `Bridge unreachable: ${reason}` };
	}
}

/**
 * Dispatch one Gemini function call to the matching bridge operation.
 *
 * @param name Tool name chosen by the model.
 * @param args Arguments supplied by the model.
 * @param space Space identifier, attached to the arguments of `save_document`.
 * @returns Whatever the underlying operation returns; `{ error }` for an unknown tool.
 */
async function executeToolCall(name: string, args: Record<string, any>, space: string): Promise<any> {
	// Tools whose name and arguments map one-to-one onto a bridge action.
	const passThrough = ["new_document", "add_text_frame", "add_image_frame", "add_shape", "set_background_color"];
	if (passThrough.includes(name)) return bridgeCommand(name, args);

	if (name === "get_state") return bridgeState();

	// The bridge action is named differently from the tool.
	if (name === "save_document") return bridgeCommand("save_as_sla", { ...args, space });

	// Generate image via fal.ai, then place it
	if (name === "generate_image") return generateAndPlaceImage(args);

	return { error: `Unknown tool: ${name}` };
}

/**
 * Produce an AI image for `args.prompt` and place it in the document.
 *
 * Steps: request the image from the local `/api/image-gen` endpoint, download the
 * returned URL, write the bytes under `/data/files/generated`, then ask the bridge
 * to add an image frame pointing at that file. Any thrown error is converted into
 * an `{ error }` result.
 *
 * @param args Tool arguments; reads `prompt`, `x`, `y`, `width` and `height`.
 * @returns The bridge reply extended with `imageUrl` and `imagePath`, or `{ error }`.
 */
async function generateAndPlaceImage(args: Record<string, any>): Promise<any> {
	try {
		// Ask the internal image-generation API for a picture.
		const port = process.env.PORT || 3000;
		const genResponse = await fetch(`http://localhost:${port}/api/image-gen`, {
			method: "POST",
			headers: { "Content-Type": "application/json" },
			body: JSON.stringify({ prompt: args.prompt, provider: "fal", model: "flux-pro" }),
			signal: AbortSignal.timeout(60_000),
		});
		const generated = await genResponse.json() as any;
		if (!generated.url) return { error: "Image generation failed", details: generated };

		// Pull the bytes down so they can be stored on the shared volume (rspace-files, mounted in both containers).
		const imageUrl = generated.url;
		const download = await fetch(imageUrl, { signal: AbortSignal.timeout(30_000) });
		if (!download.ok) return { error: "Failed to download generated image" };

		const fs = await import("node:fs/promises");
		const imageDir = "/data/files/generated";
		const imagePath = `${imageDir}/gen_${Date.now()}.png`;

		await fs.mkdir(imageDir, { recursive: true });
		await fs.writeFile(imagePath, Buffer.from(await download.arrayBuffer()));

		// Hand the stored file to Scribus; the path is reachable there through the shared volume.
		const { x, y, width, height } = args;
		const placement = await bridgeCommand("add_image_frame", {
			x,
			y,
			width,
			height,
			imagePath,
			name: `gen_image_${Date.now()}`,
		});

		return { ...placement, imageUrl, imagePath };
	} catch (e: any) {
		return { error: `Image generation failed: ${e.message}` };
	}
}

/**
 * Send the conversation to Gemini with the design tools attached.
 *
 * @param messages Conversation so far, in Gemini `contents` format.
 * @param model Model id; an empty value falls back to "gemini-2.0-flash".
 * @returns The SDK response, or `{ error }` when GEMINI_API_KEY is not set.
 */
async function callGemini(messages: any[], model: string): Promise<any> {
	// The SDK is loaded on demand from the AI services dependency.
	const sdk = await import("@google/genai");
	const apiKey = process.env.GEMINI_API_KEY;
	if (!apiKey) return { error: "GEMINI_API_KEY not configured" };

	const client = new sdk.GoogleGenAI({ apiKey });

	// Only name, description and parameters of each declaration are passed on.
	const functionDeclarations = DESIGN_TOOL_DECLARATIONS.map((decl) => ({
		name: decl.name,
		description: decl.description,
		parameters: decl.parameters,
	}));

	const request = {
		model: model || "gemini-2.0-flash",
		contents: messages,
		config: {
			tools: [{ functionDeclarations }],
			systemInstruction: DESIGN_SYSTEM_PROMPT,
		},
	};

	return client.models.generateContent(request as any);
}

/**
 * POST /api/design-agent — run the design agent and stream its progress as SSE.
 *
 * Request body: `{ brief, space = "demo", model = "gemini-2.0-flash" }`; a missing
 * body or `brief` yields HTTP 400.
 *
 * The stream carries `step` events whose JSON payload has an `action` of
 * `starting_scribus`, `scribus_ready`, `thinking`, `executing`, `tool_result`,
 * `complete`, `verifying`, `done` or `error`, plus a `keepalive` event every 15 s.
 * After an `error` event the run stops; `done` is only sent for runs that did not fail.
 */
designAgentRoutes.post("/api/design-agent", async (c) => {
	const body = await c.req.json().catch(() => null);
	if (!body?.brief) return c.json({ error: "Missing 'brief' in request body" }, 400);

	const { brief, space = "demo", model = "gemini-2.0-flash" } = body;

	return streamSSE(c, async (stream) => {
		let eventId = 0;
		const sendEvent = async (data: any) => {
			await stream.writeSSE({ data: JSON.stringify(data), event: "step", id: String(++eventId) });
		};

		// Keepalive to prevent Cloudflare QUIC/HTTP2 timeout (drops idle streams ~100s)
		const keepalive = setInterval(() => {
			stream.writeSSE({ data: "", event: "keepalive", id: String(++eventId) }).catch(() => {});
		}, 15_000);

		try {
			// Step 1: Ensure Scribus is running
			await sendEvent({ step: 1, action: "starting_scribus", status: "Starting Scribus..." });
			const startResult = await ensureScribusRunning();
			if (startResult.error) {
				await sendEvent({ step: 1, action: "error", error: startResult.error });
				return;
			}
			await sendEvent({ step: 1, action: "scribus_ready", result: startResult });

			// Step 2: Agentic loop
			const messages: any[] = [
				{ role: "user", parts: [{ text: `Design brief: ${brief}` }] },
			];

			for (let turn = 0; turn < MAX_TURNS; turn++) {
				const step = turn + 2;
				const response = await callGemini(messages, model);

				// callGemini reports configuration problems (e.g. a missing API key) as
				// `{ error }` instead of throwing; surface that message rather than a generic one.
				if (typeof response?.error === "string") {
					await sendEvent({ step, action: "error", error: response.error });
					return;
				}

				const candidate = response?.candidates?.[0];
				if (!candidate) {
					// Stop here, like the other failure paths, so the stream does not go on
					// to announce "Design complete!" after an error.
					await sendEvent({ step, action: "error", error: "No response from Gemini" });
					return;
				}

				const parts = candidate.content?.parts || [];
				const textParts = parts.filter((p: any) => p.text);
				const toolCalls = parts.filter((p: any) => p.functionCall);

				// Send thinking event with Gemini's reasoning text (if any)
				const thinkingText = textParts.map((p: any) => p.text).join("\n").trim();
				await sendEvent({
					step,
					action: "thinking",
					status: thinkingText || `Planning turn ${turn + 1}...`,
					text: thinkingText || null,
				});

				// If Gemini returned text without tool calls, we're done
				if (textParts.length > 0 && toolCalls.length === 0) {
					await sendEvent({
						step,
						action: "complete",
						message: thinkingText,
					});
					break;
				}

				// NOTE(review): a candidate with neither text nor function calls falls through
				// and appends empty `parts` to the conversation — confirm how the Gemini API
				// treats that on the next turn.

				// Execute tool calls one at a time, in the order the model listed them
				const toolResults: any[] = [];
				for (const part of toolCalls) {
					const { name, args } = part.functionCall;
					const description = describeToolCall(name, args || {});
					await sendEvent({
						step,
						action: "executing",
						tool: name,
						args,
						description,
						status: description,
					});

					const result = await executeToolCall(name, args || {}, space);
					await sendEvent({
						step,
						action: "tool_result",
						tool: name,
						result,
					});

					toolResults.push({
						functionResponse: {
							name,
							response: result,
						},
					});
				}

				// Add assistant response + tool results to conversation
				messages.push({ role: "model", parts });
				messages.push({ role: "user", parts: toolResults });
			}

			// Final state check
			await sendEvent({ step: MAX_TURNS + 2, action: "verifying", status: "Getting final state..." });
			const finalState = await bridgeState();
			await sendEvent({
				step: MAX_TURNS + 2,
				action: "done",
				state: finalState,
				status: "Design complete!",
			});
		} catch (e: unknown) {
			const message = e instanceof Error ? e.message : String(e);
			await sendEvent({ step: 0, action: "error", error: message });
		} finally {
			clearInterval(keepalive);
		}
	});
});

/** GET /api/design-agent/health — answers `ok: true` with the configured bridge URL; the bridge itself is not contacted. */
designAgentRoutes.get("/api/design-agent/health", (c) =>
	c.json({ ok: true, bridge: SCRIBUS_BRIDGE_URL }),
);