feat: use Gemini native image generation (Nano Banana Pro)

Switch from the text fallback to actual image generation, using the gemini-2.0-flash-exp-image-generation model with responseModalities set to ["IMAGE"].

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-23 00:24:13 -05:00 · 2025-12-23 00:24:13 -05:00 · b4c234d051
parent e4d6172398
commit b4c234d051
1 changed file with 25 additions and 21 deletions
--- a/lib/gemini.ts
+++ b/lib/gemini.ts
@ -104,8 +104,15 @@ export async function generatePageImage(
  tone: string,
  feedback?: string
 ): Promise<string> {
-  // Use Gemini's image generation (Imagen 3 via Gemini API)
+  // Use Nano Banana Pro for highest quality image generation
-  const model = getGenAI().getGenerativeModel({ model: "gemini-2.0-flash-exp" });
+  // Model: gemini-2.0-flash-exp-image-generation (supports native image output)
  const model = getGenAI().getGenerativeModel({
    model: "gemini-2.0-flash-exp-image-generation",
    generationConfig: {
      // @ts-expect-error - responseModalities is valid but not in types yet
      responseModalities: ["IMAGE"],
    },
  });
  const styleDesc = STYLE_PROMPTS[style] || STYLE_PROMPTS["mycelial"];
  const toneDesc = TONE_PROMPTS[tone] || TONE_PROMPTS["regenerative"];
@ -128,30 +135,27 @@ The image should be a complete, self-contained page that could be printed. Inclu
    imagePrompt += `\n\nUser feedback for refinement: ${feedback}`;
  }
  // For now, return a placeholder - we'll integrate actual image generation
  // The actual implementation will use either Gemini's native image gen or RunPod
  // Generate with Gemini 2.0 Flash which supports image generation
  try {
-    const result = await model.generateContent({
+    const result = await model.generateContent(imagePrompt);
      contents: [{
        role: "user",
        parts: [{ text: `Generate an image: ${imagePrompt}` }]
      }],
      generationConfig: {
        // Note: Image generation config would go here when available
      }
    });
    // Check if response contains image data
    const response = result.response;
-    // For text model fallback, return a description
+    // Extract image from response parts
-    // In production, this would use imagen or other image gen API
+    for (const candidate of response.candidates || []) {
-    return `data:text/plain;base64,${Buffer.from(response.text()).toString('base64')}`;
+      for (const part of candidate.content?.parts || []) {
        // @ts-expect-error - inlineData exists on image responses
        if (part.inlineData) {
          // @ts-expect-error - inlineData has data and mimeType
          const { data, mimeType } = part.inlineData;
          return `data:${mimeType || "image/png"};base64,${data}`;
        }
      }
    }
    // If no image in response, throw error
    throw new Error("No image data in response");
  } catch (error) {
    console.error("Image generation error:", error);
-    throw new Error("Failed to generate page image");
+    throw new Error(`Failed to generate page image: ${error instanceof Error ? error.message : "Unknown error"}`);
  }
 }