rspace-online/server/mi-media.ts

203 lines
6.3 KiB
TypeScript

/**
* MI Media Generation — shared helpers for image and video generation.
*
* Extracted from server/index.ts endpoints so the agentic loop can
* call them directly without HTTP round-trips.
*/
import { resolve } from "path";
/** fal.ai API key, read once at module load; empty string when the variable is unset. */
const FAL_KEY = process.env.FAL_KEY ?? "";

/** Gemini API key, read once at module load; empty string when the variable is unset. */
const GEMINI_API_KEY = process.env.GEMINI_API_KEY ?? "";
/**
 * Prompt prefixes keyed by style name.
 *
 * Every value ends with ", " so it can be prepended directly to a
 * user-supplied prompt without additional punctuation.
 */
const STYLE_PROMPTS: Record<string, string> = {
  "illustration": "digital illustration style, ",
  "photorealistic": "photorealistic, high detail, ",
  "painting": "oil painting style, artistic, ",
  "sketch": "pencil sketch style, hand-drawn, ",
  "punk-zine": "punk zine aesthetic, cut-and-paste collage, bold contrast, ",
};
/**
 * Prompt prefixes keyed by style name, used when building Gemini prompts.
 *
 * Every value ends with ", " so it can be prepended directly to a
 * user-supplied prompt without additional punctuation.
 */
const GEMINI_STYLE_HINTS: Record<string, string> = {
  "photorealistic": "photorealistic, high detail, natural lighting, ",
  "illustration": "digital illustration, clean lines, vibrant colors, ",
  "painting": "oil painting style, brushstrokes visible, painterly, ",
  "sketch": "pencil sketch, hand-drawn, line art, ",
  "punk-zine": "punk zine aesthetic, xerox texture, high contrast, DIY, rough edges, ",
  "collage": "cut-and-paste collage, mixed media, layered paper textures, ",
  "vintage": "vintage aesthetic, retro colors, aged paper texture, ",
  "minimalist": "minimalist design, simple shapes, limited color palette, ",
};
/** Successful generation: `url` locates the produced media. */
export interface MediaResult {
  ok: true;
  url: string;
}

/** Failed generation: `error` carries a short human-readable reason. */
export interface MediaError {
  ok: false;
  error: string;
}

/**
 * Result of any generation helper in this module.
 * Discriminated on `ok`, so checking it narrows to the matching variant.
 */
export type MediaOutcome = MediaResult | MediaError;
/**
 * Generate an image via fal.ai Flux Pro (v1.1 endpoint).
 *
 * @param prompt - Text description of the desired image.
 * @param style - Optional key into `STYLE_PROMPTS`; unknown keys add no prefix.
 * @returns `{ ok: true, url }` with the URL reported by fal.ai, or
 *   `{ ok: false, error }` when the key is missing, the request fails, or the
 *   response contains no image URL. Transport and JSON-parse failures are
 *   reported as a `MediaError` instead of a rejected promise, so callers can
 *   rely on the `ok` flag alone.
 */
export async function generateImageViaFal(prompt: string, style?: string): Promise<MediaOutcome> {
  if (!FAL_KEY) return { ok: false, error: "FAL_KEY not configured" };

  // Own-property check: a bare index lookup also matches inherited keys such
  // as "constructor", which would splice a function's source into the prompt.
  const stylePrefix =
    style !== undefined && Object.prototype.hasOwnProperty.call(STYLE_PROMPTS, style)
      ? (STYLE_PROMPTS[style] ?? "")
      : "";
  const styledPrompt = stylePrefix + prompt;

  try {
    const res = await fetch("https://fal.run/fal-ai/flux-pro/v1.1", {
      method: "POST",
      headers: {
        Authorization: `Key ${FAL_KEY}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        prompt: styledPrompt,
        image_size: "landscape_4_3",
        num_images: 1,
        safety_tolerance: "2",
      }),
    });

    if (!res.ok) {
      const err = await res.text();
      console.error("[mi-media] fal.ai image error:", err);
      return { ok: false, error: "Image generation failed" };
    }

    // Two response shapes are accepted: `images[0].url` and `output.url`.
    const data = (await res.json()) as {
      images?: { url?: string }[];
      output?: { url?: string };
    } | null;
    const imageUrl = data?.images?.[0]?.url || data?.output?.url;
    if (!imageUrl) return { ok: false, error: "No image returned" };
    return { ok: true, url: imageUrl };
  } catch (e: unknown) {
    // Network failure or a non-JSON body: report it through the return value.
    console.error("[mi-media] fal.ai image error:", e instanceof Error ? e.message : e);
    return { ok: false, error: "Image generation failed" };
  }
}
/**
 * Decode a base64 image, write it under `<FILES_DIR>/generated` (default
 * `./data/files/generated`), and return the URL it is served from.
 *
 * NOTE(review): the returned URL always starts with `/data/files/generated/`,
 * even when `FILES_DIR` points elsewhere on disk — confirm the static route
 * maps to the same directory.
 */
async function saveGeneratedImage(filename: string, b64: string): Promise<MediaResult> {
  const dir = resolve(process.env.FILES_DIR || "./data/files", "generated");
  await Bun.write(resolve(dir, filename), Buffer.from(b64, "base64"));
  return { ok: true, url: `/data/files/generated/${filename}` };
}

/**
 * Generate an image via Gemini, trying `gemini-2.5-flash-image` first and
 * `imagen-3.0-generate-002` second. The first model that yields image bytes
 * wins; the bytes are written to disk and a local URL is returned.
 *
 * @param prompt - Text description of the desired image.
 * @param style - Optional key into `GEMINI_STYLE_HINTS`; unknown keys add no prefix.
 * @returns `{ ok: true, url }` for the saved file, or `{ ok: false, error }`
 *   when the key is missing or no model produced an image. Per-model errors
 *   are logged and the next model is tried.
 */
export async function generateImageViaGemini(prompt: string, style?: string): Promise<MediaOutcome> {
  if (!GEMINI_API_KEY) return { ok: false, error: "GEMINI_API_KEY not configured" };

  // Own-property check: a bare index lookup also matches inherited keys such
  // as "constructor", which would splice a function's source into the prompt.
  const styleHint =
    style !== undefined && Object.prototype.hasOwnProperty.call(GEMINI_STYLE_HINTS, style)
      ? (GEMINI_STYLE_HINTS[style] ?? "")
      : "";
  const enhancedPrompt = styleHint + prompt;

  // Loaded lazily so the SDK is only imported when this path actually runs.
  const { GoogleGenAI } = await import("@google/genai");
  const ai = new GoogleGenAI({ apiKey: GEMINI_API_KEY });

  const models = ["gemini-2.5-flash-image", "imagen-3.0-generate-002"];
  for (const modelName of models) {
    try {
      if (modelName.startsWith("gemini")) {
        const result = await ai.models.generateContent({
          model: modelName,
          contents: enhancedPrompt,
          config: { responseModalities: ["Text", "Image"] },
        });
        const parts = result.candidates?.[0]?.content?.parts || [];
        for (const part of parts) {
          // Text parts carry no inlineData; skip until the first image part.
          const inline = (part as any).inlineData;
          if (!inline) continue;
          const { data: b64, mimeType } = inline;
          const ext = mimeType?.includes("png") ? "png" : "jpg";
          // `return await` keeps write failures inside this try block.
          return await saveGeneratedImage(`gemini-${Date.now()}.${ext}`, b64);
        }
      } else {
        const result = await ai.models.generateImages({
          model: modelName,
          prompt: enhancedPrompt,
          config: { numberOfImages: 1, aspectRatio: "3:4" },
        });
        const img = (result as any).generatedImages?.[0];
        if (img?.image?.imageBytes) {
          return await saveGeneratedImage(`imagen-${Date.now()}.png`, img.image.imageBytes);
        }
      }
    } catch (e: unknown) {
      // Narrow before reading `.message`; thrown values need not be Error objects.
      console.error(`[mi-media] ${modelName} error:`, e instanceof Error ? e.message : e);
    }
  }
  return { ok: false, error: "All Gemini image models failed" };
}
/**
 * POST a JSON payload to a fal.ai video endpoint and extract the video URL.
 *
 * @param endpoint - Full fal.ai endpoint URL.
 * @param payload - Request body; serialized with `JSON.stringify`.
 * @param logTag - Short label ("i2v" / "t2v") included in error log lines.
 * @returns `{ ok: true, url }` or `{ ok: false, error }`; never rejects for
 *   transport or JSON-parse failures.
 */
async function requestFalVideo(
  endpoint: string,
  payload: Record<string, unknown>,
  logTag: string,
): Promise<MediaOutcome> {
  try {
    const res = await fetch(endpoint, {
      method: "POST",
      headers: {
        Authorization: `Key ${FAL_KEY}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify(payload),
    });

    if (!res.ok) {
      const err = await res.text();
      console.error(`[mi-media] fal.ai ${logTag} error:`, err);
      return { ok: false, error: "Video generation failed" };
    }

    // Two response shapes are accepted: `video.url` and `output.url`.
    const data = (await res.json()) as {
      video?: { url?: string };
      output?: { url?: string };
    } | null;
    const videoUrl = data?.video?.url || data?.output?.url;
    if (!videoUrl) return { ok: false, error: "No video returned" };
    return { ok: true, url: videoUrl };
  } catch (e: unknown) {
    // Network failure or a non-JSON body: report it through the return value.
    console.error(`[mi-media] fal.ai ${logTag} error:`, e instanceof Error ? e.message : e);
    return { ok: false, error: "Video generation failed" };
  }
}

/**
 * Generate a video via fal.ai.
 *
 * With `source_image` set, runs image-to-video on the Kling v1 standard
 * endpoint (5-second clip, 16:9). Otherwise runs text-to-video on WAN 2.1
 * (49 frames, 480p).
 *
 * @param prompt - Text description; sent as "" to Kling when empty.
 * @param source_image - Optional image URL that selects the image-to-video path.
 * @returns `{ ok: true, url }` with the URL reported by fal.ai, or
 *   `{ ok: false, error }` when the key is missing, the request fails, or the
 *   response contains no video URL.
 */
export async function generateVideoViaFal(prompt: string, source_image?: string): Promise<MediaOutcome> {
  if (!FAL_KEY) return { ok: false, error: "FAL_KEY not configured" };

  if (source_image) {
    // Image-to-video via Kling
    return requestFalVideo(
      "https://fal.run/fal-ai/kling-video/v1/standard/image-to-video",
      {
        image_url: source_image,
        prompt: prompt || "",
        duration: "5",
        aspect_ratio: "16:9",
      },
      "i2v",
    );
  }

  // Text-to-video via WAN 2.1
  return requestFalVideo(
    "https://fal.run/fal-ai/wan/v2.1",
    {
      prompt,
      num_frames: 49,
      resolution: "480p",
    },
    "t2v",
  );
}
/**
 * Generate an image, trying fal.ai first and falling back to Gemini.
 *
 * The fallback runs both when fal.ai returns `{ ok: false }` and when the
 * fal.ai call rejects (for example on a network failure), so a transport
 * error on the first provider does not prevent the second from being tried.
 *
 * @param prompt - Text description of the desired image.
 * @param style - Optional style key forwarded to both providers.
 * @returns The fal.ai result on success, otherwise whatever the Gemini path
 *   returns. Rejections from the Gemini path are not caught here.
 */
export async function generateImage(prompt: string, style?: string): Promise<MediaOutcome> {
  try {
    const falResult = await generateImageViaFal(prompt, style);
    if (falResult.ok) return falResult;
  } catch (e: unknown) {
    // Treat a rejected fal.ai call like any other fal.ai failure and fall through.
    console.error("[mi-media] fal.ai image error:", e instanceof Error ? e.message : e);
  }
  return generateImageViaGemini(prompt, style);
}