203 lines
6.3 KiB
TypeScript
203 lines
6.3 KiB
TypeScript
/**
 * MI Media Generation — shared helpers for image and video generation.
 *
 * Extracted from server/index.ts endpoints so the agentic loop can
 * call them directly without HTTP round-trips.
 */
|
|
|
|
import { resolve } from "path";
|
|
|
|
/** fal.ai API key taken from the environment; empty string when unset. */
const FAL_KEY = process.env.FAL_KEY ?? "";

/** Gemini API key taken from the environment; empty string when unset. */
const GEMINI_API_KEY = process.env.GEMINI_API_KEY ?? "";
|
|
|
|
/**
 * Prompt prefixes keyed by style name, used by the fal.ai image path.
 * Every value ends with ", " so it can be prepended directly to a prompt.
 */
const STYLE_PROMPTS: Record<string, string> = {
  "illustration": "digital illustration style, ",
  "photorealistic": "photorealistic, high detail, ",
  "painting": "oil painting style, artistic, ",
  "sketch": "pencil sketch style, hand-drawn, ",
  "punk-zine": "punk zine aesthetic, cut-and-paste collage, bold contrast, ",
};
|
|
|
|
/**
 * Prompt prefixes keyed by style name, used by the Gemini image path.
 * Every value ends with ", " so it can be prepended directly to a prompt.
 */
const GEMINI_STYLE_HINTS: Record<string, string> = {
  "photorealistic": "photorealistic, high detail, natural lighting, ",
  "illustration": "digital illustration, clean lines, vibrant colors, ",
  "painting": "oil painting style, brushstrokes visible, painterly, ",
  "sketch": "pencil sketch, hand-drawn, line art, ",
  "punk-zine": "punk zine aesthetic, xerox texture, high contrast, DIY, rough edges, ",
  "collage": "cut-and-paste collage, mixed media, layered paper textures, ",
  "vintage": "vintage aesthetic, retro colors, aged paper texture, ",
  "minimalist": "minimalist design, simple shapes, limited color palette, ",
};
|
|
|
|
/** Successful generation: `url` locates the produced media. */
export interface MediaResult {
  ok: true;
  url: string;
}

/** Failed generation: `error` is a short description of what went wrong. */
export interface MediaError {
  ok: false;
  error: string;
}

/** Outcome of a generation call; discriminate on `ok` before reading `url` or `error`. */
export type MediaOutcome = MediaResult | MediaError;
|
|
|
|
/**
 * Generate an image via fal.ai Flux Pro (v1.1).
 *
 * When `style` names an entry of STYLE_PROMPTS, that entry is prepended to
 * `prompt`; any other value leaves the prompt unchanged.
 *
 * @param prompt - Text description of the image to generate.
 * @param style - Optional style key looked up in STYLE_PROMPTS.
 * @returns `{ ok: true, url }` with the image URL reported by fal.ai, or
 *   `{ ok: false, error }` when FAL_KEY is missing, the request fails, or the
 *   response contains no image URL. Network and parse failures are reported
 *   as an error outcome rather than a rejected promise.
 */
export async function generateImageViaFal(prompt: string, style?: string): Promise<MediaOutcome> {
  if (!FAL_KEY) return { ok: false, error: "FAL_KEY not configured" };

  // Own-property check: a bare lookup would also find inherited members such
  // as "constructor" and concatenate a function's source text onto the prompt.
  const stylePrefix =
    style !== undefined && Object.prototype.hasOwnProperty.call(STYLE_PROMPTS, style)
      ? (STYLE_PROMPTS[style] ?? "")
      : "";
  const styledPrompt = stylePrefix + prompt;

  try {
    const res = await fetch("https://fal.run/fal-ai/flux-pro/v1.1", {
      method: "POST",
      headers: {
        Authorization: `Key ${FAL_KEY}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        prompt: styledPrompt,
        image_size: "landscape_4_3",
        num_images: 1,
        safety_tolerance: "2",
      }),
    });

    if (!res.ok) {
      const err = await res.text();
      console.error("[mi-media] fal.ai image error:", err);
      return { ok: false, error: "Image generation failed" };
    }

    const data = await res.json();
    // The URL is read from either of two response shapes.
    const imageUrl = data.images?.[0]?.url || data.output?.url;
    if (typeof imageUrl !== "string" || imageUrl === "") {
      return { ok: false, error: "No image returned" };
    }

    return { ok: true, url: imageUrl };
  } catch (e: unknown) {
    // fetch() rejects on network failure and json() on a malformed body;
    // report both as a failed outcome so callers can branch on `ok`.
    console.error("[mi-media] fal.ai image error:", e instanceof Error ? e.message : e);
    return { ok: false, error: "Image generation failed" };
  }
}
|
|
|
|
/**
 * Decode a base64 image and write it to `<FILES_DIR>/generated` (FILES_DIR
 * defaults to "./data/files"), returning the success outcome for it.
 *
 * The filename combines the prefix, the current timestamp, and a short random
 * suffix so two images generated in the same millisecond do not overwrite
 * each other.
 */
async function saveGeneratedImage(prefix: string, b64: string, ext: string): Promise<MediaResult> {
  const suffix = Math.random().toString(36).slice(2, 8);
  const filename = `${prefix}-${Date.now()}-${suffix}.${ext}`;
  const dir = resolve(process.env.FILES_DIR || "./data/files", "generated");
  await Bun.write(resolve(dir, filename), Buffer.from(b64, "base64"));
  // NOTE(review): the returned URL is fixed to /data/files/generated even when
  // FILES_DIR points elsewhere — confirm against the server's static route.
  return { ok: true, url: `/data/files/generated/${filename}` };
}

/**
 * Generate an image via Gemini, trying gemini-2.5-flash-image first and
 * imagen-3.0-generate-002 second.
 *
 * When `style` names an entry of GEMINI_STYLE_HINTS, that entry is prepended
 * to `prompt`. The first model that yields image bytes wins; the bytes are
 * written to disk and a local URL is returned. A model that throws or returns
 * no image is logged (on throw) and skipped.
 *
 * @param prompt - Text description of the image to generate.
 * @param style - Optional style key looked up in GEMINI_STYLE_HINTS.
 * @returns `{ ok: true, url }` for the saved file, or `{ ok: false, error }`
 *   when GEMINI_API_KEY is missing or no model produced an image.
 */
export async function generateImageViaGemini(prompt: string, style?: string): Promise<MediaOutcome> {
  if (!GEMINI_API_KEY) return { ok: false, error: "GEMINI_API_KEY not configured" };

  // Own-property check so inherited names like "constructor" are not treated as styles.
  const styleHint =
    style !== undefined && Object.prototype.hasOwnProperty.call(GEMINI_STYLE_HINTS, style)
      ? (GEMINI_STYLE_HINTS[style] ?? "")
      : "";
  const enhancedPrompt = styleHint + prompt;

  // Loaded lazily so the SDK is only imported when this path is actually used.
  const { GoogleGenAI } = await import("@google/genai");
  const ai = new GoogleGenAI({ apiKey: GEMINI_API_KEY });

  const models = ["gemini-2.5-flash-image", "imagen-3.0-generate-002"];
  for (const modelName of models) {
    try {
      if (modelName.startsWith("gemini")) {
        const result = await ai.models.generateContent({
          model: modelName,
          contents: enhancedPrompt,
          config: { responseModalities: ["Text", "Image"] },
        });

        const parts = result.candidates?.[0]?.content?.parts ?? [];
        for (const part of parts) {
          const inline = part.inlineData;
          // Skip text parts and inline blobs that carry no payload.
          if (!inline?.data) continue;
          const ext = inline.mimeType?.includes("png") ? "png" : "jpg";
          // `return await` keeps write failures inside this try block.
          return await saveGeneratedImage("gemini", inline.data, ext);
        }
      } else {
        const result = await ai.models.generateImages({
          model: modelName,
          prompt: enhancedPrompt,
          config: { numberOfImages: 1, aspectRatio: "3:4" },
        });
        const imageBytes = result.generatedImages?.[0]?.image?.imageBytes;
        if (imageBytes) {
          return await saveGeneratedImage("imagen", imageBytes, "png");
        }
      }
    } catch (e: unknown) {
      // Log and fall through to the next model.
      console.error(`[mi-media] ${modelName} error:`, e instanceof Error ? e.message : e);
    }
  }

  return { ok: false, error: "All Gemini image models failed" };
}
|
|
|
|
/**
 * POST a JSON payload to a fal.ai video endpoint and extract the video URL.
 *
 * `logTag` ("i2v" or "t2v") is interpolated into the console error prefix so
 * the two request kinds stay distinguishable in logs. Network and parse
 * failures are reported as an error outcome rather than a rejected promise.
 */
async function requestFalVideo(
  endpoint: string,
  payload: Record<string, unknown>,
  logTag: string,
): Promise<MediaOutcome> {
  try {
    const res = await fetch(endpoint, {
      method: "POST",
      headers: {
        Authorization: `Key ${FAL_KEY}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify(payload),
    });

    if (!res.ok) {
      const err = await res.text();
      console.error(`[mi-media] fal.ai ${logTag} error:`, err);
      return { ok: false, error: "Video generation failed" };
    }

    const data = await res.json();
    // The URL is read from either of two response shapes.
    const videoUrl = data.video?.url || data.output?.url;
    if (typeof videoUrl !== "string" || videoUrl === "") {
      return { ok: false, error: "No video returned" };
    }
    return { ok: true, url: videoUrl };
  } catch (e: unknown) {
    // fetch() rejects on network failure and json() on a malformed body.
    console.error(`[mi-media] fal.ai ${logTag} error:`, e instanceof Error ? e.message : e);
    return { ok: false, error: "Video generation failed" };
  }
}

/**
 * Generate a video via fal.ai.
 *
 * With `source_image`, requests image-to-video from the Kling endpoint
 * (duration "5", aspect ratio "16:9"); otherwise requests text-to-video from
 * the WAN 2.1 endpoint (49 frames, "480p").
 *
 * @param prompt - Text prompt; sent as "" to the image-to-video endpoint when empty.
 * @param source_image - Optional image URL; its presence selects image-to-video.
 * @returns `{ ok: true, url }` with the video URL reported by fal.ai, or
 *   `{ ok: false, error }` when FAL_KEY is missing, the request fails, or the
 *   response contains no video URL.
 */
export async function generateVideoViaFal(prompt: string, source_image?: string): Promise<MediaOutcome> {
  if (!FAL_KEY) return { ok: false, error: "FAL_KEY not configured" };

  if (source_image) {
    // Image-to-video via Kling
    return requestFalVideo(
      "https://fal.run/fal-ai/kling-video/v1/standard/image-to-video",
      {
        image_url: source_image,
        prompt: prompt || "",
        duration: "5",
        aspect_ratio: "16:9",
      },
      "i2v",
    );
  }

  // Text-to-video via WAN 2.1
  return requestFalVideo(
    "https://fal.run/fal-ai/wan/v2.1",
    {
      prompt,
      num_frames: 49,
      resolution: "480p",
    },
    "t2v",
  );
}
|
|
|
|
/**
 * Generate an image, trying fal.ai first and falling back to Gemini.
 *
 * The Gemini path runs whenever fal.ai does not succeed — whether it returns
 * an error outcome or its promise rejects (for example on a network failure).
 *
 * @param prompt - Text description of the image to generate.
 * @param style - Optional style key forwarded to both providers.
 * @returns The fal.ai result when it succeeds, otherwise whatever the Gemini
 *   path returns.
 */
export async function generateImage(prompt: string, style?: string): Promise<MediaOutcome> {
  try {
    const falResult = await generateImageViaFal(prompt, style);
    if (falResult.ok) return falResult;
  } catch (e: unknown) {
    // A thrown fal.ai error must not bypass the fallback below.
    console.error(
      "[mi-media] fal.ai image threw, falling back to Gemini:",
      e instanceof Error ? e.message : e,
    );
  }

  return generateImageViaGemini(prompt, style);
}
|