fix(rsplat): use box_prompts to bypass SAM 3D grounding detection

Text prompts rely on Grounding DINO to detect specific objects, and that
detection fails on arbitrary images. Passing a full-image bounding box
bypasses text-based detection entirely, so the whole scene is
reconstructed as a Gaussian splat.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jeff Emmett 2026-03-16 14:30:32 -07:00
parent 8ea2bb871b
commit 2813886738
1 changed file with 5 additions and 3 deletions

View File

@@ -1025,7 +1025,6 @@ app.post("/api/3d-gen", async (c) => {
const { image_url } = await c.req.json();
if (!image_url) return c.json({ error: "image_url required" }, 400);
console.log("[3d-gen] image_url:", image_url);
try {
const controller = new AbortController();
@@ -1036,7 +1035,11 @@ app.post("/api/3d-gen", async (c) => {
Authorization: `Key ${FAL_KEY}`,
"Content-Type": "application/json",
},
body: JSON.stringify({ image_url, prompt: "person . animal . object . furniture . vehicle . building", detection_threshold: 0.1, export_textured_glb: true }),
body: JSON.stringify({
image_url,
box_prompts: [{ x_min: 0, y_min: 0, x_max: 9999, y_max: 9999 }],
export_textured_glb: true,
}),
signal: controller.signal,
});
clearTimeout(timeout);
@@ -1057,7 +1060,6 @@ app.post("/api/3d-gen", async (c) => {
}
const data = await res.json();
console.log("[3d-gen] response keys:", Object.keys(data), JSON.stringify(data).slice(0, 500));
// SAM 3D: prefer Gaussian splat (.ply), fallback to GLB mesh
const splatUrl = data.gaussian_splat?.url;
const glbUrl = data.model_glb?.url || data.glb_url || data.model_mesh?.url;