From bed124f869b84275473f6eb793bc7e38d4665972 Mon Sep 17 00:00:00 2001 From: Jeff Emmett Date: Tue, 31 Mar 2026 14:26:23 -0700 Subject: [PATCH] feat(infra): add ollama to on-demand sidecar lifecycle Ollama now starts on API request and stops after 5min idle, saving ~5-6GB RAM when not in use. Part of server-wide resource caps rollout. Co-Authored-By: Claude Opus 4.6 --- server/index.ts | 2 ++ server/sidecar-manager.ts | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/server/index.ts b/server/index.ts index 3d16b16..719a80a 100644 --- a/server/index.ts +++ b/server/index.ts @@ -1969,6 +1969,7 @@ app.post("/api/prompt", async (c) => { if (OLLAMA_MODELS[model]) { try { + await ensureSidecar("ollama"); const ollamaRes = await fetch(`${OLLAMA_URL}/api/chat`, { method: "POST", headers: { "Content-Type": "application/json" }, @@ -1989,6 +1990,7 @@ app.post("/api/prompt", async (c) => { } const data = await ollamaRes.json(); + markSidecarUsed("ollama"); return c.json({ content: data.message?.content || "" }); } catch (e: any) { console.error("[prompt] Ollama unreachable:", e.message); diff --git a/server/sidecar-manager.ts b/server/sidecar-manager.ts index 9c5ebfe..ca0da6e 100644 --- a/server/sidecar-manager.ts +++ b/server/sidecar-manager.ts @@ -39,6 +39,12 @@ const SIDECARS: Record = { port: 8810, healthTimeout: 15_000, }, + ollama: { + container: "ollama", + host: "ollama", + port: 11434, + healthTimeout: 30_000, + }, }; const lastUsed = new Map();