feat(infra): add ollama to on-demand sidecar lifecycle

Ollama now starts on demand when an API request needs it and stops after
5 minutes of idle time, saving ~5-6 GB of RAM when not in use. Part of the
server-wide resource-caps rollout.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jeff Emmett 2026-03-31 14:26:23 -07:00
parent dbfddb2fb5
commit bed124f869
2 changed files with 8 additions and 0 deletions

View File

@@ -1969,6 +1969,7 @@ app.post("/api/prompt", async (c) => {
if (OLLAMA_MODELS[model]) {
try {
await ensureSidecar("ollama");
const ollamaRes = await fetch(`${OLLAMA_URL}/api/chat`, {
method: "POST",
headers: { "Content-Type": "application/json" },
@@ -1989,6 +1990,7 @@ app.post("/api/prompt", async (c) => {
}
const data = await ollamaRes.json();
markSidecarUsed("ollama");
return c.json({ content: data.message?.content || "" });
} catch (e: any) {
console.error("[prompt] Ollama unreachable:", e.message);

View File

@@ -39,6 +39,12 @@ const SIDECARS: Record<string, SidecarConfig> = {
port: 8810,
healthTimeout: 15_000,
},
ollama: {
container: "ollama",
host: "ollama",
port: 11434,
healthTimeout: 30_000,
},
};
const lastUsed = new Map<string, number>();