feat(infra): add ollama to on-demand sidecar lifecycle

Ollama now starts on demand when an API request arrives and stops after 5 minutes idle, saving ~5-6 GB of RAM when not in use. Part of the server-wide resource caps rollout.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-31 14:26:23 -07:00 · 2026-03-31 14:26:23 -07:00 · bed124f869
parent dbfddb2fb5
commit bed124f869
2 changed files with 8 additions and 0 deletions
--- a/server/index.ts
+++ b/server/index.ts
@ -1969,6 +1969,7 @@ app.post("/api/prompt", async (c) => {

 	if (OLLAMA_MODELS[model]) {
 		try {
+			await ensureSidecar("ollama");
 			const ollamaRes = await fetch(`${OLLAMA_URL}/api/chat`, {
 				method: "POST",
 				headers: { "Content-Type": "application/json" },
@ -1989,6 +1990,7 @@ app.post("/api/prompt", async (c) => {
 			}

 			const data = await ollamaRes.json();
+			markSidecarUsed("ollama");
 			return c.json({ content: data.message?.content || "" });
 		} catch (e: any) {
 			console.error("[prompt] Ollama unreachable:", e.message);
--- a/server/sidecar-manager.ts
+++ b/server/sidecar-manager.ts
@ -39,6 +39,12 @@ const SIDECARS: Record<string, SidecarConfig> = {
 		port: 8810,
 		healthTimeout: 15_000,
 	},
+	ollama: {
+		container: "ollama",
+		host: "ollama",
+		port: 11434,
+		healthTimeout: 30_000,
+	},
 };

 const lastUsed = new Map<string, number>();