feat(infra): add ollama to on-demand sidecar lifecycle
Ollama now starts on demand when an API request arrives and stops after 5 minutes idle, saving ~5-6 GB of RAM when not in use. Part of the server-wide resource caps rollout.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
dbfddb2fb5
commit
bed124f869
|
|
@ -1969,6 +1969,7 @@ app.post("/api/prompt", async (c) => {
|
|||
|
||||
if (OLLAMA_MODELS[model]) {
|
||||
try {
|
||||
await ensureSidecar("ollama");
|
||||
const ollamaRes = await fetch(`${OLLAMA_URL}/api/chat`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
|
|
@ -1989,6 +1990,7 @@ app.post("/api/prompt", async (c) => {
|
|||
}
|
||||
|
||||
const data = await ollamaRes.json();
|
||||
markSidecarUsed("ollama");
|
||||
return c.json({ content: data.message?.content || "" });
|
||||
} catch (e: any) {
|
||||
console.error("[prompt] Ollama unreachable:", e.message);
|
||||
|
|
|
|||
|
|
@ -39,6 +39,12 @@ const SIDECARS: Record<string, SidecarConfig> = {
|
|||
port: 8810,
|
||||
healthTimeout: 15_000,
|
||||
},
|
||||
ollama: {
|
||||
container: "ollama",
|
||||
host: "ollama",
|
||||
port: 11434,
|
||||
healthTimeout: 30_000,
|
||||
},
|
||||
};
|
||||
|
||||
const lastUsed = new Map<string, number>();
|
||||
|
|
|
|||
Loading…
Reference in New Issue