feat(infra): on-demand sidecar lifecycle + resource caps
KiCad, FreeCAD, and Blender sidecars now start on API request and stop after 5min idle, saving ~8GB RAM when not in use. Docker socket mounted into rspace container for container lifecycle control. Memory/CPU limits added to all services to prevent runaway resource consumption. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
26aa6433be
commit
dbfddb2fb5
|
|
@ -15,6 +15,7 @@ services:
|
|||
- rspace-docs:/data/docs
|
||||
- rspace-backups:/data/backups
|
||||
- /opt/dev-ops:/repos/dev-ops:rw
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
environment:
|
||||
- NODE_ENV=production
|
||||
- STORAGE_DIR=/data/communities
|
||||
|
|
@ -166,6 +167,8 @@ services:
|
|||
# Service configuration
|
||||
- "traefik.http.services.rspace-online.loadbalancer.server.port=3000"
|
||||
- "traefik.docker.network=traefik-public"
|
||||
mem_limit: 1536m
|
||||
cpus: 2
|
||||
networks:
|
||||
- traefik-public
|
||||
- rspace-internal
|
||||
|
|
@ -177,6 +180,8 @@ services:
|
|||
image: postgres:16-alpine
|
||||
container_name: rspace-db
|
||||
restart: unless-stopped
|
||||
mem_limit: 256m
|
||||
cpus: 1
|
||||
volumes:
|
||||
- rspace-pgdata:/var/lib/postgresql/data
|
||||
- ./db/init.sql:/docker-entrypoint-initdb.d/init.sql:ro
|
||||
|
|
@ -201,6 +206,8 @@ services:
|
|||
encryptid-sdk: ../encryptid-sdk
|
||||
container_name: encryptid
|
||||
restart: unless-stopped
|
||||
mem_limit: 256m
|
||||
cpus: 1
|
||||
depends_on:
|
||||
encryptid-db:
|
||||
condition: service_healthy
|
||||
|
|
@ -248,6 +255,8 @@ services:
|
|||
image: postgres:16-alpine
|
||||
container_name: encryptid-db
|
||||
restart: unless-stopped
|
||||
mem_limit: 256m
|
||||
cpus: 1
|
||||
environment:
|
||||
- POSTGRES_DB=encryptid
|
||||
- POSTGRES_USER=encryptid
|
||||
|
|
@ -263,31 +272,43 @@ services:
|
|||
retries: 5
|
||||
start_period: 10s
|
||||
|
||||
# ── KiCad MCP sidecar (PCB design via SSE) ──
|
||||
# ── On-demand sidecars (started/stopped by server/sidecar-manager.ts) ──
|
||||
# Build: docker compose --profile sidecar build
|
||||
# Create: docker compose --profile sidecar create
|
||||
# These containers are NOT started with `docker compose up -d`.
|
||||
# The rspace server starts them on API request and stops them after 5min idle.
|
||||
|
||||
kicad-mcp:
|
||||
build: ./docker/kicad-mcp
|
||||
container_name: kicad-mcp
|
||||
restart: unless-stopped
|
||||
restart: "no"
|
||||
profiles: ["sidecar"]
|
||||
mem_limit: 2g
|
||||
cpus: 1
|
||||
volumes:
|
||||
- rspace-files:/data/files
|
||||
networks:
|
||||
- rspace-internal
|
||||
|
||||
# ── FreeCAD MCP sidecar (3D CAD via SSE) ──
|
||||
freecad-mcp:
|
||||
build: ./docker/freecad-mcp
|
||||
container_name: freecad-mcp
|
||||
restart: unless-stopped
|
||||
restart: "no"
|
||||
profiles: ["sidecar"]
|
||||
mem_limit: 1g
|
||||
cpus: 1
|
||||
volumes:
|
||||
- rspace-files:/data/files
|
||||
networks:
|
||||
- rspace-internal
|
||||
|
||||
# ── Blender headless render worker ──
|
||||
blender-worker:
|
||||
build: ./docker/blender-worker
|
||||
container_name: blender-worker
|
||||
restart: unless-stopped
|
||||
restart: "no"
|
||||
profiles: ["sidecar"]
|
||||
mem_limit: 1g
|
||||
cpus: 2
|
||||
volumes:
|
||||
- rspace-files:/data/files
|
||||
networks:
|
||||
|
|
@ -299,6 +320,8 @@ services:
|
|||
context: ./docker/scribus-novnc
|
||||
container_name: scribus-novnc
|
||||
restart: unless-stopped
|
||||
mem_limit: 512m
|
||||
cpus: 1
|
||||
volumes:
|
||||
- scribus-designs:/data/designs
|
||||
- rspace-files:/data/files
|
||||
|
|
@ -325,6 +348,8 @@ services:
|
|||
image: ghcr.io/lfnovo/open-notebook:v1-latest-single
|
||||
container_name: open-notebook
|
||||
restart: always
|
||||
mem_limit: 1g
|
||||
cpus: 1
|
||||
env_file: ./open-notebook.env
|
||||
volumes:
|
||||
- open-notebook-data:/app/data
|
||||
|
|
|
|||
|
|
@ -1575,12 +1575,15 @@ app.get("/api/blender-gen/health", async (c) => {
|
|||
const warnings: string[] = [];
|
||||
if (!GEMINI_API_KEY) issues.push("GEMINI_API_KEY not configured");
|
||||
|
||||
// Check blender-worker health
|
||||
try {
|
||||
const res = await fetch(`${BLENDER_WORKER_URL}/health`, { signal: AbortSignal.timeout(3000) });
|
||||
if (!res.ok) warnings.push("blender-worker unhealthy");
|
||||
} catch {
|
||||
warnings.push("blender-worker unreachable — script-only mode");
|
||||
const running = await isSidecarRunning("blender-worker");
|
||||
if (!running) warnings.push("blender-worker stopped (will start on demand)");
|
||||
else {
|
||||
try {
|
||||
const res = await fetch(`${BLENDER_WORKER_URL}/health`, { signal: AbortSignal.timeout(3000) });
|
||||
if (!res.ok) warnings.push("blender-worker unhealthy");
|
||||
} catch {
|
||||
warnings.push("blender-worker unreachable");
|
||||
}
|
||||
}
|
||||
|
||||
return c.json({ available: issues.length === 0, issues, warnings });
|
||||
|
|
@ -1622,8 +1625,9 @@ Output ONLY the Python code, no explanations or comments outside the code.`);
|
|||
return c.json({ error: "Failed to generate Blender script" }, 502);
|
||||
}
|
||||
|
||||
// Step 2: Execute on blender-worker sidecar (headless Blender)
|
||||
// Step 2: Start sidecar on demand, execute on blender-worker
|
||||
try {
|
||||
await ensureSidecar("blender-worker");
|
||||
const workerRes = await fetch(`${BLENDER_WORKER_URL}/render`, {
|
||||
method: "POST",
|
||||
headers: { "Content-Type": "application/json" },
|
||||
|
|
@ -1639,6 +1643,8 @@ Output ONLY the Python code, no explanations or comments outside the code.`);
|
|||
stderr?: string;
|
||||
};
|
||||
|
||||
markSidecarUsed("blender-worker");
|
||||
|
||||
if (data.success && data.render_url) {
|
||||
return c.json({ script, render_url: data.render_url });
|
||||
}
|
||||
|
|
@ -1660,11 +1666,13 @@ Output ONLY the Python code, no explanations or comments outside the code.`);
|
|||
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
|
||||
import { StreamableHTTPClientTransport } from "@modelcontextprotocol/sdk/client/streamableHttp.js";
|
||||
import { runCadAgentLoop, assembleKicadResult, assembleFreecadResult, KICAD_SYSTEM_PROMPT, FREECAD_SYSTEM_PROMPT } from "./cad-orchestrator";
|
||||
import { ensureSidecar, markSidecarUsed, isSidecarRunning, startIdleWatcher } from "./sidecar-manager";
|
||||
|
||||
const KICAD_MCP_URL = process.env.KICAD_MCP_URL || "http://kicad-mcp:8809/mcp";
|
||||
let kicadClient: Client | null = null;
|
||||
|
||||
async function getKicadClient(): Promise<Client> {
|
||||
await ensureSidecar("kicad-mcp");
|
||||
if (kicadClient) return kicadClient;
|
||||
|
||||
const transport = new StreamableHTTPClientTransport(new URL(KICAD_MCP_URL));
|
||||
|
|
@ -1680,6 +1688,8 @@ async function getKicadClient(): Promise<Client> {
|
|||
|
||||
app.get("/api/kicad/health", async (c) => {
|
||||
try {
|
||||
const running = await isSidecarRunning("kicad-mcp");
|
||||
if (!running) return c.json({ available: false, status: "stopped (starts on demand)" });
|
||||
const client = await getKicadClient();
|
||||
const tools = await client.listTools();
|
||||
return c.json({ available: true, tools: tools.tools.length });
|
||||
|
|
@ -1703,8 +1713,7 @@ app.post("/api/kicad/generate", async (c) => {
|
|||
const client = await getKicadClient();
|
||||
const orch = await runCadAgentLoop(client, KICAD_SYSTEM_PROMPT, enrichedPrompt, GEMINI_API_KEY);
|
||||
const result = assembleKicadResult(orch);
|
||||
|
||||
// Files are already on the shared /data/files volume — no copy needed
|
||||
markSidecarUsed("kicad-mcp");
|
||||
|
||||
return c.json({
|
||||
schematic_svg: result.schematicSvg,
|
||||
|
|
@ -1764,6 +1773,7 @@ const FREECAD_MCP_URL = process.env.FREECAD_MCP_URL || "http://freecad-mcp:8808/
|
|||
let freecadClient: Client | null = null;
|
||||
|
||||
async function getFreecadClient(): Promise<Client> {
|
||||
await ensureSidecar("freecad-mcp");
|
||||
if (freecadClient) return freecadClient;
|
||||
|
||||
const transport = new StreamableHTTPClientTransport(new URL(FREECAD_MCP_URL));
|
||||
|
|
@ -1779,6 +1789,8 @@ async function getFreecadClient(): Promise<Client> {
|
|||
|
||||
app.get("/api/freecad/health", async (c) => {
|
||||
try {
|
||||
const running = await isSidecarRunning("freecad-mcp");
|
||||
if (!running) return c.json({ available: false, status: "stopped (starts on demand)" });
|
||||
const client = await getFreecadClient();
|
||||
const tools = await client.listTools();
|
||||
return c.json({ available: true, tools: tools.tools.length });
|
||||
|
|
@ -1798,8 +1810,7 @@ app.post("/api/freecad/generate", async (c) => {
|
|||
const client = await getFreecadClient();
|
||||
const orch = await runCadAgentLoop(client, FREECAD_SYSTEM_PROMPT, prompt, GEMINI_API_KEY);
|
||||
const result = assembleFreecadResult(orch);
|
||||
|
||||
// Files are already on the shared /data/files volume — no copy needed
|
||||
markSidecarUsed("freecad-mcp");
|
||||
|
||||
return c.json({
|
||||
preview_url: result.previewUrl,
|
||||
|
|
@ -3757,3 +3768,6 @@ loadAllDocs(syncServer)
|
|||
|
||||
console.log(`rSpace unified server running on http://localhost:${PORT}`);
|
||||
console.log(`Modules: ${getAllModules().map((m) => `${m.icon} ${m.name}`).join(", ")}`);
|
||||
|
||||
// Start sidecar lifecycle manager — stops idle containers after 5min
|
||||
startIdleWatcher();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,210 @@
|
|||
/**
|
||||
* Sidecar Lifecycle Manager — starts Docker containers on demand, stops after idle.
|
||||
*
|
||||
* Uses Docker Engine API over Unix socket to control sidecar containers
|
||||
* (kicad-mcp, freecad-mcp, blender-worker) without keeping them running 24/7.
|
||||
*
|
||||
* Requires /var/run/docker.sock mounted into the rspace container.
|
||||
*/
|
||||
|
||||
import http from "node:http";
|
||||
import fs from "node:fs";
|
||||
|
||||
/** Filesystem path of the Docker Engine Unix socket used for all lifecycle calls. */
const DOCKER_SOCKET = "/var/run/docker.sock";
/** Inactivity threshold, in milliseconds, after which a sidecar is stopped (5 minutes). */
const IDLE_TIMEOUT_MS = 300_000;
|
||||
|
||||
/** Static description of one on-demand sidecar container. */
interface SidecarConfig {
  /** Docker container name used in Engine API paths. */
  container: string;
  /** Hostname the sidecar's HTTP server is reached at. */
  host: string;
  /** TCP port of the sidecar's HTTP server. */
  port: number;
  /** Maximum time, in milliseconds, to wait for the sidecar to become ready. */
  healthTimeout: number;
}

/**
 * Registry of managed sidecars, keyed by sidecar name.
 *
 * Every entry uses its key as both the container name and the hostname, so the
 * table below only lists what differs: `[name, port, healthTimeout]`.
 */
const SIDECARS: Record<string, SidecarConfig> = Object.fromEntries(
  (
    [
      ["kicad-mcp", 8809, 45_000], // KiCad takes a while to init
      ["freecad-mcp", 8808, 30_000],
      ["blender-worker", 8810, 15_000],
    ] as const
  ).map(([name, port, healthTimeout]): [string, SidecarConfig] => [
    name,
    { container: name, host: name, port, healthTimeout },
  ]),
);
|
||||
|
||||
/** Millisecond timestamp of the most recent use of each sidecar, keyed by sidecar name. */
const lastUsed = new Map<string, number>();
/** Handle of the periodic idle check; `null` until the idle watcher has been started. */
let idleCheckTimer: ReturnType<typeof setInterval> | null = null;
/** Whether the Docker socket is usable; when false, lifecycle management is skipped. */
let dockerAvailable = false;

// Probe the Docker socket once at module load.
// W_OK is required rather than the default existence check: connecting to a
// Unix stream socket needs write permission, so a socket that is mounted but
// not writable by this process must count as "not available" instead of
// making every later start request fail.
try {
  fs.accessSync(DOCKER_SOCKET, fs.constants.W_OK);
  dockerAvailable = true;
} catch {
  console.log("[sidecar] Docker socket not available — lifecycle management disabled");
}
|
||||
|
||||
// ── Docker Engine API over Unix socket ──
|
||||
|
||||
/**
 * Issue a body-less request to the Docker Engine API (v1.43) over the Unix socket.
 *
 * @param method HTTP method, e.g. "GET" or "POST".
 * @param path   API path below the version prefix, e.g. `/containers/foo/json`.
 * @returns The HTTP status (0 if the response carried none) and the response
 *          body, parsed as JSON when possible and left as raw text otherwise.
 *          Rejects on request/response stream errors and after 10 s of socket
 *          inactivity.
 */
function dockerApi(method: string, path: string): Promise<{ status: number; body: any }> {
  return new Promise((resolve, reject) => {
    const req = http.request(
      {
        socketPath: DOCKER_SOCKET,
        path: `/v1.43${path}`,
        method,
        headers: { "Content-Type": "application/json" },
      },
      (res) => {
        // Decode as UTF-8 at the stream level so a multi-byte character split
        // across two chunks is not corrupted by per-chunk string conversion.
        res.setEncoding("utf8");
        let data = "";
        res.on("data", (chunk: string) => {
          data += chunk;
        });
        // Without this, a response that fails mid-body would never settle the promise.
        res.on("error", reject);
        res.on("end", () => {
          let body: any = data;
          try {
            body = JSON.parse(data);
          } catch {
            // Not JSON (e.g. empty 204 body) — hand back the raw text.
          }
          resolve({ status: res.statusCode ?? 0, body });
        });
      },
    );
    req.on("error", reject);
    req.setTimeout(10_000, () => {
      req.destroy(new Error("Docker API timeout"));
    });
    req.end();
  });
}
|
||||
|
||||
/**
 * Inspect a container and report whether Docker says it is running.
 *
 * Any failure talking to Docker, and any response without
 * `State.Running === true`, is reported as "not running".
 */
async function isContainerRunning(name: string): Promise<boolean> {
  let inspected: { body: any };
  try {
    inspected = await dockerApi("GET", `/containers/${name}/json`);
  } catch {
    return false;
  }
  return inspected.body?.State?.Running === true;
}
|
||||
|
||||
/**
 * Ask Docker to start a container.
 *
 * @throws Error when Docker answers with anything other than 204 (started)
 *         or 304 (already running).
 */
async function startContainer(name: string): Promise<void> {
  const response = await dockerApi("POST", `/containers/${name}/start`);
  const started = response.status === 204;
  const alreadyRunning = response.status === 304;
  if (started || alreadyRunning) return;
  throw new Error(`Failed to start ${name}: HTTP ${response.status}`);
}
|
||||
|
||||
/**
 * Ask Docker to stop a container, allowing it a 10 s grace period (`t=10`).
 *
 * Best-effort: never throws. The outcome is only logged — success for
 * 204 (stopped) and 304 (already stopped), a warning for any other status or
 * for a transport error.
 *
 * NOTE(review): the grace period equals the 10 s request timeout used by
 * dockerApi in this file, so a container that needs the whole grace period may
 * be logged as a failed stop — confirm whether one of the two should change.
 */
async function stopContainer(name: string): Promise<void> {
  try {
    const { status, body } = await dockerApi("POST", `/containers/${name}/stop?t=10`);
    if (status === 204) {
      console.log(`[sidecar] Stopped ${name}`);
      return;
    }
    if (status === 304) {
      console.log(`[sidecar] ${name} was already stopped`);
      return;
    }
    // Docker error responses carry a JSON `message`; include it when present.
    const detail = typeof body?.message === "string" ? ` (${body.message})` : "";
    console.warn(`[sidecar] Failed to stop ${name}: HTTP ${status}${detail}`);
  } catch (e) {
    console.warn(`[sidecar] Failed to stop ${name}:`, e);
  }
}
|
||||
|
||||
/**
 * Poll the sidecar's HTTP endpoint until it answers or `healthTimeout` elapses.
 *
 * Any HTTP response — whatever its status — counts as ready. Each probe is
 * capped at 2 s and probes are spaced 1 s apart. On timeout a warning is
 * logged and the function returns normally so the caller can still try.
 */
async function waitForReady(config: SidecarConfig): Promise<void> {
  const { container, host, port, healthTimeout } = config;
  const deadline = Date.now() + healthTimeout;
  // blender-worker is probed on /health; the other sidecars are probed on "/".
  const probePath = container === "blender-worker" ? "/health" : "/";
  const url = `http://${host}:${port}${probePath}`;

  const answers = async (): Promise<boolean> => {
    try {
      await fetch(url, { signal: AbortSignal.timeout(2000) });
      return true;
    } catch {
      // Connection refused or probe timeout — not up yet.
      return false;
    }
  };

  while (Date.now() < deadline) {
    if (await answers()) return;
    await new Promise((resolve) => setTimeout(resolve, 1000));
  }

  console.warn(`[sidecar] ${container} health check timed out after ${healthTimeout}ms — proceeding anyway`);
}
|
||||
|
||||
// ── Public API ──
|
||||
|
||||
/** Start-ups currently in progress, keyed by sidecar name, so concurrent callers share one wait. */
const pendingStarts = new Map<string, Promise<void>>();

/**
 * Ensure a sidecar container is running and ready.
 *
 * Records the sidecar as used, then — if the container is stopped — starts it
 * and waits for its HTTP endpoint. Callers that arrive while a start-up is
 * already in progress wait for that same start-up instead of seeing the
 * container as "running" and returning before it is ready.
 *
 * No-op (apart from the timestamp) if the Docker socket is not available
 * (local dev).
 *
 * @param name Key of the sidecar in the registry.
 * @throws Error if `name` is not a known sidecar, or if Docker refuses to
 *         start the container.
 */
export async function ensureSidecar(name: string): Promise<void> {
  const config = SIDECARS[name];
  if (!config) throw new Error(`Unknown sidecar: ${name}`);

  lastUsed.set(name, Date.now());

  if (!dockerAvailable) return;

  const inFlight = pendingStarts.get(name);
  if (inFlight) {
    await inFlight;
    return;
  }

  // Registered before the first await so the running-check and the start are
  // covered by the same shared promise.
  const startup = (async (): Promise<void> => {
    if (await isContainerRunning(config.container)) return;

    console.log(`[sidecar] Starting ${name}...`);
    await startContainer(config.container);
    await waitForReady(config);
    console.log(`[sidecar] ${name} ready`);
  })();
  pendingStarts.set(name, startup);

  try {
    await startup;
  } finally {
    pendingStarts.delete(name);
  }
}
|
||||
|
||||
/**
 * Record that a sidecar was used just now, restarting its idle countdown.
 * Intended to be called once a long-running operation has completed.
 */
export function markSidecarUsed(name: string): void {
  const now = Date.now();
  lastUsed.set(name, now);
}
|
||||
|
||||
/**
 * Report whether a sidecar container is currently running (for health endpoints).
 *
 * Without a Docker socket (local dev) every sidecar is assumed to be running;
 * with one, unknown names are reported as not running.
 */
export async function isSidecarRunning(name: string): Promise<boolean> {
  if (dockerAvailable) {
    const config = SIDECARS[name];
    return config ? isContainerRunning(config.container) : false;
  }
  return true; // Assume running in local dev
}
|
||||
|
||||
// ── Idle watcher ──
|
||||
|
||||
/**
 * One pass of the idle watcher: stop every running sidecar that either has no
 * recorded use in this process or whose last use is older than IDLE_TIMEOUT_MS.
 * Does nothing when the Docker socket is unavailable.
 */
async function checkIdleContainers(): Promise<void> {
  if (!dockerAvailable) return;
  const now = Date.now();

  for (const [name, { container }] of Object.entries(SIDECARS)) {
    let running = false;
    try {
      running = await isContainerRunning(container);
    } catch {
      running = false;
    }
    if (!running) continue;

    const last = lastUsed.get(name);
    if (last) {
      const idleMs = now - last;
      if (idleMs > IDLE_TIMEOUT_MS) {
        console.log(`[sidecar] Stopping idle ${name} (${Math.round(idleMs / 1000)}s inactive)`);
        await stopContainer(container);
        lastUsed.delete(name);
      }
      continue;
    }

    // Running but never used via API in this session — stop it
    console.log(`[sidecar] Stopping unused ${name}`);
    await stopContainer(container);
  }
}
|
||||
|
||||
/**
 * Begin periodically stopping sidecars that have been inactive for longer than
 * IDLE_TIMEOUT_MS. Checks run every 60 s, plus one early check 30 s after
 * start. Calling it again, or calling it without a Docker socket, does nothing.
 */
export function startIdleWatcher(): void {
  if (!dockerAvailable || idleCheckTimer) return;

  const checkEveryMs = 60_000;
  const firstCheckAfterMs = 30_000;

  idleCheckTimer = setInterval(checkIdleContainers, checkEveryMs);
  console.log(`[sidecar] Idle watcher started (timeout: ${IDLE_TIMEOUT_MS / 1000}s)`);

  // Early pass to stop any sidecars that were left running from a previous deploy
  setTimeout(checkIdleContainers, firstCheckAfterMs);
}
|
||||
Loading…
Reference in New Issue