From ee251fd621d6d86186fac6f6d79f95a085ed05c5 Mon Sep 17 00:00:00 2001 From: Jeff Emmett Date: Thu, 16 Apr 2026 18:49:52 -0400 Subject: [PATCH] refactor(sidecar): delegate lifecycle to Sablier instead of Docker socket MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the custom Docker Engine API implementation in sidecar-manager.ts with HTTP calls to Sablier's blocking strategy endpoint. Sablier owns the Docker socket, handles start + readiness + session-TTL idle stop. - Drops ~80 lines of Docker API plumbing and the idle-watcher interval - Public API (ensureSidecar/markSidecarUsed/isSidecarRunning/startIdleWatcher) unchanged — callers in server/index.ts untouched - SABLIER_URL defaults to http://sablier:10000 (reachable once sablier is attached to rspace-online_rspace-internal; dev-ops change separate) - SIDECAR_SESSION_DURATION env (default 5m) matches previous idle timeout - Graceful no-op when Sablier unreachable (local dev) Co-Authored-By: Claude Opus 4.7 (1M context) --- server/sidecar-manager.ts | 285 ++++++++++++-------------------------- 1 file changed, 89 insertions(+), 196 deletions(-) diff --git a/server/sidecar-manager.ts b/server/sidecar-manager.ts index 18079123..91e6f5ab 100644 --- a/server/sidecar-manager.ts +++ b/server/sidecar-manager.ts @@ -1,232 +1,125 @@ /** - * Sidecar Lifecycle Manager — starts Docker containers on demand, stops after idle. + * Sidecar Lifecycle Manager — on-demand container wake-up via Sablier. * - * Uses Docker Engine API over Unix socket to control sidecar containers - * (kicad-mcp, freecad-mcp, blender-worker) without keeping them running 24/7. + * Delegates container start / idle-stop to the Sablier service reachable at + * SABLIER_URL (default http://sablier:10000 on the rspace-internal network). + * Sablier uses the Docker Engine API on its own socket mount to start named + * containers and stops them after the session TTL expires with no refresh. * - * Requires /var/run/docker.sock mounted into the rspace container. + * Public API is unchanged from the previous Docker-socket implementation so + * callers in server/index.ts do not need to change. */ -import http from "node:http"; -import fs from "node:fs"; - -const DOCKER_SOCKET = "/var/run/docker.sock"; -const IDLE_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes - interface SidecarConfig { container: string; host: string; port: number; - healthTimeout: number; // max ms to wait for ready + /** Max ms to block waiting for the container to become ready. */ + healthTimeout: number; } const SIDECARS: Record = { - "kicad-mcp": { - container: "kicad-mcp", - host: "kicad-mcp", - port: 8809, - healthTimeout: 45_000, // KiCad takes a while to init - }, - "freecad-mcp": { - container: "freecad-mcp", - host: "freecad-mcp", - port: 8808, - healthTimeout: 30_000, - }, - "blender-worker": { - container: "blender-worker", - host: "blender-worker", - port: 8810, - healthTimeout: 15_000, - }, - ollama: { - container: "ollama", - host: "ollama", - port: 11434, - healthTimeout: 30_000, - }, - "scribus-novnc": { - container: "scribus-novnc", - host: "scribus-novnc", - port: 8765, - healthTimeout: 30_000, - }, - "open-notebook": { - container: "open-notebook", - host: "open-notebook", - port: 5055, - healthTimeout: 45_000, - }, + "kicad-mcp": { container: "kicad-mcp", host: "kicad-mcp", port: 8809, healthTimeout: 45_000 }, + "freecad-mcp": { container: "freecad-mcp", host: "freecad-mcp", port: 8808, healthTimeout: 30_000 }, + "blender-worker":{ container: "blender-worker",host: "blender-worker",port: 8810, healthTimeout: 15_000 }, + "ollama": { container: "ollama", host: "ollama", port: 11434, healthTimeout: 30_000 }, + "scribus-novnc": { container: "scribus-novnc", host: "scribus-novnc", port: 8765, healthTimeout: 30_000 }, + "open-notebook": { container: "open-notebook", host: "open-notebook", port: 5055, healthTimeout: 45_000 }, }; -const lastUsed = new Map(); -let idleCheckTimer: ReturnType | null = null; -let dockerAvailable = false; +const SABLIER_URL = process.env.SABLIER_URL || "http://sablier:10000"; +const SESSION_DURATION = process.env.SIDECAR_SESSION_DURATION || "5m"; -// Check Docker socket on load -try { - fs.accessSync(DOCKER_SOCKET); - dockerAvailable = true; -} catch { - console.log("[sidecar] Docker socket not available — lifecycle management disabled"); -} +let sablierReachable: boolean | null = null; -// ── Docker Engine API over Unix socket ── - -function dockerApi(method: string, path: string, sendBody?: boolean): Promise<{ status: number; body: any }> { - return new Promise((resolve, reject) => { - const headers: Record = {}; - // Only set Content-Type when we actually send a JSON body - if (sendBody) headers["Content-Type"] = "application/json"; - const req = http.request( - { - socketPath: DOCKER_SOCKET, - path: `/v1.44${path}`, - method, - headers, - }, - (res) => { - let data = ""; - res.on("data", (chunk) => (data += chunk)); - res.on("end", () => { - let body: any = data; - try { - body = JSON.parse(data); - } catch {} - resolve({ status: res.statusCode || 0, body }); - }); - }, - ); - req.on("error", reject); - req.setTimeout(10_000, () => { - req.destroy(new Error("Docker API timeout")); - }); - req.end(); - }); -} - -async function isContainerRunning(name: string): Promise { +async function probeSablier(): Promise { + if (sablierReachable !== null) return sablierReachable; try { - const { body } = await dockerApi("GET", `/containers/${name}/json`); - return body?.State?.Running === true; + const res = await fetch(`${SABLIER_URL}/health`, { signal: AbortSignal.timeout(2000) }); + sablierReachable = res.ok; } catch { - return false; + sablierReachable = false; + console.log("[sidecar] Sablier unreachable at", SABLIER_URL, "— lifecycle management disabled"); } + return sablierReachable; } -async function startContainer(name: string): Promise { - const { status, body } = await dockerApi("POST", `/containers/${name}/start`); - // 204 = started, 304 = already running - if (status !== 204 && status !== 304) { - const detail = typeof body === "object" ? JSON.stringify(body) : body; - throw new Error(`Failed to start ${name}: HTTP ${status} — ${detail}`); +/** + * GET /api/strategies/blocking — Sablier starts the named container, waits + * for it to be ready (per its own health check policy), and returns 200. + * 202 = still starting past our timeout; we proceed anyway and let the + * caller's request retry logic handle the brief window. + */ +async function sablierWake(config: SidecarConfig): Promise { + const qs = new URLSearchParams({ + names: config.container, + session_duration: SESSION_DURATION, + timeout: `${Math.max(1, Math.floor(config.healthTimeout / 1000))}s`, + }); + const url = `${SABLIER_URL}/api/strategies/blocking?${qs.toString()}`; + const res = await fetch(url, { signal: AbortSignal.timeout(config.healthTimeout + 5_000) }); + if (!res.ok && res.status !== 202) { + throw new Error(`Sablier wake returned ${res.status} for ${config.container}`); } } -async function stopContainer(name: string): Promise { - try { - await dockerApi("POST", `/containers/${name}/stop?t=10`); - console.log(`[sidecar] Stopped ${name}`); - } catch (e) { - console.warn(`[sidecar] Failed to stop ${name}:`, e); - } -} - -/** Wait until the sidecar's HTTP port accepts connections */ -async function waitForReady(config: SidecarConfig): Promise { - const deadline = Date.now() + config.healthTimeout; - const url = - config.container === "blender-worker" - ? `http://${config.host}:${config.port}/health` - : `http://${config.host}:${config.port}/`; - - while (Date.now() < deadline) { - try { - await fetch(url, { signal: AbortSignal.timeout(2000) }); - return; // Any response means the server is up - } catch { - // Connection refused or timeout — retry - } - await new Promise((r) => setTimeout(r, 1000)); - } - console.warn(`[sidecar] ${config.container} health check timed out after ${config.healthTimeout}ms — proceeding anyway`); -} - // ── Public API ── /** - * Ensure a sidecar container is running and ready. - * Starts the container if stopped, waits for health, updates last-used timestamp. - * No-op if Docker socket is not available (local dev). + * Ensure the named sidecar is running and ready. Extends the Sablier session + * TTL as a side effect. Silent no-op when Sablier isn't reachable (local dev). */ export async function ensureSidecar(name: string): Promise { const config = SIDECARS[name]; if (!config) throw new Error(`Unknown sidecar: ${name}`); + if (!(await probeSablier())) return; - lastUsed.set(name, Date.now()); - - if (!dockerAvailable) return; - - const running = await isContainerRunning(config.container); - if (running) return; - - console.log(`[sidecar] Starting ${name}...`); - await startContainer(config.container); - await waitForReady(config); - console.log(`[sidecar] ${name} ready`); -} - -/** Update last-used timestamp (call after long-running operations complete) */ -export function markSidecarUsed(name: string): void { - lastUsed.set(name, Date.now()); -} - -/** Check if a sidecar container is currently running (for health endpoints) */ -export async function isSidecarRunning(name: string): Promise { - if (!dockerAvailable) return true; // Assume running in local dev - const config = SIDECARS[name]; - if (!config) return false; - return isContainerRunning(config.container); -} - -// ── Idle watcher ── - -async function checkIdleContainers(): Promise { - if (!dockerAvailable) return; - const now = Date.now(); - - for (const [name, config] of Object.entries(SIDECARS)) { - let running: boolean; - try { - running = await isContainerRunning(config.container); - } catch { - continue; - } - if (!running) continue; - - const last = lastUsed.get(name); - if (!last) { - // Running but never used via API in this session — stop it - console.log(`[sidecar] Stopping unused ${name}`); - await stopContainer(config.container); - continue; - } - - if (now - last > IDLE_TIMEOUT_MS) { - console.log(`[sidecar] Stopping idle ${name} (${Math.round((now - last) / 1000)}s inactive)`); - await stopContainer(config.container); - lastUsed.delete(name); - } + try { + await sablierWake(config); + } catch (e) { + console.warn(`[sidecar] Wake failed for ${name}:`, e instanceof Error ? e.message : e); } } -/** Start the idle watcher that stops containers after IDLE_TIMEOUT_MS of inactivity */ -export function startIdleWatcher(): void { - if (!dockerAvailable) return; - if (idleCheckTimer) return; - idleCheckTimer = setInterval(checkIdleContainers, 60_000); - console.log(`[sidecar] Idle watcher started (timeout: ${IDLE_TIMEOUT_MS / 1000}s)`); - - // Initial check after 30s — stop any sidecars that were left running from a previous deploy - setTimeout(checkIdleContainers, 30_000); +/** + * Refresh the session TTL without blocking on readiness — call after a + * long-running operation completes so the sidecar stays warm for follow-ups. + */ +export function markSidecarUsed(name: string): void { + const config = SIDECARS[name]; + if (!config || sablierReachable === false) return; + const qs = new URLSearchParams({ names: config.container, session_duration: SESSION_DURATION }); + // Fire-and-forget; readiness already verified earlier via ensureSidecar. + fetch(`${SABLIER_URL}/api/strategies/blocking?${qs.toString()}`, { signal: AbortSignal.timeout(2000) }) + .catch(() => {}); +} + +/** + * Probe whether the sidecar's own HTTP port is accepting connections. + * Used by health endpoints; falls back to "assume running" when Sablier is + * unreachable so local dev health checks don't fail. + */ +export async function isSidecarRunning(name: string): Promise { + const config = SIDECARS[name]; + if (!config) return false; + if (!(await probeSablier())) return true; + try { + const url = config.container === "blender-worker" + ? `http://${config.host}:${config.port}/health` + : `http://${config.host}:${config.port}/`; + const res = await fetch(url, { signal: AbortSignal.timeout(1500) }); + return res.status < 500; + } catch { + return false; + } +} + +/** + * No-op in the Sablier era — idle shutdown is handled by Sablier's own + * session expiration (SESSION_DURATION). Kept for API compatibility. + */ +export function startIdleWatcher(): void { + probeSablier().then((ok) => { + if (ok) console.log(`[sidecar] Lifecycle delegated to Sablier at ${SABLIER_URL} (ttl ${SESSION_DURATION})`); + }); }