refactor(sidecar): delegate lifecycle to Sablier instead of Docker socket

Replaces the custom Docker Engine API implementation in sidecar-manager.ts
with HTTP calls to Sablier's blocking strategy endpoint. Sablier owns the
Docker socket, handles start + readiness + session-TTL idle stop.

- Drops ~80 lines of Docker API plumbing and the idle-watcher interval
- Public API (ensureSidecar/markSidecarUsed/isSidecarRunning/startIdleWatcher)
  unchanged — callers in server/index.ts untouched
- SABLIER_URL defaults to http://sablier:10000 (reachable once sablier is
  attached to rspace-online_rspace-internal; dev-ops change separate)
- SIDECAR_SESSION_DURATION env (default 5m) matches previous idle timeout
- Graceful no-op when Sablier unreachable (local dev)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Jeff Emmett 2026-04-16 18:49:52 -04:00
parent 7ada95b46a
commit ee251fd621
1 changed files with 89 additions and 196 deletions

View File

@ -1,232 +1,125 @@
/**
* Sidecar Lifecycle Manager — starts Docker containers on demand, stops after idle.
* Sidecar Lifecycle Manager — on-demand container wake-up via Sablier.
*
* Uses Docker Engine API over Unix socket to control sidecar containers
* (kicad-mcp, freecad-mcp, blender-worker) without keeping them running 24/7.
* Delegates container start / idle-stop to the Sablier service reachable at
* SABLIER_URL (default http://sablier:10000 on the rspace-internal network).
* Sablier uses the Docker Engine API on its own socket mount to start named
* containers and stops them after the session TTL expires with no refresh.
*
* Requires /var/run/docker.sock mounted into the rspace container.
* Public API is unchanged from the previous Docker-socket implementation so
* callers in server/index.ts do not need to change.
*/
import http from "node:http";
import fs from "node:fs";
const DOCKER_SOCKET = "/var/run/docker.sock";
const IDLE_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
interface SidecarConfig {
container: string;
host: string;
port: number;
healthTimeout: number; // max ms to wait for ready
/** Max ms to block waiting for the container to become ready. */
healthTimeout: number;
}
const SIDECARS: Record<string, SidecarConfig> = {
"kicad-mcp": {
container: "kicad-mcp",
host: "kicad-mcp",
port: 8809,
healthTimeout: 45_000, // KiCad takes a while to init
},
"freecad-mcp": {
container: "freecad-mcp",
host: "freecad-mcp",
port: 8808,
healthTimeout: 30_000,
},
"blender-worker": {
container: "blender-worker",
host: "blender-worker",
port: 8810,
healthTimeout: 15_000,
},
ollama: {
container: "ollama",
host: "ollama",
port: 11434,
healthTimeout: 30_000,
},
"scribus-novnc": {
container: "scribus-novnc",
host: "scribus-novnc",
port: 8765,
healthTimeout: 30_000,
},
"open-notebook": {
container: "open-notebook",
host: "open-notebook",
port: 5055,
healthTimeout: 45_000,
},
"kicad-mcp": { container: "kicad-mcp", host: "kicad-mcp", port: 8809, healthTimeout: 45_000 },
"freecad-mcp": { container: "freecad-mcp", host: "freecad-mcp", port: 8808, healthTimeout: 30_000 },
"blender-worker":{ container: "blender-worker",host: "blender-worker",port: 8810, healthTimeout: 15_000 },
"ollama": { container: "ollama", host: "ollama", port: 11434, healthTimeout: 30_000 },
"scribus-novnc": { container: "scribus-novnc", host: "scribus-novnc", port: 8765, healthTimeout: 30_000 },
"open-notebook": { container: "open-notebook", host: "open-notebook", port: 5055, healthTimeout: 45_000 },
};
const lastUsed = new Map<string, number>();
let idleCheckTimer: ReturnType<typeof setInterval> | null = null;
let dockerAvailable = false;
const SABLIER_URL = process.env.SABLIER_URL || "http://sablier:10000";
const SESSION_DURATION = process.env.SIDECAR_SESSION_DURATION || "5m";
// Check Docker socket on load
try {
fs.accessSync(DOCKER_SOCKET);
dockerAvailable = true;
} catch {
console.log("[sidecar] Docker socket not available — lifecycle management disabled");
}
let sablierReachable: boolean | null = null;
// ── Docker Engine API over Unix socket ──
/**
 * Issue a body-less request to the Docker Engine API (v1.44) over the local
 * Unix socket at DOCKER_SOCKET.
 *
 * @param method HTTP method, e.g. "GET" or "POST".
 * @param path API path starting with "/"; it is appended to the "/v1.44" prefix.
 * @param sendBody When true, a JSON Content-Type header is attached. No body is
 *   ever written by this helper; the flag only affects the header.
 * @returns The HTTP status (0 when the response carries none) and the response
 *   body, parsed as JSON when possible and left as raw text otherwise.
 *   Rejects on request/response stream errors or when the 10s request timeout fires.
 */
function dockerApi(method: string, path: string, sendBody?: boolean): Promise<{ status: number; body: any }> {
	return new Promise((resolve, reject) => {
		const headers: Record<string, string> = {};
		// Only set Content-Type when the caller says a JSON body is involved.
		if (sendBody) headers["Content-Type"] = "application/json";

		const req = http.request(
			{
				socketPath: DOCKER_SOCKET,
				path: `/v1.44${path}`,
				method,
				headers,
			},
			(res) => {
				// Collect raw buffers and decode once at the end: decoding chunk by
				// chunk would corrupt multi-byte UTF-8 sequences split across chunks.
				const chunks: Buffer[] = [];
				res.on("data", (chunk: Buffer) => chunks.push(chunk));
				res.on("error", reject);
				res.on("end", () => {
					const text = Buffer.concat(chunks).toString("utf8");
					let body: any = text;
					try {
						body = JSON.parse(text);
					} catch {
						// Not JSON (e.g. empty 204 response) — hand back the raw text.
					}
					resolve({ status: res.statusCode || 0, body });
				});
			},
		);

		req.on("error", reject);
		req.setTimeout(10_000, () => {
			// destroy(err) emits "error" on the request, which rejects the promise.
			req.destroy(new Error("Docker API timeout"));
		});
		req.end();
	});
}
async function isContainerRunning(name: string): Promise<boolean> {
async function probeSablier(): Promise<boolean> {
if (sablierReachable !== null) return sablierReachable;
try {
const { body } = await dockerApi("GET", `/containers/${name}/json`);
return body?.State?.Running === true;
const res = await fetch(`${SABLIER_URL}/health`, { signal: AbortSignal.timeout(2000) });
sablierReachable = res.ok;
} catch {
return false;
sablierReachable = false;
console.log("[sidecar] Sablier unreachable at", SABLIER_URL, "— lifecycle management disabled");
}
return sablierReachable;
}
async function startContainer(name: string): Promise<void> {
const { status, body } = await dockerApi("POST", `/containers/${name}/start`);
// 204 = started, 304 = already running
if (status !== 204 && status !== 304) {
const detail = typeof body === "object" ? JSON.stringify(body) : body;
throw new Error(`Failed to start ${name}: HTTP ${status}${detail}`);
/**
 * GET /api/strategies/blocking — Sablier starts the named container, waits
 * for it to be ready (per its own health check policy), and returns 200.
 * 202 = still starting past our timeout; we proceed anyway and let the
 * caller's request retry logic handle the brief window.
 *
 * The request carries `names`, `session_duration` (SESSION_DURATION) and a
 * `timeout` of `healthTimeout` rounded down to whole seconds (minimum 1s).
 * The local fetch is aborted 5s after that, so Sablier's own timeout fires first.
 *
 * @param config Sidecar whose container should be woken.
 * @throws Error when Sablier answers with a non-2xx status; fetch/abort errors
 *   propagate unchanged.
 */
async function sablierWake(config: SidecarConfig): Promise<void> {
	const timeoutSeconds = Math.max(1, Math.floor(config.healthTimeout / 1000));
	const qs = new URLSearchParams({
		names: config.container,
		session_duration: SESSION_DURATION,
		timeout: `${timeoutSeconds}s`,
	});
	const url = `${SABLIER_URL}/api/strategies/blocking?${qs.toString()}`;
	const res = await fetch(url, { signal: AbortSignal.timeout(config.healthTimeout + 5_000) });

	// Only the status matters; release the body so the connection is freed
	// immediately instead of waiting for garbage collection.
	await res.body?.cancel().catch(() => {});

	// res.ok spans 200-299, so the 202 "still starting" answer is accepted here.
	if (!res.ok) {
		throw new Error(`Sablier wake returned ${res.status} for ${config.container}`);
	}
}
/**
 * Best-effort stop of a container via the Docker Engine API, with a 10 second
 * grace period (`t=10`). Never throws: failures are logged as warnings.
 *
 * @param name Docker container name.
 */
async function stopContainer(name: string): Promise<void> {
	try {
		const { status, body } = await dockerApi("POST", `/containers/${name}/stop?t=10`);
		// dockerApi resolves for every HTTP status, so success must be checked
		// explicitly. 204 = stopped, 304 = already stopped.
		if (status !== 204 && status !== 304) {
			const detail = typeof body === "object" ? JSON.stringify(body) : body;
			console.warn(`[sidecar] Failed to stop ${name}: HTTP ${status} ${detail}`);
			return;
		}
		console.log(`[sidecar] Stopped ${name}`);
	} catch (e) {
		console.warn(`[sidecar] Failed to stop ${name}:`, e);
	}
}
/**
 * Wait until the sidecar's HTTP port accepts connections.
 *
 * Polls `/health` for blender-worker and `/` for every other sidecar, with a
 * 2s timeout per attempt and up to 1s between attempts. Any HTTP response,
 * whatever its status, counts as "up". If `config.healthTimeout` elapses
 * first, a warning is logged and the function returns normally.
 *
 * @param config Sidecar to probe.
 */
async function waitForReady(config: SidecarConfig): Promise<void> {
	const deadline = Date.now() + config.healthTimeout;
	const url =
		config.container === "blender-worker"
			? `http://${config.host}:${config.port}/health`
			: `http://${config.host}:${config.port}/`;

	while (Date.now() < deadline) {
		try {
			const res = await fetch(url, { signal: AbortSignal.timeout(2000) });
			// Only reachability matters; release the body so the connection is freed.
			await res.body?.cancel().catch(() => {});
			return; // Any response means the server is up
		} catch {
			// Connection refused or timeout — retry
		}
		// Never sleep past the deadline.
		const remaining = deadline - Date.now();
		if (remaining <= 0) break;
		await new Promise((r) => setTimeout(r, Math.min(1000, remaining)));
	}
	console.warn(`[sidecar] ${config.container} health check timed out after ${config.healthTimeout}ms — proceeding anyway`);
}
// ── Public API ──
/**
* Ensure a sidecar container is running and ready.
* Starts the container if stopped, waits for health, updates last-used timestamp.
* No-op if Docker socket is not available (local dev).
* Ensure the named sidecar is running and ready. Extends the Sablier session
* TTL as a side effect. Silent no-op when Sablier isn't reachable (local dev).
*/
export async function ensureSidecar(name: string): Promise<void> {
const config = SIDECARS[name];
if (!config) throw new Error(`Unknown sidecar: ${name}`);
if (!(await probeSablier())) return;
lastUsed.set(name, Date.now());
if (!dockerAvailable) return;
const running = await isContainerRunning(config.container);
if (running) return;
console.log(`[sidecar] Starting ${name}...`);
await startContainer(config.container);
await waitForReady(config);
console.log(`[sidecar] ${name} ready`);
}
/**
 * Record "now" as the last-used time for a sidecar.
 * Intended to be called once a long-running operation has finished.
 */
export function markSidecarUsed(name: string): void {
	const now = Date.now();
	lastUsed.set(name, now);
}
/**
 * Report whether a sidecar's container is currently running (used by health
 * endpoints). Resolves to `true` when Docker is unavailable, and to `false`
 * for names missing from SIDECARS.
 */
export async function isSidecarRunning(name: string): Promise<boolean> {
	// Local dev has no Docker socket to inspect — assume the sidecar is up.
	if (!dockerAvailable) return true;
	const entry = SIDECARS[name];
	return entry ? isContainerRunning(entry.container) : false;
}
// ── Idle watcher ──
async function checkIdleContainers(): Promise<void> {
if (!dockerAvailable) return;
const now = Date.now();
for (const [name, config] of Object.entries(SIDECARS)) {
let running: boolean;
try {
running = await isContainerRunning(config.container);
} catch {
continue;
}
if (!running) continue;
const last = lastUsed.get(name);
if (!last) {
// Running but never used via API in this session — stop it
console.log(`[sidecar] Stopping unused ${name}`);
await stopContainer(config.container);
continue;
}
if (now - last > IDLE_TIMEOUT_MS) {
console.log(`[sidecar] Stopping idle ${name} (${Math.round((now - last) / 1000)}s inactive)`);
await stopContainer(config.container);
lastUsed.delete(name);
}
try {
await sablierWake(config);
} catch (e) {
console.warn(`[sidecar] Wake failed for ${name}:`, e instanceof Error ? e.message : e);
}
}
/** Start the idle watcher that stops containers after IDLE_TIMEOUT_MS of inactivity */
export function startIdleWatcher(): void {
if (!dockerAvailable) return;
if (idleCheckTimer) return;
idleCheckTimer = setInterval(checkIdleContainers, 60_000);
console.log(`[sidecar] Idle watcher started (timeout: ${IDLE_TIMEOUT_MS / 1000}s)`);
// Initial check after 30s — stop any sidecars that were left running from a previous deploy
setTimeout(checkIdleContainers, 30_000);
/**
 * Refresh the session TTL without blocking on readiness — call after a
 * long-running operation completes so the sidecar stays warm for follow-ups.
 *
 * Does nothing for names missing from SIDECARS or once Sablier has been
 * marked unreachable. The request is fire-and-forget with a 2s timeout;
 * failures are deliberately ignored.
 */
export function markSidecarUsed(name: string): void {
	const config = SIDECARS[name];
	if (!config || sablierReachable === false) return;
	const qs = new URLSearchParams({ names: config.container, session_duration: SESSION_DURATION });
	// Fire-and-forget; readiness already verified earlier via ensureSidecar.
	void fetch(`${SABLIER_URL}/api/strategies/blocking?${qs.toString()}`, { signal: AbortSignal.timeout(2000) })
		// Release the unread body so the connection is freed without waiting for GC.
		.then((res) => res.body?.cancel())
		.catch(() => {});
}
/**
 * Probe whether the sidecar's own HTTP port is accepting connections.
 * Used by health endpoints; falls back to "assume running" when Sablier is
 * unreachable so local dev health checks don't fail.
 *
 * @param name Key into SIDECARS.
 * @returns `false` for unknown names; `true` when Sablier is unreachable;
 *   otherwise `true` only if the sidecar answers within 1.5s with a status
 *   below 500 (`/health` for blender-worker, `/` for the rest).
 */
export async function isSidecarRunning(name: string): Promise<boolean> {
	const config = SIDECARS[name];
	if (!config) return false;
	if (!(await probeSablier())) return true;
	try {
		const url = config.container === "blender-worker"
			? `http://${config.host}:${config.port}/health`
			: `http://${config.host}:${config.port}/`;
		const res = await fetch(url, { signal: AbortSignal.timeout(1500) });
		// Only the status is inspected; release the body so the connection is freed.
		await res.body?.cancel().catch(() => {});
		return res.status < 500;
	} catch {
		// Connection refused, DNS failure or timeout — treat as not running.
		return false;
	}
}
/**
 * No-op in the Sablier era — idle shutdown is handled by Sablier's own
 * session expiration (SESSION_DURATION). Kept for API compatibility.
 *
 * Starts no timer; it only probes Sablier and, when reachable, logs where
 * lifecycle management has been delegated.
 */
export function startIdleWatcher(): void {
	const announce = (reachable: boolean): void => {
		if (!reachable) return;
		console.log(`[sidecar] Lifecycle delegated to Sablier at ${SABLIER_URL} (ttl ${SESSION_DURATION})`);
	};
	probeSablier().then(announce);
}