refactor(sidecar): delegate lifecycle to Sablier instead of Docker socket
Replaces the custom Docker Engine API implementation in sidecar-manager.ts with HTTP calls to Sablier's blocking strategy endpoint. Sablier owns the Docker socket and handles container start, readiness, and session-TTL idle stop.

- Drops ~80 lines of Docker API plumbing and the idle-watcher interval
- Public API (ensureSidecar/markSidecarUsed/isSidecarRunning/startIdleWatcher) unchanged — callers in server/index.ts untouched
- SABLIER_URL defaults to http://sablier:10000 (reachable once Sablier is attached to the rspace-online_rspace-internal network; that dev-ops change is separate)
- SIDECAR_SESSION_DURATION env (default 5m) matches the previous idle timeout
- Graceful no-op when Sablier is unreachable (local dev)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
7ada95b46a
commit
ee251fd621
|
|
@ -1,232 +1,125 @@
|
|||
/**
|
||||
* Sidecar Lifecycle Manager — starts Docker containers on demand, stops after idle.
|
||||
* Sidecar Lifecycle Manager — on-demand container wake-up via Sablier.
|
||||
*
|
||||
* Uses Docker Engine API over Unix socket to control sidecar containers
|
||||
* (kicad-mcp, freecad-mcp, blender-worker) without keeping them running 24/7.
|
||||
* Delegates container start / idle-stop to the Sablier service reachable at
|
||||
* SABLIER_URL (default http://sablier:10000 on the rspace-internal network).
|
||||
* Sablier uses the Docker Engine API on its own socket mount to start named
|
||||
* containers and stops them after the session TTL expires with no refresh.
|
||||
*
|
||||
* Requires /var/run/docker.sock mounted into the rspace container.
|
||||
* Public API is unchanged from the previous Docker-socket implementation so
|
||||
* callers in server/index.ts do not need to change.
|
||||
*/
|
||||
|
||||
import http from "node:http";
|
||||
import fs from "node:fs";
|
||||
|
||||
const DOCKER_SOCKET = "/var/run/docker.sock";
|
||||
const IDLE_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
|
||||
|
||||
interface SidecarConfig {
|
||||
container: string;
|
||||
host: string;
|
||||
port: number;
|
||||
healthTimeout: number; // max ms to wait for ready
|
||||
/** Max ms to block waiting for the container to become ready. */
|
||||
healthTimeout: number;
|
||||
}
|
||||
|
||||
const SIDECARS: Record<string, SidecarConfig> = {
|
||||
"kicad-mcp": {
|
||||
container: "kicad-mcp",
|
||||
host: "kicad-mcp",
|
||||
port: 8809,
|
||||
healthTimeout: 45_000, // KiCad takes a while to init
|
||||
},
|
||||
"freecad-mcp": {
|
||||
container: "freecad-mcp",
|
||||
host: "freecad-mcp",
|
||||
port: 8808,
|
||||
healthTimeout: 30_000,
|
||||
},
|
||||
"blender-worker": {
|
||||
container: "blender-worker",
|
||||
host: "blender-worker",
|
||||
port: 8810,
|
||||
healthTimeout: 15_000,
|
||||
},
|
||||
ollama: {
|
||||
container: "ollama",
|
||||
host: "ollama",
|
||||
port: 11434,
|
||||
healthTimeout: 30_000,
|
||||
},
|
||||
"scribus-novnc": {
|
||||
container: "scribus-novnc",
|
||||
host: "scribus-novnc",
|
||||
port: 8765,
|
||||
healthTimeout: 30_000,
|
||||
},
|
||||
"open-notebook": {
|
||||
container: "open-notebook",
|
||||
host: "open-notebook",
|
||||
port: 5055,
|
||||
healthTimeout: 45_000,
|
||||
},
|
||||
"kicad-mcp": { container: "kicad-mcp", host: "kicad-mcp", port: 8809, healthTimeout: 45_000 },
|
||||
"freecad-mcp": { container: "freecad-mcp", host: "freecad-mcp", port: 8808, healthTimeout: 30_000 },
|
||||
"blender-worker":{ container: "blender-worker",host: "blender-worker",port: 8810, healthTimeout: 15_000 },
|
||||
"ollama": { container: "ollama", host: "ollama", port: 11434, healthTimeout: 30_000 },
|
||||
"scribus-novnc": { container: "scribus-novnc", host: "scribus-novnc", port: 8765, healthTimeout: 30_000 },
|
||||
"open-notebook": { container: "open-notebook", host: "open-notebook", port: 5055, healthTimeout: 45_000 },
|
||||
};
|
||||
|
||||
const lastUsed = new Map<string, number>();
|
||||
let idleCheckTimer: ReturnType<typeof setInterval> | null = null;
|
||||
let dockerAvailable = false;
|
||||
const SABLIER_URL = process.env.SABLIER_URL || "http://sablier:10000";
|
||||
const SESSION_DURATION = process.env.SIDECAR_SESSION_DURATION || "5m";
|
||||
|
||||
// Check Docker socket on load
|
||||
try {
|
||||
fs.accessSync(DOCKER_SOCKET);
|
||||
dockerAvailable = true;
|
||||
} catch {
|
||||
console.log("[sidecar] Docker socket not available — lifecycle management disabled");
|
||||
}
|
||||
let sablierReachable: boolean | null = null;
|
||||
|
||||
// ── Docker Engine API over Unix socket ──
|
||||
|
||||
function dockerApi(method: string, path: string, sendBody?: boolean): Promise<{ status: number; body: any }> {
|
||||
return new Promise((resolve, reject) => {
|
||||
const headers: Record<string, string> = {};
|
||||
// Only set Content-Type when we actually send a JSON body
|
||||
if (sendBody) headers["Content-Type"] = "application/json";
|
||||
const req = http.request(
|
||||
{
|
||||
socketPath: DOCKER_SOCKET,
|
||||
path: `/v1.44${path}`,
|
||||
method,
|
||||
headers,
|
||||
},
|
||||
(res) => {
|
||||
let data = "";
|
||||
res.on("data", (chunk) => (data += chunk));
|
||||
res.on("end", () => {
|
||||
let body: any = data;
|
||||
async function probeSablier(): Promise<boolean> {
|
||||
if (sablierReachable !== null) return sablierReachable;
|
||||
try {
|
||||
body = JSON.parse(data);
|
||||
} catch {}
|
||||
resolve({ status: res.statusCode || 0, body });
|
||||
});
|
||||
},
|
||||
);
|
||||
req.on("error", reject);
|
||||
req.setTimeout(10_000, () => {
|
||||
req.destroy(new Error("Docker API timeout"));
|
||||
});
|
||||
req.end();
|
||||
});
|
||||
}
|
||||
|
||||
async function isContainerRunning(name: string): Promise<boolean> {
|
||||
try {
|
||||
const { body } = await dockerApi("GET", `/containers/${name}/json`);
|
||||
return body?.State?.Running === true;
|
||||
const res = await fetch(`${SABLIER_URL}/health`, { signal: AbortSignal.timeout(2000) });
|
||||
sablierReachable = res.ok;
|
||||
} catch {
|
||||
return false;
|
||||
sablierReachable = false;
|
||||
console.log("[sidecar] Sablier unreachable at", SABLIER_URL, "— lifecycle management disabled");
|
||||
}
|
||||
return sablierReachable;
|
||||
}
|
||||
|
||||
async function startContainer(name: string): Promise<void> {
|
||||
const { status, body } = await dockerApi("POST", `/containers/${name}/start`);
|
||||
// 204 = started, 304 = already running
|
||||
if (status !== 204 && status !== 304) {
|
||||
const detail = typeof body === "object" ? JSON.stringify(body) : body;
|
||||
throw new Error(`Failed to start ${name}: HTTP ${status} — ${detail}`);
|
||||
/**
 * Wake a sidecar through Sablier's blocking strategy endpoint.
 *
 * Issues `GET ${SABLIER_URL}/api/strategies/blocking` with:
 *   - `names`            — the container to start (`config.container`)
 *   - `session_duration` — the session TTL (`SESSION_DURATION`)
 *   - `timeout`          — `config.healthTimeout` in whole seconds, at least 1s
 *
 * Sablier starts the named container, waits for it to be ready (per its own
 * health check policy), and returns 200. 202 = still starting past our
 * timeout; we proceed anyway and let the caller's request retry logic handle
 * the brief window.
 *
 * @param config Sidecar whose container should be started / kept alive.
 * @throws Error when Sablier answers with a non-2xx status. Network failures
 *         and the client-side abort (healthTimeout + 5s) reject with the
 *         error raised by `fetch`.
 */
async function sablierWake(config: SidecarConfig): Promise<void> {
  // Sablier expects a duration string; never send less than one second.
  const timeoutSeconds = Math.max(1, Math.floor(config.healthTimeout / 1000));
  const qs = new URLSearchParams({
    names: config.container,
    session_duration: SESSION_DURATION,
    timeout: `${timeoutSeconds}s`,
  });
  const url = `${SABLIER_URL}/api/strategies/blocking?${qs.toString()}`;

  // The client-side abort fires 5s after the timeout we asked Sablier to
  // honour, giving the server-side timeout the chance to answer first.
  const res = await fetch(url, { signal: AbortSignal.timeout(config.healthTimeout + 5_000) });

  // Only the status code matters. Cancel the unread body so the underlying
  // connection is released right away instead of lingering until GC.
  await res.body?.cancel().catch(() => {});

  // `res.ok` covers every 2xx status, so 202 ("still starting") is accepted
  // here without a separate check.
  if (!res.ok) {
    throw new Error(`Sablier wake returned ${res.status} for ${config.container}`);
  }
}
|
||||
|
||||
async function stopContainer(name: string): Promise<void> {
|
||||
try {
|
||||
await dockerApi("POST", `/containers/${name}/stop?t=10`);
|
||||
console.log(`[sidecar] Stopped ${name}`);
|
||||
} catch (e) {
|
||||
console.warn(`[sidecar] Failed to stop ${name}:`, e);
|
||||
}
|
||||
}
|
||||
|
||||
/** Wait until the sidecar's HTTP port accepts connections */
|
||||
async function waitForReady(config: SidecarConfig): Promise<void> {
|
||||
const deadline = Date.now() + config.healthTimeout;
|
||||
const url =
|
||||
config.container === "blender-worker"
|
||||
? `http://${config.host}:${config.port}/health`
|
||||
: `http://${config.host}:${config.port}/`;
|
||||
|
||||
while (Date.now() < deadline) {
|
||||
try {
|
||||
await fetch(url, { signal: AbortSignal.timeout(2000) });
|
||||
return; // Any response means the server is up
|
||||
} catch {
|
||||
// Connection refused or timeout — retry
|
||||
}
|
||||
await new Promise((r) => setTimeout(r, 1000));
|
||||
}
|
||||
console.warn(`[sidecar] ${config.container} health check timed out after ${config.healthTimeout}ms — proceeding anyway`);
|
||||
}
|
||||
|
||||
// ── Public API ──
|
||||
|
||||
/**
|
||||
* Ensure a sidecar container is running and ready.
|
||||
* Starts the container if stopped, waits for health, updates last-used timestamp.
|
||||
* No-op if Docker socket is not available (local dev).
|
||||
* Ensure the named sidecar is running and ready. Extends the Sablier session
|
||||
* TTL as a side effect. Silent no-op when Sablier isn't reachable (local dev).
|
||||
*/
|
||||
export async function ensureSidecar(name: string): Promise<void> {
|
||||
const config = SIDECARS[name];
|
||||
if (!config) throw new Error(`Unknown sidecar: ${name}`);
|
||||
if (!(await probeSablier())) return;
|
||||
|
||||
lastUsed.set(name, Date.now());
|
||||
|
||||
if (!dockerAvailable) return;
|
||||
|
||||
const running = await isContainerRunning(config.container);
|
||||
if (running) return;
|
||||
|
||||
console.log(`[sidecar] Starting ${name}...`);
|
||||
await startContainer(config.container);
|
||||
await waitForReady(config);
|
||||
console.log(`[sidecar] ${name} ready`);
|
||||
try {
|
||||
await sablierWake(config);
|
||||
} catch (e) {
|
||||
console.warn(`[sidecar] Wake failed for ${name}:`, e instanceof Error ? e.message : e);
|
||||
}
|
||||
}
|
||||
|
||||
/** Update last-used timestamp (call after long-running operations complete) */
|
||||
/**
|
||||
* Refresh the session TTL without blocking on readiness — call after a
|
||||
* long-running operation completes so the sidecar stays warm for follow-ups.
|
||||
*/
|
||||
export function markSidecarUsed(name: string): void {
|
||||
lastUsed.set(name, Date.now());
|
||||
const config = SIDECARS[name];
|
||||
if (!config || sablierReachable === false) return;
|
||||
const qs = new URLSearchParams({ names: config.container, session_duration: SESSION_DURATION });
|
||||
// Fire-and-forget; readiness already verified earlier via ensureSidecar.
|
||||
fetch(`${SABLIER_URL}/api/strategies/blocking?${qs.toString()}`, { signal: AbortSignal.timeout(2000) })
|
||||
.catch(() => {});
|
||||
}
|
||||
|
||||
/** Check if a sidecar container is currently running (for health endpoints) */
|
||||
/**
|
||||
* Probe whether the sidecar's own HTTP port is accepting connections.
|
||||
* Used by health endpoints; falls back to "assume running" when Sablier is
|
||||
* unreachable so local dev health checks don't fail.
|
||||
*/
|
||||
export async function isSidecarRunning(name: string): Promise<boolean> {
|
||||
if (!dockerAvailable) return true; // Assume running in local dev
|
||||
const config = SIDECARS[name];
|
||||
if (!config) return false;
|
||||
return isContainerRunning(config.container);
|
||||
}
|
||||
|
||||
// ── Idle watcher ──
|
||||
|
||||
async function checkIdleContainers(): Promise<void> {
|
||||
if (!dockerAvailable) return;
|
||||
const now = Date.now();
|
||||
|
||||
for (const [name, config] of Object.entries(SIDECARS)) {
|
||||
let running: boolean;
|
||||
if (!(await probeSablier())) return true;
|
||||
try {
|
||||
running = await isContainerRunning(config.container);
|
||||
const url = config.container === "blender-worker"
|
||||
? `http://${config.host}:${config.port}/health`
|
||||
: `http://${config.host}:${config.port}/`;
|
||||
const res = await fetch(url, { signal: AbortSignal.timeout(1500) });
|
||||
return res.status < 500;
|
||||
} catch {
|
||||
continue;
|
||||
}
|
||||
if (!running) continue;
|
||||
|
||||
const last = lastUsed.get(name);
|
||||
if (!last) {
|
||||
// Running but never used via API in this session — stop it
|
||||
console.log(`[sidecar] Stopping unused ${name}`);
|
||||
await stopContainer(config.container);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (now - last > IDLE_TIMEOUT_MS) {
|
||||
console.log(`[sidecar] Stopping idle ${name} (${Math.round((now - last) / 1000)}s inactive)`);
|
||||
await stopContainer(config.container);
|
||||
lastUsed.delete(name);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Start the idle watcher that stops containers after IDLE_TIMEOUT_MS of inactivity */
|
||||
/**
|
||||
* Starts no timer in the Sablier era — idle shutdown is handled by Sablier's own
|
||||
* session expiration (SESSION_DURATION). It only logs that lifecycle is delegated when Sablier is reachable; kept for API compatibility.
|
||||
*/
|
||||
export function startIdleWatcher(): void {
|
||||
if (!dockerAvailable) return;
|
||||
if (idleCheckTimer) return;
|
||||
idleCheckTimer = setInterval(checkIdleContainers, 60_000);
|
||||
console.log(`[sidecar] Idle watcher started (timeout: ${IDLE_TIMEOUT_MS / 1000}s)`);
|
||||
|
||||
// Initial check after 30s — stop any sidecars that were left running from a previous deploy
|
||||
setTimeout(checkIdleContainers, 30_000);
|
||||
probeSablier().then((ok) => {
|
||||
if (ok) console.log(`[sidecar] Lifecycle delegated to Sablier at ${SABLIER_URL} (ttl ${SESSION_DURATION})`);
|
||||
});
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue