rspace-online/server/sidecar-manager.ts

217 lines
6.0 KiB
TypeScript

/**
* Sidecar Lifecycle Manager — starts Docker containers on demand and stops them once idle.
*
* Uses the Docker Engine API over a Unix socket to control sidecar containers
* (kicad-mcp, freecad-mcp, blender-worker, ollama) without keeping them running 24/7.
*
* Requires /var/run/docker.sock to be mounted into the rspace container.
*/
import http from "node:http";
import fs from "node:fs";
/** Filesystem path of the Docker Engine Unix socket used for all API calls. */
const DOCKER_SOCKET = "/var/run/docker.sock";
/** How long a sidecar may go unused before the idle watcher stops it (five minutes, in ms). */
const IDLE_TIMEOUT_MS = 5 * 60_000;
/** Static description of one managed sidecar container. */
interface SidecarConfig {
  /** Container name used in Docker Engine API paths. */
  container: string;
  /** Hostname used when probing the sidecar over HTTP. */
  host: string;
  /** TCP port used when probing the sidecar over HTTP. */
  port: number;
  /** Maximum time, in milliseconds, to wait for the sidecar to become ready. */
  healthTimeout: number;
}

/** Build the config for a sidecar whose hostname is the same as its container name. */
function sidecarNamed(name: string, port: number, healthTimeout: number): SidecarConfig {
  return { container: name, host: name, port, healthTimeout };
}

/** Every sidecar this module manages, keyed by the name callers pass to the public API. */
const SIDECARS: Record<string, SidecarConfig> = {
  // KiCad takes a while to init, hence the longest readiness budget.
  "kicad-mcp": sidecarNamed("kicad-mcp", 8809, 45_000),
  "freecad-mcp": sidecarNamed("freecad-mcp", 8808, 30_000),
  "blender-worker": sidecarNamed("blender-worker", 8810, 15_000),
  ollama: sidecarNamed("ollama", 11434, 30_000),
};
/** Timestamp (ms since epoch) of the most recent use of each sidecar, keyed by sidecar name. */
const lastUsed: Map<string, number> = new Map();
/** Handle of the periodic idle check; null until the watcher has been started. */
let idleCheckTimer: ReturnType<typeof setInterval> | null = null;

// Probe the Docker socket once at module load. Assume it is there and flip the
// flag off if the access check throws; every lifecycle operation consults this flag.
let dockerAvailable = true;
try {
  fs.accessSync(DOCKER_SOCKET);
} catch {
  dockerAvailable = false;
  console.log("[sidecar] Docker socket not available — lifecycle management disabled");
}
// ── Docker Engine API over Unix socket ──
/**
 * Issue a body-less request to the Docker Engine API (v1.43) over the Unix socket.
 *
 * @param method HTTP method, e.g. "GET" or "POST".
 * @param path   API path without the version prefix, e.g. `/containers/foo/json`.
 * @returns The HTTP status (0 if Node reported none) and the response body:
 *          the parsed JSON when the payload is valid JSON, otherwise the raw text.
 * @throws Rejects on socket errors, on a response stream error, or when the
 *         socket stays idle for 10 seconds ("Docker API timeout").
 */
function dockerApi(method: string, path: string): Promise<{ status: number; body: any }> {
  return new Promise((resolve, reject) => {
    const req = http.request(
      {
        socketPath: DOCKER_SOCKET,
        path: `/v1.43${path}`,
        method,
        headers: { "Content-Type": "application/json" },
      },
      (res) => {
        // Collect raw buffers and decode once at the end: decoding chunk by chunk
        // corrupts multi-byte UTF-8 sequences that straddle a chunk boundary.
        const chunks: Buffer[] = [];
        res.on("data", (chunk: Buffer) => chunks.push(chunk));
        // Without this listener a response that is cut off mid-body (for example
        // by the timeout below) would never emit "end", leaving the promise pending.
        res.on("error", reject);
        res.on("end", () => {
          const text = Buffer.concat(chunks).toString("utf8");
          let body: any = text;
          try {
            body = JSON.parse(text);
          } catch {
            // Empty or non-JSON payload: hand back the raw text.
          }
          resolve({ status: res.statusCode ?? 0, body });
        });
      },
    );
    req.on("error", reject);
    req.setTimeout(10_000, () => {
      req.destroy(new Error("Docker API timeout"));
    });
    req.end();
  });
}
/**
 * Ask Docker whether the named container is currently running.
 *
 * Never rejects: any failure talking to Docker is reported as "not running".
 */
async function isContainerRunning(name: string): Promise<boolean> {
  let inspection: { body: any };
  try {
    inspection = await dockerApi("GET", `/containers/${name}/json`);
  } catch {
    return false;
  }
  // Only an explicit `true` counts; a missing State (e.g. an error payload) means not running.
  return inspection.body?.State?.Running === true;
}
/**
 * Start the named container through the Docker API.
 *
 * @throws Error if Docker answers with anything other than 204 (started)
 *         or 304 (already running), or if the API call itself fails.
 */
async function startContainer(name: string): Promise<void> {
  const response = await dockerApi("POST", `/containers/${name}/start`);
  const accepted = response.status === 204 || response.status === 304;
  if (!accepted) {
    throw new Error(`Failed to start ${name}: HTTP ${response.status}`);
  }
}
/**
 * Stop the named container, giving it a 10 second grace period (`t=10`).
 *
 * Best effort: never throws. Failures are logged as warnings, including
 * non-success HTTP statuses, which were previously logged as a successful stop.
 *
 * NOTE(review): the grace period equals dockerApi's 10 s socket timeout, so a
 * container that needs the full grace period may be reported as a timeout even
 * though Docker goes on to stop it — confirm whether one of the two should change.
 */
async function stopContainer(name: string): Promise<void> {
  try {
    const { status, body } = await dockerApi("POST", `/containers/${name}/stop?t=10`);
    // 204 = stopped, 304 = already stopped; anything else (404, 500, ...) is a failure.
    if (status !== 204 && status !== 304) {
      const detail = typeof body?.message === "string" ? ` (${body.message})` : "";
      console.warn(`[sidecar] Failed to stop ${name}: HTTP ${status}${detail}`);
      return;
    }
    console.log(`[sidecar] Stopped ${name}`);
  } catch (e) {
    console.warn(`[sidecar] Failed to stop ${name}:`, e);
  }
}
/**
 * Wait until the sidecar's HTTP port answers a request.
 *
 * Probes `/health` for blender-worker and `/` for every other sidecar, roughly
 * once per second. Any HTTP response, whatever its status, counts as ready.
 * If nothing answers within `config.healthTimeout` ms a warning is logged and
 * the function returns normally, so callers proceed regardless.
 *
 * @param config Sidecar whose host/port should be probed.
 */
async function waitForReady(config: SidecarConfig): Promise<void> {
  const deadline = Date.now() + config.healthTimeout;
  const probePath = config.container === "blender-worker" ? "/health" : "/";
  const url = `http://${config.host}:${config.port}${probePath}`;

  for (;;) {
    const remaining = deadline - Date.now();
    if (remaining <= 0) break;
    try {
      // Cap each probe at 2 s, but never let it run past the overall deadline.
      const res = await fetch(url, { signal: AbortSignal.timeout(Math.min(2000, remaining)) });
      // The body is irrelevant; cancel it so the connection is released right away
      // instead of lingering until the response is garbage collected.
      void res.body?.cancel().catch(() => undefined);
      return; // Any response means the server is up
    } catch {
      // Connection refused or timeout — retry
    }
    // Pause before the next probe, without sleeping beyond the deadline.
    const pause = Math.min(1000, deadline - Date.now());
    if (pause > 0) {
      await new Promise((r) => setTimeout(r, pause));
    }
  }
  console.warn(`[sidecar] ${config.container} health check timed out after ${config.healthTimeout}ms — proceeding anyway`);
}
// ── Public API ──
/** Startups currently in progress, keyed by sidecar name, so concurrent callers share one start + health wait. */
const pendingStarts = new Map<string, Promise<void>>();

/**
 * Ensure a sidecar container is running and ready.
 * Starts the container if stopped, waits for health, updates last-used timestamp.
 * No-op if Docker socket is not available (local dev).
 *
 * Concurrent calls for the same sidecar share a single startup: a caller that
 * arrives while the container is still booting waits for the same health check
 * instead of returning as soon as Docker reports the container as running.
 *
 * @param name Key of the sidecar in SIDECARS.
 * @throws Error if `name` is not a configured sidecar, or if Docker refuses to
 *         start the container.
 */
export async function ensureSidecar(name: string): Promise<void> {
  // Own-property check: a plain index lookup would also resolve inherited keys
  // such as "constructor" or "toString" to truthy non-config values.
  const config = Object.prototype.hasOwnProperty.call(SIDECARS, name) ? SIDECARS[name] : undefined;
  if (!config) throw new Error(`Unknown sidecar: ${name}`);

  // Stamp usage up front so the idle watcher does not stop the container mid-request.
  lastUsed.set(name, Date.now());
  if (!dockerAvailable) return;

  const running = await isContainerRunning(config.container);

  // Look for an in-flight startup only after the await above: another caller may
  // have begun one while we were asking Docker, in which case "running" does not
  // yet imply "ready".
  let startup = pendingStarts.get(name);
  if (!startup) {
    if (running) return;
    startup = (async () => {
      console.log(`[sidecar] Starting ${name}...`);
      await startContainer(config.container);
      await waitForReady(config);
      console.log(`[sidecar] ${name} ready`);
    })().finally(() => pendingStarts.delete(name));
    pendingStarts.set(name, startup);
  }

  await startup;
  // Startup can take most of healthTimeout; restart the idle clock from "ready".
  lastUsed.set(name, Date.now());
}
/**
 * Record that a sidecar was used just now, restarting its idle countdown.
 * Call this after long-running operations complete.
 */
export function markSidecarUsed(name: string): void {
  const usedAt = Date.now();
  lastUsed.set(name, usedAt);
}
/**
 * Report whether a sidecar container is currently running (for health endpoints).
 *
 * Without a Docker socket (local dev) every sidecar is assumed to be running;
 * otherwise unknown names report `false`.
 */
export async function isSidecarRunning(name: string): Promise<boolean> {
  if (dockerAvailable) {
    const entry = SIDECARS[name];
    return entry ? isContainerRunning(entry.container) : false;
  }
  // Assume running in local dev
  return true;
}
// ── Idle watcher ──
/**
 * Stop every running sidecar that is either idle for longer than IDLE_TIMEOUT_MS
 * or running without having been used through this module in this process.
 *
 * Sidecars are handled one after another. Does nothing when the Docker socket
 * is unavailable.
 */
async function checkIdleContainers(): Promise<void> {
  if (!dockerAvailable) return;
  for (const [name, config] of Object.entries(SIDECARS)) {
    let running: boolean;
    try {
      running = await isContainerRunning(config.container);
    } catch {
      continue;
    }
    if (!running) continue;

    // Read the timestamp after the await above so a request that arrived while
    // we were querying Docker is taken into account.
    const last = lastUsed.get(name);
    if (last === undefined) {
      // Running but never used via API in this session — stop it
      console.log(`[sidecar] Stopping unused ${name}`);
      await stopContainer(config.container);
      continue;
    }

    // Measure idleness per container: earlier stops in this pass can take several
    // seconds, so a timestamp captured once before the loop would be stale.
    const idleMs = Date.now() - last;
    if (idleMs > IDLE_TIMEOUT_MS) {
      console.log(`[sidecar] Stopping idle ${name} (${Math.round(idleMs / 1000)}s inactive)`);
      await stopContainer(config.container);
      // Forget the timestamp only if nobody refreshed it while the stop was in
      // flight; otherwise the next pass would treat a just-used sidecar as "unused".
      if (lastUsed.get(name) === last) {
        lastUsed.delete(name);
      }
    }
  }
}
/**
 * Start the idle watcher that stops containers after IDLE_TIMEOUT_MS of inactivity.
 *
 * Does nothing when the Docker socket is unavailable or the watcher is already
 * running. Checks run every 60 seconds, plus one early check 30 seconds after start.
 */
export function startIdleWatcher(): void {
  const alreadyWatching = Boolean(idleCheckTimer);
  if (!dockerAvailable || alreadyWatching) return;

  const checkIntervalMs = 60_000;
  const firstCheckDelayMs = 30_000;

  idleCheckTimer = setInterval(checkIdleContainers, checkIntervalMs);
  console.log(`[sidecar] Idle watcher started (timeout: ${IDLE_TIMEOUT_MS / 1000}s)`);
  // Initial check after 30s — stop any sidecars that were left running from a previous deploy
  setTimeout(checkIdleContainers, firstCheckDelayMs);
}