Compare commits
2 Commits
8dc1c11227
...
000ee0da9a
| Author | SHA1 | Date |
|---|---|---|
| | 000ee0da9a | |
| | ee251fd621 | |
@@ -1,232 +1,125 @@
|
||||||
/**
|
/**
|
||||||
* Sidecar Lifecycle Manager — starts Docker containers on demand, stops after idle.
|
* Sidecar Lifecycle Manager — on-demand container wake-up via Sablier.
|
||||||
*
|
*
|
||||||
* Uses Docker Engine API over Unix socket to control sidecar containers
|
* Delegates container start / idle-stop to the Sablier service reachable at
|
||||||
* (kicad-mcp, freecad-mcp, blender-worker) without keeping them running 24/7.
|
* SABLIER_URL (default http://sablier:10000 on the rspace-internal network).
|
||||||
|
* Sablier uses the Docker Engine API on its own socket mount to start named
|
||||||
|
* containers and stops them after the session TTL expires with no refresh.
|
||||||
*
|
*
|
||||||
* Requires /var/run/docker.sock mounted into the rspace container.
|
* Public API is unchanged from the previous Docker-socket implementation so
|
||||||
|
* callers in server/index.ts do not need to change.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import http from "node:http";
|
|
||||||
import fs from "node:fs";
|
|
||||||
|
|
||||||
const DOCKER_SOCKET = "/var/run/docker.sock";
|
|
||||||
const IDLE_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
|
|
||||||
|
|
||||||
interface SidecarConfig {
|
interface SidecarConfig {
|
||||||
container: string;
|
container: string;
|
||||||
host: string;
|
host: string;
|
||||||
port: number;
|
port: number;
|
||||||
healthTimeout: number; // max ms to wait for ready
|
/** Max ms to block waiting for the container to become ready. */
|
||||||
|
healthTimeout: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
const SIDECARS: Record<string, SidecarConfig> = {
|
const SIDECARS: Record<string, SidecarConfig> = {
|
||||||
"kicad-mcp": {
|
"kicad-mcp": { container: "kicad-mcp", host: "kicad-mcp", port: 8809, healthTimeout: 45_000 },
|
||||||
container: "kicad-mcp",
|
"freecad-mcp": { container: "freecad-mcp", host: "freecad-mcp", port: 8808, healthTimeout: 30_000 },
|
||||||
host: "kicad-mcp",
|
"blender-worker":{ container: "blender-worker",host: "blender-worker",port: 8810, healthTimeout: 15_000 },
|
||||||
port: 8809,
|
"ollama": { container: "ollama", host: "ollama", port: 11434, healthTimeout: 30_000 },
|
||||||
healthTimeout: 45_000, // KiCad takes a while to init
|
"scribus-novnc": { container: "scribus-novnc", host: "scribus-novnc", port: 8765, healthTimeout: 30_000 },
|
||||||
},
|
"open-notebook": { container: "open-notebook", host: "open-notebook", port: 5055, healthTimeout: 45_000 },
|
||||||
"freecad-mcp": {
|
|
||||||
container: "freecad-mcp",
|
|
||||||
host: "freecad-mcp",
|
|
||||||
port: 8808,
|
|
||||||
healthTimeout: 30_000,
|
|
||||||
},
|
|
||||||
"blender-worker": {
|
|
||||||
container: "blender-worker",
|
|
||||||
host: "blender-worker",
|
|
||||||
port: 8810,
|
|
||||||
healthTimeout: 15_000,
|
|
||||||
},
|
|
||||||
ollama: {
|
|
||||||
container: "ollama",
|
|
||||||
host: "ollama",
|
|
||||||
port: 11434,
|
|
||||||
healthTimeout: 30_000,
|
|
||||||
},
|
|
||||||
"scribus-novnc": {
|
|
||||||
container: "scribus-novnc",
|
|
||||||
host: "scribus-novnc",
|
|
||||||
port: 8765,
|
|
||||||
healthTimeout: 30_000,
|
|
||||||
},
|
|
||||||
"open-notebook": {
|
|
||||||
container: "open-notebook",
|
|
||||||
host: "open-notebook",
|
|
||||||
port: 5055,
|
|
||||||
healthTimeout: 45_000,
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const lastUsed = new Map<string, number>();
|
const SABLIER_URL = process.env.SABLIER_URL || "http://sablier:10000";
|
||||||
let idleCheckTimer: ReturnType<typeof setInterval> | null = null;
|
const SESSION_DURATION = process.env.SIDECAR_SESSION_DURATION || "5m";
|
||||||
let dockerAvailable = false;
|
|
||||||
|
|
||||||
// Check Docker socket on load
|
let sablierReachable: boolean | null = null;
|
||||||
|
|
||||||
|
async function probeSablier(): Promise<boolean> {
|
||||||
|
if (sablierReachable !== null) return sablierReachable;
|
||||||
try {
|
try {
|
||||||
fs.accessSync(DOCKER_SOCKET);
|
const res = await fetch(`${SABLIER_URL}/health`, { signal: AbortSignal.timeout(2000) });
|
||||||
dockerAvailable = true;
|
sablierReachable = res.ok;
|
||||||
} catch {
|
} catch {
|
||||||
console.log("[sidecar] Docker socket not available — lifecycle management disabled");
|
sablierReachable = false;
|
||||||
|
console.log("[sidecar] Sablier unreachable at", SABLIER_URL, "— lifecycle management disabled");
|
||||||
|
}
|
||||||
|
return sablierReachable;
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Docker Engine API over Unix socket ──
|
/**
|
||||||
|
* GET /api/strategies/blocking — Sablier starts the named container, waits
|
||||||
function dockerApi(method: string, path: string, sendBody?: boolean): Promise<{ status: number; body: any }> {
|
* for it to be ready (per its own health check policy), and returns 200.
|
||||||
return new Promise((resolve, reject) => {
|
* 202 = still starting past our timeout; we proceed anyway and let the
|
||||||
const headers: Record<string, string> = {};
|
* caller's request retry logic handle the brief window.
|
||||||
// Only set Content-Type when we actually send a JSON body
|
*/
|
||||||
if (sendBody) headers["Content-Type"] = "application/json";
|
async function sablierWake(config: SidecarConfig): Promise<void> {
|
||||||
const req = http.request(
|
const qs = new URLSearchParams({
|
||||||
{
|
names: config.container,
|
||||||
socketPath: DOCKER_SOCKET,
|
session_duration: SESSION_DURATION,
|
||||||
path: `/v1.44${path}`,
|
timeout: `${Math.max(1, Math.floor(config.healthTimeout / 1000))}s`,
|
||||||
method,
|
|
||||||
headers,
|
|
||||||
},
|
|
||||||
(res) => {
|
|
||||||
let data = "";
|
|
||||||
res.on("data", (chunk) => (data += chunk));
|
|
||||||
res.on("end", () => {
|
|
||||||
let body: any = data;
|
|
||||||
try {
|
|
||||||
body = JSON.parse(data);
|
|
||||||
} catch {}
|
|
||||||
resolve({ status: res.statusCode || 0, body });
|
|
||||||
});
|
|
||||||
},
|
|
||||||
);
|
|
||||||
req.on("error", reject);
|
|
||||||
req.setTimeout(10_000, () => {
|
|
||||||
req.destroy(new Error("Docker API timeout"));
|
|
||||||
});
|
|
||||||
req.end();
|
|
||||||
});
|
});
|
||||||
|
const url = `${SABLIER_URL}/api/strategies/blocking?${qs.toString()}`;
|
||||||
|
const res = await fetch(url, { signal: AbortSignal.timeout(config.healthTimeout + 5_000) });
|
||||||
|
if (!res.ok && res.status !== 202) {
|
||||||
|
throw new Error(`Sablier wake returned ${res.status} for ${config.container}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
async function isContainerRunning(name: string): Promise<boolean> {
|
|
||||||
try {
|
|
||||||
const { body } = await dockerApi("GET", `/containers/${name}/json`);
|
|
||||||
return body?.State?.Running === true;
|
|
||||||
} catch {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function startContainer(name: string): Promise<void> {
|
|
||||||
const { status, body } = await dockerApi("POST", `/containers/${name}/start`);
|
|
||||||
// 204 = started, 304 = already running
|
|
||||||
if (status !== 204 && status !== 304) {
|
|
||||||
const detail = typeof body === "object" ? JSON.stringify(body) : body;
|
|
||||||
throw new Error(`Failed to start ${name}: HTTP ${status} — ${detail}`);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
async function stopContainer(name: string): Promise<void> {
|
|
||||||
try {
|
|
||||||
await dockerApi("POST", `/containers/${name}/stop?t=10`);
|
|
||||||
console.log(`[sidecar] Stopped ${name}`);
|
|
||||||
} catch (e) {
|
|
||||||
console.warn(`[sidecar] Failed to stop ${name}:`, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Wait until the sidecar's HTTP port accepts connections */
|
|
||||||
async function waitForReady(config: SidecarConfig): Promise<void> {
|
|
||||||
const deadline = Date.now() + config.healthTimeout;
|
|
||||||
const url =
|
|
||||||
config.container === "blender-worker"
|
|
||||||
? `http://${config.host}:${config.port}/health`
|
|
||||||
: `http://${config.host}:${config.port}/`;
|
|
||||||
|
|
||||||
while (Date.now() < deadline) {
|
|
||||||
try {
|
|
||||||
await fetch(url, { signal: AbortSignal.timeout(2000) });
|
|
||||||
return; // Any response means the server is up
|
|
||||||
} catch {
|
|
||||||
// Connection refused or timeout — retry
|
|
||||||
}
|
|
||||||
await new Promise((r) => setTimeout(r, 1000));
|
|
||||||
}
|
|
||||||
console.warn(`[sidecar] ${config.container} health check timed out after ${config.healthTimeout}ms — proceeding anyway`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Public API ──
|
// ── Public API ──
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Ensure a sidecar container is running and ready.
|
* Ensure the named sidecar is running and ready. Extends the Sablier session
|
||||||
* Starts the container if stopped, waits for health, updates last-used timestamp.
|
* TTL as a side effect. Silent no-op when Sablier isn't reachable (local dev).
|
||||||
* No-op if Docker socket is not available (local dev).
|
|
||||||
*/
|
*/
|
||||||
export async function ensureSidecar(name: string): Promise<void> {
|
export async function ensureSidecar(name: string): Promise<void> {
|
||||||
const config = SIDECARS[name];
|
const config = SIDECARS[name];
|
||||||
if (!config) throw new Error(`Unknown sidecar: ${name}`);
|
if (!config) throw new Error(`Unknown sidecar: ${name}`);
|
||||||
|
if (!(await probeSablier())) return;
|
||||||
|
|
||||||
lastUsed.set(name, Date.now());
|
try {
|
||||||
|
await sablierWake(config);
|
||||||
if (!dockerAvailable) return;
|
} catch (e) {
|
||||||
|
console.warn(`[sidecar] Wake failed for ${name}:`, e instanceof Error ? e.message : e);
|
||||||
const running = await isContainerRunning(config.container);
|
}
|
||||||
if (running) return;
|
|
||||||
|
|
||||||
console.log(`[sidecar] Starting ${name}...`);
|
|
||||||
await startContainer(config.container);
|
|
||||||
await waitForReady(config);
|
|
||||||
console.log(`[sidecar] ${name} ready`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Update last-used timestamp (call after long-running operations complete) */
|
/**
|
||||||
|
* Refresh the session TTL without blocking on readiness — call after a
|
||||||
|
* long-running operation completes so the sidecar stays warm for follow-ups.
|
||||||
|
*/
|
||||||
export function markSidecarUsed(name: string): void {
|
export function markSidecarUsed(name: string): void {
|
||||||
lastUsed.set(name, Date.now());
|
const config = SIDECARS[name];
|
||||||
|
if (!config || sablierReachable === false) return;
|
||||||
|
const qs = new URLSearchParams({ names: config.container, session_duration: SESSION_DURATION });
|
||||||
|
// Fire-and-forget; readiness already verified earlier via ensureSidecar.
|
||||||
|
fetch(`${SABLIER_URL}/api/strategies/blocking?${qs.toString()}`, { signal: AbortSignal.timeout(2000) })
|
||||||
|
.catch(() => {});
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Check if a sidecar container is currently running (for health endpoints) */
|
/**
|
||||||
|
* Probe the sidecar's own HTTP endpoint; any response with a status below 500 counts as running.
|
||||||
|
* Used by health endpoints; falls back to "assume running" when Sablier is
|
||||||
|
* unreachable so local dev health checks don't fail.
|
||||||
|
*/
|
||||||
export async function isSidecarRunning(name: string): Promise<boolean> {
|
export async function isSidecarRunning(name: string): Promise<boolean> {
|
||||||
if (!dockerAvailable) return true; // Assume running in local dev
|
|
||||||
const config = SIDECARS[name];
|
const config = SIDECARS[name];
|
||||||
if (!config) return false;
|
if (!config) return false;
|
||||||
return isContainerRunning(config.container);
|
if (!(await probeSablier())) return true;
|
||||||
}
|
|
||||||
|
|
||||||
// ── Idle watcher ──
|
|
||||||
|
|
||||||
async function checkIdleContainers(): Promise<void> {
|
|
||||||
if (!dockerAvailable) return;
|
|
||||||
const now = Date.now();
|
|
||||||
|
|
||||||
for (const [name, config] of Object.entries(SIDECARS)) {
|
|
||||||
let running: boolean;
|
|
||||||
try {
|
try {
|
||||||
running = await isContainerRunning(config.container);
|
const url = config.container === "blender-worker"
|
||||||
|
? `http://${config.host}:${config.port}/health`
|
||||||
|
: `http://${config.host}:${config.port}/`;
|
||||||
|
const res = await fetch(url, { signal: AbortSignal.timeout(1500) });
|
||||||
|
return res.status < 500;
|
||||||
} catch {
|
} catch {
|
||||||
continue;
|
return false;
|
||||||
}
|
|
||||||
if (!running) continue;
|
|
||||||
|
|
||||||
const last = lastUsed.get(name);
|
|
||||||
if (!last) {
|
|
||||||
// Running but never used via API in this session — stop it
|
|
||||||
console.log(`[sidecar] Stopping unused ${name}`);
|
|
||||||
await stopContainer(config.container);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (now - last > IDLE_TIMEOUT_MS) {
|
|
||||||
console.log(`[sidecar] Stopping idle ${name} (${Math.round((now - last) / 1000)}s inactive)`);
|
|
||||||
await stopContainer(config.container);
|
|
||||||
lastUsed.delete(name);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Start the idle watcher that stops containers after IDLE_TIMEOUT_MS of inactivity */
|
/**
|
||||||
|
* Starts no idle timer in the Sablier era (only probes Sablier and logs when it is reachable) — idle shutdown is handled by Sablier's own
|
||||||
|
* session expiration (SESSION_DURATION). Kept for API compatibility.
|
||||||
|
*/
|
||||||
export function startIdleWatcher(): void {
|
export function startIdleWatcher(): void {
|
||||||
if (!dockerAvailable) return;
|
probeSablier().then((ok) => {
|
||||||
if (idleCheckTimer) return;
|
if (ok) console.log(`[sidecar] Lifecycle delegated to Sablier at ${SABLIER_URL} (ttl ${SESSION_DURATION})`);
|
||||||
idleCheckTimer = setInterval(checkIdleContainers, 60_000);
|
});
|
||||||
console.log(`[sidecar] Idle watcher started (timeout: ${IDLE_TIMEOUT_MS / 1000}s)`);
|
|
||||||
|
|
||||||
// Initial check after 30s — stop any sidecars that were left running from a previous deploy
|
|
||||||
setTimeout(checkIdleContainers, 30_000);
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue