rspace-online/server/mi-provider.ts

/**
 * MI Provider Abstraction — swappable AI backends for Mycelial Intelligence.
 *
 * Normalises Ollama, Gemini, Anthropic, and OpenAI-compatible endpoints into
 * a single stream interface. The registry auto-detects available providers
 * from environment variables at startup.
 */
export interface MiMessage {
  role: "system" | "user" | "assistant";
  content: string;
  images?: string[];
}

export interface MiStreamChunk {
  content: string;
  done: boolean;
}

export interface MiModelConfig {
  /** Unique ID used in API requests (e.g. "gemini-flash") */
  id: string;
  /** Provider key (e.g. "ollama", "gemini") */
  provider: string;
  /** Model name as the provider expects it (e.g. "gemini-2.5-flash") */
  providerModel: string;
  /** Display label for UI */
  label: string;
  /** Group header in model selector */
  group: string;
}

export interface MiProvider {
  id: string;
  isAvailable(): boolean;
  stream(messages: MiMessage[], model: string): AsyncGenerator<MiStreamChunk>;
}
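
/*
 * A minimal custom provider only needs these three members. Illustrative sketch
 * ("echo" is a made-up backend, not shipped here); it could be added alongside
 * the built-in providers via `miRegistry.register(new EchoProvider())`:
 *
 *   class EchoProvider implements MiProvider {
 *     id = "echo";
 *     isAvailable(): boolean {
 *       return true;
 *     }
 *     async *stream(messages: MiMessage[]): AsyncGenerator<MiStreamChunk> {
 *       yield { content: messages.at(-1)?.content ?? "", done: true };
 *     }
 *   }
 */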
// ── Model Registry ──
const MODEL_REGISTRY: MiModelConfig[] = [
  { id: "gemini-flash", provider: "gemini", providerModel: "gemini-2.5-flash", label: "Gemini Flash", group: "Gemini" },
  { id: "gemini-pro", provider: "gemini", providerModel: "gemini-2.5-pro", label: "Gemini Pro", group: "Gemini" },
  { id: "llama3.2", provider: "ollama", providerModel: "llama3.2:3b", label: "Llama 3.2 (3B)", group: "Local" },
  { id: "llama3.1", provider: "ollama", providerModel: "llama3.1:8b", label: "Llama 3.1 (8B)", group: "Local" },
  { id: "qwen2.5-coder", provider: "ollama", providerModel: "qwen2.5-coder:7b", label: "Qwen Coder", group: "Local" },
  { id: "mistral-small", provider: "ollama", providerModel: "mistral-small:24b", label: "Mistral Small", group: "Local" },
];
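// Adding a model is a one-line entry above; for example (hypothetical entry, not
// shipped with this file):
//   { id: "claude-sonnet", provider: "anthropic", providerModel: "claude-sonnet-4", label: "Claude Sonnet", group: "Anthropic" },
// An entry only appears in getAvailableModels() once its provider reports isAvailable().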
// ── Ollama Provider ──
class OllamaProvider implements MiProvider {
  id = "ollama";
  #url: string;

  constructor() {
    this.#url = process.env.OLLAMA_URL || "http://localhost:11434";
  }

  isAvailable(): boolean {
    return true; // Ollama is always "available" — fails at call time if offline
  }

  async *stream(messages: MiMessage[], model: string): AsyncGenerator<MiStreamChunk> {
    const res = await fetch(`${this.#url}/api/chat`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ model, messages, stream: true }),
    });
    if (!res.ok) {
      const errText = await res.text().catch(() => "");
      throw new Error(`Ollama error ${res.status}: ${errText}`);
    }
    if (!res.body) throw new Error("No response body from Ollama");
    const reader = res.body.getReader();
    const decoder = new TextDecoder();
    let buffer = "";
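    // Ollama's /api/chat endpoint streams newline-delimited JSON (one object per
    // line), so accumulate bytes and parse each complete line as it arrives.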
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      buffer = lines.pop() || "";
      for (const line of lines) {
        if (!line.trim()) continue;
        try {
          const data = JSON.parse(line);
          yield {
            content: data.message?.content || "",
            done: !!data.done,
          };
        } catch {
          // skip malformed lines
        }
      }
    }
    // Process remaining buffer
    if (buffer.trim()) {
      try {
        const data = JSON.parse(buffer);
        yield { content: data.message?.content || "", done: true };
      } catch {
        // ignore
      }
    }
  }
}
// ── Gemini Provider ──
class GeminiProvider implements MiProvider {
  id = "gemini";

  isAvailable(): boolean {
    return !!(process.env.GEMINI_API_KEY);
  }

  async *stream(messages: MiMessage[], model: string): AsyncGenerator<MiStreamChunk> {
    const apiKey = process.env.GEMINI_API_KEY;
    if (!apiKey) throw new Error("GEMINI_API_KEY not configured");
    const { GoogleGenerativeAI } = await import("@google/generative-ai");
    const genAI = new GoogleGenerativeAI(apiKey);
    // Extract system instruction from messages
    const systemMsg = messages.find((m) => m.role === "system");
    const chatMessages = messages.filter((m) => m.role !== "system");
    const geminiModel = genAI.getGenerativeModel({
      model,
      ...(systemMsg ? { systemInstruction: { role: "user", parts: [{ text: systemMsg.content }] } } : {}),
    });
    // Convert messages to Gemini format
    const history = chatMessages.slice(0, -1).map((m) => ({
      role: m.role === "assistant" ? "model" : "user",
      parts: [{ text: m.content }],
    }));
    const lastMsg = chatMessages[chatMessages.length - 1];
    if (!lastMsg) throw new Error("No messages to send");
    const chat = geminiModel.startChat({ history });
    const result = await chat.sendMessageStream(lastMsg.content);
    for await (const chunk of result.stream) {
      const text = chunk.text();
      if (text) {
        yield { content: text, done: false };
      }
    }
    yield { content: "", done: true };
  }
}
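// Note: MiMessage.images is forwarded untouched to Ollama (whose chat API accepts
// base64-encoded images on messages); the Gemini path above sends text parts only.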
// ── Anthropic Provider (stub) ──
class AnthropicProvider implements MiProvider {
  id = "anthropic";

  isAvailable(): boolean {
    return !!(process.env.ANTHROPIC_API_KEY);
  }

  async *stream(_messages: MiMessage[], _model: string): AsyncGenerator<MiStreamChunk> {
    throw new Error("Anthropic provider not yet implemented — add @anthropic-ai/sdk");
  }
}
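/*
 * Implementation sketch (untested assumption about the @anthropic-ai/sdk
 * streaming API, not a working provider):
 *
 *   const Anthropic = (await import("@anthropic-ai/sdk")).default;
 *   const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });
 *   const stream = client.messages.stream({ model, max_tokens: 4096, messages: anthropicMessages });
 *   for await (const event of stream) {
 *     if (event.type === "content_block_delta" && event.delta.type === "text_delta") {
 *       yield { content: event.delta.text, done: false };
 *     }
 *   }
 *   yield { content: "", done: true };
 *
 * (`anthropicMessages` stands in for MiMessage[] converted to Anthropic's
 * role/content shape, with any system message passed via `system`.)
 */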
// ── OpenAI-Compatible Provider (stub) ──
class OpenAICompatProvider implements MiProvider {
  id = "openai-compat";

  isAvailable(): boolean {
    return !!(process.env.OPENAI_COMPAT_URL && process.env.OPENAI_COMPAT_KEY);
  }

  async *stream(_messages: MiMessage[], _model: string): AsyncGenerator<MiStreamChunk> {
    throw new Error("OpenAI-compatible provider not yet implemented");
  }
}
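/*
 * Implementation sketch (untested; assumes a standard OpenAI-style
 * /chat/completions endpoint that streams Server-Sent Events):
 *
 *   const res = await fetch(`${process.env.OPENAI_COMPAT_URL}/chat/completions`, {
 *     method: "POST",
 *     headers: {
 *       "Content-Type": "application/json",
 *       Authorization: `Bearer ${process.env.OPENAI_COMPAT_KEY}`,
 *     },
 *     body: JSON.stringify({ model, messages, stream: true }),
 *   });
 *   // Read "data: {...}" SSE lines from res.body; each text delta sits at
 *   // choices[0].delta.content, and the stream ends with "data: [DONE]".
 */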
// ── Provider Registry ──
export class MiProviderRegistry {
  #providers = new Map<string, MiProvider>();

  constructor() {
    this.register(new OllamaProvider());
    this.register(new GeminiProvider());
    this.register(new AnthropicProvider());
    this.register(new OpenAICompatProvider());
  }

  register(provider: MiProvider): void {
    this.#providers.set(provider.id, provider);
  }

  /** Look up a model by its friendly ID and return the provider + provider-model. */
  resolveModel(modelId: string): { provider: MiProvider; providerModel: string } | null {
    const config = MODEL_REGISTRY.find((m) => m.id === modelId);
    if (!config) return null;
    const provider = this.#providers.get(config.provider);
    if (!provider || !provider.isAvailable()) return null;
    return { provider, providerModel: config.providerModel };
  }

  /** Get provider directly by provider ID (for fallback when modelId is a raw provider model name). */
  getProviderById(providerId: string): MiProvider | null {
    const provider = this.#providers.get(providerId);
    return provider && provider.isAvailable() ? provider : null;
  }

  /** All models whose provider is currently available. */
  getAvailableModels(): MiModelConfig[] {
    return MODEL_REGISTRY.filter((m) => {
      const p = this.#providers.get(m.provider);
      return p && p.isAvailable();
    });
  }

  /** MI_MODEL env or first available model. */
  getDefaultModel(): string {
    const envModel = process.env.MI_MODEL;
    if (envModel) {
      // Check if it's a registry ID
      if (MODEL_REGISTRY.find((m) => m.id === envModel)) return envModel;
      // It might be a raw provider model name — find the registry entry
      const entry = MODEL_REGISTRY.find((m) => m.providerModel === envModel);
      if (entry) return entry.id;
      // Unknown — return as-is for backward compat (Ollama will handle it)
      return envModel;
    }
    // First available
    const available = this.getAvailableModels();
    return available[0]?.id || "llama3.2";
  }
  /**
   * Stream from any provider and normalize to NDJSON `{message:{content:"..."}}` format
   * matching Ollama's wire format — zero changes needed in the client parser.
   */
  streamToNDJSON(gen: AsyncGenerator<MiStreamChunk>): ReadableStream {
    const encoder = new TextEncoder();
    return new ReadableStream({
      async pull(controller) {
        try {
          const { value, done: iterDone } = await gen.next();
          if (iterDone) {
            controller.close();
            return;
          }
          const payload = JSON.stringify({
            message: { role: "assistant", content: value.content },
            done: value.done,
          });
          controller.enqueue(encoder.encode(payload + "\n"));
          if (value.done) controller.close();
        } catch (err: any) {
          controller.error(err);
        }
      },
      // If the consumer cancels (e.g. the client disconnects mid-stream), close
      // the source generator so the underlying provider request is not left running.
      cancel() {
        gen.return(undefined).catch(() => {});
      },
    });
  }
}
/** Singleton registry, ready at import time. */
export const miRegistry = new MiProviderRegistry();
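
/*
 * Example wiring in a route handler (illustrative sketch; the surrounding
 * request/response code is an assumption about the caller, not part of this module):
 *
 *   const modelId = requestedModel ?? miRegistry.getDefaultModel();
 *   const resolved = miRegistry.resolveModel(modelId);
 *   if (!resolved) throw new Error(`Model ${modelId} is not available`);
 *   const chunks = resolved.provider.stream(messages, resolved.providerModel);
 *   const body = miRegistry.streamToNDJSON(chunks);
 *   // send `body` to the client as Content-Type: application/x-ndjson
 */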