/**
 * MI Provider Abstraction — swappable AI backends for Mycelial Intelligence.
 *
 * Normalises Ollama, Gemini, Anthropic, and OpenAI-compatible endpoints into
 * a single stream interface. The registry auto-detects available providers
 * from environment variables at startup.
 */
|
|
|
|
/** One turn of a chat conversation in the common role/content shape. */
export interface MiMessage {
  role: "system" | "user" | "assistant";
  content: string;
  // Optional attached images. NOTE(review): encoding not shown here —
  // presumably base64 strings as Ollama expects; confirm against callers.
  images?: string[];
}
/** One increment of streamed model output; `done` marks the final chunk. */
export interface MiStreamChunk {
  /** Text produced since the previous chunk (may be empty on the final chunk). */
  content: string;
  /** True on the terminating chunk of the stream. */
  done: boolean;
}
/** Registry entry describing one selectable model and which backend serves it. */
export interface MiModelConfig {
  /** Unique ID used in API requests (e.g. "gemini-flash") */
  id: string;
  /** Provider key (e.g. "ollama", "gemini") */
  provider: string;
  /** Model name as the provider expects it (e.g. "gemini-2.5-flash") */
  providerModel: string;
  /** Display label for UI */
  label: string;
  /** Group header in model selector */
  group: string;
}
/** Contract every AI backend implements. */
export interface MiProvider {
  /** Stable provider key (e.g. "ollama"); used as the registry map key. */
  id: string;
  /** Cheap, synchronous availability check (typically env-var presence). */
  isAvailable(): boolean;
  /** Stream a completion for the given conversation from the named model. */
  stream(messages: MiMessage[], model: string): AsyncGenerator<MiStreamChunk>;
}

// ── Model Registry ──

// Static catalogue of selectable models. The friendly `id` is what the API
// accepts; `providerModel` is the name passed through to the backend.
const MODEL_REGISTRY: MiModelConfig[] = [
  { id: "gemini-flash", provider: "gemini", providerModel: "gemini-2.5-flash", label: "Gemini Flash", group: "Gemini" },
  { id: "gemini-pro", provider: "gemini", providerModel: "gemini-2.5-pro", label: "Gemini Pro", group: "Gemini" },
  { id: "llama3.2", provider: "ollama", providerModel: "llama3.2:3b", label: "Llama 3.2 (3B)", group: "Local" },
  { id: "llama3.1", provider: "ollama", providerModel: "llama3.1:8b", label: "Llama 3.1 (8B)", group: "Local" },
  { id: "qwen2.5-coder", provider: "ollama", providerModel: "qwen2.5-coder:7b", label: "Qwen Coder", group: "Local" },
  { id: "mistral-small", provider: "ollama", providerModel: "mistral-small:24b", label: "Mistral Small", group: "Local" },
];

// ── Ollama Provider ──

class OllamaProvider implements MiProvider {
|
|
id = "ollama";
|
|
#url: string;
|
|
|
|
constructor() {
|
|
this.#url = process.env.OLLAMA_URL || "http://localhost:11434";
|
|
}
|
|
|
|
isAvailable(): boolean {
|
|
return true; // Ollama is always "available" — fails at call time if offline
|
|
}
|
|
|
|
async *stream(messages: MiMessage[], model: string): AsyncGenerator<MiStreamChunk> {
|
|
const res = await fetch(`${this.#url}/api/chat`, {
|
|
method: "POST",
|
|
headers: { "Content-Type": "application/json" },
|
|
body: JSON.stringify({ model, messages, stream: true }),
|
|
});
|
|
|
|
if (!res.ok) {
|
|
const errText = await res.text().catch(() => "");
|
|
throw new Error(`Ollama error ${res.status}: ${errText}`);
|
|
}
|
|
|
|
if (!res.body) throw new Error("No response body from Ollama");
|
|
|
|
const reader = res.body.getReader();
|
|
const decoder = new TextDecoder();
|
|
let buffer = "";
|
|
|
|
while (true) {
|
|
const { done, value } = await reader.read();
|
|
if (done) break;
|
|
|
|
buffer += decoder.decode(value, { stream: true });
|
|
const lines = buffer.split("\n");
|
|
buffer = lines.pop() || "";
|
|
|
|
for (const line of lines) {
|
|
if (!line.trim()) continue;
|
|
try {
|
|
const data = JSON.parse(line);
|
|
yield {
|
|
content: data.message?.content || "",
|
|
done: !!data.done,
|
|
};
|
|
} catch {
|
|
// skip malformed lines
|
|
}
|
|
}
|
|
}
|
|
|
|
// Process remaining buffer
|
|
if (buffer.trim()) {
|
|
try {
|
|
const data = JSON.parse(buffer);
|
|
yield { content: data.message?.content || "", done: true };
|
|
} catch {
|
|
// ignore
|
|
}
|
|
}
|
|
}
|
|
}

// ── Gemini Provider ──

class GeminiProvider implements MiProvider {
  id = "gemini";

  /** Available whenever GEMINI_API_KEY is set. */
  isAvailable(): boolean {
    return !!(process.env.GEMINI_API_KEY);
  }

  /**
   * Stream a completion via the `@google/generative-ai` SDK.
   *
   * @param messages Full conversation; any "system" message becomes the
   *                 model's systemInstruction, the rest is split into chat
   *                 history plus the final message to send.
   * @param model    Gemini model name (e.g. "gemini-2.5-flash").
   * @throws Error if GEMINI_API_KEY is unset or `messages` has no
   *         non-system message to send.
   */
  async *stream(messages: MiMessage[], model: string): AsyncGenerator<MiStreamChunk> {
    const apiKey = process.env.GEMINI_API_KEY;
    if (!apiKey) throw new Error("GEMINI_API_KEY not configured");

    // Lazy dynamic import so the SDK is only loaded when Gemini is used.
    const { GoogleGenerativeAI } = await import("@google/generative-ai");
    const genAI = new GoogleGenerativeAI(apiKey);

    // Extract system instruction from messages
    const systemMsg = messages.find((m) => m.role === "system");
    const chatMessages = messages.filter((m) => m.role !== "system");

    const geminiModel = genAI.getGenerativeModel({
      model,
      ...(systemMsg ? { systemInstruction: { role: "user", parts: [{ text: systemMsg.content }] } } : {}),
    });

    // Convert messages to Gemini format: everything but the last message
    // becomes history, with assistant turns mapped to Gemini's "model" role.
    const history = chatMessages.slice(0, -1).map((m) => ({
      role: m.role === "assistant" ? "model" : "user",
      parts: [{ text: m.content }],
    }));

    const lastMsg = chatMessages[chatMessages.length - 1];
    if (!lastMsg) throw new Error("No messages to send");

    const chat = geminiModel.startChat({ history });
    const result = await chat.sendMessageStream(lastMsg.content);

    for await (const chunk of result.stream) {
      const text = chunk.text();
      if (text) {
        yield { content: text, done: false };
      }
    }
    // SDK chunks carry no terminal flag, so emit an explicit done marker.
    yield { content: "", done: true };
  }
}

// ── Anthropic Provider (stub) ──

class AnthropicProvider implements MiProvider {
|
|
id = "anthropic";
|
|
|
|
isAvailable(): boolean {
|
|
return !!(process.env.ANTHROPIC_API_KEY);
|
|
}
|
|
|
|
async *stream(_messages: MiMessage[], _model: string): AsyncGenerator<MiStreamChunk> {
|
|
throw new Error("Anthropic provider not yet implemented — add @anthropic-ai/sdk");
|
|
}
|
|
}

// ── OpenAI-Compatible Provider (stub) ──

class OpenAICompatProvider implements MiProvider {
|
|
id = "openai-compat";
|
|
|
|
isAvailable(): boolean {
|
|
return !!(process.env.OPENAI_COMPAT_URL && process.env.OPENAI_COMPAT_KEY);
|
|
}
|
|
|
|
async *stream(_messages: MiMessage[], _model: string): AsyncGenerator<MiStreamChunk> {
|
|
throw new Error("OpenAI-compatible provider not yet implemented");
|
|
}
|
|
}

// ── Provider Registry ──

export class MiProviderRegistry {
|
|
#providers = new Map<string, MiProvider>();
|
|
|
|
constructor() {
|
|
this.register(new OllamaProvider());
|
|
this.register(new GeminiProvider());
|
|
this.register(new AnthropicProvider());
|
|
this.register(new OpenAICompatProvider());
|
|
}
|
|
|
|
register(provider: MiProvider): void {
|
|
this.#providers.set(provider.id, provider);
|
|
}
|
|
|
|
/** Look up a model by its friendly ID and return the provider + provider-model. */
|
|
resolveModel(modelId: string): { provider: MiProvider; providerModel: string } | null {
|
|
const config = MODEL_REGISTRY.find((m) => m.id === modelId);
|
|
if (!config) return null;
|
|
|
|
const provider = this.#providers.get(config.provider);
|
|
if (!provider || !provider.isAvailable()) return null;
|
|
|
|
return { provider, providerModel: config.providerModel };
|
|
}
|
|
|
|
/** Get provider directly by provider ID (for fallback when modelId is a raw provider model name). */
|
|
getProviderById(providerId: string): MiProvider | null {
|
|
const provider = this.#providers.get(providerId);
|
|
return provider && provider.isAvailable() ? provider : null;
|
|
}
|
|
|
|
/** All models whose provider is currently available. */
|
|
getAvailableModels(): MiModelConfig[] {
|
|
return MODEL_REGISTRY.filter((m) => {
|
|
const p = this.#providers.get(m.provider);
|
|
return p && p.isAvailable();
|
|
});
|
|
}
|
|
|
|
/** MI_MODEL env or first available model. */
|
|
getDefaultModel(): string {
|
|
const envModel = process.env.MI_MODEL;
|
|
if (envModel) {
|
|
// Check if it's a registry ID
|
|
if (MODEL_REGISTRY.find((m) => m.id === envModel)) return envModel;
|
|
// It might be a raw provider model name — find the registry entry
|
|
const entry = MODEL_REGISTRY.find((m) => m.providerModel === envModel);
|
|
if (entry) return entry.id;
|
|
// Unknown — return as-is for backward compat (Ollama will handle it)
|
|
return envModel;
|
|
}
|
|
// First available
|
|
const available = this.getAvailableModels();
|
|
return available[0]?.id || "llama3.2";
|
|
}
|
|
|
|
/**
|
|
* Stream from any provider and normalize to NDJSON `{message:{content:"..."}}` format
|
|
* matching Ollama's wire format — zero changes needed in the client parser.
|
|
*/
|
|
streamToNDJSON(gen: AsyncGenerator<MiStreamChunk>): ReadableStream {
|
|
const encoder = new TextEncoder();
|
|
return new ReadableStream({
|
|
async pull(controller) {
|
|
try {
|
|
const { value, done: iterDone } = await gen.next();
|
|
if (iterDone) {
|
|
controller.close();
|
|
return;
|
|
}
|
|
const payload = JSON.stringify({
|
|
message: { role: "assistant", content: value.content },
|
|
done: value.done,
|
|
});
|
|
controller.enqueue(encoder.encode(payload + "\n"));
|
|
if (value.done) controller.close();
|
|
} catch (err: any) {
|
|
controller.error(err);
|
|
}
|
|
},
|
|
});
|
|
}
|
|
}
/** Singleton registry, ready at import time. */
export const miRegistry = new MiProviderRegistry();
|