From 60c4a6e21919943acfa930ca9927c03d69aa2acd Mon Sep 17 00:00:00 2001
From: Jeff Emmett
Date: Thu, 4 Dec 2025 03:47:40 -0800
Subject: [PATCH] Update task task-014

---
 ...image-generation-to-reduce-RunPod-costs.md | 295 ++++++++++++++++++
 1 file changed, 295 insertions(+)

diff --git a/backlog/tasks/task-014 - Implement-WebGPU-based-local-image-generation-to-reduce-RunPod-costs.md b/backlog/tasks/task-014 - Implement-WebGPU-based-local-image-generation-to-reduce-RunPod-costs.md
index 3be4bda..d1add94 100644
--- a/backlog/tasks/task-014 - Implement-WebGPU-based-local-image-generation-to-reduce-RunPod-costs.md
+++ b/backlog/tasks/task-014 - Implement-WebGPU-based-local-image-generation-to-reduce-RunPod-costs.md
@@ -4,6 +4,7 @@ title: Implement WebGPU-based local image generation to reduce RunPod costs
status: To Do
assignee: []
created_date: '2025-12-04 11:46'
+updated_date: '2025-12-04 11:47'
labels:
  - performance
  - cost-optimization
@@ -54,3 +55,297 @@ Integrate WebGPU-powered browser-based image generation (SD-Turbo) to reduce Run
- [ ] #8 Model download progress indicator shown to user
- [ ] #9 Works offline after initial model download

## Implementation Plan

## Phase 1: Foundation (Quick Wins)

### 1.1 WebGPU Capability Detection
**File:** `src/lib/clientConfig.ts`

```typescript
export async function detectWebGPUCapabilities(): Promise<{
  hasWebGPU: boolean
  hasF16: boolean
  adapterInfo?: GPUAdapterInfo
  estimatedVRAM?: number
}> {
  if (!navigator.gpu) {
    return { hasWebGPU: false, hasF16: false }
  }

  const adapter = await navigator.gpu.requestAdapter()
  if (!adapter) {
    return { hasWebGPU: false, hasF16: false }
  }

  const hasF16 = adapter.features.has('shader-f16')
  // Newer browsers expose adapter.info directly; requestAdapterInfo() is the older API
  const adapterInfo = (adapter as any).info ?? (await (adapter as any).requestAdapterInfo?.())

  return {
    hasWebGPU: true,
    hasF16,
    adapterInfo,
    // memoryHeaps is non-standard and often absent, so treat estimatedVRAM as best-effort
    estimatedVRAM: adapterInfo?.memoryHeaps?.[0]?.size
  }
}
```

### 1.2 Install Dependencies
```bash
npm install onnxruntime-web
# Or, for transformers.js v3 (bundles its own ONNX Runtime):
npm install @huggingface/transformers
```

### 1.3 Vite Config Updates
**File:** `vite.config.ts`
- Ensure WASM/ONNX assets are properly bundled
- Add WebGPU shader compilation support
- Configure chunk splitting for ML models (see the sketch below)
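
A minimal sketch of what those additions could look like, assuming onnxruntime-web / transformers.js are consumed as published packages; the COOP/COEP headers are only needed if multi-threaded WASM (SharedArrayBuffer) ends up being used, and the chunk names are illustrative:

```typescript
// vite.config.ts - illustrative sketch, not a final config
import { defineConfig } from 'vite'

export default defineConfig({
  // onnxruntime-web ships .wasm/.mjs runtime files that the dep optimizer tends to mangle,
  // so keep these packages out of pre-bundling during dev.
  optimizeDeps: {
    exclude: ['onnxruntime-web', '@huggingface/transformers'],
  },
  // Cross-origin isolation is required for multi-threaded WASM (SharedArrayBuffer).
  server: {
    headers: {
      'Cross-Origin-Opener-Policy': 'same-origin',
      'Cross-Origin-Embedder-Policy': 'require-corp',
    },
  },
  build: {
    rollupOptions: {
      output: {
        // Keep the ML runtime out of the main app chunk.
        manualChunks: {
          'onnx-runtime': ['onnxruntime-web'],
          transformers: ['@huggingface/transformers'],
        },
      },
    },
  },
})
```

The model weights themselves should not be bundled at all; they are fetched at runtime by the download manager described in Phase 2.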

---

## Phase 2: Browser Diffusion Integration

### 2.1 Create WebGPU Diffusion Module
**New File:** `src/lib/webgpuDiffusion.ts`

```typescript
import { pipeline } from '@huggingface/transformers'

let generator: any = null
let loadingPromise: Promise<void> | null = null

export async function initSDTurbo(
  onProgress?: (progress: number, status: string) => void
): Promise<void> {
  if (generator) return
  if (loadingPromise) return loadingPromise

  loadingPromise = (async () => {
    onProgress?.(0, 'Loading SD-Turbo model...')

    generator = await pipeline(
      'text-to-image',
      'Xenova/sdxl-turbo', // or 'stabilityai/sd-turbo'
      {
        device: 'webgpu',
        dtype: 'fp16',
        progress_callback: (p: any) => onProgress?.(p.progress, p.status)
      }
    )

    onProgress?.(100, 'Ready')
  })()

  return loadingPromise
}

export async function generateLocalImage(
  prompt: string,
  options?: {
    width?: number
    height?: number
    steps?: number
    seed?: number
  }
): Promise<string> {
  if (!generator) {
    throw new Error('SD-Turbo not initialized. Call initSDTurbo() first.')
  }

  const result = await generator(prompt, {
    width: options?.width || 512,
    height: options?.height || 512,
    num_inference_steps: options?.steps || 1, // SD-Turbo only needs 1 step
    seed: options?.seed
  })

  // Returns a base64 data URL
  return result[0].image
}

export function isSDTurboReady(): boolean {
  return generator !== null
}

export async function unloadSDTurbo(): Promise<void> {
  generator = null
  loadingPromise = null
  // Dropping the references lets the runtime reclaim GPU memory
}
```

### 2.2 Create Model Download Manager
**New File:** `src/lib/modelDownloadManager.ts`

Handle progressive model downloads with:
- IndexedDB caching for persistence
- Progress tracking UI
- Resume capability for interrupted downloads
- Storage quota management

---

## Phase 3: UI Integration

### 3.1 Update ImageGenShapeUtil
**File:** `src/shapes/ImageGenShapeUtil.tsx`

Add to shape props:
```typescript
type IImageGen = TLBaseShape<"ImageGen", {
  // ... existing props
  generationMode: 'auto' | 'local' | 'cloud' // NEW
  localModelStatus: 'not-loaded' | 'loading' | 'ready' | 'error' // NEW
  localModelProgress: number // NEW (0-100)
}>
```

Add a UI toggle for the three modes (class and handler names are illustrative):
```tsx
<div className="generation-mode-toggle">
  <button onClick={() => setMode('local')}>Fast (Local)</button>
  <button onClick={() => setMode('cloud')}>Quality (Cloud)</button>
  <button onClick={() => setMode('auto')}>Auto</button>
</div>
```
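
The `setMode` handler needs to persist the choice into the shape props above. One possible wiring, assuming access to the tldraw `editor` (the helper name, import path, and lazy model load are illustrative, not existing code):

```typescript
import type { Editor } from 'tldraw'
// Illustrative import path; adjust to where webgpuDiffusion.ts actually lives
import { initSDTurbo, isSDTurboReady } from '../lib/webgpuDiffusion'

// IImageGen is the shape type defined above
export function setGenerationMode(
  editor: Editor,
  shape: IImageGen,
  mode: 'auto' | 'local' | 'cloud'
) {
  // Persist the chosen mode on the shape so it survives reloads and undo/redo
  editor.updateShape<IImageGen>({
    id: shape.id,
    type: 'ImageGen',
    props: { generationMode: mode },
  })

  // Lazily start loading the local model the first time a non-cloud mode is chosen
  if (mode !== 'cloud' && !isSDTurboReady()) {
    initSDTurbo((progress) =>
      editor.updateShape<IImageGen>({
        id: shape.id,
        type: 'ImageGen',
        props: {
          localModelProgress: progress,
          localModelStatus: progress >= 100 ? 'ready' : 'loading',
        },
      })
    ).catch(() =>
      editor.updateShape<IImageGen>({
        id: shape.id,
        type: 'ImageGen',
        props: { localModelStatus: 'error' },
      })
    )
  }
}
```

In the shape's `component`, `setMode` would then simply be `(mode) => setGenerationMode(this.editor, shape, mode)`.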

### 3.2 Smart Generation Logic
```typescript
const generateImage = async (prompt: string) => {
  const mode = shape.props.generationMode
  const capabilities = await detectWebGPUCapabilities()

  // Auto mode: local for quick iterations, cloud for final quality
  if (mode === 'auto' || mode === 'local') {
    if (capabilities.hasWebGPU && isSDTurboReady()) {
      // Generate locally - instant and free
      const imageUrl = await generateLocalImage(prompt)
      updateShape({ imageUrl, source: 'local' })
      return
    }
  }

  // Fall back to RunPod
  await generateWithRunPod(prompt)
}
```

---

## Phase 4: AI Orchestrator Integration

### 4.1 Update aiOrchestrator.ts
**File:** `src/lib/aiOrchestrator.ts`

Add the browser as a compute target:
```typescript
type ComputeTarget = 'browser' | 'netcup' | 'runpod'

interface ImageGenerationOptions {
  prompt: string
  priority: 'draft' | 'final'
  preferLocal?: boolean
}

async function generateImage(
  options: ImageGenerationOptions
): Promise<{ target: ComputeTarget; cost: number }> {
  const { hasWebGPU } = await detectWebGPUCapabilities()

  // Routing logic: drafts run locally whenever the browser can handle them
  if (options.priority === 'draft' && hasWebGPU && isSDTurboReady()) {
    return { target: 'browser', cost: 0 }
  }

  // Final renders go to RunPod for SDXL quality
  if (options.priority === 'final') {
    return { target: 'runpod', cost: 0.02 }
  }

  // Fallback chain
  return { target: 'runpod', cost: 0.02 }
}
```

---

## Phase 5: Advanced Features (Future)

### 5.1 Real-time img2img Refinement
- Start with a browser SD-Turbo draft
- User adjusts/annotates
- Send to RunPod SDXL for the final render via img2img

### 5.2 Browser-based Upscaling
- Add Real-ESRGAN-lite via ONNX Runtime
- 2x/4x upscale locally before the cloud render

### 5.3 Background Removal
- U2Net in the browser via transformers.js
- Zero-cost background removal

### 5.4 Style Transfer
- Fast neural style transfer via WebGPU shaders
- Real-time preview on canvas

---

## Technical Considerations

### Model Sizes
| Model | Size | Load Time | Generation |
|-------|------|-----------|------------|
| SD-Turbo | ~2GB | 30-60s (first load) | 1-3s |
| SD-Turbo (quantized) | ~1GB | 15-30s | 2-4s |

### Memory Management
- Unload the model when the tab is backgrounded
- Clear GPU memory on low-memory warnings
- Cache models in IndexedDB so they survive a page refresh

### Error Handling
- Graceful degradation to WASM if WebGPU initialization fails
- Clear error messages for unsupported browsers
- Automatic fallback to RunPod on local failure

---

## Files to Create/Modify

**New Files:**
- `src/lib/webgpuDiffusion.ts` - SD-Turbo wrapper
- `src/lib/modelDownloadManager.ts` - Model caching
- `src/lib/webgpuCapabilities.ts` - Detection utilities
- `src/components/ModelDownloadProgress.tsx` - UI component

**Modified Files:**
- `src/lib/clientConfig.ts` - Add WebGPU detection
- `src/lib/aiOrchestrator.ts` - Add browser routing
- `src/shapes/ImageGenShapeUtil.tsx` - Add mode toggle
- `vite.config.ts` - ONNX/WASM config
- `package.json` - New dependencies

---

## Testing Checklist

- [ ] WebGPU detection works on Chrome, Edge, Firefox
- [ ] WASM fallback works on Safari/older browsers
- [ ] Model downloads and caches correctly
- [ ] Generation completes in <5s on a modern GPU
- [ ] Memory is cleaned up properly on unload
- [ ] Offline generation works after the model is cached
- [ ] RunPod fallback triggers correctly
- [ ] Cost tracking reflects local vs cloud usage
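
For the first two checklist items, a minimal unit-test sketch, assuming Vitest is the test runner (typical for a Vite project); the import path and the fake adapter shape are illustrative:

```typescript
// webgpuCapabilities.test.ts - illustrative sketch, assumes Vitest
import { describe, it, expect, vi, afterEach } from 'vitest'
import { detectWebGPUCapabilities } from '../src/lib/clientConfig'

afterEach(() => vi.unstubAllGlobals())

describe('detectWebGPUCapabilities', () => {
  it('reports no WebGPU when navigator.gpu is missing (Safari/older browsers)', async () => {
    vi.stubGlobal('navigator', {}) // no gpu property at all
    expect(await detectWebGPUCapabilities()).toEqual({ hasWebGPU: false, hasF16: false })
  })

  it('reports f16 support when the adapter exposes shader-f16', async () => {
    const fakeAdapter = {
      features: new Set(['shader-f16']),
      info: { vendor: 'test' }, // GPUAdapterInfo shape is illustrative
    }
    vi.stubGlobal('navigator', { gpu: { requestAdapter: async () => fakeAdapter } })

    const caps = await detectWebGPUCapabilities()
    expect(caps.hasWebGPU).toBe(true)
    expect(caps.hasF16).toBe(true)
  })
})
```

Actual generation speed and the Safari WASM path still need manual or end-to-end verification; this sketch only covers the detection logic the routing depends on.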