# ai-orchestrator/docker-compose.yml

services:
  # Orchestrator service: built from the local Dockerfile, published through
  # Traefik at ai.jeffemmett.com, and configured to reach Ollama over the
  # private ai-internal network.
  ai-orchestrator:
    build:
      context: .
      dockerfile: Dockerfile
    image: ai-orchestrator:latest
    container_name: ai-orchestrator
    restart: unless-stopped
    environment:
      # Interpolated by Compose from the shell environment or an .env file.
      - RUNPOD_API_KEY=${RUNPOD_API_KEY}
      # "ollama" is the service name, resolvable on the shared ai-internal network.
      - OLLAMA_HOST=http://ollama:11434
    depends_on:
      ollama:
        # Orders startup only; does not wait for Ollama's healthcheck to pass.
        condition: service_started
    labels:
      # Traefik auto-discovery
      - "traefik.enable=true"
      # This container is attached to two networks. Pin the one Traefik must
      # use, otherwise it may pick the ai-internal address, which it cannot reach.
      - "traefik.docker.network=traefik-public"
      - "traefik.http.routers.ai-orchestrator.rule=Host(`ai.jeffemmett.com`)"
      - "traefik.http.routers.ai-orchestrator.entrypoints=websecure"
      - "traefik.http.routers.ai-orchestrator.tls=true"
      - "traefik.http.services.ai-orchestrator.loadbalancer.server.port=8080"
      # Health check for Traefik
      - "traefik.http.services.ai-orchestrator.loadbalancer.healthcheck.path=/api/health"
      - "traefik.http.services.ai-orchestrator.loadbalancer.healthcheck.interval=30s"
    networks:
      - traefik-public
      - ai-internal
    # Container-level health probe: urlopen raises (non-zero exit) on connection
    # failure or an HTTP error status from /api/health.
    healthcheck:
      test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8080/api/health')"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s

  # Ollama model server, reachable only from services on ai-internal.
  ollama:
    image: ollama/ollama:latest
    container_name: ollama
    restart: unless-stopped
    volumes:
      # Keeps the contents of /root/.ollama across container recreation.
      - ollama-data:/root/.ollama
    networks:
      - ai-internal
    # Expose internally only (orchestrator routes to it)
    expose:
      - "11434"
    # Note: Ollama doesn't have curl/wget, so probe the server with the bundled
    # CLI instead. `ollama list` exits non-zero when it cannot reach the server.
    # There is deliberately no `|| exit 0` fallback: that would report the
    # container as healthy even while the server is down.
    healthcheck:
      test: ["CMD", "ollama", "list"]
      interval: 60s
      timeout: 30s
      retries: 3
      start_period: 60s
    # CPU-only mode (no GPU passthrough needed for RS 8000)
    deploy:
      resources:
        limits:
          memory: 16G
        reservations:
          memory: 4G

# Named volume mounted by the ollama service at /root/.ollama.
volumes:
  ollama-data: {driver: local}

networks:
  # Private bridge shared by the orchestrator and Ollama.
  ai-internal:
    driver: bridge
  # Declared external: must already exist, Compose will not create it.
  traefik-public:
    external: true