diff --git a/Dockerfile.aggregator b/Dockerfile.aggregator
new file mode 100644
--- /dev/null
+++ b/Dockerfile.aggregator
@@ -0,0 +1,39 @@
+# Backlog Aggregator with Gitea Scanner
+FROM oven/bun:1 AS base
+
+WORKDIR /app
+
+# Install git for repo operations, cron for the daily sync, and the SSH
+# client (ssh-keyscan, git over SSH); ca-certificates is for HTTPS clones.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends git cron openssh-client ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+
+# Copy backlog-md source. "backlog-md" is not a stage defined in this file,
+# so it must resolve to an existing image or a named build context.
+COPY --from=backlog-md /app /app
+
+# Copy the gitea scanner
+COPY gitea-scanner.ts /app/src/aggregator/gitea-scanner.ts
+COPY entrypoint.sh /app/entrypoint.sh
+
+RUN chmod +x /app/entrypoint.sh
+
+# Create cron job for daily Gitea sync (runs at 2 AM).
+# The table is installed with `crontab`, i.e. it uses the user-crontab format
+# (no user column), so it is kept out of /etc/cron.d where that column is
+# mandatory. cron runs jobs with a minimal environment: PATH is set so `bun`
+# resolves, and the job sources the settings that entrypoint.sh saves.
+RUN printf '%s\n' \
+      'SHELL=/bin/bash' \
+      'PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin' \
+      '0 2 * * * cd /app && source /etc/gitea-scanner.env && bun run src/aggregator/gitea-scanner.ts --verbose >> /var/log/gitea-scanner.log 2>&1' \
+      > /etc/gitea-scanner.cron \
+    && crontab /etc/gitea-scanner.cron
+
+# Create log file and an empty settings file (filled in by entrypoint.sh)
+RUN touch /var/log/gitea-scanner.log /etc/gitea-scanner.env
+
+EXPOSE 6420
+
+ENTRYPOINT ["/app/entrypoint.sh"]
diff --git a/docker-compose.aggregator.yml b/docker-compose.aggregator.yml
new file mode 100644
--- /dev/null
+++ b/docker-compose.aggregator.yml
@@ -0,0 +1,37 @@
+version: "3.8"
+
+services:
+  backlog-aggregator:
+    build:
+      context: .
+      dockerfile: Dockerfile.aggregator
+    container_name: backlog-aggregator
+    restart: unless-stopped
+    ports:
+      - "6420:6420"
+    volumes:
+      # Existing project directories
+      - /opt/websites:/projects/websites:rw
+      - /opt/apps:/projects/apps:rw
+      # Gitea-synced repos (new)
+      - /opt/gitea-repos:/projects/gitea:rw
+      # SSH keys for git operations (read-only: the container never writes here)
+      - /root/.ssh:/root/.ssh:ro
+    environment:
+      - NODE_ENV=production
+      - GITEA_URL=https://gitea.jeffemmett.com
+      - GITEA_OWNER=jeffemmett
+      - GITEA_OUTPUT_DIR=/projects/gitea
+      - GITEA_SSH_KEY=/root/.ssh/gitea_ed25519
+      # Optional API token for private repos, taken from the host environment
+      - GITEA_TOKEN=${GITEA_TOKEN:-}
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.backlog.rule=Host(`backlog.jeffemmett.com`)"
+      - "traefik.http.services.backlog.loadbalancer.server.port=6420"
+    networks:
+      - traefik-public
+
+networks:
+  traefik-public:
+    external: true
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100644
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+set -e
+
+# Save the scanner settings for cron, which starts jobs with a minimal
+# environment and would otherwise not see the container's GITEA_* variables.
+ENV_FILE=/etc/gitea-scanner.env
+: > "$ENV_FILE"
+chmod 600 "$ENV_FILE"  # may contain GITEA_TOKEN
+for name in GITEA_URL GITEA_TOKEN GITEA_OWNER GITEA_OUTPUT_DIR GITEA_SSH_KEY GITEA_CONCURRENCY; do
+  if [ -n "${!name:-}" ]; then
+    printf 'export %s=%q\n' "$name" "${!name}" >> "$ENV_FILE"
+  fi
+done
+
+# Start cron daemon in background
+cron
+
+# Trust gitea.jeffemmett.com host key.
+# /root/.ssh is mounted read-only, so the key goes into the system-wide
+# known_hosts file; modifying the mount would abort the script under `set -e`.
+mkdir -p /etc/ssh
+ssh-keyscan -p 223 gitea.jeffemmett.com >> /etc/ssh/ssh_known_hosts 2>/dev/null || true
+
+# Configure git to use SSH key
+SSH_KEY="${GITEA_SSH_KEY:-/root/.ssh/gitea_ed25519}"
+if [ -f "$SSH_KEY" ]; then
+  export GIT_SSH_COMMAND="ssh -i \"$SSH_KEY\" -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new"
+fi
+
+# Run initial Gitea scan on startup
+echo "Running initial Gitea repository scan..."
+cd /app && bun run src/aggregator/gitea-scanner.ts --verbose || echo "Initial scan failed, will retry via cron"
+
+# Start the aggregator with all project paths
+exec bun run src/aggregator/index.ts --port 6420 --paths "/projects/websites,/projects/apps,/projects/gitea"
diff --git a/gitea-scanner.ts b/gitea-scanner.ts
new file mode 100644
--- /dev/null
+++ b/gitea-scanner.ts
@@ -0,0 +1,406 @@
+/**
+ * Gitea Repository Scanner for Backlog Aggregator
+ *
+ * Scans all repositories in a Gitea instance for backlog/ directories
+ * and clones/pulls them to a local directory for the aggregator to watch.
+ *
+ * Usage:
+ *   bun run gitea-scanner.ts --gitea-url https://gitea.example.com --output /opt/gitea-repos
+ *
+ * Environment variables:
+ *   GITEA_URL         - Gitea instance URL
+ *   GITEA_TOKEN       - API token (optional, for private repos)
+ *   GITEA_OUTPUT_DIR  - Directory to clone repos to
+ *   GITEA_SSH_KEY     - SSH private key for git operations (optional)
+ *   GITEA_OWNER       - Only scan repositories of this owner (optional)
+ *   GITEA_CONCURRENCY - Parallel API checks per batch (default 5)
+ */
+
+import { $ } from "bun";
+import { existsSync } from "node:fs";
+import { mkdir, readdir, rm, stat } from "node:fs/promises";
+import { join } from "node:path";
+
+interface GiteaRepo {
+  id: number;
+  name: string;
+  full_name: string;
+  clone_url: string;
+  ssh_url: string;
+  html_url: string;
+  private: boolean;
+  empty: boolean;
+  archived: boolean;
+  default_branch: string;
+}
+
+interface GiteaContent {
+  name: string;
+  path: string;
+  type: "file" | "dir";
+}
+
+interface ScannerConfig {
+  giteaUrl: string;
+  giteaToken?: string;
+  outputDir: string;
+  sshKeyPath?: string;
+  owner?: string; // Optional: only scan repos from this owner
+  concurrency: number;
+  verbose: boolean;
+}
+
+/** Result of probing a repository for a root-level backlog/ directory. */
+type BacklogProbe = "present" | "absent" | "unknown";
+
+/** Batch size used when the configured concurrency is not a positive integer. */
+const DEFAULT_CONCURRENCY = 5;
+
+/** Page size requested from the Gitea listing endpoints. */
+const PAGE_SIZE = 50;
+
+class GiteaScanner {
+  private readonly config: ScannerConfig;
+  private readonly headers: Record<string, string>;
+  /** Environment handed to every git invocation. */
+  private readonly gitEnv: Record<string, string>;
+  /** True when an SSH key is configured and the key file exists. */
+  private readonly useSsh: boolean;
+  /** Set by getAllRepos() when a listing request failed part-way. */
+  private listingIncomplete = false;
+
+  constructor(config: ScannerConfig) {
+    this.config = config;
+    this.headers = {
+      Accept: "application/json",
+    };
+    if (config.giteaToken) {
+      this.headers["Authorization"] = `token ${config.giteaToken}`;
+    }
+
+    this.gitEnv = {};
+    for (const [key, value] of Object.entries(process.env)) {
+      if (value !== undefined) this.gitEnv[key] = value;
+    }
+
+    const keyPath = config.sshKeyPath;
+    this.useSsh = keyPath !== undefined && existsSync(keyPath);
+    // Set the SSH command here as well as in entrypoint.sh: the cron-started
+    // scan does not inherit the variables exported by the entrypoint.
+    if (this.useSsh && keyPath !== undefined && !this.gitEnv["GIT_SSH_COMMAND"]) {
+      this.gitEnv["GIT_SSH_COMMAND"] =
+        `ssh -i "${keyPath}" -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new`;
+    }
+  }
+
+  /**
+   * GETs a Gitea API endpoint (path relative to /api/v1) and parses the JSON body.
+   * Returns null on network errors and non-2xx responses; the response shape
+   * is trusted to match T and is not validated.
+   */
+  private async fetchJson<T>(endpoint: string): Promise<T | null> {
+    const baseUrl = this.config.giteaUrl.replace(/\/+$/, "");
+    const url = `${baseUrl}/api/v1${endpoint}`;
+    try {
+      const response = await fetch(url, { headers: this.headers });
+      if (!response.ok) {
+        if (this.config.verbose) {
+          console.warn(`API request failed: ${url} (${response.status})`);
+        }
+        return null;
+      }
+      return (await response.json()) as T;
+    } catch (error) {
+      if (this.config.verbose) {
+        console.warn(`API request error: ${url}`, error);
+      }
+      return null;
+    }
+  }
+
+  /**
+   * Lists the owner's repositories (or all visible ones when no owner is set),
+   * page by page, skipping empty and archived repositories.
+   * A failed request ends the listing early; that is remembered so scan()
+   * does not mistake a partial list for "these repositories are gone".
+   */
+  async getAllRepos(): Promise<GiteaRepo[]> {
+    const allRepos: GiteaRepo[] = [];
+    this.listingIncomplete = false;
+
+    for (let page = 1; ; page++) {
+      const query = `page=${page}&limit=${PAGE_SIZE}`;
+      const endpoint = this.config.owner
+        ? `/users/${encodeURIComponent(this.config.owner)}/repos?${query}`
+        : `/repos/search?${query}`;
+
+      const repos = await this.fetchJson<GiteaRepo[] | { data?: GiteaRepo[] }>(endpoint);
+      if (!repos) {
+        this.listingIncomplete = true;
+        break;
+      }
+
+      // Handle both direct array and {data: []} response formats
+      const repoList = Array.isArray(repos) ? repos : (repos.data ?? []);
+
+      // Stop on the first empty page rather than on a short one, in case the
+      // server caps the page size below the requested limit.
+      if (repoList.length === 0) break;
+
+      // Filter out empty and archived repos
+      allRepos.push(...repoList.filter((r) => !r.empty && !r.archived));
+    }
+
+    return allRepos;
+  }
+
+  /**
+   * Checks the repository root listing for a backlog/ directory.
+   * Returns "unknown" when the API call failed, so callers can tell
+   * "no backlog" apart from "could not check".
+   */
+  private async probeBacklogDir(repo: GiteaRepo): Promise<BacklogProbe> {
+    const contents = await this.fetchJson<GiteaContent[]>(
+      `/repos/${repo.full_name}/contents`,
+    );
+
+    if (!contents || !Array.isArray(contents)) return "unknown";
+
+    const found = contents.some((item) => item.name === "backlog" && item.type === "dir");
+    return found ? "present" : "absent";
+  }
+
+  /** Returns true only when the repository is known to have backlog/ at its root. */
+  async hasBacklogDir(repo: GiteaRepo): Promise<boolean> {
+    return (await this.probeBacklogDir(repo)) === "present";
+  }
+
+  /**
+   * Clones the repository into the output directory, or updates an existing
+   * clone. Returns false (after logging) when git fails; never throws.
+   */
+  async cloneOrPullRepo(repo: GiteaRepo): Promise<boolean> {
+    const repoDir = join(this.config.outputDir, repo.name);
+
+    try {
+      // Check if already cloned
+      const exists = await stat(repoDir).then(
+        () => true,
+        () => false,
+      );
+
+      if (!exists) {
+        if (this.config.verbose) {
+          console.log(`Cloning ${repo.full_name}...`);
+        }
+
+        // Use the SSH URL only when a usable key exists, otherwise HTTPS
+        const cloneUrl = this.useSsh ? repo.ssh_url : repo.clone_url;
+
+        // Bun's shell throws on a non-zero exit unless nothrow() is set, which
+        // would make the exitCode checks below unreachable.
+        const clone = await $`git clone --depth 1 ${cloneUrl} ${repoDir}`
+          .env(this.gitEnv)
+          .nothrow()
+          .quiet();
+        if (clone.exitCode !== 0) {
+          console.warn(`Failed to clone ${repo.full_name}: ${clone.stderr.toString().trim()}`);
+          return false;
+        }
+        return true;
+      }
+
+      // Pull latest changes
+      if (this.config.verbose) {
+        console.log(`Pulling ${repo.full_name}...`);
+      }
+      const pull = await $`git pull --ff-only`
+        .cwd(repoDir)
+        .env(this.gitEnv)
+        .nothrow()
+        .quiet();
+      if (pull.exitCode === 0) return true;
+
+      console.warn(`Failed to pull ${repo.full_name}: ${pull.stderr.toString().trim()}`);
+
+      // Try to reset and pull
+      const remoteRef = `origin/${repo.default_branch}`;
+      const reset = await $`git fetch origin && git reset --hard ${remoteRef}`
+        .cwd(repoDir)
+        .env(this.gitEnv)
+        .nothrow()
+        .quiet();
+      if (reset.exitCode !== 0) {
+        console.warn(`Failed to reset ${repo.full_name}: ${reset.stderr.toString().trim()}`);
+        return false;
+      }
+      return true;
+    } catch (error) {
+      console.error(`Error processing ${repo.full_name}:`, error);
+      return false;
+    }
+  }
+
+  /**
+   * Deletes every non-hidden directory in the output directory whose name is
+   * not in validRepoNames. Errors are logged, not thrown.
+   */
+  async cleanupStaleRepos(validRepoNames: Set<string>): Promise<void> {
+    try {
+      const entries = await readdir(this.config.outputDir, { withFileTypes: true });
+
+      for (const entry of entries) {
+        if (!entry.isDirectory()) continue;
+        if (entry.name.startsWith(".")) continue;
+        if (validRepoNames.has(entry.name)) continue;
+
+        const repoDir = join(this.config.outputDir, entry.name);
+        console.log(`Removing stale repo: ${entry.name}`);
+        await rm(repoDir, { recursive: true, force: true });
+      }
+    } catch (error) {
+      console.warn("Error cleaning up stale repos:", error);
+    }
+  }
+
+  /**
+   * Runs a full sync: list repositories, find those with backlog/, clone or
+   * update them, then remove local clones that are no longer wanted.
+   * Returns the number of repositories listed, with backlog/, and synced.
+   */
+  async scan(): Promise<{ total: number; withBacklog: number; synced: number }> {
+    console.log(`Scanning Gitea at ${this.config.giteaUrl}...`);
+
+    // Ensure output directory exists
+    await mkdir(this.config.outputDir, { recursive: true });
+
+    // Get all repos
+    const repos = await this.getAllRepos();
+    console.log(`Found ${repos.length} repositories`);
+
+    // Check which repos have backlog directories
+    const reposWithBacklog: GiteaRepo[] = [];
+    // Repos whose check failed: not synced, but their clones are kept
+    const uncheckedNames: string[] = [];
+
+    // Process in batches for concurrency control. A zero or NaN batch size
+    // would loop forever or skip every repository, so fall back to the default.
+    const { concurrency } = this.config;
+    const batchSize =
+      Number.isInteger(concurrency) && concurrency > 0 ? concurrency : DEFAULT_CONCURRENCY;
+    for (let i = 0; i < repos.length; i += batchSize) {
+      const batch = repos.slice(i, i + batchSize);
+      const results = await Promise.all(
+        batch.map(async (repo) => ({ repo, probe: await this.probeBacklogDir(repo) })),
+      );
+
+      for (const { repo, probe } of results) {
+        if (probe === "unknown") {
+          uncheckedNames.push(repo.name);
+        } else if (probe === "present") {
+          reposWithBacklog.push(repo);
+          if (this.config.verbose) {
+            console.log(`  ✓ ${repo.full_name} has backlog/`);
+          }
+        }
+      }
+    }
+
+    console.log(`Found ${reposWithBacklog.length} repositories with backlog/`);
+
+    // Clone or pull repos with backlog
+    let synced = 0;
+    for (const repo of reposWithBacklog) {
+      const success = await this.cloneOrPullRepo(repo);
+      if (success) synced++;
+    }
+
+    // Cleanup repos that no longer have backlog or were deleted. Skipped when
+    // the listing was cut short by an API failure: deleting against a partial
+    // list would wipe clones of repositories that still exist.
+    if (this.listingIncomplete) {
+      console.warn("Repository listing was incomplete; skipping stale repo cleanup");
+    } else {
+      const validNames = new Set([...reposWithBacklog.map((r) => r.name), ...uncheckedNames]);
+      await this.cleanupStaleRepos(validNames);
+    }
+
+    console.log(`Synced ${synced}/${reposWithBacklog.length} repositories`);
+
+    return {
+      total: repos.length,
+      withBacklog: reposWithBacklog.length,
+      synced,
+    };
+  }
+}
+
+// Parse CLI arguments
+function parseArgs(): ScannerConfig {
+  const args = process.argv.slice(2);
+
+  // Precedence: --flag value, then the environment variable, then the default.
+  // `||` is deliberate: an empty flag value or environment variable counts as unset.
+  const getArg = (name: string, envVar: string, defaultValue?: string): string | undefined => {
+    const index = args.indexOf(`--${name}`);
+    const value = index === -1 ? undefined : args[index + 1];
+    return value || process.env[envVar] || defaultValue;
+  };
+
+  const giteaUrl = getArg("gitea-url", "GITEA_URL", "https://gitea.jeffemmett.com");
+  const giteaToken = getArg("gitea-token", "GITEA_TOKEN");
+  const outputDir = getArg("output", "GITEA_OUTPUT_DIR", "/opt/gitea-repos");
+  const sshKeyPath = getArg("ssh-key", "GITEA_SSH_KEY", "/root/.ssh/gitea_ed25519");
+  const owner = getArg("owner", "GITEA_OWNER", "jeffemmett");
+  const verbose = args.includes("--verbose") || args.includes("-v");
+
+  const rawConcurrency = getArg("concurrency", "GITEA_CONCURRENCY");
+  let concurrency = DEFAULT_CONCURRENCY;
+  if (rawConcurrency !== undefined) {
+    const parsed = Number.parseInt(rawConcurrency, 10);
+    if (Number.isInteger(parsed) && parsed > 0) {
+      concurrency = parsed;
+    } else {
+      console.warn(`Invalid concurrency "${rawConcurrency}", using ${DEFAULT_CONCURRENCY}`);
+    }
+  }
+
+  if (!giteaUrl) {
+    console.error("Error: --gitea-url or GITEA_URL is required");
+    process.exit(1);
+  }
+
+  if (!outputDir) {
+    console.error("Error: --output or GITEA_OUTPUT_DIR is required");
+    process.exit(1);
+  }
+
+  return {
+    giteaUrl,
+    giteaToken,
+    outputDir,
+    sshKeyPath,
+    owner,
+    concurrency,
+    verbose,
+  };
+}
+
+// Main entry point
+if (import.meta.main) {
+  const config = parseArgs();
+  const scanner = new GiteaScanner(config);
+
+  try {
+    const result = await scanner.scan();
+    console.log("\nScan complete:");
+    console.log(`  Total repos: ${result.total}`);
+    console.log(`  With backlog/: ${result.withBacklog}`);
+    console.log(`  Successfully synced: ${result.synced}`);
+  } catch (error) {
+    console.error("Scan failed:", error);
+    process.exit(1);
+  }
+}
+
+export { GiteaScanner, type ScannerConfig };