folkjs-migration/gitea-scanner.ts

299 lines
8.2 KiB
TypeScript

/**
* Gitea Repository Scanner for Backlog Aggregator
*
* Scans all repositories in a Gitea instance for backlog/ directories
* and clones/pulls them to a local directory for the aggregator to watch.
*
* Usage:
* bun run gitea-scanner.ts --gitea-url https://gitea.example.com --output /opt/gitea-repos
*
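* Environment variables (each can be overridden by the CLI flag in parentheses):
* GITEA_URL - Gitea instance URL (--gitea-url)
* GITEA_TOKEN - API token (optional, for private repos) (--gitea-token)
* GITEA_OUTPUT_DIR - Directory to clone repos to (--output)
* GITEA_SSH_KEY - SSH key path; when set, repos are cloned via their SSH URL (--ssh-key)
* GITEA_OWNER - Only scan repos from this owner (--owner)
* GITEA_CONCURRENCY - Number of repos checked for backlog/ per batch, default 5 (--concurrency)
*
* Pass --verbose or -v for extra logging.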
*/
import { $ } from "bun";
import { mkdir, readdir, stat, rm } from "node:fs/promises";
import { join } from "node:path";
/** Subset of a repository record as read from the Gitea repo listing responses. */
interface GiteaRepo {
id: number;
/** Short repository name; used as the checkout directory name under the output directory. */
name: string;
/** Used to build the `/repos/{full_name}/contents` API path and in log messages (presumably `owner/name`). */
full_name: string;
/** Clone URL used when no SSH key path is configured. */
clone_url: string;
/** Clone URL used when an SSH key path is configured. */
ssh_url: string;
html_url: string;
private: boolean;
/** Repositories flagged empty are dropped from the scan. */
empty: boolean;
/** Repositories flagged archived are dropped from the scan. */
archived: boolean;
/** Branch that `origin/<default_branch>` refers to when a checkout is hard-reset. */
default_branch: string;
}
/** One entry of the listing returned by `/repos/{full_name}/contents`. */
interface GiteaContent {
/** Entry name; compared against "backlog" to detect the backlog directory. */
name: string;
path: string;
/** Only entries of type "dir" count as a backlog directory. */
type: "file" | "dir";
}
/** Runtime settings for the scanner; parseArgs() builds them from flags, environment variables and defaults. */
interface ScannerConfig {
/** Base URL of the Gitea instance; `/api/v1` plus the endpoint is appended to it for API calls. */
giteaUrl: string;
/** When set, sent as an `Authorization: token ...` header on every API request. */
giteaToken?: string;
/** Local directory that repositories are cloned into (created if missing). */
outputDir: string;
/**
* When set, repositories are cloned via their SSH URL instead of the HTTPS clone URL.
* NOTE(review): the path itself is not handed to git anywhere in this file; it only selects the URL.
*/
sshKeyPath?: string;
owner?: string; // Optional: only scan repos from this owner
/** Number of repositories checked for backlog/ in parallel per batch. */
concurrency: number;
/** Enables extra progress and API-failure logging. */
verbose: boolean;
}
/**
 * Scans a Gitea instance for repositories that contain a root-level `backlog/`
 * directory and mirrors those repositories into a local output directory.
 *
 * Safety rule: local checkouts are only deleted when the scan produced a
 * definitive answer. A failed or partial API listing, or a failed contents
 * check, never causes an existing checkout to be removed.
 */
class GiteaScanner {
  private readonly config: ScannerConfig;
  private readonly headers: Record<string, string>;

  /**
   * @param config Scanner settings. When `giteaToken` is set it is sent as an
   *   `Authorization: token ...` header on every API request.
   */
  constructor(config: ScannerConfig) {
    this.config = config;
    this.headers = {
      "Accept": "application/json",
    };
    if (config.giteaToken) {
      this.headers["Authorization"] = `token ${config.giteaToken}`;
    }
  }

  /**
   * Performs a GET against the Gitea v1 API and never throws.
   *
   * @param endpoint Path below `/api/v1`, starting with `/`.
   * @returns `status` is the HTTP status (0 when the request itself failed,
   *   e.g. a network error or an unparsable body); `data` is the parsed JSON
   *   body on success and `null` otherwise.
   */
  private async fetchJson<T>(endpoint: string): Promise<{ status: number; data: T | null }> {
    // Strip trailing slashes so "https://host/" does not yield "https://host//api/v1".
    const baseUrl = this.config.giteaUrl.replace(/\/+$/, "");
    const url = `${baseUrl}/api/v1${endpoint}`;
    try {
      const response = await fetch(url, { headers: this.headers });
      if (!response.ok) {
        if (this.config.verbose) {
          console.warn(`API request failed: ${url} (${response.status})`);
        }
        return { status: response.status, data: null };
      }
      return { status: response.status, data: (await response.json()) as T };
    } catch (error) {
      if (this.config.verbose) {
        console.warn(`API request error: ${url}`, error);
      }
      return { status: 0, data: null };
    }
  }

  /**
   * Pages through the repository listing (a single owner's repos when
   * `config.owner` is set, otherwise the global search).
   *
   * @returns The non-empty, non-archived repositories collected so far and
   *   whether the listing ran to completion. `complete` is `false` when any
   *   page request failed, in which case `repos` may be only a prefix.
   */
  private async listRepos(): Promise<{ repos: GiteaRepo[]; complete: boolean }> {
    const repos: GiteaRepo[] = [];
    const limit = 50;
    for (let page = 1; ; page++) {
      const endpoint = this.config.owner
        ? `/users/${this.config.owner}/repos?page=${page}&limit=${limit}`
        : `/repos/search?page=${page}&limit=${limit}`;
      const { data } = await this.fetchJson<GiteaRepo[] | { data: GiteaRepo[] }>(endpoint);
      if (data === null) {
        return { repos, complete: false };
      }
      // Handle both direct array and {data: []} response formats
      const pageRepos = Array.isArray(data) ? data : (data.data || []);
      if (pageRepos.length === 0) break;
      // Filter out empty and archived repos
      repos.push(...pageRepos.filter((r) => !r.empty && !r.archived));
      if (pageRepos.length < limit) break;
    }
    return { repos, complete: true };
  }

  /**
   * Lists all non-empty, non-archived repositories visible to the scanner.
   * If a page request fails, the repositories collected up to that point are
   * returned.
   */
  async getAllRepos(): Promise<GiteaRepo[]> {
    return (await this.listRepos()).repos;
  }

  /**
   * Checks the repository root for a `backlog` directory.
   *
   * @returns `true`/`false` when the API gave a definitive answer (a 404 is
   *   treated as "no backlog"), or `null` when the request failed and the
   *   answer is unknown.
   */
  private async probeBacklogDir(repo: GiteaRepo): Promise<boolean | null> {
    const { status, data } = await this.fetchJson<GiteaContent[]>(
      `/repos/${repo.full_name}/contents`
    );
    if (Array.isArray(data)) {
      return data.some((item) => item.name === "backlog" && item.type === "dir");
    }
    const definitive = status === 404 || (status >= 200 && status < 300);
    return definitive ? false : null;
  }

  /**
   * Reports whether the repository has a `backlog/` directory at its root.
   * Returns `false` when the API request fails.
   */
  async hasBacklogDir(repo: GiteaRepo): Promise<boolean> {
    return (await this.probeBacklogDir(repo)) === true;
  }

  /**
   * Brings the local checkout of `repo` up to date, cloning it first if the
   * target directory does not exist yet.
   *
   * The checkout lives at `<outputDir>/<repo.name>`.
   * NOTE(review): repositories from different owners that share a name map to
   * the same directory; only the first one cloned is actually mirrored.
   *
   * @returns `true` when the checkout was cloned or updated, `false` otherwise.
   */
  async cloneOrPullRepo(repo: GiteaRepo): Promise<boolean> {
    const repoDir = join(this.config.outputDir, repo.name);
    try {
      // Check if already cloned
      const exists = await stat(repoDir).then(() => true, () => false);
      if (exists) {
        if (this.config.verbose) {
          console.log(`Pulling ${repo.full_name}...`);
        }
        // .nothrow() is required: Bun's shell throws on a non-zero exit by
        // default, which would skip the recovery path below.
        const pull = await $`git pull --ff-only`.cwd(repoDir).quiet().nothrow();
        if (pull.exitCode === 0) return true;
        console.warn(`Failed to pull ${repo.full_name}: ${pull.stderr.toString().trim()}`);

        // Fast-forward failed (e.g. diverged history): discard local state and
        // match the remote default branch.
        const remoteRef = `origin/${repo.default_branch}`;
        const recover = await $`git fetch origin && git reset --hard ${remoteRef}`
          .cwd(repoDir)
          .quiet()
          .nothrow();
        if (recover.exitCode !== 0) {
          console.warn(`Failed to reset ${repo.full_name}: ${recover.stderr.toString().trim()}`);
          return false;
        }
        return true;
      }

      if (this.config.verbose) {
        console.log(`Cloning ${repo.full_name}...`);
      }
      // Use SSH URL if we have an SSH key configured, otherwise HTTPS
      const cloneUrl = this.config.sshKeyPath ? repo.ssh_url : repo.clone_url;
      const clone = await $`git clone --depth 1 ${cloneUrl} ${repoDir}`.quiet().nothrow();
      if (clone.exitCode !== 0) {
        console.warn(`Failed to clone ${repo.full_name}: ${clone.stderr.toString().trim()}`);
        return false;
      }
      return true;
    } catch (error) {
      console.error(`Error processing ${repo.full_name}:`, error);
      return false;
    }
  }

  /**
   * Deletes every non-hidden directory directly under the output directory
   * whose name is not in `validRepoNames`. Errors are logged, not thrown.
   *
   * @param validRepoNames Directory names that must be kept.
   */
  async cleanupStaleRepos(validRepoNames: Set<string>): Promise<void> {
    try {
      const entries = await readdir(this.config.outputDir, { withFileTypes: true });
      for (const entry of entries) {
        if (!entry.isDirectory()) continue;
        if (entry.name.startsWith(".")) continue;
        if (!validRepoNames.has(entry.name)) {
          const repoDir = join(this.config.outputDir, entry.name);
          console.log(`Removing stale repo: ${entry.name}`);
          await rm(repoDir, { recursive: true, force: true });
        }
      }
    } catch (error) {
      console.warn("Error cleaning up stale repos:", error);
    }
  }

  /**
   * Runs one full pass: list repositories, find those with `backlog/`,
   * clone or update them, then remove stale checkouts.
   *
   * @returns `total` repositories listed, how many have `backlog/`, and how
   *   many of those were synced successfully.
   */
  async scan(): Promise<{ total: number; withBacklog: number; synced: number }> {
    console.log(`Scanning Gitea at ${this.config.giteaUrl}...`);
    // Ensure output directory exists
    await mkdir(this.config.outputDir, { recursive: true });

    const { repos, complete } = await this.listRepos();
    console.log(`Found ${repos.length} repositories`);

    const reposWithBacklog: GiteaRepo[] = [];
    // Repos whose backlog check failed; their checkouts must survive cleanup.
    const undetermined: GiteaRepo[] = [];

    // A batch size of 0 would loop forever and NaN would skip every repo, so
    // force at least one repo per batch.
    const batchSize = Math.max(1, Math.floor(this.config.concurrency) || 1);
    for (let i = 0; i < repos.length; i += batchSize) {
      const batch = repos.slice(i, i + batchSize);
      const results = await Promise.all(
        batch.map(async (repo) => ({ repo, hasBacklog: await this.probeBacklogDir(repo) }))
      );
      for (const { repo, hasBacklog } of results) {
        if (hasBacklog === null) {
          undetermined.push(repo);
          console.warn(
            `Could not determine whether ${repo.full_name} has backlog/; keeping any existing checkout`
          );
        } else if (hasBacklog) {
          reposWithBacklog.push(repo);
          if (this.config.verbose) {
            console.log(`${repo.full_name} has backlog/`);
          }
        }
      }
    }
    console.log(`Found ${reposWithBacklog.length} repositories with backlog/`);

    // Clone or pull repos with backlog
    let synced = 0;
    for (const repo of reposWithBacklog) {
      if (await this.cloneOrPullRepo(repo)) synced++;
    }

    // Cleanup repos that no longer have backlog or were deleted, but only when
    // the listing is known to be complete. Otherwise an API outage would wipe
    // every checkout.
    if (complete) {
      const validNames = new Set(reposWithBacklog.map((r) => r.name));
      for (const repo of undetermined) validNames.add(repo.name);
      await this.cleanupStaleRepos(validNames);
    } else {
      console.warn("Repository listing was incomplete; skipping stale repo cleanup");
    }

    console.log(`Synced ${synced}/${reposWithBacklog.length} repositories`);
    return {
      total: repos.length,
      withBacklog: reposWithBacklog.length,
      synced,
    };
  }
}
// Parse CLI arguments
/**
 * Builds the scanner configuration from command-line flags, falling back to
 * environment variables and then to built-in defaults.
 *
 * Each option is read as `--<flag> <value>`. A flag that is immediately
 * followed by another `--flag` is treated as having no value. `--verbose` /
 * `-v` is a plain switch.
 *
 * An invalid concurrency (not a positive integer) is replaced by the default
 * of 5 with a warning, because the scanner's batch loop cannot make progress
 * with a step of 0 or NaN.
 *
 * @returns The resolved configuration. Exits the process with status 1 when
 *   the Gitea URL or output directory resolves to an empty value.
 */
function parseArgs(): ScannerConfig {
  const args = process.argv.slice(2);
  const defaultConcurrency = 5;

  const getArg = (name: string, envVar: string, defaultValue?: string): string | undefined => {
    const index = args.indexOf(`--${name}`);
    const value = index === -1 ? undefined : args[index + 1];
    // Do not swallow the next flag as this option's value.
    if (value && !value.startsWith("--")) {
      return value;
    }
    return process.env[envVar] || defaultValue;
  };

  const giteaUrl = getArg("gitea-url", "GITEA_URL", "https://gitea.jeffemmett.com");
  const giteaToken = getArg("gitea-token", "GITEA_TOKEN");
  const outputDir = getArg("output", "GITEA_OUTPUT_DIR", "/opt/gitea-repos");
  const sshKeyPath = getArg("ssh-key", "GITEA_SSH_KEY", "/root/.ssh/gitea_ed25519");
  const owner = getArg("owner", "GITEA_OWNER", "jeffemmett");

  const rawConcurrency =
    getArg("concurrency", "GITEA_CONCURRENCY") ?? String(defaultConcurrency);
  let concurrency = Number.parseInt(rawConcurrency, 10);
  if (!Number.isInteger(concurrency) || concurrency < 1) {
    console.warn(
      `Invalid concurrency "${rawConcurrency}"; falling back to ${defaultConcurrency}`
    );
    concurrency = defaultConcurrency;
  }

  const verbose = args.includes("--verbose") || args.includes("-v");

  if (!giteaUrl) {
    console.error("Error: --gitea-url or GITEA_URL is required");
    process.exit(1);
  }
  if (!outputDir) {
    console.error("Error: --output or GITEA_OUTPUT_DIR is required");
    process.exit(1);
  }

  return {
    giteaUrl,
    giteaToken,
    outputDir,
    sshKeyPath,
    owner,
    concurrency,
    verbose,
  };
}
// Script entry: only runs when this file is executed directly, not when imported.
if (import.meta.main) {
  const scanner = new GiteaScanner(parseArgs());
  try {
    const { total, withBacklog, synced } = await scanner.scan();
    // Print the summary of the finished pass.
    console.log("\nScan complete:");
    console.log(` Total repos: ${total}`);
    console.log(` With backlog/: ${withBacklog}`);
    console.log(` Successfully synced: ${synced}`);
  } catch (error) {
    // Any failure thrown by the scan ends the process with a failing status.
    console.error("Scan failed:", error);
    process.exit(1);
  }
}
export { GiteaScanner, type ScannerConfig };