// rspace-online/server/security.ts

/**
 * Security Middleware — bot protection, rate limiting, and MCP guard.
 *
 * Layer 2 defense (Hono-level). Layer 1 is Traefik rate limiting at the edge.
 *
 * Exports:
 *   createSecurityMiddleware(opts) → Hono MiddlewareHandler
 *   mcpGuard                      → Hono MiddlewareHandler for /api/mcp/*
 */

import type { MiddlewareHandler } from "hono";

// ── IP extraction (Cloudflare tunnel → X-Forwarded-For fallback) ──

/**
 * Resolve the caller's IP address from proxy headers.
 *
 * Preference order: `cf-connecting-ip`, then the first comma-separated entry
 * of `x-forwarded-for` (whitespace-trimmed), then the literal `"unknown"`.
 * Empty header values are treated the same as missing ones.
 *
 * @param headers Request headers to inspect.
 * @returns The first non-empty candidate, or `"unknown"`.
 */
function getClientIP(headers: Headers): string {
	const cloudflareIp = headers.get("cf-connecting-ip");
	if (cloudflareIp) return cloudflareIp;

	const forwardedFor = headers.get("x-forwarded-for");
	if (forwardedFor != null) {
		// Only the left-most hop is considered.
		const firstHop = forwardedFor.split(",")[0].trim();
		if (firstHop) return firstHop;
	}

	return "unknown";
}

// ── User-Agent filtering ──

/**
 * Lower-case substrings of User-Agents that are rejected, unless the client
 * also identifies as a trusted agent (see ALLOW_UA_PATTERNS).
 */
const BAD_UA_PATTERNS = [
	"scrapy", "python-requests", "masscan", "nikto", "sqlmap", "zgrab",
	"nmap", "libwww-perl", "mj12bot", "ahrefsbot", "semrushbot", "dotbot",
	"blexbot", "petalbot", "dataforseobot",
];

/**
 * Lower-case substrings of trusted agent User-Agents. A match here overrides
 * the block list.
 *
 * Generic browser tokens ("mozilla/5.0", "applewebkit", "gecko") are
 * intentionally NOT listed: crawlers commonly announce themselves as
 * `Mozilla/5.0 (compatible; <BotName>/…)`, so treating those tokens as an
 * override would let block-listed bots through. Ordinary browsers do not need
 * an override because anything that is not block-listed is accepted anyway.
 */
const ALLOW_UA_PATTERNS = [
	"claude", "anthropic", "chatgpt", "gptbot", "openai", "mcp-client",
];

/**
 * Decide whether a User-Agent should be rejected.
 *
 * @param ua Raw `User-Agent` header value (may be empty).
 * @returns `true` when the UA is empty or matches the block list without also
 *          matching a trusted agent token; `false` otherwise.
 */
function isBadUA(ua: string): boolean {
	if (!ua) return true; // empty UA = suspicious

	const lower = ua.toLowerCase();

	// Trusted agents win even if their HTTP library is block-listed.
	if (ALLOW_UA_PATTERNS.some((allow) => lower.includes(allow))) return false;

	return BAD_UA_PATTERNS.some((bad) => lower.includes(bad));
}

// ── Sliding-window rate limiter (in-memory, no packages) ──

/** Request history for one rate-limit key. */
interface RateBucket {
	/** `Date.now()` values of the requests that were admitted. */
	timestamps: number[];
}

/** All live buckets, keyed by the string passed to `checkRateLimit`. */
const rateBuckets = new Map<string, RateBucket>();

// Cleanup stale buckets every 5 minutes. The cutoff is 2 minutes — twice the
// 60 s window used by checkRateLimit — so nothing that could still count
// toward a limit is discarded.
const staleBucketSweeper = setInterval(() => {
	const cutoff = Date.now() - 120_000;
	for (const [key, bucket] of rateBuckets) {
		bucket.timestamps = bucket.timestamps.filter((t) => t > cutoff);
		if (bucket.timestamps.length === 0) rateBuckets.delete(key);
	}
}, 5 * 60 * 1000) as number | { unref?: () => void };

// Housekeeping must not keep the process alive on its own (e.g. a test run or
// script that merely imports this module). Runtimes whose timers are plain
// numbers have no unref(), hence the runtime guard.
if (typeof staleBucketSweeper === "object") {
	staleBucketSweeper.unref?.();
}

/**
 * Sliding-window limiter: admit a request unless `maxPerMinute` requests were
 * already admitted for `key` during the last 60 seconds.
 *
 * Side effects: creates the bucket on first use, prunes entries older than the
 * window, and records the current time when the request is admitted. Rejected
 * requests are not recorded.
 *
 * @param key          Bucket identifier chosen by the caller.
 * @param maxPerMinute Maximum admitted requests per rolling 60 s.
 * @returns `true` if the request is allowed, `false` if it is rate limited.
 */
function checkRateLimit(key: string, maxPerMinute: number): boolean {
	const now = Date.now();
	const windowStart = now - 60_000;

	let bucket = rateBuckets.get(key);
	if (!bucket) {
		bucket = { timestamps: [] };
		rateBuckets.set(key, bucket);
	}

	// Prune old entries
	bucket.timestamps = bucket.timestamps.filter((t) => t > windowStart);

	if (bucket.timestamps.length >= maxPerMinute) {
		return false; // rate limited
	}

	bucket.timestamps.push(now);
	return true; // allowed
}

// ── Rate limit tiers ──

/** Per-minute request budgets for every path matched by `pattern`. */
interface RateLimitTier {
	/** Tested against the request path. */
	pattern: RegExp;
	/** Budget for requests without a Bearer authorization header. */
	anonymous: number;
	/** Budget for requests carrying a Bearer authorization header. */
	authenticated: number;
}

// Order matters: the first matching entry wins, so the catch-all /api/ tier
// has to stay last.
const RATE_TIERS: RateLimitTier[] = [
	{ pattern: /^\/api\/mi\//, anonymous: 10, authenticated: 30 },
	{ pattern: /^\/api\/mcp/, anonymous: 30, authenticated: 120 },
	{ pattern: /^\/api\/auth\//, anonymous: 10, authenticated: 10 },
	{ pattern: /^\/api\//, anonymous: 60, authenticated: 300 },
];

/**
 * Look up the rate-limit tier for a request path.
 *
 * @param path Request path, e.g. `/api/mcp/tools`.
 * @returns The first tier whose pattern matches; when none does, the last
 *          (general `/api/`) tier.
 */
function getTier(path: string): RateLimitTier {
	const matched = RATE_TIERS.find((candidate) => candidate.pattern.test(path));
	return matched ?? RATE_TIERS[RATE_TIERS.length - 1];
}

// ── Main security middleware ──

/** Options accepted by `createSecurityMiddleware`. */
export interface SecurityMiddlewareOpts {
	/**
	 * Path prefixes the middleware ignores entirely. A request whose path
	 * starts with any entry skips both the User-Agent filter and rate limiting.
	 */
	skipPaths?: string[];
}

// Evaluated once at module load: rate limiting is switched off when
// DISABLE_RATE_LIMIT is "1" or NODE_ENV is "test". The User-Agent filter is
// not affected by this flag.
const RATE_LIMIT_DISABLED = process.env.DISABLE_RATE_LIMIT === "1" || process.env.NODE_ENV === "test";

/**
 * Build the Hono middleware that applies the User-Agent filter and per-IP
 * rate limiting to API routes.
 *
 * Per request, in order:
 *   1. Paths outside `/api/`, or starting with one of `opts.skipPaths`, are
 *      passed straight through.
 *   2. A User-Agent rejected by `isBadUA` yields 403 `{ error: "Forbidden" }`.
 *   3. Unless rate limiting is disabled, the request is counted against a
 *      bucket keyed by client IP and tier; when the budget is exhausted the
 *      response is 429 `{ error: "Too many requests" }`.
 *
 * The higher "authenticated" budget is selected purely by the presence of an
 * `Authorization: Bearer …` header; the token is not validated here.
 *
 * @param opts Optional configuration (see `SecurityMiddlewareOpts`).
 * @returns A Hono `MiddlewareHandler`.
 */
export function createSecurityMiddleware(
	opts: SecurityMiddlewareOpts = {},
): MiddlewareHandler {
	return async (c, next) => {
		const requestPath = c.req.path;

		// Non-API routes and explicitly skipped prefixes are out of scope.
		const outOfScope =
			!requestPath.startsWith("/api/") ||
			opts.skipPaths?.some((prefix) => requestPath.startsWith(prefix));
		if (outOfScope) return next();

		// ── UA filter ──
		const userAgent = c.req.header("user-agent") || "";
		if (isBadUA(userAgent)) return c.json({ error: "Forbidden" }, 403);

		// ── Rate limiting ──
		if (RATE_LIMIT_DISABLED) return next();

		const clientIp = getClientIP(c.req.raw.headers);
		const tier = getTier(requestPath);
		const sentBearer = c.req.header("authorization")?.startsWith("Bearer ");
		const budget = sentBearer ? tier.authenticated : tier.anonymous;

		// One bucket per (client IP, tier), shared by anonymous and Bearer traffic.
		return checkRateLimit(`${clientIp}:${tier.pattern.source}`, budget)
			? next()
			: c.json({ error: "Too many requests" }, 429);
	};
}

// ── MCP endpoint guard ──

// Lower-case substrings that `mcpGuard` looks for in the lower-cased
// User-Agent; any hit lets a request through without a Bearer token or
// internal key.
const AGENT_UA_PATTERNS = [
	"claude", "anthropic", "openai", "gemini", "mcp-client", "litellm",
	"chatgpt", "gptbot",
];

/**
 * Gatekeeper for the MCP endpoints. A request continues to the next handler
 * when any one of these holds (checked in this order):
 *   - it carries an `Authorization` header starting with `Bearer `;
 *   - `INTERNAL_API_KEY` is set and equals the `x-internal-key` header;
 *   - its User-Agent contains one of `AGENT_UA_PATTERNS` (case-insensitive).
 * Otherwise it is answered with 401
 * `{ error: "MCP endpoint requires authentication" }`.
 *
 * NOTE(review): the Bearer token is only checked for presence here, and the
 * User-Agent header is client-supplied — presumably real credential checks
 * happen further down the chain; confirm.
 */
export const mcpGuard: MiddlewareHandler = async (c, next) => {
	const deny = () => c.json({ error: "MCP endpoint requires authentication" }, 401);

	// 1. Bearer token present
	const authorization = c.req.header("authorization");
	if (authorization?.startsWith("Bearer ")) return next();

	// 2. Internal key (only when one is configured)
	const expectedKey = process.env.INTERNAL_API_KEY;
	const keyMatches = Boolean(expectedKey) && c.req.header("x-internal-key") === expectedKey;
	if (keyMatches) return next();

	// 3. Known agent User-Agent
	const agent = (c.req.header("user-agent") || "").toLowerCase();
	const isKnownAgent = AGENT_UA_PATTERNS.some((token) => agent.includes(token));

	return isKnownAgent ? next() : deny();
};