rspace-online/lib/article-unlock.ts

114 lines
2.9 KiB
TypeScript

/**
* Article unlock strategies — find readable/archived versions of paywalled articles.
* Three strategies tried in sequence: Wayback Machine, Google Cache, archive.ph.
*/
/**
* Outcome of one attempt (or of the whole pipeline) to locate an archived
* copy of an article.
*/
interface UnlockResult {
/** True when an archived copy was located. */
success: boolean;
/** Display name of the strategy that produced the hit, e.g. "Wayback Machine". */
strategy?: string;
/** Address of the archived copy; set together with `strategy` on success. */
archiveUrl?: string;
/** Failure reason; `unlockArticle` sets "Invalid URL" or "No archived version found". */
error?: string;
}
/**
 * Ask the Wayback Machine availability endpoint whether a snapshot of `url` exists.
 *
 * @param url - Address of the article; it is percent-encoded into the query string.
 * @returns A successful result carrying the closest snapshot's URL (a leading
 *   `http:` is rewritten to `https:`), or `{ success: false }` when the request
 *   fails, the response is not OK, or no available snapshot is reported.
 */
async function tryWaybackMachine(url: string): Promise<UnlockResult> {
  const notFound: UnlockResult = { success: false };

  try {
    const lookupUrl =
      "https://archive.org/wayback/available?url=" + encodeURIComponent(url);
    // Give up after 10 seconds so a slow archive cannot stall the caller.
    const response = await fetch(lookupUrl, { signal: AbortSignal.timeout(10000) });
    if (!response.ok) {
      return notFound;
    }

    const payload = await response.json();
    const closest = payload?.archived_snapshots?.closest;
    if (!closest?.available || !closest.url) {
      return notFound;
    }

    // The snapshot link may be reported with an http: scheme; hand back https.
    const secureUrl = closest.url.replace(/^http:/, "https:");
    return {
      success: true,
      strategy: "Wayback Machine",
      archiveUrl: secureUrl,
    };
  } catch {
    // Network errors, timeouts and malformed payloads all count as a miss.
    return notFound;
  }
}
/**
 * Probe Google's web cache for a stored copy of `url`.
 *
 * Redirects are not followed: only a direct 200 response counts as a hit.
 *
 * NOTE(review): Google has reportedly retired its public cache; confirm this
 * endpoint still answers before relying on this strategy.
 *
 * @param url - Address of the article; it is percent-encoded into the cache query.
 * @returns A successful result pointing at the cache URL when the probe answers
 *   200, otherwise `{ success: false }` (including on network errors/timeouts).
 */
async function tryGoogleCache(url: string): Promise<UnlockResult> {
  try {
    const cachedCopyUrl =
      "https://webcache.googleusercontent.com/search?q=cache:" + encodeURIComponent(url);
    const response = await fetch(cachedCopyUrl, {
      signal: AbortSignal.timeout(10000),
      redirect: "manual",
    });

    // Anything other than a plain 200 (redirect, error page) means "not cached".
    return response.status === 200
      ? { success: true, strategy: "Google Cache", archiveUrl: cachedCopyUrl }
      : { success: false };
  } catch {
    return { success: false };
  }
}
/**
 * Look up the newest archive.ph (archive.today) capture of `url`.
 *
 * The request does not follow redirects, so a 301/302 can be inspected for its
 * `Location` header, which is taken to be the address of the archived page.
 *
 * @param url - Address of the article; appended verbatim to the `/newest/` path.
 * @returns A successful result whose `archiveUrl` is always absolute: the
 *   redirect target resolved against the request URL, or the request URL itself
 *   when the service answers 200 directly. Returns `{ success: false }` for any
 *   other status, a redirect without a usable `Location`, or a network
 *   error/timeout.
 */
async function tryArchivePh(url: string): Promise<UnlockResult> {
  try {
    const checkUrl = `https://archive.ph/newest/${url}`;
    const res = await fetch(checkUrl, {
      signal: AbortSignal.timeout(10000),
      redirect: "manual",
    });

    // archive.ph returns a redirect to the archived page if it exists.
    if (res.status === 301 || res.status === 302) {
      const location = res.headers.get("location");
      if (location) {
        return {
          success: true,
          strategy: "archive.ph",
          // Location may be a relative reference (e.g. "/AbCdE"); resolve it
          // against the request URL so callers always get a usable link.
          // An unparseable Location throws here and is reported as a miss.
          archiveUrl: new URL(location, checkUrl).href,
        };
      }
    }

    // Sometimes it returns 200 directly with the archived content.
    if (res.status === 200) {
      return {
        success: true,
        strategy: "archive.ph",
        archiveUrl: checkUrl,
      };
    }

    return { success: false };
  } catch {
    return { success: false };
  }
}
/**
 * Find an archived copy of an article by consulting each archive in turn.
 *
 * The lookups run one after another (Wayback Machine, then Google Cache, then
 * archive.ph) and the first hit wins; later archives are not queried.
 *
 * @param url - Address of the article; must be parseable by the `URL` constructor.
 * @returns The first successful strategy result, or a failed result whose
 *   `error` is "Invalid URL" (input could not be parsed) or
 *   "No archived version found" (every strategy missed).
 */
export async function unlockArticle(url: string): Promise<UnlockResult> {
  // Reject input the URL parser cannot handle before touching the network.
  const isWellFormed = (candidate: string): boolean => {
    try {
      new URL(candidate);
      return true;
    } catch {
      return false;
    }
  };
  if (!isWellFormed(url)) {
    return { success: false, error: "Invalid URL" };
  }

  // Priority order: earlier entries are preferred when several would succeed.
  const lookups: Array<(target: string) => Promise<UnlockResult>> = [
    tryWaybackMachine,
    tryGoogleCache,
    tryArchivePh,
  ];
  for (const lookup of lookups) {
    const outcome = await lookup(url);
    if (outcome.success) {
      return outcome;
    }
  }

  return { success: false, error: "No archived version found" };
}