// 114 lines · 2.9 KiB · TypeScript
/**
 * Article unlock strategies — find readable/archived versions of paywalled articles.
 * Three strategies are tried in sequence: Wayback Machine, Google Cache, archive.ph.
 */

/**
 * Outcome of an attempt to locate an archived copy of an article.
 *
 * Only `success` is always present. The lookup functions in this file set
 * `strategy` and `archiveUrl` together when they find a copy, and
 * `unlockArticle` sets `error` when it gives up.
 */
interface UnlockResult {
  /** `true` when an archived copy was found. */
  success: boolean;
  /** Display name of the strategy that found the copy, e.g. "Wayback Machine". */
  strategy?: string;
  /** URL of the archived copy. */
  archiveUrl?: string;
  /** Short reason for the failure, when one is reported. */
  error?: string;
}

/**
 * Look up the closest Wayback Machine snapshot of `url` through the
 * archive.org availability API.
 *
 * Best-effort: a network error, the 10 s timeout, a non-2xx response or a
 * payload without a usable snapshot all produce `{ success: false }`.
 * This function never throws.
 *
 * @param url - Address of the article; it is URL-encoded into the query string.
 * @returns On a hit, `success: true` with strategy "Wayback Machine" and the
 *   snapshot URL upgraded from `http:` to `https:`.
 */
async function tryWaybackMachine(url: string): Promise<UnlockResult> {
  // Only the fields read below are described; everything is optional because
  // the payload comes from the network and is not trusted to have this shape.
  type AvailabilityPayload = {
    archived_snapshots?: { closest?: { available?: unknown; url?: unknown } };
  };

  try {
    const apiUrl = `https://archive.org/wayback/available?url=${encodeURIComponent(url)}`;
    const res = await fetch(apiUrl, { signal: AbortSignal.timeout(10000) });
    if (!res.ok) {
      // The body is never read on this path; cancel it so the connection is
      // released promptly. A failed cancel must not change the outcome.
      await res.body?.cancel().catch(() => undefined);
      return { success: false };
    }

    const payload = (await res.json()) as AvailabilityPayload | null;
    const closest = payload?.archived_snapshots?.closest;
    const snapshotUrl = closest?.url;

    // Require a non-empty string before calling `.replace` on it.
    if (!closest?.available || typeof snapshotUrl !== "string" || snapshotUrl === "") {
      return { success: false };
    }

    return {
      success: true,
      strategy: "Wayback Machine",
      // The API may report http:// snapshot links; always hand out https.
      archiveUrl: snapshotUrl.replace(/^http:/, "https:"),
    };
  } catch {
    // Deliberate best-effort: any failure just means "no snapshot from here".
    return { success: false };
  }
}

/**
 * Probe Google's web cache for a stored copy of `url`.
 *
 * The request uses `redirect: "manual"` so that only a direct 200 response
 * counts as a hit. Any other status, a network error or the 10 s timeout
 * produces `{ success: false }`. This function never throws.
 *
 * NOTE(review): Google announced the retirement of its public cache in 2024,
 * so this probe may no longer be able to succeed — confirm, and consider
 * dropping the strategy.
 *
 * @param url - Address of the article; it is URL-encoded into the `cache:` query.
 * @returns On a hit, `success: true` with strategy "Google Cache" and the
 *   cache URL that was probed.
 */
async function tryGoogleCache(url: string): Promise<UnlockResult> {
  try {
    const cacheUrl = `https://webcache.googleusercontent.com/search?q=cache:${encodeURIComponent(url)}`;
    const res = await fetch(cacheUrl, {
      signal: AbortSignal.timeout(10000),
      redirect: "manual",
    });

    // Only the status is inspected. Cancel the body so the page is not left
    // half-downloaded holding a connection; a failed cancel is ignored.
    await res.body?.cancel().catch(() => undefined);

    // Google cache returns 200 if cached, redirects/errors otherwise.
    if (res.status !== 200) return { success: false };

    return {
      success: true,
      strategy: "Google Cache",
      archiveUrl: cacheUrl,
    };
  } catch {
    // Deliberate best-effort: any failure just means "not available here".
    return { success: false };
  }
}

/**
 * Ask archive.ph (archive.today) for its newest capture of `url`.
 *
 * The request uses `redirect: "manual"` so the redirect target can be read
 * instead of followed:
 * - 301/302 with a `Location` header: the capture is at that location.
 * - 200: the capture was served directly at the lookup URL.
 * - anything else, a network error or the 10 s timeout: `{ success: false }`.
 *
 * This function never throws.
 *
 * @param url - Address of the article; appended as-is to the `/newest/` path.
 * @returns On a hit, `success: true` with strategy "archive.ph" and an
 *   absolute URL of the capture.
 */
async function tryArchivePh(url: string): Promise<UnlockResult> {
  try {
    const checkUrl = `https://archive.ph/newest/${url}`;
    const res = await fetch(checkUrl, {
      signal: AbortSignal.timeout(10000),
      redirect: "manual",
    });

    // Only status and headers are inspected. Cancel the body so the page is
    // not left half-downloaded; a failed cancel is ignored.
    await res.body?.cancel().catch(() => undefined);

    // archive.ph returns a redirect to the archived page if it exists.
    if (res.status === 301 || res.status === 302) {
      const location = res.headers.get("location");
      if (location) {
        return {
          success: true,
          strategy: "archive.ph",
          // `Location` may be a relative reference; resolve it against the
          // request URL so callers always get an absolute link. A malformed
          // value throws here and is reported as a miss by the catch below.
          archiveUrl: new URL(location, checkUrl).href,
        };
      }
    }

    // Sometimes it returns 200 directly with the archived content.
    if (res.status === 200) {
      return {
        success: true,
        strategy: "archive.ph",
        archiveUrl: checkUrl,
      };
    }

    return { success: false };
  } catch {
    // Deliberate best-effort: any failure just means "not available here".
    return { success: false };
  }
}

/**
 * Try all unlock strategies in sequence and return the first successful result.
 *
 * Strategies run one at a time in a fixed order — Wayback Machine, Google
 * Cache, archive.ph — and the search stops at the first hit.
 *
 * @param url - Address of the article. It must parse as an absolute
 *   `http:` or `https:` URL; anything else is rejected before any lookup
 *   is attempted.
 * @returns The first successful strategy result, or `success: false` with
 *   `error` set to "Invalid URL" or "No archived version found".
 */
export async function unlockArticle(url: string): Promise<UnlockResult> {
  // Validate URL
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return { success: false, error: "Invalid URL" };
  }

  // `new URL` also accepts schemes such as javascript:, file: or mailto:.
  // Those can never have a web archive, so reject them without hitting the
  // network three times.
  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    return { success: false, error: "Invalid URL" };
  }

  // Try strategies in order
  const strategies = [tryWaybackMachine, tryGoogleCache, tryArchivePh];

  for (const strategy of strategies) {
    const result = await strategy(url);
    if (result.success) return result;
  }

  return { success: false, error: "No archived version found" };
}