feat: add article unlock feature for paywalled content
Multi-strategy approach to find readable versions of paywalled articles: (1) Wayback Machine (check for an existing snapshot, then Save Page Now); (2) Google Web Cache; (3) archive.ph (read-only check for existing snapshots). Adds an archiveUrl field to the Note model, an /api/articles/unlock endpoint, an unlock button on the note detail page, and browser extension integration. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
4c939bc45e
commit
17f2d49f12
|
|
@ -26,6 +26,12 @@ chrome.runtime.onInstalled.addListener(() => {
|
|||
title: 'Clip selection to rNotes',
|
||||
contexts: ['selection'],
|
||||
});
|
||||
|
||||
chrome.contextMenus.create({
|
||||
id: 'unlock-article',
|
||||
title: 'Unlock & Clip article to rNotes',
|
||||
contexts: ['page', 'link'],
|
||||
});
|
||||
});
|
||||
|
||||
// --- Helpers ---
|
||||
|
|
@ -132,6 +138,31 @@ async function uploadImage(imageUrl) {
|
|||
return response.json();
|
||||
}
|
||||
|
||||
/**
 * Ask the rNotes backend for a readable (archived) copy of an article.
 *
 * Sends POST {host}/api/articles/unlock with the stored bearer token.
 *
 * @param {string} url - Article URL to unlock.
 * @returns {Promise<object|null>} Parsed JSON reply, or null (after showing a
 *   notification) when no token is stored.
 * @throws {Error} When the backend answers with a non-2xx status; the message
 *   carries the status code and the response text.
 */
async function unlockArticle(url) {
  const authToken = await getToken();
  if (!authToken) {
    showNotification('rNotes Error', 'Not signed in. Open extension settings to sign in.');
    return null;
  }

  const extensionSettings = await getSettings();
  const endpoint = `${extensionSettings.host}/api/articles/unlock`;
  const requestInit = {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${authToken}`,
    },
    body: JSON.stringify({ url }),
  };

  const reply = await fetch(endpoint, requestInit);
  if (reply.ok) {
    return reply.json();
  }

  // Surface the backend's own explanation to the caller.
  const detail = await reply.text();
  throw new Error(`Unlock failed: ${reply.status} ${detail}`);
}
|
||||
|
||||
// --- Context Menu Handler ---
|
||||
|
||||
chrome.contextMenus.onClicked.addListener(async (info, tab) => {
|
||||
|
|
@ -197,6 +228,28 @@ chrome.contextMenus.onClicked.addListener(async (info, tab) => {
|
|||
break;
|
||||
}
|
||||
|
||||
case 'unlock-article': {
|
||||
const targetUrl = info.linkUrl || tab.url;
|
||||
showNotification('Unlocking Article', `Finding readable version of ${new URL(targetUrl).hostname}...`);
|
||||
|
||||
const result = await unlockArticle(targetUrl);
|
||||
if (result && result.success && result.archiveUrl) {
|
||||
// Create a CLIP note with the archive URL
|
||||
await createNote({
|
||||
title: tab.title || 'Unlocked Article',
|
||||
content: `<p>Unlocked via ${result.strategy}</p><p>Original: <a href="${targetUrl}">${targetUrl}</a></p><p>Archive: <a href="${result.archiveUrl}">${result.archiveUrl}</a></p>`,
|
||||
type: 'CLIP',
|
||||
url: targetUrl,
|
||||
});
|
||||
showNotification('Article Unlocked', `Readable version found via ${result.strategy}`);
|
||||
// Open the unlocked article in a new tab
|
||||
chrome.tabs.create({ url: result.archiveUrl });
|
||||
} else {
|
||||
showNotification('Unlock Failed', result?.error || 'No archived version found');
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case 'clip-selection': {
|
||||
// Get selection HTML
|
||||
let content = '';
|
||||
|
|
|
|||
|
|
@ -133,6 +133,14 @@
|
|||
color: #e5e5e5;
|
||||
border: 1px solid #404040;
|
||||
}
|
||||
.btn-unlock {
|
||||
background: #172554;
|
||||
color: #93c5fd;
|
||||
border: 1px solid #1e40af;
|
||||
}
|
||||
.btn-unlock svg {
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.status {
|
||||
margin: 0 14px 10px;
|
||||
|
|
@ -212,6 +220,16 @@
|
|||
</button>
|
||||
</div>
|
||||
|
||||
<div class="actions">
|
||||
<button class="btn-unlock" id="unlockBtn" disabled>
|
||||
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||||
<rect x="3" y="11" width="18" height="11" rx="2" ry="2"></rect>
|
||||
<path d="M7 11V7a5 5 0 0 1 9.9-1"></path>
|
||||
</svg>
|
||||
Unlock Article
|
||||
</button>
|
||||
</div>
|
||||
|
||||
<div id="status" class="status"></div>
|
||||
|
||||
<div class="footer">
|
||||
|
|
|
|||
|
|
@ -152,6 +152,7 @@ async function init() {
|
|||
|
||||
// Enable buttons
|
||||
document.getElementById('clipPageBtn').disabled = false;
|
||||
document.getElementById('unlockBtn').disabled = false;
|
||||
|
||||
// Load notebooks
|
||||
await populateNotebooks();
|
||||
|
|
@ -255,6 +256,49 @@ document.getElementById('clipSelectionBtn').addEventListener('click', async () =
|
|||
}
|
||||
});
|
||||
|
||||
// Popup "Unlock Article" button: asks the backend for an archived copy of the
// current tab, saves a CLIP note pointing at it, and opens it in a new tab.
document.getElementById('unlockBtn').addEventListener('click', async () => {
  const btn = document.getElementById('unlockBtn');
  btn.disabled = true;
  showStatus('Unlocking article...', 'loading');

  // Values below are interpolated into note HTML, so escape them first.
  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  try {
    const token = await getToken();
    if (!token) {
      // Without this guard the request would be sent as "Bearer null".
      showStatus('Not signed in. Open extension settings to sign in.', 'error');
      return;
    }

    const settings = await getSettings();
    const response = await fetch(`${settings.host}/api/articles/unlock`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${token}`,
      },
      body: JSON.stringify({ url: currentTab.url }),
    });

    // Error replies are not guaranteed to be JSON (e.g. a proxy error page),
    // so a parse failure must not mask the HTTP status.
    let result = null;
    try {
      result = await response.json();
    } catch {
      result = null;
    }

    if (!response.ok) {
      showStatus(result?.error || `Unlock failed: ${response.status}`, 'error');
      return;
    }

    if (result?.success && result.archiveUrl) {
      const safeOriginal = escapeHtml(currentTab.url);
      const safeArchive = escapeHtml(result.archiveUrl);

      // Also save as a note
      await createNote({
        title: currentTab.title || 'Unlocked Article',
        content: `<p>Unlocked via ${escapeHtml(result.strategy)}</p><p>Original: <a href="${safeOriginal}">${safeOriginal}</a></p><p>Archive: <a href="${safeArchive}">${safeArchive}</a></p>`,
        type: 'CLIP',
        url: currentTab.url,
      });

      showStatus(`Unlocked via ${result.strategy}! Opening...`, 'success');

      // Open archive in new tab
      chrome.tabs.create({ url: result.archiveUrl });
    } else {
      showStatus(result?.error || 'No archived version found', 'error');
    }
  } catch (err) {
    showStatus(`Error: ${err.message}`, 'error');
  } finally {
    // Runs on every exit path, including the early returns above.
    btn.disabled = false;
  }
});
|
||||
|
||||
document.getElementById('optionsLink').addEventListener('click', (e) => {
|
||||
e.preventDefault();
|
||||
chrome.runtime.openOptionsPage();
|
||||
|
|
|
|||
|
|
@ -75,6 +75,7 @@ model Note {
|
|||
contentPlain String? @db.Text
|
||||
type NoteType @default(NOTE)
|
||||
url String?
|
||||
archiveUrl String?
|
||||
language String?
|
||||
mimeType String?
|
||||
fileUrl String?
|
||||
|
|
|
|||
|
|
@ -0,0 +1,61 @@
|
|||
import { NextRequest, NextResponse } from 'next/server';
|
||||
import { prisma } from '@/lib/prisma';
|
||||
import { requireAuth, isAuthed } from '@/lib/auth';
|
||||
import { unlockArticle } from '@/lib/article-unlock';
|
||||
|
||||
/**
 * POST /api/articles/unlock
 *
 * Attempts to find an archived/readable version of a paywalled article.
 *
 * Body: { url: string, noteId?: string }
 *   - url: The http(s) article URL to unlock
 *   - noteId: (optional) If provided, updates the note's archiveUrl on success
 *
 * Returns: { success, strategy, archiveUrl, error? }
 *
 * Status codes produced here: 400 for a malformed body, a missing/invalid URL
 * or a non-string noteId; 500 for unexpected failures. Authentication failures
 * return whatever response `requireAuth` produced.
 */
export async function POST(request: NextRequest) {
  try {
    const auth = await requireAuth(request);
    if (!isAuthed(auth)) return auth;

    // A body that is not valid JSON is a client error, not a server error.
    let body: unknown;
    try {
      body = await request.json();
    } catch {
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 });
    }
    if (typeof body !== 'object' || body === null) {
      return NextResponse.json({ error: 'URL is required' }, { status: 400 });
    }

    const { url, noteId } = body as { url?: unknown; noteId?: unknown };

    if (!url || typeof url !== 'string') {
      return NextResponse.json({ error: 'URL is required' }, { status: 400 });
    }

    // Validate URL format and restrict to web schemes: the resulting archive
    // link is stored and later rendered as an href.
    let parsedUrl: URL;
    try {
      parsedUrl = new URL(url);
    } catch {
      return NextResponse.json({ error: 'Invalid URL format' }, { status: 400 });
    }
    if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') {
      return NextResponse.json({ error: 'Invalid URL format' }, { status: 400 });
    }

    // Reject a malformed noteId before doing the (slow) unlock work.
    if (noteId && typeof noteId !== 'string') {
      return NextResponse.json({ error: 'noteId must be a string' }, { status: 400 });
    }

    const result = await unlockArticle(url);

    // If successful and noteId provided, update the note's archiveUrl
    if (result.success && result.archiveUrl && typeof noteId === 'string' && noteId) {
      const existing = await prisma.note.findUnique({
        where: { id: noteId },
        select: { authorId: true },
      });

      // Only notes without an author, or authored by the caller, are updated.
      // NOTE(review): author-less notes are writable by any signed-in user
      // here — confirm this matches the other note routes.
      if (existing && (!existing.authorId || existing.authorId === auth.user.id)) {
        await prisma.note.update({
          where: { id: noteId },
          data: { archiveUrl: result.archiveUrl },
        });
      }
    }

    return NextResponse.json(result);
  } catch (error) {
    console.error('Article unlock error:', error);
    return NextResponse.json(
      { success: false, strategy: 'none', error: 'Internal server error' },
      { status: 500 }
    );
  }
}
|
||||
|
|
@ -50,7 +50,7 @@ export async function PUT(
|
|||
}
|
||||
|
||||
const body = await request.json();
|
||||
const { title, content, type, url, language, isPinned, notebookId, tags } = body;
|
||||
const { title, content, type, url, archiveUrl, language, isPinned, notebookId, tags } = body;
|
||||
|
||||
const data: Record<string, unknown> = {};
|
||||
if (title !== undefined) data.title = title.trim();
|
||||
|
|
@ -60,6 +60,7 @@ export async function PUT(
|
|||
}
|
||||
if (type !== undefined) data.type = type;
|
||||
if (url !== undefined) data.url = url || null;
|
||||
if (archiveUrl !== undefined) data.archiveUrl = archiveUrl || null;
|
||||
if (language !== undefined) data.language = language || null;
|
||||
if (isPinned !== undefined) data.isPinned = isPinned;
|
||||
if (notebookId !== undefined) data.notebookId = notebookId || null;
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ export async function POST(request: NextRequest) {
|
|||
if (!isAuthed(auth)) return auth;
|
||||
const { user } = auth;
|
||||
const body = await request.json();
|
||||
const { title, content, type, notebookId, url, language, tags, fileUrl, mimeType, fileSize, duration } = body;
|
||||
const { title, content, type, notebookId, url, archiveUrl, language, tags, fileUrl, mimeType, fileSize, duration } = body;
|
||||
|
||||
if (!title?.trim()) {
|
||||
return NextResponse.json({ error: 'Title is required' }, { status: 400 });
|
||||
|
|
@ -75,6 +75,7 @@ export async function POST(request: NextRequest) {
|
|||
notebookId: notebookId || null,
|
||||
authorId: user.id,
|
||||
url: url || null,
|
||||
archiveUrl: archiveUrl || null,
|
||||
language: language || null,
|
||||
fileUrl: fileUrl || null,
|
||||
mimeType: mimeType || null,
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ interface NoteData {
|
|||
contentPlain: string | null;
|
||||
type: string;
|
||||
url: string | null;
|
||||
archiveUrl: string | null;
|
||||
language: string | null;
|
||||
fileUrl: string | null;
|
||||
mimeType: string | null;
|
||||
|
|
@ -49,6 +50,8 @@ export default function NoteDetailPage() {
|
|||
const [saving, setSaving] = useState(false);
|
||||
const [diarizing, setDiarizing] = useState(false);
|
||||
const [speakers, setSpeakers] = useState<{ speaker: string; start: number; end: number }[] | null>(null);
|
||||
const [unlocking, setUnlocking] = useState(false);
|
||||
const [unlockError, setUnlockError] = useState<string | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
fetch(`/api/notes/${params.id}`)
|
||||
|
|
@ -135,6 +138,30 @@ export default function NoteDetailPage() {
|
|||
}
|
||||
};
|
||||
|
||||
/**
 * Requests an archived copy of the note's URL from /api/articles/unlock and,
 * on success, records the returned archive URL on the note held in state.
 * Does nothing when the note has no URL or a request is already in flight.
 */
const handleUnlock = async () => {
  if (!note?.url || unlocking) return;

  // Remember which note this request belongs to; the call can take a while.
  const requestedNoteId = note.id;

  setUnlocking(true);
  setUnlockError(null);
  try {
    const res = await authFetch('/api/articles/unlock', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ url: note.url, noteId: requestedNoteId }),
    });
    const result = await res.json();

    if (result.success && result.archiveUrl) {
      // Merge into the latest state rather than the snapshot captured before
      // the request, so changes made to the note in the meantime are kept.
      setNote((current) =>
        current && current.id === requestedNoteId
          ? { ...current, archiveUrl: result.archiveUrl }
          : current
      );
    } else {
      setUnlockError(result.error || 'No archived version found');
    }
  } catch (error) {
    setUnlockError('Failed to unlock article');
    console.error('Unlock error:', error);
  } finally {
    setUnlocking(false);
  }
};
|
||||
|
||||
if (loading) {
|
||||
return (
|
||||
<div className="min-h-screen bg-[#0a0a0a] flex items-center justify-center">
|
||||
|
|
@ -241,16 +268,63 @@ export default function NoteDetailPage() {
|
|||
</span>
|
||||
</div>
|
||||
|
||||
{/* URL */}
|
||||
{/* URL + Unlock */}
|
||||
{note.url && (
|
||||
<a
|
||||
href={note.url}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-sm text-blue-400 hover:text-blue-300 mb-4 block truncate"
|
||||
>
|
||||
{note.url}
|
||||
</a>
|
||||
<div className="mb-4 space-y-2">
|
||||
<a
|
||||
href={note.url}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="text-sm text-blue-400 hover:text-blue-300 block truncate"
|
||||
>
|
||||
{note.url}
|
||||
</a>
|
||||
|
||||
{note.archiveUrl ? (
|
||||
<div className="flex items-center gap-2">
|
||||
<a
|
||||
href={note.archiveUrl}
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="inline-flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium bg-emerald-500/10 text-emerald-400 border border-emerald-500/20 rounded-lg hover:bg-emerald-500/20 transition-colors"
|
||||
>
|
||||
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M8 11V7a4 4 0 118 0m-4 8v2m-6 4h12a2 2 0 002-2v-6a2 2 0 00-2-2H6a2 2 0 00-2 2v6a2 2 0 002 2z" />
|
||||
</svg>
|
||||
View Unlocked Article
|
||||
</a>
|
||||
<span className="text-[10px] text-slate-500 truncate max-w-[200px]">{note.archiveUrl}</span>
|
||||
</div>
|
||||
) : (
|
||||
<div className="flex items-center gap-2">
|
||||
<button
|
||||
onClick={handleUnlock}
|
||||
disabled={unlocking}
|
||||
className="inline-flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium bg-amber-500/10 text-amber-400 border border-amber-500/20 rounded-lg hover:bg-amber-500/20 transition-colors disabled:opacity-50"
|
||||
>
|
||||
{unlocking ? (
|
||||
<>
|
||||
<svg className="animate-spin w-3.5 h-3.5" viewBox="0 0 24 24">
|
||||
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" fill="none" />
|
||||
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||
</svg>
|
||||
Unlocking...
|
||||
</>
|
||||
) : (
|
||||
<>
|
||||
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 15v2m-6 4h12a2 2 0 002-2v-6a2 2 0 00-2-2H6a2 2 0 00-2 2v6a2 2 0 002 2zm10-10V7a4 4 0 00-8 0v4h8z" />
|
||||
</svg>
|
||||
Unlock Article
|
||||
</>
|
||||
)}
|
||||
</button>
|
||||
{unlockError && (
|
||||
<span className="text-[10px] text-red-400">{unlockError}</span>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Uploaded file/image */}
|
||||
|
|
|
|||
|
|
@ -22,9 +22,10 @@ interface NoteCardProps {
|
|||
updatedAt: string;
|
||||
tags: { id: string; name: string; color: string | null }[];
|
||||
url?: string | null;
|
||||
archiveUrl?: string | null;
|
||||
}
|
||||
|
||||
export function NoteCard({ id, title, type, contentPlain, isPinned, updatedAt, tags, url }: NoteCardProps) {
|
||||
export function NoteCard({ id, title, type, contentPlain, isPinned, updatedAt, tags, url, archiveUrl }: NoteCardProps) {
|
||||
const snippet = (contentPlain || '').slice(0, 120);
|
||||
|
||||
return (
|
||||
|
|
@ -41,6 +42,11 @@ export function NoteCard({ id, title, type, contentPlain, isPinned, updatedAt, t
|
|||
★
|
||||
</span>
|
||||
)}
|
||||
{archiveUrl && (
|
||||
<span className="text-emerald-400 text-[10px] font-bold uppercase px-1 py-0.5 rounded bg-emerald-500/10" title="Unlocked article">
|
||||
unlocked
|
||||
</span>
|
||||
)}
|
||||
<span className="text-[10px] text-slate-500 ml-auto">
|
||||
{new Date(updatedAt).toLocaleDateString()}
|
||||
</span>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,232 @@
|
|||
/**
|
||||
* Article Unlock — multi-strategy approach to get readable versions of
|
||||
* paywalled or permissioned articles.
|
||||
*
|
||||
* Strategies (tried in order):
|
||||
* 1. Wayback Machine — check for existing snapshot, or request a new one
|
||||
* 2. Google Web Cache — fast lookup, often has full text
|
||||
* 3. archive.ph — check for existing snapshots (read-only, no submission)
|
||||
*/
|
||||
|
||||
export interface UnlockResult {
|
||||
success: boolean;
|
||||
strategy: string;
|
||||
archiveUrl?: string;
|
||||
content?: string;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
const BROWSER_UA =
|
||||
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36';
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Strategy 1: Internet Archive Wayback Machine
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Strategy 1: look for a Wayback Machine copy of `url`.
 *
 * First queries the availability endpoint for an existing snapshot; if none
 * is reported (or the lookup fails), submits the URL to Save Page Now and
 * uses the capture URL, the polled job result, or the page the request was
 * redirected to.
 *
 * @returns A successful result with `archiveUrl`, or null when nothing was found.
 *   Network and parse failures are swallowed so the next strategy can run.
 */
async function tryWaybackMachine(url: string): Promise<UnlockResult | null> {
  // First check if a snapshot already exists
  try {
    const checkUrl = `https://archive.org/wayback/available?url=${encodeURIComponent(url)}`;
    const res = await fetch(checkUrl, {
      headers: { 'User-Agent': BROWSER_UA },
      signal: AbortSignal.timeout(10000),
    });

    if (res.ok) {
      const data = await res.json();
      const snapshot = data?.archived_snapshots?.closest;
      if (snapshot?.available && typeof snapshot.url === 'string' && snapshot.url) {
        return {
          success: true,
          strategy: 'wayback',
          // Anchor the replacement so only the snapshot's own scheme is
          // upgraded, never the original URL embedded later in the path.
          archiveUrl: snapshot.url.replace(/^http:\/\//, 'https://'),
        };
      }
    }
  } catch {
    // availability check failed, try Save Page Now
  }

  // No existing snapshot — request one via Save Page Now (SPN)
  try {
    const saveRes = await fetch('https://web.archive.org/save', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/x-www-form-urlencoded',
        'User-Agent': BROWSER_UA,
        Accept: 'application/json',
      },
      body: `url=${encodeURIComponent(url)}&capture_all=1`,
      signal: AbortSignal.timeout(30000),
    });

    // Redirects are followed automatically, so a 301/302 status is never
    // observed here. Detect "SPN redirected to the archived page" from the
    // final URL instead, before trying to parse the body as JSON.
    if (saveRes.redirected && saveRes.url) {
      const landed = new URL(saveRes.url);
      if (landed.hostname === 'web.archive.org' && landed.pathname.startsWith('/web/')) {
        return { success: true, strategy: 'wayback-save', archiveUrl: saveRes.url };
      }
    }

    if (saveRes.ok) {
      // A non-JSON body yields null instead of aborting the whole strategy.
      const data = await saveRes.json().catch(() => null);

      if (typeof data?.url === 'string' && data.url) {
        return {
          success: true,
          strategy: 'wayback-save',
          archiveUrl: data.url,
        };
      }

      if (data?.job_id) {
        // The capture runs asynchronously; wait for the job to finish.
        const archiveUrl = await pollWaybackJob(data.job_id);
        if (archiveUrl) {
          return { success: true, strategy: 'wayback-save', archiveUrl };
        }
      }
    }
  } catch {
    // SPN failed
  }

  return null;
}
|
||||
|
||||
/**
 * Waits for a Save Page Now job to finish.
 *
 * Makes up to six status checks, sleeping five seconds before each one.
 *
 * @param jobId - Job identifier returned by the save request.
 * @returns The snapshot URL built from the job's timestamp and original URL
 *   once the status is `success`; null if the job reports `error` or no
 *   success is seen within the allotted checks.
 */
async function pollWaybackJob(jobId: string): Promise<string | null> {
  const statusUrl = `https://web.archive.org/save/status/${jobId}`;
  let checksLeft = 6;

  while (checksLeft-- > 0) {
    await new Promise((resolve) => setTimeout(resolve, 5000));

    try {
      const res = await fetch(statusUrl, {
        headers: { Accept: 'application/json', 'User-Agent': BROWSER_UA },
        signal: AbortSignal.timeout(10000),
      });
      if (!res.ok) continue;

      const job = await res.json();
      const finished = job.status === 'success' && job.original_url && job.timestamp;
      if (finished) {
        return `https://web.archive.org/web/${job.timestamp}/${job.original_url}`;
      }
      if (job.status === 'error') return null;
    } catch {
      // transient failure — keep polling
    }
  }

  return null;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Strategy 2: Google Web Cache
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Strategy 2: fetch the Google Web Cache copy of `url`.
 *
 * A reply counts as a hit when it is a 2xx response longer than 1000
 * characters that does not contain Google's "did not match any documents"
 * text. The fetched HTML is returned in `content`.
 *
 * NOTE(review): Google has reportedly retired its public web cache — confirm
 * this endpoint still responds before relying on this strategy.
 *
 * @returns A successful result pointing at the cache URL, or null.
 */
async function tryGoogleCache(url: string): Promise<UnlockResult | null> {
  const cacheUrl = `https://webcache.googleusercontent.com/search?q=cache:${encodeURIComponent(url)}`;

  let html: string;
  try {
    const res = await fetch(cacheUrl, {
      headers: { 'User-Agent': BROWSER_UA },
      redirect: 'follow',
      signal: AbortSignal.timeout(10000),
    });
    if (!res.ok) return null;
    html = await res.text();
  } catch {
    // Google cache not available
    return null;
  }

  // Google cache returns the full page — verify it's not an error page
  const looksLikeRealPage = html.length > 1000 && !html.includes('did not match any documents');
  if (!looksLikeRealPage) return null;

  return {
    success: true,
    strategy: 'google-cache',
    archiveUrl: cacheUrl,
    content: html,
  };
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Strategy 3: archive.ph (read-only — check for existing snapshots)
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Strategy 3: check archive.ph for an existing snapshot of `url`.
 *
 * Read-only: requests `/newest/<url>` without following redirects and treats
 * a redirect to an archive.ph page (other than the submit form) as a hit.
 * New pages are never submitted.
 *
 * @returns A successful result with the snapshot URL, or null when none is
 *   found or archive.ph cannot be reached.
 */
async function tryArchivePh(url: string): Promise<UnlockResult | null> {
  // Only check if an archive already exists — do NOT submit new pages
  // (archive.ph has no API and aggressive anti-bot + security concerns)
  const checkUrl = `https://archive.ph/newest/${encodeURIComponent(url)}`;

  // Resolves a (possibly relative) candidate against the request URL and
  // accepts it only when it really points at an archive.ph page. Comparing
  // the parsed hostname avoids matching "archive.ph/" inside a foreign URL.
  const asSnapshotUrl = (candidate: string): string | null => {
    try {
      const resolved = new URL(candidate, checkUrl);
      const onArchivePh =
        resolved.hostname === 'archive.ph' || resolved.hostname.endsWith('.archive.ph');
      if (!onArchivePh) return null;
      if (resolved.pathname.startsWith('/submit')) return null;
      if (resolved.href === new URL(checkUrl).href) return null;
      return resolved.href;
    } catch {
      return null;
    }
  };

  try {
    const res = await fetch(checkUrl, {
      headers: { 'User-Agent': BROWSER_UA },
      redirect: 'manual', // archive.ph redirects to the snapshot
      signal: AbortSignal.timeout(10000),
    });

    // A redirect with a Location header means a snapshot exists
    if (res.status >= 300 && res.status < 400) {
      const location = res.headers.get('location');
      const snapshotUrl = location ? asSnapshotUrl(location) : null;
      if (snapshotUrl) {
        return {
          success: true,
          strategy: 'archive-ph',
          archiveUrl: snapshotUrl,
        };
      }
    }

    // A 200 whose final URL differs from the request also means it found one.
    // NOTE(review): with manual redirects the final URL presumably always
    // equals the request URL, so this is a defensive fallback — confirm.
    if (res.ok && res.url) {
      const snapshotUrl = asSnapshotUrl(res.url);
      if (snapshotUrl) {
        return {
          success: true,
          strategy: 'archive-ph',
          archiveUrl: snapshotUrl,
        };
      }
    }
  } catch {
    // archive.ph not reachable
  }

  return null;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Main unlock function
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/**
 * Tries each unlock strategy in turn and returns the first success.
 *
 * Order: Wayback Machine, Google Cache, archive.ph. A strategy that throws is
 * recorded and skipped; it never aborts the remaining strategies.
 *
 * @param url - Absolute http(s) URL of the article.
 * @returns The first successful strategy result, or a failure result with
 *   `strategy: 'none'` and an `error` message.
 */
export async function unlockArticle(url: string): Promise<UnlockResult> {
  // Validate URL
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return { success: false, strategy: 'none', error: 'Invalid URL' };
  }

  // Archive services only hold web pages; do not forward other schemes.
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return { success: false, strategy: 'none', error: 'Invalid URL' };
  }

  // Try strategies in order of reliability
  const strategies = [
    { name: 'Wayback Machine', fn: tryWaybackMachine },
    { name: 'Google Cache', fn: tryGoogleCache },
    { name: 'archive.ph', fn: tryArchivePh },
  ];

  const errors: string[] = [];

  for (const { name, fn } of strategies) {
    try {
      const result = await fn(url);
      if (result?.success) {
        return result;
      }
    } catch (err) {
      errors.push(`${name}: ${err instanceof Error ? err.message : 'unknown error'}`);
    }
  }

  // Keep the client-facing message stable, but do not silently drop the
  // failures that were collected above.
  if (errors.length > 0) {
    console.warn('Article unlock strategy failures:', errors);
  }

  return {
    success: false,
    strategy: 'none',
    error: `No archived version found. Tried: ${strategies.map((s) => s.name).join(', ')}`,
  };
}
|
||||
Loading…
Reference in New Issue