feat: add article unlock feature for paywalled content
Multi-strategy approach to find readable versions of paywalled articles: 1. Wayback Machine (check existing + Save Page Now) 2. Google Web Cache 3. archive.ph (read-only check for existing snapshots) Adds archiveUrl field to Note model, /api/articles/unlock endpoint, unlock button on note detail page, and browser extension integration. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
4c939bc45e
commit
17f2d49f12
|
|
@ -26,6 +26,12 @@ chrome.runtime.onInstalled.addListener(() => {
|
||||||
title: 'Clip selection to rNotes',
|
title: 'Clip selection to rNotes',
|
||||||
contexts: ['selection'],
|
contexts: ['selection'],
|
||||||
});
|
});
|
||||||
|
|
||||||
|
chrome.contextMenus.create({
|
||||||
|
id: 'unlock-article',
|
||||||
|
title: 'Unlock & Clip article to rNotes',
|
||||||
|
contexts: ['page', 'link'],
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
// --- Helpers ---
|
// --- Helpers ---
|
||||||
|
|
@ -132,6 +138,31 @@ async function uploadImage(imageUrl) {
|
||||||
return response.json();
|
return response.json();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Ask the rNotes server to locate a readable (archived) copy of an article.
 *
 * When no auth token is stored, a notification is shown and the promise
 * resolves to null. Otherwise `{ url }` is POSTed to
 * `<host>/api/articles/unlock` with the token as a bearer credential.
 *
 * @param {string} url - Address of the article to unlock.
 * @returns {Promise<object|null>} Parsed JSON response body, or null when not signed in.
 * @throws {Error} When the server answers with a non-2xx status; the message
 *   carries the status code and the response text.
 */
async function unlockArticle(url) {
  const token = await getToken();
  if (!token) {
    showNotification('rNotes Error', 'Not signed in. Open extension settings to sign in.');
    return null;
  }

  const settings = await getSettings();
  const endpoint = `${settings.host}/api/articles/unlock`;
  const requestInit = {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${token}`,
    },
    body: JSON.stringify({ url }),
  };

  const response = await fetch(endpoint, requestInit);
  if (response.ok) {
    return response.json();
  }

  // Surface the server's own explanation alongside the status code.
  const detail = await response.text();
  throw new Error(`Unlock failed: ${response.status} ${detail}`);
}
|
||||||
|
|
||||||
// --- Context Menu Handler ---
|
// --- Context Menu Handler ---
|
||||||
|
|
||||||
chrome.contextMenus.onClicked.addListener(async (info, tab) => {
|
chrome.contextMenus.onClicked.addListener(async (info, tab) => {
|
||||||
|
|
@ -197,6 +228,28 @@ chrome.contextMenus.onClicked.addListener(async (info, tab) => {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
case 'unlock-article': {
|
||||||
|
const targetUrl = info.linkUrl || tab.url;
|
||||||
|
showNotification('Unlocking Article', `Finding readable version of ${new URL(targetUrl).hostname}...`);
|
||||||
|
|
||||||
|
const result = await unlockArticle(targetUrl);
|
||||||
|
if (result && result.success && result.archiveUrl) {
|
||||||
|
// Create a CLIP note with the archive URL
|
||||||
|
await createNote({
|
||||||
|
title: tab.title || 'Unlocked Article',
|
||||||
|
content: `<p>Unlocked via ${result.strategy}</p><p>Original: <a href="${targetUrl}">${targetUrl}</a></p><p>Archive: <a href="${result.archiveUrl}">${result.archiveUrl}</a></p>`,
|
||||||
|
type: 'CLIP',
|
||||||
|
url: targetUrl,
|
||||||
|
});
|
||||||
|
showNotification('Article Unlocked', `Readable version found via ${result.strategy}`);
|
||||||
|
// Open the unlocked article in a new tab
|
||||||
|
chrome.tabs.create({ url: result.archiveUrl });
|
||||||
|
} else {
|
||||||
|
showNotification('Unlock Failed', result?.error || 'No archived version found');
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case 'clip-selection': {
|
case 'clip-selection': {
|
||||||
// Get selection HTML
|
// Get selection HTML
|
||||||
let content = '';
|
let content = '';
|
||||||
|
|
|
||||||
|
|
@ -133,6 +133,14 @@
|
||||||
color: #e5e5e5;
|
color: #e5e5e5;
|
||||||
border: 1px solid #404040;
|
border: 1px solid #404040;
|
||||||
}
|
}
|
||||||
|
.btn-unlock {
|
||||||
|
background: #172554;
|
||||||
|
color: #93c5fd;
|
||||||
|
border: 1px solid #1e40af;
|
||||||
|
}
|
||||||
|
.btn-unlock svg {
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
.status {
|
.status {
|
||||||
margin: 0 14px 10px;
|
margin: 0 14px 10px;
|
||||||
|
|
@ -212,6 +220,16 @@
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="actions">
|
||||||
|
<button class="btn-unlock" id="unlockBtn" disabled>
|
||||||
|
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
||||||
|
<rect x="3" y="11" width="18" height="11" rx="2" ry="2"></rect>
|
||||||
|
<path d="M7 11V7a5 5 0 0 1 9.9-1"></path>
|
||||||
|
</svg>
|
||||||
|
Unlock Article
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
<div id="status" class="status"></div>
|
<div id="status" class="status"></div>
|
||||||
|
|
||||||
<div class="footer">
|
<div class="footer">
|
||||||
|
|
|
||||||
|
|
@ -152,6 +152,7 @@ async function init() {
|
||||||
|
|
||||||
// Enable buttons
|
// Enable buttons
|
||||||
document.getElementById('clipPageBtn').disabled = false;
|
document.getElementById('clipPageBtn').disabled = false;
|
||||||
|
document.getElementById('unlockBtn').disabled = false;
|
||||||
|
|
||||||
// Load notebooks
|
// Load notebooks
|
||||||
await populateNotebooks();
|
await populateNotebooks();
|
||||||
|
|
@ -255,6 +256,49 @@ document.getElementById('clipSelectionBtn').addEventListener('click', async () =
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Popup "Unlock Article" button: ask the server for an archived copy of the
// current tab, store a CLIP note that links to it, then open the archive in a
// new tab. The button is disabled for the duration of the request.
document.getElementById('unlockBtn').addEventListener('click', async () => {
  const btn = document.getElementById('unlockBtn');
  btn.disabled = true;
  showStatus('Unlocking article...', 'loading');

  try {
    const token = await getToken();
    if (!token) {
      // Without a token the request would be sent as "Bearer null".
      showStatus('Not signed in. Open extension settings to sign in.', 'error');
      return; // the finally block still re-enables the button
    }
    const settings = await getSettings();

    const response = await fetch(`${settings.host}/api/articles/unlock`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${token}`,
      },
      body: JSON.stringify({ url: currentTab.url }),
    });

    // Error responses are not guaranteed to be JSON (e.g. a proxy error page),
    // so a parse failure is reported by HTTP status rather than as a JSON error.
    let result = null;
    try {
      result = await response.json();
    } catch {
      result = null;
    }
    if (!result) {
      throw new Error(`Unlock failed: ${response.status}`);
    }

    if (result.success && result.archiveUrl) {
      // Also save as a note
      await createNote({
        title: currentTab.title || 'Unlocked Article',
        content: `<p>Unlocked via ${result.strategy}</p><p>Original: <a href="${currentTab.url}">${currentTab.url}</a></p><p>Archive: <a href="${result.archiveUrl}">${result.archiveUrl}</a></p>`,
        type: 'CLIP',
        url: currentTab.url,
      });

      showStatus(`Unlocked via ${result.strategy}! Opening...`, 'success');

      // Open archive in new tab
      chrome.tabs.create({ url: result.archiveUrl });
    } else {
      showStatus(result.error || 'No archived version found', 'error');
    }
  } catch (err) {
    showStatus(`Error: ${err.message}`, 'error');
  } finally {
    btn.disabled = false;
  }
});
|
||||||
|
|
||||||
document.getElementById('optionsLink').addEventListener('click', (e) => {
|
document.getElementById('optionsLink').addEventListener('click', (e) => {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
chrome.runtime.openOptionsPage();
|
chrome.runtime.openOptionsPage();
|
||||||
|
|
|
||||||
|
|
@ -75,6 +75,7 @@ model Note {
|
||||||
contentPlain String? @db.Text
|
contentPlain String? @db.Text
|
||||||
type NoteType @default(NOTE)
|
type NoteType @default(NOTE)
|
||||||
url String?
|
url String?
|
||||||
|
archiveUrl String?
|
||||||
language String?
|
language String?
|
||||||
mimeType String?
|
mimeType String?
|
||||||
fileUrl String?
|
fileUrl String?
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,61 @@
|
||||||
|
import { NextRequest, NextResponse } from 'next/server';
|
||||||
|
import { prisma } from '@/lib/prisma';
|
||||||
|
import { requireAuth, isAuthed } from '@/lib/auth';
|
||||||
|
import { unlockArticle } from '@/lib/article-unlock';
|
||||||
|
|
||||||
|
/**
 * POST /api/articles/unlock
 *
 * Attempts to find an archived/readable version of a paywalled article.
 *
 * Body: { url: string, noteId?: string }
 *   - url: The article URL to unlock (must be http or https)
 *   - noteId: (optional) If provided, updates the note's archiveUrl on success
 *
 * Returns: { success, strategy, archiveUrl, error? }
 *
 * Responds 400 for a malformed JSON body, a missing/invalid URL, a non-http(s)
 * URL, or a non-string noteId; 500 for unexpected failures.
 */
export async function POST(request: NextRequest) {
  try {
    const auth = await requireAuth(request);
    if (!isAuthed(auth)) return auth;

    // A body that is not valid JSON is a client error, not a server fault.
    let body: unknown;
    try {
      body = await request.json();
    } catch {
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 });
    }
    const { url, noteId } = (body ?? {}) as { url?: unknown; noteId?: unknown };

    if (!url || typeof url !== 'string') {
      return NextResponse.json({ error: 'URL is required' }, { status: 400 });
    }

    // Validate URL format
    let parsedUrl: URL;
    try {
      parsedUrl = new URL(url);
    } catch {
      return NextResponse.json({ error: 'Invalid URL format' }, { status: 400 });
    }

    // Only web pages can be archived; reject file:, javascript:, data:, etc.
    if (parsedUrl.protocol !== 'http:' && parsedUrl.protocol !== 'https:') {
      return NextResponse.json({ error: 'Only http(s) URLs can be unlocked' }, { status: 400 });
    }

    // Reject a malformed noteId before doing the (slow) unlock work, instead
    // of letting it reach the database query and surface as a 500.
    if (noteId !== undefined && noteId !== null && typeof noteId !== 'string') {
      return NextResponse.json({ error: 'noteId must be a string' }, { status: 400 });
    }

    const result = await unlockArticle(url);

    // If successful and noteId provided, update the note's archiveUrl
    if (result.success && result.archiveUrl && noteId) {
      const existing = await prisma.note.findUnique({
        where: { id: noteId },
        select: { authorId: true },
      });

      // NOTE(review): notes without an author can be updated by any
      // authenticated caller — confirm this is intended.
      if (existing && (!existing.authorId || existing.authorId === auth.user.id)) {
        await prisma.note.update({
          where: { id: noteId },
          data: { archiveUrl: result.archiveUrl },
        });
      }
    }

    return NextResponse.json(result);
  } catch (error) {
    console.error('Article unlock error:', error);
    return NextResponse.json(
      { success: false, strategy: 'none', error: 'Internal server error' },
      { status: 500 }
    );
  }
}
|
||||||
|
|
@ -50,7 +50,7 @@ export async function PUT(
|
||||||
}
|
}
|
||||||
|
|
||||||
const body = await request.json();
|
const body = await request.json();
|
||||||
const { title, content, type, url, language, isPinned, notebookId, tags } = body;
|
const { title, content, type, url, archiveUrl, language, isPinned, notebookId, tags } = body;
|
||||||
|
|
||||||
const data: Record<string, unknown> = {};
|
const data: Record<string, unknown> = {};
|
||||||
if (title !== undefined) data.title = title.trim();
|
if (title !== undefined) data.title = title.trim();
|
||||||
|
|
@ -60,6 +60,7 @@ export async function PUT(
|
||||||
}
|
}
|
||||||
if (type !== undefined) data.type = type;
|
if (type !== undefined) data.type = type;
|
||||||
if (url !== undefined) data.url = url || null;
|
if (url !== undefined) data.url = url || null;
|
||||||
|
if (archiveUrl !== undefined) data.archiveUrl = archiveUrl || null;
|
||||||
if (language !== undefined) data.language = language || null;
|
if (language !== undefined) data.language = language || null;
|
||||||
if (isPinned !== undefined) data.isPinned = isPinned;
|
if (isPinned !== undefined) data.isPinned = isPinned;
|
||||||
if (notebookId !== undefined) data.notebookId = notebookId || null;
|
if (notebookId !== undefined) data.notebookId = notebookId || null;
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ export async function POST(request: NextRequest) {
|
||||||
if (!isAuthed(auth)) return auth;
|
if (!isAuthed(auth)) return auth;
|
||||||
const { user } = auth;
|
const { user } = auth;
|
||||||
const body = await request.json();
|
const body = await request.json();
|
||||||
const { title, content, type, notebookId, url, language, tags, fileUrl, mimeType, fileSize, duration } = body;
|
const { title, content, type, notebookId, url, archiveUrl, language, tags, fileUrl, mimeType, fileSize, duration } = body;
|
||||||
|
|
||||||
if (!title?.trim()) {
|
if (!title?.trim()) {
|
||||||
return NextResponse.json({ error: 'Title is required' }, { status: 400 });
|
return NextResponse.json({ error: 'Title is required' }, { status: 400 });
|
||||||
|
|
@ -75,6 +75,7 @@ export async function POST(request: NextRequest) {
|
||||||
notebookId: notebookId || null,
|
notebookId: notebookId || null,
|
||||||
authorId: user.id,
|
authorId: user.id,
|
||||||
url: url || null,
|
url: url || null,
|
||||||
|
archiveUrl: archiveUrl || null,
|
||||||
language: language || null,
|
language: language || null,
|
||||||
fileUrl: fileUrl || null,
|
fileUrl: fileUrl || null,
|
||||||
mimeType: mimeType || null,
|
mimeType: mimeType || null,
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ interface NoteData {
|
||||||
contentPlain: string | null;
|
contentPlain: string | null;
|
||||||
type: string;
|
type: string;
|
||||||
url: string | null;
|
url: string | null;
|
||||||
|
archiveUrl: string | null;
|
||||||
language: string | null;
|
language: string | null;
|
||||||
fileUrl: string | null;
|
fileUrl: string | null;
|
||||||
mimeType: string | null;
|
mimeType: string | null;
|
||||||
|
|
@ -49,6 +50,8 @@ export default function NoteDetailPage() {
|
||||||
const [saving, setSaving] = useState(false);
|
const [saving, setSaving] = useState(false);
|
||||||
const [diarizing, setDiarizing] = useState(false);
|
const [diarizing, setDiarizing] = useState(false);
|
||||||
const [speakers, setSpeakers] = useState<{ speaker: string; start: number; end: number }[] | null>(null);
|
const [speakers, setSpeakers] = useState<{ speaker: string; start: number; end: number }[] | null>(null);
|
||||||
|
const [unlocking, setUnlocking] = useState(false);
|
||||||
|
const [unlockError, setUnlockError] = useState<string | null>(null);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
fetch(`/api/notes/${params.id}`)
|
fetch(`/api/notes/${params.id}`)
|
||||||
|
|
@ -135,6 +138,30 @@ export default function NoteDetailPage() {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Ask the server to find an archived copy of this note's URL and, on success,
 * record the archive URL on the note held in component state.
 *
 * Does nothing when the note has no URL or an unlock is already in progress.
 * Failures are reported through `unlockError`; nothing is thrown.
 */
const handleUnlock = async () => {
  if (!note?.url || unlocking) return;

  const requestedNoteId = note.id;
  setUnlocking(true);
  setUnlockError(null);

  try {
    const res = await authFetch('/api/articles/unlock', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ url: note.url, noteId: requestedNoteId }),
    });
    const result = await res.json();

    if (result.success && result.archiveUrl) {
      // The request can take a long time; use an updater so changes made to
      // the note while it was in flight are not overwritten by the stale
      // snapshot captured in this closure, and skip the update if a different
      // note has been loaded in the meantime.
      setNote((prev) =>
        prev && prev.id === requestedNoteId ? { ...prev, archiveUrl: result.archiveUrl } : prev
      );
    } else {
      setUnlockError(result.error || 'No archived version found');
    }
  } catch (error) {
    setUnlockError('Failed to unlock article');
    console.error('Unlock error:', error);
  } finally {
    setUnlocking(false);
  }
};
|
||||||
|
|
||||||
if (loading) {
|
if (loading) {
|
||||||
return (
|
return (
|
||||||
<div className="min-h-screen bg-[#0a0a0a] flex items-center justify-center">
|
<div className="min-h-screen bg-[#0a0a0a] flex items-center justify-center">
|
||||||
|
|
@ -241,16 +268,63 @@ export default function NoteDetailPage() {
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{/* URL */}
|
{/* URL + Unlock */}
|
||||||
{note.url && (
|
{note.url && (
|
||||||
<a
|
<div className="mb-4 space-y-2">
|
||||||
href={note.url}
|
<a
|
||||||
target="_blank"
|
href={note.url}
|
||||||
rel="noopener noreferrer"
|
target="_blank"
|
||||||
className="text-sm text-blue-400 hover:text-blue-300 mb-4 block truncate"
|
rel="noopener noreferrer"
|
||||||
>
|
className="text-sm text-blue-400 hover:text-blue-300 block truncate"
|
||||||
{note.url}
|
>
|
||||||
</a>
|
{note.url}
|
||||||
|
</a>
|
||||||
|
|
||||||
|
{note.archiveUrl ? (
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<a
|
||||||
|
href={note.archiveUrl}
|
||||||
|
target="_blank"
|
||||||
|
rel="noopener noreferrer"
|
||||||
|
className="inline-flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium bg-emerald-500/10 text-emerald-400 border border-emerald-500/20 rounded-lg hover:bg-emerald-500/20 transition-colors"
|
||||||
|
>
|
||||||
|
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M8 11V7a4 4 0 118 0m-4 8v2m-6 4h12a2 2 0 002-2v-6a2 2 0 00-2-2H6a2 2 0 00-2 2v6a2 2 0 002 2z" />
|
||||||
|
</svg>
|
||||||
|
View Unlocked Article
|
||||||
|
</a>
|
||||||
|
<span className="text-[10px] text-slate-500 truncate max-w-[200px]">{note.archiveUrl}</span>
|
||||||
|
</div>
|
||||||
|
) : (
|
||||||
|
<div className="flex items-center gap-2">
|
||||||
|
<button
|
||||||
|
onClick={handleUnlock}
|
||||||
|
disabled={unlocking}
|
||||||
|
className="inline-flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium bg-amber-500/10 text-amber-400 border border-amber-500/20 rounded-lg hover:bg-amber-500/20 transition-colors disabled:opacity-50"
|
||||||
|
>
|
||||||
|
{unlocking ? (
|
||||||
|
<>
|
||||||
|
<svg className="animate-spin w-3.5 h-3.5" viewBox="0 0 24 24">
|
||||||
|
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" fill="none" />
|
||||||
|
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
|
||||||
|
</svg>
|
||||||
|
Unlocking...
|
||||||
|
</>
|
||||||
|
) : (
|
||||||
|
<>
|
||||||
|
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
|
||||||
|
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 15v2m-6 4h12a2 2 0 002-2v-6a2 2 0 00-2-2H6a2 2 0 00-2 2v6a2 2 0 002 2zm10-10V7a4 4 0 00-8 0v4h8z" />
|
||||||
|
</svg>
|
||||||
|
Unlock Article
|
||||||
|
</>
|
||||||
|
)}
|
||||||
|
</button>
|
||||||
|
{unlockError && (
|
||||||
|
<span className="text-[10px] text-red-400">{unlockError}</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|
||||||
{/* Uploaded file/image */}
|
{/* Uploaded file/image */}
|
||||||
|
|
|
||||||
|
|
@ -22,9 +22,10 @@ interface NoteCardProps {
|
||||||
updatedAt: string;
|
updatedAt: string;
|
||||||
tags: { id: string; name: string; color: string | null }[];
|
tags: { id: string; name: string; color: string | null }[];
|
||||||
url?: string | null;
|
url?: string | null;
|
||||||
|
archiveUrl?: string | null;
|
||||||
}
|
}
|
||||||
|
|
||||||
export function NoteCard({ id, title, type, contentPlain, isPinned, updatedAt, tags, url }: NoteCardProps) {
|
export function NoteCard({ id, title, type, contentPlain, isPinned, updatedAt, tags, url, archiveUrl }: NoteCardProps) {
|
||||||
const snippet = (contentPlain || '').slice(0, 120);
|
const snippet = (contentPlain || '').slice(0, 120);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
|
@ -41,6 +42,11 @@ export function NoteCard({ id, title, type, contentPlain, isPinned, updatedAt, t
|
||||||
★
|
★
|
||||||
</span>
|
</span>
|
||||||
)}
|
)}
|
||||||
|
{archiveUrl && (
|
||||||
|
<span className="text-emerald-400 text-[10px] font-bold uppercase px-1 py-0.5 rounded bg-emerald-500/10" title="Unlocked article">
|
||||||
|
unlocked
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
<span className="text-[10px] text-slate-500 ml-auto">
|
<span className="text-[10px] text-slate-500 ml-auto">
|
||||||
{new Date(updatedAt).toLocaleDateString()}
|
{new Date(updatedAt).toLocaleDateString()}
|
||||||
</span>
|
</span>
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,232 @@
|
||||||
|
/**
|
||||||
|
* Article Unlock — multi-strategy approach to get readable versions of
|
||||||
|
* paywalled or permissioned articles.
|
||||||
|
*
|
||||||
|
* Strategies (tried in order):
|
||||||
|
* 1. Wayback Machine — check for existing snapshot, or request a new one
|
||||||
|
* 2. Google Web Cache — fast lookup, often has full text
|
||||||
|
* 3. archive.ph — check for existing snapshots (read-only, no submission)
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Outcome of an unlock attempt, as returned by each strategy and by `unlockArticle`. */
export interface UnlockResult {
  /** True when a readable/archived copy was located. */
  success: boolean;
  /**
   * Identifier of the strategy that produced the result
   * (`'wayback'`, `'wayback-save'`, `'google-cache'`, `'archive-ph'`),
   * or `'none'` when nothing was found.
   */
  strategy: string;
  /** Address of the archived copy; present on success. */
  archiveUrl?: string;
  /** Raw page text, when the strategy fetched it (currently only the Google cache strategy). */
  content?: string;
  /** Human-readable failure reason; present when `success` is false. */
  error?: string;
}
|
||||||
|
|
||||||
|
/** Desktop Chrome user-agent string sent with every outbound request made by the unlock strategies. */
const BROWSER_UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36';
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Strategy 1: Internet Archive Wayback Machine
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** Prefix shared by Wayback Machine snapshot URLs (`/web/<timestamp>…/<original>`). */
const WAYBACK_SNAPSHOT_PREFIX = /^https?:\/\/web\.archive\.org\/web\/\d+/;

/**
 * Look up an already-existing Wayback Machine snapshot via the availability API.
 *
 * @returns A `'wayback'` result, or null when no snapshot exists or the lookup failed.
 */
async function findWaybackSnapshot(url: string): Promise<UnlockResult | null> {
  try {
    const checkUrl = `https://archive.org/wayback/available?url=${encodeURIComponent(url)}`;
    const res = await fetch(checkUrl, {
      headers: { 'User-Agent': BROWSER_UA },
      signal: AbortSignal.timeout(10000),
    });
    if (!res.ok) return null;

    const data = await res.json();
    const snapshot = data?.archived_snapshots?.closest;
    if (snapshot?.available && typeof snapshot.url === 'string' && snapshot.url) {
      return {
        success: true,
        strategy: 'wayback',
        // Anchor the scheme upgrade: an unanchored replace would rewrite the
        // original URL embedded in the snapshot path when the snapshot URL
        // itself is already https.
        archiveUrl: snapshot.url.replace(/^http:\/\//, 'https://'),
      };
    }
  } catch {
    // Availability check failed; the caller falls back to Save Page Now.
  }
  return null;
}

/**
 * Ask Save Page Now (SPN) to capture the page and resolve the resulting snapshot URL.
 *
 * @returns A `'wayback-save'` result, or null when the capture could not be confirmed.
 */
async function requestWaybackCapture(url: string): Promise<UnlockResult | null> {
  try {
    const saveRes = await fetch('https://web.archive.org/save', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/x-www-form-urlencoded',
        'User-Agent': BROWSER_UA,
        Accept: 'application/json',
      },
      body: `url=${encodeURIComponent(url)}&capture_all=1`,
      signal: AbortSignal.timeout(30000),
    });
    if (!saveRes.ok) return null;

    // The body is not guaranteed to be JSON; a parse failure must not abort
    // the redirect check below.
    let data: { url?: unknown; job_id?: unknown } | null = null;
    try {
      data = await saveRes.json();
    } catch {
      data = null;
    }

    const savedUrl = data?.url;
    if (typeof savedUrl === 'string' && savedUrl) {
      return { success: true, strategy: 'wayback-save', archiveUrl: savedUrl };
    }

    const jobId = data?.job_id;
    if (typeof jobId === 'string' && jobId) {
      // SPN returned a job id — poll until the capture finishes or we give up.
      const archiveUrl = await pollWaybackJob(jobId);
      if (archiveUrl) {
        return { success: true, strategy: 'wayback-save', archiveUrl };
      }
    }

    // Sometimes SPN redirects to the archived page directly. fetch follows
    // redirects by default, so a 301/302 status is never observable here;
    // inspect the final URL instead.
    if (saveRes.redirected && WAYBACK_SNAPSHOT_PREFIX.test(saveRes.url)) {
      return { success: true, strategy: 'wayback-save', archiveUrl: saveRes.url };
    }
  } catch {
    // SPN failed
  }
  return null;
}

/**
 * Strategy 1: Internet Archive Wayback Machine.
 *
 * First checks for an existing snapshot; if none is found, requests a new
 * capture via Save Page Now. Network and parsing failures are swallowed so
 * the next strategy can run.
 *
 * @param url - The article URL to look up.
 * @returns A successful result (`'wayback'` or `'wayback-save'`), or null when
 *   no snapshot could be found or created.
 */
async function tryWaybackMachine(url: string): Promise<UnlockResult | null> {
  const existing = await findWaybackSnapshot(url);
  if (existing) return existing;
  return requestWaybackCapture(url);
}
|
||||||
|
|
||||||
|
/**
 * Poll the Save Page Now status endpoint until a capture job finishes.
 *
 * Waits `intervalMs` before every poll (including the first). Individual poll
 * failures (network errors, timeouts, non-2xx answers) are ignored and polling
 * continues until the attempts are used up.
 *
 * @param jobId - Job identifier returned by Save Page Now.
 * @param maxAttempts - Maximum number of status requests to make (default 6).
 * @param intervalMs - Delay before each status request, in milliseconds (default 5000).
 * @returns The snapshot URL once the job reports success; null when the job
 *   reports an error or the attempts are exhausted.
 */
async function pollWaybackJob(
  jobId: string,
  maxAttempts = 6,
  intervalMs = 5000
): Promise<string | null> {
  const statusUrl = `https://web.archive.org/save/status/${encodeURIComponent(jobId)}`;

  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    await new Promise((resolve) => setTimeout(resolve, intervalMs));
    try {
      const res = await fetch(statusUrl, {
        headers: { Accept: 'application/json', 'User-Agent': BROWSER_UA },
        signal: AbortSignal.timeout(10000),
      });
      if (!res.ok) continue;

      const data = await res.json();
      if (data.status === 'success' && data.original_url && data.timestamp) {
        return `https://web.archive.org/web/${data.timestamp}/${data.original_url}`;
      }
      // A reported error is final; there is no point in polling further.
      if (data.status === 'error') return null;
    } catch {
      // keep polling
    }
  }
  return null;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Strategy 2: Google Web Cache
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Strategy 2: Google Web Cache.
 *
 * Fetches the cache URL for the page and treats a sufficiently large, non-error
 * body as a hit. Network failures are swallowed so the next strategy can run.
 *
 * NOTE(review): Google announced the retirement of its public web cache in
 * 2024, so this strategy is expected to miss for most URLs — confirm whether
 * it should be kept.
 *
 * @param url - The article URL to look up.
 * @returns A `'google-cache'` result carrying the cached page text, or null on a miss.
 */
async function tryGoogleCache(url: string): Promise<UnlockResult | null> {
  const cacheHost = 'webcache.googleusercontent.com';
  const cacheUrl = `https://${cacheHost}/search?q=cache:${encodeURIComponent(url)}`;

  try {
    const res = await fetch(cacheUrl, {
      headers: { 'User-Agent': BROWSER_UA },
      redirect: 'follow',
      signal: AbortSignal.timeout(10000),
    });
    if (!res.ok) return null;

    // Redirects are followed, so a 200 may come from some other page (consent
    // screen, interstitial). Only a response served by the cache host counts.
    if (res.redirected && new URL(res.url).hostname !== cacheHost) {
      return null;
    }

    // Google cache returns the full page — verify it's not an error page
    const text = await res.text();
    if (text.length > 1000 && !text.includes('did not match any documents')) {
      return {
        success: true,
        strategy: 'google-cache',
        archiveUrl: cacheUrl,
        content: text,
      };
    }
  } catch {
    // Google cache not available
  }

  return null;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Strategy 3: archive.ph (read-only — check for existing snapshots)
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/** True when `hostname` is archive.ph or one of its subdomains. */
function isArchivePhHost(hostname: string): boolean {
  return hostname === 'archive.ph' || hostname.endsWith('.archive.ph');
}

/**
 * Resolve a redirect target against `base` and return it only when it points
 * at an archive.ph page other than the submission form.
 */
function resolveArchivePhSnapshot(location: string, base: string): string | null {
  let target: URL;
  try {
    // Location may be relative; resolve it against the request URL.
    target = new URL(location, base);
  } catch {
    return null;
  }
  if (!isArchivePhHost(target.hostname)) return null;
  if (target.href.includes('/submit')) return null;
  return target.href;
}

/**
 * Strategy 3: archive.ph (read-only).
 *
 * Only checks whether an archive already exists — it does NOT submit new pages
 * (archive.ph has no API and aggressive anti-bot + security concerns).
 * Network failures are swallowed.
 *
 * @param url - The article URL to look up.
 * @returns An `'archive-ph'` result pointing at the existing snapshot, or null.
 */
async function tryArchivePh(url: string): Promise<UnlockResult | null> {
  const checkUrl = `https://archive.ph/newest/${encodeURIComponent(url)}`;

  try {
    const res = await fetch(checkUrl, {
      headers: { 'User-Agent': BROWSER_UA },
      redirect: 'manual', // archive.ph redirects to the snapshot
      signal: AbortSignal.timeout(10000),
    });

    // A redirect means a snapshot exists. The target is parsed and its host
    // verified rather than substring-matched, so an unrelated URL that merely
    // contains "archive.ph/" is not accepted.
    if ([301, 302, 303, 307, 308].includes(res.status)) {
      const location = res.headers.get('location');
      const snapshotUrl = location ? resolveArchivePhSnapshot(location, checkUrl) : null;
      if (snapshotUrl) {
        return { success: true, strategy: 'archive-ph', archiveUrl: snapshotUrl };
      }
    }

    // A 200 with a different final URL also means it found one
    if (res.ok) {
      const finalUrl = res.url;
      if (finalUrl && finalUrl !== checkUrl && isArchivePhHost(new URL(finalUrl).hostname)) {
        return { success: true, strategy: 'archive-ph', archiveUrl: finalUrl };
      }
    }
  } catch {
    // archive.ph not reachable
  }

  return null;
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Main unlock function
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
 * Try each unlock strategy in turn and return the first successful result.
 *
 * Order: Wayback Machine, Google Cache, archive.ph. A strategy that throws is
 * skipped; its error is logged server-side and the next strategy is tried.
 *
 * @param url - The article URL to unlock; must be parseable by `new URL`.
 * @returns The first successful strategy result, or a `success: false` result
 *   with `strategy: 'none'` and an explanatory `error`. Never rejects for
 *   strategy failures.
 */
export async function unlockArticle(url: string): Promise<UnlockResult> {
  // Validate URL
  try {
    new URL(url);
  } catch {
    return { success: false, strategy: 'none', error: 'Invalid URL' };
  }

  // Try strategies in order of reliability
  const strategies = [
    { name: 'Wayback Machine', fn: tryWaybackMachine },
    { name: 'Google Cache', fn: tryGoogleCache },
    { name: 'archive.ph', fn: tryArchivePh },
  ];

  const errors: string[] = [];

  for (const { name, fn } of strategies) {
    try {
      const result = await fn(url);
      if (result?.success) {
        return result;
      }
    } catch (err) {
      errors.push(`${name}: ${err instanceof Error ? err.message : 'unknown error'}`);
    }
  }

  // Surface collected failures in the server log instead of discarding them;
  // the client-facing message stays free of internal error details.
  if (errors.length > 0) {
    console.warn('Article unlock strategy errors:', errors.join('; '));
  }

  return {
    success: false,
    strategy: 'none',
    error: `No archived version found. Tried: ${strategies.map((s) => s.name).join(', ')}`,
  };
}
|
||||||
Loading…
Reference in New Issue