feat: add article unlock feature for paywalled content

Multi-strategy approach to find readable versions of paywalled articles:
1. Wayback Machine (check existing + Save Page Now)
2. Google Web Cache
3. archive.ph (read-only check for existing snapshots)

Adds archiveUrl field to Note model, /api/articles/unlock endpoint,
unlock button on note detail page, and browser extension integration.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jeff Emmett 2026-02-24 15:32:37 -08:00
parent 4c939bc45e
commit 17f2d49f12
10 changed files with 503 additions and 12 deletions

View File

@ -26,6 +26,12 @@ chrome.runtime.onInstalled.addListener(() => {
title: 'Clip selection to rNotes',
contexts: ['selection'],
});
chrome.contextMenus.create({
id: 'unlock-article',
title: 'Unlock & Clip article to rNotes',
contexts: ['page', 'link'],
});
});
// --- Helpers ---
@ -132,6 +138,31 @@ async function uploadImage(imageUrl) {
return response.json();
}
/**
 * Ask the rNotes server to locate a readable/archived copy of `url`.
 *
 * Resolves to the parsed JSON body of POST /api/articles/unlock, or to
 * `null` (after showing a notification) when no auth token is stored.
 * Throws an Error carrying the HTTP status and response text when the
 * server answers with a non-2xx status.
 */
async function unlockArticle(url) {
  const authToken = await getToken();
  if (!authToken) {
    showNotification('rNotes Error', 'Not signed in. Open extension settings to sign in.');
    return null;
  }

  const settings = await getSettings();
  const endpoint = `${settings.host}/api/articles/unlock`;
  const requestInit = {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${authToken}`,
    },
    body: JSON.stringify({ url }),
  };

  const res = await fetch(endpoint, requestInit);
  if (res.ok) {
    return res.json();
  }

  // Surface the server's own explanation alongside the status code.
  const detail = await res.text();
  throw new Error(`Unlock failed: ${res.status} ${detail}`);
}
// --- Context Menu Handler ---
chrome.contextMenus.onClicked.addListener(async (info, tab) => {
@ -197,6 +228,28 @@ chrome.contextMenus.onClicked.addListener(async (info, tab) => {
break;
}
case 'unlock-article': {
const targetUrl = info.linkUrl || tab.url;
showNotification('Unlocking Article', `Finding readable version of ${new URL(targetUrl).hostname}...`);
const result = await unlockArticle(targetUrl);
if (result && result.success && result.archiveUrl) {
// Create a CLIP note with the archive URL
await createNote({
title: tab.title || 'Unlocked Article',
content: `<p>Unlocked via ${result.strategy}</p><p>Original: <a href="${targetUrl}">${targetUrl}</a></p><p>Archive: <a href="${result.archiveUrl}">${result.archiveUrl}</a></p>`,
type: 'CLIP',
url: targetUrl,
});
showNotification('Article Unlocked', `Readable version found via ${result.strategy}`);
// Open the unlocked article in a new tab
chrome.tabs.create({ url: result.archiveUrl });
} else {
showNotification('Unlock Failed', result?.error || 'No archived version found');
}
break;
}
case 'clip-selection': {
// Get selection HTML
let content = '';

View File

@ -133,6 +133,14 @@
color: #e5e5e5;
border: 1px solid #404040;
}
.btn-unlock {
background: #172554;
color: #93c5fd;
border: 1px solid #1e40af;
}
.btn-unlock svg {
flex-shrink: 0;
}
.status {
margin: 0 14px 10px;
@ -212,6 +220,16 @@
</button>
</div>
<div class="actions">
<button class="btn-unlock" id="unlockBtn" disabled>
<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
<rect x="3" y="11" width="18" height="11" rx="2" ry="2"></rect>
<path d="M7 11V7a5 5 0 0 1 9.9-1"></path>
</svg>
Unlock Article
</button>
</div>
<div id="status" class="status"></div>
<div class="footer">

View File

@ -152,6 +152,7 @@ async function init() {
// Enable buttons
document.getElementById('clipPageBtn').disabled = false;
document.getElementById('unlockBtn').disabled = false;
// Load notebooks
await populateNotebooks();
@ -255,6 +256,49 @@ document.getElementById('clipSelectionBtn').addEventListener('click', async () =
}
});
// Popup "Unlock Article" button: asks the server for an archived copy of the
// current tab's URL, saves a CLIP note pointing at it, and opens the archive.
document.getElementById('unlockBtn').addEventListener('click', async () => {
  const btn = document.getElementById('unlockBtn');
  btn.disabled = true;
  showStatus('Unlocking article...', 'loading');

  // Minimal HTML escaping for values interpolated into the note body.
  const escapeHtml = (value) =>
    String(value).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);

  try {
    const token = await getToken();
    if (!token) {
      // Without this guard the request would go out as "Bearer null".
      showStatus('Not signed in. Open extension settings to sign in.', 'error');
      return;
    }
    const settings = await getSettings();

    const response = await fetch(`${settings.host}/api/articles/unlock`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${token}`,
      },
      body: JSON.stringify({ url: currentTab.url }),
    });

    // Error responses are not guaranteed to be JSON; parse defensively so
    // the user sees the HTTP status rather than a JSON syntax error.
    let result = null;
    try {
      result = await response.json();
    } catch {
      result = null;
    }

    if (!response.ok) {
      showStatus(result?.error || `Unlock failed: ${response.status}`, 'error');
      return;
    }

    if (result?.success && result.archiveUrl) {
      const safeOriginal = escapeHtml(currentTab.url);
      const safeArchive = escapeHtml(result.archiveUrl);
      // Also save as a note
      await createNote({
        title: currentTab.title || 'Unlocked Article',
        content: `<p>Unlocked via ${escapeHtml(result.strategy)}</p><p>Original: <a href="${safeOriginal}">${safeOriginal}</a></p><p>Archive: <a href="${safeArchive}">${safeArchive}</a></p>`,
        type: 'CLIP',
        url: currentTab.url,
      });

      showStatus(`Unlocked via ${result.strategy}! Opening...`, 'success');

      // Open archive in new tab
      chrome.tabs.create({ url: result.archiveUrl });
    } else {
      showStatus(result?.error || 'No archived version found', 'error');
    }
  } catch (err) {
    showStatus(`Error: ${err.message}`, 'error');
  } finally {
    // Runs on every exit path (including the early returns above).
    btn.disabled = false;
  }
});
document.getElementById('optionsLink').addEventListener('click', (e) => {
e.preventDefault();
chrome.runtime.openOptionsPage();

View File

@ -75,6 +75,7 @@ model Note {
contentPlain String? @db.Text
type NoteType @default(NOTE)
url String?
archiveUrl String?
language String?
mimeType String?
fileUrl String?

View File

@ -0,0 +1,61 @@
import { NextRequest, NextResponse } from 'next/server';
import { prisma } from '@/lib/prisma';
import { requireAuth, isAuthed } from '@/lib/auth';
import { unlockArticle } from '@/lib/article-unlock';
/**
 * POST /api/articles/unlock
 *
 * Attempts to find an archived/readable version of a paywalled article.
 *
 * Body: { url: string, noteId?: string }
 * - url: The article URL to unlock (must be an http or https URL)
 * - noteId: (optional) If provided, updates the note's archiveUrl on success
 *
 * Returns: { success, strategy, archiveUrl, error? }
 *
 * Responds 400 for a malformed JSON body, a missing/invalid/non-http(s) URL,
 * or a non-string noteId; 500 for unexpected failures.
 */
export async function POST(request: NextRequest) {
  try {
    const auth = await requireAuth(request);
    if (!isAuthed(auth)) return auth;

    // A malformed body is a client error, not an internal one.
    let body: unknown;
    try {
      body = await request.json();
    } catch {
      return NextResponse.json({ error: 'Invalid JSON body' }, { status: 400 });
    }
    // Valid JSON may still be `null` or a primitive; treat that as "no fields".
    const fields: Record<string, unknown> =
      typeof body === 'object' && body !== null ? (body as Record<string, unknown>) : {};
    const { url, noteId: rawNoteId } = fields;

    if (!url || typeof url !== 'string') {
      return NextResponse.json({ error: 'URL is required' }, { status: 400 });
    }

    // Validate URL format
    let parsed: URL;
    try {
      parsed = new URL(url);
    } catch {
      return NextResponse.json({ error: 'Invalid URL format' }, { status: 400 });
    }
    // Only web pages can be archived; reject file:, javascript:, data:, etc.
    if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
      return NextResponse.json({ error: 'Only http(s) URLs are supported' }, { status: 400 });
    }

    // Reject a non-string noteId up front rather than letting it reach Prisma.
    if (rawNoteId && typeof rawNoteId !== 'string') {
      return NextResponse.json({ error: 'noteId must be a string' }, { status: 400 });
    }
    const noteId = typeof rawNoteId === 'string' && rawNoteId ? rawNoteId : undefined;

    const result = await unlockArticle(url);

    // If successful and noteId provided, update the note's archiveUrl
    if (result.success && result.archiveUrl && noteId) {
      const existing = await prisma.note.findUnique({
        where: { id: noteId },
        select: { authorId: true },
      });

      // Only the note's author may attach the archive URL; notes with no
      // author are writable by any authenticated user.
      if (existing && (!existing.authorId || existing.authorId === auth.user.id)) {
        await prisma.note.update({
          where: { id: noteId },
          data: { archiveUrl: result.archiveUrl },
        });
      }
    }

    return NextResponse.json(result);
  } catch (error) {
    console.error('Article unlock error:', error);
    return NextResponse.json(
      { success: false, strategy: 'none', error: 'Internal server error' },
      { status: 500 }
    );
  }
}

View File

@ -50,7 +50,7 @@ export async function PUT(
}
const body = await request.json();
const { title, content, type, url, language, isPinned, notebookId, tags } = body;
const { title, content, type, url, archiveUrl, language, isPinned, notebookId, tags } = body;
const data: Record<string, unknown> = {};
if (title !== undefined) data.title = title.trim();
@ -60,6 +60,7 @@ export async function PUT(
}
if (type !== undefined) data.type = type;
if (url !== undefined) data.url = url || null;
if (archiveUrl !== undefined) data.archiveUrl = archiveUrl || null;
if (language !== undefined) data.language = language || null;
if (isPinned !== undefined) data.isPinned = isPinned;
if (notebookId !== undefined) data.notebookId = notebookId || null;

View File

@ -43,7 +43,7 @@ export async function POST(request: NextRequest) {
if (!isAuthed(auth)) return auth;
const { user } = auth;
const body = await request.json();
const { title, content, type, notebookId, url, language, tags, fileUrl, mimeType, fileSize, duration } = body;
const { title, content, type, notebookId, url, archiveUrl, language, tags, fileUrl, mimeType, fileSize, duration } = body;
if (!title?.trim()) {
return NextResponse.json({ error: 'Title is required' }, { status: 400 });
@ -75,6 +75,7 @@ export async function POST(request: NextRequest) {
notebookId: notebookId || null,
authorId: user.id,
url: url || null,
archiveUrl: archiveUrl || null,
language: language || null,
fileUrl: fileUrl || null,
mimeType: mimeType || null,

View File

@ -25,6 +25,7 @@ interface NoteData {
contentPlain: string | null;
type: string;
url: string | null;
archiveUrl: string | null;
language: string | null;
fileUrl: string | null;
mimeType: string | null;
@ -49,6 +50,8 @@ export default function NoteDetailPage() {
const [saving, setSaving] = useState(false);
const [diarizing, setDiarizing] = useState(false);
const [speakers, setSpeakers] = useState<{ speaker: string; start: number; end: number }[] | null>(null);
const [unlocking, setUnlocking] = useState(false);
const [unlockError, setUnlockError] = useState<string | null>(null);
useEffect(() => {
fetch(`/api/notes/${params.id}`)
@ -135,6 +138,30 @@ export default function NoteDetailPage() {
}
};
// Ask the server for an archived copy of this note's URL and, on success,
// reflect the returned archiveUrl in local state. No-op while a request is
// already in flight or when the note has no URL.
const handleUnlock = async () => {
  if (!note?.url || unlocking) return;
  const noteId = note.id;
  setUnlocking(true);
  setUnlockError(null);
  try {
    const res = await authFetch('/api/articles/unlock', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ url: note.url, noteId }),
    });
    const result = await res.json();
    if (result.success && result.archiveUrl) {
      // Functional update: the request can take a long time, so `note` as
      // captured above may be stale by now. Merge into the latest state, and
      // only if it is still the same note.
      setNote((current) =>
        current && current.id === noteId
          ? { ...current, archiveUrl: result.archiveUrl }
          : current
      );
    } else {
      setUnlockError(result.error || 'No archived version found');
    }
  } catch (error) {
    setUnlockError('Failed to unlock article');
    console.error('Unlock error:', error);
  } finally {
    setUnlocking(false);
  }
};
if (loading) {
return (
<div className="min-h-screen bg-[#0a0a0a] flex items-center justify-center">
@ -241,16 +268,63 @@ export default function NoteDetailPage() {
</span>
</div>
{/* URL */}
{/* URL + Unlock */}
{note.url && (
<a
href={note.url}
target="_blank"
rel="noopener noreferrer"
className="text-sm text-blue-400 hover:text-blue-300 mb-4 block truncate"
>
{note.url}
</a>
<div className="mb-4 space-y-2">
<a
href={note.url}
target="_blank"
rel="noopener noreferrer"
className="text-sm text-blue-400 hover:text-blue-300 block truncate"
>
{note.url}
</a>
{note.archiveUrl ? (
<div className="flex items-center gap-2">
<a
href={note.archiveUrl}
target="_blank"
rel="noopener noreferrer"
className="inline-flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium bg-emerald-500/10 text-emerald-400 border border-emerald-500/20 rounded-lg hover:bg-emerald-500/20 transition-colors"
>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M8 11V7a4 4 0 118 0m-4 8v2m-6 4h12a2 2 0 002-2v-6a2 2 0 00-2-2H6a2 2 0 00-2 2v6a2 2 0 002 2z" />
</svg>
View Unlocked Article
</a>
<span className="text-[10px] text-slate-500 truncate max-w-[200px]">{note.archiveUrl}</span>
</div>
) : (
<div className="flex items-center gap-2">
<button
onClick={handleUnlock}
disabled={unlocking}
className="inline-flex items-center gap-1.5 px-3 py-1.5 text-xs font-medium bg-amber-500/10 text-amber-400 border border-amber-500/20 rounded-lg hover:bg-amber-500/20 transition-colors disabled:opacity-50"
>
{unlocking ? (
<>
<svg className="animate-spin w-3.5 h-3.5" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4" fill="none" />
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z" />
</svg>
Unlocking...
</>
) : (
<>
<svg className="w-3.5 h-3.5" fill="none" viewBox="0 0 24 24" stroke="currentColor">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 15v2m-6 4h12a2 2 0 002-2v-6a2 2 0 00-2-2H6a2 2 0 00-2 2v6a2 2 0 002 2zm10-10V7a4 4 0 00-8 0v4h8z" />
</svg>
Unlock Article
</>
)}
</button>
{unlockError && (
<span className="text-[10px] text-red-400">{unlockError}</span>
)}
</div>
)}
</div>
)}
{/* Uploaded file/image */}

View File

@ -22,9 +22,10 @@ interface NoteCardProps {
updatedAt: string;
tags: { id: string; name: string; color: string | null }[];
url?: string | null;
archiveUrl?: string | null;
}
export function NoteCard({ id, title, type, contentPlain, isPinned, updatedAt, tags, url }: NoteCardProps) {
export function NoteCard({ id, title, type, contentPlain, isPinned, updatedAt, tags, url, archiveUrl }: NoteCardProps) {
const snippet = (contentPlain || '').slice(0, 120);
return (
@ -41,6 +42,11 @@ export function NoteCard({ id, title, type, contentPlain, isPinned, updatedAt, t
&#9733;
</span>
)}
{archiveUrl && (
<span className="text-emerald-400 text-[10px] font-bold uppercase px-1 py-0.5 rounded bg-emerald-500/10" title="Unlocked article">
unlocked
</span>
)}
<span className="text-[10px] text-slate-500 ml-auto">
{new Date(updatedAt).toLocaleDateString()}
</span>

232
src/lib/article-unlock.ts Normal file
View File

@ -0,0 +1,232 @@
/**
* Article Unlock — multi-strategy approach to get readable versions of
* paywalled or permissioned articles.
*
* Strategies (tried in order):
* 1. Wayback Machine — check for existing snapshot, or request a new one
* 2. Google Web Cache — fast lookup, often has full text
* 3. archive.ph — check for existing snapshots (read-only, no submission)
*/
/** Outcome of an unlock attempt, returned by each strategy and by `unlockArticle`. */
export interface UnlockResult {
/** True when a readable copy was located. */
success: boolean;
/**
* Identifier of the strategy that produced this result. Values used in this
* file: 'wayback', 'wayback-save', 'google-cache', 'archive-ph', and 'none'
* for failures.
*/
strategy: string;
/** URL of the archived/cached copy; set on success. */
archiveUrl?: string;
/** Raw page text; in this file only the Google Cache strategy fills it in. */
content?: string;
/** Human-readable failure reason; set when `success` is false. */
error?: string;
}
/** Desktop-Chrome User-Agent string sent with every outbound request made by the strategies in this file. */
const BROWSER_UA =
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36';
// ---------------------------------------------------------------------------
// Strategy 1: Internet Archive Wayback Machine
// ---------------------------------------------------------------------------
/**
 * Strategy 1: Internet Archive Wayback Machine.
 *
 * First looks for an existing snapshot (strategy 'wayback'); if none is
 * found, asks Save Page Now to capture the page (strategy 'wayback-save').
 * Both steps are best-effort: network or parse failures are swallowed and
 * reported as "not found".
 *
 * @param url The article URL to look up.
 * @returns A successful result, or `null` when no snapshot could be obtained.
 */
async function tryWaybackMachine(url: string): Promise<UnlockResult | null> {
  const existing = await findWaybackSnapshot(url);
  if (existing) {
    return { success: true, strategy: 'wayback', archiveUrl: existing };
  }

  const captured = await requestWaybackCapture(url);
  if (captured) {
    return { success: true, strategy: 'wayback-save', archiveUrl: captured };
  }

  return null;
}

/** Matches a Wayback Machine snapshot URL (`/web/<timestamp>...`). */
const WAYBACK_SNAPSHOT_RE = /^https?:\/\/web\.archive\.org\/web\/\d+/;

/** Queries the availability API; returns the closest snapshot's https URL, or null. */
async function findWaybackSnapshot(url: string): Promise<string | null> {
  try {
    const checkUrl = `https://archive.org/wayback/available?url=${encodeURIComponent(url)}`;
    const res = await fetch(checkUrl, {
      headers: { 'User-Agent': BROWSER_UA },
      signal: AbortSignal.timeout(10000),
    });
    if (!res.ok) return null;

    const data = await res.json();
    const snapshot = data?.archived_snapshots?.closest;
    if (snapshot?.available && typeof snapshot.url === 'string' && snapshot.url) {
      // Anchor the replacement: only the snapshot URL's own scheme is
      // upgraded, never an "http://" inside the embedded original URL.
      return snapshot.url.replace(/^http:\/\//, 'https://');
    }
  } catch {
    // availability check failed; caller falls back to Save Page Now
  }
  return null;
}

/** Requests a new capture via Save Page Now; returns the snapshot URL, or null. */
async function requestWaybackCapture(url: string): Promise<string | null> {
  try {
    const saveRes = await fetch('https://web.archive.org/save', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/x-www-form-urlencoded',
        'User-Agent': BROWSER_UA,
        Accept: 'application/json',
      },
      body: `url=${encodeURIComponent(url)}&capture_all=1`,
      signal: AbortSignal.timeout(30000),
    });

    // fetch follows redirects by default, so a 301/302 status is never seen
    // here. A redirect to the archived page shows up instead as a redirected
    // response whose final URL is the snapshot.
    if (saveRes.redirected && WAYBACK_SNAPSHOT_RE.test(saveRes.url)) {
      return saveRes.url;
    }

    if (!saveRes.ok) return null;

    const data = await saveRes.json();

    // A job id means the capture is asynchronous: poll until it finishes.
    if (data?.job_id) {
      const polled = await pollWaybackJob(data.job_id);
      if (polled) return polled;
    }

    // Accept a `url` field only when it is itself a snapshot URL; the field
    // may otherwise just echo the submitted address (assumption — confirm
    // against the Save Page Now API documentation).
    if (typeof data?.url === 'string' && WAYBACK_SNAPSHOT_RE.test(data.url)) {
      return data.url;
    }
  } catch {
    // Save Page Now failed or returned a non-JSON body
  }
  return null;
}
/**
 * Polls the Save Page Now status endpoint for a capture job.
 *
 * Makes up to six attempts, waiting five seconds before each one. Returns
 * the snapshot URL once the job reports success, or `null` if the job
 * reports an error or never completes within the attempts. Request and
 * parse failures on an individual attempt are ignored and polling continues.
 *
 * @param jobId Job identifier returned by the save request.
 */
async function pollWaybackJob(jobId: string): Promise<string | null> {
  const statusUrl = `https://web.archive.org/save/status/${jobId}`;
  let attemptsLeft = 6;

  while (attemptsLeft-- > 0) {
    // Give the capture time to progress before asking.
    await new Promise((resolve) => setTimeout(resolve, 5000));

    try {
      const statusRes = await fetch(statusUrl, {
        headers: { Accept: 'application/json', 'User-Agent': BROWSER_UA },
        signal: AbortSignal.timeout(10000),
      });
      if (!statusRes.ok) continue;

      const job = await statusRes.json();
      const finished = job.status === 'success' && job.original_url && job.timestamp;
      if (finished) {
        return `https://web.archive.org/web/${job.timestamp}/${job.original_url}`;
      }
      if (job.status === 'error') {
        return null;
      }
    } catch {
      // keep polling
    }
  }

  return null;
}
// ---------------------------------------------------------------------------
// Strategy 2: Google Web Cache
// ---------------------------------------------------------------------------
/**
 * Strategy 2: Google Web Cache.
 *
 * Fetches the cache URL for `url` and treats the response as a hit when it
 * is a 2xx with more than 1000 characters of body that does not contain
 * Google's "did not match any documents" text. On a hit the fetched body is
 * returned as `content`. Any fetch failure yields `null`.
 *
 * @param url The article URL to look up.
 */
async function tryGoogleCache(url: string): Promise<UnlockResult | null> {
  const cacheUrl = `https://webcache.googleusercontent.com/search?q=cache:${encodeURIComponent(url)}`;

  try {
    const res = await fetch(cacheUrl, {
      headers: { 'User-Agent': BROWSER_UA },
      redirect: 'follow',
      signal: AbortSignal.timeout(10000),
    });
    if (!res.ok) {
      return null;
    }

    // Reject bodies that are too small to be an article or that are the
    // "no results" page.
    const html = await res.text();
    if (html.length <= 1000 || html.includes('did not match any documents')) {
      return null;
    }

    return {
      success: true,
      strategy: 'google-cache',
      archiveUrl: cacheUrl,
      content: html,
    };
  } catch {
    // Google cache not available
    return null;
  }
}
// ---------------------------------------------------------------------------
// Strategy 3: archive.ph (read-only — check for existing snapshots)
// ---------------------------------------------------------------------------
/**
 * Strategy 3: archive.ph, read-only.
 *
 * Requests archive.ph's "newest" lookup for `url` without following
 * redirects and reports a hit when the response points at an existing
 * snapshot. Never submits a page for archiving. Any fetch failure yields
 * `null`.
 *
 * @param url The article URL to look up.
 */
async function tryArchivePh(url: string): Promise<UnlockResult | null> {
  const lookupUrl = `https://archive.ph/newest/${encodeURIComponent(url)}`;
  const hit = (archiveUrl: string): UnlockResult => ({
    success: true,
    strategy: 'archive-ph',
    archiveUrl,
  });

  try {
    const res = await fetch(lookupUrl, {
      headers: { 'User-Agent': BROWSER_UA },
      redirect: 'manual', // the redirect target itself is the answer
      signal: AbortSignal.timeout(10000),
    });

    // A 301/302 whose target is on archive.ph (and is not the submit page)
    // means a snapshot exists.
    const isRedirect = res.status === 301 || res.status === 302;
    if (isRedirect) {
      const target = res.headers.get('location');
      if (target && target.includes('archive.ph/') && !target.includes('/submit')) {
        return hit(target);
      }
    }

    // A 2xx that landed on a different archive.ph URL also counts.
    if (res.ok) {
      const landedOn = res.url;
      if (landedOn && landedOn !== lookupUrl && landedOn.includes('archive.ph/')) {
        return hit(landedOn);
      }
    }
  } catch {
    // archive.ph not reachable
  }

  return null;
}
// ---------------------------------------------------------------------------
// Main unlock function
// ---------------------------------------------------------------------------
/**
 * Tries each unlock strategy in order and returns the first success.
 *
 * Order: Wayback Machine, Google Cache, archive.ph. A strategy that throws
 * is recorded and skipped, so one failing backend never prevents the others
 * from running. This function itself does not throw for strategy failures.
 *
 * @param url The article URL; must parse as an http or https URL.
 * @returns The first successful result, or a failure result with
 *   `strategy: 'none'` and an `error` describing what was tried (plus any
 *   errors thrown by individual strategies).
 */
export async function unlockArticle(url: string): Promise<UnlockResult> {
  // Validate URL
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return { success: false, strategy: 'none', error: 'Invalid URL' };
  }
  // Archive services only hold web pages; fail fast on other schemes
  // instead of spending time on lookups that cannot succeed.
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return { success: false, strategy: 'none', error: 'Only http(s) URLs can be unlocked' };
  }

  // Try strategies in order of reliability
  const strategies = [
    { name: 'Wayback Machine', fn: tryWaybackMachine },
    { name: 'Google Cache', fn: tryGoogleCache },
    { name: 'archive.ph', fn: tryArchivePh },
  ];

  const errors: string[] = [];

  for (const { name, fn } of strategies) {
    try {
      const result = await fn(url);
      if (result?.success) {
        return result;
      }
    } catch (err) {
      errors.push(`${name}: ${err instanceof Error ? err.message : 'unknown error'}`);
    }
  }

  // Report collected strategy errors; the message is unchanged when there are none.
  const tried = strategies.map((s) => s.name).join(', ');
  const detail = errors.length > 0 ? ` Errors: ${errors.join('; ')}` : '';

  return {
    success: false,
    strategy: 'none',
    error: `No archived version found. Tried: ${tried}${detail}`,
  };
}