163 lines
4.2 KiB
TypeScript
163 lines
4.2 KiB
TypeScript
/**
 * RSS 2.0 + Atom 1.0 parser.
 *
 * Uses fast-xml-parser. Detects the format by root element (<rss> vs <feed>),
 * with a fallback for RDF-rooted feeds (<rdf:RDF>).
 * Returns normalized FeedItem-shaped objects.
 */
|
|
|
|
import { XMLParser } from 'fast-xml-parser';
|
|
|
|
/**
 * One normalized feed entry, independent of the source format.
 */
export interface ParsedFeedItem {
  /** Identifier for the entry: the feed-supplied guid/id, else its link, else a random UUID. */
  guid: string;
  /** Entry title, or an empty string when the feed provides none. */
  title: string;
  /** Link to the entry, or an empty string when the feed provides none. */
  url: string;
  /** Tag-stripped description/content, truncated to at most 500 characters. */
  summary: string;
  /** Publication time as epoch milliseconds (as returned by `Date#getTime`). */
  publishedAt: number;
  /** Author name, or an empty string when the feed provides none. */
  author: string;
}
|
|
|
|
/**
 * A whole feed after normalization: channel-level metadata plus its entries.
 */
export interface ParsedFeed {
  /** Feed title, or an empty string when absent. */
  title: string;
  /** Feed description (the Atom subtitle for Atom feeds), or an empty string when absent. */
  description: string;
  /** Entries in the order they appear in the parsed document. */
  items: ParsedFeedItem[];
}
|
|
|
|
/**
 * Shared XML parser used for both feed and OPML documents.
 *
 * Attributes are kept and exposed with an `@_` prefix; the text of an element
 * that also carries attributes is exposed under `#text`.
 */
const parser = new XMLParser({
  ignoreAttributes: false,
  attributeNamePrefix: '@_',
  textNodeName: '#text',
  // Keep element text as strings. By default fast-xml-parser coerces text that
  // looks like a number or boolean, which corrupts identifiers and titles
  // (e.g. a guid of "007" becomes 7, a title of "true" becomes a boolean).
  parseTagValue: false,
  // Always expose <item>/<entry> as arrays, even when a feed has only one.
  isArray: (name) => name === 'item' || name === 'entry',
});
|
|
|
|
/** Named character references that `stripHtml` decodes (keyed without `&` and `;`). */
const NAMED_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
  ['nbsp', ' '],
]);

/**
 * Converts an HTML fragment to plain text.
 *
 * Removes tags, decodes the common named character references (amp, lt, gt,
 * quot, apos, nbsp) and numeric references (decimal and hex), collapses runs
 * of whitespace to a single space and trims the result.
 *
 * @param html - HTML (or already plain) text.
 * @returns The plain-text form of `html`.
 */
function stripHtml(html: string): string {
  return html
    .replace(/<[^>]*>/g, '')
    // Decode every reference in a single pass so that the output of one
    // decoding step is never decoded again (an escaped ampersand followed by
    // "lt;" must yield the literal text of the reference, not "<").
    .replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z]+);/gi, (match: string, ref: string) => {
      if (ref.charAt(0) !== '#') {
        // Unknown names are left untouched rather than dropped.
        return NAMED_ENTITIES.get(ref) ?? match;
      }
      const isHex = ref.charAt(1) === 'x' || ref.charAt(1) === 'X';
      const codePoint = parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
      // String.fromCodePoint throws above U+10FFFF; keep such references as-is.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match;
    })
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
|
|
/**
 * Shortens `s` to at most `max` UTF-16 code units, ending with an ellipsis
 * (U+2026) when anything was cut off.
 *
 * @param s - Text to shorten.
 * @param max - Maximum length of the result; values <= 0 yield an empty string.
 * @returns `s` unchanged when it already fits, otherwise the shortened text.
 */
function truncate(s: string, max: number): string {
  if (s.length <= max) return s;
  // A non-positive budget cannot even hold the ellipsis. Without this guard
  // slice(0, max - 1) would count from the end and return almost all of `s`.
  if (max <= 0) return '';

  let end = max - 1;
  if (end > 0) {
    const lastKept = s.charCodeAt(end - 1);
    // Do not leave a lone high surrogate behind when the cut falls in the
    // middle of a surrogate pair.
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
  }
  return s.slice(0, end) + '\u2026';
}
|
|
|
|
/**
 * Converts a raw feed date into epoch milliseconds.
 *
 * @param raw - Date string or numeric timestamp as found in the feed.
 * @returns The parsed time, or the current time when `raw` is falsy or
 *   cannot be parsed by `Date`.
 */
function parseDate(raw: string | number | undefined): number {
  if (raw) {
    const millis = new Date(raw).getTime();
    if (!Number.isNaN(millis)) {
      return millis;
    }
  }
  return Date.now();
}
|
|
|
|
/**
 * Extracts the text content of a parsed XML node.
 *
 * Handles the shapes the parser can produce for an element: a bare primitive,
 * an object whose text lives under `#text` (element with attributes), or an
 * array (element repeated several times), in which case the first non-empty
 * text is used.
 *
 * @param node - Parsed node of unknown shape.
 * @returns The node's text, or an empty string when it has none.
 */
function textOf(node: unknown): string {
  // Explicit nullish check: a truthiness test would also discard 0 and false.
  if (node === null || node === undefined) return '';
  if (typeof node === 'string') return node;
  if (typeof node === 'number' || typeof node === 'boolean') return String(node);

  if (Array.isArray(node)) {
    for (const entry of node) {
      const text = textOf(entry);
      if (text) return text;
    }
    return '';
  }

  if (typeof node === 'object') {
    const inner = (node as Record<string, unknown>)['#text'];
    if (typeof inner === 'string') return inner;
    if (typeof inner === 'number' || typeof inner === 'boolean') return String(inner);
  }
  return '';
}
|
|
|
|
/**
 * Normalizes a parsed RSS document into a `ParsedFeed`.
 *
 * @param rss - Parser output. The channel is read from `rss.rss.channel`,
 *   falling back to `rss.channel`; a missing channel yields an empty feed.
 * @returns Channel title/description and one normalized item per `<item>`.
 */
function parseRSS(rss: any): ParsedFeed {
  const channel = rss.rss?.channel || rss.channel || {};
  const rawItems = channel.item;
  // Tolerate a single, non-array <item> in case the input was not produced
  // with the array-forcing parser configuration.
  const items: any[] = Array.isArray(rawItems) ? rawItems : rawItems ? [rawItems] : [];

  return {
    title: textOf(channel.title),
    description: textOf(channel.description),
    items: items.map((item: any): ParsedFeedItem => {
      // Resolve the link once through textOf so that guid and url agree and
      // the guid is always a string, even when <link> carries attributes.
      const link = textOf(item.link);
      const rawSummary = textOf(item.description) || textOf(item['content:encoded']);

      return {
        // Items with neither guid nor link get a fresh random id on each parse.
        guid: textOf(item.guid) || link || crypto.randomUUID(),
        title: textOf(item.title),
        url: link,
        summary: truncate(stripHtml(rawSummary), 500),
        publishedAt: parseDate(item.pubDate),
        author: textOf(item.author) || textOf(item['dc:creator']),
      };
    }),
  };
}
|
|
|
|
/**
 * Normalizes a parsed Atom document into a `ParsedFeed`.
 *
 * @param feed - Parser output; the root is `feed.feed`, falling back to
 *   `feed` itself.
 * @returns Feed title/subtitle and one normalized item per `<entry>`.
 */
function parseAtom(feed: any): ParsedFeed {
  const root = feed.feed || feed;
  const rawEntries = root.entry;
  // Tolerate a single, non-array <entry> in case the input was not produced
  // with the array-forcing parser configuration.
  const entries: any[] = Array.isArray(rawEntries) ? rawEntries : rawEntries ? [rawEntries] : [];

  return {
    title: textOf(root.title),
    description: textOf(root.subtitle),
    items: entries.map((entry: any): ParsedFeedItem => {
      // With several <link> elements, take the first one that is
      // rel="alternate" or has no rel attribute at all.
      const link = Array.isArray(entry.link)
        ? entry.link.find((l: any) => l['@_rel'] === 'alternate' || !l['@_rel'])
        : entry.link;
      const href = link?.['@_href'] || textOf(link) || '';

      // Several <author> elements are parsed as an array; use the first one
      // instead of silently losing the author.
      const author = Array.isArray(entry.author) ? entry.author[0] : entry.author;

      return {
        // Entries with neither id nor link get a fresh random id on each parse.
        guid: textOf(entry.id) || href || crypto.randomUUID(),
        title: textOf(entry.title),
        url: href,
        summary: truncate(stripHtml(textOf(entry.summary) || textOf(entry.content)), 500),
        publishedAt: parseDate(entry.published || entry.updated),
        author: textOf(author?.name),
      };
    }),
  };
}
|
|
|
|
/**
 * Normalizes the contents of an `<rdf:RDF>` root into a `ParsedFeed`.
 * The item link doubles as the guid; a random UUID is used when it is missing.
 */
function parseRDF(rdf: any): ParsedFeed {
  const toItem = (item: any): ParsedFeedItem => {
    return {
      guid: textOf(item.link) || crypto.randomUUID(),
      title: textOf(item.title) || '',
      url: textOf(item.link) || '',
      summary: truncate(stripHtml(textOf(item.description) || ''), 500),
      publishedAt: parseDate(item['dc:date']),
      author: textOf(item['dc:creator']) || '',
    };
  };

  return {
    title: textOf(rdf.channel?.title) || '',
    description: textOf(rdf.channel?.description) || '',
    items: (rdf.item || []).map(toItem),
  };
}

/**
 * Parses feed XML and returns it in normalized form.
 *
 * The format is chosen from the root element of the parsed document:
 * `rss` (or a bare `channel`) is handled as RSS, `feed` as Atom, and an
 * `rdf:RDF` root that contains items as an RSS-like fallback.
 *
 * @param xml - Raw feed document.
 * @returns The normalized feed.
 * @throws Error with message "Unrecognized feed format" when none of the
 *   known roots is present.
 */
export function parseFeed(xml: string): ParsedFeed {
  const doc = parser.parse(xml);

  if (doc.rss || doc.channel) return parseRSS(doc);
  if (doc.feed) return parseAtom(doc);

  // Fallback: RSS-like structure under an rdf:RDF root.
  const rdf = doc['rdf:RDF'];
  if (rdf?.item) return parseRDF(rdf);

  throw new Error('Unrecognized feed format');
}
|
|
|
|
/**
 * Extracts feed subscriptions from an OPML document.
 *
 * Outlines are visited depth-first in document order, so feeds nested inside
 * folder outlines are included. An outline counts as a feed when it has an
 * `xmlUrl` (or `xmlurl`) attribute.
 *
 * @param xml - Raw OPML document.
 * @returns One `{ url, name }` per feed outline; `name` is the outline's
 *   `title`, else its `text`, else the URL itself.
 */
export function parseOPML(xml: string): { url: string; name: string }[] {
  const feeds: { url: string; name: string }[] = [];

  const visit = (outline: any): void => {
    if (!outline) return;
    const siblings = Array.isArray(outline) ? outline : [outline];
    siblings.forEach((entry: any) => {
      const feedUrl = entry['@_xmlUrl'] || entry['@_xmlurl'];
      if (feedUrl) {
        const name = entry['@_title'] || entry['@_text'] || feedUrl;
        feeds.push({ url: feedUrl, name });
      }
      // Descend into child outlines (folders, or feeds that have children).
      if (entry.outline) visit(entry.outline);
    });
  };

  const doc = parser.parse(xml);
  const body = doc.opml?.body || doc.body;
  if (body?.outline) visit(body.outline);

  return feeds;
}
|