/**
 * Obsidian Vault Importer
 * Handles reading and processing markdown files from a local Obsidian vault.
 */

import { GitHubQuartzReader, GitHubQuartzConfig } from './githubQuartzReader'
import { getClientConfig } from './clientConfig'

export interface ObsidianObsNote {
  id: string
  title: string
  content: string
  filePath: string
  tags: string[]
  created: Date | string
  modified: Date | string
  links: string[]
  backlinks: string[]
  frontmatter: Record<string, any>
  vaultPath?: string
}

export interface FolderNode {
  name: string
  path: string
  children: FolderNode[]
  notes: ObsidianObsNote[]
  isExpanded: boolean
  level: number
}

export interface ObsidianVault {
  name: string
  path: string
  obs_notes: ObsidianObsNote[]
  totalObsNotes: number
  lastImported: Date
  folderTree: FolderNode
}

export interface ObsidianVaultRecord {
  id: string
  typeName: 'obsidian_vault'
  name: string
  path: string
  obs_notes: ObsidianObsNote[]
  totalObsNotes: number
  lastImported: Date
  folderTree: FolderNode
  meta: Record<string, any>
}

export class ObsidianImporter {
  private vault: ObsidianVault | null = null

  /**
   * Import notes from a directory (simulated file picker for now).
   * A real implementation would use the File System Access API.
   */
  async importFromDirectory(directoryPath: string): Promise<ObsidianVault> {
    try {
      // Simulate reading files with a demo vault for now
      const mockObsNotes = await this.createMockObsNotes()

      this.vault = {
        name: this.extractVaultName(directoryPath),
        path: directoryPath,
        obs_notes: mockObsNotes,
        totalObsNotes: mockObsNotes.length,
        lastImported: new Date(),
        folderTree: this.buildFolderTree(mockObsNotes)
      }

      return this.vault
    } catch (error) {
      console.error('Error importing Obsidian vault:', error)
      throw new Error('Failed to import Obsidian vault')
    }
  }

  /**
   * Import notes from a Quartz URL, preferring the GitHub API and
   * falling back to crawling the site when no GitHub config is available.
   */
  async importFromQuartzUrl(quartzUrl: string): Promise<ObsidianVault> {
    try {
      // Ensure the URL has a protocol
      const url = quartzUrl.startsWith('http') ? quartzUrl : `https://${quartzUrl}`

      // Try to get GitHub repository info from the client config
      const githubConfig = this.getGitHubConfigFromUrl(url)

      // Prefer the GitHub API; otherwise fall back to crawling the site
      const obs_notes = githubConfig
        ? await this.importFromGitHub(githubConfig)
        : await this.discoverQuartzContent(url)

      this.vault = {
        name: this.extractVaultNameFromUrl(url),
        path: url,
        obs_notes,
        totalObsNotes: obs_notes.length,
        lastImported: new Date(),
        folderTree: this.buildFolderTree(obs_notes)
      }

      return this.vault
    } catch (error) {
      console.error('Error importing from Quartz URL:', error)
      throw new Error('Failed to import from Quartz URL')
    }
  }

  /**
   * Import notes using the File System Access API (modern browsers).
   */
  async importFromFileSystem(): Promise<ObsidianVault> {
    try {
      // Check if the File System Access API is supported
      if (!('showDirectoryPicker' in window)) {
        throw new Error('File System Access API not supported in this browser')
      }

      // Request read access to a directory
      const directoryHandle = await (window as any).showDirectoryPicker({
        mode: 'read'
      })

      const obs_notes: ObsidianObsNote[] = []
      await this.readDirectoryRecursively(directoryHandle, obs_notes, '')

      this.vault = {
        name: directoryHandle.name,
        path: directoryHandle.name, // the File System Access API doesn't expose full paths
        obs_notes,
        totalObsNotes: obs_notes.length,
        lastImported: new Date(),
        folderTree: this.buildFolderTree(obs_notes)
      }

      return this.vault
    } catch (error) {
      console.error('Error importing Obsidian vault via File System Access API:', error)
      throw new Error('Failed to import Obsidian vault')
    }
  }
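
  // Usage sketch (illustrative; `importer` and `importButton` are assumed to
  // exist in the caller's scope): `showDirectoryPicker` must be triggered by
  // a user gesture, e.g. a click handler.
  //
  //   importButton.addEventListener('click', async () => {
  //     const vault = await importer.importFromFileSystem()
  //     console.log(`Imported ${vault.totalObsNotes} notes from "${vault.name}"`)
  //   })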

  /**
   * Recursively read a directory and process markdown files.
   */
  private async readDirectoryRecursively(
    directoryHandle: any,
    obs_notes: ObsidianObsNote[],
    relativePath: string
  ): Promise<void> {
    for await (const [name, handle] of directoryHandle.entries()) {
      const currentPath = relativePath ? `${relativePath}/${name}` : name

      if (handle.kind === 'directory') {
        // Skip hidden directories (including .obsidian) and node_modules
        if (!name.startsWith('.') && name !== 'node_modules') {
          await this.readDirectoryRecursively(handle, obs_notes, currentPath)
        }
      } else if (handle.kind === 'file' && name.endsWith('.md')) {
        try {
          const file = await handle.getFile()
          const content = await file.text()
          const obs_note = this.parseMarkdownFile(content, currentPath, file.lastModified)
          obs_notes.push(obs_note)
        } catch (error) {
          console.warn(`Failed to read file ${currentPath}:`, error)
        }
      }
    }
  }

  /**
   * Parse a markdown file and extract its metadata.
   */
  private parseMarkdownFile(content: string, filePath: string, lastModified: number): ObsidianObsNote {
    // Extract frontmatter
    const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/)
    let frontmatter: Record<string, any> = {}
    let body = content

    if (frontmatterMatch) {
      try {
        const frontmatterText = frontmatterMatch[1]
        // Simple YAML parsing (in production, use a proper YAML parser)
        frontmatter = this.parseSimpleYaml(frontmatterText)
        body = frontmatterMatch[2]
      } catch (error) {
        console.warn('Failed to parse frontmatter:', error)
      }
    }

    // Extract the title from frontmatter, the first heading, or the filename
    const title = frontmatter.title || this.extractTitle(body) || this.extractFileName(filePath)

    // Extract tags
    const tags = this.extractTags(body, frontmatter)

    // Extract links (no base URL for local files)
    const links = this.extractLinks(body, '')

    // Generate a unique ID
    const id = this.generateNoteId(filePath)

    return {
      id,
      title,
      content: body,
      filePath,
      tags,
      created: new Date(frontmatter.created || lastModified),
      modified: new Date(lastModified),
      links,
      backlinks: [], // would need to be calculated by analyzing all notes
      frontmatter
    }
  }
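
  // Example (illustrative): for a file beginning
  //
  //   ---
  //   title: Welcome to Obsidian
  //   created: 2024-01-01
  //   ---
  //   # Welcome to Obsidian
  //
  // the frontmatter block is split off and parsed, the body starts at the
  // heading, and `title` resolves to the frontmatter value.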

  /**
   * Extract the title from markdown content (first level-1 heading).
   */
  private extractTitle(content: string): string | null {
    const headingMatch = content.match(/^#\s+(.+)$/m)
    return headingMatch ? headingMatch[1].trim() : null
  }

  /**
   * Extract the filename without its extension.
   */
  private extractFileName(filePath: string): string {
    const fileName = filePath.split('/').pop() || filePath
    return fileName.replace(/\.md$/, '')
  }

  /**
   * Extract tags from content and frontmatter.
   */
  private extractTags(content: string, frontmatter: Record<string, any>): string[] {
    const tags = new Set<string>()

    // Extract from frontmatter (array or comma-separated string)
    if (frontmatter.tags) {
      if (Array.isArray(frontmatter.tags)) {
        frontmatter.tags.forEach((tag: string) => tags.add(tag))
      } else if (typeof frontmatter.tags === 'string') {
        frontmatter.tags.split(',').forEach((tag: string) => tags.add(tag.trim()))
      }
    }

    // Extract inline #tags from the content
    const tagMatches = content.match(/#[a-zA-Z0-9_-]+/g)
    if (tagMatches) {
      tagMatches.forEach(tag => tags.add(tag))
    }

    return Array.from(tags)
  }
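
  // Note the asymmetry: inline tags keep their '#' prefix ('#welcome'),
  // while frontmatter tags are added as written ('welcome'), so both forms
  // can appear in the same note's tag list.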

  /**
   * Generate a unique ID for a note from its file path.
   */
  private generateNoteId(filePath: string): string {
    return `note_${filePath.replace(/[^a-zA-Z0-9]/g, '_')}`
  }

  /**
   * Simple YAML parser for frontmatter (flat key: value pairs only).
   */
  private parseSimpleYaml(yamlText: string): Record<string, any> {
    const result: Record<string, any> = {}
    const lines = yamlText.split('\n')

    for (const line of lines) {
      const trimmed = line.trim()
      if (trimmed && !trimmed.startsWith('#')) {
        const colonIndex = trimmed.indexOf(':')
        if (colonIndex > 0) {
          const key = trimmed.substring(0, colonIndex).trim()
          let value: any = trimmed.substring(colonIndex + 1).trim()

          // Remove surrounding quotes if present
          if ((value.startsWith('"') && value.endsWith('"')) ||
              (value.startsWith("'") && value.endsWith("'"))) {
            value = value.slice(1, -1)
          }

          // Parse inline arrays like ["a", "b"] via JSON
          if (value.startsWith('[') && value.endsWith(']')) {
            try {
              value = JSON.parse(value)
            } catch {
              // If JSON parsing fails, keep the raw string
            }
          }

          result[key] = value
        }
      }
    }

    return result
  }
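
  // Example (illustrative): parseSimpleYaml('title: "Hello"\ntags: ["a", "b"]')
  // returns { title: 'Hello', tags: ['a', 'b'] }. Unquoted arrays such as
  // [welcome, getting-started] are not valid JSON and stay as raw strings;
  // nested YAML structures are not supported.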

  /**
   * Extract the vault name from a path (last path segment).
   */
  private extractVaultName(path: string): string {
    const parts = path.split('/')
    return parts[parts.length - 1] || 'Obsidian Vault'
  }

  /**
   * Create mock obs_notes for demonstration.
   */
  private async createMockObsNotes(): Promise<ObsidianObsNote[]> {
    return [
      {
        id: 'note_1',
        title: 'Welcome to Obsidian',
        content: `# Welcome to Obsidian

This is a sample note from your Obsidian vault. You can drag this note onto the canvas to create a new rectangle shape.

## Features
- [[Note Linking]]
- #tags
- [External Links](https://obsidian.md)

## Tasks
- [x] Set up vault
- [ ] Import notes
- [ ] Organize content`,
        filePath: 'Welcome to Obsidian.md',
        tags: ['#welcome', '#getting-started'],
        created: new Date('2024-01-01'),
        modified: new Date('2024-01-15'),
        links: ['Note Linking', 'https://obsidian.md'],
        backlinks: [],
        frontmatter: {
          title: 'Welcome to Obsidian',
          tags: ['welcome', 'getting-started'],
          created: '2024-01-01'
        }
      },
      {
        id: 'note_2',
        title: 'Project Ideas',
        content: `# Project Ideas

A collection of creative project ideas and concepts.

## Web Development
- Canvas-based drawing app
- Real-time collaboration tools
- AI-powered content generation

## Design
- Interactive data visualizations
- User experience improvements
- Mobile-first design patterns`,
        filePath: 'Project Ideas.md',
        tags: ['#projects', '#ideas', '#development'],
        created: new Date('2024-01-05'),
        modified: new Date('2024-01-20'),
        links: [],
        backlinks: [],
        frontmatter: {
          title: 'Project Ideas',
          tags: ['projects', 'ideas', 'development']
        }
      },
      {
        id: 'note_3',
        title: 'Meeting Notes',
        content: `# Meeting Notes - January 15, 2024

## Attendees
- John Doe
- Jane Smith
- Bob Johnson

## Agenda
1. Project status update
2. Budget review
3. Timeline discussion

## Action Items
- [ ] Complete budget analysis by Friday
- [ ] Schedule follow-up meeting
- [ ] Update project documentation`,
        filePath: 'Meetings/2024-01-15 Meeting Notes.md',
        tags: ['#meetings', '#2024'],
        created: new Date('2024-01-15'),
        modified: new Date('2024-01-15'),
        links: [],
        backlinks: [],
        frontmatter: {
          title: 'Meeting Notes - January 15, 2024',
          date: '2024-01-15',
          tags: ['meetings', '2024']
        }
      }
    ]
  }

  /**
   * Get the current vault.
   */
  getVault(): ObsidianVault | null {
    return this.vault
  }

  /**
   * Search obs_notes in the vault by title, content, or tags.
   */
  searchObsNotes(query: string): ObsidianObsNote[] {
    if (!this.vault) return []

    const lowercaseQuery = query.toLowerCase()

    return this.vault.obs_notes.filter(obs_note =>
      obs_note.title.toLowerCase().includes(lowercaseQuery) ||
      obs_note.content.toLowerCase().includes(lowercaseQuery) ||
      obs_note.tags.some(tag => tag.toLowerCase().includes(lowercaseQuery))
    )
  }

  /**
   * Get obs_notes by tag (case-insensitive substring match).
   */
  getObsNotesByTag(tag: string): ObsidianObsNote[] {
    if (!this.vault) return []

    return this.vault.obs_notes.filter(obs_note =>
      obs_note.tags.some(noteTag => noteTag.toLowerCase().includes(tag.toLowerCase()))
    )
  }

  /**
   * Get all unique tags, sorted alphabetically.
   */
  getAllTags(): string[] {
    if (!this.vault) return []

    const allTags = new Set<string>()
    this.vault.obs_notes.forEach(obs_note => {
      obs_note.tags.forEach(tag => allTags.add(tag))
    })

    return Array.from(allTags).sort()
  }

  /**
   * Build a folder tree structure from obs_notes.
   */
  buildFolderTree(obs_notes: ObsidianObsNote[]): FolderNode {
    const root: FolderNode = {
      name: 'Root',
      path: '',
      children: [],
      notes: [],
      isExpanded: true,
      level: 0
    }

    // Group notes by their folder paths
    const folderMap = new Map<string, { folders: string[], notes: ObsidianObsNote[] }>()

    obs_notes.forEach(note => {
      const pathParts = this.parseFilePath(note.filePath)
      const folderKey = pathParts.folders.join('/')

      if (!folderMap.has(folderKey)) {
        folderMap.set(folderKey, { folders: pathParts.folders, notes: [] })
      }
      folderMap.get(folderKey)!.notes.push(note)
    })

    // Build the tree structure
    folderMap.forEach(({ folders, notes }) => {
      this.addFolderToTree(root, folders, notes)
    })

    return root
  }
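
  // Example (illustrative): notes at 'Welcome to Obsidian.md' and
  // 'Meetings/2024-01-15 Meeting Notes.md' produce a root node holding the
  // first note directly, plus one child folder 'Meetings' (level 1)
  // containing the second.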

  /**
   * Parse a file path into its folder structure.
   */
  private parseFilePath(filePath: string): { folders: string[], fileName: string } {
    // Handle both local paths and URLs
    let pathToParse = filePath

    if (filePath.startsWith('http')) {
      // Extract the pathname from the URL
      try {
        const url = new URL(filePath)
        pathToParse = url.pathname.replace(/^\//, '')
      } catch (e) {
        console.warn('Invalid URL:', filePath)
        return { folders: [], fileName: filePath }
      }
    }

    // Split the path and filter out empty parts
    const parts = pathToParse.split('/').filter(part => part.length > 0)

    if (parts.length === 0) {
      return { folders: [], fileName: filePath }
    }

    const fileName = parts[parts.length - 1]
    const folders = parts.slice(0, -1)

    return { folders, fileName }
  }

  /**
   * Add a folder path (and its notes) to the tree structure.
   */
  private addFolderToTree(root: FolderNode, folderPath: string[], notes: ObsidianObsNote[]): void {
    let current = root

    for (let i = 0; i < folderPath.length; i++) {
      const folderName = folderPath[i]
      let existingFolder = current.children.find(child => child.name === folderName)

      if (!existingFolder) {
        const currentPath = folderPath.slice(0, i + 1).join('/')
        existingFolder = {
          name: folderName,
          path: currentPath,
          children: [],
          notes: [],
          isExpanded: false,
          level: i + 1
        }
        current.children.push(existingFolder)
      }

      current = existingFolder
    }

    // Add the notes to the final folder
    current.notes.push(...notes)
  }

  /**
   * Get all notes from a folder tree (recursive).
   */
  getAllNotesFromTree(folder: FolderNode): ObsidianObsNote[] {
    const notes = [...folder.notes]

    folder.children.forEach(child => {
      notes.push(...this.getAllNotesFromTree(child))
    })

    return notes
  }

  /**
   * Find a folder by path in the tree.
   */
  findFolderByPath(root: FolderNode, path: string): FolderNode | null {
    if (root.path === path) {
      return root
    }

    for (const child of root.children) {
      const found = this.findFolderByPath(child, path)
      if (found) {
        return found
      }
    }

    return null
  }

  /**
   * Convert a vault to the Automerge record format.
   */
  vaultToRecord(vault: ObsidianVault): ObsidianVaultRecord {
    return {
      id: `obsidian_vault:${vault.name}`,
      typeName: 'obsidian_vault',
      name: vault.name,
      path: vault.path,
      obs_notes: vault.obs_notes,
      totalObsNotes: vault.totalObsNotes,
      lastImported: vault.lastImported,
      folderTree: vault.folderTree,
      meta: {}
    }
  }

  /**
   * Convert an Automerge record back to the vault format.
   */
  recordToVault(record: ObsidianVaultRecord): ObsidianVault {
    return {
      name: record.name,
      path: record.path,
      obs_notes: record.obs_notes,
      totalObsNotes: record.totalObsNotes,
      lastImported: record.lastImported,
      folderTree: record.folderTree
    }
  }

  /**
   * Search notes in the current vault, using GitHub search for
   * GitHub-backed Quartz vaults and falling back to local search.
   */
  async searchNotes(query: string): Promise<ObsidianObsNote[]> {
    if (!this.vault) return []

    // If this is a GitHub-based Quartz vault, use GitHub search
    if (this.vault.path && (this.vault.path.startsWith('http') || this.vault.path.includes('github'))) {
      const githubConfig = this.getGitHubConfigFromUrl(this.vault.path)
      if (githubConfig) {
        try {
          const reader = new GitHubQuartzReader(githubConfig)
          const quartzNotes = await reader.searchNotes(query)

          // Convert to the Obsidian format
          return quartzNotes.map(note => ({
            id: note.id,
            title: note.title,
            content: note.content,
            filePath: note.filePath,
            tags: note.tags,
            links: [],
            created: new Date().toISOString(),
            modified: note.lastModified,
            vaultPath: githubConfig.owner + '/' + githubConfig.repo,
            backlinks: [],
            frontmatter: note.frontmatter
          }))
        } catch (error) {
          console.error('GitHub search failed, falling back to local search:', error)
        }
      }
    }

    // Fall back to local search
    const searchTerm = query.toLowerCase()
    return this.vault.obs_notes.filter(note =>
      note.title.toLowerCase().includes(searchTerm) ||
      note.content.toLowerCase().includes(searchTerm) ||
      note.tags.some(tag => tag.toLowerCase().includes(searchTerm))
    )
  }

  /**
   * Get the GitHub configuration from the client config.
   * Returns null when the config is missing or still holds placeholder values.
   */
  private getGitHubConfigFromUrl(_quartzUrl: string): GitHubQuartzConfig | null {
    const config = getClientConfig()
    const githubToken = config.githubToken
    const githubRepo = config.quartzRepo

    if (!githubToken || !githubRepo) {
      return null
    }

    // Reject unedited placeholder values
    if (githubToken === 'your_github_token_here' || githubRepo === 'your_username/your-quartz-repo') {
      return null
    }

    const [owner, repo] = githubRepo.split('/')
    if (!owner || !repo) {
      return null
    }

    return {
      token: githubToken,
      owner,
      repo,
      branch: config.quartzBranch || 'main',
      contentPath: 'content'
    }
  }
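
  // Expected client config shape (illustrative values):
  //
  //   { githubToken: 'ghp_...', quartzRepo: 'owner/repo', quartzBranch: 'main' }
  //
  // quartzRepo must be in 'owner/repo' form; quartzBranch defaults to 'main',
  // and content is read from the repository's 'content' directory.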

  /**
   * Import notes from a GitHub repository.
   */
  private async importFromGitHub(config: GitHubQuartzConfig): Promise<ObsidianObsNote[]> {
    try {
      const reader = new GitHubQuartzReader(config)
      const quartzNotes = await reader.getAllNotes()

      // Convert Quartz notes to the Obsidian format and deduplicate by ID
      const notesMap = new Map<string, ObsidianObsNote>()

      quartzNotes
        .filter(note => note != null) // filter out any null/undefined notes
        .forEach(note => {
          const obsNote: ObsidianObsNote = {
            id: note.id || 'unknown',
            title: note.title || 'Untitled',
            content: note.content || '',
            filePath: note.filePath || 'unknown',
            tags: note.tags || [],
            links: [], // will be populated if needed
            created: new Date(),
            modified: new Date(note.lastModified || new Date().toISOString()),
            backlinks: [],
            frontmatter: note.frontmatter || {},
            vaultPath: config.owner + '/' + config.repo,
          }

          // On a duplicate ID, prefer the note without quotes in the filename,
          // then the one with the longer (presumably more complete) content
          const existing = notesMap.get(obsNote.id)
          if (existing) {
            console.warn(`Duplicate note ID found: ${obsNote.id}. File paths: ${existing.filePath} vs ${obsNote.filePath}`)

            const existingHasQuotes = existing.filePath.includes('"')
            const currentHasQuotes = obsNote.filePath.includes('"')

            if (currentHasQuotes && !existingHasQuotes) {
              return // keep the existing one
            } else if (!currentHasQuotes && existingHasQuotes) {
              notesMap.set(obsNote.id, obsNote)
            } else {
              // Both or neither have quotes: prefer the longer content
              if (obsNote.content.length > existing.content.length) {
                notesMap.set(obsNote.id, obsNote)
              }
            }
          } else {
            notesMap.set(obsNote.id, obsNote)
          }
        })

      return Array.from(notesMap.values())
    } catch (error) {
      console.error('Failed to import from GitHub:', error)
      throw error
    }
  }

  /**
   * Discover content from a Quartz site (fallback method).
   */
  private async discoverQuartzContent(baseUrl: string): Promise<ObsidianObsNote[]> {
    const obs_notes: ObsidianObsNote[] = []

    try {
      // Try to find content through common Quartz patterns
      const contentUrls = await this.findQuartzContentUrls(baseUrl)

      if (contentUrls.length === 0) {
        return obs_notes
      }

      for (const contentUrl of contentUrls) {
        try {
          const response = await fetch(contentUrl)
          if (!response.ok) {
            continue
          }

          const content = await response.text()
          const obs_note = this.parseQuartzMarkdown(content, contentUrl, baseUrl)

          // Add all notes regardless of content length
          obs_notes.push(obs_note)
        } catch (error) {
          // Silently skip failed fetches
        }
      }
    } catch (error) {
      console.warn('⚠️ Failed to discover Quartz content:', error)
    }

    return obs_notes
  }

  /**
   * Find content URLs from a Quartz site via the main page, the sitemap,
   * and common path patterns.
   */
  private async findQuartzContentUrls(baseUrl: string): Promise<string[]> {
    const urls: string[] = []

    try {
      // First, fetch the main page to discover the content structure
      console.log('🔍 Fetching main page to discover content structure...')
      const mainPageResponse = await fetch(baseUrl)
      if (mainPageResponse.ok) {
        const mainPageContent = await mainPageResponse.text()
        urls.push(baseUrl) // always include the main page

        // Look for navigation and content links on the main page
        const discoveredUrls = this.extractContentUrlsFromPage(mainPageContent, baseUrl)
        urls.push(...discoveredUrls)
      }

      // Try to find a sitemap
      const sitemapUrl = `${baseUrl}/sitemap.xml`
      try {
        const response = await fetch(sitemapUrl)
        if (response.ok) {
          const sitemap = await response.text()
          const urlMatches = sitemap.match(/<loc>(.*?)<\/loc>/g)
          if (urlMatches) {
            const sitemapUrls = urlMatches.map(match =>
              match.replace(/<\/?loc>/g, '').trim()
            ).filter(url => url.endsWith('.html') || url.endsWith('.md') || url.includes(baseUrl))
            urls.push(...sitemapUrls)
          }
        }
      } catch (error) {
        console.warn('Failed to fetch sitemap:', error)
      }

      // Probe common Quartz paths
      const commonPaths = [
        '/', // root page
        '/index.html',
        '/about',
        '/contact',
        '/notes',
        '/posts',
        '/content',
        '/pages',
        '/blog',
        '/articles'
      ]

      for (const path of commonPaths) {
        try {
          const url = path === '/' ? baseUrl : `${baseUrl}${path}`
          const response = await fetch(url)
          if (response.ok) {
            urls.push(url)
          }
        } catch (error) {
          // Ignore individual path failures
        }
      }
    } catch (error) {
      console.warn('Failed to find Quartz content URLs:', error)
    }

    // Remove duplicates and cap the result set
    const uniqueUrls = [...new Set(urls)]
    return uniqueUrls.slice(0, 50) // limit to 50 pages to avoid overwhelming the site
  }
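
  // Sitemap example (illustrative): a response containing
  //
  //   <loc>https://example.com/notes/intro.html</loc>
  //
  // yields 'https://example.com/notes/intro.html' via the <loc> matches,
  // subject to the .html/.md/base-URL filter above.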

  /**
   * Extract content URLs from a page's HTML.
   */
  private extractContentUrlsFromPage(content: string, baseUrl: string): string[] {
    const urls: string[] = []

    try {
      // Look for navigation links
      const navLinks = content.match(/<nav[^>]*>[\s\S]*?<\/nav>/gi)
      if (navLinks) {
        navLinks.forEach(nav => {
          const links = nav.match(/<a[^>]+href=["']([^"']+)["'][^>]*>/gi)
          if (links) {
            links.forEach(link => {
              const urlMatch = link.match(/href=["']([^"']+)["']/i)
              if (urlMatch) {
                const url = urlMatch[1]
                if (url.startsWith('/') && !url.startsWith('//')) {
                  urls.push(`${baseUrl}${url}`)
                } else if (url.startsWith(baseUrl)) {
                  urls.push(url)
                }
              }
            })
          }
        })
      }

      // Look for any internal links (skipping fragment links)
      const allLinks = content.match(/<a[^>]+href=["']([^"']+)["'][^>]*>/gi)
      if (allLinks) {
        allLinks.forEach(link => {
          const urlMatch = link.match(/href=["']([^"']+)["']/i)
          if (urlMatch) {
            const url = urlMatch[1]
            if (url.startsWith('/') && !url.startsWith('//') && !url.includes('#')) {
              urls.push(`${baseUrl}${url}`)
            } else if (url.startsWith(baseUrl) && !url.includes('#')) {
              urls.push(url)
            }
          }
        })
      }
    } catch (error) {
      console.warn('Error extracting URLs from page:', error)
    }

    return urls
  }

  /**
   * Parse Quartz markdown (or HTML) content into a note.
   */
  private parseQuartzMarkdown(content: string, url: string, baseUrl: string): ObsidianObsNote {
    // Extract the title from the URL or the content
    const title = this.extractTitleFromUrl(url) || this.extractTitleFromContent(content)

    // Parse frontmatter
    const frontmatter = this.parseFrontmatter(content)

    // Extract tags
    const tags = this.extractTags(content, frontmatter)

    // Extract links
    const links = this.extractLinks(content, baseUrl)

    // Clean the content (remove frontmatter, convert HTML to markdown-like text)
    let cleanContent = this.removeFrontmatter(content)

    // If the content is HTML, convert it to a more readable format
    if (cleanContent.includes('<html') || cleanContent.includes('<body')) {
      cleanContent = this.convertHtmlToMarkdown(cleanContent)
    }

    return {
      id: this.generateId(url),
      title,
      content: cleanContent,
      filePath: url,
      tags,
      created: new Date(),
      modified: new Date(),
      links,
      backlinks: [],
      frontmatter
    }
  }

  /**
   * Convert HTML content to markdown-like text.
   */
  private convertHtmlToMarkdown(html: string): string {
    let text = html

    // Remove script, style, and other non-content tags
    text = text.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    text = text.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    text = text.replace(/<nav[^>]*>[\s\S]*?<\/nav>/gi, '')
    text = text.replace(/<header[^>]*>[\s\S]*?<\/header>/gi, '')
    text = text.replace(/<footer[^>]*>[\s\S]*?<\/footer>/gi, '')
    text = text.replace(/<aside[^>]*>[\s\S]*?<\/aside>/gi, '')

    // Try to extract the main content area
    const mainMatch = text.match(/<main[^>]*>(.*?)<\/main>/is)
    if (mainMatch) {
      text = mainMatch[1]
    } else {
      // Try to find an article element
      const articleMatch = text.match(/<article[^>]*>(.*?)<\/article>/is)
      if (articleMatch) {
        text = articleMatch[1]
      } else {
        // Try common content div patterns
        const contentPatterns = [
          /<div[^>]*class="[^"]*content[^"]*"[^>]*>(.*?)<\/div>/is,
          /<div[^>]*class="[^"]*main[^"]*"[^>]*>(.*?)<\/div>/is,
          /<div[^>]*class="[^"]*post[^"]*"[^>]*>(.*?)<\/div>/is,
          /<div[^>]*class="[^"]*article[^"]*"[^>]*>(.*?)<\/div>/is,
          /<div[^>]*id="[^"]*content[^"]*"[^>]*>(.*?)<\/div>/is,
          /<div[^>]*id="[^"]*main[^"]*"[^>]*>(.*?)<\/div>/is
        ]

        for (const pattern of contentPatterns) {
          const match = text.match(pattern)
          if (match) {
            text = match[1]
            break
          }
        }
      }
    }

    // Convert headers
    text = text.replace(/<h1[^>]*>(.*?)<\/h1>/gi, '# $1\n\n')
    text = text.replace(/<h2[^>]*>(.*?)<\/h2>/gi, '## $1\n\n')
    text = text.replace(/<h3[^>]*>(.*?)<\/h3>/gi, '### $1\n\n')
    text = text.replace(/<h4[^>]*>(.*?)<\/h4>/gi, '#### $1\n\n')
    text = text.replace(/<h5[^>]*>(.*?)<\/h5>/gi, '##### $1\n\n')
    text = text.replace(/<h6[^>]*>(.*?)<\/h6>/gi, '###### $1\n\n')

    // Convert paragraphs
    text = text.replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')

    // Convert links
    text = text.replace(/<a[^>]+href=["']([^"']+)["'][^>]*>(.*?)<\/a>/gi, '[$2]($1)')

    // Convert lists
    text = text.replace(/<ul[^>]*>/gi, '')
    text = text.replace(/<\/ul>/gi, '\n')
    text = text.replace(/<ol[^>]*>/gi, '')
    text = text.replace(/<\/ol>/gi, '\n')
    text = text.replace(/<li[^>]*>(.*?)<\/li>/gi, '- $1\n')

    // Convert emphasis
    text = text.replace(/<strong[^>]*>(.*?)<\/strong>/gi, '**$1**')
    text = text.replace(/<b[^>]*>(.*?)<\/b>/gi, '**$1**')
    text = text.replace(/<em[^>]*>(.*?)<\/em>/gi, '*$1*')
    text = text.replace(/<i[^>]*>(.*?)<\/i>/gi, '*$1*')

    // Convert code
    text = text.replace(/<code[^>]*>(.*?)<\/code>/gi, '`$1`')
    text = text.replace(/<pre[^>]*>(.*?)<\/pre>/gi, '```\n$1\n```\n')

    // Convert blockquotes
    text = text.replace(/<blockquote[^>]*>(.*?)<\/blockquote>/gi, '> $1\n\n')

    // Convert line breaks
    text = text.replace(/<br[^>]*>/gi, '\n')

    // Remove remaining HTML tags
    text = text.replace(/<[^>]+>/g, '')

    // Decode HTML entities
    text = text.replace(/&amp;/g, '&')
    text = text.replace(/&lt;/g, '<')
    text = text.replace(/&gt;/g, '>')
    text = text.replace(/&quot;/g, '"')
    text = text.replace(/&#39;/g, "'")
    text = text.replace(/&nbsp;/g, ' ')

    // Clean up whitespace
    text = text.replace(/\n\s*\n\s*\n/g, '\n\n')
    text = text.trim()

    // If we still don't have much content, try to extract any text from the original HTML
    if (text.length < 50) {
      let fallbackText = html

      // Remove script, style, and other non-content tags
      fallbackText = fallbackText.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
      fallbackText = fallbackText.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
      fallbackText = fallbackText.replace(/<nav[^>]*>[\s\S]*?<\/nav>/gi, '')
      fallbackText = fallbackText.replace(/<header[^>]*>[\s\S]*?<\/header>/gi, '')
      fallbackText = fallbackText.replace(/<footer[^>]*>[\s\S]*?<\/footer>/gi, '')
      fallbackText = fallbackText.replace(/<aside[^>]*>[\s\S]*?<\/aside>/gi, '')

      // Convert basic HTML elements
      fallbackText = fallbackText.replace(/<h[1-6][^>]*>(.*?)<\/h[1-6]>/gi, '# $1\n\n')
      fallbackText = fallbackText.replace(/<p[^>]*>(.*?)<\/p>/gi, '$1\n\n')
      fallbackText = fallbackText.replace(/<div[^>]*>(.*?)<\/div>/gi, '$1\n')
      fallbackText = fallbackText.replace(/<span[^>]*>(.*?)<\/span>/gi, '$1')
      fallbackText = fallbackText.replace(/<[^>]+>/g, '')
      fallbackText = fallbackText.replace(/&amp;/g, '&')
      fallbackText = fallbackText.replace(/&lt;/g, '<')
      fallbackText = fallbackText.replace(/&gt;/g, '>')
      fallbackText = fallbackText.replace(/&quot;/g, '"')
      fallbackText = fallbackText.replace(/&#39;/g, "'")
      fallbackText = fallbackText.replace(/&nbsp;/g, ' ')
      fallbackText = fallbackText.replace(/\n\s*\n\s*\n/g, '\n\n')
      fallbackText = fallbackText.trim()

      if (fallbackText.length > text.length) {
        text = fallbackText
      }
    }

    // Final fallback: extract any text from the body
    if (text.length < 20) {
      const bodyMatch = html.match(/<body[^>]*>(.*?)<\/body>/is)
      if (bodyMatch) {
        let bodyText = bodyMatch[1]
        // Remove all HTML tags
        bodyText = bodyText.replace(/<[^>]+>/g, '')
        // Decode HTML entities
        bodyText = bodyText.replace(/&amp;/g, '&')
        bodyText = bodyText.replace(/&lt;/g, '<')
        bodyText = bodyText.replace(/&gt;/g, '>')
        bodyText = bodyText.replace(/&quot;/g, '"')
        bodyText = bodyText.replace(/&#39;/g, "'")
        bodyText = bodyText.replace(/&nbsp;/g, ' ')
        bodyText = bodyText.replace(/\s+/g, ' ').trim()

        if (bodyText.length > text.length) {
          text = bodyText
        }
      }
    }

    return text
  }
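
  // Example (illustrative): '<main><h2>Ideas</h2><p>Use <em>tags</em>.</p></main>'
  // becomes '## Ideas\n\nUse *tags*.' after tag conversion, entity decoding,
  // and whitespace cleanup.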

  /**
   * Extract a title from a URL.
   */
  private extractTitleFromUrl(url: string): string {
    try {
      const urlObj = new URL(url)
      const path = urlObj.pathname
      const segments = path.split('/').filter(segment => segment)
      const lastSegment = segments[segments.length - 1] || 'index'

      let title = lastSegment
        .replace(/\.(html|md)$/, '')
        .replace(/[-_]/g, ' ')
        .replace(/\b\w/g, l => l.toUpperCase())

      // If the title is just "index" or empty, fall back to the domain name
      if (title === 'Index' || title === '') {
        title = urlObj.hostname.replace('www.', '').replace('.com', '').replace('.xyz', '')
      }

      return title
    } catch (error) {
      // Fallback if URL parsing fails
      return url.split('/').pop() || 'Untitled'
    }
  }

  /**
   * Extract a title from content (HTML <title>, <h1>, or markdown heading).
   */
  private extractTitleFromContent(content: string): string {
    // Look for a <title> tag first
    const titleMatch = content.match(/<title[^>]*>(.*?)<\/title>/i)
    if (titleMatch) {
      let title = titleMatch[1].replace(/<[^>]*>/g, '').trim()
      // Strip common " - Site Name" / " | Site Name" suffixes
      title = title.replace(/\s*-\s*.*$/, '')
      title = title.replace(/\s*\|\s*.*$/, '')
      if (title && title !== 'Untitled') {
        return title
      }
    }

    // Look for an <h1> tag
    const h1Match = content.match(/<h1[^>]*>(.*?)<\/h1>/i)
    if (h1Match) {
      return h1Match[1].replace(/<[^>]*>/g, '').trim()
    }

    // Look for the first markdown heading
    const headingMatch = content.match(/^#\s+(.+)$/m)
    if (headingMatch) {
      return headingMatch[1].trim()
    }

    return 'Untitled'
  }

  /**
   * Extract a vault name from a URL (its hostname).
   */
  private extractVaultNameFromUrl(url: string): string {
    try {
      const urlObj = new URL(url)
      return urlObj.hostname.replace('www.', '')
    } catch (error) {
      return 'Quartz Vault'
    }
  }

  /**
   * Generate an ID from a URL.
   */
  private generateId(url: string): string {
    return url.replace(/[^a-zA-Z0-9]/g, '_')
  }

  /**
   * Parse frontmatter from content.
   */
  private parseFrontmatter(content: string): Record<string, any> {
    const frontmatterMatch = content.match(/^---\s*\n([\s\S]*?)\n---\s*\n/)
    if (frontmatterMatch) {
      return this.parseSimpleYaml(frontmatterMatch[1])
    }
    return {}
  }

  /**
   * Remove frontmatter from content.
   */
  private removeFrontmatter(content: string): string {
    return content.replace(/^---\s*\n[\s\S]*?\n---\s*\n/, '')
  }

  /**
   * Extract markdown and HTML links from content, resolving root-relative
   * URLs against the given base URL. Obsidian [[wikilinks]] are not handled.
   */
  private extractLinks(content: string, baseUrl: string): string[] {
    const links: string[] = []

    // Extract markdown links: [text](url)
    const markdownLinks = content.match(/\[([^\]]+)\]\(([^)]+)\)/g)
    if (markdownLinks) {
      markdownLinks.forEach(link => {
        const urlMatch = link.match(/\[([^\]]+)\]\(([^)]+)\)/)
        if (urlMatch) {
          const url = urlMatch[2]
          if (url.startsWith('http') || url.startsWith('/')) {
            links.push(url.startsWith('/') ? `${baseUrl}${url}` : url)
          }
        }
      })
    }

    // Extract HTML links: <a href="url">
    const htmlLinks = content.match(/<a[^>]+href=["']([^"']+)["'][^>]*>/gi)
    if (htmlLinks) {
      htmlLinks.forEach(link => {
        const urlMatch = link.match(/href=["']([^"']+)["']/i)
        if (urlMatch) {
          const url = urlMatch[1]
          if (url.startsWith('http') || url.startsWith('/')) {
            links.push(url.startsWith('/') ? `${baseUrl}${url}` : url)
          }
        }
      })
    }

    return links
  }
}
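
// Usage sketch (illustrative): importing a vault and searching it.
//
//   const importer = new ObsidianImporter()
//   const vault = await importer.importFromFileSystem() // or importFromQuartzUrl('example.com')
//   console.log(`Imported ${vault.totalObsNotes} notes from "${vault.name}"`)
//   const hits = await importer.searchNotes('project')
//   hits.forEach(note => console.log(note.title, '->', note.filePath))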