Merge branch 'dev'
CI/CD / deploy (push) Successful in 6m3s
Details
CI/CD / deploy (push) Successful in 6m3s
Details
This commit is contained in:
commit
9a857c7bc2
15
Dockerfile
15
Dockerfile
|
|
@ -26,16 +26,27 @@ RUN apt-get update && apt-get install -y --no-install-recommends curl xz-utils c
|
||||||
&& rm -rf /tmp/typst* \
|
&& rm -rf /tmp/typst* \
|
||||||
&& chmod +x /usr/local/bin/typst
|
&& chmod +x /usr/local/bin/typst
|
||||||
|
|
||||||
|
# MarkItDown venv stage — builds a self-contained virtualenv at /opt/markitdown
# that a later stage can copy in.
#
# The converters for individual formats are optional extras of the markitdown
# package; a bare `pip install markitdown` installs the CLI without them, and
# converting a PDF/DOCX/PPTX/XLSX then fails with a missing-dependency error.
# Install exactly the extras for the formats the application accepts.
FROM debian:bookworm-slim AS markitdown
RUN apt-get update && apt-get install -y --no-install-recommends python3 python3-venv \
    && python3 -m venv /opt/markitdown \
    && /opt/markitdown/bin/pip install --no-cache-dir "markitdown[pdf,docx,pptx,xlsx]" \
    && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Production stage
|
# Production stage
|
||||||
FROM oven/bun:1-slim AS production
|
FROM oven/bun:1-slim AS production
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
|
|
||||||
# Install CA certificates for outbound HTTPS (link-preview, etc.)
|
# Install CA certificates + python3 runtime (for markitdown)
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates && rm -rf /var/lib/apt/lists/*
|
RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates python3 && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
# Install Typst binary (for rPubs PDF generation)
|
# Install Typst binary (for rPubs PDF generation)
|
||||||
COPY --from=typst /usr/local/bin/typst /usr/local/bin/typst
|
COPY --from=typst /usr/local/bin/typst /usr/local/bin/typst
|
||||||
|
|
||||||
|
# Install MarkItDown venv (for office document conversion)
|
||||||
|
COPY --from=markitdown /opt/markitdown /opt/markitdown
|
||||||
|
ENV PATH="/opt/markitdown/bin:$PATH"
|
||||||
|
|
||||||
# Copy built assets and server
|
# Copy built assets and server
|
||||||
COPY --from=build /app/dist ./dist
|
COPY --from=build /app/dist ./dist
|
||||||
COPY --from=build /app/server ./server
|
COPY --from=build /app/server ./server
|
||||||
|
|
|
||||||
|
|
@ -13,15 +13,16 @@ import TurndownService from 'turndown';
|
||||||
import { markdownToTiptap, extractPlainTextFromTiptap } from './markdown-tiptap';
|
import { markdownToTiptap, extractPlainTextFromTiptap } from './markdown-tiptap';
|
||||||
import { hashContent } from './index';
|
import { hashContent } from './index';
|
||||||
import type { ConvertedNote } from './index';
|
import type { ConvertedNote } from './index';
|
||||||
|
import { isMarkitdownFormat, convertWithMarkitdown } from './markitdown';
|
||||||
|
|
||||||
const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
|
const turndown = new TurndownService({ headingStyle: 'atx', codeBlockStyle: 'fenced' });
|
||||||
|
|
||||||
/** Dispatch file import by extension / MIME type. */
|
/** Dispatch file import by extension / MIME type. */
|
||||||
export function importFile(
|
export async function importFile(
|
||||||
filename: string,
|
filename: string,
|
||||||
data: Uint8Array,
|
data: Uint8Array,
|
||||||
mimeType?: string,
|
mimeType?: string,
|
||||||
): ConvertedNote {
|
): Promise<ConvertedNote> {
|
||||||
const ext = filename.substring(filename.lastIndexOf('.')).toLowerCase();
|
const ext = filename.substring(filename.lastIndexOf('.')).toLowerCase();
|
||||||
const textContent = () => new TextDecoder().decode(data);
|
const textContent = () => new TextDecoder().decode(data);
|
||||||
|
|
||||||
|
|
@ -37,6 +38,9 @@ export function importFile(
|
||||||
if (['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.bmp'].includes(ext)) {
|
if (['.jpg', '.jpeg', '.png', '.gif', '.webp', '.svg', '.bmp'].includes(ext)) {
|
||||||
return importImageFile(filename, data, mimeType || guessMime(ext));
|
return importImageFile(filename, data, mimeType || guessMime(ext));
|
||||||
}
|
}
|
||||||
|
if (isMarkitdownFormat(filename)) {
|
||||||
|
return importOfficeFile(filename, data, mimeType);
|
||||||
|
}
|
||||||
|
|
||||||
// Default: treat as text
|
// Default: treat as text
|
||||||
try {
|
try {
|
||||||
|
|
@ -47,6 +51,19 @@ export function importFile(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Import an office document (PDF/DOCX/PPTX/XLSX).
 *
 * The bytes are converted to Markdown with `convertWithMarkitdown`, and that
 * Markdown is imported through `importMarkdownFile`. The note's `attachments`
 * are then set to a single entry holding the untouched original file, so the
 * source document travels with the converted content.
 *
 * @param filename Name of the uploaded file.
 * @param data Raw file contents.
 * @param mimeType MIME type of the upload; when missing or empty the
 *   attachment is labelled `application/octet-stream`.
 * @returns The note produced from the converted Markdown.
 */
async function importOfficeFile(
  filename: string,
  data: Uint8Array,
  mimeType?: string,
): Promise<ConvertedNote> {
  const converted = importMarkdownFile(
    filename,
    await convertWithMarkitdown(filename, data),
  );

  const original = {
    filename,
    data,
    mimeType: mimeType ? mimeType : 'application/octet-stream',
  };
  converted.attachments = [original];

  return converted;
}
|
||||||
|
|
||||||
/** Import a markdown file. */
|
/** Import a markdown file. */
|
||||||
export function importMarkdownFile(filename: string, content: string): ConvertedNote {
|
export function importMarkdownFile(filename: string, content: string): ConvertedNote {
|
||||||
const title = titleFromFilename(filename);
|
const title = titleFromFilename(filename);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,55 @@
|
||||||
|
/**
|
||||||
|
* MarkItDown converter — spawns Microsoft's markitdown CLI to convert
|
||||||
|
* office documents (PDF, DOCX, PPTX, XLSX) to Markdown.
|
||||||
|
*
|
||||||
|
* Follows the same Bun.spawn pattern as rpubs/typst-compile.ts.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { writeFile, readFile, mkdir, rm } from "node:fs/promises";
|
||||||
|
import { join } from "node:path";
|
||||||
|
import { randomUUID } from "node:crypto";
|
||||||
|
|
||||||
|
export const MARKITDOWN_EXTS = [".pdf", ".docx", ".pptx", ".xlsx"] as const;
|
||||||
|
|
||||||
|
/**
 * Tell whether `filename` carries one of the extensions in `MARKITDOWN_EXTS`.
 *
 * The extension is everything from the last `.` onward, compared
 * case-insensitively.
 */
export function isMarkitdownFormat(filename: string): boolean {
  const dotAt = filename.lastIndexOf(".");
  const suffix = filename.substring(dotAt).toLowerCase();
  const supported: readonly string[] = MARKITDOWN_EXTS;
  return supported.includes(suffix);
}
|
||||||
|
|
||||||
|
/**
 * Convert an office file to Markdown via the markitdown CLI.
 *
 * The bytes are written into a fresh scratch directory under /tmp, the
 * `markitdown` executable is run on that file with `-o`, and the Markdown it
 * writes is read back. The scratch directory is removed afterwards whether or
 * not the conversion succeeded.
 *
 * @param filename Original file name, typically supplied by the uploader.
 *   Only its extension is used (so the on-disk input keeps its format
 *   suffix); the name itself never becomes part of a filesystem path.
 * @param data Raw file contents.
 * @returns The Markdown text produced by markitdown.
 * @throws Error when markitdown exits with a non-zero status; the message
 *   carries the exit code and the captured stderr.
 */
export async function convertWithMarkitdown(
  filename: string,
  data: Uint8Array,
): Promise<string> {
  const jobId = randomUUID();
  const tmpDir = join("/tmp", `markitdown-${jobId}`);
  await mkdir(tmpDir, { recursive: true });

  // `filename` is untrusted: joining it verbatim would let a name such as
  // "../../srv/x.pdf" resolve outside tmpDir. Keep only the extension, and
  // only when it is a plain ".alnum" suffix with no path separators.
  const dotAt = filename.lastIndexOf(".");
  const rawExt = dotAt >= 0 ? filename.slice(dotAt).toLowerCase() : "";
  const safeExt = /^\.[a-z0-9]+$/.test(rawExt) ? rawExt : "";

  const inputPath = join(tmpDir, `input${safeExt}`);
  const outputPath = join(tmpDir, "output.md");

  try {
    await writeFile(inputPath, data);

    const proc = Bun.spawn(["markitdown", inputPath, "-o", outputPath], {
      // The result goes to outputPath, so stdout is not needed. Leaving it as
      // an unread pipe could stall the child once the pipe buffer fills.
      stdout: "ignore",
      stderr: "pipe",
    });

    // Drain stderr while waiting for exit; reading it only afterwards can
    // deadlock if the child writes more than the pipe can hold.
    const [exitCode, stderr] = await Promise.all([
      proc.exited,
      new Response(proc.stderr).text(),
    ]);

    if (exitCode !== 0) {
      throw new Error(`markitdown failed (exit ${exitCode}): ${stderr}`);
    }

    return await readFile(outputPath, "utf-8");
  } finally {
    // Best-effort cleanup: a failure to delete must not mask the real result.
    await rm(tmpDir, { recursive: true, force: true }).catch(() => {});
  }
}
|
||||||
|
|
@ -742,7 +742,7 @@ routes.post("/api/import/files", async (c) => {
|
||||||
for (const file of files) {
|
for (const file of files) {
|
||||||
try {
|
try {
|
||||||
const data = new Uint8Array(await file.arrayBuffer());
|
const data = new Uint8Array(await file.arrayBuffer());
|
||||||
const note = importFile(file.name, data, file.type || undefined);
|
const note = await importFile(file.name, data, file.type || undefined);
|
||||||
convertedNotes.push(note);
|
convertedNotes.push(note);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
warnings.push(`Failed to import ${file.name}: ${(err as Error).message}`);
|
warnings.push(`Failed to import ${file.name}: ${(err as Error).message}`);
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ import { getRecentTasksForMI } from "../modules/rtasks/mod";
|
||||||
import { getRecentDocsForMI } from "../modules/rdocs/mod";
|
import { getRecentDocsForMI } from "../modules/rdocs/mod";
|
||||||
import { generateImage, generateVideoViaFal } from "./mi-media";
|
import { generateImage, generateVideoViaFal } from "./mi-media";
|
||||||
import { queryModuleContent } from "./mi-data-queries";
|
import { queryModuleContent } from "./mi-data-queries";
|
||||||
|
import { convertWithMarkitdown, isMarkitdownFormat } from "../modules/rdocs/converters/markitdown";
|
||||||
|
|
||||||
const mi = new Hono();
|
const mi = new Hono();
|
||||||
|
|
||||||
|
|
@ -561,6 +562,30 @@ mi.post("/execute-server-action", async (c) => {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ── POST /extract-text — convert office files to markdown via markitdown ──
|
||||||
|
|
||||||
|
/**
 * POST /extract-text
 *
 * Expects a form-data body with a `file` field holding a PDF, DOCX, PPTX or
 * XLSX upload. Responds with `{ markdown, filename }` on success, with a 400
 * and `{ error }` when the upload is missing or has an unsupported extension,
 * and with a 500 and `{ error }` when conversion fails.
 */
mi.post("/extract-text", async (c) => {
  // A body that is not valid form data makes formData() reject. That is a
  // malformed request, so answer 400 instead of letting it become a 500.
  let formData: FormData;
  try {
    formData = await c.req.formData();
  } catch {
    return c.json({ error: "file required (FormData)" }, 400);
  }

  const file = formData.get("file");
  if (!file || typeof file === "string" || !("arrayBuffer" in file)) {
    return c.json({ error: "file required (FormData)" }, 400);
  }

  const upload = file as File;
  const filename = upload.name || "upload";
  if (!isMarkitdownFormat(filename)) {
    return c.json({ error: `Unsupported format: ${filename}` }, 400);
  }

  try {
    const data = new Uint8Array(await upload.arrayBuffer());
    const markdown = await convertWithMarkitdown(filename, data);
    return c.json({ markdown, filename });
  } catch (e: unknown) {
    // Thrown values are not guaranteed to be Error instances.
    const message = e instanceof Error ? e.message : String(e);
    console.error("[mi/extract-text] Error:", message);
    return c.json({ error: "Conversion failed: " + message }, 500);
  }
});
|
||||||
|
|
||||||
// ── POST /suggestions — dynamic data-driven suggestions ──
|
// ── POST /suggestions — dynamic data-driven suggestions ──
|
||||||
|
|
||||||
mi.post("/suggestions", async (c) => {
|
mi.post("/suggestions", async (c) => {
|
||||||
|
|
|
||||||
|
|
@ -4450,6 +4450,24 @@ Use real coordinates, YYYY-MM-DD dates, ISO currency codes. Ask clarifying quest
|
||||||
reader.readAsDataURL(file);
|
reader.readAsDataURL(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Upload a dropped office file to /api/mi/extract-text and, when the server
 * returns Markdown, hand that text to startTriage as a "drop".
 *
 * Failures are logged to the console and otherwise ignored; nothing is
 * thrown to the caller.
 *
 * @param file The File object taken from the drop event.
 */
async function handleOfficeFile(file) {
  try {
    const form = new FormData();
    form.append("file", file);

    const res = await fetch("/api/mi/extract-text", { method: "POST", body: form });

    // Error responses are not guaranteed to be JSON (e.g. a gateway error
    // page), so a parse failure must not hide the HTTP status.
    let data = null;
    try {
      data = await res.json();
    } catch {
      data = null;
    }

    if (!res.ok || !data || data.error) {
      console.error("[canvas] office extract failed:", data?.error || `HTTP ${res.status}`);
      return;
    }

    if (data.markdown) {
      startTriage(data.markdown, "drop");
    }
  } catch (err) {
    // Network-level failures (fetch rejected) end up here.
    console.error("[canvas] office file conversion failed:", err);
  }
}
|
||||||
|
|
||||||
function handleUrl(url) {
|
function handleUrl(url) {
|
||||||
if (IMAGE_EXT_RE.test(url)) {
|
if (IMAGE_EXT_RE.test(url)) {
|
||||||
window.__canvasApi.newShape("folk-image", { src: url });
|
window.__canvasApi.newShape("folk-image", { src: url });
|
||||||
|
|
@ -4481,7 +4499,19 @@ Use real coordinates, YYYY-MM-DD dates, ISO currency codes. Ask clarifying quest
|
||||||
dragEnterCount = 0;
|
dragEnterCount = 0;
|
||||||
overlay.classList.remove("active");
|
overlay.classList.remove("active");
|
||||||
|
|
||||||
// 1. Check for image files
|
// 1. Check for office files (PDF, DOCX, PPTX, XLSX) → extract text → triage
|
||||||
|
const OFFICE_EXTS = [".pdf", ".docx", ".pptx", ".xlsx"];
|
||||||
|
const officeFile = Array.from(e.dataTransfer?.files || []).find(f => {
|
||||||
|
const ext = f.name.substring(f.name.lastIndexOf(".")).toLowerCase();
|
||||||
|
return OFFICE_EXTS.includes(ext);
|
||||||
|
});
|
||||||
|
if (officeFile) {
|
||||||
|
e.preventDefault();
|
||||||
|
handleOfficeFile(officeFile);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Check for image files
|
||||||
const imageFile = Array.from(e.dataTransfer?.files || []).find(f => f.type.startsWith("image/"));
|
const imageFile = Array.from(e.dataTransfer?.files || []).find(f => f.type.startsWith("image/"));
|
||||||
if (imageFile) {
|
if (imageFile) {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
|
|
@ -4489,7 +4519,7 @@ Use real coordinates, YYYY-MM-DD dates, ISO currency codes. Ask clarifying quest
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2. Check for text/URL
|
// 3. Check for text/URL
|
||||||
const text = (e.dataTransfer?.getData("text/plain") || e.dataTransfer?.getData("text/uri-list") || "").trim();
|
const text = (e.dataTransfer?.getData("text/plain") || e.dataTransfer?.getData("text/uri-list") || "").trim();
|
||||||
if (text) {
|
if (text) {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue