Make ingress async to avoid Cloudflare timeout

The LLM analysis step was taking too long and causing 524 errors.
Now returns immediately and processes in background.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Jeff Emmett 2026-01-24 10:46:09 +01:00
parent 4ebd90cc64
commit dcd576944c
1 changed files with 21 additions and 15 deletions

View File

@@ -188,13 +188,21 @@ async def chat_suggestions(q: str = ""):
# --- Ingress Endpoints ---
async def process_ingress_background(url: str):
    """Run the ingress pipeline on *url* as a background task.

    Intended for FastAPI's BackgroundTasks, which discards exceptions
    raised by tasks — so failures are caught and reported here rather
    than vanishing silently. Always returns None.
    """
    try:
        await ingress_pipeline.process(url)
    except Exception as e:
        # No caller to propagate to: report and swallow. Print the full
        # traceback too — the one-line message alone makes background
        # failures very hard to debug.
        import traceback
        print(f"Ingress error for {url}: {e}")
        traceback.print_exc()
@app.post("/ingress", response_model=IngressResponse)
async def ingress(request: IngressRequest, background_tasks: BackgroundTasks):
    """
    Process an external article URL through the ingress pipeline.
    Returns immediately and processes in the background.
    Check /review endpoint for results.
    """
    if not ingress_pipeline:
        raise HTTPException(
            # NOTE(review): this argument fell on a diff-hunk boundary in the
            # source view — confirm the original status code (503 assumed for
            # "service not initialized").
            status_code=503,
            detail="Ingress pipeline not initialized. Run indexing first.",
        )
    # Hand the slow scrape/analyze/draft work to a background task so the
    # HTTP response returns before the Cloudflare proxy timeout (524).
    background_tasks.add_task(process_ingress_background, str(request.url))
    # The real counts are only known once the background task completes,
    # so respond with a "processing" placeholder.
    return IngressResponse(
        status="processing",
        message="Article submitted for processing. Check the Review tab for results.",
        scraped_title=None,
        topics_found=0,
        wiki_matches=0,
        drafts_generated=0,
        queue_file=None,
    )
# --- Review Queue Endpoints ---