rfiles-online/files/tasks.py

79 lines
2.6 KiB
Python

"""
Celery tasks for file management.
"""
from celery import shared_task
from django.utils import timezone
@shared_task
def cleanup_expired_shares():
    """Switch off every active share link whose expiry time has passed.

    Selects ``PublicShare`` rows that are still active and have a non-null
    ``expires_at`` earlier than the current time, then deactivates them with
    a single bulk ``update`` call.

    Returns:
        A summary string containing the number reported by ``update``.
    """
    # Imported lazily inside the task, as the rest of this module does.
    from .models import PublicShare

    expired_criteria = {
        'is_active': True,
        'expires_at__isnull': False,
        'expires_at__lt': timezone.now(),
    }
    deactivated = (
        PublicShare.objects
        .filter(**expired_criteria)
        .update(is_active=False)
    )
    return f"Deactivated {deactivated} expired shares"
@shared_task
def cleanup_old_access_logs(days=90):
    """Delete access logs older than ``days`` days to save space.

    Args:
        days: Retention window in days. Logs whose ``accessed_at`` is earlier
            than ``now - days`` are deleted. Must not be negative.

    Returns:
        A summary string with the number of deleted objects and the
        retention window used.

    Raises:
        ValueError: If ``days`` is negative. A negative window would put the
            cutoff in the future and delete every log row.
    """
    # Use the standard-library timedelta: ``django.utils.timezone`` only
    # exposes ``timedelta`` as an incidental internal import, not as API.
    from datetime import timedelta

    from .models import FileAccessLog

    if days < 0:
        raise ValueError(f"days must be non-negative, got {days}")

    cutoff = timezone.now() - timedelta(days=days)
    # ``delete()`` returns (total_deleted, per_model_counts); the total also
    # includes any objects removed through cascades.
    deleted, _ = FileAccessLog.objects.filter(accessed_at__lt=cutoff).delete()
    return f"Deleted {deleted} access logs older than {days} days"
@shared_task
def process_media_file(media_file_id):
    """Process an uploaded media file: OCR for PDFs, otherwise just mark it done.

    PDFs are uploaded to the OCR service (base URL taken from the
    ``OCR_SERVICE_URL`` environment variable, defaulting to
    ``http://pdf-ocr:8000``) and the returned text is stored in
    ``extracted_text``. Any other file is marked as processed without
    further work.

    Args:
        media_file_id: Primary key of the ``MediaFile`` to process.

    Returns:
        A human-readable status string in every case. Exceptions raised
        while processing are caught, stored in ``processing_error`` and
        reported in the returned string rather than propagated.
    """
    import os

    import requests

    from .models import MediaFile

    try:
        media_file = MediaFile.objects.get(id=media_file_id)
    except MediaFile.DoesNotExist:
        return f"MediaFile {media_file_id} not found"

    if media_file.is_processed:
        return f"MediaFile {media_file_id} already processed"

    try:
        if media_file.is_pdf:
            # Strip a trailing slash so a configured "http://host/" does not
            # produce a "//ocr/sync" path.
            ocr_url = os.environ.get('OCR_SERVICE_URL', 'http://pdf-ocr:8000').rstrip('/')
            with media_file.file.open('rb') as f:
                response = requests.post(
                    f'{ocr_url}/ocr/sync',
                    files={'file': (media_file.original_filename, f, 'application/pdf')},
                    timeout=300,
                )

            # The upload is finished, so the file handle is released before
            # the response is inspected.
            if response.status_code != 200:
                media_file.processing_error = f"OCR failed: {response.status_code}"
                media_file.save(update_fields=['processing_error'])
                return f"OCR failed for {media_file_id}: {response.status_code}"

            result = response.json()
            # ``or ''`` also covers an explicit ``"text": null`` in the
            # response, which would otherwise make ``len()`` below raise
            # after the record has already been saved as processed.
            media_file.extracted_text = result.get('text') or ''
            media_file.is_processed = True
            media_file.save(update_fields=['extracted_text', 'is_processed'])
            return f"OCR completed for {media_file_id}: {len(media_file.extracted_text)} chars"

        # Non-PDF files need no processing; just record that they were seen.
        media_file.is_processed = True
        media_file.save(update_fields=['is_processed'])
        return f"Marked {media_file_id} as processed (no processing needed)"
    except Exception as e:
        # Deliberate best-effort boundary: record the failure on the row and
        # report it instead of failing the task.
        media_file.processing_error = str(e)
        media_file.save(update_fields=['processing_error'])
        return f"Error processing {media_file_id}: {e}"