79 lines
2.6 KiB
Python
79 lines
2.6 KiB
Python
"""
|
|
Celery tasks for file management.
|
|
"""
|
|
|
|
from celery import shared_task
|
|
from django.utils import timezone
|
|
|
|
|
|
@shared_task
def cleanup_expired_shares():
    """Switch off every active share link whose expiry time has passed.

    Selects ``PublicShare`` rows that are still active, have an
    ``expires_at`` value, and whose ``expires_at`` is earlier than the
    current time, then sets ``is_active`` to ``False`` on all of them with a
    single queryset ``update()``.

    Returns:
        A summary string containing the count reported by ``update()``.
    """
    # Imported inside the task body rather than at module level
    # (presumably to avoid import-order problems at worker start-up; confirm).
    from .models import PublicShare

    stale_shares = PublicShare.objects.filter(
        is_active=True,
        expires_at__isnull=False,
        expires_at__lt=timezone.now(),
    )
    deactivated = stale_shares.update(is_active=False)
    return f"Deactivated {deactivated} expired shares"
|
|
|
|
|
|
@shared_task
def cleanup_old_access_logs(days=90):
    """Delete old access logs to save space.

    Removes every ``FileAccessLog`` whose ``accessed_at`` is earlier than
    ``days`` days before the current time.

    Args:
        days: Retention window in days; logs older than this are deleted.

    Returns:
        A summary string with the number of objects reported as deleted by
        the queryset ``delete()`` call and the retention window used.
    """
    # ``timedelta`` comes from the standard library: ``django.utils.timezone``
    # only exposes it as an incidental internal import, not as public API.
    from datetime import timedelta

    from .models import FileAccessLog

    cutoff = timezone.now() - timedelta(days=days)
    # delete() returns (total_deleted, per_model_counts); only the total is reported.
    deleted, _ = FileAccessLog.objects.filter(accessed_at__lt=cutoff).delete()
    return f"Deleted {deleted} access logs older than {days} days"
|
|
|
|
|
|
@shared_task
def process_media_file(media_file_id):
    """Process an uploaded media file (OCR, metadata extraction).

    PDFs are posted to the OCR service, whose base URL is read from the
    ``OCR_SERVICE_URL`` environment variable (default ``http://pdf-ocr:8000``),
    and the returned text is stored in ``extracted_text``. Any other file is
    marked as processed without further work.

    Args:
        media_file_id: ``id`` of the ``MediaFile`` row to process.

    Returns:
        A human-readable status string. Failures are reported through this
        string (and stored in ``processing_error``) instead of being raised.
    """
    import os

    import requests

    from .models import MediaFile

    try:
        media_file = MediaFile.objects.get(id=media_file_id)
    except MediaFile.DoesNotExist:
        return f"MediaFile {media_file_id} not found"

    if media_file.is_processed:
        return f"MediaFile {media_file_id} already processed"

    try:
        if media_file.is_pdf:
            # Strip trailing slashes so a configured "http://host/" does not
            # yield a "//ocr/sync" path.
            ocr_url = os.environ.get('OCR_SERVICE_URL', 'http://pdf-ocr:8000').rstrip('/')

            with media_file.file.open('rb') as f:
                response = requests.post(
                    f'{ocr_url}/ocr/sync',
                    files={'file': (media_file.original_filename, f, 'application/pdf')},
                    timeout=300,
                )

            if response.status_code != 200:
                media_file.processing_error = f"OCR failed: {response.status_code}"
                media_file.save(update_fields=['processing_error'])
                return f"OCR failed for {media_file_id}: {response.status_code}"

            result = response.json()
            # A missing or JSON-null "text" becomes an empty string; otherwise
            # None would be stored and the len() below would raise after the
            # record had already been saved as processed.
            media_file.extracted_text = result.get('text') or ''
            media_file.is_processed = True
            media_file.save(update_fields=['extracted_text', 'is_processed'])
            return f"OCR completed for {media_file_id}: {len(media_file.extracted_text)} chars"

        # Non-PDF files need no processing; just flag them as done.
        media_file.is_processed = True
        media_file.save(update_fields=['is_processed'])
        return f"Marked {media_file_id} as processed (no processing needed)"

    except Exception as e:
        # Task-level boundary: record whatever went wrong (network, storage,
        # malformed response) on the row instead of letting the task raise.
        media_file.processing_error = str(e)
        media_file.save(update_fields=['processing_error'])
        return f"Error processing {media_file_id}: {e}"
|