"""
Celery tasks for file management.
"""

import logging
from datetime import timedelta

from celery import shared_task
from django.utils import timezone

logger = logging.getLogger(__name__)


@shared_task
def cleanup_expired_shares():
    """Deactivate share links whose expiry time has passed.

    Selects ``PublicShare`` rows that are still active, have a non-null
    ``expires_at`` and whose ``expires_at`` is earlier than the current time,
    then flips ``is_active`` to ``False`` with a single bulk ``UPDATE``.

    Returns:
        A human-readable summary containing the number of rows updated.
    """
    # Imported lazily, as in the rest of this module, so that loading the
    # task module does not require the models to be importable yet.
    from .models import PublicShare

    now = timezone.now()
    expired_shares = PublicShare.objects.filter(
        is_active=True,
        expires_at__isnull=False,
        expires_at__lt=now,
    )
    count = expired_shares.update(is_active=False)
    return f"Deactivated {count} expired shares"


@shared_task
def cleanup_old_access_logs(days=90):
    """Delete access logs older than ``days`` days to save space.

    Args:
        days: Retention window in days. Rows whose ``accessed_at`` is earlier
            than ``now - days`` are deleted. ``0`` deletes everything accessed
            before the current instant.

    Returns:
        A human-readable summary containing the number of objects deleted
        (the first element of the tuple returned by ``QuerySet.delete()``).

    Raises:
        ValueError: If ``days`` is negative. A negative window would place the
            cutoff in the future and wipe every log row.
    """
    from .models import FileAccessLog

    if days < 0:
        raise ValueError(f"days must be non-negative, got {days}")

    # Use datetime.timedelta directly; django.utils.timezone only exposes
    # ``timedelta`` as a side effect of its own internal imports.
    cutoff = timezone.now() - timedelta(days=days)
    deleted, _ = FileAccessLog.objects.filter(accessed_at__lt=cutoff).delete()
    return f"Deleted {deleted} access logs older than {days} days"


@shared_task
def process_media_file(media_file_id):
    """Process an uploaded media file (OCR, metadata extraction).

    PDFs are posted to the OCR service at ``$OCR_SERVICE_URL`` (default
    ``http://pdf-ocr:8000``) and the returned text is stored on the record.
    Any other file is simply marked as processed.

    Args:
        media_file_id: Primary key of the ``MediaFile`` to process.

    Returns:
        A human-readable status string. Failures are reported through the
        return value and stored in ``processing_error`` rather than raised,
        so the task itself completes normally.
    """
    import os

    import requests

    from .models import MediaFile

    try:
        media_file = MediaFile.objects.get(id=media_file_id)
    except MediaFile.DoesNotExist:
        return f"MediaFile {media_file_id} not found"

    if media_file.is_processed:
        return f"MediaFile {media_file_id} already processed"

    try:
        if media_file.is_pdf:
            ocr_url = os.environ.get('OCR_SERVICE_URL', 'http://pdf-ocr:8000')
            # Keep the file open only for the duration of the upload.
            with media_file.file.open('rb') as f:
                response = requests.post(
                    f'{ocr_url}/ocr/sync',
                    files={'file': (media_file.original_filename, f, 'application/pdf')},
                    timeout=300,
                )

            if response.status_code != 200:
                # Leave is_processed unset so the file can be picked up again.
                media_file.processing_error = f"OCR failed: {response.status_code}"
                media_file.save(update_fields=['processing_error'])
                return f"OCR failed for {media_file_id}: {response.status_code}"

            result = response.json()
            # ``or ''`` also covers an explicit ``"text": null`` in the reply,
            # which ``.get('text', '')`` would pass through as None.
            media_file.extracted_text = result.get('text') or ''
            media_file.is_processed = True
            # NOTE(review): a processing_error left by an earlier failed
            # attempt is not cleared here - confirm whether it should be.
            media_file.save(update_fields=['extracted_text', 'is_processed'])
            return f"OCR completed for {media_file_id}: {len(media_file.extracted_text)} chars"

        media_file.is_processed = True
        media_file.save(update_fields=['is_processed'])
        return f"Marked {media_file_id} as processed (no processing needed)"

    except Exception as e:
        # Task boundary: stay best-effort, but keep the traceback in the logs
        # instead of reducing it to a one-line message.
        logger.exception("Error processing MediaFile %s", media_file_id)
        media_file.processing_error = str(e)
        media_file.save(update_fields=['processing_error'])
        return f"Error processing {media_file_id}: {e}"