# legal-doc-masker/backend/app/services/file_service.py
#
# 89 lines
# 2.8 KiB
# Python

from celery import Celery
from ..core.config import settings
from ..models.file import File, FileStatus
from sqlalchemy.orm import Session
from ..core.database import SessionLocal
import sys
import os
from ..core.services.document_service import DocumentService
from pathlib import Path
from fastapi import HTTPException
# Celery application for this module, registered under the name
# 'file_service'. The broker and result-backend URLs are read from the
# project settings object.
celery = Celery(
    'file_service',
    broker=settings.CELERY_BROKER_URL,
    backend=settings.CELERY_RESULT_BACKEND,
)
def delete_file(file_id: str):
    """
    Delete a file and its associated records.

    This will, in order:
    1. Delete the original uploaded file (if it exists)
    2. Delete the processed markdown file (if it exists)
    3. Delete the database record

    Args:
        file_id: Value matched against ``File.id`` to find the record.

    Raises:
        HTTPException: With status 404 when no record matches ``file_id``;
            with status 500 when removing the files or the database record
            fails for any other reason.
    """
    db = SessionLocal()
    try:
        # Get the file record
        file = db.query(File).filter(File.id == file_id).first()
        if not file:
            raise HTTPException(status_code=404, detail="File not found")

        # Remove the on-disk artifacts. Attempt the removal directly and
        # tolerate a missing file instead of checking existence first, so a
        # file that disappears between the check and the removal is not
        # reported as an error.
        for path in (file.original_path, file.processed_path):
            if not path:
                continue
            try:
                os.remove(path)
            except FileNotFoundError:
                # Already gone; nothing to clean up.
                pass

        # Delete the database record
        db.delete(file)
        db.commit()
    except HTTPException:
        # Deliberate HTTP errors (the 404 above) must reach the caller
        # unchanged instead of being rewrapped as a 500 below.
        raise
    except Exception as e:
        db.rollback()
        raise HTTPException(
            status_code=500, detail=f"Error deleting file: {str(e)}"
        ) from e
    finally:
        db.close()
@celery.task
def process_file(file_id: str):
    """
    Celery task: run document processing for one file record.

    Looks up the ``File`` row whose id equals ``file_id``, marks it
    PROCESSING, runs ``DocumentService.process_document`` on its original
    path, and writes the result to ``<PROCESSED_FOLDER>/<file_id>.md``.
    On success the row gets the processed path and status SUCCESS; on
    failure it gets status FAILED plus the error message.

    Args:
        file_id: Value matched against ``File.id`` to find the record.

    Returns:
        None. Returns silently when no matching record exists.

    Raises:
        Exception: Any error raised while processing or saving the result
            is re-raised after the failure has been recorded, so that
            Celery marks the task as failed.
    """
    db = SessionLocal()
    try:
        file = db.query(File).filter(File.id == file_id).first()
        if not file:
            # No such record: nothing to process.
            return

        # Update status to processing
        file.status = FileStatus.PROCESSING
        db.commit()

        try:
            process_service = DocumentService()

            # Determine output path using file_id with .md extension
            output_filename = f"{file_id}.md"
            output_path = str(settings.PROCESSED_FOLDER / output_filename)

            # Process document with both input and output paths.
            # This will raise an exception if processing fails.
            process_service.process_document(file.original_path, output_path)

            # Update file record with processed path
            file.processed_path = output_path
            file.status = FileStatus.SUCCESS
            db.commit()
        except Exception as e:
            # Reset the session first: if the failure came from the database
            # layer (e.g. the commit above), the session cannot be used again
            # until it is rolled back, and the FAILED status would otherwise
            # never be stored.
            db.rollback()
            file.status = FileStatus.FAILED
            file.error_message = str(e)
            db.commit()
            # Re-raise the exception to ensure Celery marks the task as failed
            raise
    finally:
        db.close()