feat: 增加backend
This commit is contained in:
parent
47e78c35bb
commit
76b0351f8f
|
|
@ -0,0 +1,24 @@
|
||||||
|
# syntax=docker/dockerfile:1
FROM python:3.11-slim

WORKDIR /app

# Install build tools needed to compile native wheels; skip recommended
# packages and drop the apt cache in the same layer so neither bloats the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first to leverage Docker cache: the dependency layer
# is only rebuilt when requirements.txt changes, not on every code edit.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application
COPY . .

# Create storage directories and switch to an unprivileged user so the
# service does not run as root inside the container.
RUN mkdir -p storage/uploads storage/processed \
    && groupadd --system app \
    && useradd --system --gid app --home /app app \
    && chown -R app:app /app
USER app

# Expose the port the app runs on (documentation only; publish with -p)
EXPOSE 8000

# Exec-form CMD so uvicorn is PID 1 and receives SIGTERM from `docker stop`
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||||
|
|
@ -0,0 +1,103 @@
|
||||||
|
# Legal Document Masker API
|
||||||
|
|
||||||
|
This is the backend API for the Legal Document Masking system. It provides endpoints for file upload, processing status tracking, and file download.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
- Python 3.9+
|
||||||
|
- Redis (for Celery)
|
||||||
|
|
||||||
|
## File Storage
|
||||||
|
|
||||||
|
Files are stored in the following structure:
|
||||||
|
```
|
||||||
|
backend/
|
||||||
|
├── storage/
|
||||||
|
│ ├── uploads/ # Original uploaded files
|
||||||
|
│ └── processed/ # Masked/processed files
|
||||||
|
```
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
### Option 1: Local Development
|
||||||
|
|
||||||
|
1. Create a virtual environment:
|
||||||
|
```bash
|
||||||
|
python -m venv venv
|
||||||
|
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||||
|
```
|
||||||
|
|
||||||
|
2. Install dependencies:
|
||||||
|
```bash
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
3. Set up environment variables:
|
||||||
|
Create a `.env` file in the backend directory with the following variables:
|
||||||
|
```env
|
||||||
|
SECRET_KEY=your-secret-key-here
|
||||||
|
```
|
||||||
|
|
||||||
|
The database (SQLite) will be automatically created when you first run the application.
|
||||||
|
|
||||||
|
4. Start Redis (required for Celery):
|
||||||
|
```bash
|
||||||
|
redis-server
|
||||||
|
```
|
||||||
|
|
||||||
|
5. Start Celery worker:
|
||||||
|
```bash
|
||||||
|
celery -A app.services.file_service worker --loglevel=info
|
||||||
|
```
|
||||||
|
|
||||||
|
6. Start the FastAPI server:
|
||||||
|
```bash
|
||||||
|
uvicorn app.main:app --reload
|
||||||
|
```
|
||||||
|
|
||||||
|
### Option 2: Docker Deployment
|
||||||
|
|
||||||
|
1. Build and start the services:
|
||||||
|
```bash
|
||||||
|
docker-compose up --build
|
||||||
|
```
|
||||||
|
|
||||||
|
This will start:
|
||||||
|
- FastAPI server on port 8000
|
||||||
|
- Celery worker for background processing
|
||||||
|
- Redis for task queue
|
||||||
|
|
||||||
|
## API Documentation
|
||||||
|
|
||||||
|
Once the server is running, you can access:
|
||||||
|
- Swagger UI: `http://localhost:8000/docs`
|
||||||
|
- ReDoc: `http://localhost:8000/redoc`
|
||||||
|
|
||||||
|
## API Endpoints
|
||||||
|
|
||||||
|
- `POST /api/v1/files/upload` - Upload a new file
|
||||||
|
- `GET /api/v1/files` - List all files
|
||||||
|
- `GET /api/v1/files/{file_id}` - Get file details
|
||||||
|
- `GET /api/v1/files/{file_id}/download` - Download processed file
|
||||||
|
- `WS /api/v1/files/ws/status/{file_id}` - WebSocket for real-time status updates
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
### Running Tests
|
||||||
|
```bash
|
||||||
|
pytest
|
||||||
|
```
|
||||||
|
|
||||||
|
### Code Style
|
||||||
|
The project uses Black for code formatting:
|
||||||
|
```bash
|
||||||
|
black .
|
||||||
|
```
|
||||||
|
|
||||||
|
### Docker Commands
|
||||||
|
|
||||||
|
- Start services: `docker-compose up`
|
||||||
|
- Start in background: `docker-compose up -d`
|
||||||
|
- Stop services: `docker-compose down`
|
||||||
|
- View logs: `docker-compose logs -f`
|
||||||
|
- Rebuild: `docker-compose up --build`
|
||||||
|
|
@ -0,0 +1,111 @@
|
||||||
|
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, WebSocket, Response
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
|
from sqlalchemy.orm import Session
|
||||||
|
from typing import List
|
||||||
|
import os
|
||||||
|
from ...core.config import settings
|
||||||
|
from ...core.database import get_db
|
||||||
|
from ...models.file import File as FileModel, FileStatus
|
||||||
|
from ...services.file_service import process_file
|
||||||
|
from ...schemas.file import FileResponse as FileResponseSchema, FileList
|
||||||
|
import asyncio
|
||||||
|
from fastapi import WebSocketDisconnect
|
||||||
|
|
||||||
|
router = APIRouter()
|
||||||
|
|
||||||
|
@router.post("/upload", response_model=FileResponseSchema)
async def upload_file(
    file: UploadFile = File(...),
    db: Session = Depends(get_db)
):
    """Accept an uploaded document, persist it under UPLOAD_FOLDER, create a
    database row for it, and enqueue background masking via Celery.

    Raises 400 for a missing filename or a disallowed extension.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    # Keep only the basename so a crafted filename such as "../../etc/passwd"
    # cannot escape the upload directory (path-traversal hardening).
    safe_name = os.path.basename(file.filename)

    # Compare the real extension (text after the last dot) against the
    # allowed set; a bare endswith() would also accept names like "reportpdf".
    extension = safe_name.rsplit(".", 1)[-1].lower() if "." in safe_name else ""
    if extension not in settings.ALLOWED_EXTENSIONS:
        raise HTTPException(
            status_code=400,
            detail=f"File type not allowed. Allowed types: {', '.join(settings.ALLOWED_EXTENSIONS)}"
        )

    # Save file
    file_path = settings.UPLOAD_FOLDER / safe_name
    with open(file_path, "wb") as buffer:
        content = await file.read()
        buffer.write(content)

    # Create database entry
    db_file = FileModel(
        filename=safe_name,
        original_path=str(file_path),
        status=FileStatus.NOT_STARTED
    )
    db.add(db_file)
    db.commit()
    db.refresh(db_file)

    # Start processing (the Celery worker looks the row up by primary key)
    process_file.delay(str(db_file.id))

    return db_file
|
||||||
|
|
||||||
|
@router.get("/files", response_model=List[FileResponseSchema])
def list_files(
    skip: int = 0,
    limit: int = 100,
    db: Session = Depends(get_db)
):
    """Return one page of file records using offset-based pagination."""
    # skip/limit arrive as query parameters; defaults give the first 100 rows.
    return db.query(FileModel).offset(skip).limit(limit).all()
|
||||||
|
|
||||||
|
@router.get("/files/{file_id}", response_model=FileResponseSchema)
def get_file(
    file_id: str,
    db: Session = Depends(get_db)
):
    """Look up a single file record by its primary key; 404 when absent."""
    record = db.query(FileModel).filter(FileModel.id == file_id).first()
    if record is None:
        raise HTTPException(status_code=404, detail="File not found")
    return record
|
||||||
|
|
||||||
|
@router.get("/files/{file_id}/download")
async def download_file(
    file_id: str,
    db: Session = Depends(get_db)
):
    """Stream the processed (masked) version of the file.

    Raises 404 when the record or the processed artifact is missing, and
    400 when processing has not finished successfully yet.
    """
    record = db.query(FileModel).filter(FileModel.id == file_id).first()
    if not record:
        raise HTTPException(status_code=404, detail="File not found")

    if record.status != FileStatus.SUCCESS:
        raise HTTPException(status_code=400, detail="File is not ready for download")

    # processed_path is a nullable column; os.path.exists(None) raises
    # TypeError and would surface as a 500, so guard against None first.
    if not record.processed_path or not os.path.exists(record.processed_path):
        raise HTTPException(status_code=404, detail="Processed file not found")

    return FileResponse(
        path=record.processed_path,
        filename=record.filename,
        media_type="application/octet-stream"
    )
|
||||||
|
|
||||||
|
@router.websocket("/ws/status/{file_id}")
async def websocket_endpoint(websocket: WebSocket, file_id: str, db: Session = Depends(get_db)):
    # Push the file's processing status to the client once a second until the
    # file reaches a terminal state (SUCCESS/FAILED) or the client disconnects.
    await websocket.accept()
    try:
        while True:
            # NOTE(review): re-querying on the same Session may serve the
            # identity-mapped instance without refreshing its columns, so this
            # loop might not observe status changes committed by the Celery
            # worker in another process — confirm, and consider expiring the
            # instance each iteration if updates are missed.
            file = db.query(FileModel).filter(FileModel.id == file_id).first()
            if not file:
                await websocket.send_json({"error": "File not found"})
                break

            await websocket.send_json({
                "status": file.status,
                "error": file.error_message
            })

            # Stop polling once processing has finished (either way).
            if file.status in [FileStatus.SUCCESS, FileStatus.FAILED]:
                break

            await asyncio.sleep(1)
    except WebSocketDisconnect:
        # Client went away; nothing to clean up beyond the closed socket.
        pass
|
||||||
|
|
@ -0,0 +1,39 @@
|
||||||
|
from pydantic_settings import BaseSettings
|
||||||
|
from typing import Optional
|
||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
class Settings(BaseSettings):
    """Application configuration; fields are overridable via environment
    variables or a `.env` file (see the inner Config class)."""
    # API Settings
    API_V1_STR: str = "/api/v1"
    PROJECT_NAME: str = "Legal Document Masker API"

    # Security
    SECRET_KEY: str = "your-secret-key-here"  # Change in production
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 60 * 24 * 8  # 8 days

    # Database
    DATABASE_URL: str = "sqlite:///./legal_doc_masker.db"

    # File Storage
    # BASE_DIR resolves three levels up from this module (the backend root).
    BASE_DIR: Path = Path(__file__).parent.parent.parent
    UPLOAD_FOLDER: Path = BASE_DIR / "storage" / "uploads"
    PROCESSED_FOLDER: Path = BASE_DIR / "storage" / "processed"
    MAX_FILE_SIZE: int = 50 * 1024 * 1024  # 50MB
    # NOTE(review): extensions are stored without a leading dot; any code
    # validating filenames must compare accordingly — keep both in sync.
    ALLOWED_EXTENSIONS: set = {"pdf", "docx", "doc"}

    # Celery
    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/0"

    class Config:
        # Environment variable names must match field names exactly.
        case_sensitive = True
        env_file = ".env"

    def __init__(self, **kwargs):
        """Load settings, then make sure the storage directories exist."""
        super().__init__(**kwargs)
        # Create storage directories if they don't exist
        self.UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True)
        self.PROCESSED_FOLDER.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
settings = Settings()
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
from sqlalchemy import create_engine
# declarative_base moved to sqlalchemy.orm in 1.4; the old
# sqlalchemy.ext.declarative import path is deprecated under the 2.0
# series pinned in requirements.txt.
from sqlalchemy.orm import declarative_base, sessionmaker
from .config import settings

# Create SQLite engine with check_same_thread=False for FastAPI
# (requests may touch the connection from a different thread than the
# one that created it, which SQLite forbids by default).
engine = create_engine(
    settings.DATABASE_URL,
    connect_args={"check_same_thread": False}
)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base class that all ORM models inherit from.
Base = declarative_base()

# Dependency
def get_db():
    """FastAPI dependency yielding a per-request session, always closed."""
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from .core.config import settings
from .api.endpoints import files
from .core.database import engine, Base

# Create database tables
# (idempotent: create_all only creates tables that do not already exist)
Base.metadata.create_all(bind=engine)

app = FastAPI(
    title=settings.PROJECT_NAME,
    openapi_url=f"{settings.API_V1_STR}/openapi.json"
)

# Set up CORS
# NOTE(review): allow_credentials=True combined with allow_origins=["*"] is
# disallowed for credentialed requests by the CORS spec — confirm the
# intended origin policy before production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with specific origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
app.include_router(
    files.router,
    prefix=f"{settings.API_V1_STR}/files",
    tags=["files"]
)

@app.get("/")
async def root():
    # Simple landing/liveness endpoint.
    return {"message": "Welcome to Legal Document Masker API"}
|
||||||
|
|
@ -0,0 +1,22 @@
|
||||||
|
from sqlalchemy import Column, String, DateTime, Text
|
||||||
|
from datetime import datetime
|
||||||
|
import uuid
|
||||||
|
from ..core.database import Base
|
||||||
|
|
||||||
|
class FileStatus(str):
    """Namespace of string constants for the lifecycle stored in File.status."""
    # Plain str subclass used only as a constant holder (not an Enum), so the
    # values compare and persist as ordinary strings across processes.
    NOT_STARTED = "not_started"
    PROCESSING = "processing"
    SUCCESS = "success"
    FAILED = "failed"
|
||||||
|
|
||||||
|
class File(Base):
    """ORM model for an uploaded document and its masking/processing state."""
    __tablename__ = "files"

    # UUID4 stored as text; generated client-side at insert time.
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    filename = Column(String(255), nullable=False)
    # Path of the original upload on disk.
    original_path = Column(String(255), nullable=False)
    # Path of the masked output; NULL until processing succeeds.
    processed_path = Column(String(255))
    # One of the FileStatus constants.
    status = Column(String(20), nullable=False, default=FileStatus.NOT_STARTED)
    error_message = Column(Text)
    # NOTE(review): datetime.utcnow yields naive UTC timestamps and is
    # deprecated in Python 3.12 — consider timezone-aware defaults.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    updated_at = Column(DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
from pydantic import BaseModel
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
from uuid import UUID
|
||||||
|
|
||||||
|
class FileBase(BaseModel):
    """Fields shared by all file payload schemas."""
    filename: str
    # One of the FileStatus string constants.
    status: str
    error_message: Optional[str] = None
|
||||||
|
|
||||||
|
class FileResponse(FileBase):
    """Full file record as returned by the API."""
    # Stored as a 36-char string in the DB; pydantic coerces it to UUID.
    id: UUID
    created_at: datetime
    updated_at: datetime

    class Config:
        # Allow building the schema directly from ORM objects.
        from_attributes = True
|
||||||
|
|
||||||
|
class FileList(BaseModel):
    """Listing envelope: one page of files plus the total record count."""
    files: list[FileResponse]
    total: int
|
||||||
|
|
@ -0,0 +1,47 @@
|
||||||
|
from celery import Celery
from ..core.config import settings
from ..models.file import File, FileStatus
from sqlalchemy.orm import Session
from ..core.database import SessionLocal
import sys
import os

# Add the parent directory to Python path to import the masking system
# (walks four levels up from this file to the repository root so that the
# top-level `src` package becomes importable).
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
from src.main import process_document  # Import your existing masking function

# Celery application; broker and result backend both point at Redis
# (see Settings.CELERY_BROKER_URL / CELERY_RESULT_BACKEND).
celery = Celery(
    'file_service',
    broker=settings.CELERY_BROKER_URL,
    backend=settings.CELERY_RESULT_BACKEND
)
|
||||||
|
|
||||||
|
@celery.task
def process_file(file_id: str):
    """Background task: mask the document identified by file_id.

    Looks up the File row, marks it PROCESSING, runs the masking pipeline,
    and records either the output path (SUCCESS) or the error (FAILED).
    """
    # Celery workers run outside the request cycle, so open a dedicated
    # session instead of the FastAPI get_db dependency.
    db = SessionLocal()
    try:
        file = db.query(File).filter(File.id == file_id).first()
        if not file:
            # Unknown id: nothing to do; the task exits silently.
            return

        # Update status to processing
        file.status = FileStatus.PROCESSING
        db.commit()

        try:
            # Process the file using your existing masking system
            output_path = process_document(file.original_path)

            # Update file record with processed path
            file.processed_path = output_path
            file.status = FileStatus.SUCCESS
            db.commit()

        except Exception as e:
            # Record the failure for the API/UI, then re-raise so Celery
            # also marks the task itself as failed.
            file.status = FileStatus.FAILED
            file.error_message = str(e)
            db.commit()
            raise

    finally:
        db.close()
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
# NOTE(review): the top-level `version` key is obsolete in Compose v2 and
# only informational there.
version: '3.8'

services:
  # FastAPI application server.
  api:
    build: .
    ports:
      - "8000:8000"
    volumes:
      # Shared storage for uploads and processed output.
      - ./storage:/app/storage
      # NOTE(review): the SQLite file is bind-mounted into both api and
      # celery_worker — concurrent writers on SQLite can contend; confirm
      # this is acceptable or move to a server database.
      - ./legal_doc_masker.db:/app/legal_doc_masker.db
    environment:
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/0
    depends_on:
      - redis

  # Background worker consuming masking tasks from Redis.
  celery_worker:
    build: .
    command: celery -A app.services.file_service worker --loglevel=info
    volumes:
      - ./storage:/app/storage
      - ./legal_doc_masker.db:/app/legal_doc_masker.db
    environment:
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/0
    depends_on:
      - redis
      - api

  # Task queue broker / result backend.
  # NOTE(review): `redis:alpine` is an unpinned moving tag — consider
  # pinning a specific version for reproducible deploys.
  redis:
    image: redis:alpine
    ports:
      - "6379:6379"
|
||||||
|
|
@ -0,0 +1,31 @@
|
||||||
|
# FastAPI and server
|
||||||
|
fastapi>=0.104.0
|
||||||
|
uvicorn>=0.24.0
|
||||||
|
python-multipart>=0.0.6
|
||||||
|
websockets>=12.0
|
||||||
|
|
||||||
|
# Database
|
||||||
|
sqlalchemy>=2.0.0
|
||||||
|
alembic>=1.12.0
|
||||||
|
|
||||||
|
# Background tasks
|
||||||
|
celery>=5.3.0
|
||||||
|
redis>=5.0.0
|
||||||
|
|
||||||
|
# Security
|
||||||
|
python-jose[cryptography]>=3.3.0
|
||||||
|
passlib[bcrypt]>=1.7.4
|
||||||
|
python-dotenv>=1.0.0
|
||||||
|
|
||||||
|
# Testing
|
||||||
|
pytest>=7.4.0
|
||||||
|
httpx>=0.25.0
|
||||||
|
|
||||||
|
# Existing project dependencies
|
||||||
|
pydantic-settings>=2.0.0
|
||||||
|
watchdog==2.1.6
|
||||||
|
requests==2.28.1
|
||||||
|
python-docx>=0.8.11
|
||||||
|
PyPDF2>=3.0.0
|
||||||
|
pandas>=2.0.0
|
||||||
|
magic-pdf[full]
|
||||||
Loading…
Reference in New Issue