feat: 增加backend
This commit is contained in:
parent
47e78c35bb
commit
76b0351f8f
|
|
@ -0,0 +1,24 @@
|
|||
# Image for the backend; docker-compose builds both the API and the Celery
# worker service from this same Dockerfile.
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies.
# --no-install-recommends avoids pulling optional packages into the image, and
# the apt lists are removed in the same layer so they are never stored.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first to leverage Docker cache: the dependency layer is
# only rebuilt when requirements.txt changes, not on every source edit.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application
COPY . .

# Create storage directories (relative to WORKDIR, i.e. /app/storage/...)
RUN mkdir -p storage/uploads storage/processed

# Expose the port the app runs on
EXPOSE 8000

# Command to run the application
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
|
||||
|
|
@ -0,0 +1,103 @@
|
|||
# Legal Document Masker API
|
||||
|
||||
This is the backend API for the Legal Document Masking system. It provides endpoints for file upload, processing status tracking, and file download.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
- Python 3.9+ (the code uses built-in generics such as `list[...]`; the Docker image runs Python 3.11)
|
||||
- Redis (for Celery)
|
||||
|
||||
## File Storage
|
||||
|
||||
Files are stored in the following structure:
|
||||
```
|
||||
backend/
|
||||
├── storage/
|
||||
│ ├── uploads/ # Original uploaded files
|
||||
│ └── processed/ # Masked/processed files
|
||||
```
|
||||
|
||||
## Setup
|
||||
|
||||
### Option 1: Local Development
|
||||
|
||||
1. Create a virtual environment:
|
||||
```bash
|
||||
python -m venv venv
|
||||
source venv/bin/activate # On Windows: venv\Scripts\activate
|
||||
```
|
||||
|
||||
2. Install dependencies:
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
3. Set up environment variables:
|
||||
Create a `.env` file in the backend directory with the following variables:
|
||||
```env
|
||||
SECRET_KEY=your-secret-key-here
|
||||
```
|
||||
|
||||
The database (SQLite) will be automatically created when you first run the application.
|
||||
|
||||
4. Start Redis (required for Celery):
|
||||
```bash
|
||||
redis-server
|
||||
```
|
||||
|
||||
5. Start Celery worker:
|
||||
```bash
|
||||
celery -A app.services.file_service worker --loglevel=info
|
||||
```
|
||||
|
||||
6. Start the FastAPI server:
|
||||
```bash
|
||||
uvicorn app.main:app --reload
|
||||
```
|
||||
|
||||
### Option 2: Docker Deployment
|
||||
|
||||
1. Build and start the services:
|
||||
```bash
|
||||
docker-compose up --build
|
||||
```
|
||||
|
||||
This will start:
|
||||
- FastAPI server on port 8000
|
||||
- Celery worker for background processing
|
||||
- Redis for task queue
|
||||
|
||||
## API Documentation
|
||||
|
||||
Once the server is running, you can access:
|
||||
- Swagger UI: `http://localhost:8000/docs`
|
||||
- ReDoc: `http://localhost:8000/redoc`
|
||||
|
||||
## API Endpoints
|
||||
|
||||
- `POST /api/v1/files/upload` - Upload a new file
|
||||
- `GET /api/v1/files/files` - List all files (accepts `skip` and `limit` query parameters)
|
||||
- `GET /api/v1/files/files/{file_id}` - Get file details
|
||||
- `GET /api/v1/files/files/{file_id}/download` - Download processed file
|
||||
- `WS /api/v1/files/ws/status/{file_id}` - WebSocket for real-time status updates
|
||||
|
||||
## Development
|
||||
|
||||
### Running Tests
|
||||
```bash
|
||||
pytest
|
||||
```
|
||||
|
||||
### Code Style
|
||||
The project uses Black for code formatting:
|
||||
```bash
|
||||
black .
|
||||
```
|
||||
|
||||
### Docker Commands
|
||||
|
||||
- Start services: `docker-compose up`
|
||||
- Start in background: `docker-compose up -d`
|
||||
- Stop services: `docker-compose down`
|
||||
- View logs: `docker-compose logs -f`
|
||||
- Rebuild: `docker-compose up --build`
|
||||
|
|
@ -0,0 +1,111 @@
|
|||
from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, WebSocket, Response
|
||||
from fastapi.responses import FileResponse
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import List
|
||||
import os
|
||||
from ...core.config import settings
|
||||
from ...core.database import get_db
|
||||
from ...models.file import File as FileModel, FileStatus
|
||||
from ...services.file_service import process_file
|
||||
from ...schemas.file import FileResponse as FileResponseSchema, FileList
|
||||
import asyncio
|
||||
from fastapi import WebSocketDisconnect
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
@router.post("/upload", response_model=FileResponseSchema)
async def upload_file(
    file: UploadFile = File(...),
    db: Session = Depends(get_db)
):
    """Store an uploaded document, record it in the database and queue it for masking.

    Args:
        file: The multipart upload. Its filename is client-controlled and is
            therefore sanitised before being used on disk.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        The newly created file record (status ``not_started``).

    Raises:
        HTTPException: 400 if no filename was sent or the extension is not in
            ``settings.ALLOWED_EXTENSIONS``; 413 if the payload is larger than
            ``settings.MAX_FILE_SIZE``.
    """
    import uuid  # local import: only needed to build unique storage names

    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    # Keep only the last path component so names such as "../../x.pdf" (or a
    # Windows-style "..\\x.pdf") cannot escape the upload folder.
    safe_name = os.path.basename(file.filename.replace("\\", "/"))

    # Compare the real extension instead of using endswith(), which would
    # accept e.g. "notapdf". Configured entries may be written with or
    # without a leading dot.
    allowed = {ext.lower().lstrip(".") for ext in settings.ALLOWED_EXTENSIONS}
    extension = os.path.splitext(safe_name)[1].lower().lstrip(".")
    if not safe_name or extension not in allowed:
        raise HTTPException(
            status_code=400,
            detail=f"File type not allowed. Allowed types: {', '.join(settings.ALLOWED_EXTENSIONS)}"
        )

    # Read at most one byte beyond the limit: enough to detect an oversized
    # upload without loading an arbitrarily large body into memory.
    content = await file.read(settings.MAX_FILE_SIZE + 1)
    if len(content) > settings.MAX_FILE_SIZE:
        raise HTTPException(
            status_code=413,
            detail=f"File too large. Maximum size: {settings.MAX_FILE_SIZE} bytes"
        )

    # Save file under a unique name so two uploads with the same filename do
    # not overwrite each other; the original name is kept in the database.
    file_path = settings.UPLOAD_FOLDER / f"{uuid.uuid4().hex}_{safe_name}"
    with open(file_path, "wb") as buffer:
        buffer.write(content)

    # Create database entry
    db_file = FileModel(
        filename=safe_name,
        original_path=str(file_path),
        status=FileStatus.NOT_STARTED
    )
    db.add(db_file)
    db.commit()
    db.refresh(db_file)

    # Start processing in the background worker
    process_file.delay(str(db_file.id))

    return db_file
|
||||
|
||||
@router.get("/files", response_model=List[FileResponseSchema])
def list_files(
    skip: int = 0,
    limit: int = 100,
    db: Session = Depends(get_db)
):
    """Return one page of stored file records.

    Args:
        skip: Number of rows to skip before the page starts.
        limit: Maximum number of rows in the page.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A list of file records, possibly empty.
    """
    page_query = db.query(FileModel).offset(skip).limit(limit)
    return page_query.all()
|
||||
|
||||
@router.get("/files/{file_id}", response_model=FileResponseSchema)
def get_file(
    file_id: str,
    db: Session = Depends(get_db)
):
    """Look up a single file record by its id.

    Args:
        file_id: Primary key of the record.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        The matching file record.

    Raises:
        HTTPException: 404 when no record has this id.
    """
    record = db.query(FileModel).filter(FileModel.id == file_id).first()
    if record:
        return record
    raise HTTPException(status_code=404, detail="File not found")
|
||||
|
||||
@router.get("/files/{file_id}/download")
async def download_file(
    file_id: str,
    db: Session = Depends(get_db)
):
    """Send the processed version of a file as a download.

    Args:
        file_id: Primary key of the file record.
        db: Database session supplied by the ``get_db`` dependency.

    Returns:
        A ``FileResponse`` streaming the processed file under the original
        filename.

    Raises:
        HTTPException: 404 if the record is unknown or the processed file is
            missing; 400 if processing has not finished successfully.
    """
    record = db.query(FileModel).filter(FileModel.id == file_id).first()
    if not record:
        raise HTTPException(status_code=404, detail="File not found")

    if record.status != FileStatus.SUCCESS:
        raise HTTPException(status_code=400, detail="File is not ready for download")

    # processed_path is a nullable column; os.path.exists(None) would raise a
    # TypeError (HTTP 500), so an unset path is reported as 404 as well.
    if not record.processed_path or not os.path.exists(record.processed_path):
        raise HTTPException(status_code=404, detail="Processed file not found")

    return FileResponse(
        path=record.processed_path,
        filename=record.filename,
        media_type="application/octet-stream"
    )
|
||||
|
||||
@router.websocket("/ws/status/{file_id}")
async def websocket_endpoint(websocket: WebSocket, file_id: str, db: Session = Depends(get_db)):
    """Push the processing status of a file to the client once per second.

    Each message is ``{"status": ..., "error": ...}``. The loop ends when the
    status becomes ``success`` or ``failed``, when the file does not exist
    (a single ``{"error": "File not found"}`` message is sent), or when the
    client disconnects.

    Args:
        websocket: The client connection.
        file_id: Primary key of the file record to watch.
        db: Database session supplied by the ``get_db`` dependency; the same
            session is reused for every poll.
    """
    await websocket.accept()
    try:
        while True:
            # The session caches loaded rows: re-running the query returns the
            # same object with the attribute values read the first time.
            # Expiring forces the next access to reload from the database, so
            # status changes committed by the worker become visible.
            db.expire_all()

            file = db.query(FileModel).filter(FileModel.id == file_id).first()
            if not file:
                await websocket.send_json({"error": "File not found"})
                break

            await websocket.send_json({
                "status": file.status,
                "error": file.error_message
            })

            # Terminal states: nothing more will change, stop polling.
            if file.status in (FileStatus.SUCCESS, FileStatus.FAILED):
                break

            await asyncio.sleep(1)

        # Finish with an explicit close handshake rather than leaving the
        # teardown to the server when the handler returns.
        await websocket.close()
    except WebSocketDisconnect:
        # Client went away; there is nobody left to notify.
        pass
|
||||
|
|
@ -0,0 +1,39 @@
|
|||
from pydantic_settings import BaseSettings
|
||||
from typing import Optional
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
class Settings(BaseSettings):
    """Application configuration.

    Every annotated attribute below is a setting with the shown default.
    The inner ``Config`` asks pydantic-settings to also read a ``.env`` file
    and to match names case-sensitively.
    """

    # API Settings
    API_V1_STR: str = "/api/v1"  # URL prefix used when routers are mounted
    PROJECT_NAME: str = "Legal Document Masker API"

    # Security
    SECRET_KEY: str = "your-secret-key-here"  # Placeholder default; change in production
    ACCESS_TOKEN_EXPIRE_MINUTES: int = 60 * 24 * 8  # 8 days

    # Database
    DATABASE_URL: str = "sqlite:///./legal_doc_masker.db"  # relative SQLite file

    # File Storage
    # BASE_DIR is three directory levels above this module's file.
    BASE_DIR: Path = Path(__file__).parent.parent.parent
    UPLOAD_FOLDER: Path = BASE_DIR / "storage" / "uploads"
    PROCESSED_FOLDER: Path = BASE_DIR / "storage" / "processed"
    MAX_FILE_SIZE: int = 50 * 1024 * 1024  # 50MB
    # Extensions are listed without a leading dot.
    ALLOWED_EXTENSIONS: set = {"pdf", "docx", "doc"}

    # Celery
    # Both default to a Redis instance on localhost, database 0.
    CELERY_BROKER_URL: str = "redis://localhost:6379/0"
    CELERY_RESULT_BACKEND: str = "redis://localhost:6379/0"

    class Config:
        case_sensitive = True
        env_file = ".env"

    def __init__(self, **kwargs):
        """Load the settings, then make sure both storage folders exist."""
        super().__init__(**kwargs)
        # Create storage directories if they don't exist
        self.UPLOAD_FOLDER.mkdir(parents=True, exist_ok=True)
        self.PROCESSED_FOLDER.mkdir(parents=True, exist_ok=True)

# Module-level instance imported by the rest of the application; constructing
# it here means the storage folders are created at import time.
settings = Settings()
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from .config import settings
|
||||
|
||||
# Create the engine. check_same_thread=False lets a SQLite connection be used
# from a different thread than the one that created it, which FastAPI needs.
# The argument is specific to the sqlite3 driver, so it is only passed when
# DATABASE_URL points at SQLite; other databases get no extra connect args.
_connect_args = (
    {"check_same_thread": False}
    if settings.DATABASE_URL.startswith("sqlite")
    else {}
)
engine = create_engine(
    settings.DATABASE_URL,
    connect_args=_connect_args
)

# Factory for sessions bound to the engine; callers commit explicitly.
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Declarative base class that the ORM models inherit from.
Base = declarative_base()
|
||||
|
||||
# Dependency
|
||||
def get_db():
    """Yield a database session and close it once the caller is finished.

    Written as a generator so it can be used as a FastAPI dependency: the
    session is handed out at the ``yield`` and closed afterwards, whether or
    not the request raised.
    """
    # Leaving the ``with`` block closes the session.
    with SessionLocal() as session:
        yield session
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from .core.config import settings
|
||||
from .api.endpoints import files
|
||||
from .core.database import engine, Base
|
||||
|
||||
# Create database tables
# Runs at import time. The model classes are registered on Base as a side
# effect of importing the files router above.
Base.metadata.create_all(bind=engine)

app = FastAPI(
    title=settings.PROJECT_NAME,
    openapi_url=f"{settings.API_V1_STR}/openapi.json"
)

# Set up CORS
# A wildcard origin must not be combined with allow_credentials=True: that
# would let any website send credentialed requests. No endpoint registered
# here uses cookies or authentication, so credentials are disabled while
# origins stay open. To enable credentials, list explicit origins instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, replace with specific origins
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routers
# Every route of the files router is served below "<API_V1_STR>/files".
app.include_router(
    files.router,
    prefix=f"{settings.API_V1_STR}/files",
    tags=["files"]
)
|
||||
|
||||
@app.get("/")
async def root():
    """Landing endpoint: return a fixed welcome message."""
    greeting = {"message": "Welcome to Legal Document Masker API"}
    return greeting
|
||||
|
|
@ -0,0 +1,22 @@
|
|||
from sqlalchemy import Column, String, DateTime, Text
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
from ..core.database import Base
|
||||
|
||||
class FileStatus(str):
    """Namespace of status strings for a file's processing lifecycle.

    This subclasses ``str`` but is not an ``enum.Enum``: each attribute is a
    plain string, so it can be stored in the ``status`` column and compared
    with ``==`` directly.
    """

    NOT_STARTED = "not_started"  # set when the upload is recorded
    PROCESSING = "processing"    # set by the worker when it picks the file up
    SUCCESS = "success"          # set by the worker after masking succeeded
    FAILED = "failed"            # set by the worker when masking raised
|
||||
|
||||
class File(Base):
    """ORM model for one uploaded document and its processing state."""

    __tablename__ = "files"

    # Primary key: a UUID4 rendered as a 36-character string, generated on insert.
    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    # Name of the file as uploaded.
    filename = Column(String(255), nullable=False)
    # Where the uploaded file was written.
    original_path = Column(String(255), nullable=False)
    # Where the processed file was written; empty until processing succeeds.
    processed_path = Column(String(255))
    # One of the FileStatus string constants.
    status = Column(String(20), nullable=False, default=FileStatus.NOT_STARTED)
    # Text of the exception when processing failed.
    error_message = Column(Text)
    # Timestamps use datetime.utcnow, i.e. naive datetimes in UTC.
    created_at = Column(DateTime, nullable=False, default=datetime.utcnow)
    updated_at = Column(DateTime, nullable=False, default=datetime.utcnow, onupdate=datetime.utcnow)
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
from pydantic import BaseModel
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
from uuid import UUID
|
||||
|
||||
class FileBase(BaseModel):
    """Fields shared by the file schemas."""

    filename: str
    status: str  # processing status string
    error_message: Optional[str] = None  # optional; defaults to None
|
||||
|
||||
class FileResponse(FileBase):
    """File record as returned to API clients."""

    # NOTE(review): the ORM model stores the id as a 36-character string;
    # this presumably relies on pydantic coercing it to UUID, so confirm.
    id: UUID
    created_at: datetime
    updated_at: datetime

    class Config:
        # Allow building the schema from object attributes (ORM instances)
        # rather than only from dicts.
        from_attributes = True
|
||||
|
||||
class FileList(BaseModel):
    """A list of file records together with a total count."""

    files: list[FileResponse]
    total: int
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
from celery import Celery
|
||||
from ..core.config import settings
|
||||
from ..models.file import File, FileStatus
|
||||
from sqlalchemy.orm import Session
|
||||
from ..core.database import SessionLocal
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add the parent directory to Python path to import the masking system.
# The nested dirname() calls strip the file name and then three more path
# components; the resulting directory must contain the `src` package.
# This has to run before the import below, so the order matters.
# NOTE(review): confirm that directory (and `src`) also exists inside the
# Docker image, where only the build context is copied. TODO confirm.
sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
from src.main import process_document  # Import your existing masking function

# Celery application for this module; the broker and result backend URLs
# come from the settings.
celery = Celery(
    'file_service',
    broker=settings.CELERY_BROKER_URL,
    backend=settings.CELERY_RESULT_BACKEND
)
|
||||
|
||||
@celery.task
def process_file(file_id: str):
    """Run the masking pipeline for one file and record the outcome.

    The record's status moves to ``processing`` and then to ``success``
    (with ``processed_path`` set) or ``failed`` (with ``error_message`` set).

    Args:
        file_id: Primary key of the file record to process. Unknown ids are
            ignored and the task returns without doing anything.

    Raises:
        Exception: Whatever the processing step raised, re-raised after the
            failure has been stored so the task itself is marked as failed.
    """
    db = SessionLocal()
    try:
        file = db.query(File).filter(File.id == file_id).first()
        if not file:
            return

        # Update status to processing
        file.status = FileStatus.PROCESSING
        db.commit()

        try:
            # Process the file using your existing masking system
            output_path = process_document(file.original_path)

            # Update file record with processed path
            file.processed_path = output_path
            file.status = FileStatus.SUCCESS
            db.commit()

        except Exception as e:
            # If the error came from the commit above, the session must be
            # rolled back before it can be used again; otherwise storing the
            # failure would raise and hide the original error. When the error
            # came from process_document the rollback is a harmless no-op.
            db.rollback()
            file.status = FileStatus.FAILED
            file.error_message = str(e)
            db.commit()
            raise

    finally:
        db.close()
|
||||
|
|
@ -0,0 +1,33 @@
|
|||
# Compose stack for the backend: the API, a Celery worker and Redis.
version: '3.8'

services:
  # FastAPI server, built from the Dockerfile in this directory.
  api:
    build: .
    ports:
      - "8000:8000"
    volumes:
      # Uploaded and processed files are kept on the host.
      - ./storage:/app/storage
      # NOTE: this bind-mounts a single file. If ./legal_doc_masker.db does
      # not exist on the host before the stack starts, Docker creates a
      # directory with that name, which SQLite cannot open. Create the file
      # first (e.g. `touch legal_doc_masker.db`).
      - ./legal_doc_masker.db:/app/legal_doc_masker.db
    environment:
      # Inside the compose network Redis is reached by its service name.
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/0
    depends_on:
      - redis

  # Background worker: same image as the API, different command.
  celery_worker:
    build: .
    command: celery -A app.services.file_service worker --loglevel=info
    volumes:
      # Same mounts as the API so both see the same files and database.
      - ./storage:/app/storage
      - ./legal_doc_masker.db:/app/legal_doc_masker.db
    environment:
      - CELERY_BROKER_URL=redis://redis:6379/0
      - CELERY_RESULT_BACKEND=redis://redis:6379/0
    depends_on:
      - redis
      - api

  # Message broker and result backend for Celery.
  redis:
    image: redis:alpine
    ports:
      # Published to the host as well; the other services use redis:6379.
      - "6379:6379"
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
# FastAPI and server
|
||||
fastapi>=0.104.0
|
||||
uvicorn>=0.24.0
|
||||
python-multipart>=0.0.6
|
||||
websockets>=12.0
|
||||
|
||||
# Database
|
||||
sqlalchemy>=2.0.0
|
||||
alembic>=1.12.0
|
||||
|
||||
# Background tasks
|
||||
celery>=5.3.0
|
||||
redis>=5.0.0
|
||||
|
||||
# Security
|
||||
python-jose[cryptography]>=3.3.0
|
||||
passlib[bcrypt]>=1.7.4
|
||||
python-dotenv>=1.0.0
|
||||
|
||||
# Testing
|
||||
pytest>=7.4.0
|
||||
httpx>=0.25.0
|
||||
|
||||
# Existing project dependencies
|
||||
pydantic-settings>=2.0.0
|
||||
watchdog==2.1.6
|
||||
requests==2.28.1
|
||||
python-docx>=0.8.11
|
||||
PyPDF2>=3.0.0
|
||||
pandas>=2.0.0
|
||||
magic-pdf[full]
|
||||
Loading…
Reference in New Issue