# Source: legal-doc-masker/backend/Dockerfile (Docker, 47 lines, 1.3 KiB)

# Base image: slim Python 3.11 (apt-based).
FROM python:3.11-slim

# Relative paths in later instructions resolve against /app.
WORKDIR /app

# OS-level packages. Update, install and list cleanup happen in a single layer
# so the apt package lists never persist in the image.
# NOTE(review): recommended packages are still installed (no
# --no-install-recommends); confirm whether LibreOffice relies on any of them
# before trimming.
RUN apt-get update \
 && apt-get install -y \
      build-essential \
      git \
      libreoffice \
      wget \
 && rm -rf /var/lib/apt/lists/*
# Copy only the dependency manifest first so the dependency layers below stay
# cached until requirements.txt itself changes.
COPY requirements.txt .

# Upgrade the packaging toolchain; --no-cache-dir keeps pip's download cache
# out of the image layer.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# Install the CPU-only PyTorch build in its own layer (better caching, smaller
# image). The dedicated CPU wheel index is used because a `-f` find-links page
# only adds candidates alongside PyPI and does not guarantee the CPU wheel is
# the one selected.
RUN pip install --no-cache-dir torch==2.7.0 --index-url https://download.pytorch.org/whl/cpu

# Install the rest of the requirements.
# NOTE(review): assumes requirements.txt does not pin a different torch
# version; otherwise pip would replace the CPU build installed above.
RUN pip install --no-cache-dir -r requirements.txt
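# Optional, currently disabled: pre-download the NER model during build
# (larger image but faster startup). If re-enabled, the commented-out RUN below
# needs a heredoc or line continuations to be valid Dockerfile syntax.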
# RUN python -c "
# from transformers import AutoTokenizer, AutoModelForTokenClassification
# model_name = 'uer/roberta-base-finetuned-cluener2020-chinese'
# print('Downloading NER model...')
# AutoTokenizer.from_pretrained(model_name)
# AutoModelForTokenClassification.from_pretrained(model_name)
# print('NER model downloaded successfully')
# "
# Bring in the application source last: it changes most often, so the
# dependency layers above remain cacheable across code edits.
COPY . ./

# Make sure the storage directories (uploads and processed) exist under the
# working directory.
RUN mkdir -p \
      storage/uploads \
      storage/processed

# NOTE(review): no USER instruction appears in the visible file, so the
# process runs as the base image's default user — consider a non-root user.

# Document the port the server listens on (EXPOSE does not publish it).
EXPOSE 8000

# Launch uvicorn serving app.main:app on all interfaces, port 8000.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]