# Image for the uvicorn-served application `app.main:app`.
# Layers are ordered from least to most frequently changing:
# base image -> OS packages -> Python dependencies -> application source.
FROM python:3.11-slim

WORKDIR /app

# Install system dependencies. The apt lists are removed in the same layer so
# they never reach the image.
# NOTE(review): --no-install-recommends is intentionally not used here; dropping
# recommended packages may change what libreoffice pulls in - confirm before adding it.
RUN apt-get update && apt-get install -y \
        build-essential \
        git \
        libreoffice \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Copy only the requirements manifest first so the dependency layers below stay
# cached until requirements.txt itself changes.
COPY requirements.txt .

# Upgrade the packaging toolchain; --no-cache-dir keeps pip's download cache out
# of the layer, consistent with the other pip steps.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# Install the CPU-only PyTorch build in its own layer (better caching, smaller
# image). The dedicated CPU index is used because a find-links page (-f) only
# adds candidates alongside PyPI and therefore does not guarantee the CPU wheel.
RUN pip install --no-cache-dir torch==2.7.0 --index-url https://download.pytorch.org/whl/cpu

# Install the rest of the requirements.
RUN pip install --no-cache-dir -r requirements.txt

# Optional: pre-download the NER model during build (larger image but faster
# startup). Left disabled; uncomment to enable.
# RUN python -c "
# from transformers import AutoTokenizer, AutoModelForTokenClassification
# model_name = 'uer/roberta-base-finetuned-cluener2020-chinese'
# print('Downloading NER model...')
# AutoTokenizer.from_pretrained(model_name)
# AutoModelForTokenClassification.from_pretrained(model_name)
# print('NER model downloaded successfully')
# "

# Copy the rest of the application source.
COPY . .

# Create the storage directories under /app.
RUN mkdir -p storage/uploads storage/processed

# NOTE(review): there is no USER instruction, so the container runs as root.
# Switching to a non-root user presumably needs writable storage/ directories
# and a writable home for LibreOffice - verify before changing.

# Document the port the app listens on (EXPOSE does not publish it).
EXPOSE 8000

# Exec-form command so uvicorn runs as PID 1 and receives stop signals directly.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]