# Dockerfile for the application image (viewer header: 47 lines, 1.3 KiB, Docker)
# Base image: slim variant of the Python 3.11 image.
FROM python:3.11-slim

# Every later relative path (COPY destinations, RUN commands, the final CMD)
# resolves against /app; WORKDIR creates the directory if it does not exist.
WORKDIR /app

# Install system dependencies.
# Index update, install and list cleanup share a single layer so the package
# index is never reused stale from cache and is not shipped in the image.
# DEBIAN_FRONTEND is set inline so it applies to this command only and does
# not leak into the runtime environment.
# NOTE(review): recommended packages are still installed, exactly as before.
# Adding --no-install-recommends would shrink the image but may drop packages
# LibreOffice relies on (e.g. fonts) -- verify conversions before enabling it.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        git \
        libreoffice \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first to leverage Docker cache: changes to application
# source alone do not invalidate the dependency layers below.
COPY requirements.txt .

# Upgrade the packaging toolchain. --no-cache-dir keeps pip's download cache
# out of the image layer.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# Install the CPU-only PyTorch build first (better caching and smaller size).
# The dedicated CPU wheel index is used so pip cannot pick the default PyPI
# wheel instead (which on Linux x86_64 pulls in the CUDA runtime packages).
RUN pip install --no-cache-dir torch==2.7.0 --index-url https://download.pytorch.org/whl/cpu

# Install the rest of the requirements. An already-installed torch that
# satisfies the requirement is kept rather than re-downloaded.
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download NER model during build (larger image but faster startup).
# Currently disabled (commented out).
# RUN python -c "
# from transformers import AutoTokenizer, AutoModelForTokenClassification
# model_name = 'uer/roberta-base-finetuned-cluener2020-chinese'
# print('Downloading NER model...')
# AutoTokenizer.from_pretrained(model_name)
# AutoModelForTokenClassification.from_pretrained(model_name)
# print('NER model downloaded successfully')
# "

# Copy the rest of the application into /app. This comes after the dependency
# installs so that source edits only rebuild the layers from here on.
COPY . .

# Create storage directories (relative to WORKDIR, i.e. /app/storage/...).
RUN mkdir -p storage/uploads storage/processed

# NOTE(review): there is no USER instruction, so the process runs as root.
# Switching to a dedicated non-root user is recommended, but first confirm
# that any volume mounted over /app/storage would remain writable for it.

# Document the port the app listens on (EXPOSE does not publish it).
EXPOSE 8000

# Command to run the application. Exec (JSON-array) form: uvicorn is started
# directly rather than through /bin/sh, so it receives stop signals itself.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]