# File: open-notebook/pdf-ocr/Dockerfile
# Listing metadata at capture time: 30 lines, 583 B, Docker

FROM python:3.11-slim

# System packages for the OCR toolchain (ocrmypdf, tesseract with the
# English language data, and the ghostscript/unpaper/pngquant helpers).
# The package index is refreshed, used, and removed within a single layer
# so the apt lists never persist in the image; --no-install-recommends
# keeps optional "recommended" packages out.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ghostscript \
        ocrmypdf \
        pngquant \
        tesseract-ocr \
        tesseract-ocr-eng \
        unpaper \
    && rm -rf /var/lib/apt/lists/*
# Python packages for the web service, installed with pip.
# --no-cache-dir keeps pip's download cache out of the image layer.
# "uvicorn[standard]" is quoted because the square brackets are a shell
# glob pattern; unquoted, the word is subject to pathname expansion.
# NOTE(review): versions are not pinned, so a rebuild may pull newer
# releases — consider pinning or moving these to a requirements file.
RUN pip install --no-cache-dir \
        aiofiles \
        fastapi \
        python-multipart \
        "uvicorn[standard]"
WORKDIR /app

# Run the service as an unprivileged account instead of root.
# A fixed numeric UID/GID lets orchestrators verify the non-root user.
# The account owns /app and the two data directories so the process can
# still write there (presumably app.py stores incoming and OCR'd files in
# uploads/ and processed/ — confirm against app.py).
# NOTE(review): if host directories are bind-mounted over these paths they
# must be writable by UID 10001.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app \
        --home-dir /app --shell /usr/sbin/nologin app \
    && mkdir -p /app/uploads /app/processed \
    && chown -R app:app /app

# Application code, owned by the runtime user.
COPY --chown=app:app app.py /app/

# Drop privileges for everything that follows, including the running container.
USER app

# Documentation of the listening port only; publishing happens at `docker run`.
EXPOSE 8000

# Exec form so uvicorn is PID 1 and receives stop signals directly.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]