# Base image: slim CPython 3.11 (Debian-based; apt is used further down).
FROM python:3.11-slim

# Interpreter / pip / apt defaults for the build and for the running container:
#   PYTHONUNBUFFERED              - stdout/stderr are not buffered
#   PYTHONDONTWRITEBYTECODE       - no .pyc files are written
#   PIP_NO_CACHE_DIR              - pip does not keep a download/wheel cache
#   PIP_DISABLE_PIP_VERSION_CHECK - pip skips its self-update check
#   DEBIAN_FRONTEND               - apt/debconf never prompts interactively
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    DEBIAN_FRONTEND=noninteractive
# Account named "app" with a home directory (-m) and bash as its login shell (-s).
RUN useradd -m -s /bin/bash app

# Relative paths in every later instruction resolve against /app.
WORKDIR /app
# System packages installed in a single layer; the apt caches are removed in
# the same RUN so they never end up in the image.
# `libtiff-dev` is used instead of the versioned `libtiff5-dev` name: the
# versioned name is only a transitional alias, and the unpinned base-image tag
# can move to a Debian release that no longer ships it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        # Compiler toolchain and build helpers
        build-essential \
        gcc \
        g++ \
        make \
        cmake \
        pkg-config \
        # Download and version-control tools
        curl \
        wget \
        git \
        # FFI and TLS development headers
        libffi-dev \
        libssl-dev \
        # Image-format development headers
        libjpeg-dev \
        libpng-dev \
        libfreetype6-dev \
        libtiff-dev \
        libopenjp2-7-dev \
        # XML / XSLT / zlib development headers
        libxml2-dev \
        libxslt1-dev \
        zlib1g-dev \
        # OCR engine (English data) and PDF command-line utilities
        tesseract-ocr \
        tesseract-ocr-eng \
        poppler-utils \
        # SQLite CLI and development headers
        sqlite3 \
        libsqlite3-dev \
    # Drop package lists and caches inside this same layer
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /var/cache/apt/*
# Bring the packaging toolchain up to date before any project dependency is
# installed.
RUN python -m pip install --upgrade pip setuptools wheel

# Persist pip defaults in pip's configuration file.
# No `trusted-host` entries are configured on purpose: marking pypi.org /
# files.pythonhosted.org as trusted makes pip accept those hosts without a
# valid HTTPS certificate, which removes protection against tampered downloads.
# NOTE(review): if builds must run behind a TLS-intercepting proxy, install the
# proxy's CA certificate instead of disabling verification.
RUN pip config set global.no-cache-dir true \
    && pip config set global.disable-pip-version-check true
# Only the dependency manifest is copied at this point, so the install layer
# below can be served from the build cache until requirements.txt changes.
COPY requirements.txt .

# 1. project requirements, 2. extra server/runtime packages,
# 3. print a report of packages that have newer releases available.
RUN pip install --no-cache-dir --upgrade -r requirements.txt \
    && pip install --no-cache-dir gunicorn uvloop psutil \
    && pip list --outdated
# Application source (everything in the build context not excluded elsewhere).
COPY . .

# Working directories. Relative names are created under the current WORKDIR;
# the scratch directory lives under /tmp. Afterwards the whole /app tree is
# set to mode 755.
RUN mkdir -p \
        temp \
        logs \
        uploads \
        downloads \
        cache \
        /tmp/data_extractor_temp \
        static \
        data \
    && chmod -R 755 /app
# Directories written at runtime are opened to every user (777); static assets
# and code stay at 755.
# /tmp itself is deliberately NOT included in the recursive chmod: mode 777
# clears the sticky bit that /tmp normally carries (1777), which would let any
# user delete other users' temporary files, and the recursion would also
# loosen everything else stored there. Only the application's own scratch
# directory under /tmp is changed.
# The chmod calls on app.py and the package directories also act as a
# build-time check: the RUN fails if any of those paths is missing.
RUN chmod -R 777 temp logs uploads downloads cache /tmp/data_extractor_temp \
    && chmod -R 755 static \
    && chmod 755 app.py \
    && chmod -R 755 config utils workflow models
# Hand the application tree and its scratch directory under /tmp to app:app.
RUN chown -R app:app /app /tmp/data_extractor_temp
# Module search root and Gradio bind address / port.
ENV PYTHONPATH=/app \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# Matplotlib: non-interactive Agg backend, config directory under /tmp.
ENV MPLBACKEND=Agg \
    MPLCONFIGDIR=/tmp/mpl_cache

# Concurrency settings.
# NOTE(review): the code that reads these is not visible here - confirm
# whether Gradio or the application consumes them.
ENV GRADIO_QUEUE_DEFAULT_CONCURRENCY=10 \
    GRADIO_MAX_THREADS=20

# WebSocket bind address / port.
ENV WEBSOCKET_HOST=0.0.0.0 \
    WEBSOCKET_PORT=8765

# Scratch directory and session / upload limits.
# NOTE(review): SESSION_TIMEOUT is presumably in seconds - confirm.
ENV TEMP_DIR=/tmp/data_extractor_temp \
    SESSION_TIMEOUT=1800 \
    MAX_FILE_SIZE_MB=50

# Model identifier for each pipeline role.
ENV COORDINATOR_MODEL=gemini-2.5-pro \
    DATA_EXTRACTOR_MODEL=gemini-2.5-pro \
    DATA_ARRANGER_MODEL=gemini-2.5-pro \
    CODE_GENERATOR_MODEL=gemini-2.5-pro

# Interpreter hardening: do not prepend the script directory to sys.path,
# and use a randomised hash seed.
ENV PYTHONSAFEPATH=1 \
    PYTHONHASHSEED=random
# Ports the container listens on (match GRADIO_SERVER_PORT and WEBSOCKET_PORT).
EXPOSE 7860 8765

# Health probe: an HTTP GET against the web UI must succeed (curl -f fails on
# HTTP error statuses). First probe is given a 60 s grace period; three
# consecutive failures mark the container unhealthy.
HEALTHCHECK --start-period=60s --interval=30s --timeout=30s --retries=3 \
    CMD curl -f http://localhost:7860/ || exit 1
| USER root |
| |
| |
| RUN echo ' |
| set -e\n\ |
| echo "π Starting Data Extractor Multi-User Application..."\n\ |
| echo "π Python version: $(python --version)"\n\ |
| echo "π Server: 0.0.0.0:7860"\n\ |
| echo "π₯ Multi-user concurrency: Enabled"\n\ |
| echo "π Session isolation: Active"\n\ |
| echo "πΎ Temp directory: $TEMP_DIR"\n\ |
| \n\ |
| |
| mkdir -p "$TEMP_DIR"\n\ |
| mkdir -p /tmp/mpl_cache\n\ |
| chmod 777 "$TEMP_DIR" /tmp/mpl_cache\n\ |
| \n\ |
| |
| exec python app.py\n\ |
| ' > /app/start.sh && chmod +x /app/start.sh |
| |
| |
| CMD ["/app/start.sh"] |
| |