# Hugging Face Space (Docker) — GenSearcher + FireRed
# Requires GPU. For multi-GPU full-local mode, set START_VLLM_*=1 and CUDA device envs in README.
# Change note (c994fd2): Fix HF exec entrypoint: generate entrypoint.sh in Docker; body in entrypoint_body.sh
FROM pytorch/pytorch:2.5.1-cuda12.4-cudnn9-runtime

# Build-time only: keeps apt non-interactive during the build without persisting into the runtime env.
ARG DEBIAN_FRONTEND=noninteractive

RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

ENV PYTHONPATH=/app/vendor/rllm
ENV GRADIO_SERVER_PORT=7860

# HF Spaces / minimal images often have uid 1000 with no /etc/passwd entry; PyTorch Inductor calls
# getpass.getuser() and crashes with KeyError. USER/LOGNAME short-circuit getuser(); cache dirs avoid $HOME issues.
ENV USER=huggingface \
    LOGNAME=huggingface \
    TORCHINDUCTOR_CACHE_DIR=/tmp/torch_inductor_cache \
    TRITON_CACHE_DIR=/tmp/triton_cache

# --- Dependency layers -------------------------------------------------------
# Only the inputs of the pip steps are copied before them, so editing application code or
# scripts further down does not invalidate (and re-download) the Python dependency layers.
COPY vendor/rllm /app/vendor/rllm
COPY requirements.txt /app/requirements.txt

RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
    && pip install --no-cache-dir -e /app/vendor/rllm \
    && pip install --no-cache-dir -r /app/requirements.txt

# Optional: local vLLM inside the image (large). Disable with --build-arg INSTALL_VLLM=0 if you only use external APIs.
ARG INSTALL_VLLM=1
RUN if [ "$INSTALL_VLLM" = "1" ]; then pip install --no-cache-dir "vllm>=0.6.3"; fi

# --- Application layers (change often; kept after the dependency layers) -------
COPY app.py space_gen.py space_health.py /app/
COPY services /app/services
COPY scripts/entrypoint_body.sh scripts/verify_env.py /app/scripts/

# HF may exec /app/scripts/entrypoint.sh directly — never COPY it from Windows (CRLF → "exec format error").
# Build a 2-line LF-only stub; normalize body script bytes inside Linux:
#   1. strip a leading UTF-8 BOM, 2. convert CRLF to LF, 3. drop any remaining bare CR.
RUN python3 -c "import pathlib; p=pathlib.Path('/app/scripts/entrypoint_body.sh'); b=p.read_bytes(); b=b.lstrip(b'\xef\xbb\xbf'); b=b.replace(b'\r\n', b'\n').replace(b'\r', b''); p.write_bytes(b)" \
    && printf '%s\n' '#!/bin/bash' 'exec /bin/bash /app/scripts/entrypoint_body.sh' > /app/scripts/entrypoint.sh \
    && chmod +x /app/scripts/entrypoint.sh /app/scripts/entrypoint_body.sh

# Documentation only; matches GRADIO_SERVER_PORT above.
EXPOSE 7860

# PID 1 is bash (real ELF). HF may ignore this and exec entrypoint.sh; that file is generated above with valid shebang.
CMD ["/bin/bash", "/app/scripts/entrypoint_body.sh"]