# =============================================================================
# Stage 1: Builder - install dependencies and download models
# =============================================================================
FROM python:3.11-slim-bookworm AS builder

WORKDIR /app

# System dependencies for building
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl && \
    rm -rf /var/lib/apt/lists/*

# Use the CPU-only torch wheel index (avoids pulling 2GB+ of CUDA libraries)
ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu

# Install CPU-only torch first
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu

# Install pinned dependencies from requirements.txt for reproducible builds
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code and install the package (--no-deps since deps are already installed)
# Note: pyproject.toml and the package source are copied after the dependency
# install, so the heavy dependency layers stay cached when only application
# code or package metadata (e.g., a version bump) changes.
COPY pyproject.toml .
COPY sage/ sage/
RUN pip install --no-cache-dir . --no-deps

# Pre-download models to the cache directory
ENV HF_HOME=/app/.cache/huggingface

# Download E5-small embedding model (~134MB)
RUN python -c "\
from sentence_transformers import SentenceTransformer; \
SentenceTransformer('intfloat/e5-small-v2')"

# Download HHEM hallucination detection model (~892MB)
# HHEM uses a custom config pointing to its foundation T5 model for the tokenizer
RUN python -c "\
from transformers import AutoConfig, AutoTokenizer; \
from huggingface_hub import hf_hub_download; \
config = AutoConfig.from_pretrained('vectara/hallucination_evaluation_model', trust_remote_code=True); \
AutoTokenizer.from_pretrained(config.foundation); \
AutoConfig.from_pretrained(config.foundation); \
hf_hub_download('vectara/hallucination_evaluation_model', 'model.safetensors')"

# =============================================================================
# Stage 2: Runtime - slim image with only what's needed
# =============================================================================
FROM python:3.11-slim-bookworm AS runtime

WORKDIR /app

# Only curl is needed for the healthcheck (no build tools)
RUN apt-get update && \
    apt-get install -y --no-install-recommends curl && \
    rm -rf /var/lib/apt/lists/*

# Non-root user with UID 1000 (required by HF Spaces)
RUN useradd -m -u 1000 user

# Copy installed packages from builder
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Copy application code
COPY --from=builder /app/sage /app/sage

# Copy pre-downloaded models from builder
COPY --from=builder /app/.cache /app/.cache

# Environment
ENV HF_HOME=/app/.cache/huggingface
ENV PYTHONUNBUFFERED=1

# Fix ownership for the non-root user
RUN chown -R user:user /app

USER user

# Default port 7860 for HF Spaces; overridden by the PORT env var at runtime
ENV PORT=7860
EXPOSE 7860

# Health check with a startup grace period (models take ~30s to load)
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
    CMD curl -sf http://localhost:${PORT:-7860}/health || exit 1

CMD ["python", "-m", "sage.api.run", "--host", "0.0.0.0"]
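
# =============================================================================
# Example local usage (a sketch; the "sage-api" image tag and host port
# mapping are illustrative, not defined by this repo):
#
#   docker build -t sage-api .
#   docker run --rm -p 7860:7860 sage-api
#   curl http://localhost:7860/health
# =============================================================================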