# FROM ollama/ollama:0.12.3
FROM ollama/ollama:latest
# OLLAMA_DEBUG              Show additional debug information (e.g. OLLAMA_DEBUG=1)
# OLLAMA_HOST               IP address for the ollama server (default 127.0.0.1:11434)
# OLLAMA_CONTEXT_LENGTH     Context length to use unless otherwise specified (default: 4096)
# OLLAMA_KEEP_ALIVE         The duration that models stay loaded in memory (default "5m")
# OLLAMA_MAX_LOADED_MODELS  Maximum number of loaded models per GPU
# OLLAMA_MAX_QUEUE          Maximum number of queued requests
# OLLAMA_MODELS             The path to the models directory
# OLLAMA_NUM_PARALLEL       Maximum number of parallel requests
# OLLAMA_NOPRUNE            Do not prune model blobs on startup
# OLLAMA_ORIGINS            A comma-separated list of allowed origins
# OLLAMA_SCHED_SPREAD       Always schedule model across all GPUs
# OLLAMA_FLASH_ATTENTION    Enable flash attention
# OLLAMA_KV_CACHE_TYPE      Quantization type for the K/V cache (default: f16)
# OLLAMA_LLM_LIBRARY        Set LLM library to bypass autodetection
# OLLAMA_GPU_OVERHEAD       Reserve a portion of VRAM per GPU (bytes)
# OLLAMA_LOAD_TIMEOUT       How long to allow model loads to stall before giving up (default "5m")
ENV OLLAMA_KEEP_ALIVE="24h"
ENV OLLAMA_HOST=0.0.0.0:7861
ENV OLLAMA_LOAD_TIMEOUT="24h"
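# Any of these can also be overridden at container start without rebuilding,
# e.g. (illustrative invocation; the image name is a placeholder):
#   docker run -e OLLAMA_CONTEXT_LENGTH=8192 -e OLLAMA_NUM_PARALLEL=2 -p 7861:7861 my-ollama-image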
RUN apt-get update && apt-get upgrade -y \
    && apt-get install -y git g++ python3 python3-pip \
    && apt-get clean && rm -rf /var/lib/apt/lists/*
COPY requirements.txt requirements.txt
COPY pull06.sh pull06.sh
COPY pull17.sh pull17.sh
COPY pull4.sh pull4.sh
COPY pull8.sh pull8.sh
# RUN /bin/bash -x pull06.sh
# RUN /bin/bash -x pull8.sh
COPY pull14.sh pull14.sh
# RUN /bin/bash -x pull14.sh
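# The pull*.sh scripts presumably pre-pull models at build time; their RUN lines
# are commented out so models download when the container first runs, keeping
# the image smaller. A hypothetical pull script (contents are an assumption,
# not taken from this repo) might look like:
#   ollama serve & SERVER_PID=$!
#   sleep 5
#   ollama pull llama3.2
#   kill $SERVER_PID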
# PEP 668 marks the system Python as externally managed, so pip refuses to
# install system-wide without --break-system-packages (a venv would be the
# stricter alternative).
RUN pip install --no-cache-dir -r requirements.txt --break-system-packages
VOLUME vol1 vol2
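# Docker volume paths are conventionally absolute; if /vol1 and /vol2 are the
# intent, they can be bound to named volumes at run time (names below are
# illustrative):
#   docker run -v models:/vol1 -v data:/vol2 my-ollama-image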
COPY main.py main.py
COPY util.py util.py
COPY start.sh start.sh
# ENTRYPOINT ["/usr/bin/ollama", "serve"]
ENTRYPOINT ["/bin/bash", "-x", "start.sh"]
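# start.sh is not shown in this file; assuming it has to run the ollama server
# and the Python app side by side, a minimal sketch (an assumption, not the
# actual script) would be:
#   #!/bin/bash
#   ollama serve &
#   exec python3 main.py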