# flaskAPI / Dockerfile
# Uploaded via huggingface_hub by TruVlad (commit 0e2d813, verified)
# Pin the base image so builds are reproducible; bump this tag deliberately
# instead of tracking :latest (hadolint DL3007).
FROM ollama/ollama:0.12.3

# Refresh the package index and apply security updates in a single layer.
# Use apt-get (stable, scriptable CLI) rather than apt (hadolint DL3027),
# and drop the index lists in the same layer so they don't bloat the image.
RUN apt-get update \
    && apt-get upgrade -y \
    && rm -rf /var/lib/apt/lists/*
# Reference: Ollama server tunables (settable via ENV below or at `docker run`):
#   OLLAMA_DEBUG            Show additional debug information (e.g. OLLAMA_DEBUG=1)
#   OLLAMA_HOST             IP address for the ollama server (default 127.0.0.1:11434)
#   OLLAMA_CONTEXT_LENGTH   Context length to use unless otherwise specified (default: 4096)
#   OLLAMA_KEEP_ALIVE       The duration that models stay loaded in memory (default "5m")
#   OLLAMA_MAX_LOADED_MODELS  Maximum number of loaded models per GPU
#   OLLAMA_MAX_QUEUE        Maximum number of queued requests
#   OLLAMA_MODELS           The path to the models directory
#   OLLAMA_NUM_PARALLEL     Maximum number of parallel requests
#   OLLAMA_NOPRUNE          Do not prune model blobs on startup
#   OLLAMA_ORIGINS          A comma separated list of allowed origins
#   OLLAMA_SCHED_SPREAD     Always schedule model across all GPUs
#   OLLAMA_FLASH_ATTENTION  Enabled flash attention
#   OLLAMA_KV_CACHE_TYPE    Quantization type for the K/V cache (default: f16)
#   OLLAMA_LLM_LIBRARY      Set LLM library to bypass autodetection
#   OLLAMA_GPU_OVERHEAD     Reserve a portion of VRAM per GPU (bytes)
#   OLLAMA_LOAD_TIMEOUT

# Runtime configuration, grouped in one instruction: keep models resident for
# a full day, listen on all interfaces on port 7861 (the port this deployment
# serves on), and allow up to 24h for slow model loads.
ENV OLLAMA_KEEP_ALIVE="24h" \
    OLLAMA_HOST=0.0.0.0:7861 \
    OLLAMA_LOAD_TIMEOUT="24h"
# Toolchain for the Flask API: git + g++ for any source builds during pip
# install, python3/python3-pip for the app itself. `update` and `install`
# share one layer so a stale cached index can never be used (hadolint
# DL3009); `--no-install-recommends` and list cleanup keep the layer small.
# (The blanket `apt-get upgrade` that used to sit here was redundant — the
# earlier upgrade layer already applies it.)
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        g++ \
        git \
        python3 \
        python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest before installing, so the pip layer stays
# cached until requirements.txt itself changes (editing a pull script or app
# file no longer reinstalls every package).
COPY requirements.txt requirements.txt
# --break-system-packages is needed on Debian bookworm-based images (PEP 668)
# because packages go into the system interpreter rather than a venv.
RUN pip install --no-cache-dir -r requirements.txt --break-system-packages

# Model-pull helper scripts. The disabled RUN lines are kept so a model can be
# baked into the image at build time by uncommenting the matching line.
COPY pull06.sh pull06.sh
COPY pull17.sh pull17.sh
COPY pull4.sh pull4.sh
COPY pull8.sh pull8.sh
COPY pull14.sh pull14.sh
# RUN /bin/bash -x pull06.sh
# RUN /bin/bash -x pull8.sh
# RUN /bin/bash -x pull14.sh
# Data volumes, declared with explicit absolute paths in JSON form. The
# original relative names resolved against the default WORKDIR "/", so these
# are the same mount points (/vol1, /vol2) made explicit.
VOLUME ["/vol1", "/vol2"]

# Application code and launcher (land in / since no WORKDIR is set).
COPY main.py main.py
COPY util.py util.py
COPY start.sh start.sh

# Exec-form entrypoint (proper PID 1 / signal handling). The script path is
# absolute so startup does not depend on the container's working directory.
# NOTE(review): start.sh presumably launches `ollama serve` plus the Flask
# app — confirm it ends by exec'ing the long-running process.
#ENTRYPOINT ["/usr/bin/ollama", "serve"]
ENTRYPOINT ["/bin/bash", "-x", "/start.sh"]