#!/bin/bash
# GPU ID and free-memory threshold (MiB) required before launching the server.
GPU_ID=7
THRESHOLD=20480
# Seconds to wait between polls. Single source of truth so the log message
# and the actual sleep cannot drift apart (the original said 10 but slept 5).
RETRY_INTERVAL=10

# Command to run once enough memory is free, stored as an array so it can be
# executed directly instead of via 'eval' (avoids re-parsing/injection issues).
RUN_COMMAND=(python -m vllm.entrypoints.openai.api_server
  --host 127.0.0.1 --port 8422
  --model meta-llama/Meta-Llama-3-8B-Instruct
  --tensor-parallel-size 2
  --max-model-len 512)

while true; do
  # Query free memory on the target GPU (MiB, bare number).
  FREE_MEM=$(nvidia-smi --id="$GPU_ID" --query-gpu=memory.free --format=csv,noheader,nounits)
  # Guard: if nvidia-smi failed or returned non-numeric output, the original
  # '[ -ge ]' test would abort the script with a syntax error; retry instead.
  if ! [[ "$FREE_MEM" =~ ^[0-9]+$ ]]; then
    echo "Failed to query GPU $GPU_ID memory (got: '$FREE_MEM'). Retrying in $RETRY_INTERVAL seconds..." >&2
    sleep "$RETRY_INTERVAL"
    continue
  fi
  echo "GPU $GPU_ID free memory: ${FREE_MEM} MiB"
  if (( FREE_MEM >= THRESHOLD )); then
    echo "Sufficient memory detected on GPU $GPU_ID. Starting the server..."
    # Env-var prefix applies only to this command; array expands to exact argv.
    CUDA_VISIBLE_DEVICES=6,7 "${RUN_COMMAND[@]}"
    break
  else
    echo "Not enough memory on GPU $GPU_ID. Retrying in $RETRY_INTERVAL seconds..."
    sleep "$RETRY_INTERVAL"
  fi
done