#!/usr/bin/env bash
# Wait until a GPU has enough free memory, then launch a vLLM OpenAI API server.
set -u

# GPU ID to monitor and free-memory threshold (MiB).
GPU_ID=7
THRESHOLD=20480

# Seconds to wait between polls (was inconsistent: message said 10, sleep was 5).
RETRY_SECS=10

# Command to run once enough memory is free.
RUN_COMMAND="CUDA_VISIBLE_DEVICES=6,7 python -m vllm.entrypoints.openai.api_server \
    --host 127.0.0.1 --port 8422 \
    --model meta-llama/Meta-Llama-3-8B-Instruct \
    --tensor-parallel-size 2 \
    --max-model-len 512"

while true; do
  # Query free memory (MiB) on the target GPU; empty on nvidia-smi failure.
  FREE_MEM=$(nvidia-smi --id="$GPU_ID" --query-gpu=memory.free --format=csv,noheader,nounits) || FREE_MEM=""

  # Guard: a failed query or non-numeric output would make [ -ge ] error out.
  if ! [[ "$FREE_MEM" =~ ^[0-9]+$ ]]; then
    echo "Could not read free memory for GPU $GPU_ID. Retrying in ${RETRY_SECS} seconds..." >&2
    sleep "$RETRY_SECS"
    continue
  fi

  echo "GPU $GPU_ID free memory: ${FREE_MEM} MiB"
  if [ "$FREE_MEM" -ge "$THRESHOLD" ]; then
    echo "Sufficient memory detected on GPU $GPU_ID. Starting the server..."
    # eval is required so the CUDA_VISIBLE_DEVICES=6,7 prefix in the command
    # string is applied as an environment assignment, not treated as a command.
    eval "$RUN_COMMAND"
    break
  else
    echo "Not enough memory on GPU $GPU_ID. Retrying in ${RETRY_SECS} seconds..."
    sleep "$RETRY_SECS"
  fi
done