#!/usr/bin/env bash
# Wait until a GPU has enough free memory, then launch a vLLM OpenAI API server.
set -u

# GPU ID to monitor and free-memory threshold (MiB).
GPU_ID=7
THRESHOLD=20480

# Seconds to wait between polls (was inconsistent: message said 10, sleep was 5).
RETRY_SECS=10

# Command to run once enough memory is free.
RUN_COMMAND="CUDA_VISIBLE_DEVICES=6,7 python -m vllm.entrypoints.openai.api_server \
    --host 127.0.0.1 --port 8422 \
    --model meta-llama/Meta-Llama-3-8B-Instruct \
    --tensor-parallel-size 2 \
    --max-model-len 512"

while true; do
  # Query free memory (MiB) on the target GPU; empty on nvidia-smi failure.
  FREE_MEM=$(nvidia-smi --id="$GPU_ID" --query-gpu=memory.free --format=csv,noheader,nounits) || FREE_MEM=""

  # Guard: a failed query or non-numeric output would make [ -ge ] error out.
  if ! [[ "$FREE_MEM" =~ ^[0-9]+$ ]]; then
    echo "Could not read free memory for GPU $GPU_ID. Retrying in ${RETRY_SECS} seconds..." >&2
    sleep "$RETRY_SECS"
    continue
  fi

  echo "GPU $GPU_ID free memory: ${FREE_MEM} MiB"
  if [ "$FREE_MEM" -ge "$THRESHOLD" ]; then
    echo "Sufficient memory detected on GPU $GPU_ID. Starting the server..."
    # eval is required so the CUDA_VISIBLE_DEVICES=6,7 prefix in the command
    # string is applied as an environment assignment, not treated as a command.
    eval "$RUN_COMMAND"
    break
  else
    echo "Not enough memory on GPU $GPU_ID. Retrying in ${RETRY_SECS} seconds..."
    sleep "$RETRY_SECS"
  fi
done