# Optionally launch a vLLM server for a model, wait until its API answers,
# then run infer.py over the IWSLT17 TED tst2017 inputs numbered 0..11.
#
# Usage: <script> <device> <deploy_flag> <step> <model_path> <language>
#   device       value exported as CUDA_VISIBLE_DEVICES; it is also appended
#                to the port prefix "801" (so it is presumably a single digit
#                -- TODO confirm with callers)
#   deploy_flag  "true" launches `vllm serve` first; any other value skips it
#   step         tag used in the vLLM log name and the output directory name
#   model_path   model handed to `vllm serve` (required only when deploying)
#   language     "<src>-<tgt>" pair, e.g. en-zh
#
# NOTE(review): no shebang is visible in this file, so only constructs that
# behave the same under bash and zsh are used below.

device=$1
deploy_flag=$2
step=$3
model_path=$4
language=$5

usage="Usage: $0 <device> <deploy_flag> <step> <model_path> <language>"

if [ -z "$device" ] || [ -z "$deploy_flag" ] || [ -z "$step" ]; then
  echo "$usage" >&2
  exit 1
fi

# The language pair is split on '-' below; reject values that would produce
# an empty source or target language and therefore nonsensical file paths.
case "$language" in
  ?*-?*) ;;
  *)
    echo "Error: <language> must look like <src>-<tgt>, got '${language}'" >&2
    echo "$usage" >&2
    exit 1
    ;;
esac

# The model path is only consumed by `vllm serve`.
if [ "$deploy_flag" = "true" ] && [ -z "$model_path" ]; then
  echo "Error: <model_path> is required when <deploy_flag> is true" >&2
  echo "$usage" >&2
  exit 1
fi

src_lang=${language%-*}
tgt_lang=${language#*-}

# host:port of the OpenAI-compatible endpoint; the port is 801<device>.
address=127.0.0.1:801${device}
host=${address%%:*}
port=${address##*:}

# Directory that contains infer.py.
work_dir=/data/wyt/codes/DocDPO/inference_monolang/ted_en_zh_balanced_paritial

if [ "$deploy_flag" = "true" ]; then
  if [ "$host" = "127.0.0.1" ]; then
    # Local deployment: serve from the "vllm" conda env in the background,
    # then switch to "optima-vllm" for the client-side steps below.
    source ~/.zshrc
    conda activate vllm
    CUDA_VISIBLE_DEVICES="${device}" nohup vllm serve "${model_path}" \
      --host 0.0.0.0 \
      --port "${port}" \
      --served-model-name "qwen" \
      --enable-prefix-caching \
      --gpu_memory_utilization 0.9 \
      > "vllm_${step}.log" 2>&1 &
    conda activate optima-vllm
  else
    # Remote deployment: the command string is expanded locally and executed
    # by the remote shell, so values containing whitespace are not supported.
    ssh -n "wyt@${host}" "source ~/.zshrc && conda activate optima-vllm && CUDA_VISIBLE_DEVICES=${device} nohup vllm serve ${model_path} --host 0.0.0.0 --port ${port} --served-model-name qwen --enable-prefix-caching > /dev/null 2>&1 &"
  fi
fi

# No fixed delay is performed here; readiness is established by the polling
# loop that follows.
echo "Waiting for LLM deployment in 20 seconds..."

# test_api.py is resolved relative to the directory the script was started
# from (the cd into work_dir happens later).
echo "Testing API of ${address}..."
until python test_api.py "$address"; do
  echo "API connection failed. Retrying in 5 seconds..."
  sleep 5
done
echo "API connected successfully!"

cur_path=$(pwd)
pair="${src_lang}-${tgt_lang}"
data_dir="/data/wyt/codes/DocDPO/data/2017-01-ted-test/${pair}"
output_dir="${cur_path}/results/${pair}_${step}"

cd "$work_dir" || {
  echo "Error: cannot cd to ${work_dir}" >&2
  exit 1
}

for i in {0..11}; do
  input_name="IWSLT17.TED.tst2017.${pair}.${src_lang}.${i}"

  # Skip inputs whose translation already exists. The check uses the same
  # directory that is passed as --output_path.
  # NOTE(review): assumes infer.py writes <input_name>.<tgt_lang> under
  # --output_path -- confirm against infer.py.
  if [ -f "${output_dir}/${input_name}.${tgt_lang}" ]; then
    continue
  fi

  echo "${input_name}.${tgt_lang}"
  python -u infer.py \
    --src_file "${data_dir}/${input_name}" \
    --output_path "${output_dir}" \
    --window_size 10 \
    --infer_address "$address" \
    --schedule_address "$address" \
    --language "${pair}" \
    --infer_temperature 0.7 \
    --schedule_temperature 0.7 \
    --translate_style base
done

cd "$cur_path"