# File size: 916 Bytes
# Launch the vLLM OpenAI-compatible API server for Qwen/QwQ-32B.
#   - CUDA_VISIBLE_DEVICES=0,1 exposes only GPUs 0 and 1 to the process.
#   - --tensor-parallel-size 2 matches the two visible GPUs.
#   - The server binds to the loopback address 127.0.0.1 on port 8422.
#   - --max-model-len 8192 sets the maximum model context length.
# NOTE: the last option line deliberately has no trailing backslash, so
# whatever follows this command is never folded into its argument list.
CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \
  --host 127.0.0.1 --port 8422 \
  --model Qwen/QwQ-32B \
  --tensor-parallel-size 2 \
  --max-model-len 8192
# CUDA_VISIBLE_DEVICES=7 python -m vllm.entrypoints.openai.api_server \
# --host 127.0.0.1 --port 8422 \
# --model Qwen/Qwen3-8B \
# --enable-reasoning --reasoning-parser deepseek_r1 \
# --tensor-parallel-size 1 \
# --max-model-len 2048 \
# CUDA_VISIBLE_DEVICES=7 python -m vllm.entrypoints.openai.api_server \
# --host 127.0.0.1 --port 8422 \
# --model meta-llama/Meta-Llama-3-8B-Instruct \
# --enable-reasoning-parser \
# --tensor-parallel-size 1 \
# --max-model-len 1024 \
# CUDA_VISIBLE_DEVICES=6,7 python -m vllm.entrypoints.openai.api_server \
# --host 127.0.0.1 --port 8422 \
# --model Qwen/QwQ-32B \
# --tensor-parallel-size 2 \
# --max-model-len 8192 \