#!/usr/bin/env bash
# Launch a vLLM OpenAI-compatible API server on 127.0.0.1:8422.
# Exactly one configuration is active at a time; the alternatives below are
# kept commented out for quick switching.
set -euo pipefail

# Active config: Qwen/QwQ-32B, tensor-parallel across GPUs 0 and 1.
# NOTE: the trailing '\' after the last argument was removed — a dangling
# continuation there made the shell treat the following comment line as
# part of this command.
CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \
  --host 127.0.0.1 --port 8422 \
  --model Qwen/QwQ-32B \
  --tensor-parallel-size 2 \
  --max-model-len 8192

# Alternative: Qwen3-8B on a single GPU with reasoning-content parsing.
# CUDA_VISIBLE_DEVICES=7 python -m vllm.entrypoints.openai.api_server \
#   --host 127.0.0.1 --port 8422 \
#   --model Qwen/Qwen3-8B \
#   --enable-reasoning --reasoning-parser deepseek_r1 \
#   --tensor-parallel-size 1 \
#   --max-model-len 2048

# Alternative: Meta-Llama-3-8B-Instruct on a single GPU.
# NOTE(review): '--enable-reasoning-parser' does not match the
# '--enable-reasoning --reasoning-parser <name>' form used above — looks
# like an invalid flag; confirm against the vLLM CLI before uncommenting.
# CUDA_VISIBLE_DEVICES=7 python -m vllm.entrypoints.openai.api_server \
#   --host 127.0.0.1 --port 8422 \
#   --model meta-llama/Meta-Llama-3-8B-Instruct \
#   --enable-reasoning-parser \
#   --tensor-parallel-size 1 \
#   --max-model-len 1024

# Alternative: same QwQ-32B config, but on GPUs 6 and 7.
# CUDA_VISIBLE_DEVICES=6,7 python -m vllm.entrypoints.openai.api_server \
#   --host 127.0.0.1 --port 8422 \
#   --model Qwen/QwQ-32B \
#   --tensor-parallel-size 2 \
#   --max-model-len 8192