File size: 916 Bytes
a80f6e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Launch the vLLM OpenAI-compatible API server for Qwen/QwQ-32B.
#   - CUDA_VISIBLE_DEVICES=0,1 exposes two GPUs to the process, matching
#     --tensor-parallel-size 2.
#   - The server listens on 127.0.0.1:8422.
#   - --max-model-len 8192 is the maximum model length handed to vLLM.
# The final argument deliberately has no trailing backslash, so the command
# ends on that line regardless of what follows it in the file.
CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server \
    --host 127.0.0.1 --port 8422 \
    --model Qwen/QwQ-32B \
    --tensor-parallel-size 2 \
    --max-model-len 8192

# CUDA_VISIBLE_DEVICES=7 python -m vllm.entrypoints.openai.api_server \
#     --host 127.0.0.1 --port 8422 \
#     --model Qwen/Qwen3-8B \
#     --enable-reasoning --reasoning-parser deepseek_r1 \
#     --tensor-parallel-size 1 \
#     --max-model-len 2048  \

# CUDA_VISIBLE_DEVICES=7 python -m vllm.entrypoints.openai.api_server \
#     --host 127.0.0.1 --port 8422 \
#     --model meta-llama/Meta-Llama-3-8B-Instruct \
#     --enable-reasoning-parser \
#     --tensor-parallel-size 1 \
#     --max-model-len 1024  \



# CUDA_VISIBLE_DEVICES=6,7 python -m vllm.entrypoints.openai.api_server \
#     --host 127.0.0.1 --port 8422 \
#     --model Qwen/QwQ-32B \
#     --tensor-parallel-size 2 \
#     --max-model-len 8192 \