vLLM deployment error

#3
by xingchun - opened

cmd

python -m vllm.entrypoints.openai.api_server --served-model-name Qwen3.5-27B --model /data/models/Qwen3.5-27B-Claude-Opus-4.6-Distill --host 0.0.0.0 --port 8006  --api-key 123456 --trust-remote-code  --tensor-parallel-size 2 --reasoning-parser qwen3 --max-model-len 65536 --gpu-memory-utilization 0.80 --enable-prefix-caching  --max-num-seqs 5 --enable-auto-tool-choice --tool-call-parser qwen3_coder  --enable-chunked-prefill --calculate-kv-scales 

error

 are exactly equal.
(APIServer pid=48274) INFO 03-26 17:21:15 [vllm.py:748] Asynchronous scheduling is enabled.
(APIServer pid=48274) Traceback (most recent call last):
(APIServer pid=48274)   File "<frozen runpy>", line 198, in _run_module_as_main
(APIServer pid=48274)   File "<frozen runpy>", line 88, in _run_code
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 689, in <module>
(APIServer pid=48274)     uvloop.run(run_server(args))
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/uvloop/__init__.py", line 96, in run
(APIServer pid=48274)     return __asyncio.run(
(APIServer pid=48274)            ^^^^^^^^^^^^^^
(APIServer pid=48274)   File "/root/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/asyncio/runners.py", line 195, in run
(APIServer pid=48274)     return runner.run(main)
(APIServer pid=48274)            ^^^^^^^^^^^^^^^^
(APIServer pid=48274)   File "/root/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/asyncio/runners.py", line 118, in run
(APIServer pid=48274)     return self._loop.run_until_complete(task)
(APIServer pid=48274)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=48274)   File "uvloop/loop.pyx", line 1518, in uvloop.loop.Loop.run_until_complete
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/uvloop/__init__.py", line 48, in wrapper
(APIServer pid=48274)     return await main
(APIServer pid=48274)            ^^^^^^^^^^
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 649, in run_server
(APIServer pid=48274)     await run_server_worker(listen_address, sock, args, **uvicorn_kwargs)
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 663, in run_server_worker
(APIServer pid=48274)     async with build_async_engine_client(
(APIServer pid=48274)                ^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=48274)   File "/root/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 210, in __aenter__
(APIServer pid=48274)     return await anext(self.gen)
(APIServer pid=48274)            ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 101, in build_async_engine_client
(APIServer pid=48274)     async with build_async_engine_client_from_engine_args(
(APIServer pid=48274)                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=48274)   File "/root/.local/share/uv/python/cpython-3.12.9-linux-x86_64-gnu/lib/python3.12/contextlib.py", line 210, in __aenter__
(APIServer pid=48274)     return await anext(self.gen)
(APIServer pid=48274)            ^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/vllm/entrypoints/openai/api_server.py", line 142, in build_async_engine_client_from_engine_args
(APIServer pid=48274)     async_llm = AsyncLLM.from_vllm_config(
(APIServer pid=48274)                 ^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/vllm/v1/engine/async_llm.py", line 225, in from_vllm_config
(APIServer pid=48274)     return cls(
(APIServer pid=48274)            ^^^^
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/vllm/v1/engine/async_llm.py", line 135, in __init__
(APIServer pid=48274)     self.renderer = renderer = renderer_from_config(self.vllm_config)
(APIServer pid=48274)                                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/vllm/renderers/registry.py", line 91, in renderer_from_config
(APIServer pid=48274)     return RENDERER_REGISTRY.load_renderer(
(APIServer pid=48274)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/vllm/renderers/registry.py", line 64, in load_renderer
(APIServer pid=48274)     return renderer_cls.from_config(config, tokenizer_kwargs)
(APIServer pid=48274)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/vllm/renderers/hf.py", line 623, in from_config
(APIServer pid=48274)     cached_get_tokenizer(
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/vllm/tokenizers/registry.py", line 210, in get_tokenizer
(APIServer pid=48274)     tokenizer = tokenizer_cls_.from_pretrained(tokenizer_name, *args, **kwargs)
(APIServer pid=48274)                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/vllm/tokenizers/hf.py", line 110, in from_pretrained
(APIServer pid=48274)     raise e
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/vllm/tokenizers/hf.py", line 85, in from_pretrained
(APIServer pid=48274)     tokenizer = AutoTokenizer.from_pretrained(
(APIServer pid=48274)                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/modelscope/utils/hf_util/patcher.py", line 179, in patch_pretrained_model_name_or_path
(APIServer pid=48274)     return cls._from_pretrained_origin.__func__(cls, model_dir,
(APIServer pid=48274)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(APIServer pid=48274)   File "/data/py_env/qwen3.5/lib/python3.12/site-packages/transformers/models/auto/tokenization_auto.py", line 1153, in from_pretrained
(APIServer pid=48274)     raise ValueError(
(APIServer pid=48274) ValueError: Tokenizer class TokenizersBackend does not exist or is not currently imported.

version

vllm                                     0.17.1rc1.dev126+gbc2c0c86e
transformers                             4.57.6

How can this be solved?

TeichAI org

v0.18.1rc0 is the latest vLLM version; try updating.

Sign up or log in to comment