Image-Text-to-Text
Transformers
Safetensors
English
CASA_Qwen_2_5_VL_3B
conversational
custom_code
CASA-Qwen2_5-VL-3B / processing_qwen2_5vl_casa.py
ameroyer's picture
Super-squash branch 'main' using huggingface_hub
eb26251 verified
from typing import Any
from transformers.models.qwen2.tokenization_qwen2 import Qwen2Tokenizer
from .processing import BaseProcessor, QwenImageProcessor
class QwenCASAProcessor(BaseProcessor):
attributes = ["tokenizer"]
tokenizer_class = "Qwen2Tokenizer"
def __init__(
self,
tokenizer: Qwen2Tokenizer,
pre_image_tokens: tuple[int, ...] = (151652,),
post_image_tokens: tuple[int, ...] = (151653,),
system_start_tokens: tuple[int, ...] = (151644, 8948, 198),
system_end_tokens: tuple[int, ...] = (151645, 198),
user_start_tokens: tuple[int, ...] = (151644, 872, 198),
user_end_tokens: tuple[int, ...] = (151645, 198),
asst_start_tokens: tuple[int, ...] = (151644, 77091, 198),
asst_end_tokens: tuple[int, ...] = (151645, 198),
image_size: int = 448,
**kwargs: Any,
):
del kwargs
super().__init__(
tokenizer=tokenizer,
pre_image_tokens=pre_image_tokens,
post_image_tokens=post_image_tokens,
system_start_tokens=system_start_tokens,
system_end_tokens=system_end_tokens,
user_start_tokens=user_start_tokens,
user_end_tokens=user_end_tokens,
asst_start_tokens=asst_start_tokens,
asst_end_tokens=asst_end_tokens,
)
self._image_processor = QwenImageProcessor(img_size=image_size)