from typing import Any

from transformers.models.qwen2.tokenization_qwen2 import Qwen2Tokenizer

from .processing import BaseProcessor, QwenImageProcessor


class QwenCASAProcessor(BaseProcessor):
    """Processor for Qwen-based CASA models.

    Combines a ``Qwen2Tokenizer`` for text with a ``QwenImageProcessor``
    for images, using hard-coded Qwen special-token ids to delimit chat
    turns and image regions.
    """

    attributes = ["tokenizer"]
    tokenizer_class = "Qwen2Tokenizer"

    def __init__(
        self,
        tokenizer: Qwen2Tokenizer,
        # The defaults below are special-token ids from the Qwen2 vocabulary:
        # image start/end markers, and chat-turn delimiters followed by the
        # role word ("system", "user", "assistant") and a trailing newline.
        pre_image_tokens: tuple[int, ...] = (151652,),
        post_image_tokens: tuple[int, ...] = (151653,),
        system_start_tokens: tuple[int, ...] = (151644, 8948, 198),
        system_end_tokens: tuple[int, ...] = (151645, 198),
        user_start_tokens: tuple[int, ...] = (151644, 872, 198),
        user_end_tokens: tuple[int, ...] = (151645, 198),
        asst_start_tokens: tuple[int, ...] = (151644, 77091, 198),
        asst_end_tokens: tuple[int, ...] = (151645, 198),
        image_size: int = 448,
        **kwargs: Any,
    ):
        # Accept and discard any extra keyword arguments.
        del kwargs
        super().__init__(
            tokenizer=tokenizer,
            pre_image_tokens=pre_image_tokens,
            post_image_tokens=post_image_tokens,
            system_start_tokens=system_start_tokens,
            system_end_tokens=system_end_tokens,
            user_start_tokens=user_start_tokens,
            user_end_tokens=user_end_tokens,
            asst_start_tokens=asst_start_tokens,
            asst_end_tokens=asst_end_tokens,
        )
        # Image preprocessing is delegated to QwenImageProcessor at the
        # requested square resolution.
        self._image_processor = QwenImageProcessor(img_size=image_size)
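
# Usage sketch (illustrative only): the checkpoint name below is an assumption,
# and how the processor is invoked afterwards depends on BaseProcessor in
# .processing, which is not shown here.
#
#     from transformers import Qwen2Tokenizer
#
#     tokenizer = Qwen2Tokenizer.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")  # assumed checkpoint
#     processor = QwenCASAProcessor(tokenizer=tokenizer, image_size=448)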