from transformers.tokenization_utils_fast import PreTrainedTokenizerFast

from .processing import BaseProcessor, QwenImageProcessor

class Helium1CASAProcessor(BaseProcessor):
    """Chat/vision processor that pairs a fast tokenizer with a Qwen-style
    image processor and the special-token chat layout configured below."""

    attributes = ["tokenizer"]
    tokenizer_class = "PreTrainedTokenizerFast"

    def __init__(
        self,
        tokenizer: PreTrainedTokenizerFast,
        pre_image_tokens: tuple[int, ...] = (),
        post_image_tokens: tuple[int, ...] = (),
        system_start_tokens: tuple[int, ...] = (),
        system_end_tokens: tuple[int, ...] = (),
        # Special-token ids that open and close user and assistant turns.
        user_start_tokens: tuple[int, ...] = (104,),
        user_end_tokens: tuple[int, ...] = (105,),
        asst_start_tokens: tuple[int, ...] = (102,),
        asst_end_tokens: tuple[int, ...] = (103,),
        bos_token: int = 1,
        image_size: int = 896,
    ):
        super().__init__(
            tokenizer=tokenizer,
            pre_image_tokens=pre_image_tokens,
            post_image_tokens=post_image_tokens,
            system_start_tokens=system_start_tokens,
            system_end_tokens=system_end_tokens,
            user_start_tokens=user_start_tokens,
            user_end_tokens=user_end_tokens,
            asst_start_tokens=asst_start_tokens,
            asst_end_tokens=asst_end_tokens,
            # This model takes no system prompt; the system start/end
            # token tuples above default to empty accordingly.
            allow_system_prompt=False,
            bos_token=bos_token,
        )
        # Images are preprocessed with a Qwen-style image processor at a
        # fixed square resolution (896x896 by default).
        self._image_processor = QwenImageProcessor(img_size=image_size)
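
# Example usage (a minimal sketch; the tokenizer path below is a
# hypothetical placeholder, not a published checkpoint):
#
#     from transformers import AutoTokenizer
#
#     tokenizer = AutoTokenizer.from_pretrained("path/to/helium1-casa-tokenizer")
#     processor = Helium1CASAProcessor(tokenizer, image_size=896)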