from transformers.tokenization_utils_fast import PreTrainedTokenizerFast

from .processing import BaseProcessor, QwenImageProcessor


class Helium1CASAProcessor(BaseProcessor):
    """Chat/image processor for the Helium1 CASA model.

    Thin configuration wrapper around :class:`BaseProcessor`: it supplies the
    model-specific chat-delimiter token ids (user/assistant start/end, optional
    system and image wrappers), disables the system prompt, and attaches a
    Qwen-style image processor at a fixed square resolution.
    """

    attributes = ["tokenizer"]
    tokenizer_class = "PreTrainedTokenizerFast"

    def __init__(
        self,
        tokenizer: PreTrainedTokenizerFast,
        pre_image_tokens: tuple[int, ...] = (),
        post_image_tokens: tuple[int, ...] = (),
        system_start_tokens: tuple[int, ...] = (),
        system_end_tokens: tuple[int, ...] = (),
        user_start_tokens: tuple[int, ...] = (104,),
        user_end_tokens: tuple[int, ...] = (105,),
        asst_start_tokens: tuple[int, ...] = (102,),
        asst_end_tokens: tuple[int, ...] = (103,),
        bos_token: int = 1,
        image_size: int = 896,
    ):
        # Delegate all tokenizer/chat-template wiring to the base class.
        # NOTE(review): system prompts are hard-disabled here
        # (allow_system_prompt=False) even though system start/end tokens are
        # accepted as parameters — presumably they exist for interface
        # symmetry with sibling processors; confirm against BaseProcessor.
        super().__init__(
            tokenizer=tokenizer,
            pre_image_tokens=pre_image_tokens,
            post_image_tokens=post_image_tokens,
            system_start_tokens=system_start_tokens,
            system_end_tokens=system_end_tokens,
            user_start_tokens=user_start_tokens,
            user_end_tokens=user_end_tokens,
            asst_start_tokens=asst_start_tokens,
            asst_end_tokens=asst_end_tokens,
            allow_system_prompt=False,
            bos_token=bos_token,
        )
        # Image side: square resize/preprocess at `image_size` pixels.
        self._image_processor = QwenImageProcessor(img_size=image_size)