from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
from .processing import BaseProcessor, QwenImageProcessor
class Helium1CASAProcessor(BaseProcessor):
    """Chat/image processor for the Helium1 CASA model.

    Pairs a fast tokenizer with a Qwen-style image processor and forwards
    the chat-template token ids to ``BaseProcessor``. System prompts are
    disabled for this processor (``allow_system_prompt=False``).
    """

    attributes = ["tokenizer"]
    tokenizer_class = "PreTrainedTokenizerFast"

    def __init__(
        self,
        tokenizer: PreTrainedTokenizerFast,
        pre_image_tokens: tuple[int, ...] = tuple(),
        post_image_tokens: tuple[int, ...] = tuple(),
        system_start_tokens: tuple[int, ...] = tuple(),
        system_end_tokens: tuple[int, ...] = tuple(),
        user_start_tokens: tuple[int, ...] = (104,),
        user_end_tokens: tuple[int, ...] = (105,),
        asst_start_tokens: tuple[int, ...] = (102,),
        asst_end_tokens: tuple[int, ...] = (103,),
        bos_token: int = 1,
        image_size: int = 896,
    ):
        # Token-id tuples that delimit each chat role / image span; they are
        # forwarded to the base class unchanged.
        role_token_kwargs = {
            "pre_image_tokens": pre_image_tokens,
            "post_image_tokens": post_image_tokens,
            "system_start_tokens": system_start_tokens,
            "system_end_tokens": system_end_tokens,
            "user_start_tokens": user_start_tokens,
            "user_end_tokens": user_end_tokens,
            "asst_start_tokens": asst_start_tokens,
            "asst_end_tokens": asst_end_tokens,
        }
        super().__init__(
            tokenizer=tokenizer,
            allow_system_prompt=False,  # this chat format has no system role
            bos_token=bos_token,
            **role_token_kwargs,
        )
        # NOTE(review): `img_size` presumably yields square image_size x
        # image_size inputs — confirm against QwenImageProcessor.
        self._image_processor = QwenImageProcessor(img_size=image_size)