Image-Text-to-Text
Transformers
Safetensors
English
CASA_Helium1_VL_2B
custom_code
File size: 1,423 Bytes
2ec00ae
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
from transformers.tokenization_utils_fast import PreTrainedTokenizerFast

from .processing import BaseProcessor, QwenImageProcessor


class Helium1CASAProcessor(BaseProcessor):
    """Processor pairing a fast tokenizer with a ``QwenImageProcessor``.

    The class is a thin configuration layer over ``BaseProcessor``: it fixes
    the default delimiter token ids, disables system prompts, and attaches an
    image processor built for a given image size.
    """

    # Class-level declarations; presumably read by the transformers processor
    # machinery when saving/loading -- confirm against BaseProcessor.
    attributes = ["tokenizer"]
    tokenizer_class = "PreTrainedTokenizerFast"

    def __init__(
        self,
        tokenizer: PreTrainedTokenizerFast,
        pre_image_tokens: tuple[int, ...] = (),
        post_image_tokens: tuple[int, ...] = (),
        system_start_tokens: tuple[int, ...] = (),
        system_end_tokens: tuple[int, ...] = (),
        user_start_tokens: tuple[int, ...] = (104,),
        user_end_tokens: tuple[int, ...] = (105,),
        asst_start_tokens: tuple[int, ...] = (102,),
        asst_end_tokens: tuple[int, ...] = (103,),
        bos_token: int = 1,
        image_size: int = 896,
    ):
        """Configure the base processor and create the image processor.

        Args:
            tokenizer: Fast tokenizer handed to ``BaseProcessor``.
            pre_image_tokens: Forwarded unchanged to ``BaseProcessor``;
                presumably token ids emitted before an image -- TODO confirm.
            post_image_tokens: Forwarded unchanged to ``BaseProcessor``;
                presumably token ids emitted after an image -- TODO confirm.
            system_start_tokens: Forwarded unchanged to ``BaseProcessor``.
            system_end_tokens: Forwarded unchanged to ``BaseProcessor``.
            user_start_tokens: Forwarded unchanged to ``BaseProcessor``.
            user_end_tokens: Forwarded unchanged to ``BaseProcessor``.
            asst_start_tokens: Forwarded unchanged to ``BaseProcessor``.
            asst_end_tokens: Forwarded unchanged to ``BaseProcessor``.
            bos_token: Forwarded unchanged to ``BaseProcessor``.
            image_size: Passed to ``QwenImageProcessor`` as ``img_size``.
        """
        # Collect the delimiter arguments once so the base-class call stays
        # short; insertion order mirrors the parameter order above.
        delimiter_tokens = {
            "pre_image_tokens": pre_image_tokens,
            "post_image_tokens": post_image_tokens,
            "system_start_tokens": system_start_tokens,
            "system_end_tokens": system_end_tokens,
            "user_start_tokens": user_start_tokens,
            "user_end_tokens": user_end_tokens,
            "asst_start_tokens": asst_start_tokens,
            "asst_end_tokens": asst_end_tokens,
        }
        super().__init__(
            tokenizer=tokenizer,
            **delimiter_tokens,
            # System prompts are always disabled for this processor.
            allow_system_prompt=False,
            bos_token=bos_token,
        )

        # Assigned after the base initialiser has run, as in the original.
        self._image_processor = QwenImageProcessor(img_size=image_size)