| { |
| "version": "1.0", |
| "truncation": { |
| "direction": "Right", |
| "max_length": 2048, |
| "strategy": "LongestFirst", |
| "stride": 0 |
| }, |
| "padding": null, |
| "added_tokens": [ |
| { |
| "id": 0, |
| "content": "<|endoftext|>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 1, |
| "content": "<|padding|>", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| } |
| ], |
| "normalizer": { |
| "type": "NFC" |
| }, |
| "pre_tokenizer": { |
| "type": "ByteLevel", |
| "add_prefix_space": false, |
| "trim_offsets": true, |
| "use_regex": true |
| }, |
| "post_processor": { |
| "type": "ByteLevel", |
| "add_prefix_space": false, |
| "trim_offsets": true, |
| "use_regex": true |
| }, |
| "decoder": { |
| "type": "ByteLevel", |
| "add_prefix_space": false, |
| "trim_offsets": true, |
| "use_regex": true |
| }, |
| "model": { |
| "type": "BPE", |
| "dropout": null, |
| "unk_token": null, |
| "continuing_subword_prefix": null, |
| "end_of_word_suffix": null, |
| "fuse_unk": false, |
| "byte_fallback": false, |
| "vocab": { |
| "<|endoftext|>": 0, |
| "<|padding|>": 1, |
| "a": 2, |
| "c": 3, |
| "g": 4, |
| "t": 5, |
| "n": 6, |
| "y": 7 |
| }, |
| "merges": [] |
| } |
| } |