{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 0,
      "content": "<|endoftext|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 1,
      "content": "<|quantum|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 2,
      "content": "<|classical|>",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "Sequence",
    "normalizers": [
      {
        "type": "NFC"
      },
      {
        "type": "Prepend",
        "prepend": "▁"
      }
    ]
  },
  "pre_tokenizer": {
    "type": "Sequence",
    "pretokenizers": [
      {
        "type": "Split",
        "pattern": {
          "Regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
        },
        "behavior": "Isolated",
        "invert": false
      }
    ]
  },
  "post_processor": {
    "type": "TemplateProcessing",
    "single": [
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      },
      {
        "SpecialToken": {
          "id": "<|endoftext|>",
          "type_id": 0
        }
      }
    ],
    "pair": [
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      },
      {
        "SpecialToken": {
          "id": "<|endoftext|>",
          "type_id": 0
        }
      },
      {
        "Sequence": {
          "id": "B",
          "type_id": 1
        }
      },
      {
        "SpecialToken": {
          "id": "<|endoftext|>",
          "type_id": 1
        }
      }
    ],
    "special_tokens": {
      "<|endoftext|>": {
        "id": "<|endoftext|>",
        "ids": [0],
        "tokens": ["<|endoftext|>"]
      },
      "<|quantum|>": {
        "id": "<|quantum|>",
        "ids": [1],
        "tokens": ["<|quantum|>"]
      },
      "<|classical|>": {
        "id": "<|classical|>",
        "ids": [2],
        "tokens": ["<|classical|>"]
      }
    }
  },
  "decoder": {
    "type": "Sequence",
    "decoders": [
      {
        "type": "Replace",
        "pattern": {
          "String": "▁"
        },
        "content": " "
      }
    ]
  },
  "model": {
    "type": "BPE",
    "dropout": null,
    "unk_token": null,
    "continuing_subword_prefix": null,
    "end_of_word_suffix": null,
    "fuse_unk": false,
    "byte_fallback": false,
    "vocab": {},
    "merges": []
  }
}