Upload tokenizer
Browse files
- .gitattributes +1 -0
- chat_template.jinja +6 -0
- tokenizer.json +3 -0
- tokenizer_config.json +12 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% set has_system = messages[0]['role'] == 'system' %}{% if not has_system %}{{ bos_token + 'system
|
| 2 |
+
You are an AI programming assistant, utilizing the Seed-Coder model, developed by ByteDance Seed, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.
|
| 3 |
+
|
| 4 |
+
' + eos_token }}{% endif %}{% for message in messages %}{{ bos_token + message['role'] + '
|
| 5 |
+
' + message['content'] | trim + eos_token }}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant
|
| 6 |
+
'}}{% endif %}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2128ee20a31a0044f8c974670003040103aaef6c5381db2d815908f5ea2f5330
|
| 3 |
+
size 11891784
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"backend": "tokenizers",
|
| 3 |
+
"bos_token": "<[begin▁of▁sentence]>",
|
| 4 |
+
"clean_up_tokenization_spaces": false,
|
| 5 |
+
"eos_token": "<[end▁of▁sentence]>",
|
| 6 |
+
"is_local": false,
|
| 7 |
+
"mask_token": "<[MASK_TOKEN]>",
|
| 8 |
+
"model_max_length": 32768,
|
| 9 |
+
"pad_token": "<[PAD▁TOKEN]>",
|
| 10 |
+
"sep_token": "<[SEP▁TOKEN]>",
|
| 11 |
+
"tokenizer_class": "TokenizersBackend"
|
| 12 |
+
}
|