# Standard library
import shlex
import subprocess

# Third-party
import torch
from transformers import LlamaConfig, LlamaForCausalLM, LlamaTokenizerFast
| |
|
| |
|
# Checkpoint whose config/tokenizer we derive the tiny model from.
mname_from = "meta-llama/Llama-2-7b-hf"
# Local output directory (and Hub repo name) for the tiny debug model.
mname_tiny = "tiny-llama-2-debug"
# How many tokenizer vocab entries survive the shrink below.
vocab_keep_items = 3000
| |
|
# Start from the real checkpoint's config, then shrink every dimension so the
# resulting model is tiny enough for fast CPU tests.
config = LlamaConfig.from_pretrained(mname_from)
tiny_overrides = {
    "hidden_size": 16,
    "intermediate_size": 64,
    "num_attention_heads": 4,
    "num_hidden_layers": 2,
    "max_position_embeddings": 256,
    "num_key_value_heads": 4,
    "vocab_size": vocab_keep_items,
}
config.update(tiny_overrides)
print("new config", config)
| |
|
| | |
# Instantiate a randomly-initialized model from the shrunken config — no
# pretrained weights are loaded — and write it to the output directory.
tiny_model = LlamaForCausalLM(config)
print(f"num of params {tiny_model.num_parameters()}")

tiny_model.save_pretrained(mname_tiny)
| |
|
| | |
# Shrink the tokenizer: dump the full fast tokenizer to a scratch dir, then
# truncate its tokenizer.json so only the first `vocab_keep_items` vocab
# entries survive.
tokenizer_fast = LlamaTokenizerFast.from_pretrained(mname_from)
tmp_dir = f"/tmp/{mname_from}"
tokenizer_fast.save_pretrained(tmp_dir)

# HACK: relies on tokenizer.json listing vocab entries in ascending id order.
# The perl one-liner chops the file right after the last kept id and hand-closes
# the JSON structure; the merges table is emptied, which is acceptable for a
# debug-only tokenizer.
closing_pat = '},"merges": []}}'
cmd = (f"perl -0777 -pi -e 's|({vocab_keep_items-1}).*|$1{closing_pat}|msg' {tmp_dir}/tokenizer.json")

result = subprocess.run(shlex.split(cmd), capture_output=True, text=True)
# Fail loudly here: the original silently ignored `result`, so a missing perl
# binary or a failed in-place edit only surfaced later as a baffling
# tokenizer-load error.
if result.returncode != 0:
    raise RuntimeError(
        f"tokenizer.json truncation failed (exit {result.returncode}): {result.stderr}"
    )
| | |
| |
|
| | |
# Reload the truncated tokenizer and store it alongside the model weights.
tiny_tokenizer = LlamaTokenizerFast.from_pretrained(tmp_dir)
tiny_tokenizer.save_pretrained(mname_tiny)

# Smoke test: an untrained model emits garbage tokens, but the full
# tokenize -> generate -> decode round trip must complete without errors.
encoded = tiny_tokenizer("Making tiny model", return_tensors="pt")
generated = tiny_model.generate(**encoded, max_new_tokens=100)
print(tiny_tokenizer.batch_decode(generated, skip_special_tokens=True))
print("Random output should be expected, but no crashing")
| |
|
print(f"Model+Tokenizer saved in {mname_tiny}")

# Publish both artifacts to the Hugging Face Hub (requires prior
# `huggingface-cli login` credentials on this machine).
hub_repo = f"boom-project/{mname_tiny}"
tiny_model.push_to_hub(hub_repo)
tiny_tokenizer.push_to_hub(hub_repo)

print(f"Model and tokenizer pushed to boom-project/{mname_tiny}")