import os
import pytest

from tests.utils import wrap_test_forked


@pytest.mark.skipif(not os.getenv('BENCHMARK'),
                    reason="Only valid on sufficiently large system and not normal part of testing."
                           " Instead used to get eval scores for all models.")
@pytest.mark.parametrize(
    "base_model",
    [
        "h2oai/h2ogpt-oasst1-falcon-40b",
        "h2oai/h2ogpt-oig-oasst1-512-6_9b",
        "h2oai/h2ogpt-oig-oasst1-512-12b",
        "h2oai/h2ogpt-oig-oasst1-512-20b",
        "h2oai/h2ogpt-oasst1-512-12b",
        "h2oai/h2ogpt-oasst1-512-20b",
        "h2oai/h2ogpt-gm-oasst1-en-1024-20b",
        "databricks/dolly-v2-12b",
        "h2oai/h2ogpt-gm-oasst1-en-2048-open-llama-7b-preview-300bt-v2",
        "ehartford/WizardLM-7B-Uncensored",
        "ehartford/WizardLM-13B-Uncensored",
        "AlekseyKorshuk/vicuna-7b",
        "TheBloke/stable-vicuna-13B-HF",
        "decapoda-research/llama-7b-hf",
        "decapoda-research/llama-13b-hf",
        "decapoda-research/llama-30b-hf",
        "junelee/wizard-vicuna-13b",
        "openaccess-ai-collective/wizard-mega-13b",
    ]
)
@wrap_test_forked
def test_score_eval(base_model):
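    """Score evaluation prompts for each base model via src.gen.main.

    Only runs when BENCHMARK is set; used to collect eval scores across models
    rather than as part of routine testing.
    """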
    from src.gen import main
    main(
        base_model=base_model,
        chat=False,
        stream_output=False,
        gradio=False,
        eval_prompts_only_num=500,
        eval_as_output=False,
        num_beams=2,
        use_gpu_id=False,
    )


@pytest.mark.skipif(not os.getenv('FALCONS'), reason="download purpose")
@pytest.mark.parametrize(
    "base_model",
    [
        "OpenAssistant/falcon-7b-sft-top1-696",
        "OpenAssistant/falcon-7b-sft-mix-2000",
        "h2oai/h2ogpt-oasst1-falcon-40b",
        "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-40b-v1",
        "h2oai/h2ogpt-gm-oasst1-en-2048-falcon-7b-v2",
        "h2oai/h2ogpt-gm-oasst1-multilang-2048-falcon-7b",
        "OpenAssistant/falcon-40b-sft-top1-560",
        "OpenAssistant/falcon-40b-sft-mix-1226",
    ]
)
@wrap_test_forked
def test_get_falcons(base_model):
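    """Download each Falcon tokenizer and model (float16) to populate the local cache.

    Only runs when FALCONS is set.
    """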
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    t = AutoTokenizer.from_pretrained(base_model,
                                      use_fast=False,
                                      padding_side="left",
                                      trust_remote_code=True,
                                      token=True,
                                      )
    assert t is not None
    m = AutoModelForCausalLM.from_pretrained(base_model,
                                             trust_remote_code=True,
                                             torch_dtype=torch.float16,
                                             token=True,
                                             )
    assert m is not None


@pytest.mark.skipif(not os.getenv('LLAMA'), reason="LLaMa conversion")
@wrap_test_forked
def test_get_landmark_llama():
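    """Download the landmark-attention LLaMA-7B weight-diff model and tokenizer.

    Only runs when LLAMA is set.  The shell steps below show how to recover the
    full merged checkpoint from the weight diff.
    """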
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM
    from transformers import LlamaForCausalLM, LlamaTokenizer
    m = LlamaForCausalLM.from_pretrained("epfml/landmark-attention-llama7b-wdiff")
    t = LlamaTokenizer.from_pretrained("epfml/landmark-attention-llama7b-wdiff")
    assert m is not None and t is not None
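    # Shell steps used to recover the full landmark-attention LLaMA-7B checkpoint
    # from the published weight diff (executed via os.system when this test runs).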
    os.system("""
#
# step 1, convert llama to HF format
pip install protobuf==3.19.0
source ~/.bashrc.mamba
mamba create -n trans
conda activate trans
conda install python=3.10 -y

git clone https://github.com/epfml/landmark-attention.git
pip install fire datasets
git clone https://github.com/huggingface/transformers.git
cd transformers
pip install .
pip install torch accelerate sentencepiece protobuf==3.19.0
# below requires LLaMa weights
python src/transformers/models/llama/convert_llama_weights_to_hf.py --input_dir /data/jon/LLaMA --model_size 7B --output_dir llama_7B
#
# step 2, make landmark model (change hash if updated)
mkdir -p epfml/landmark-attention-llama7b-wdiff
cd epfml/landmark-attention-llama7b-wdiff
ln -s ~/.cache/huggingface/hub/models--epfml--landmark-attention-llama7b-wdiff/snapshots/050562871ac72723b4ab674f0392b02cd9609842/* .
cd ../../
python ../landmark-attention/llama/weight_diff.py recover --path_raw llama_7B --path_diff epfml/landmark-attention-llama7b-wdiff --path_tuned landmark_llama_7b
""")