# Imports for this evaluation config: the model wrapper classes used in
# `models`, plus the dataset configs pulled in through mmengine's `read_base`.
from mmengine.config import read_base

from opencompass.models import VLLM, HuggingFaceBaseModel, TurboMindModel

with read_base():
    # Each import binds a `*_datasets` name; those names are collected by
    # suffix further down in this file, so the names themselves matter.
    from opencompass.configs.datasets.gpqa.gpqa_openai_simple_evals_gen_5aeece import \
        gpqa_datasets
    from opencompass.configs.datasets.gsm8k.gsm8k_gen_17d0dc import \
        gsm8k_datasets
    from opencompass.configs.datasets.race.race_ppl import \
        race_datasets
    from opencompass.configs.datasets.winogrande.winogrande_5shot_ll_252f01 import \
        winogrande_datasets

    # NOTE(review): `eval` and `infer` are not referenced anywhere else in the
    # visible file -- presumably they are picked up as config entries by the
    # runner. This import is assumed to belong inside `read_base()` because a
    # three-dot relative import cannot resolve as a plain top-level import in
    # a standalone config file -- confirm against the original layout.
    from ...rjob import eval, infer

# Keep only the second RACE dataset config (index 1) -- presumably the
# 'race-high' subset that the summarizer reports; confirm against race_ppl.
race_datasets = [race_datasets[1]]

# One entry per inference backend, all loading the same checkpoint path.
# NOTE(review): every `abbr` says "qwen3-8b-base" while `path` points at
# 'Qwen/Qwen3-0.6B-Base'. The abbr is what results are keyed by, so it is
# left untouched here -- confirm whether the mismatch is intentional.
models = [
    dict(
        type=TurboMindModel,
        abbr='qwen3-8b-base-turbomind',
        path='Qwen/Qwen3-0.6B-Base',
        engine_config=dict(max_batch_size=1, tp=1),
        # top_k=1 with a near-zero temperature -- presumably chosen to make
        # decoding effectively deterministic.
        gen_config=dict(
            top_k=1,
            temperature=1e-6,
            top_p=0.9,
            max_new_tokens=2048,
        ),
        max_seq_len=8192,
        max_out_len=2048,
        batch_size=1,
        run_cfg=dict(num_gpus=1),
    ),
    dict(
        type=VLLM,
        abbr='qwen3-8b-base-vllm',
        path='Qwen/Qwen3-0.6B-Base',
        model_kwargs=dict(tensor_parallel_size=1, gpu_memory_utilization=0.6),
        max_seq_len=8192,
        max_out_len=2048,
        batch_size=16,
        generation_kwargs=dict(temperature=0),
        run_cfg=dict(num_gpus=1),
    ),
    dict(
        type=HuggingFaceBaseModel,
        abbr='qwen3-8b-base-hf',
        path='Qwen/Qwen3-0.6B-Base',
        max_out_len=1024,
        batch_size=4,
        run_cfg=dict(num_gpus=1),
    ),
]
# Concatenate every list bound to a module-level name ending in '_datasets'.
# This is deliberately a comprehension: a module-level `for` over
# `locals().items()` would bind new names while iterating the namespace.
datasets = sum(
    [v for k, v in locals().items() if k.endswith('_datasets')],
    [],
)

# Cap every dataset at the '[0:32]' reader range -- presumably the first 32
# test samples, keeping this run short. The dataset dicts are mutated in place.
for d in datasets:
    d['reader_cfg']['test_range'] = '[0:32]'

# Pin the TurboMind/LMDeploy entries (matched by substring of `abbr`) to a
# batch size of 1, on both the engine side and the evaluator side.
# NOTE(review): indentation was lost in the source; both assignments are
# assumed to sit under the `if`, otherwise the other backends' `batch_size`
# settings would be overwritten as well.
for m in models:
    if 'turbomind' in m['abbr'] or 'lmdeploy' in m['abbr']:
        m['engine_config']['max_batch_size'] = 1
        m['batch_size'] = 1

# Order models by ascending GPU requirement. `sorted` is stable, so entries
# with the same `num_gpus` keep their declaration order.
models = sorted(models, key=lambda x: x['run_cfg']['num_gpus'])

# Summary table layout: one [dataset abbreviation, metric] pair per row, in
# display order.
summarizer = dict(
    dataset_abbrs=[
        ['gsm8k', 'accuracy'],
        ['GPQA_diamond', 'accuracy'],
        ['race-high', 'accuracy'],
        ['winogrande', 'accuracy'],
    ],
    # Concatenate any lists bound to module-level names ending in
    # '_summary_groups'; this yields [] when no such name is in scope.
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith('_summary_groups')],
        [],
    ),
)