| from pathlib import Path |
| import csv |
| import time |
| from funasr import AutoModel |
| from funasr_onnx import SeacoParaformer, CT_Transformer, Fsmn_vad |
|
|
|
|
# Root directory holding the downloaded FunASR model folders used below.
# NOTE(review): hard-coded to a personal machine path — consider an env var.
model_dir = Path("/Users/jeqin/work/code/Translator/moyoyo_asr_models")
|
|
def save_csv(file_path, rows):
    """Write *rows* to *file_path* as a CSV file and print the destination.

    Args:
        file_path: Destination path (str or Path); overwritten if present.
        rows: Iterable of row sequences, forwarded to ``csv.writer.writerows``.
    """
    # newline="" is required by the csv module so the writer controls line
    # endings itself; without it every row gets an extra blank line on Windows.
    with open(file_path, "w", encoding="utf-8", newline="") as f:
        csv.writer(f).writerows(rows)
    print(f"write csv to {file_path}")
|
|
def export_onnx():
    """Export the ASR, VAD and punctuation models to quantized ONNX.

    Uses ``funasr.AutoModel.export`` and prints each export's output.
    """
    # BUGFIX: the VAD and punc sub-paths previously started with '/', and an
    # absolute right-hand operand makes `model_dir / path` discard model_dir
    # entirely (pathlib semantics) — the models were looked up at filesystem
    # root. All three names are now relative to model_dir.
    model_names = [
        'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch',  # ASR
        'speech_fsmn_vad_zh-cn-16k-common-pytorch',  # VAD
        'punc_ct-transformer_cn-en-common-vocab471067-large',  # punctuation
    ]
    for name in model_names:
        model = AutoModel(model=model_dir / name)
        output = model.export(type="onnx", quantize=True, disable_update=True)
        print(output)
|
|
def run_funasr():
    """Benchmark the FunASR PyTorch pipeline (ASR + VAD + punctuation).

    Loads the three models once, transcribes each matching audio file,
    prints per-file inference time, and saves results to ``run_funasr.csv``.
    """
    asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
    vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
    punc_model_path = model_dir / 'punc_ct-transformer_cn-en-common-vocab471067-large'

    t0 = time.time()
    model = AutoModel(
        model=asr_model_path.as_posix(),
        vad_model=vad_model_path.as_posix(),
        punc_model=punc_model_path.as_posix(),
        log_level="ERROR",
        disable_update=True,
    )
    print("load model: ", time.time() - t0)

    audios = Path("/test_data/audio_clips")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("Chinese-mayun-part2.mp3")):
        print(audio)
        t1 = time.time()
        try:
            result = model.generate(input=str(audio), disable_pbar=True,
                                    hotword="")
        except Exception as e:
            # BUGFIX: the original fell through after printing the error and
            # then dereferenced an unbound `result` (NameError). Log and skip.
            print(audio)
            print(e)
            continue
        t = time.time() - t1
        print("inference time:", t)
        text = result[0]["text"]
        print("inference result", text)
        rows.append([f"{audio.parent.name}/{audio.name}", t, text])
    save_csv("run_funasr.csv", rows)
|
|
def run_onnx(quant=True):
    """Benchmark the ONNX pipeline: VAD -> ASR -> punctuation restoration.

    Args:
        quant: When True, load the quantized ONNX models and write results
            to ``run_quant.csv``; otherwise write to ``run_onnx.csv``.
    """
    asr_model_path = model_dir / 'speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'
    vad_model_path = model_dir / 'speech_fsmn_vad_zh-cn-16k-common-pytorch'
    punc_model_path = model_dir / 'punc_ct-transformer_cn-en-common-vocab471067-large'

    t0 = time.time()
    vad_model = Fsmn_vad(vad_model_path, quantize=quant)
    asr_model = SeacoParaformer(asr_model_path, quantize=quant)
    punc_model = CT_Transformer(punc_model_path, quantize=quant)
    print("load model: ", time.time() - t0)

    audios = Path("/Users/moyoyo/code/tests/audios")
    rows = [["file_name", "inference_time", "inference_result"]]
    for audio in sorted(audios.glob("*s/*.wav")):
        t_start = time.time()
        # NOTE(review): the VAD output is discarded and never fed into the ASR
        # call — presumably segmentation was intended; confirm before relying
        # on these timings. The call is kept so total time stays comparable.
        vad_model(str(audio))
        asr_res = asr_model(str(audio), hotwords="")
        asr_text = asr_res[0]["preds"]
        text = punc_model(asr_text)[0]
        print(text)
        t = time.time() - t_start  # total of all three stages, as before
        print("inference time:", t)
        rows.append([f"{audio.parent.name}/{audio.name}", t, text])
    file_name = "run_quant.csv" if quant else "run_onnx.csv"
    save_csv(file_name, rows)
|
|
# Script entry point: run the PyTorch FunASR benchmark by default.
# (export_onnx / run_onnx are invoked by editing this line.)
if __name__ == '__main__':
    run_funasr()