| | |
| | import os |
# Runtime tuning knobs, set before the ML libraries below are imported:
# one OpenMP thread, and a 128 MB split-size cap for the PyTorch CUDA allocator.
os.environ.update(
    {
        "OMP_NUM_THREADS": "1",
        "PYTORCH_CUDA_ALLOC_CONF": "max_split_size_mb:128",
    }
)
| |
|
| | import gradio as gr |
| | import spaces |
| | from transformers import pipeline |
| |
|
| | |
| | |
| | |
# Hugging Face Hub repository id of the speech-recognition checkpoint to load.
MODEL_NAME: str = "palli23/whisper-large-v3-is-samromur-20-40s-3x-final"
| |
|
def get_pipe():
    """Build the automatic-speech-recognition pipeline for ``MODEL_NAME``.

    Returns:
        A ``transformers`` ASR pipeline requested in float16 on CUDA device 0.
        The Hub access token is read from the ``HF_TOKEN`` environment
        variable (``None`` when it is unset).
    """
    # Intentionally NOT decorated with @spaces.GPU: the documented ZeroGPU
    # pattern is to create the model at module level and to decorate only
    # the function that performs inference (see `_run_pipe` below).
    return pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        torch_dtype="float16",
        device=0,
        token=os.getenv("HF_TOKEN"),
    )


# Built once at import time and shared by every transcription request.
pipe = get_pipe()


@spaces.GPU(duration=180)
def _run_pipe(audio_path):
    """Run the shared pipeline on one audio file and return its text.

    This is the only function that needs a GPU, so it carries the
    ``spaces.GPU`` decorator (up to 180 seconds per call).
    """
    result = pipe(
        audio_path,
        chunk_length_s=30,        # process the audio in 30-second windows
        stride_length_s=(6, 0),   # (left, right) stride passed to the pipeline
        batch_size=8,
        return_timestamps=False,
    )
    return result["text"]


def transcribe_3min(audio_path):
    """Transcribe an uploaded audio file.

    Args:
        audio_path: Path of the audio file to transcribe. Any falsy value
            (``None``, empty string) means nothing was uploaded.

    Returns:
        The transcribed text, or the prompt ``"Hladdu upp hljóðskrá"`` when
        no file was supplied.
    """
    # Guard first so an empty submission never requests a GPU slot.
    if not audio_path:
        return "Hladdu upp hljóðskrá"

    return _run_pipe(audio_path)
| |
|
| | |
| | |
| | |
# Single-page UI: three header lines, an audio upload, a button and the
# transcript box. The button sends the uploaded file path to transcribe_3min.
with gr.Blocks() as demo:
    for header_markdown in (
        "# Íslenskt ASR – 3 mínútur",
        "**Whisper Large · Enn lægra WER (18% spjallrómur 5% samrómur) · allt að 5 mín hljóð**",
        "**Hafa samband:** pallinr1@protonmail.com",
    ):
        gr.Markdown(header_markdown)

    # type="filepath" hands the handler a path on disk rather than raw samples.
    audio_in = gr.Audio(label="Hladdu upp .mp3 / .wav (max 5 mín)", type="filepath")
    btn = gr.Button("Transcribe", size="lg", variant="primary")
    output = gr.Textbox(label="Útskrift", lines=30)

    btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
| |
|
| | |
| | |
| | |
# Server settings: no login, share link requested, listen on all interfaces
# at port 7860, surface handler errors in the UI, keep startup logging on.
launch_options = {
    "auth": None,
    "share": True,
    "server_name": "0.0.0.0",
    "server_port": 7860,
    "show_error": True,
    "quiet": False,
}
demo.launch(**launch_options)