| | |
| |
|
import os

# These variables are set before torch is imported further down, so they are
# already in the process environment when the library initialises.
# OMP_NUM_THREADS=1 requests a single OpenMP thread.
os.environ["OMP_NUM_THREADS"] = "1"
# Allocator option string passed through to PyTorch's CUDA caching allocator
# (presumably to reduce fragmentation -- confirm against the PyTorch docs).
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"
| |
|
| | import gradio as gr |
| | import spaces |
| | from transformers import pipeline |
| | import torch |
| | import gc |
| | import re |
| |
|
| | |
# Markup-style tags in the raw transcript, e.g. "<UNK>" or "[HIK]".
_ANGLE_TAG_RE = re.compile(r'<[^>]+>')
_SQUARE_TAG_RE = re.compile(r'\[.*?\]')
# Bare non-speech labels, matched case-insensitively and only as whole words.
_NOISE_WORD_RE = re.compile(
    r'(?i)\b(unk|hik|laughter|music|cough|applause|noise|background)\b'
)
_WHITESPACE_RE = re.compile(r'\s+')
_SPACE_BEFORE_PUNCT_RE = re.compile(r' +([.,!?])')


def clean_asr_text(text: str) -> str:
    """Strip non-speech markers from an ASR transcript and tidy its spacing.

    The steps run in this order:

    1. Remove ``<...>`` and ``[...]`` tags.
    2. Remove the bare noise labels listed in ``_NOISE_WORD_RE``.
    3. Collapse every whitespace run to one space and trim both ends.
    4. Delete spaces that sit directly before ``.``, ``,``, ``!`` or ``?``.

    Args:
        text: Raw transcript. Any falsy value (``None``, ``""``) is accepted.

    Returns:
        The cleaned transcript, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""

    for pattern in (_ANGLE_TAG_RE, _SQUARE_TAG_RE, _NOISE_WORD_RE):
        text = pattern.sub('', text)

    # After this line the only whitespace left is single interior spaces, so
    # the punctuation pass below cannot leave leading/trailing blanks behind.
    text = _WHITESPACE_RE.sub(' ', text).strip()

    return _SPACE_BEFORE_PUNCT_RE.sub(r'\1', text)
| |
|
| | |
| | |
| | |
@spaces.GPU(duration=180)
def transcribe_3min(audio_path):
    """Transcribe an audio file and return the cleaned transcript.

    A fresh ASR pipeline is created on every call, used once, and released
    again so that no model memory is kept between requests.

    Args:
        audio_path: Path of the audio file to transcribe. A falsy value
            (nothing uploaded) short-circuits with a prompt message.

    Returns:
        The transcript after ``clean_asr_text``; a "Villa við umritun: ..."
        message if loading the model or running inference raised; or a
        fallback notice when the cleaned transcript is empty.
    """
    if not audio_path:
        return "Hlaðið upp hljóðskrá"

    # Bound up front so the ``finally`` clause can release it even when the
    # pipeline constructor itself raises.
    pipe = None
    try:
        # Built inside the ``try`` so a model-load failure is reported the
        # same way as an inference failure instead of escaping the handler.
        pipe = pipeline(
            "automatic-speech-recognition",
            model="palli23/whisper-tiny-distilled-spjallromur-polish-v5",
            torch_dtype=torch.float16,
            device=0,
        )

        result = pipe(
            audio_path,
            chunk_length_s=30,
            batch_size=8,
            return_timestamps=False,
            generate_kwargs={
                "num_beams": 5,
                "repetition_penalty": 1.3,
                "no_repeat_ngram_size": 4,
                "temperature": 0.0,
                # NOTE(review): [-1] looks like the openai-whisper "default
                # set" convention; confirm transformers treats it as intended
                # rather than as a literal token id.
                "suppress_tokens": [-1],
                "max_new_tokens": 444,
            },
        )

        cleaned = clean_asr_text(result.get("text", ""))

    except Exception as e:
        # UI boundary: surface the failure in the output textbox.
        cleaned = f"Villa við umritun: {e}"

    finally:
        # Always drop the model reference and return cached GPU memory,
        # whichever path was taken above.
        del pipe
        gc.collect()
        torch.cuda.empty_cache()

    return cleaned or "(ekkert texti fannst eða villa kom upp)"
| |
|
| | |
| | |
| | |
# Gradio UI: one audio input, one button, one text output.
with gr.Blocks() as demo:
    gr.Markdown("# Íslenskt ASR – 3 mínútur (hreinsuð útgáfa)")
    gr.Markdown(
        "**Model:** palli23/whisper-tiny-distilled-spjallromur-polish-v5 \n"
        "**Stillingar:** no timestamps, temperature=0.0, repetition_penalty=1.3, no_repeat_ngram_size=4 \n"
        "Reynir að fjarlægja <UNK>, [HIK...], [laughter] o.s.frv."
    )
    gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")

    # NOTE(review): the title says 3 minutes while this label says ~5 minutes;
    # confirm which limit is intended.
    audio_in = gr.Audio(
        type="filepath",
        label="Hlaðið upp .mp3 / .wav / .m4a (allt að ~5 mín)",
        format="mp3",
    )

    btn = gr.Button("Umrita", variant="primary", size="lg")

    output = gr.Textbox(
        lines=25,
        label="Útskrift (hreinsuð)",
        placeholder="Hér kemur textinn...",
    )

    # The example clip is optional (its own label says so), so it is only
    # registered when the file is actually present next to the app.
    example_clip = "example_clip_14nov2025.mp3"
    if os.path.exists(example_clip):
        gr.Examples(
            examples=[
                [example_clip],
            ],
            inputs=audio_in,
            label="Dæmi (ef þú hefur sett upp dæmi skrá)",
        )

    # Clicking the button runs the transcription and fills the textbox.
    btn.click(
        fn=transcribe_3min,
        inputs=audio_in,
        outputs=output,
    )
| |
|
| | |
| | |
| | |
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # listen on all network interfaces
        server_port=7860,
        share=False,
        debug=False,
    )