palli23 committed on
Commit
a39d532
·
verified ·
1 Parent(s): 060c793

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -18
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py — Your original working version + repetition_penalty=1.2 + ngram=3
2
 
3
  import os
4
  os.environ["OMP_NUM_THREADS"] = "1"
@@ -9,6 +9,21 @@ import spaces
9
  from transformers import pipeline
10
  import torch
11
  import gc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # ——————————————————————————————
14
  # ZeroGPU worker – model loaded inside
@@ -19,13 +34,8 @@ def transcribe_3min(audio_path):
19
  return "Hlaðið upp hljóðskrá"
20
 
21
  pipe = pipeline(
22
- "automatic-speech-recognition",
23
- #model="palli23/whisper-tiny-icelandic-distilled-v3",
24
- #model = "palli23/whisper-tiny-distilled-spjallromur-polish-v3",
25
- model = "palli23/whisper-tiny-distilled-spjallromur-polish-v5",
26
- #model="palli23/whisper-tiny-distilled-samromur-spjallromur-polish",
27
- #model="palli23/whisper-tiny-samromur-spjallromur",
28
- #model="palli23/whisper-small-sam_spjall",
29
  torch_dtype=torch.float16,
30
  device=0, # GPU inside @spaces.GPU
31
  )
@@ -34,33 +44,37 @@ def transcribe_3min(audio_path):
34
  audio_path,
35
  chunk_length_s=30,
36
  batch_size=8,
37
- return_timestamps=False, # no timestamps, as you want
38
  generate_kwargs={
39
- "num_beams": 5, #var beam size 1
40
- "repetition_penalty": 1.2, # ← exactly what you asked for
41
- "no_repeat_ngram_size": 3, # ← exactly what you asked for
42
- "temperature": 0.3, #when problems, temp to 0.0 first!
 
43
  }
44
  )
45
 
46
- # Clean memory so ZeroGPU lives forever
 
 
 
47
  del pipe
48
  gc.collect()
49
  torch.cuda.empty_cache()
50
 
51
- return result["text"]
52
 
53
  # ——————————————————————————————
54
  # UI – clean and simple
55
  # ——————————————————————————————
56
  with gr.Blocks() as demo:
57
- gr.Markdown("# Íslenskt ASR – 3 mínútur")
58
- gr.Markdown("**palli23/whisper-small-sam_spjall** · mjög lágur WER · allt 5 mín hljóð")
59
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
60
 
61
  audio_in = gr.Audio(type="filepath", label="Hlaðið upp .mp3 / .wav")
62
  btn = gr.Button("Transcribe", variant="primary", size="lg")
63
- output = gr.Textbox(lines=25, label="Útskrift")
64
 
65
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
66
 
 
1
+ # app.py — Cleaned output version (no <UNK>, [HIK:...], etc.)
2
 
3
  import os
4
  os.environ["OMP_NUM_THREADS"] = "1"
 
9
  from transformers import pipeline
10
  import torch
11
  import gc
12
+ import re
13
+
14
+ # Simple post-processing to remove noise tokens
15
def clean_asr_text(text: str) -> str:
    """Strip ASR noise markers from a transcript and tidy its spacing.

    Removes angle-bracket tokens (e.g. ``<UNK>``) and square-bracket
    annotations (e.g. ``[HIK:xxx]``), collapses every run of whitespace to a
    single space, and drops any space left in front of ``.``, ``,``, ``!``
    or ``?``.

    Args:
        text: Raw transcript text.

    Returns:
        The cleaned transcript with no leading or trailing whitespace.
        Falsy input (``""`` or ``None``) is returned unchanged.
    """
    if not text:
        return text

    # Substitute a space rather than nothing so the words on either side of
    # a marker are not glued together ("a<UNK>b" -> "a b", not "ab"). The
    # surplus spaces this creates are collapsed by the steps below.
    text = re.sub(r'<[^>]+>', ' ', text)   # <UNK>, <|0.00|> etc.
    text = re.sub(r'\[.*?\]', ' ', text)   # [HIK:xxx], [laughter] etc.

    # After this step the text contains single spaces only.
    text = re.sub(r'\s+', ' ', text)

    # No space before sentence punctuation (covers the " ," and " ." cases).
    text = re.sub(r' +([.,!?])', r'\1', text)

    return text.strip()
27
 
28
  # ——————————————————————————————
29
  # ZeroGPU worker – model loaded inside
 
34
  return "Hlaðið upp hljóðskrá"
35
 
36
  pipe = pipeline(
37
+ "automatic-speech-recognition",
38
+ model="palli23/whisper-tiny-distilled-spjallromur-polish-v5", # your current best model
 
 
 
 
 
39
  torch_dtype=torch.float16,
40
  device=0, # GPU inside @spaces.GPU
41
  )
 
44
  audio_path,
45
  chunk_length_s=30,
46
  batch_size=8,
47
+ return_timestamps="word", # often gives cleaner raw text than False
48
  generate_kwargs={
49
+ "num_beams": 5,
50
+ "repetition_penalty": 1.2,
51
+ "no_repeat_ngram_size": 3,
52
+ "temperature": 0.2, # lower less creative garbage
53
+ "suppress_tokens": [-1], # sometimes helps suppress <unk> (-1 = unk token)
54
  }
55
  )
56
 
57
+ raw_text = result["text"]
58
+ cleaned = clean_asr_text(raw_text)
59
+
60
+ # Clean memory
61
  del pipe
62
  gc.collect()
63
  torch.cuda.empty_cache()
64
 
65
+ return cleaned
66
 
67
  # ——————————————————————————————
68
  # UI – clean and simple
69
  # ——————————————————————————————
70
  with gr.Blocks() as demo:
71
+ gr.Markdown("# Íslenskt ASR – 3 mínútur (hreinsuð útgáfa)")
72
+ gr.Markdown("**palli23/whisper-tiny-distilled-spjallromur-polish-v5** · reynirfjarlægja <UNK>, [HIK...], osfrv.")
73
  gr.Markdown("**Hafa samband:** pallinr1@protonmail.com")
74
 
75
  audio_in = gr.Audio(type="filepath", label="Hlaðið upp .mp3 / .wav")
76
  btn = gr.Button("Transcribe", variant="primary", size="lg")
77
+ output = gr.Textbox(lines=25, label="Útskrift (hreinsuð)")
78
 
79
  btn.click(fn=transcribe_3min, inputs=audio_in, outputs=output)
80