| import os |
| import tempfile |
| import datetime |
|
|
| import gradio as gr |
| import torch |
| import whisperx |
| from whisperx.diarize import DiarizationPipeline |
|
|
| |
# Pick the compute device once; every model below is placed on it.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Speech-recognition model: the "small" checkpoint with int8 compute type.
asr_model = whisperx.load_model("small", device=device, compute_type="int8")

# Alignment model (plus its metadata) for Russian, used to align the
# recognised segments against the audio.
align_model, metadata = whisperx.load_align_model(language_code="ru", device=device)

# Speaker-diarization pipeline. The Hugging Face token is read from the
# HUGGINGFACEHUB_API_TOKEN environment variable and is None when unset.
hf_token = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
diarization_pipeline = DiarizationPipeline(use_auth_token=hf_token, device=device)
|
|
def transcribe_with_diarization(audio_path):
    """Transcribe a Russian recording and tag every segment with its speaker.

    The audio is run through the module-level ASR model, aligned with the
    module-level alignment model, diarized, and the speaker labels are then
    merged into the aligned segments.

    Args:
        audio_path: Path to the audio file. A falsy value (e.g. ``None`` from
            an audio input with no file selected) is treated as "nothing to
            transcribe".

    Returns:
        One line per segment in the form ``[<speaker>] <text>``, joined with
        newlines. Segments without a speaker label fall back to ``Speaker``.
        An empty string is returned when ``audio_path`` is falsy.
    """
    # Guard: without a file there is nothing to decode, and passing None on
    # to the models would only surface as an opaque traceback in the UI.
    if not audio_path:
        return ""

    # Decode the file once and share the waveform between ASR, alignment and
    # diarization instead of letting each stage re-read it from disk.
    audio = whisperx.load_audio(audio_path)

    # 1. Speech recognition with the language pinned to Russian.
    result = asr_model.transcribe(audio, language="ru", batch_size=16)

    # 2. Align the recognised segments against the audio.
    aligned = whisperx.align(
        result["segments"],
        align_model,
        metadata,
        audio,
        device,
    )

    # 3. Speaker diarization, then 4. merge speaker labels into the segments.
    diarization = diarization_pipeline(audio)
    merged = whisperx.assign_word_speakers(diarization, aligned)["segments"]

    # 5. Render "[speaker] text" lines.
    return "\n".join(
        f"[{seg.get('speaker', 'Speaker')}] {seg.get('text', '').strip()}"
        for seg in merged
    )
|
|
def export_to_txt(text):
    """Write the transcript to a new UTF-8 ``.txt`` file in the temp directory.

    The file name keeps the ``transcript_<YYYYmmdd_HHMMSS>`` prefix and adds a
    random component, so two exports made within the same second never
    overwrite each other and the path cannot be predicted in advance.

    Args:
        text: Transcript text to save. ``None`` is written as an empty file.

    Returns:
        Absolute path of the file that was created. The file is not deleted
        automatically; the caller decides when to remove it.
    """
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    # delete=False: the file has to outlive this function so that it can be
    # offered for download afterwards.
    with tempfile.NamedTemporaryFile(
        mode="w",
        encoding="utf-8",
        prefix=f"transcript_{timestamp}_",
        suffix=".txt",
        delete=False,
    ) as f:
        f.write(text or "")
    return f.name
|
|
| |
# Build the Gradio UI: upload -> transcribe -> show text -> export to file.
with gr.Blocks(title="🎙️ DiarAI: Транскрибация и диаризация (RU)") as app:
    gr.Markdown("""
    ## Транскрибация и диаризация (русский язык)
    - Фиксированный язык распознавания: **ru** для повышения скорости.
    - Диаризация спикеров через Pyannote.
    """)

    # Components are created in display order.
    audio_input = gr.Audio(type="filepath", label="Загрузите аудио (только RU)")
    transcribe_btn = gr.Button("▶️ Транскрибировать")
    output_txt = gr.Textbox(label="Результат транскрипции", lines=20)
    save_btn = gr.Button("💾 Экспорт в .txt")
    download_file = gr.File(label="Скачать результат")

    # Uploaded audio path -> transcript shown in the text box.
    transcribe_btn.click(transcribe_with_diarization, audio_input, output_txt)
    # Text box contents -> downloadable .txt file.
    save_btn.click(export_to_txt, output_txt, download_file)


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    app.launch()