# training-tts / app.py
# f15hb0wn's picture
# Upload app.py with huggingface_hub
# f8fcbb0 verified
"""
WitFoo Training TTS — HuggingFace Space
Multilingual text-to-speech using Microsoft Edge TTS neural voices.
Supports: en, es, fr, de, ja, ar
"""
import gradio as gr
import asyncio
import tempfile
import os
import edge_tts
# Default (male) Edge TTS neural voice for each supported language code.
VOICES = {
    "en": "en-US-GuyNeural",
    "es": "es-MX-JorgeNeural",
    "fr": "fr-FR-HenriNeural",
    "de": "de-DE-ConradNeural",
    "ja": "ja-JP-KeitaNeural",
    "ar": "ar-SA-HamedNeural",
}

# Female counterpart for each language code in VOICES.
VOICES_FEMALE = {
    "en": "en-US-JennyNeural",
    "es": "es-MX-DaliaNeural",
    "fr": "fr-FR-DeniseNeural",
    "de": "de-DE-KatjaNeural",
    "ja": "ja-JP-NanamiNeural",
    "ar": "ar-SA-ZariyahNeural",
}

# Language codes offered in the UI, in the insertion order of VOICES.
SUPPORTED_LANGUAGES = list(VOICES)

# Human-readable name for each language code.
LANG_NAMES = {
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "de": "German",
    "ja": "Japanese",
    "ar": "Arabic",
}
async def _generate_async(text: str, voice: str, output_path: str, *, rate: str = "-5%") -> None:
    """Synthesize *text* with Edge TTS and save the audio to *output_path*.

    Args:
        text: Text to convert to speech.
        voice: Edge TTS voice name, e.g. ``"en-US-GuyNeural"``.
        output_path: Path of the file the audio is written to.
        rate: Speaking-rate adjustment forwarded to ``edge_tts.Communicate``
            as a signed percentage string. Defaults to ``"-5%"``, the value
            that used to be hard-coded here, so existing callers are
            unaffected.
    """
    communicate = edge_tts.Communicate(text, voice, rate=rate)
    await communicate.save(output_path)
def _run_async(coro):
    """Run *coro* to completion from synchronous code and return its result.

    Works both when no event loop is running in the current thread and when
    one already is (in which case ``asyncio.run`` cannot be called directly).

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever the coroutine returns.

    Raises:
        Any exception raised by the coroutine, unchanged.

    Note:
        When called from inside a running loop this blocks that loop's
        thread until the coroutine finishes.
    """
    # Only the loop probe is guarded: a RuntimeError raised by the coroutine
    # itself must propagate instead of being mistaken for "no running loop"
    # and triggering a second run of an already-consumed coroutine.
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so asyncio.run() is safe.
        return asyncio.run(coro)

    # A loop is already running here; drive the coroutine on a fresh loop in
    # a worker thread and wait for its result.
    import concurrent.futures

    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()
def generate_speech(text: str, language: str, voice_type: str) -> str:
    """Generate speech for *text* and return the path of the MP3 file.

    Args:
        text: Narration text to synthesize.
        language: Language code; must be one of ``SUPPORTED_LANGUAGES``.
        voice_type: ``"Female"`` selects ``VOICES_FEMALE``; any other value
            selects ``VOICES``.

    Returns:
        Path to a newly created temporary ``.mp3`` file.

    Raises:
        gr.Error: If *text* is empty/whitespace or *language* is unsupported.
    """
    if not text or not text.strip():
        raise gr.Error("Text cannot be empty")
    if language not in SUPPORTED_LANGUAGES:
        raise gr.Error(f"Unsupported language: {language}")

    voices = VOICES_FEMALE if voice_type == "Female" else VOICES
    voice = voices[language]

    # mkstemp creates the file itself under a unique name; the deprecated
    # mktemp only returns a name, which another process could claim first.
    fd, output_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    try:
        _run_async(_generate_async(text, voice, output_path))
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        try:
            os.remove(output_path)
        except OSError:
            pass  # best-effort cleanup; the original error matters more
        raise
    return output_path
def batch_generate(texts: str, language: str, voice_type: str):
    """Generate one MP3 per ``|||``-separated segment of *texts*.

    Args:
        texts: Input text; segments are separated by ``|||`` and blank
            segments are ignored.
        language: Language code. A code missing from the voice table falls
            back to the English male voice (``VOICES["en"]``).
        voice_type: ``"Female"`` selects ``VOICES_FEMALE``; any other value
            selects ``VOICES``.

    Returns:
        List of paths to the generated temporary ``.mp3`` files, in segment
        order.

    Raises:
        gr.Error: If *texts* contains no non-blank segment.
    """
    segments = [t.strip() for t in (texts or "").split("|||") if t.strip()]
    if not segments:
        raise gr.Error("No text segments found. Separate with |||")

    voices = VOICES_FEMALE if voice_type == "Female" else VOICES
    voice = voices.get(language, VOICES["en"])

    total = len(segments)
    results = []
    try:
        for i, segment in enumerate(segments, start=1):
            print(f"[{i}/{total}] {segment[:60]}...")
            # mkstemp creates the file itself under a unique name; the
            # deprecated mktemp only returns a name and is race-prone.
            fd, output_path = tempfile.mkstemp(suffix=f"_seg{i:03d}.mp3")
            os.close(fd)
            # Track the path before synthesis so it is cleaned up on failure.
            results.append(output_path)
            _run_async(_generate_async(segment, voice, output_path))
    except Exception:
        # A failed segment aborts the batch; remove everything created so far.
        for path in results:
            try:
                os.remove(path)
            except OSError:
                pass  # best-effort cleanup; the original error matters more
        raise
    return results
# Gradio interface
# Builds the two-tab UI and binds it to the module-level name `demo`.
# NOTE(review): this file arrived with its indentation stripped; the nesting
# of the `with` blocks below is reconstructed from statement order — confirm
# the layout against the running app.
with gr.Blocks(title="WitFoo Training TTS", theme=gr.themes.Base()) as demo:
    # Page heading and one-line description.
    gr.Markdown("# WitFoo Training TTS")
    gr.Markdown("Generate multilingual voiceover for training courses using neural TTS voices.")
    # Tab 1: synthesize a single text with generate_speech.
    with gr.Tab("Single Generation"):
        with gr.Row():
            # Input controls: text, language, voice and the trigger button.
            with gr.Column():
                text_input = gr.Textbox(label="Narration Text", lines=8,
                                        placeholder="Enter narration text to convert to speech...")
                with gr.Row():
                    lang_input = gr.Dropdown(choices=SUPPORTED_LANGUAGES, value="en", label="Language")
                    voice_input = gr.Radio(choices=["Male", "Female"], value="Male", label="Voice")
                generate_btn = gr.Button("Generate Speech", variant="primary", size="lg")
            # Output: audio component fed with the file path returned by generate_speech.
            with gr.Column():
                audio_output = gr.Audio(label="Generated Speech", type="filepath")
        # Button click -> generate_speech(text, language, voice_type) -> audio component.
        generate_btn.click(fn=generate_speech, inputs=[text_input, lang_input, voice_input], outputs=audio_output)
    # Tab 2: synthesize several "|||"-separated segments with batch_generate.
    with gr.Tab("Batch Generation"):
        gr.Markdown("Separate text segments with `|||` for batch processing.")
        batch_text = gr.Textbox(label="Texts (||| separated)", lines=12,
                                placeholder="First paragraph...\n|||\nSecond paragraph...\n|||\nThird paragraph...")
        with gr.Row():
            batch_lang = gr.Dropdown(choices=SUPPORTED_LANGUAGES, value="en", label="Language")
            batch_voice = gr.Radio(choices=["Male", "Female"], value="Male", label="Voice")
        batch_btn = gr.Button("Generate All Segments", variant="primary", size="lg")
        # Receives the list of file paths returned by batch_generate.
        batch_output = gr.File(label="Generated Audio Files", file_count="multiple")
        # Button click -> batch_generate(texts, language, voice_type) -> file list.
        batch_btn.click(fn=batch_generate, inputs=[batch_text, batch_lang, batch_voice], outputs=batch_output)
    # Footer: separator, the default (VOICES) voice per language, and a tagline.
    gr.Markdown("---")
    gr.Markdown("**Voices:** " + " | ".join([f"{LANG_NAMES[k]}: {v}" for k, v in VOICES.items()]))
    gr.Markdown("*WitFoo Training Program — 9 certification courses, 6 languages, 178 lessons*")
# Start the Gradio server only when this file is run as a script, not on import.
if __name__ == "__main__":
    # Serve on address 0.0.0.0, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)