| import spaces |
| import os |
| import shutil |
| import threading |
| import time |
| import sys |
|
|
| from huggingface_hub import snapshot_download |
|
|
# Put this file's directory and its ``indextts`` sub-directory on the module
# search path so the project imports further down can be resolved.
current_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.extend([current_dir, os.path.join(current_dir, "indextts")])
|
|
| import gradio as gr |
| from indextts.infer import IndexTTS |
| from tools.i18n.i18n import I18nAuto |
|
|
# Translation helper configured for the zh_CN locale; it is not referenced
# again in the visible part of this file.
i18n = I18nAuto(language="zh_CN")
MODE = 'local'

# The model files must be present in ./checkpoints before the engine is
# constructed from that directory, so the download has to come first.
snapshot_download(repo_id="IndexTeam/IndexTTS-1.5", local_dir="checkpoints")
tts = IndexTTS(
    model_dir="checkpoints",
    cfg_path="checkpoints/config.yaml",
)

# Working directories: generated audio goes under outputs/, and the UI lists
# the contents of prompts/ when it is built.
os.makedirs("outputs/tasks", exist_ok=True)
os.makedirs("prompts", exist_ok=True)
|
|
@spaces.GPU
def infer(voice, text, output_path=None):
    """Synthesize ``text`` using ``voice`` as the reference audio.

    Args:
        voice: Reference (prompt) audio, forwarded unchanged to ``tts.infer``.
            The UI passes the file path produced by its audio component.
        text: The text to synthesize.
        output_path: Destination for the generated WAV file. When falsy, a
            unique file name under ``outputs/`` is generated.

    Returns:
        The path that was given to ``tts.infer`` as the output location.

    Raises:
        RuntimeError: If the module-level ``tts`` engine is not available.
    """
    import uuid  # function-scope import: only needed to build default names

    if not tts:
        raise RuntimeError("Model not loaded")
    if not output_path:
        # A whole-second timestamp on its own is not unique: two requests
        # handled within the same second would write to the same file and one
        # caller would receive the other's audio. The random suffix prevents
        # that while keeping the timestamp for rough chronological ordering.
        file_name = f"spk_{int(time.time())}_{uuid.uuid4().hex}.wav"
        output_path = os.path.join("outputs", file_name)
    tts.infer(voice, text, output_path)
    return output_path
|
|
def gen_single(prompt, text):
    """Run one synthesis and return an update that reveals the result player.

    Args:
        prompt: Reference audio handed to ``infer`` as the voice.
        text: Text to synthesize.

    Returns:
        A ``gr.update`` carrying the generated file path as the new value and
        making the target component visible.
    """
    return gr.update(value=infer(prompt, text), visible=True)
|
|
def update_prompt_audio():
    """Return an update that marks the receiving component as interactive."""
    return gr.update(interactive=True)
|
|
|
|
# Declarative UI definition: one tab holding the reference-audio input, the
# target-text box, the generate button and the (initially hidden) result player.
# NOTE(review): the nesting of the components was reconstructed from a copy of
# this file whose indentation had been lost -- confirm the layout against the
# running app.
with gr.Blocks() as demo:
    mutex = threading.Lock()  # not referenced in the visible part of this file
    gr.HTML('''
    <h2><center>Echo AI : High-Fidelity,Controllable, and Zero-Shot Text-to-Speech and voice cloning for the Real World</center></h2>

    <p align="center">
    <a href='https://arxiv.org/abs/2502.05512'><img src='https://img.shields.io/badge/ArXiv-2502.05512-red'></a>

    ''')
    with gr.Tab("Voice cloning and audio generation"):
        with gr.Row():
            # The directory must exist before it is listed below.
            os.makedirs("prompts", exist_ok=True)
            prompt_audio = gr.Audio(
                label="Please upload reference audio",
                key="prompt_audio",
                sources=["upload", "microphone"],
                type="filepath",
            )
            # First stored prompt file name, or an empty string when there is
            # none; the value is not used further in the visible code.
            prompt_list = os.listdir("prompts")
            default = prompt_list[0] if prompt_list else ''
            input_text_single = gr.Textbox(
                label="Please enter target text",
                key="input_text_single",
            )
            gen_button = gr.Button(
                "Generate speech",
                key="gen_button",
                interactive=True,
            )
            # Hidden until gen_single returns an update that makes it visible.
            output_audio = gr.Audio(
                label="Generate results",
                visible=False,
                key="output_audio",
            )

    # Event wiring: an upload (re-)enables the button, and a click runs one
    # synthesis and routes the result into the output player.
    prompt_audio.upload(
        fn=update_prompt_audio,
        inputs=[],
        outputs=[gen_button],
    )

    gen_button.click(
        fn=gen_single,
        inputs=[prompt_audio, input_text_single],
        outputs=[output_audio],
    )
|
|
|
|
def main():
    """Load the text normalizer, enable request queueing and start the server."""
    tts.load_normalizer()
    # NOTE(review): what the positional 20 configures depends on the installed
    # Gradio version's queue() signature -- confirm it sets what was intended.
    demo.queue(20)
    launch_options = {"server_name": "0.0.0.0", "share": True}
    demo.launch(**launch_options)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|
|
|