| import torch |
| import scipy |
| import os |
| import streamlit as st |
| import pandas as pd |
| from transformers import set_seed, pipeline |
| from transformers import VitsTokenizer, VitsModel |
| from datasets import load_dataset, Audio |
| from src import * |
|
|
| |
| |
|
|
| |
| |
| |
|
|
|
|
| |
# Language codes offered by every language selector in the app.
language_list = ['mos', 'fra', 'eng']

# Page header.
st.title("Demo: Automated Tools for Mooré Language")

# One tab per tool, plus an "About" tab; labels are in display order.
_tab_labels = [
    "Text to speech",
    "Speech to text",
    "Translation",
    "Language ID",
    "**About**",
]
tts, stt, trans, lid, about = st.tabs(_tab_labels)
|
|
| |
# --- Text-to-speech tab ---------------------------------------------------
with tts:
    # Free-text input for the sentence to synthesize.
    tts_text = st.text_area(
        label="Please enter your text here:",
        value="",
        placeholder="ne y wĩndga",
    )

    # Two-column layout; only the left column is used, so the selector
    # occupies half of the page width.
    tts_col1, tts_col2 = st.columns(2)
    with tts_col1:
        tts_lang = st.selectbox(
            'Language of text', language_list, format_func=decode_iso
        )

    if st.button("Speak"):
        st.divider()
        if not tts_text.strip():
            # Nothing to synthesize: tell the user instead of handing an
            # empty string to the model.
            st.warning("Please enter some text to synthesize.")
        else:
            with st.spinner(":rainbow[Synthesizing, please wait...]"):
                synth = synthesize_facebook(tts_text, tts_lang)
            # NOTE(review): 16 kHz is assumed to match the output rate of
            # synthesize_facebook -- confirm against its implementation.
            st.audio(synth, sample_rate=16_000)
|
|
|
|
| |
# --- Speech-to-text tab ---------------------------------------------------
with stt:
    # Audio upload plus the language the clip is spoken in.
    stt_file = st.file_uploader(
        "Please upload an audio file:",
        type=['mp3', 'm4a'],
        key="stt_uploader",
    )
    stt_lang = st.selectbox(
        "Please select the language:", language_list, format_func=decode_iso
    )

    if st.button("Transcribe"):
        st.divider()
        if stt_file is None:
            # file_uploader yields None until the user picks a file.
            st.warning("Please upload an audio file before transcribing.")
        else:
            with st.spinner(":rainbow[Received your file, please wait while I process it...]"):
                # Kept in its own name so the `stt` tab container is not
                # overwritten by the result.
                transcription = transcribe(stt_file, stt_lang)
            st.write(":violet[The transcription is:]")
            st.write(':violet[ "' + transcription + '"]')

    # Pre-computed example transcriptions shipped with the demo.
    st.subheader("Examples")
    st.write("Using the supplied clips, here are the transcriptions:")
    df = pd.read_csv("data/speech_to_text.csv")
    # Positional relabel: assumes the CSV holds exactly these five columns
    # in this order -- TODO confirm against data/speech_to_text.csv.
    df.columns = [
        'Clip ID',
        'Spoken in Moore',
        'Spoken in French',
        'Transcription in Moore',
        'Transcription in French',
    ]
    df.set_index('Clip ID', inplace=True)

    st.table(df[['Spoken in Moore', 'Transcription in Moore']])
    st.table(df[['Spoken in French', 'Transcription in French']])
|
|
| |
# --- Translation tab ------------------------------------------------------
with trans:
    trans_text = st.text_area(
        label="Please enter your translation text here:",
        value="",
        placeholder="ne y wĩndga",
    )

    # Source and target language selectors side by side. The target
    # defaults to the second entry so the two selectors start out different.
    trans_col1, trans_col2 = st.columns(2)
    with trans_col1:
        src_lang = st.selectbox(
            'Translate from:', language_list, format_func=decode_iso
        )
    with trans_col2:
        target_lang = st.selectbox(
            'Translate to:', language_list, format_func=decode_iso, index=1
        )

    if st.button("Translate"):
        st.divider()
        if not trans_text.strip():
            # Nothing to translate: tell the user instead of calling the model.
            st.warning("Please enter some text to translate.")
        else:
            spinner_msg = (
                ":rainbow[Translating from " + decode_iso(src_lang)
                + " into " + decode_iso(target_lang) + ", please wait...]"
            )
            with st.spinner(spinner_msg):
                translation = translate(trans_text, src_lang, target_lang)
            st.write(translation)

    # Pre-computed example translations shipped with the demo.
    st.subheader("Examples")
    st.write("Using the supplied clips, here are the translations:")

    # Map each CSV column to its display label BY NAME. read_csv's `usecols`
    # does not control the order of the returned columns, so a positional
    # `df.columns = [...]` assignment can silently attach the wrong label.
    display_names = {
        'ID': 'Clip ID',
        'Moore': 'Original Moore',
        'French': 'Original French',
        'English': 'Original English',
        'tr_meta_mos_eng': 'Moore-English Translation',
        'tr_meta_mos_fra': 'Moore-French Translation',
        'tr_meta_eng_mos': 'English-Moore Translation',
        'tr_meta_fra_mos': 'French-Moore Translation',
    }
    df = pd.read_csv("data/translated_eng.csv", usecols=list(display_names))
    df = df.rename(columns=display_names)
    df.set_index('Clip ID', inplace=True)

    st.table(df[['Original Moore', 'Moore-French Translation', 'Moore-English Translation']])
    st.table(df[['Original French', 'French-Moore Translation']])
    st.table(df[['Original English', 'English-Moore Translation']])
|
|
| |
# --- Language-identification tab ------------------------------------------
with lid:
    langid_file = st.file_uploader(
        "Please upload an audio file:",
        type=['mp3', 'm4a'],
        key="lid_uploader",
    )

    if st.button("Identify"):
        st.divider()
        if langid_file is None:
            # file_uploader yields None until the user picks a file.
            st.warning("Please upload an audio file before identifying its language.")
        else:
            with st.spinner(":rainbow[Received your file, please wait while I process it...]"):
                lang = identify_language(langid_file)
                # Convert the returned code into a display name.
                lang = decode_iso(lang)
            st.write(":violet[The detected language is " + lang + "]")

    # Pre-computed example results shipped with the demo.
    st.subheader("Examples")
    st.write("Using the supplied clips, here are the recognized languages:")
    df = pd.read_csv("data/language_id.csv")
    # Positional relabel: assumes the CSV holds exactly these three columns
    # in this order -- TODO confirm against data/language_id.csv.
    df.columns = [
        'Clip ID',
        'Language detected when speaking Mooré',
        'Language detected when speaking French',
    ]
    df.set_index('Clip ID', inplace=True)
    st.dataframe(df)
|
|
|
|
| |
| |
|
|
# --- About tab ------------------------------------------------------------
with about:
    # Static Markdown describing the models behind each tab. The text is kept
    # flush-left inside the literal so no line picks up leading indentation.
    st.markdown('''
**Text to speech**, **speech to text**, and **language identification** capabilities are provided by Meta's [Massively Multilingual Speech (MMS)](https://ai.meta.com/blog/multilingual-model-speech-recognition/) model, which supports over 1000 languages.[^1]

**Translation** capabilities are provided primarily by Meta's [No Language Left Behind (NLLB)](https://ai.meta.com/research/no-language-left-behind/) model, which supports translation between 200 languages.[^3]
We compare Meta's NLLB translations to two other translation alternatives. Masakhane, an African NLP initiative, offers endpoints for translations between Mooré and French.[^4] Helsinki NLP offers endpoints between Mooré and English, and one endpoint from French to Mooré.[^5]

Facebook has since released [SeamlessM4T](https://huggingface.co/docs/transformers/main/model_doc/seamless_m4t) which also provides support for audio-to-audio translation, however, Mooré is not currently one of the included languages.
[^1]: Endpoints used: TTS ([English](https://huggingface.co/facebook/mms-tts-eng),
[French](https://huggingface.co/facebook/mms-tts-fra),
[Mooré](https://huggingface.co/facebook/mms-tts-mos)),
[STT](https://huggingface.co/facebook/mms-1b-all),
[LID](https://huggingface.co/facebook/mms-lid-256). For language ID, the 256-language variant was chosen as this was the model with the smallest number of languages, which still included Mooré.
Learn more:
[Docs](https://huggingface.co/docs/transformers/model_doc/mms) |
[Paper](https://arxiv.org/abs/2305.13516) |
[Supported languages](https://dl.fbaipublicfiles.com/mms/misc/language_coverage_mms.html)
[^3]: Endpoint used: [NLLB](https://huggingface.co/facebook/nllb-200-distilled-600M).
Learn more:
[Docs](https://huggingface.co/docs/transformers/model_doc/nllb) |
[Paper](https://arxiv.org/abs/2207.04672) |
[Supported languages](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200)
[^4]: Endpoint used: [Mooré to French](https://huggingface.co/masakhane/m2m100_418M_mos_fr_news),
[French to Mooré](https://huggingface.co/masakhane/m2m100_418M_fr_mos_news).
Learn more:
[Docs](https://github.com/masakhane-io/lafand-mt) |
[Paper](https://arxiv.org/abs/2205.02022)
[^5]: Endpoints used: [Mooré to English](https://huggingface.co/Helsinki-NLP/opus-mt-mos-en),
[English to Mooré](https://huggingface.co/Helsinki-NLP/opus-mt-en-mos),
[French to Mooré](https://huggingface.co/Helsinki-NLP/opus-mt-fr-mos).
Learn more:
[Docs](https://github.com/Helsinki-NLP/Opus-MT)
''')