Spaces:

MiakOnline
/

RecToTextPro2

Sleeping

File size: 6,985 Bytes

644ed40

import streamlit as st
import whisper
import tempfile
import os
import time
import re
from pydub import AudioSegment
from openpyxl import Workbook
from openpyxl.styles import Font
from docx import Document
from docx.shared import Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from io import BytesIO
from collections import Counter

# ---------------------------------------------------
# PAGE CONFIG
# ---------------------------------------------------
# Browser-tab title and icon plus the full-width layout for the whole app.
page_settings = {
    "page_title": "RecToText Pro - AI Edition",
    "layout": "wide",
    "page_icon": "🎤",
}
st.set_page_config(**page_settings)

# ---------------------------------------------------
# SIDEBAR
# ---------------------------------------------------
# Sidebar controls: Whisper model size, output format, and a session reset.
sidebar = st.sidebar
sidebar.title("⚙️ Settings")

model_option = sidebar.selectbox("Select Whisper Model", ["base", "small"])
output_mode = sidebar.radio("Output Format", ["Roman Urdu", "English"])

clear_requested = sidebar.button("🧹 Clear Session")
if clear_requested:
    # Drop everything kept in session state, then restart the script run.
    st.session_state.clear()
    st.rerun()

# ---------------------------------------------------
# HEADER
# ---------------------------------------------------
# Centered app title and tagline, followed by a horizontal rule.
header_title_html = "<h1 style='text-align:center;'>🎤 RecToText Pro - AI Enhanced</h1>"
header_tagline_html = "<p style='text-align:center;'>Auto Title | AI Summary | Smart Formatting</p>"
for header_html in (header_title_html, header_tagline_html):
    st.markdown(header_html, unsafe_allow_html=True)
st.divider()

# ---------------------------------------------------
# FUNCTIONS
# ---------------------------------------------------

@st.cache_resource
def load_model(model_size):
    """Load the Whisper model of the requested size.

    Decorated with ``st.cache_resource`` so Streamlit can reuse the loaded
    model instead of calling ``whisper.load_model`` again.

    Args:
        model_size: Model name handed to ``whisper.load_model``.

    Returns:
        The object returned by ``whisper.load_model``.
    """
    whisper_model = whisper.load_model(model_size)
    return whisper_model

def clean_text(text):
    """Strip spoken filler words from *text* and tidy the leftover spacing.

    Fillers are matched as whole words, case-insensitively. A comma that
    directly follows a removed filler is dropped with it, and whitespace left
    in front of punctuation is removed, so no orphaned ", ," or " ." remains.

    Args:
        text: Transcribed text; ``None`` or an empty string is accepted.

    Returns:
        The cleaned text with runs of whitespace collapsed to single spaces,
        or ``""`` when *text* is empty.
    """
    if not text:
        return ""

    filler_words = ["um", "hmm", "acha", "matlab", "uh", "huh"]
    # The optional trailing comma belongs to the filler ("um, so" -> "so").
    pattern = r'\b(?:' + '|'.join(filler_words) + r')\b,?'
    text = re.sub(pattern, '', text, flags=re.IGNORECASE)
    text = re.sub(r'\s+', ' ', text).strip()
    # Removing a filler can strand a space before punctuation ("said ." ).
    text = re.sub(r'\s+([,.!?;:])', r'\1', text)
    return text

def convert_to_roman_urdu(text):
    """Transliterate a small set of common Urdu words into Roman Urdu.

    Only whole words are replaced. Plain substring replacement would rewrite
    the inside of longer words that merely contain one of the short keys,
    leaving a mix of Latin and Urdu letters in a single word.

    Args:
        text: Text that may contain Urdu-script words.

    Returns:
        *text* with each standalone occurrence of a known word replaced by
        its Roman spelling; everything else is left unchanged.
    """
    replacements = {
        "ہے": "hai",
        "میں": "main",
        "اور": "aur",
        "کیا": "kya",
        "آپ": "aap",
        "کی": "ki",
        "کا": "ka"
    }
    # Longest keys first so a key is never shadowed by one of its prefixes.
    ordered_keys = sorted(replacements, key=len, reverse=True)
    # The lookarounds require a non-word character (or the string edge) on
    # both sides, i.e. the key must be a complete word.
    pattern = (
        r'(?<!\w)(?:'
        + '|'.join(re.escape(key) for key in ordered_keys)
        + r')(?!\w)'
    )
    return re.sub(pattern, lambda match: replacements[match.group(0)], text)

# -----------------------------
# AI Title Detection
# -----------------------------
def generate_title(text):
    """Derive a lecture title from the most frequent words in *text*.

    Words are runs of at least four ASCII letters, compared in lowercase.
    The three most frequent ones are capitalized and appended to
    "Lecture on ".

    Args:
        text: Text to take the keywords from.

    Returns:
        The generated title, or "Lecture Transcription" when *text* holds no
        qualifying word.
    """
    frequency = Counter(re.findall(r'\b[a-zA-Z]{4,}\b', text.lower()))
    top_words = [entry[0].capitalize() for entry in frequency.most_common(3)]
    if not top_words:
        return "Lecture Transcription"
    return "Lecture on " + " ".join(top_words)

# -----------------------------
# AI Summary Generator
# -----------------------------
def generate_summary(text):
    """Return the first five sentences of *text* as a summary.

    A sentence ends at ".", "!" or "?" followed by one or more spaces.

    Args:
        text: Text to summarize.

    Returns:
        The leading sentences (at most five) joined by single spaces.
    """
    sentence_boundary = re.compile(r'(?<=[.!?]) +')
    leading_sentences = sentence_boundary.split(text)[:5]
    return " ".join(leading_sentences)

# -----------------------------
# Smart Formatting
# -----------------------------
def smart_format(text):
    """Lay out *text* with short sentences promoted to heading lines.

    The text is split into sentences at ".", "!" or "?" followed by spaces.
    A sentence with fewer than eight words is upper-cased and placed on its
    own line after a blank line; longer sentences are kept inline, each
    followed by a single space.

    Args:
        text: Text to format.

    Returns:
        The formatted text with leading and trailing whitespace stripped.
    """
    pieces = []
    for sentence in re.split(r'(?<=[.!?]) +', text):
        is_heading = len(sentence.split()) < 8
        pieces.append(f"\n\n{sentence.upper()}\n" if is_heading else sentence + " ")
    return "".join(pieces).strip()

# -----------------------------
# Excel Export
# -----------------------------
def create_excel(segments):
    """Build an .xlsx workbook with one row per transcription segment.

    The sheet is named "Transcription" and starts with a bold header row.
    Each following row holds the segment's time range, its text as given,
    and the same text passed through ``clean_text``.

    Args:
        segments: Iterable of mappings with ``start``, ``end`` and ``text``
            keys.

    Returns:
        A ``BytesIO`` holding the saved workbook, rewound to the start.
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "Transcription"

    # Header row, rendered in bold.
    sheet.append(["Timestamp", "Transcribed Text", "Cleaned Output"])
    for column_index in (1, 2, 3):
        header_cell = sheet.cell(row=1, column=column_index)
        header_cell.font = Font(bold=True)

    for segment in segments:
        begin = round(segment['start'], 2)
        finish = round(segment['end'], 2)
        spoken = segment["text"]
        sheet.append([f"{begin} - {finish}", spoken, clean_text(spoken)])

    output = BytesIO()
    workbook.save(output)
    output.seek(0)
    return output

# -----------------------------
# Word Export
# -----------------------------
def create_word_document(title, summary, formatted_text):
    """Build a .docx file with a title page, a summary page and the body.

    Args:
        title: Text of the centered level-1 heading on the first page.
        summary: Paragraph placed under the "Executive Summary" heading.
        formatted_text: Body text; every chunk separated by a blank line
            becomes its own paragraph.

    Returns:
        A ``BytesIO`` holding the saved document, rewound to the start.
    """
    document = Document()

    # First page: centered title.
    title_heading = document.add_heading(title, level=1)
    title_heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
    document.add_page_break()

    # Second page: summary.
    document.add_heading("Executive Summary", level=2)
    document.add_paragraph(summary)
    document.add_page_break()

    # Remaining pages: one paragraph per chunk, with 12 pt spacing after it.
    document.add_heading("Full Lecture Content", level=2)
    for chunk in formatted_text.split("\n\n"):
        body_paragraph = document.add_paragraph(chunk)
        body_paragraph.paragraph_format.space_after = Pt(12)

    output = BytesIO()
    document.save(output)
    output.seek(0)
    return output

# ---------------------------------------------------
# FILE UPLOADER
# ---------------------------------------------------
def _transcribe_upload(uploaded_file, model_name):
    """Transcribe an uploaded recording with Whisper.

    The upload is converted to a temporary WAV file, which is always removed
    again, even when conversion or transcription raises.

    Args:
        uploaded_file: File-like object returned by ``st.file_uploader``.
        model_name: Whisper model size passed to ``load_model``.

    Returns:
        Tuple ``(result, seconds)``: the value returned by
        ``model.transcribe`` and the transcription time in seconds, rounded
        to two decimals.
    """
    ext = uploaded_file.name.split(".")[-1]

    # Only reserve a path here. The handle is closed before pydub writes to
    # it because reopening a still-open named temporary file by name is not
    # possible on every platform.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        temp_audio_path = tmp.name

    try:
        uploaded_file.seek(0)  # an earlier read may have moved the cursor
        audio = AudioSegment.from_file(uploaded_file, format=ext)
        # export() hands back the output file handle; close it so the file
        # can be deleted afterwards.
        audio.export(temp_audio_path, format="wav").close()

        st.info("Loading Whisper model...")
        model = load_model(model_name)

        start_time = time.time()
        with st.spinner("Transcribing..."):
            result = model.transcribe(temp_audio_path)
        elapsed = round(time.time() - start_time, 2)
    finally:
        os.remove(temp_audio_path)

    return result, elapsed


uploaded_file = st.file_uploader(
    "Upload Lecture Recording (.mp3, .wav, .m4a, .aac)",
    type=["mp3", "wav", "m4a", "aac"]
)

if uploaded_file is not None:

    st.audio(uploaded_file)

    # Streamlit re-executes this script on every interaction, including a
    # click on a download button. Keep the transcription in session state and
    # redo it only when the file or the selected model changes.
    cache_key = (uploaded_file.name, uploaded_file.size, model_option)
    cached = st.session_state.get("transcription")
    if cached is None or cached["key"] != cache_key:
        result, processing_time = _transcribe_upload(uploaded_file, model_option)
        cached = {"key": cache_key, "result": result, "seconds": processing_time}
        st.session_state["transcription"] = cached
    result = cached["result"]
    processing_time = cached["seconds"]

    full_text = result["text"]
    segments = result["segments"]
    detected_lang = result.get("language", "Unknown")

    cleaned_text = clean_text(full_text)

    # NOTE(review): the "English" mode applies no conversion here, so the
    # text stays in whatever language was transcribed. Confirm whether a
    # translation step was intended.
    if output_mode == "Roman Urdu":
        cleaned_text = convert_to_roman_urdu(cleaned_text)

    title = generate_title(cleaned_text)
    summary = generate_summary(cleaned_text)
    formatted_text = smart_format(cleaned_text)

    word_count = len(cleaned_text.split())

    col1, col2 = st.columns(2)

    # The two text areas need distinct, non-empty labels: with identical
    # labels and identical content (e.g. both empty for silent audio) they
    # would be the same widget to Streamlit. The labels are hidden because
    # the subheaders already name each column.
    with col1:
        st.subheader("📜 Raw Transcription")
        st.text_area(
            "Raw Transcription",
            full_text,
            height=350,
            label_visibility="collapsed",
        )

    with col2:
        st.subheader("✨ AI Formatted Version")
        st.text_area(
            "AI Formatted Version",
            formatted_text,
            height=350,
            label_visibility="collapsed",
        )

    st.divider()

    st.write(f"**Auto Detected Title:** {title}")
    st.write(f"**Detected Language:** {detected_lang}")
    st.write(f"**Word Count:** {word_count}")
    st.write(f"**Processing Time:** {processing_time} sec")

    excel_file = create_excel(segments)
    word_file = create_word_document(title, summary, formatted_text)

    colA, colB = st.columns(2)

    with colA:
        st.download_button(
            "📥 Download Excel (.xlsx)",
            data=excel_file,
            file_name="RecToText_Transcription.xlsx"
        )

    with colB:
        st.download_button(
            "📄 Download Word (.docx)",
            data=word_file,
            file_name="RecToText_AI_Lecture.docx"
        )

# Footer: horizontal rule and a small centered product line.
st.divider()
footer_html = "<p style='text-align:center;font-size:12px;'>RecToText Pro AI Edition | Auto Title | Smart Summary | AI Formatting</p>"
st.markdown(footer_html, unsafe_allow_html=True)