Spaces:
Sleeping
Sleeping
| # audio_processor.py - Enhanced with AI Translation Support | |
| import os | |
| from dotenv import load_dotenv | |
| import tempfile | |
| from typing import List, Dict, Optional, Tuple | |
| import json | |
| import traceback | |
| # --- DEFINITIVE NUMBA FIX --- | |
| # This MUST be done BEFORE importing librosa | |
| os.environ["NUMBA_CACHE_DIR"] = "/tmp" | |
| # Now, import librosa safely | |
| import librosa | |
| # --- END OF FIX --- | |
| import google.generativeai as genai | |
| from translator import AITranslator | |
| from google.api_core import exceptions as google_exceptions | |
class AudioProcessor:
    """Transcribe audio and produce approximate, evenly spaced word timestamps.

    Transcription is delegated to the generative model exposed by the
    translator (``self.translator.model``). Timestamps are NOT acoustically
    aligned: words are spread uniformly across the clip duration.

    Attributes:
        translator: The ``AITranslator`` instance, or ``None`` when it could
            not be constructed.
        init_error: Human-readable description of a translator initialization
            problem, or ``None`` when initialization reported no problem.
    """

    def __init__(self):
        self.translator = None
        self.init_error = None
        self._initialize_translator()

    def _initialize_translator(self):
        """Initialize AI translator for multi-language support.

        Failures are non-fatal: they are printed, recorded in
        ``self.init_error`` and, when construction itself raised,
        ``self.translator`` is reset to ``None``.
        """
        try:
            self.translator = AITranslator()
            translator_error = self.translator.init_error
            if translator_error:
                # Surface the problem on this object too, so callers do not
                # have to reach into the translator to find out what happened.
                self.init_error = str(translator_error)
                print(f"--- WARNING: Translator has initialization error: {translator_error} ---")
        except Exception as e:
            self.init_error = f"Translator initialization failed: {e}"
            print(f"--- WARNING: Translator initialization failed: {str(e)} ---")
            self.translator = None

    def transcribe_audio(self, audio_file_path: str) -> Tuple[Optional[str], Optional[str]]:
        """Transcribe an audio file with the translator's generative model.

        Args:
            audio_file_path: Path to the audio file on local disk.

        Returns:
            ``(text, None)`` on success, or ``(None, error_message)`` on
            failure. This method does not raise; every exception is turned
            into an error message.
        """
        if not self.translator or not self.translator.model:
            return None, "--- ERROR: Translator model is not available for transcription. ---"

        uploaded_file = None
        try:
            if not os.path.exists(audio_file_path):
                return None, f"--- ERROR: Audio file for transcription not found at: {audio_file_path} ---"

            uploaded_file = genai.upload_file(path=audio_file_path)
            prompt = "Transcribe this audio file accurately. Provide only the text content."
            response = self.translator.model.generate_content([prompt, uploaded_file])

            if response and hasattr(response, 'text') and response.text:
                return response.text.strip(), None
            return None, "--- WARNING: Gemini returned an empty response for transcription. ---"
        except google_exceptions.ResourceExhausted:
            error_msg = "--- QUOTA ERROR: You have exceeded the daily free usage limit for the AI service. Please wait for your quota to reset (usually within 24 hours) or upgrade your Google AI plan. ---"
            return None, error_msg
        except Exception:
            # Boundary handler: callers expect an error string, never a raise.
            return None, f"--- FATAL ERROR during transcription: {traceback.format_exc()} ---"
        finally:
            # The upload is only needed for this one request; remove it so it
            # does not linger in remote storage. Best effort: a cleanup
            # failure must not mask the transcription result.
            if uploaded_file is not None:
                try:
                    genai.delete_file(uploaded_file.name)
                except Exception as cleanup_error:
                    print(f"--- WARNING: Could not delete uploaded audio file: {cleanup_error} ---")

    def get_audio_duration(self, audio_file_path: str) -> Tuple[Optional[float], Optional[str]]:
        """Measure the duration of an audio file with librosa.

        Args:
            audio_file_path: Path to the audio file on local disk.

        Returns:
            ``(duration_in_seconds, None)`` on success, or
            ``(None, error_message)`` when the file is missing, librosa
            reports a duration below 0.1 s, or any exception occurs.
        """
        try:
            if not os.path.exists(audio_file_path):
                return None, f"--- ERROR: Audio file for duration not found at: {audio_file_path} ---"

            duration = librosa.get_duration(path=audio_file_path)
            if duration is None or duration < 0.1:
                return None, f"--- ERROR: librosa returned an invalid duration: {duration}s ---"
            return duration, None
        except Exception:
            # Boundary handler: callers expect an error string, never a raise.
            return None, f"--- FATAL ERROR getting audio duration with librosa: {traceback.format_exc()} ---"

    @staticmethod
    def _distribute_words(words: List[str], audio_duration: float, edge_padding: float = 0.5) -> List[Dict]:
        """Spread ``words`` uniformly over the clip, leaving padding at both ends.

        The padding is capped at a quarter of the clip, so clips shorter than
        ``4 * edge_padding`` still get non-empty word intervals that stay
        inside the audio. For longer clips the full ``edge_padding`` is used.

        Args:
            words: Words in spoken order.
            audio_duration: Clip duration in seconds.
            edge_padding: Maximum padding, in seconds, kept free at each end.

        Returns:
            One ``{'word', 'start', 'end'}`` dict per word, times rounded to
            three decimals. Empty when ``words`` is empty.
        """
        if not words:
            return []

        padding = min(edge_padding, audio_duration / 4)
        usable_duration = max(0, audio_duration - 2 * padding)
        seconds_per_word = usable_duration / len(words)

        return [
            {
                'word': word.strip(),
                'start': round(padding + index * seconds_per_word, 3),
                'end': round(padding + (index + 1) * seconds_per_word, 3),
            }
            for index, word in enumerate(words)
        ]

    def get_word_timestamps(self, audio_file_path: str) -> Tuple[List[Dict], List[str]]:
        """Transcribe the audio and assign each word an evenly spaced time slot.

        Args:
            audio_file_path: Path to the audio file on local disk.

        Returns:
            ``(timestamps, log_messages)``. ``timestamps`` is empty when
            transcription or duration measurement fails, or when the
            transcription contains no words; the reason is in the logs.
        """
        logs = ["--- INFO: Starting get_word_timestamps... ---"]

        transcription, error = self.transcribe_audio(audio_file_path)
        if error:
            logs.append(error)
            return [], logs
        logs.append(f"--- DEBUG: Transcription successful. Text: '{transcription[:50]}...'")

        audio_duration, error = self.get_audio_duration(audio_file_path)
        if error:
            logs.append(error)
            return [], logs
        logs.append(f"--- DEBUG: Audio duration successful. Duration: {audio_duration:.2f}s")

        words = transcription.split()
        if not words:
            logs.append("--- WARNING: Transcription resulted in zero words. ---")
            return [], logs

        logs.append(f"--- INFO: Distributing {len(words)} words across the duration. ---")
        word_timestamps = self._distribute_words(words, audio_duration)
        logs.append(f"--- SUCCESS: Generated {len(word_timestamps)} word timestamps. ---")
        return word_timestamps, logs

    def get_word_timestamps_with_translation(self, audio_file_path: str, target_language: str = 'ar') -> Tuple[Dict, List[str]]:
        """Transcribe the audio and translate the transcription.

        Args:
            audio_file_path: Path to audio file.
            target_language: Target language for translation ('ar' for Arabic).

        Returns:
            Tuple of ``(result_dict, log_messages)``. ``result_dict`` is empty
            when no transcription could be produced; otherwise it contains::

                {
                    'original_text': str,
                    'translated_text': str,
                    'word_timestamps': List[Dict],
                    'translated_timestamps': List[Dict],
                    'language_detected': str,
                    'target_language': str,
                    'translation_success': bool
                }

            When translation is unavailable or fails, ``translated_text`` and
            ``translated_timestamps`` fall back to the original text and
            timestamps, and ``translation_success`` stays ``False``.
        """
        logs = ["--- INFO: Starting enhanced transcription with translation... ---"]

        # Get original transcription and timestamps
        word_timestamps, transcription_logs = self.get_word_timestamps(audio_file_path)
        logs.extend(transcription_logs)
        if not word_timestamps:
            logs.append("--- ERROR: No transcription available for translation ---")
            return {}, logs

        original_text = " ".join(entry['word'] for entry in word_timestamps)
        logs.append(f"--- INFO: Original transcription: '{original_text[:50]}...' ---")

        result = {
            'original_text': original_text,
            'translated_text': '',
            'word_timestamps': word_timestamps,
            'translated_timestamps': [],
            'language_detected': 'unknown',
            'target_language': target_language,
            'translation_success': False,
        }

        if not self.translator:
            logs.append("--- WARNING: Translator not available, returning original text only ---")
            result['translated_text'] = original_text
            # Same fallback as the failure paths below: reuse original timing.
            result['translated_timestamps'] = word_timestamps
            return result, logs

        try:
            # Detect original language
            detected_lang, detect_error = self.translator.detect_language(original_text)
            if detected_lang and detected_lang != 'unknown':
                result['language_detected'] = detected_lang
                logs.append(f"--- INFO: Detected language: {detected_lang} ---")
            else:
                logs.append(f"--- WARNING: Language detection failed: {detect_error} ---")

            # Translate the text
            translated_text, translation_error = self.translator.translate_text(
                original_text,
                target_language=target_language,
            )

            if translated_text:
                result['translated_text'] = translated_text
                result['translation_success'] = True
                logs.append(f"--- SUCCESS: Translation completed: '{translated_text[:50]}...' ---")

                translated_timestamps = self._create_translated_timestamps(
                    word_timestamps,
                    original_text,
                    translated_text,
                )
                result['translated_timestamps'] = translated_timestamps
                logs.append(f"--- INFO: Created {len(translated_timestamps)} translated timestamps ---")
            else:
                logs.append(f"--- ERROR: Translation failed: {translation_error} ---")
                result['translated_text'] = original_text  # Fallback to original
                result['translated_timestamps'] = word_timestamps  # Use original timestamps
        except Exception:
            logs.append(f"--- FATAL ERROR during translation process: {traceback.format_exc()} ---")
            result['translated_text'] = original_text  # Fallback
            result['translated_timestamps'] = word_timestamps

        return result, logs

    def _create_translated_timestamps(self, original_timestamps: List[Dict], original_text: str, translated_text: str) -> List[Dict]:
        """Create timestamps for translated text by proportional mapping.

        The translated words are spread uniformly between the start of the
        first original word and the end of the last one.

        Args:
            original_timestamps: Original word timestamps.
            original_text: Original transcribed text (not used by the mapping;
                kept for interface compatibility).
            translated_text: Translated text.

        Returns:
            List of translated word timestamps; empty when either input is
            empty or when an error occurs (the error is printed).
        """
        try:
            translated_words = translated_text.split()
            if not translated_words or not original_timestamps:
                return []

            span_start = original_timestamps[0]['start']
            total_duration = original_timestamps[-1]['end'] - span_start
            word_count = len(translated_words)

            return [
                {
                    'word': word.strip(),
                    'start': round(span_start + (index * total_duration / word_count), 3),
                    'end': round(span_start + ((index + 1) * total_duration / word_count), 3),
                }
                for index, word in enumerate(translated_words)
            ]
        except Exception as e:
            print(f"--- ERROR creating translated timestamps: {str(e)} ---")
            return []

    def batch_translate_transcription(self, audio_file_path: str, target_languages: List[str]) -> Tuple[Dict, List[str]]:
        """Transcribe audio and translate to multiple languages.

        Args:
            audio_file_path: Path to audio file.
            target_languages: List of target language codes. ``None`` is
                treated as an empty list.

        Returns:
            Tuple of ``(results_dict, log_messages)``. ``results_dict`` is
            empty when no transcription could be produced. Otherwise it has
            an ``'original'`` entry and a ``'translations'`` mapping with one
            entry per attempted language code; failed languages carry
            ``'success': False``, an ``'error'`` value, and the original text
            and timestamps as fallback.
        """
        logs = ["--- INFO: Starting batch translation process... ---"]

        # Get original transcription
        word_timestamps, transcription_logs = self.get_word_timestamps(audio_file_path)
        logs.extend(transcription_logs)
        if not word_timestamps:
            return {}, logs

        original_text = " ".join(entry['word'] for entry in word_timestamps)
        results = {
            'original': {
                'text': original_text,
                'timestamps': word_timestamps,
                'language': 'detected',
            },
            'translations': {},
        }

        if not self.translator:
            logs.append("--- WARNING: Translator not available for batch translation ---")
            return results, logs

        for lang_code in target_languages or []:
            try:
                translated_text, error = self.translator.translate_text(original_text, lang_code)
                if translated_text:
                    translated_timestamps = self._create_translated_timestamps(
                        word_timestamps, original_text, translated_text
                    )
                    results['translations'][lang_code] = {
                        'text': translated_text,
                        'timestamps': translated_timestamps,
                        'success': True,
                    }
                    logs.append(f"--- SUCCESS: Translation to {lang_code} completed ---")
                else:
                    results['translations'][lang_code] = {
                        'text': original_text,
                        'timestamps': word_timestamps,
                        'success': False,
                        'error': error,
                    }
                    logs.append(f"--- ERROR: Translation to {lang_code} failed: {error} ---")
            except Exception as e:
                # Record the failure so every requested language has an
                # entry, exactly like the error-tuple path above.
                results['translations'][lang_code] = {
                    'text': original_text,
                    'timestamps': word_timestamps,
                    'success': False,
                    'error': str(e),
                }
                logs.append(f"--- FATAL ERROR translating to {lang_code}: {str(e)} ---")

        return results, logs