# audio_processor.py - Enhanced with AI Translation Support
import os
from dotenv import load_dotenv
import tempfile
from typing import List, Dict, Optional, Tuple
import json
import traceback

# --- DEFINITIVE NUMBA FIX ---
# This MUST be done BEFORE importing librosa
os.environ["NUMBA_CACHE_DIR"] = "/tmp"

# Now, import librosa safely
import librosa
# --- END OF FIX ---

import google.generativeai as genai
from translator import AITranslator
from google.api_core import exceptions as google_exceptions


class AudioProcessor:
    """Transcribe audio files and optionally translate the transcription.

    Transcription is delegated to the generative model exposed by
    ``AITranslator`` (``self.translator.model``). Word timestamps are NOT
    produced by alignment: words are spread evenly across the audio duration.

    Attributes:
        translator: The ``AITranslator`` instance, or ``None`` when it could
            not be constructed.
        init_error: Description of the translator initialization problem, or
            ``None`` when initialization reported no error.
    """

    # Silence assumed at the beginning and at the end of a clip, in seconds.
    _EDGE_PADDING_SECONDS = 0.5
    # Durations below this value are treated as an invalid librosa result.
    _MIN_VALID_DURATION_SECONDS = 0.1

    def __init__(self):
        self.translator = None
        self.init_error = None
        self._initialize_translator()

    def _initialize_translator(self):
        """Initialize AI translator for multi-language support.

        Never raises: a failure is printed and recorded in ``self.init_error``
        so callers can find out why translation/transcription is unavailable.
        """
        try:
            self.translator = AITranslator()
            if self.translator.init_error:
                # The translator object exists but reported a problem; keep it
                # (as before) and surface the reason on this instance as well.
                self.init_error = self.translator.init_error
                print(f"--- WARNING: Translator has initialization error: {self.translator.init_error} ---")
        except Exception as e:
            self.init_error = str(e)
            print(f"--- WARNING: Translator initialization failed: {str(e)} ---")
            self.translator = None

    def transcribe_audio(self, audio_file_path: str) -> Tuple[Optional[str], Optional[str]]:
        """Transcribe an audio file with the translator's generative model.

        Args:
            audio_file_path: Path of the audio file on local disk.

        Returns:
            ``(text, None)`` on success, ``(None, error_message)`` otherwise.
            This method does not raise; every failure is reported through the
            error message.
        """
        # getattr: a translator without a ``model`` attribute is reported as
        # "not available" instead of raising AttributeError.
        model = getattr(self.translator, "model", None) if self.translator else None
        if not model:
            return None, "--- ERROR: Translator model is not available for transcription. ---"

        uploaded_file = None
        try:
            if not os.path.exists(audio_file_path):
                return None, f"--- ERROR: Audio file for transcription not found at: {audio_file_path} ---"

            uploaded_file = genai.upload_file(path=audio_file_path)
            prompt = "Transcribe this audio file accurately. Provide only the text content."
            response = model.generate_content([prompt, uploaded_file])

            if response and hasattr(response, 'text') and response.text:
                return response.text.strip(), None
            return None, "--- WARNING: Gemini returned an empty response for transcription. ---"
        except google_exceptions.ResourceExhausted:
            error_msg = (
                "--- QUOTA ERROR: You have exceeded the daily free usage limit for the AI service. "
                "Please wait for your quota to reset (usually within 24 hours) or upgrade your "
                "Google AI plan. ---"
            )
            return None, error_msg
        except Exception:
            error_msg = f"--- FATAL ERROR during transcription: {traceback.format_exc()} ---"
            return None, error_msg
        finally:
            # The upload is only needed for this single request; remove it so
            # uploaded files do not pile up remotely. Cleanup is best-effort
            # and must never change the transcription result.
            if uploaded_file is not None:
                try:
                    genai.delete_file(uploaded_file.name)
                except Exception as cleanup_error:
                    print(f"--- WARNING: Could not delete uploaded audio file: {cleanup_error} ---")

    def get_audio_duration(self, audio_file_path: str) -> Tuple[Optional[float], Optional[str]]:
        """Measure the duration of an audio file with librosa.

        Args:
            audio_file_path: Path of the audio file on local disk.

        Returns:
            ``(duration_in_seconds, None)`` on success, or
            ``(None, error_message)`` when the file is missing, librosa fails,
            or the reported duration is below 0.1 seconds.
        """
        try:
            if not os.path.exists(audio_file_path):
                return None, f"--- ERROR: Audio file for duration not found at: {audio_file_path} ---"

            duration = librosa.get_duration(path=audio_file_path)
            if duration is None or duration < self._MIN_VALID_DURATION_SECONDS:
                return None, f"--- ERROR: librosa returned an invalid duration: {duration}s ---"
            return duration, None
        except Exception:
            error_msg = f"--- FATAL ERROR getting audio duration with librosa: {traceback.format_exc()} ---"
            return None, error_msg

    def get_word_timestamps(self, audio_file_path: str) -> Tuple[List[Dict], List[str]]:
        """Transcribe the audio and assign evenly spaced timestamps to words.

        The timing is an estimate: words are distributed uniformly over the
        clip, leaving 0.5 s of padding at each end when the clip is long
        enough to allow it.

        Args:
            audio_file_path: Path of the audio file on local disk.

        Returns:
            ``(timestamps, log_messages)`` where each timestamp is a dict with
            the keys ``'word'``, ``'start'`` and ``'end'`` (seconds, rounded to
            3 decimals). ``timestamps`` is empty when transcription or
            duration detection failed; the reason is in ``log_messages``.
        """
        logs = ["--- INFO: Starting get_word_timestamps... ---"]

        transcription, error = self.transcribe_audio(audio_file_path)
        if error:
            logs.append(error)
            return [], logs
        logs.append(f"--- DEBUG: Transcription successful. Text: '{transcription[:50]}...'")

        audio_duration, error = self.get_audio_duration(audio_file_path)
        if error:
            logs.append(error)
            return [], logs
        logs.append(f"--- DEBUG: Audio duration successful. Duration: {audio_duration:.2f}s")

        words = transcription.split()
        if not words:
            logs.append("--- WARNING: Transcription resulted in zero words. ---")
            return [], logs

        logs.append(f"--- INFO: Distributing {len(words)} words across the duration. ---")

        total_words = len(words)
        padding = self._EDGE_PADDING_SECONDS
        if audio_duration <= 2 * padding:
            # The clip is too short to reserve padding on both sides. Keeping
            # the fixed offset would place every word at 0.5 s with zero
            # length (possibly after the end of the audio), so use the whole
            # clip instead.
            padding = 0.0
            logs.append("--- INFO: Audio too short for edge padding; using the full duration. ---")
        usable_duration = audio_duration - 2 * padding
        seconds_per_word = usable_duration / total_words

        word_timestamps = []
        for i, word in enumerate(words):
            start_time = padding + (i * seconds_per_word)
            end_time = padding + ((i + 1) * seconds_per_word)
            word_timestamps.append({
                'word': word.strip(),
                'start': round(start_time, 3),
                'end': round(end_time, 3),
            })

        logs.append(f"--- SUCCESS: Generated {len(word_timestamps)} word timestamps. ---")
        return word_timestamps, logs

    def get_word_timestamps_with_translation(self, audio_file_path: str, target_language: str = 'ar') -> Tuple[Dict, List[str]]:
        """
        Enhanced function that provides both transcription and translation

        Args:
            audio_file_path: Path to audio file
            target_language: Target language for translation ('ar' for Arabic)

        Returns:
            Tuple of (result_dict, log_messages)
            result_dict is empty when no transcription could be produced;
            otherwise it contains: {
                'original_text': str,
                'translated_text': str,
                'word_timestamps': List[Dict],
                'translated_timestamps': List[Dict],
                'language_detected': str,
                'target_language': str,
                'translation_success': bool
            }
            Whenever translation is not possible, 'translated_text' and
            'translated_timestamps' fall back to the original transcription
            and 'translation_success' stays False.
        """
        logs = ["--- INFO: Starting enhanced transcription with translation... ---"]

        # Get original transcription and timestamps
        word_timestamps, transcription_logs = self.get_word_timestamps(audio_file_path)
        logs.extend(transcription_logs)

        if not word_timestamps:
            logs.append("--- ERROR: No transcription available for translation ---")
            return {}, logs

        # Extract original text
        original_text = " ".join([d['word'] for d in word_timestamps])
        logs.append(f"--- INFO: Original transcription: '{original_text[:50]}...' ---")

        # Initialize result dictionary
        result = {
            'original_text': original_text,
            'translated_text': '',
            'word_timestamps': word_timestamps,
            'translated_timestamps': [],
            'language_detected': 'unknown',
            'target_language': target_language,
            'translation_success': False
        }

        # Check if translator is available
        if not self.translator:
            logs.append("--- WARNING: Translator not available, returning original text only ---")
            # Same fallback as the translation-failure paths below, so the
            # text and its timestamps always stay in sync.
            result['translated_text'] = original_text
            result['translated_timestamps'] = word_timestamps
            return result, logs

        try:
            # Detect original language
            detected_lang, detect_error = self.translator.detect_language(original_text)
            if detected_lang and detected_lang != 'unknown':
                result['language_detected'] = detected_lang
                logs.append(f"--- INFO: Detected language: {detected_lang} ---")
            else:
                logs.append(f"--- WARNING: Language detection failed: {detect_error} ---")

            # Translate the text
            translated_text, translation_error = self.translator.translate_text(
                original_text,
                target_language=target_language
            )

            if translated_text:
                result['translated_text'] = translated_text
                result['translation_success'] = True
                logs.append(f"--- SUCCESS: Translation completed: '{translated_text[:50]}...' ---")

                # Create translated timestamps by mapping words
                translated_timestamps = self._create_translated_timestamps(
                    word_timestamps, original_text, translated_text
                )
                result['translated_timestamps'] = translated_timestamps
                logs.append(f"--- INFO: Created {len(translated_timestamps)} translated timestamps ---")
            else:
                logs.append(f"--- ERROR: Translation failed: {translation_error} ---")
                result['translated_text'] = original_text  # Fallback to original
                result['translated_timestamps'] = word_timestamps  # Use original timestamps

        except Exception:
            error_msg = f"--- FATAL ERROR during translation process: {traceback.format_exc()} ---"
            logs.append(error_msg)
            result['translated_text'] = original_text  # Fallback
            result['translated_timestamps'] = word_timestamps

        return result, logs

    def _create_translated_timestamps(self, original_timestamps: List[Dict], original_text: str, translated_text: str) -> List[Dict]:
        """
        Create timestamps for translated text by proportional mapping

        The translated words are spread evenly between the start of the first
        original word and the end of the last one.

        Args:
            original_timestamps: Original word timestamps
            original_text: Original transcribed text (currently unused; kept
                so existing callers keep working)
            translated_text: Translated text

        Returns:
            List of translated word timestamps; empty when either input is
            empty or an unexpected error occurs (the error is printed).
        """
        try:
            translated_words = translated_text.split()
            if not translated_words:
                return []

            # Get total duration from original timestamps
            if not original_timestamps:
                return []

            start_time = original_timestamps[0]['start']
            end_time = original_timestamps[-1]['end']
            total_duration = end_time - start_time

            # Create proportional timestamps for translated words
            word_count = len(translated_words)
            return [
                {
                    'word': word.strip(),
                    'start': round(start_time + (i * total_duration / word_count), 3),
                    'end': round(start_time + ((i + 1) * total_duration / word_count), 3),
                }
                for i, word in enumerate(translated_words)
            ]

        except Exception as e:
            print(f"--- ERROR creating translated timestamps: {str(e)} ---")
            return []

    def batch_translate_transcription(self, audio_file_path: str, target_languages: List[str]) -> Tuple[Dict, List[str]]:
        """
        Transcribe audio and translate to multiple languages

        Args:
            audio_file_path: Path to audio file
            target_languages: List of target language codes

        Returns:
            Tuple of (results_dict, log_messages)
            results_dict is empty when no transcription could be produced;
            otherwise it has the keys 'original' and 'translations'. When a
            translator is available, every requested language gets an entry
            in 'translations' whose 'success' flag tells whether its 'text' is
            a real translation or the original text used as a fallback.
        """
        logs = ["--- INFO: Starting batch translation process... ---"]

        # Get original transcription
        word_timestamps, transcription_logs = self.get_word_timestamps(audio_file_path)
        logs.extend(transcription_logs)

        if not word_timestamps:
            return {}, logs

        original_text = " ".join([d['word'] for d in word_timestamps])

        # Initialize results
        results = {
            'original': {
                'text': original_text,
                'timestamps': word_timestamps,
                'language': 'detected'
            },
            'translations': {}
        }

        if not self.translator:
            logs.append("--- WARNING: Translator not available for batch translation ---")
            return results, logs

        # Translate to each target language
        for lang_code in target_languages or []:
            try:
                translated_text, error = self.translator.translate_text(original_text, lang_code)

                if translated_text:
                    translated_timestamps = self._create_translated_timestamps(
                        word_timestamps, original_text, translated_text
                    )
                    results['translations'][lang_code] = {
                        'text': translated_text,
                        'timestamps': translated_timestamps,
                        'success': True
                    }
                    logs.append(f"--- SUCCESS: Translation to {lang_code} completed ---")
                else:
                    results['translations'][lang_code] = {
                        'text': original_text,
                        'timestamps': word_timestamps,
                        'success': False,
                        'error': error
                    }
                    logs.append(f"--- ERROR: Translation to {lang_code} failed: {error} ---")

            except Exception as e:
                # Record the failure in the same shape as a regular failed
                # translation so the language is not silently missing from
                # the results.
                results['translations'][lang_code] = {
                    'text': original_text,
                    'timestamps': word_timestamps,
                    'success': False,
                    'error': str(e)
                }
                logs.append(f"--- FATAL ERROR translating to {lang_code}: {str(e)} ---")

        return results, logs