Spaces:

pp22
/

voice-detection-api

Sleeping

voice-detection-api / src /generate_ai_data.py

pratikpawar0204

AI Voice Detection API - Competition Submission

dead0b1 about 2 months ago

7.03 kB

	import os
	import asyncio
	import pandas as pd
	import soundfile as sf
	import librosa
	from gtts import gTTS
	import edge_tts
	from tqdm import tqdm
	import sys

	# Add src to path
	sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
	from src.config import LANGUAGES, RAW_AI_DIR, SAMPLE_RATE

	# Prompt sentences fed to the TTS engines, keyed by language code.
	# main() looks up each language code from LANGUAGES in this mapping and
	# falls back to the 'en' list when a language has no entry of its own.
	# The corpus is intentionally small: the same prompts are rendered with
	# several voices/engines. Extend the lists if more samples are needed.
	# Trailing comments on non-English prompts give their English meaning.
	TEXT_CORPUS = {
	    'en': [
	        "The quick brown fox jumps over the lazy dog.",
	        "Hello, this is an AI voice generation test.",
	        "Artificial intelligence is transforming the world.",
	        "Can you tell me the time please?",
	        "I am not a human, but I sound like one.",
	        "Verification code is one two three four.",
	        "Open the pod bay doors, HAL.",
	        "The weather today is sunny with a chance of rain.",
	        "Please confirm your identity.",
	        "This is a secure channel."
	    ],
	    'ta': [
	        "வணக்கம், எப்படி இருக்கிறீர்கள்?", # Hello, how are you?
	        "இது ஒரு செயற்கை நுண்ணறிவு குரல் சோதனை.", # This is an AI voice test.
	        "தமிழ் உலகின் மூத்த மொழிகளில் ஒன்று.", # Tamil is one of the oldest languages.
	        "இன்று வானிலை மிக நன்றாக உள்ளது.", # The weather is very good today.
	        "தயவுசெய்து உங்கள் அடையாளத்தை உறுதிப்படுத்தவும்." # Please verify your identity.
	    ],
	    'hi': [
	        "नमस्ते, आप कैसे हैं?", # Hello, how are you?
	        "यह एक एआई आवाज़ परीक्षण है।", # This is an AI voice test.
	        "भारत एक विशाल देश है।", # India is a huge country.
	        "कृपया अपना पासवर्ड दर्ज करें।", # Please enter your password.
	        "मौसम आज बहुत सुहावना है।" # The weather is very pleasant today.
	    ],
	    'ml': [
	        "നമസ്കാരം, സുഖമാണോ?", # Hello, are you fine?
	        "ഇതൊരു നിർമ്മിത ബുദ്ധി പരീക്ഷണമാണ്.", # This is an AI test.
	        "കേരളം ദൈവത്തിന്റെ സ്വന്തം നാടാണ്.", # Kerala is God's own country.
	        "ദയവായി വാതിൽ തുറക്കൂ.", # Please open the door.
	        "ഇന്നത്തെ കാലാവസ്ഥ എങ്ങനെയുണ്ട്?" # How is today's weather?
	    ],
	    'te': [
	        "నమస్కారం, మీరు ఎలా ఉన్నారు?", # Hello, how are you?
	        "ఇది ఒక కృత్రిమ మేధస్సు పరీక్ష.", # This is an AI test.
	        "తెలుగు చాలా తీయని భాష.", # Telugu is a very sweet language.
	        "దయచేసి మీ పేరు చెప్పండి.", # Please tell your name.
	        "ఈ రోజు వర్షం పడే అవకాశం ఉంది." # There is a chance of rain today.
	    ]
	}

	async def generate_edge_tts(text, voice, output_path):
	    """Synthesize ``text`` with the Edge TTS ``voice`` and write it to ``output_path``.

	    Builds an ``edge_tts.Communicate`` request and awaits its ``save`` call;
	    nothing is returned.
	    """
	    await edge_tts.Communicate(text, voice).save(output_path)

	def generate_gtts(text, lang_code, output_path):
	    """Synthesize ``text`` with Google TTS in ``lang_code`` and save it to ``output_path``.

	    The request is made with ``slow=False``; nothing is returned.
	    """
	    gTTS(text=text, lang=lang_code, slow=False).save(output_path)

	def _sample_record(fname, lang_code, fpath, source, voice_engine):
	    """Build the metadata row written to the CSV for one generated clip."""
	    return {
	        'filename': fname,
	        'language': lang_code,
	        'path': fpath,
	        'source': source,
	        'voice_engine': voice_engine
	    }


	def _discard_partial(path):
	    """Best-effort removal of a file left behind by a failed synthesis call."""
	    try:
	        os.remove(path)
	    except OSError:
	        # Nothing was written, or the file cannot be removed; generation
	        # should carry on either way.
	        pass


	async def main():
	    """Generate AI-voice samples for every configured language.

	    For each ``(code, name)`` pair in ``LANGUAGES`` a sub-directory of
	    ``RAW_AI_DIR`` is created and filled with ``.mp3`` clips: up to half of
	    ``target_per_lang`` via Edge TTS (iterating voices, then prompts), then
	    one gTTS clip per prompt until the target is reached or the prompts run
	    out. Failures of individual clips are printed and skipped. A row per
	    successful clip is collected and written to ``ai_samples.csv`` inside
	    ``RAW_AI_DIR``.
	    """
	    # exist_ok avoids the check-then-create race of exists() + makedirs().
	    os.makedirs(RAW_AI_DIR, exist_ok=True)

	    data_records = []

	    # Edge TTS Voices Map (Approximate)
	    edge_voices = {
	        'en': ['en-US-ChristopherNeural', 'en-US-JennyNeural', 'en-GB-SoniaNeural'],
	        'ta': ['ta-IN-ValluvarNeural', 'ta-IN-PallaviNeural'],
	        'hi': ['hi-IN-MadhurNeural', 'hi-IN-SwaraNeural'],
	        'ml': ['ml-IN-MidhunNeural', 'ml-IN-SobhanaNeural'],
	        'te': ['te-IN-MohanNeural', 'te-IN-ShrutiNeural']
	    }

	    target_per_lang = 50
	    edge_quota = target_per_lang // 2  # Do half with Edge, half with gTTS

	    for lang_code, lang_name in LANGUAGES.items():
	        print(f"Generating AI samples for {lang_name} ({lang_code})...")
	        lang_dir = os.path.join(RAW_AI_DIR, lang_code)
	        os.makedirs(lang_dir, exist_ok=True)

	        texts = TEXT_CORPUS.get(lang_code, TEXT_CORPUS['en'])  # Fallback to English if missing
	        count = 0

	        # 1. Edge TTS Generation
	        # Flattened voice-major iteration so a single break stops everything
	        # once the quota is met (no idle passes over the remaining voices).
	        voices = edge_voices.get(lang_code, [])
	        for voice, text in ((v, t) for v in voices for t in texts):
	            if count >= edge_quota:
	                break

	            fname = f"ai_edge_{lang_code}_{count:04d}.mp3"
	            fpath = os.path.join(lang_dir, fname)

	            try:
	                await generate_edge_tts(text, voice, fpath)
	            except Exception as e:
	                print(f"Error generating Edge TTS for {lang_code}: {e}")
	                # Do not leave a truncated/empty clip that has no CSV row.
	                _discard_partial(fpath)
	                continue

	            # Format verification/conversion is deferred to preprocessing;
	            # here only the metadata row is recorded.
	            data_records.append(
	                _sample_record(fname, lang_code, fpath, 'edge_tts', voice)
	            )
	            count += 1

	        # 2. gTTS Generation
	        # gTTS language codes usually match the ISO codes used here; adjust
	        # this mapping if a language starts failing.
	        gtts_lang = lang_code

	        # One clip per prompt: gTTS is called with fixed settings, so
	        # repeating a prompt would only produce duplicate audio.
	        for text in texts:
	            if count >= target_per_lang:
	                break

	            fname = f"ai_gtts_{lang_code}_{count:04d}.mp3"
	            fpath = os.path.join(lang_dir, fname)

	            try:
	                generate_gtts(text, gtts_lang, fpath)
	            except Exception as e:
	                print(f"Error generating gTTS for {lang_code}: {e}")
	                _discard_partial(fpath)
	                continue

	            data_records.append(
	                _sample_record(fname, lang_code, fpath, 'gtts', 'gtts_standard')
	            )
	            count += 1

	        # Make an undershoot visible instead of silently producing fewer
	        # clips than the configured target.
	        if count < target_per_lang:
	            print(
	                f"Warning: generated {count}/{target_per_lang} samples for "
	                f"{lang_code}; extend TEXT_CORPUS or the voice list to reach the target."
	            )

	    # Save Metadata
	    # Explicit columns keep the CSV header present even when nothing was generated.
	    columns = ['filename', 'language', 'path', 'source', 'voice_engine']
	    df = pd.DataFrame(data_records, columns=columns)
	    csv_path = os.path.join(RAW_AI_DIR, 'ai_samples.csv')
	    df.to_csv(csv_path, index=False)
	    print(f"AI Data Generation Complete! Saved to {csv_path}")

	# Script entry point: run the async generation pipeline to completion.
	if __name__ == "__main__":
	    asyncio.run(main())