| | import os |
| | import ffmpeg |
| | import librosa |
| | import numpy as np |
| | import soundfile as sf |
| | import tempfile |
| |
|
| | from .vad import VoiceActivityDetection |
| |
|
| |
|
class PostProcessor:
    """Post-process a waveform with language/gender-specific adjustments.

    Two operations are available: a tempo change performed by ffmpeg's
    ``atempo`` filter, and a pass through ``VoiceActivityDetection`` (used
    here to trim silence). ``process`` selects which of them to run, and in
    which order, from the ``lang`` and ``gender`` arguments.
    """

    def __init__(self, target_sr: int):
        """Store the working sample rate and create the VAD helper.

        Args:
            target_sr: Sample rate used both when writing the temporary WAV
                file handed to ffmpeg and when loading ffmpeg's output back.
        """
        self.target_sr = target_sr
        self.vad = VoiceActivityDetection()

    def set_tempo(self, wav: np.ndarray, atempo: str = '1'):
        """Run ``wav`` through ffmpeg's ``atempo`` filter and return the result.

        The waveform is written to a temporary WAV file at ``self.target_sr``,
        filtered by ffmpeg into a second temporary file, and that file is
        loaded back with librosa at ``self.target_sr``. Both files live in a
        temporary directory that is removed when the method returns.

        Args:
            wav: Audio samples to be written with ``soundfile.write``.
            atempo: Value passed as the argument of the ``atempo`` filter
                (e.g. ``'0.85'``); the default ``'1'`` requests no change.

        Returns:
            The waveform loaded by ``librosa.load`` from ffmpeg's output.
        """
        with tempfile.TemporaryDirectory() as tmpdirname:
            inpath = os.path.join(tmpdirname, 'input.wav')
            # Build the output path from the directory rather than with
            # str.replace on the full path, which would also rewrite any
            # 'input.wav' substring occurring in the directory part.
            outpath = os.path.join(tmpdirname, 'output.wav')
            sf.write(inpath, wav, self.target_sr)
            in_stream = ffmpeg.input(inpath)
            audio_stream = ffmpeg.filter_(in_stream, 'atempo', atempo)
            audio_stream = audio_stream.output(outpath)
            ffmpeg.run(audio_stream, overwrite_output=True)
            # Load while the temporary directory still exists.
            wav, _ = librosa.load(outpath, sr=self.target_sr)
        return wav

    def trim_silence(self, wav: np.ndarray):
        """Return ``self.vad.process(wav, sc_threshold=40)``.

        NOTE(review): the meaning of ``sc_threshold`` is defined by
        ``VoiceActivityDetection`` and is not visible from this class.
        """
        return self.vad.process(wav, sc_threshold=40)

    def process(self, wav, lang: str, gender: str):
        """Apply the adjustments configured for ``lang`` / ``gender``.

        Rules, checked in this order:

        * ``lang == "te"`` and ``gender == 'female'``: tempo ``'0.85'``,
          then silence trimming.
        * ``lang == 'mr'`` and ``gender == 'female'``: silence trimming,
          then tempo ``'1.15'``.
        * ``lang == 'gu'`` (any gender): tempo ``'1.20'``.

        Any other combination returns the waveform untouched.

        Args:
            wav: Audio samples; anything that is not already a NumPy array
                (e.g. a list) is converted to one first.
            lang: Language code (presumably ISO 639-1 -- confirm with caller).
            gender: Speaker gender label; only ``'female'`` is matched.

        Returns:
            The processed waveform as a NumPy array.
        """
        # asarray returns a plain ndarray input as-is (no copy) and converts
        # lists and other array-likes, replacing the exact-type comparison.
        wav = np.asarray(wav)

        if lang == "te" and gender == 'female':
            wav = self.set_tempo(wav, '0.85')
            wav = self.trim_silence(wav)
        elif lang == 'mr' and gender == 'female':
            wav = self.trim_silence(wav)
            wav = self.set_tempo(wav, '1.15')
        elif lang == 'gu':
            wav = self.set_tempo(wav, '1.20')

        return wav
| |
|