| | |
| | |
| | ''' |
| | MIT License |
| | |
| | Copyright (c) 2018 Mauricio |
| | |
| | Permission is hereby granted, free of charge, to any person obtaining a copy |
| | of this software and associated documentation files (the "Software"), to deal |
| | in the Software without restriction, including without limitation the rights |
| | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| | copies of the Software, and to permit persons to whom the Software is |
| | furnished to do so, subject to the following conditions: |
| | |
| | The above copyright notice and this permission notice shall be included in all |
| | copies or substantial portions of the Software. |
| | |
| | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| | SOFTWARE. |
| | |
| | Adapted from https://github.com/mauriciovander/silence-removal/blob/master/vad.py |
| | ''' |
| | import numpy |
| |
|
class VoiceActivityDetection:
    """Energy-based voice activity detector that drops long silent runs.

    Samples are cut into fixed-size frames of 160 samples.  Each frame's
    mean squared amplitude is compared against a threshold that is the
    running average of per-frame thresholds seen so far by this instance.
    A frame is discarded once more than ``sc_threshold`` consecutive
    frames have fallen at or below that threshold.

    The running threshold and the silence counter are kept on the instance
    and are NOT reset by :meth:`process`, so they carry over between calls.
    """

    # Fraction of a frame's energy range (max - min of the squared samples)
    # that is added to the frame's minimum energy to form its threshold.
    _ENERGY_RATIO = 0.2

    def __init__(self):
        """Create a detector with empty buffers and a zeroed threshold."""
        self.__step = 160           # samples consumed from the buffer per frame
        self.__buffer_size = 160    # samples returned per frame
        self.__buffer = numpy.array([], dtype=numpy.int16)
        self.__out_buffer = numpy.array([], dtype=numpy.int16)
        self.__n = 0
        self.__VADthd = 0.          # running mean of the per-frame thresholds
        self.__VADn = 0.            # number of frames folded into __VADthd
        self.__silence_counter = 0  # consecutive frames classified as quiet

    def vad(self, _frame, sc_threshold=20):
        """Classify one frame and update the adaptive threshold.

        Args:
            _frame: Non-empty sequence of samples for a single frame.
            sc_threshold: Number of consecutive quiet frames tolerated
                before frames start being reported as silence.

        Returns:
            ``True`` while the frame should be kept, ``False`` once more
            than ``sc_threshold`` consecutive quiet frames have been seen.
        """
        # The float exponent promotes integer samples to floating point
        # before squaring, so int16 input does not wrap around.
        energy = numpy.array(_frame) ** 2.
        frame_threshold = (
            numpy.min(energy) + numpy.ptp(energy) * self._ENERGY_RATIO
        )

        # Incremental mean over every frame this instance has analysed.
        self.__VADthd = (
            (self.__VADn * self.__VADthd + frame_threshold)
            / float(self.__VADn + 1.)
        )
        self.__VADn += 1.

        if numpy.mean(energy) <= self.__VADthd:
            self.__silence_counter += 1
        else:
            self.__silence_counter = 0

        return not (self.__silence_counter > sc_threshold)

    def add_samples(self, data):
        """Append ``data`` to the input buffer.

        Returns:
            ``True`` if the buffer now holds at least one full frame.
        """
        self.__buffer = numpy.append(self.__buffer, data)
        return len(self.__buffer) >= self.__buffer_size

    def get_frame(self):
        """Return the next frame and advance the input buffer by one step."""
        window = self.__buffer[:self.__buffer_size]
        self.__buffer = self.__buffer[self.__step:]
        return window

    def process(self, data, sc_threshold=20):
        """Return ``data`` with long silent stretches removed.

        The input and output buffers are reset on every call.  Trailing
        samples that do not fill a whole frame are dropped.

        Args:
            data: Samples to filter (flattened by ``numpy.append``).
            sc_threshold: Forwarded to :meth:`vad`; defaults to the same
                value as ``vad`` itself.

        Returns:
            A 1-D numpy array made of the kept frames, in order.  It is an
            empty int16 array when no frame is kept.
        """
        self.__buffer = numpy.array([], dtype=numpy.int16)
        self.__out_buffer = numpy.array([], dtype=numpy.int16)
        self.add_samples(data)

        # Gather kept frames and join them once at the end; appending to a
        # numpy array inside the loop would copy the whole output per frame.
        # The empty int16 seed keeps the result dtype identical to what
        # repeated numpy.append calls would have produced.
        kept = [self.__out_buffer]
        while len(self.__buffer) >= self.__buffer_size:
            window = self.get_frame()
            if self.vad(window, sc_threshold):
                kept.append(window)

        self.__out_buffer = numpy.concatenate(kept)
        return self.__out_buffer
| |
|