This article collects typical usage examples of webrtcvad.Vad in Python. If you are wondering how to use webrtcvad.Vad, or are looking for worked examples of it, the curated code samples below may help; you can also explore further usage examples from the webrtcvad module.
The following presents 15 code examples of webrtcvad.Vad, ordered by popularity by default.
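
Before the examples, here is a minimal self-contained sketch of the webrtcvad API they all build on: Vad takes an aggressiveness mode from 0 (least aggressive about filtering out non-speech) to 3 (most aggressive), and is_speech() expects 16-bit mono PCM at 8000, 16000, 32000, or 48000 Hz, fed in frames of exactly 10, 20, or 30 ms. The silent frame below is an illustrative stand-in for real audio.

import webrtcvad

vad = webrtcvad.Vad(2)           # aggressiveness mode: 0 to 3

sample_rate = 16000              # must be 8000, 16000, 32000, or 48000 Hz
frame_ms = 30                    # frames must be exactly 10, 20, or 30 ms
n_samples = sample_rate * frame_ms // 1000
frame = b'\x00\x00' * n_samples  # one frame of S16_LE mono PCM (pure silence)

print(vad.is_speech(frame, sample_rate))  # expected False for pure silence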

Example 1: __init__

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def __init__(self, sample_rate=16000, level=0):
    """
    Args:
        sample_rate: audio sample rate
        level: between 0 and 3. 0 is the least aggressive about filtering out non-speech, 3 is the most aggressive.
    """
    self.sample_rate = sample_rate
    self.frame_ms = 30
    self.frame_bytes = int(2 * self.frame_ms * self.sample_rate / 1000)  # S16_LE, 2 bytes per sample
    self.vad = webrtcvad.Vad(level)
    self.active = False
    self.data = b''
    self.history = collections.deque(maxlen=128)

Example 2: initialize

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def initialize(self):
    """Initialize a Hermes audio recorder."""
    self.logger.debug('Probing for available input devices...')
    for index in range(self.audio.get_device_count()):
        device = self.audio.get_device_info_by_index(index)
        name = device['name']
        channels = device['maxInputChannels']
        if channels:
            self.logger.debug('[%d] %s', index, name)
    try:
        self.audio_in = self.audio.get_default_input_device_info()['name']
    except OSError:
        raise NoDefaultAudioDeviceError('input')
    self.logger.info('Connected to audio input %s.', self.audio_in)

    if self.config.vad.enabled:
        self.logger.info('Voice Activity Detection enabled with mode %s.',
                         self.config.vad.mode)
        self.vad = webrtcvad.Vad(self.config.vad.mode)

Example 3: __init__

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def __init__(self, aggressiveness=2, sample_rate=SAMPLE_RATE,
             min_utt_length=MIN_UTT_LENGTH,
             max_utt_length=MAX_UTT_LENGTH,
             max_utt_gap=MAX_UTT_GAP):
    self.sample_rate = sample_rate
    self.vad = webrtcvad.Vad()
    self.vad.set_mode(aggressiveness)
    self.state = STATE_IDLE
    self.buf = []
    self.buf_sent = 0
    self.min_buf_entries = int(min_utt_length * 1000) / BUFFER_DURATION
    self.max_buf_entries = int(max_utt_length * 1000) / BUFFER_DURATION
    self.max_gap = int(max_utt_gap * 1000) / BUFFER_DURATION
    self.frame_cnt = 0
    self.avg_vol_sum = 0.0
    self.avg_vol_cnt = 0

Example 4: __init__

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def __init__(self, sample_rate=16000, level=3):
    """
    Args:
        sample_rate: audio sample rate
        level: between 0 and 3. 0 is the least aggressive about filtering out non-speech, 3 is the most aggressive.
    """
    self.sample_rate = sample_rate
    self.frame_ms = 30
    self.frame_bytes = int(2 * self.frame_ms * self.sample_rate / 1000)  # S16_LE, 2 bytes per sample
    self.vad = webrtcvad.Vad(level)
    self.active = False
    self.data = b''
    self.history = collections.deque(maxlen=128)

Example 5: VAD_chunk

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def VAD_chunk(aggressiveness, path):
    audio, byte_audio = read_wave(path, hp.data.sr)
    vad = webrtcvad.Vad(int(aggressiveness))
    frames = frame_generator(20, byte_audio, hp.data.sr)
    frames = list(frames)
    times = vad_collector(hp.data.sr, 20, 200, vad, frames)
    speech_times = []
    speech_segs = []
    for i, time in enumerate(times):
        start = np.round(time[0], decimals=2)
        end = np.round(time[1], decimals=2)
        j = start
        # Split each detected speech region into 0.4 s sub-segments
        while j + .4 < end:
            end_j = np.round(j + .4, decimals=2)
            speech_times.append((j, end_j))
            speech_segs.append(audio[int(j * hp.data.sr):int(end_j * hp.data.sr)])
            j = end_j
        else:
            # while/else: the else branch runs once the loop exits normally,
            # appending whatever remains of the region as a final segment
            speech_times.append((j, end))
            speech_segs.append(audio[int(j * hp.data.sr):int(end * hp.data.sr)])
    return speech_times, speech_segs

Example 6: __init__

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def __init__(self, chop_factors=[(0.05, 0.025), (0.1, 0.05)],
             max_chops=2, force_regions=False, report=False):
    # chop factors in seconds (mean, std) per possible chop
    import webrtcvad
    self.chop_factors = chop_factors
    self.max_chops = max_chops
    self.force_regions = force_regions
    # create VAD to get speech chunks
    self.vad = webrtcvad.Vad(2)
    # make scalers to norm/denorm
    self.denormalizer = Scale(1. / ((2 ** 15) - 1))
    self.normalizer = Scale((2 ** 15) - 1)
    self.report = report

# @profile

Example 7: __init__

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def __init__(self, sample_rate=16000, level=0):
    self.vad = webrtcvad.Vad(level)
    self.sample_rate = int(sample_rate)
    self.num_padding_frames = 10
    self.reset()

Example 8: __init__

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def __init__(self, rate=16000, mode=0, duration=1000, on_inactive=None):
    super(VAD, self).__init__()
    self.rate = rate
    self.vad = Vad(mode)
    self.on_inactive = on_inactive
    self.limit_inactive_cnt = duration / 10  # a frame is 10 ms
    self.current_inactive_cnt = 0

Example 9: __init__

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def __init__(self, aggressiveness=3, **kwargs):
    super(VADAudio, self).__init__(**kwargs)
    self.vad = webrtcvad.Vad(aggressiveness)

Example 10: __init__

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def __init__(self, frame_length: int, sample_rate: int) -> None:
    """Initialize Microphone processing."""
    self.audio = pyaudio.PyAudio()
    self.vad = webrtcvad.Vad(1)
    self.stream: Optional[pyaudio.Stream] = None
    self._frame_length = frame_length
    self._sample_rate = sample_rate
    self._last_frame: Optional[np.ndarray] = None

Example 11: _make_webrtcvad_detector

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def _make_webrtcvad_detector(sample_rate, frame_rate):
    import webrtcvad
    vad = webrtcvad.Vad()
    vad.set_mode(3)  # set non-speech pruning aggressiveness from 0 to 3
    window_duration = 1. / sample_rate  # duration in seconds
    frames_per_window = int(window_duration * frame_rate + 0.5)
    bytes_per_frame = 2

    def _detect(asegment):
        media_bstring = []
        failures = 0
        for start in range(0, len(asegment) // bytes_per_frame,
                           frames_per_window):
            stop = min(start + frames_per_window,
                       len(asegment) // bytes_per_frame)
            try:
                is_speech = vad.is_speech(
                    asegment[start * bytes_per_frame: stop * bytes_per_frame],
                    sample_rate=frame_rate)
            except Exception:
                is_speech = False
                failures += 1
            # webrtcvad has low recall on mode 3, so treat non-speech as "not sure"
            media_bstring.append(1. if is_speech else 0.5)
        return np.array(media_bstring)

    return _detect
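
A hypothetical usage sketch (not from the original project): in this factory, sample_rate is the rate of the output detection signal in windows per second, while frame_rate is the PCM sample rate, so the values below yield 10 ms windows, a valid webrtcvad frame size. The silent buffer is an illustrative stand-in for a real audio segment.

# Assumed usage: 100 detection windows per second over 16 kHz audio.
detect = _make_webrtcvad_detector(sample_rate=100, frame_rate=16000)
pcm = b'\x00\x00' * 16000           # 1 s of silent S16_LE mono PCM
scores = detect(pcm)
print(len(scores), scores[:5])      # ~100 windows, each 0.5 ("not sure") or 1.0 (speech)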

Example 12: main

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def main():
    vad = webrtcvad.Vad(3)

    speech_count = 0
    chunks = []
    doa_chunks = int(DOA_FRAMES / VAD_FRAMES)

    try:
        with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES / 1000) as mic:
            for chunk in mic.read_chunks():
                # Use single-channel audio to detect voice activity
                if vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                    speech_count += 1
                    sys.stdout.write('1')
                else:
                    sys.stdout.write('0')
                sys.stdout.flush()

                chunks.append(chunk)
                if len(chunks) == doa_chunks:
                    # Estimate direction of arrival once enough chunks are buffered
                    if speech_count > (doa_chunks / 2):
                        frames = np.concatenate(chunks)
                        direction = mic.get_direction(frames)
                        pixel_ring.set_direction(direction)
                        print('\n{}'.format(int(direction)))
                    speech_count = 0
                    chunks = []
    except KeyboardInterrupt:
        pass

    pixel_ring.off()

Example 13: __init__

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def __init__(self, aggressiveness=3):
    super().__init__()
    self.vad = webrtcvad.Vad(aggressiveness)

Example 14: __init__

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def __init__(self, sr=16000, chunk_duration_ms=30, video_path='', out_path=''):
    self._sr = sr
    self._chunk_duration_ms = chunk_duration_ms
    self._chunk_size = int(sr * chunk_duration_ms / 1000)  # chunk to read, in samples
    self._nb_window_chunks = int(400 / chunk_duration_ms)  # 400 ms window / 30 ms frame
    self._nb_window_chunks_end = self._nb_window_chunks * 2
    self._vad = webrtcvad.Vad(mode=3)
    self._video_path = video_path
    self._out_path = out_path

Example 15: trim_long_silences

# Required import: import webrtcvad
# Or: from webrtcvad import Vad
def trim_long_silences(wav):
    """
    Ensures that segments without voice in the waveform remain no longer than a
    threshold determined by the VAD parameters in params.py.

    :param wav: the raw waveform as a numpy array of floats
    :return: the same waveform with silences trimmed away (length <= original wav length)
    """
    # Compute the voice detection window size
    samples_per_window = (vad_window_length * sampling_rate) // 1000

    # Trim the end of the audio to have a multiple of the window size
    wav = wav[:len(wav) - (len(wav) % samples_per_window)]

    # Convert the float waveform to 16-bit mono PCM
    pcm_wave = struct.pack("%dh" % len(wav), *(np.round(wav * int16_max)).astype(np.int16))

    # Perform voice activation detection
    voice_flags = []
    vad = webrtcvad.Vad(mode=3)
    for window_start in range(0, len(wav), samples_per_window):
        window_end = window_start + samples_per_window
        voice_flags.append(vad.is_speech(pcm_wave[window_start * 2:window_end * 2],
                                         sample_rate=sampling_rate))
    voice_flags = np.array(voice_flags)

    # Smooth the voice detection with a moving average
    def moving_average(array, width):
        array_padded = np.concatenate((np.zeros((width - 1) // 2), array, np.zeros(width // 2)))
        ret = np.cumsum(array_padded, dtype=float)
        ret[width:] = ret[width:] - ret[:-width]
        return ret[width - 1:] / width

    audio_mask = moving_average(voice_flags, vad_moving_average_width)
    audio_mask = np.round(audio_mask).astype(bool)  # np.bool is removed in recent NumPy

    # Dilate the voiced regions
    audio_mask = binary_dilation(audio_mask, np.ones(vad_max_silence_length + 1))
    audio_mask = np.repeat(audio_mask, samples_per_window)

    return wav[audio_mask == True]
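
The module-level names used above (vad_window_length, vad_moving_average_width, vad_max_silence_length, sampling_rate, int16_max) come from the project's params.py, alongside imports of struct, numpy, webrtcvad, and scipy.ndimage's binary_dilation. A minimal way to exercise the function, assuming plausible values for those parameters defined in the same module, might look like this:

# Assumed parameter values for illustration; the real ones live in params.py.
vad_window_length = 30          # VAD window size in ms (10, 20, or 30)
vad_moving_average_width = 8    # smoothing width, in windows
vad_max_silence_length = 6      # max tolerated silence, in windows
sampling_rate = 16000           # Hz
int16_max = (2 ** 15) - 1

wav = np.random.uniform(-0.1, 0.1, 2 * sampling_rate).astype(np.float32)  # 2 s of noise
trimmed = trim_long_silences(wav)
print(len(wav), '->', len(trimmed))  # trimmed is never longer than the input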