

Python webrtcvad.Vad Method Code Examples

This article collects typical code examples of the webrtcvad.Vad method in Python. If you are wondering what webrtcvad.Vad does, how to call it, or what real-world uses look like, the curated examples below should help. You can also explore the webrtcvad module further for related usage examples.


The following 15 code examples of the webrtcvad.Vad method are shown below, sorted by popularity by default.
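Before the examples, a minimal, self-contained sketch of the calling pattern they all share may help. webrtcvad.Vad(mode) takes an aggressiveness mode from 0 (least aggressive about filtering out non-speech) to 3 (most aggressive), and is_speech() expects 16-bit mono PCM frames of exactly 10, 20, or 30 ms at 8000, 16000, 32000, or 48000 Hz. The frame of silence below is illustrative, not taken from any example:

import webrtcvad

vad = webrtcvad.Vad(2)   # aggressiveness mode: 0 (least) to 3 (most)

sample_rate = 16000
frame_ms = 30                                  # must be 10, 20, or 30 ms
num_samples = sample_rate * frame_ms // 1000   # 480 samples
frame = b'\x00\x00' * num_samples              # 960 bytes of S16_LE silence
print(vad.is_speech(frame, sample_rate))       # False: silence is not speech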

Example 1: __init__

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def __init__(self, sample_rate=16000, level=0):
        """

        Args:
            sample_rate: audio sample rate
            level: between 0 and 3. 0 is the least aggressive about filtering out non-speech, 3 is the most aggressive.
        """
        self.sample_rate = sample_rate

        self.frame_ms = 30
        self.frame_bytes = int(2 * self.frame_ms * self.sample_rate / 1000)   # S16_LE, 2 bytes width

        self.vad = webrtcvad.Vad(level)
        self.active = False
        self.data = b''
        self.history = collections.deque(maxlen=128) 
Developer: respeaker, Project: respeaker_python_library, Lines: 18, Source: vad.py
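With the defaults above, frame_bytes works out to 960 bytes per frame; a quick check of the arithmetic (not part of the original class):

sample_rate, frame_ms = 16000, 30
frame_bytes = int(2 * frame_ms * sample_rate / 1000)  # 2 bytes per S16_LE sample
assert frame_bytes == 960  # 480 samples * 2 bytes; 30 ms is a valid VAD frame size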

Example 2: initialize

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def initialize(self):
        """Initialize a Hermes audio recorder."""
        self.logger.debug('Probing for available input devices...')
        for index in range(self.audio.get_device_count()):
            device = self.audio.get_device_info_by_index(index)
            name = device['name']
            channels = device['maxInputChannels']
            if channels:
                self.logger.debug('[%d] %s', index, name)
        try:
            self.audio_in = self.audio.get_default_input_device_info()['name']
        except OSError:
            raise NoDefaultAudioDeviceError('input')
        self.logger.info('Connected to audio input %s.', self.audio_in)

        if self.config.vad.enabled:
            self.logger.info('Voice Activity Detection enabled with mode %s.',
                             self.config.vad.mode)
            self.vad = webrtcvad.Vad(self.config.vad.mode) 
Developer: koenvervloesem, Project: hermes-audio-server, Lines: 21, Source: recorder.py
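A hypothetical capture loop such a recorder might run after initialization. The stream parameters are assumptions chosen to satisfy webrtcvad's frame requirements, not the project's actual settings:

import pyaudio
import webrtcvad

RATE, FRAME_MS = 16000, 30
CHUNK = RATE * FRAME_MS // 1000  # 480 samples per 30 ms frame

vad = webrtcvad.Vad(1)
audio = pyaudio.PyAudio()
stream = audio.open(format=pyaudio.paInt16, channels=1, rate=RATE,
                    input=True, frames_per_buffer=CHUNK)
try:
    for _ in range(100):                # roughly 3 seconds of audio
        frame = stream.read(CHUNK)      # 960 bytes of S16_LE PCM
        print('speech' if vad.is_speech(frame, RATE) else 'silence')
finally:
    stream.stop_stream()
    stream.close()
    audio.terminate()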

Example 3: __init__

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def __init__(self, aggressiveness=2, sample_rate=SAMPLE_RATE,
                 min_utt_length = MIN_UTT_LENGTH,
                 max_utt_length = MAX_UTT_LENGTH,
                 max_utt_gap    = MAX_UTT_GAP):

        self.sample_rate = sample_rate

        self.vad = webrtcvad.Vad()
        self.vad.set_mode(aggressiveness)

        self.state          = STATE_IDLE
        self.buf            = []
        self.buf_sent       = 0

        # integer division keeps these frame counts ints under Python 3
        self.min_buf_entries = int(min_utt_length * 1000) // BUFFER_DURATION
        self.max_buf_entries = int(max_utt_length * 1000) // BUFFER_DURATION
        self.max_gap         = int(max_utt_gap    * 1000) // BUFFER_DURATION

        self.frame_cnt       = 0
        self.avg_vol_sum     = 0.0
        self.avg_vol_cnt     = 0 
Developer: gooofy, Project: py-nltools, Lines: 24, Source: vad.py
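The module-level names referenced above (SAMPLE_RATE, BUFFER_DURATION, the utterance limits, and STATE_IDLE) are defined elsewhere in py-nltools. Illustrative values, stated as assumptions rather than the project's actual settings:

SAMPLE_RATE     = 16000   # Hz
BUFFER_DURATION = 30      # ms of audio per incoming buffer
MIN_UTT_LENGTH  = 0.4     # s, shortest utterance to emit
MAX_UTT_LENGTH  = 12.0    # s, longest utterance before forcing a cut
MAX_UTT_GAP     = 0.7     # s, silence gap tolerated inside an utterance
STATE_IDLE      = 0       # waiting for speech to begin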

Example 4: __init__

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def __init__(self, sample_rate=16000, level=3):
        """

        Args:
            sample_rate: audio sample rate
            level: between 0 and 3. 0 is the least aggressive about filtering out non-speech, 3 is the most aggressive.
        """
        self.sample_rate = sample_rate

        self.frame_ms = 30
        self.frame_bytes = int(2 * self.frame_ms * self.sample_rate / 1000)   # S16_LE, 2 bytes width

        self.vad = webrtcvad.Vad(level)
        self.active = False
        self.data = b''
        self.history = collections.deque(maxlen=128) 
Developer: xiongyihui, Project: tdoa, Lines: 18, Source: vad.py

Example 5: VAD_chunk

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def VAD_chunk(aggressiveness, path):
    audio, byte_audio = read_wave(path, hp.data.sr)
    vad = webrtcvad.Vad(int(aggressiveness))
    frames = frame_generator(20, byte_audio, hp.data.sr)
    frames = list(frames)
    times = vad_collector(hp.data.sr, 20, 200, vad, frames)
    speech_times = []
    speech_segs = []
    for i, time in enumerate(times):
        start = np.round(time[0],decimals=2)
        end = np.round(time[1],decimals=2)
        j = start
        # chop into 0.4 s pieces; the while loop never breaks, so the
        # else clause always appends the final partial piece
        while j + .4 < end:
            end_j = np.round(j+.4,decimals=2)
            speech_times.append((j, end_j))
            speech_segs.append(audio[int(j*hp.data.sr):int(end_j*hp.data.sr)])
            j = end_j
        else:
            speech_times.append((j, end))
            speech_segs.append(audio[int(j*hp.data.sr):int(end*hp.data.sr)])
    return speech_times, speech_segs 
Developer: HarryVolek, Project: PyTorch_Speaker_Verification, Lines: 23, Source: VAD_segments.py
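The helpers read_wave, frame_generator, and vad_collector are imported elsewhere in VAD_segments.py and are not shown. For orientation, here is a minimal frame_generator modeled on the py-webrtcvad example script; treat it as a sketch, since the project's version (whose vad_collector yields (start, end) times, as used above) may differ:

class Frame:
    """A chunk of S16_LE PCM audio with its start time and duration."""
    def __init__(self, data, timestamp, duration):
        self.bytes = data
        self.timestamp = timestamp
        self.duration = duration


def frame_generator(frame_duration_ms, audio, sample_rate):
    """Yield fixed-size frames from a byte string of 16-bit mono PCM."""
    n = int(sample_rate * (frame_duration_ms / 1000.0) * 2)  # bytes per frame
    offset, timestamp = 0, 0.0
    duration = frame_duration_ms / 1000.0
    while offset + n <= len(audio):
        yield Frame(audio[offset:offset + n], timestamp, duration)
        timestamp += duration
        offset += n

If hp.data.sr is 16000 Hz, each 20 ms frame is 640 bytes, a size webrtcvad accepts.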

Example 6: __init__

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def __init__(self, chop_factors=[(0.05, 0.025), (0.1, 0.05)],
                 max_chops=2, force_regions=False, report=False):
        # chop factors in seconds (mean, std) per possible chop
        import webrtcvad
        self.chop_factors = chop_factors
        self.max_chops = max_chops
        self.force_regions = force_regions
        # create VAD to get speech chunks
        self.vad = webrtcvad.Vad(2)
        # make scalers to norm/denorm
        self.denormalizer = Scale(1. / ((2 ** 15) - 1))
        self.normalizer = Scale((2 ** 15) - 1)
        self.report = report

    # @profile 
Developer: santi-pdp, Project: pase, Lines: 17, Source: transforms.py

Example 7: __init__

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def __init__(self, sample_rate=16000, level=0):
        self.vad = webrtcvad.Vad(level)
        self.sample_rate = int(sample_rate)
        self.num_padding_frames = 10
        self.reset() 
Developer: UFAL-DSG, Project: cloud-asr, Lines: 7, Source: vad.py

Example 8: __init__

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def __init__(self, rate=16000, mode=0, duration=1000, on_inactive=None):
        super(VAD, self).__init__()

        self.rate = rate
        self.vad = Vad(mode)
        self.on_inactive = on_inactive
        self.limit_inactive_cnt = duration // 10  # a frame is 10 ms
        self.current_inactive_cnt = 0 
Developer: voice-engine, Project: voice-engine, Lines: 10, Source: vad.py

Example 9: __init__

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def __init__(self, aggressiveness=3, **kwargs):
        super(VADAudio, self).__init__(**kwargs)
        self.vad = webrtcvad.Vad(aggressiveness) 
Developer: dictation-toolbox, Project: dragonfly, Lines: 5, Source: audio.py

Example 10: __init__

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def __init__(self, frame_length: int, sample_rate: int) -> None:
        """Initialize Microphone processing."""
        self.audio = pyaudio.PyAudio()
        self.vad = webrtcvad.Vad(1)
        self.stream: Optional[pyaudio.Stream] = None

        self._frame_length = frame_length
        self._sample_rate = sample_rate
        self._last_frame: Optional[np.ndarray] = None 
Developer: home-assistant, Project: ada, Lines: 11, Source: microphone.py

Example 11: _make_webrtcvad_detector

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def _make_webrtcvad_detector(sample_rate, frame_rate):
    import webrtcvad
    vad = webrtcvad.Vad()
    vad.set_mode(3)  # set non-speech pruning aggressiveness from 0 to 3
    window_duration = 1. / sample_rate  # duration in seconds
    frames_per_window = int(window_duration * frame_rate + 0.5)
    bytes_per_frame = 2

    def _detect(asegment):
        media_bstring = []
        failures = 0
        for start in range(0, len(asegment) // bytes_per_frame,
                           frames_per_window):
            stop = min(start + frames_per_window,
                       len(asegment) // bytes_per_frame)
            try:
                is_speech = vad.is_speech(
                    asegment[start * bytes_per_frame: stop * bytes_per_frame],
                    sample_rate=frame_rate)
            except Exception:
                # frames the VAD rejects are counted and scored "not sure" below
                is_speech = False
                failures += 1
            # webrtcvad has low recall on mode 3, so treat non-speech as "not sure"
            media_bstring.append(1. if is_speech else 0.5)
        return np.array(media_bstring)

    return _detect 
Developer: smacke, Project: ffsubsync, Lines: 29, Source: speech_transformers.py
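A hypothetical invocation, to make the two rates concrete. The values are assumptions: sample_rate is the detector's output resolution in windows per second, frame_rate is the audio sample rate:

detect = _make_webrtcvad_detector(sample_rate=100, frame_rate=16000)
# 1/100 s windows: int(0.01 * 16000 + 0.5) = 160 frames = 320 bytes per window
silence = b'\x00\x00' * 16000          # one second of silent 16-bit PCM
scores = detect(silence)               # silence scores 0.5 ("not sure") per window
print(scores.shape)                    # (100,)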

Example 12: main

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def main():
    vad = webrtcvad.Vad(3)

    speech_count = 0
    chunks = []
    doa_chunks = int(DOA_FRAMES / VAD_FRAMES)

    try:
        with MicArray(RATE, CHANNELS, RATE * VAD_FRAMES // 1000) as mic:
            for chunk in mic.read_chunks():
                # Use single channel audio to detect voice activity
                if vad.is_speech(chunk[0::CHANNELS].tobytes(), RATE):
                    speech_count += 1
                    sys.stdout.write('1')
                else:
                    sys.stdout.write('0')

                sys.stdout.flush()

                chunks.append(chunk)
                if len(chunks) == doa_chunks:
                    if speech_count > (doa_chunks / 2):
                        frames = np.concatenate(chunks)
                        direction = mic.get_direction(frames)
                        pixel_ring.set_direction(direction)
                        print('\n{}'.format(int(direction)))

                    speech_count = 0
                    chunks = []

    except KeyboardInterrupt:
        pass

    pixel_ring.off()
Developer: respeaker, Project: mic_array, Lines: 36, Source: vad_doa.py

Example 13: __init__

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def __init__(self, aggressiveness=3):
        super().__init__()
        self.vad = webrtcvad.Vad(aggressiveness) 
Developer: daanzu, Project: deepspeech-websocket-server, Lines: 5, Source: client.py

Example 14: __init__

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def __init__(self, sr=16000, chunk_duration_ms=30, video_path='', out_path=''):
        self._sr = sr
        self._chunk_duration_ms = chunk_duration_ms
        self._chunk_size = int(sr * chunk_duration_ms / 1000)  # chunk to read in samples
        self._nb_window_chunks = int(400 / chunk_duration_ms)  # 400ms / 30ms frame
        self._nb_window_chunks_end = self._nb_window_chunks * 2
        self._vad = webrtcvad.Vad(mode=3)

        self._video_path = video_path
        self._out_path = out_path 
Developer: RayanWang, Project: Speech_emotion_recognition_BLSTM, Lines: 12, Source: audio.py

Example 15: trim_long_silences

# Required import: import webrtcvad [as alias]
# Or: from webrtcvad import Vad [as alias]
def trim_long_silences(wav):
    """
    Ensures that segments without voice in the waveform remain no longer than a 
    threshold determined by the VAD parameters in params.py.

    :param wav: the raw waveform as a numpy array of floats 
    :return: the same waveform with silences trimmed away (length <= original wav length)
    """
    # Compute the voice detection window size
    samples_per_window = (vad_window_length * sampling_rate) // 1000
    
    # Trim the end of the audio to have a multiple of the window size
    wav = wav[:len(wav) - (len(wav) % samples_per_window)]
    
    # Convert the float waveform to 16-bit mono PCM
    pcm_wave = struct.pack("%dh" % len(wav), *(np.round(wav * int16_max)).astype(np.int16))
    
    # Perform voice activation detection
    voice_flags = []
    vad = webrtcvad.Vad(mode=3)
    for window_start in range(0, len(wav), samples_per_window):
        window_end = window_start + samples_per_window
        voice_flags.append(vad.is_speech(pcm_wave[window_start * 2:window_end * 2],
                                         sample_rate=sampling_rate))
    voice_flags = np.array(voice_flags)
    
    # Smooth the voice detection with a moving average
    def moving_average(array, width):
        array_padded = np.concatenate((np.zeros((width - 1) // 2), array, np.zeros(width // 2)))
        ret = np.cumsum(array_padded, dtype=float)
        ret[width:] = ret[width:] - ret[:-width]
        return ret[width - 1:] / width
    
    audio_mask = moving_average(voice_flags, vad_moving_average_width)
    audio_mask = np.round(audio_mask).astype(bool)  # np.bool was removed in NumPy 1.24
    
    # Dilate the voiced regions
    audio_mask = binary_dilation(audio_mask, np.ones(vad_max_silence_length + 1))
    audio_mask = np.repeat(audio_mask, samples_per_window)
    
    return wav[audio_mask]
Developer: resemble-ai, Project: Resemblyzer, Lines: 43, Source: audio.py
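trim_long_silences depends on several module-level names from Resemblyzer's params.py (vad_window_length, sampling_rate, int16_max, vad_moving_average_width, vad_max_silence_length) plus struct, numpy, webrtcvad, and scipy's binary_dilation. A hypothetical driver with those values filled in as assumptions; verify them against params.py before relying on them:

import struct

import numpy as np
import webrtcvad
from scipy.ndimage import binary_dilation

# Assumed params.py values (not verified against the project)
sampling_rate = 16000            # Hz
vad_window_length = 30           # ms per VAD window
vad_moving_average_width = 8     # windows in the smoothing average
vad_max_silence_length = 6       # windows of silence tolerated inside speech
int16_max = (2 ** 15) - 1

wav = np.random.uniform(-0.1, 0.1, sampling_rate * 2)  # two seconds of noise
print(len(wav), '->', len(trim_long_silences(wav)))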


Note: The webrtcvad.Vad examples above were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and copyright in each remains with its original author. Consult the corresponding project's license before distributing or reusing the code; do not republish without permission.