

Python python_speech_features.delta Method Code Examples

This article collects and summarizes typical usage examples of the python_speech_features.delta method in Python. If you are wondering what python_speech_features.delta does or how to use it in practice, the curated code examples below may help; you can also explore other usage examples from the python_speech_features module.


The following shows 13 code examples of the python_speech_features.delta method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
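To make the method's contract concrete: delta(feat, N) takes a feature matrix of shape [num_frames, num_features] and returns a matrix of the same shape, where each frame is a weighted difference over the N preceding and N following frames. A minimal sketch, assuming a mono wav file at the hypothetical path "speech.wav":

import numpy as np
import scipy.io.wavfile as wav
import python_speech_features

rate, sig = wav.read("speech.wav")
feat = python_speech_features.mfcc(sig, rate)       # [num_frames, 13]
delta_1 = python_speech_features.delta(feat, 2)     # first-order deltas
delta_2 = python_speech_features.delta(delta_1, 2)  # second-order (delta-delta)
features = np.hstack([feat, delta_1, delta_2])      # [num_frames, 39]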

Example 1: fbank

# Required module: import python_speech_features [as alias]
# Alternatively: from python_speech_features import delta [as alias]
def fbank(wav_path, flat=True):
    """ Currently grabs log Mel filterbank, deltas and double deltas."""

    (rate, sig) = wav.read(wav_path)
    if len(sig) == 0:
        logger.warning("Empty wav: {}".format(wav_path))
    fbank_feat = python_speech_features.logfbank(sig, rate, nfilt=40)
    energy = extract_energy(rate, sig)
    feat = np.hstack([energy, fbank_feat])
    delta_feat = python_speech_features.delta(feat, 2)
    delta_delta_feat = python_speech_features.delta(delta_feat, 2)
    all_feats = [feat, delta_feat, delta_delta_feat]
    if not flat:
        all_feats = np.array(all_feats)
        # Make time the first dimension for easy length normalization padding
        # later.
        all_feats = np.swapaxes(all_feats, 0, 1)
        all_feats = np.swapaxes(all_feats, 1, 2)
    else:
        all_feats = np.concatenate(all_feats, axis=1)

    # Log Mel Filterbank, with delta, and double delta
    feat_fn = wav_path[:-3] + "fbank.npy"
    np.save(feat_fn, all_feats) 
Developer ID: persephone-tools, Project: persephone, Lines of code: 26, Source: feat_extract.py
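Example 1 calls an extract_energy helper that is not shown in the snippet. A minimal sketch of what it might look like, assuming it returns per-frame log energy as a column vector aligned with the 40 filterbank frames (the name and shape are inferred from the call site, not taken from the original source):

import numpy as np
import python_speech_features

def extract_energy(rate, sig):
    # Hypothetical: per-frame log energy as a [num_frames, 1] column,
    # framed with the same defaults as logfbank so the shapes line up.
    _, energy = python_speech_features.fbank(sig, rate, nfilt=40)
    energy = np.where(energy == 0, np.finfo(float).eps, energy)  # avoid log(0)
    return np.log(energy).reshape(-1, 1)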

Example 2: mfcc

# Required module: import python_speech_features [as alias]
# Alternatively: from python_speech_features import delta [as alias]
def mfcc(wav_path):
    """ Grabs MFCC features with energy and derivates. """

    (rate, sig) = wav.read(wav_path)
    feat = python_speech_features.mfcc(sig, rate, appendEnergy=True)
    delta_feat = python_speech_features.delta(feat, 2)
    all_feats = [feat, delta_feat]
    all_feats = np.array(all_feats)
    # Make time the first dimension for easy length normalization padding later.
    all_feats = np.swapaxes(all_feats, 0, 1)
    all_feats = np.swapaxes(all_feats, 1, 2)

    feat_fn = wav_path[:-3] + "mfcc13_d.npy"
    np.save(feat_fn, all_feats) 
Developer ID: persephone-tools, Project: persephone, Lines of code: 16, Source: feat_extract.py

Example 3: mk_MFB

# Required module: import python_speech_features [as alias]
# Alternatively: from python_speech_features import delta [as alias]
def mk_MFB(filename, sample_rate=c.SAMPLE_RATE, use_delta=c.USE_DELTA,
           use_scale=c.USE_SCALE, use_logscale=c.USE_LOGSCALE):
    audio, sr = librosa.load(filename, sr=sample_rate, mono=True)

    filter_banks, energies = fbank(audio, samplerate=sample_rate,
                                   nfilt=c.FILTER_BANK, winlen=0.025)

    if use_logscale:
        filter_banks = 20 * np.log10(np.maximum(filter_banks, 1e-5))

    if use_delta:
        delta_1 = delta(filter_banks, N=1)
        delta_2 = delta(delta_1, N=1)

        filter_banks = normalize_frames(filter_banks, Scale=use_scale)
        delta_1 = normalize_frames(delta_1, Scale=use_scale)
        delta_2 = normalize_frames(delta_2, Scale=use_scale)

        frames_features = np.hstack([filter_banks, delta_1, delta_2])
    else:
        filter_banks = normalize_frames(filter_banks, Scale=use_scale)
        frames_features = filter_banks

    np.save(filename.replace('.wav', '.npy'), frames_features)
Developer ID: qqueing, Project: DeepSpeaker-pytorch, Lines of code: 30, Source: audio_processing.py
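Examples 3 and 4 depend on a normalize_frames helper and a constants module c that are not shown. A minimal sketch of the normalization, under the assumption that it standardizes each frame vector to zero mean and, when Scale is set, unit variance; the exact epsilon and semantics in the original repository may differ:

import numpy as np

def normalize_frames(m, Scale=True):
    # Hypothetical per-frame standardization: subtract each frame's mean
    # across the filterbank dimension and, if Scale, divide by its
    # standard deviation (floored to avoid division by zero).
    m = np.asarray(m)
    mu = m.mean(axis=1, keepdims=True)
    if Scale:
        return (m - mu) / np.maximum(m.std(axis=1, keepdims=True), 1e-8)
    return m - mu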

Example 4: pre_process_inputs

# Required module: import python_speech_features [as alias]
# Alternatively: from python_speech_features import delta [as alias]
def pre_process_inputs(signal=np.random.uniform(size=32000),
                       target_sample_rate=8000, use_delta=c.USE_DELTA):
    filter_banks, energies = fbank(signal, samplerate=target_sample_rate,
                                   nfilt=c.FILTER_BANK, winlen=0.025)
    delta_1 = delta(filter_banks, N=1)
    delta_2 = delta(delta_1, N=1)

    filter_banks = normalize_frames(filter_banks)
    delta_1 = normalize_frames(delta_1)
    delta_2 = normalize_frames(delta_2)

    if use_delta:
        frames_features = np.hstack([filter_banks, delta_1, delta_2])
    else:
        frames_features = filter_banks
    num_frames = len(frames_features)
    network_inputs = []
    # Instead of sliding a window over every valid frame position, pick a
    # single random window of NUM_PREVIOUS_FRAME + NUM_NEXT_FRAME frames.
    import random
    j = random.randrange(c.NUM_PREVIOUS_FRAME, num_frames - c.NUM_NEXT_FRAME)
    frames_slice = frames_features[j - c.NUM_PREVIOUS_FRAME:j + c.NUM_NEXT_FRAME]
    network_inputs.append(frames_slice)
    return np.array(network_inputs)
Developer ID: qqueing, Project: DeepSpeaker-pytorch, Lines of code: 29, Source: audio_processing.py

Example 5: _compute_mfcc

# Required module: import python_speech_features [as alias]
# Alternatively: from python_speech_features import delta [as alias]
def _compute_mfcc(self,
                      samples,
                      sample_rate,
                      stride_ms=10.0,
                      window_ms=20.0,
                      max_freq=None):
        """Compute mfcc from samples."""
        if max_freq is None:
            max_freq = sample_rate / 2
        if max_freq > sample_rate / 2:
            raise ValueError("max_freq must not be greater than half of "
                             "sample rate.")
        if stride_ms > window_ms:
            raise ValueError("Stride size must not be greater than "
                             "window size.")
        # compute the 13 cepstral coefficients, and the first one is replaced
        # by log(frame energy)
        mfcc_feat = mfcc(
            signal=samples,
            samplerate=sample_rate,
            winlen=0.001 * window_ms,
            winstep=0.001 * stride_ms,
            highfreq=max_freq)
        # Deltas
        d_mfcc_feat = delta(mfcc_feat, 2)
        # Deltas-Deltas
        dd_mfcc_feat = delta(d_mfcc_feat, 2)
        # transpose
        mfcc_feat = np.transpose(mfcc_feat)
        d_mfcc_feat = np.transpose(d_mfcc_feat)
        dd_mfcc_feat = np.transpose(dd_mfcc_feat)
        # concat above three features
        concat_mfcc_feat = np.concatenate(
            (mfcc_feat, d_mfcc_feat, dd_mfcc_feat))
        return concat_mfcc_feat 
Developer ID: Pelhans, Project: ZASR_tensorflow, Lines of code: 37, Source: audio_featurizer.py
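A hypothetical call, assuming samples is a 1-D numpy array of 16 kHz audio and featurizer is an instance of the surrounding class:

feats = featurizer._compute_mfcc(samples, sample_rate=16000)
# feats.shape == (39, num_frames): 13 MFCCs, 13 deltas and 13 delta-deltas
# stacked along the first axis after transposition.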

Example 6: SpeechFeaturesPreprocessor

# Required module: import python_speech_features [as alias]
# Alternatively: from python_speech_features import delta [as alias]
def SpeechFeaturesPreprocessor(feature_type: str = "mfcc",
                               delta_order: int = 0,
                               delta_window: int = 2,
                               **kwargs) -> Callable:
    """Calculate speech features.

    First, the given type of features (e.g. MFCC) is computed using a window
    of length `winlen` and step `winstep`; for additional keyword arguments
    (specific to each feature type), see
    http://python-speech-features.readthedocs.io/. Then, delta features up to
    `delta_order` are added.

    By default, 13 MFCCs per frame are computed. To add delta and delta-delta
    features (resulting in 39 coefficients per frame), set `delta_order=2`.

    Arguments:
        feature_type: mfcc, fbank, logfbank or ssc (default is mfcc)
        delta_order: maximum order of the delta features (default is 0)
        delta_window: window size for delta features (default is 2)
        **kwargs: keyword arguments for the appropriate function from
            python_speech_features

    Returns:
        A preprocessing function that maps an `Audio` object to a numpy
        array of shape [num_frames, num_features].
    """

    if feature_type not in FEATURE_TYPES:
        raise ValueError(
            "Unknown speech feature type '{}'".format(feature_type))

    def preprocess(audio: Audio) -> np.ndarray:
        features = [FEATURE_TYPES[feature_type](
            audio.data, samplerate=audio.rate, **kwargs)]

        for _ in range(delta_order):
            features.append(delta(features[-1], delta_window))

        return np.concatenate(features, axis=1)

    return preprocess 
Developer ID: ufal, Project: neuralmonkey, Lines of code: 42, Source: speech.py
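A hypothetical usage sketch, assuming FEATURE_TYPES maps the four feature names to the corresponding python_speech_features functions and that audio is an Audio object exposing .data and .rate:

preprocess = SpeechFeaturesPreprocessor(feature_type="mfcc", delta_order=2)
features = preprocess(audio)  # [num_frames, 39]: 13 MFCCs + deltas + delta-deltas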

Example 7: get_mfcc_feature

# Required module: import python_speech_features [as alias]
# Alternatively: from python_speech_features import delta [as alias]
def get_mfcc_feature(wavsignal, fs):
    '''
    Input: the numeric representation of a wav file and its sampling rate.
    Output: the speech MFCC features plus first- and second-order deltas.
    '''
    feat_mfcc = mfcc(wavsignal, fs)
    feat_mfcc_d = delta(feat_mfcc, 2)
    feat_mfcc_dd = delta(feat_mfcc_d, 2)
    wav_feature = np.column_stack((feat_mfcc, feat_mfcc_d, feat_mfcc_dd))
    return wav_feature
Developer ID: zw76859420, Project: ASR_WORD, Lines of code: 12, Source: file_wav.py

Example 8: get_fbank_feature

# Required module: import python_speech_features [as alias]
# Alternatively: from python_speech_features import delta [as alias]
def get_fbank_feature(wavsignal, fs):
    '''
    Input: the numeric representation of a wav file and its sampling rate.
    Output: the speech FBANK features plus first- and second-order deltas.
    '''
    feat_fbank = logfbank(wavsignal, fs, nfilt=40)
    feat_fbank_d = delta(feat_fbank, 2)
    feat_fbank_dd = delta(feat_fbank_d, 2)
    wav_feature = np.column_stack((feat_fbank, feat_fbank_d, feat_fbank_dd))
    return wav_feature
Developer ID: zw76859420, Project: ASR_WORD, Lines of code: 11, Source: file_wav.py
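A hypothetical usage of Examples 7 and 8, assuming a mono wav file read with scipy (the path is illustrative):

from scipy.io import wavfile

fs, wavsignal = wavfile.read("speech.wav")
mfcc_feats = get_mfcc_feature(wavsignal, fs)    # [num_frames, 39]
fbank_feats = get_fbank_feature(wavsignal, fs)  # [num_frames, 120]: 40 filters x 3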

Example 9: extract_features

# Required module: import python_speech_features [as alias]
# Alternatively: from python_speech_features import delta [as alias]
def extract_features(self, audio_path):
        """
        Extract voice features including the Mel Frequency Cepstral Coefficient (MFCC)
        from an audio using the python_speech_features module, performs Cepstral Mean
        Normalization (CMS) and combine it with MFCC deltas and the MFCC double
        deltas.
     
        Args: 	    
            audio_path (str) : path to wave file without silent moments. 
        Returns: 	    
            (array) : Extracted features matrix. 	
        """
        rate, audio  = read(audio_path)
        mfcc_feature = mfcc(# The audio signal from which to compute features.
                            audio,
                            # The samplerate of the signal we are working with.
                            rate,
                            # The length of the analysis window in seconds. 
                            # Default is 0.025s (25 milliseconds)
                            winlen       = 0.05,
                            # The step between successive windows in seconds. 
                            # Default is 0.01s (10 milliseconds)
                            winstep      = 0.01,
                            # The number of cepstrum to return. 
                            # Default 13.
                            numcep       = 13,
                            # The number of filters in the filterbank.
                            # Default is 26.
                            nfilt        = 30,
                            # The FFT size. Default is 512.
                            nfft         = 1024,
                            # If true, the zeroth cepstral coefficient is replaced 
                            # with the log of the total frame energy.
                            appendEnergy = True)

        mfcc_feature  = preprocessing.scale(mfcc_feature)
        deltas        = delta(mfcc_feature, 2)
        double_deltas = delta(deltas, 2)
        combined      = np.hstack((mfcc_feature, deltas, double_deltas))
        return combined 
Developer ID: SuperKogito, Project: Voice-based-gender-recognition, Lines of code: 43, Source: FeaturesExtractor.py

Example 10: extract_features

# Required module: import python_speech_features [as alias]
# Alternatively: from python_speech_features import delta [as alias]
def extract_features(self, audio_path):
        """
        Extract voice features including the Mel Frequency Cepstral Coefficient (MFCC)
        from an audio using the python_speech_features module, performs Cepstral Mean
        Normalization (CMS) and combine it with MFCC deltas and the MFCC double
        deltas.
     
        Args: 	    
            audio_path (str) : path to wave file without silent moments. 
        Returns: 	    
            (array) : Extracted features matrix. 	
        """
        rate, audio  = read(audio_path)
        mfcc_feature = mfcc(# The audio signal from which to compute features.
                            audio,
                            # The samplerate of the signal we are working with.
                            rate,
                            # The length of the analysis window in seconds. 
                            # Default is 0.025s (25 milliseconds)
                            winlen       = 0.05,
                            # The step between successive windows in seconds. 
                            # Default is 0.01s (10 milliseconds)
                            winstep      = 0.01,
                            # The number of cepstrum to return. 
                            # Default 13.
                            numcep       = 5,
                            # The number of filters in the filterbank.
                            # Default is 26.
                            nfilt        = 30,
                            # The FFT size. Default is 512.
                            nfft         = 512,
                            # If true, the zeroth cepstral coefficient is replaced 
                            # with the log of the total frame energy.
                            appendEnergy = True)

        mfcc_feature  = preprocessing.scale(mfcc_feature)
        deltas        = delta(mfcc_feature, 2)
        double_deltas = delta(deltas, 2)
        combined      = np.hstack((mfcc_feature, deltas, double_deltas))
        return combined 
Developer ID: SuperKogito, Project: Voice-based-gender-recognition, Lines of code: 43, Source: FeaturesExtractor.py
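Examples 9 and 10 differ only in numcep (13 vs. 5) and nfft (1024 vs. 512); because the deltas and double deltas are stacked beside the scaled MFCCs, the combined matrix is always 3 * numcep columns wide. A quick self-contained check of that arithmetic:

import numpy as np

num_frames, numcep = 100, 5            # Example 10's settings
mfcc_feature = np.zeros((num_frames, numcep))
combined = np.hstack((mfcc_feature,
                      np.zeros_like(mfcc_feature),   # deltas
                      np.zeros_like(mfcc_feature)))  # double deltas
assert combined.shape == (num_frames, 3 * numcep)    # (100, 15); 39 when numcep=13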

Example 11: __mfcc

# Required module: import python_speech_features [as alias]
# Alternatively: from python_speech_features import delta [as alias]
def __mfcc(audio_data, sampling_rate, win_len, win_step, num_features, n_fft, f_min, f_max):
    """Convert a wav signal into Mel Frequency Cepstral Coefficients (MFCC).

    Args:
        audio_data (np.ndarray): Wav signal.
        sampling_rate (int):  Sampling rate.
        win_len (float): Window length in seconds.
        win_step (float): Window stride in seconds.
        num_features (int): Number of features to generate.
        n_fft (int): FFT size (number of FFT points).
        f_min (float): Minimum frequency to consider.
        f_max (float): Maximum frequency to consider.

    Returns:
        np.ndarray: MFCC feature vectors. Shape: [time, num_features]
    """
    if num_features % 2 != 0:
        raise ValueError('num_features is not a multiple of 2.')

    # Compute MFCC features.
    mfcc = psf.mfcc(signal=audio_data, samplerate=sampling_rate, winlen=win_len, winstep=win_step,
                    numcep=num_features // 2, nfilt=num_features, nfft=n_fft,
                    lowfreq=f_min, highfreq=f_max,
                    preemph=0.97, ceplifter=22, appendEnergy=True)

    # And the first-order differences (delta features).
    mfcc_delta = psf.delta(mfcc, 2)

    # Combine MFCC with MFCC_delta
    return np.concatenate([mfcc, mfcc_delta], axis=1) 
Developer ID: mdangschat, Project: ctc-asr, Lines of code: 32, Source: input_functions.py
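A hypothetical call from within the same module, assuming 16 kHz audio; with num_features=26 the function computes 13 cepstra per frame and appends 13 deltas, which is why num_features must be even:

feats = __mfcc(audio_data, sampling_rate=16000, win_len=0.025, win_step=0.01,
               num_features=26, n_fft=512, f_min=0.0, f_max=8000.0)
# feats.shape == (num_frames, 26)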

Example 12: GetMfccFeature

# Required module: import python_speech_features [as alias]
# Alternatively: from python_speech_features import delta [as alias]
def GetMfccFeature(wavsignal, fs):
    # Extract the input features.
    feat_mfcc = mfcc(wavsignal[0], fs)
    feat_mfcc_d = delta(feat_mfcc, 2)
    feat_mfcc_dd = delta(feat_mfcc_d, 2)
    # Return the MFCC feature matrix together with its first- and
    # second-order delta matrices.
    wav_feature = np.column_stack((feat_mfcc, feat_mfcc_d, feat_mfcc_dd))
    return wav_feature
Developer ID: shibing624, Project: parrots, Lines of code: 10, Source: wav_util.py

Example 13: GetMfccFeature

# Required module: import python_speech_features [as alias]
# Alternatively: from python_speech_features import delta [as alias]
def GetMfccFeature(wavsignal, fs):
    # Extract the input features.
    feat_mfcc = mfcc(wavsignal[0], fs)
    feat_mfcc_d = delta(feat_mfcc, 2)
    feat_mfcc_dd = delta(feat_mfcc_d, 2)
    # Return the MFCC feature matrix together with its first- and
    # second-order delta matrices.
    wav_feature = np.column_stack((feat_mfcc, feat_mfcc_d, feat_mfcc_dd))
    return wav_feature
Developer ID: nl8590687, Project: ASRT_SpeechRecognition, Lines of code: 10, Source: file_wav.py


Note: the python_speech_features.delta method examples in this article were compiled by 純淨天空 from GitHub, MSDocs and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by their respective developers, and copyright remains with the original authors; consult each project's License before distributing or using the code. Do not repost without permission.