This article collects typical usage examples of the Python method python_speech_features.fbank. If you are unsure what python_speech_features.fbank does or how to use it, the curated code examples below may help; you can also explore the python_speech_features module further.
The following section presents 7 code examples of python_speech_features.fbank, ordered by popularity by default.
Example 1: wav2fbank
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
import os
import numpy as np
import soundfile as sf  # `sf` here is the PySoundFile package
from python_speech_features import fbank

def wav2fbank(args):
    # A single tuple argument keeps the function easy to map over a worker pool.
    wavname, out_dir, nfilt, log = args
    x, rate = sf.read(wavname)
    fb, egy = fbank(x, rate, nfilt=nfilt)  # Mel filter-bank energies per frame
    if log:
        fb = np.log(fb)
    bname = os.path.splitext(os.path.basename(wavname))[0]
    outfile = os.path.join(out_dir, bname + '.fb')
    np.save(outfile, fb)  # np.save appends '.npy', so the file ends in '.fb.npy'
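A minimal driving sketch, assuming the placeholder file names and output directory below; because wav2fbank takes a single tuple, it can be mapped directly over a multiprocessing pool.

from multiprocessing import Pool

# Hypothetical file list and output directory; 40 filters, log-scaled output.
jobs = [(wav, 'fbank_out', 40, True) for wav in ['utt1.wav', 'utt2.wav']]
with Pool(4) as pool:
    pool.map(wav2fbank, jobs)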
Example 2: get_features
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
import numpy as np
import python_speech_features as sf  # assumed alias; it also exposes dct and lifter
from scikits.audiolab import Sndfile  # legacy audio-I/O dependency

def get_features(filename, numcep, numfilt, winlen, winstep, grad):
    f = Sndfile(filename, 'r')
    frames = f.nframes
    samplerate = f.samplerate
    data = f.read_frames(frames)
    data = np.asarray(data)
    # calc mfcc
    feat_raw, energy = sf.fbank(data, samplerate, winlen, winstep, nfilt=numfilt)
    feat = np.log(feat_raw)
    feat = sf.dct(feat, type=2, axis=1, norm='ortho')[:, :numcep]
    feat = sf.lifter(feat, L=22)
    feat = np.asarray(feat)
    # calc log energy
    log_energy = np.log(energy)  # np.log( np.sum(feat_raw**2, axis=1) )
    log_energy = log_energy.reshape([log_energy.shape[0], 1])
    mat = (feat - np.mean(feat, axis=0)) / (0.5 * np.std(feat, axis=0))
    mat = np.concatenate((mat, log_energy), axis=1)
    # calc first order derivatives
    if grad >= 1:
        gradf = np.gradient(mat)[0]
        mat = np.concatenate((mat, gradf), axis=1)
    # calc second order derivatives
    if grad == 2:
        grad2f = np.gradient(gradf)[0]
        mat = np.concatenate((mat, grad2f), axis=1)
    return mat, frames, samplerate
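A hedged usage sketch, assuming the legacy scikits.audiolab dependency is available and 'sample.wav' is a placeholder: with 13 cepstra plus log energy and both derivative orders, the feature matrix is 42 columns wide.

feats, n_frames, rate = get_features('sample.wav', numcep=13, numfilt=26,
                                     winlen=0.025, winstep=0.01, grad=2)
print(feats.shape)  # (num_frames, (13 + 1) * 3) == (num_frames, 42)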
Example 3: mk_MFB
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
import librosa
import numpy as np
from python_speech_features import fbank, delta
# `c` (project constants) and normalize_frames come from the surrounding project.

def mk_MFB(filename, sample_rate=c.SAMPLE_RATE, use_delta=c.USE_DELTA,
           use_scale=c.USE_SCALE, use_logscale=c.USE_LOGSCALE):
    audio, sr = librosa.load(filename, sr=sample_rate, mono=True)
    # audio = audio.flatten()
    filter_banks, energies = fbank(audio, samplerate=sample_rate,
                                   nfilt=c.FILTER_BANK, winlen=0.025)
    if use_logscale:
        # Convert filter-bank energies to a dB-like log scale, floored at 1e-5.
        filter_banks = 20 * np.log10(np.maximum(filter_banks, 1e-5))
    if use_delta:
        delta_1 = delta(filter_banks, N=1)
        delta_2 = delta(delta_1, N=1)
        filter_banks = normalize_frames(filter_banks, Scale=use_scale)
        delta_1 = normalize_frames(delta_1, Scale=use_scale)
        delta_2 = normalize_frames(delta_2, Scale=use_scale)
        frames_features = np.hstack([filter_banks, delta_1, delta_2])
    else:
        filter_banks = normalize_frames(filter_banks, Scale=use_scale)
        frames_features = filter_banks
    np.save(filename.replace('.wav', '.npy'), frames_features)
    return
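A hedged usage sketch; the path is a placeholder and the output width depends on the project's constants module `c`.

mk_MFB('speaker1/utt1.wav')
features = np.load('speaker1/utt1.npy')
print(features.shape)  # (num_frames, c.FILTER_BANK * 3) when USE_DELTA is set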
Example 4: pre_process_inputs
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
import random
import numpy as np
from python_speech_features import fbank, delta
# `c` (project constants) and normalize_frames come from the surrounding project.

def pre_process_inputs(signal=np.random.uniform(size=32000), target_sample_rate=8000,
                       use_delta=c.USE_DELTA):
    filter_banks, energies = fbank(signal, samplerate=target_sample_rate,
                                   nfilt=c.FILTER_BANK, winlen=0.025)
    delta_1 = delta(filter_banks, N=1)
    delta_2 = delta(delta_1, N=1)
    filter_banks = normalize_frames(filter_banks)
    delta_1 = normalize_frames(delta_1)
    delta_2 = normalize_frames(delta_2)
    if use_delta:
        frames_features = np.hstack([filter_banks, delta_1, delta_2])
    else:
        frames_features = filter_banks
    num_frames = len(frames_features)
    network_inputs = []
    """Too complicated
    for j in range(c.NUM_PREVIOUS_FRAME, num_frames - c.NUM_NEXT_FRAME):
        frames_slice = frames_features[j - c.NUM_PREVIOUS_FRAME:j + c.NUM_NEXT_FRAME]
        #network_inputs.append(np.reshape(frames_slice, (32, 20, 3)))
        network_inputs.append(frames_slice)
    """
    # Pick one random context window of NUM_PREVIOUS_FRAME + NUM_NEXT_FRAME frames.
    j = random.randrange(c.NUM_PREVIOUS_FRAME, num_frames - c.NUM_NEXT_FRAME)
    frames_slice = frames_features[j - c.NUM_PREVIOUS_FRAME:j + c.NUM_NEXT_FRAME]
    network_inputs.append(frames_slice)
    return np.array(network_inputs)
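A hedged usage sketch: with the default 4-second, 8 kHz random signal, the function returns a single randomly positioned context window; the exact shape depends on the project constants in `c`.

inputs = pre_process_inputs()
# (1, c.NUM_PREVIOUS_FRAME + c.NUM_NEXT_FRAME, c.FILTER_BANK * 3) when USE_DELTA is set.
print(inputs.shape)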
Example 5: SpeechFeaturesPreprocessor
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
from typing import Callable
import numpy as np
from python_speech_features import delta
# `Audio` and FEATURE_TYPES are defined in the surrounding module.

def SpeechFeaturesPreprocessor(feature_type: str = "mfcc",
                               delta_order: int = 0,
                               delta_window: int = 2,
                               **kwargs) -> Callable:
    """Calculate speech features.

    First, the given type of features (e.g. MFCC) is computed using a window
    of length `winlen` and step `winstep`; for additional keyword arguments
    (specific to each feature type), see
    http://python-speech-features.readthedocs.io/. Then, delta features up to
    `delta_order` are added.

    By default, 13 MFCCs per frame are computed. To add delta and delta-delta
    features (resulting in 39 coefficients per frame), set `delta_order=2`.

    Arguments:
        feature_type: mfcc, fbank, logfbank or ssc (default is mfcc)
        delta_order: maximum order of the delta features (default is 0)
        delta_window: window size for delta features (default is 2)
        **kwargs: keyword arguments for the appropriate function from
            python_speech_features

    Returns:
        A preprocessing function that maps an audio object to a numpy array
        of shape [num_frames, num_features].
    """
    if feature_type not in FEATURE_TYPES:
        raise ValueError(
            "Unknown speech feature type '{}'".format(feature_type))

    def preprocess(audio: Audio) -> np.ndarray:
        features = [FEATURE_TYPES[feature_type](
            audio.data, samplerate=audio.rate, **kwargs)]
        for _ in range(delta_order):
            features.append(delta(features[-1], delta_window))
        return np.concatenate(features, axis=1)

    return preprocess
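A hedged usage sketch; `audio` stands for an Audio instance loaded elsewhere, and winlen/winstep are forwarded to the underlying python_speech_features function.

preprocess = SpeechFeaturesPreprocessor(feature_type='mfcc', delta_order=2,
                                        winlen=0.025, winstep=0.01)
features = preprocess(audio)
print(features.shape)  # (num_frames, 13 * 3)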
Example 6: _fbank
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
def _fbank(*args, **kwargs) -> np.ndarray:
    # Thin wrapper that discards the per-frame energies returned by fbank.
    feat, _ = fbank(*args, **kwargs)
    return feat
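A hedged usage sketch with one second of synthetic 16 kHz noise and 40 Mel filters.

sig = np.random.uniform(-1, 1, 16000)
feat = _fbank(sig, samplerate=16000, nfilt=40)
print(feat.shape)  # (num_frames, 40)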
Example 7: mfcc_fbank
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
def mfcc_fbank(signal: np.array, sample_rate: int):  # 1D signal array.
    # With the delta stacking below commented out, the result has shape
    # (num_frames, n_filters); re-enabling it would give (num_frames, n_filters, 3).
    filter_banks, energies = fbank(signal, samplerate=sample_rate, nfilt=NUM_FBANKS)
    frames_features = normalize_frames(filter_banks)
    # delta_1 = delta(filter_banks, N=1)
    # delta_2 = delta(delta_1, N=1)
    # frames_features = np.transpose(np.stack([filter_banks, delta_1, delta_2]), (1, 2, 0))
    return np.array(frames_features, dtype=np.float32)  # Float32 precision is enough here.
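A hedged usage sketch; NUM_FBANKS and normalize_frames are defined in the surrounding project, and the one-second 16 kHz signal here is synthetic.

signal = np.random.uniform(-1, 1, 16000).astype(np.float32)
features = mfcc_fbank(signal, sample_rate=16000)
print(features.shape)  # (num_frames, NUM_FBANKS)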