本文整理匯總了Python中python_speech_features.delta函數的典型用法代碼示例。如果您正苦於以下問題:Python python_speech_features.delta函數的具體用法?Python python_speech_features.delta怎麼用?Python python_speech_features.delta使用的例子?那麼,這裡精選的代碼示例或許可以為您提供幫助。您也可以進一步了解該函數所在模塊python_speech_features的用法示例。
在下文中一共展示了python_speech_features.delta函數的13個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: fbank
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import delta [as 別名]
def fbank(wav_path, flat=True):
    """Compute log Mel filterbank features plus deltas and save them to disk.

    The wav file is read, 40 log filterbank channels are computed, the
    result of ``extract_energy`` is stacked in front of them, and first-
    and second-order deltas (window 2) are derived from that matrix.

    Args:
        wav_path: Path of the wav file to read. The output file name is
            ``wav_path`` with its last three characters replaced by
            ``"fbank.npy"``.
        flat: When true, the three feature matrices are concatenated
            along axis 1. Otherwise they are stacked into a 3-D array
            whose axes are rearranged so that time comes first.

    Returns:
        None. The features are written with ``np.save``.
    """
    rate, sig = wav.read(wav_path)
    if len(sig) == 0:
        # Only a warning: extraction is still attempted on the empty signal.
        logger.warning("Empty wav: {}".format(wav_path))

    log_mel = python_speech_features.logfbank(sig, rate, nfilt=40)
    frame_energy = extract_energy(rate, sig)
    static = np.hstack([frame_energy, log_mel])
    first_order = python_speech_features.delta(static, 2)
    second_order = python_speech_features.delta(first_order, 2)
    stacked = [static, first_order, second_order]

    if flat:
        stacked = np.concatenate(stacked, axis=1)
    else:
        # Time goes first so later length-normalisation padding is easy:
        # axis order (0, 1, 2) becomes (1, 2, 0).
        stacked = np.swapaxes(np.swapaxes(np.array(stacked), 0, 1), 1, 2)

    # Log Mel filterbank with delta and double delta.
    np.save(wav_path[:-3] + "fbank.npy", stacked)
示例2: mfcc
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import delta [as 別名]
def mfcc(wav_path):
    """Compute MFCCs (with energy) and their deltas and save them to disk.

    Args:
        wav_path: Path of the wav file to read. The output file name is
            ``wav_path`` with its last three characters replaced by
            ``"mfcc13_d.npy"``.

    Returns:
        None. The features are written with ``np.save``.
    """
    rate, sig = wav.read(wav_path)
    cepstra = python_speech_features.mfcc(sig, rate, appendEnergy=True)
    cepstra_delta = python_speech_features.delta(cepstra, 2)
    pair = np.array([cepstra, cepstra_delta])
    # Time goes first so later length-normalisation padding is easy:
    # axis order (0, 1, 2) becomes (1, 2, 0).
    time_major = np.swapaxes(np.swapaxes(pair, 0, 1), 1, 2)
    np.save(wav_path[:-3] + "mfcc13_d.npy", time_major)
示例3: mk_MFB
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import delta [as 別名]
def mk_MFB(filename, sample_rate=c.SAMPLE_RATE, use_delta=c.USE_DELTA,
           use_scale=c.USE_SCALE, use_logscale=c.USE_LOGSCALE):
    """Extract Mel filterbank features from an audio file and save them.

    Args:
        filename: Audio file to load. The output path is ``filename``
            with ``'.wav'`` replaced by ``'.npy'``.
        sample_rate: Rate the audio is loaded at and passed to ``fbank``.
        use_delta: When true, first- and second-order deltas (N=1) are
            appended to the filterbank features.
        use_scale: Forwarded to ``normalize_frames`` as ``Scale``.
        use_logscale: When true, filterbank energies are converted with
            ``20 * log10``, floored at 1e-5 before taking the log.

    Returns:
        None. The features are written with ``np.save``.
    """
    audio, _ = librosa.load(filename, sr=sample_rate, mono=True)
    banks, _ = fbank(audio, samplerate=sample_rate, nfilt=c.FILTER_BANK,
                     winlen=0.025)

    if use_logscale:
        # The floor keeps log10 away from zero-valued energies.
        banks = 20 * np.log10(np.maximum(banks, 1e-5))

    if not use_delta:
        features = normalize_frames(banks, Scale=use_scale)
    else:
        # Deltas are taken from the un-normalised features; every stream
        # is normalised independently afterwards.
        first_order = delta(banks, N=1)
        second_order = delta(first_order, N=1)
        features = np.hstack([
            normalize_frames(stream, Scale=use_scale)
            for stream in (banks, first_order, second_order)
        ])

    np.save(filename.replace('.wav', '.npy'), features)
示例4: pre_process_inputs
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import delta [as 別名]
def pre_process_inputs(signal=np.random.uniform(size=32000), target_sample_rate=8000, use_delta=c.USE_DELTA):
    """Turn a raw signal into one randomly positioned window of frames.

    Filterbank features are computed from ``signal`` and normalised; when
    ``use_delta`` is true, first- and second-order deltas (N=1) are
    normalised and appended as well. A single window of
    ``c.NUM_PREVIOUS_FRAME + c.NUM_NEXT_FRAME`` frames is then cut out at
    a random position.

    Args:
        signal: Audio samples. NOTE: the default random signal is created
            once, when the function is defined, and reused by every call
            that omits this argument.
        target_sample_rate: Sample rate passed to ``fbank``.
        use_delta: Whether delta and delta-delta features are appended.

    Returns:
        np.ndarray wrapping a one-element list that holds the selected
        window of frames.

    Raises:
        ValueError: If the signal yields too few frames to fit a window.
    """
    import random

    filter_banks, _energies = fbank(signal, samplerate=target_sample_rate,
                                    nfilt=c.FILTER_BANK, winlen=0.025)

    if use_delta:
        # Deltas are only computed when they are actually used, which
        # matches how mk_MFB handles the same flag.
        delta_1 = delta(filter_banks, N=1)
        delta_2 = delta(delta_1, N=1)
        frames_features = np.hstack([
            normalize_frames(filter_banks),
            normalize_frames(delta_1),
            normalize_frames(delta_2),
        ])
    else:
        frames_features = normalize_frames(filter_banks)

    num_frames = len(frames_features)
    first_center = c.NUM_PREVIOUS_FRAME
    stop_center = num_frames - c.NUM_NEXT_FRAME
    if first_center >= stop_center:
        # randrange would raise a ValueError here anyway; this message
        # says why.
        raise ValueError(
            "Signal too short: got {} frames, need more than {}.".format(
                num_frames, c.NUM_PREVIOUS_FRAME + c.NUM_NEXT_FRAME))

    j = random.randrange(first_center, stop_center)
    frames_slice = frames_features[j - c.NUM_PREVIOUS_FRAME:j + c.NUM_NEXT_FRAME]
    return np.array([frames_slice])
示例5: _compute_mfcc
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import delta [as 別名]
def _compute_mfcc(self,
                  samples,
                  sample_rate,
                  stride_ms=10.0,
                  window_ms=20.0,
                  max_freq=None):
    """Compute MFCC features with deltas and delta-deltas from samples.

    Args:
        samples: Audio samples passed to ``mfcc`` as ``signal``.
        sample_rate: Sample rate of ``samples``.
        stride_ms: Step between windows, in milliseconds.
        window_ms: Window length, in milliseconds.
        max_freq: Upper frequency passed as ``highfreq``; defaults to
            half of ``sample_rate``.

    Returns:
        The transposed MFCC, delta and delta-delta matrices concatenated
        along the first axis.

    Raises:
        ValueError: If ``max_freq`` exceeds half the sample rate, or if
            ``stride_ms`` exceeds ``window_ms``.
    """
    half_rate = sample_rate / 2
    if max_freq is None:
        max_freq = half_rate
    if max_freq > half_rate:
        raise ValueError(
            "max_freq must not be greater than half of sample rate.")
    if stride_ms > window_ms:
        raise ValueError(
            "Stride size must not be greater than window size.")

    # Milliseconds are converted to the seconds that ``mfcc`` expects.
    static = mfcc(
        signal=samples,
        samplerate=sample_rate,
        winlen=0.001 * window_ms,
        winstep=0.001 * stride_ms,
        highfreq=max_freq)
    first_order = delta(static, 2)
    second_order = delta(first_order, 2)

    # Each matrix is transposed before the three are joined on axis 0.
    return np.concatenate(
        tuple(np.transpose(part)
              for part in (static, first_order, second_order)))
示例6: SpeechFeaturesPreprocessor
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import delta [as 別名]
def SpeechFeaturesPreprocessor(feature_type: str = "mfcc",
                               delta_order: int = 0,
                               delta_window: int = 2,
                               **kwargs) -> Callable:
    """Build a function that computes speech features for an audio object.

    The returned function looks up the extractor registered under
    `feature_type` in `FEATURE_TYPES`, applies it to the audio, and then
    appends `delta_order` successive delta feature sets, each derived
    from the previous one. For the keyword arguments each extractor
    accepts, see http://python-speech-features.readthedocs.io/.

    Arguments:
        feature_type: key into `FEATURE_TYPES` (default is mfcc)
        delta_order: maximum order of the delta features (default is 0)
        delta_window: window size for delta features (default is 2)
        **kwargs: keyword arguments forwarded to the feature extractor

    Returns:
        A callable taking an `Audio` object and returning the base
        features and all delta orders concatenated along axis 1.

    Raises:
        ValueError: if `feature_type` is not a key of `FEATURE_TYPES`.
    """
    if feature_type not in FEATURE_TYPES:
        raise ValueError(
            "Unknown speech feature type '{}'".format(feature_type))

    def preprocess(audio: Audio) -> np.ndarray:
        extractor = FEATURE_TYPES[feature_type]
        latest = extractor(audio.data, samplerate=audio.rate, **kwargs)
        collected = [latest]
        for _order in range(delta_order):
            # Every order is the delta of the one computed just before.
            latest = delta(latest, delta_window)
            collected.append(latest)
        return np.concatenate(collected, axis=1)

    return preprocess
示例7: get_mfcc_feature
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import delta [as 別名]
def get_mfcc_feature(wavsignal, fs):
    """Return MFCC features with first- and second-order differences.

    Takes the numeric representation of a wav file and its sampling
    frequency. The MFCC matrix is also printed to stdout.
    """
    static = mfcc(wavsignal, fs)
    print(static)
    first_order = delta(static, 2)
    second_order = delta(first_order, 2)
    # The three feature sets are joined column-wise.
    return np.column_stack((static, first_order, second_order))
示例8: get_fbank_feature
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import delta [as 別名]
def get_fbank_feature(wavsignal, fs):
    """Return FBANK features with first- and second-order differences.

    Takes the numeric representation of a wav file and its sampling
    frequency. The base features are log filterbank energies computed
    with 40 filters.
    """
    static = logfbank(wavsignal, fs, nfilt=40)
    first_order = delta(static, 2)
    second_order = delta(first_order, 2)
    # The three feature sets are joined column-wise.
    return np.column_stack((static, first_order, second_order))
示例9: extract_features
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import delta [as 別名]
def extract_features(self, audio_path):
    """
    Extract MFCC features from an audio file, standardize them with
    ``preprocessing.scale`` and append their deltas and double deltas.

    Args:
        audio_path (str) : path to wave file without silent moments.

    Returns:
        (array) : scaled MFCCs, deltas and double deltas stacked
                  horizontally.
    """
    rate, audio = read(audio_path)

    mfcc_options = dict(
        winlen=0.05,        # analysis window length in seconds
        winstep=0.01,       # step between successive windows in seconds
        numcep=13,          # number of cepstrum to return
        nfilt=30,           # number of filters in the filterbank
        nfft=1024,          # FFT size
        appendEnergy=True,  # zeroth cepstral coefficient is replaced with
                            # the log of the total frame energy
    )
    scaled = preprocessing.scale(mfcc(audio, rate, **mfcc_options))

    first_order = delta(scaled, 2)
    second_order = delta(first_order, 2)
    return np.hstack((scaled, first_order, second_order))
示例10: extract_features
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import delta [as 別名]
def extract_features(self, audio_path):
    """
    Extract MFCC features from an audio file, standardize them with
    ``preprocessing.scale`` and append their deltas and double deltas.

    Args:
        audio_path (str) : path to wave file without silent moments.

    Returns:
        (array) : scaled MFCCs, deltas and double deltas stacked
                  horizontally.
    """
    rate, audio = read(audio_path)

    mfcc_options = dict(
        winlen=0.05,        # analysis window length in seconds
        winstep=0.01,       # step between successive windows in seconds
        numcep=5,           # number of cepstrum to return
        nfilt=30,           # number of filters in the filterbank
        nfft=512,           # FFT size
        appendEnergy=True,  # zeroth cepstral coefficient is replaced with
                            # the log of the total frame energy
    )
    scaled = preprocessing.scale(mfcc(audio, rate, **mfcc_options))

    first_order = delta(scaled, 2)
    second_order = delta(first_order, 2)
    return np.hstack((scaled, first_order, second_order))
示例11: __mfcc
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import delta [as 別名]
def __mfcc(audio_data, sampling_rate, win_len, win_step, num_features, n_fft, f_min, f_max):
    """Convert a wav signal into MFCC features with first-order deltas.

    Half of the requested features are cepstral coefficients and the
    other half are their deltas, so `num_features` must be even.

    Args:
        audio_data (np.ndarray): Wav signal.
        sampling_rate (int): Sampling rate.
        win_len (float): Window length in seconds.
        win_step (float): Window stride in seconds.
        num_features (int): Total number of features to generate; also
            used as the number of filterbank filters.
        n_fft (int): FFT size passed to the extractor as `nfft`.
        f_min (float): Minimum frequency to consider.
        f_max (float): Maximum frequency to consider.

    Returns:
        np.ndarray: MFCCs and their deltas joined along axis 1.

    Raises:
        ValueError: If `num_features` is odd.
    """
    if num_features % 2:
        raise ValueError('num_features is not a multiple of 2.')

    cepstra = psf.mfcc(
        signal=audio_data,
        samplerate=sampling_rate,
        winlen=win_len,
        winstep=win_step,
        numcep=num_features // 2,
        nfilt=num_features,
        nfft=n_fft,
        lowfreq=f_min,
        highfreq=f_max,
        preemph=0.97,
        ceplifter=22,
        appendEnergy=True,
    )
    # First-order differences (delta features) of the coefficients.
    cepstra_delta = psf.delta(cepstra, 2)
    return np.concatenate([cepstra, cepstra_delta], axis=1)
示例12: GetMfccFeature
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import delta [as 別名]
def GetMfccFeature(wavsignal, fs):
    """Return MFCC features with first- and second-order differences.

    Only ``wavsignal[0]`` is used as the input signal; ``fs`` is its
    sampling frequency.
    """
    # Get the input features.
    static = mfcc(wavsignal[0], fs)
    first_order = delta(static, 2)
    second_order = delta(first_order, 2)
    # The result holds the MFCC feature matrix followed by its first- and
    # second-order difference matrices, joined column-wise.
    return np.column_stack((static, first_order, second_order))
示例13: GetMfccFeature
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import delta [as 別名]
def GetMfccFeature(wavsignal, fs):
    """Return MFCC features with first- and second-order differences.

    Only ``wavsignal[0]`` is used as the input signal; ``fs`` is its
    sampling frequency.
    """
    # Get the input features.
    base = mfcc(wavsignal[0], fs)
    base_d = delta(base, 2)
    base_dd = delta(base_d, 2)
    # The result holds the MFCC feature matrix followed by its first- and
    # second-order difference matrices, joined column-wise.
    return np.column_stack((base, base_d, base_dd))