本文整理匯總了Python中python_speech_features.logfbank方法的典型用法代碼示例。如果您正苦於以下問題:Python python_speech_features.logfbank方法的具體用法?Python python_speech_features.logfbank怎麼用?Python python_speech_features.logfbank使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊python_speech_features的用法示例。
在下文中一共展示了python_speech_features.logfbank方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: fbank
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import logfbank [as 別名]
def fbank(wav_path, flat=True):
    """Compute log Mel filterbank features with deltas and save them as .npy.

    The audio at ``wav_path`` is read, a 40-filter log Mel filterbank is
    computed, the output of ``extract_energy`` is prepended column-wise, and
    first- and second-order deltas (window of 2) are derived from the result.
    The features are written next to the audio file as
    ``<wav_path without extension>.fbank.npy``.

    Args:
        wav_path: Path of the audio file to read.
        flat: When true (default), the static, delta and delta-delta
            features are concatenated along axis 1 into one 2-D array.
            When false, they are stacked on a new trailing axis so that the
            first axis of the saved array is the one shared by all three
            feature matrices (presumably time), which makes later length
            padding easier.

    Returns:
        None. The features are saved to disk as a side effect.
    """
    import os

    (rate, sig) = wav.read(wav_path)
    if len(sig) == 0:
        # Only warn: extraction is still attempted on the empty signal.
        logger.warning("Empty wav: {}".format(wav_path))

    fbank_feat = python_speech_features.logfbank(sig, rate, nfilt=40)
    energy = extract_energy(rate, sig)
    feat = np.hstack([energy, fbank_feat])
    delta_feat = python_speech_features.delta(feat, 2)
    delta_delta_feat = python_speech_features.delta(delta_feat, 2)
    all_feats = [feat, delta_feat, delta_delta_feat]

    if flat:
        all_feats = np.concatenate(all_feats, axis=1)
    else:
        # Stacking on the last axis keeps each matrix's own axes first, which
        # is the same layout as building a (3, ...) array and rotating the
        # stream axis to the end.
        all_feats = np.stack(all_feats, axis=-1)

    # Strip the real extension instead of assuming it is three characters
    # long; for "x.wav" this still yields "x.fbank.npy".
    feat_fn = os.path.splitext(wav_path)[0] + ".fbank.npy"
    np.save(feat_fn, all_feats)
示例2: __mel
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import logfbank [as 別名]
def __mel(audio_data, sampling_rate, win_len, win_step, num_features, n_fft, f_min, f_max):
    """Compute a log-scaled Mel filterbank representation of a wav signal.

    Args:
        audio_data (np.ndarray): Wav signal.
        sampling_rate (int): Sampling rate of the signal.
        win_len (float): Analysis window length in seconds.
        win_step (float): Step between successive windows in seconds.
        num_features (int): Number of filters in the filterbank.
        n_fft (int): FFT size.
        f_min (float): Lowest band edge of the Mel filters.
        f_max (float): Highest band edge of the Mel filters.

    Returns:
        np.ndarray: Log Mel filterbank energies. Shape: [time, num_features]
    """
    # Collect every option in one place, then forward it to the library call.
    filterbank_options = {
        'signal': audio_data,
        'samplerate': sampling_rate,
        'winlen': win_len,
        'winstep': win_step,
        'nfilt': num_features,
        'nfft': n_fft,
        'lowfreq': f_min,
        'highfreq': f_max,
        'preemph': 0.97,
    }
    return psf.logfbank(**filterbank_options)
示例3: __call__
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import logfbank [as 別名]
def __call__(self, pkg, cached_file=None):
    """Add log Mel filterbank features for ``pkg['chunk']`` to the package.

    Args:
        pkg: Input package; it is first normalised by ``format_package``.
            The waveform is read from ``pkg['chunk']``. When ``cached_file``
            is given, ``pkg['chunk_beg_i']`` and ``pkg['chunk_end_i']`` are
            also read.
        cached_file: Optional path to features previously stored with
            ``torch.save``. When given, the features are loaded and sliced
            instead of being recomputed.

    Returns:
        The package, with the features stored under ``pkg[self.name]`` and
        ``pkg['dec_resolution']`` set to ``self.hop``.
    """
    pkg = format_package(pkg)
    wav = pkg['chunk']
    if torch.is_tensor(wav):
        # The feature extractor works on NumPy arrays, not tensors.
        wav = wav.data.numpy().astype(np.float32)

    if cached_file is not None:
        # Load pre-computed data and keep only the frames of this chunk;
        # chunk boundaries are converted to frame indices via the hop.
        X = torch.load(cached_file)
        beg_i = pkg['chunk_beg_i'] // self.hop
        end_i = pkg['chunk_end_i'] // self.hop
        pkg[self.name] = X[:, beg_i:end_i]
    else:
        # Window and hop are divided by the rate because logfbank takes
        # its window length and step in seconds.
        winlen = float(self.win) / self.rate
        winstep = float(self.hop) / self.rate
        # Transpose so that frames run along the second axis.
        X = logfbank(wav, self.rate, winlen, winstep,
                     self.n_filters, self.n_fft).T
        expected_frames = len(wav) // self.hop

        if self.der_order > 0:
            # Append derivatives of order 1..der_order below the static
            # features (concatenation along the first axis).
            derivatives = [librosa.feature.delta(X, order=n)
                           for n in range(1, self.der_order + 1)]
            X = np.concatenate([X] + derivatives)

        fbank = torch.FloatTensor(X)
        if fbank.shape[1] < expected_frames:
            P = expected_frames - fbank.shape[1]
            # Pad by repeating the last frame; a temporary leading axis is
            # added around the call to F.pad and removed afterwards.
            fbank = F.pad(fbank.unsqueeze(0), (0, P), mode='replicate')
            fbank = fbank.squeeze(0)
        pkg[self.name] = fbank

    # Overwrite resolution to hop length
    pkg['dec_resolution'] = self.hop
    return pkg
示例4: SpeechFeaturesPreprocessor
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import logfbank [as 別名]
def SpeechFeaturesPreprocessor(feature_type: str = "mfcc",
                               delta_order: int = 0,
                               delta_window: int = 2,
                               **kwargs) -> Callable:
    """Build a preprocessor that turns audio into speech features.

    The returned callable computes the requested base features (e.g. MFCC)
    and then appends delta features up to `delta_order`, each order being
    derived from the previous one. Extra keyword arguments such as `winlen`
    and `winstep` are forwarded to the feature function; see
    http://python-speech-features.readthedocs.io/ for the options of each
    feature type.

    With the defaults, 13 MFCCs per frame are produced. Setting
    `delta_order=2` adds delta and delta-delta features, giving 39
    coefficients per frame.

    Arguments:
        feature_type: mfcc, fbank, logfbank or ssc (default is mfcc)
        delta_order: maximum order of the delta features (default is 0)
        delta_window: window size for delta features (default is 2)
        **kwargs: keyword arguments for the appropriate function from
            python_speech_features

    Returns:
        A function mapping an audio object to a numpy array of shape
        [num_frames, num_features].

    Raises:
        ValueError: if `feature_type` is not a known feature type.
    """
    def preprocess(audio: Audio) -> np.ndarray:
        extractor = FEATURE_TYPES[feature_type]
        latest = extractor(audio.data, samplerate=audio.rate, **kwargs)
        stacked = [latest]

        # Each delta order is computed from the one before it.
        for _ in range(delta_order):
            latest = delta(latest, delta_window)
            stacked.append(latest)

        return np.concatenate(stacked, axis=1)

    if feature_type in FEATURE_TYPES:
        return preprocess

    raise ValueError(
        "Unknown speech feature type '{}'".format(feature_type))
示例5: get_fbank_feature
# 需要導入模塊: import python_speech_features [as 別名]
# 或者: from python_speech_features import logfbank [as 別名]
def get_fbank_feature(wavsignal, fs):
    '''
    Take the numeric representation of a wav signal and its sampling rate,
    and return the speech FBANK features joined column-wise with their
    first-order and second-order differences.
    '''
    # Start with the 40-filter log filterbank, then derive each difference
    # from the most recently computed feature matrix.
    streams = [logfbank(wavsignal, fs, nfilt=40)]
    for _ in range(2):
        streams.append(delta(streams[-1], 2))
    return np.column_stack(streams)