This article collects typical usage examples of the Python method python_speech_features.fbank. If you are unsure what python_speech_features.fbank does or how to use it, the curated code examples below may help; you can also explore the python_speech_features module further.
The following section presents 7 code examples of python_speech_features.fbank, ordered by popularity by default.
Example 1: wav2fbank
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
import os
import numpy as np
import soundfile as sf  # `sf` here is the PySoundFile package
from python_speech_features import fbank

def wav2fbank(args):
    # A single tuple argument keeps the function easy to map over a worker pool.
    wavname, out_dir, nfilt, log = args
    x, rate = sf.read(wavname)
    fb, egy = fbank(x, rate, nfilt=nfilt)  # Mel filter-bank energies per frame
    if log:
        fb = np.log(fb)
    bname = os.path.splitext(os.path.basename(wavname))[0]
    outfile = os.path.join(out_dir, bname + '.fb')
    np.save(outfile, fb)  # np.save appends '.npy', so the file ends in '.fb.npy'
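A minimal driving sketch, assuming the placeholder file names and output directory below; because wav2fbank takes a single tuple, it can be mapped directly over a multiprocessing pool.

from multiprocessing import Pool

# Hypothetical file list and output directory; 40 filters, log-scaled output.
jobs = [(wav, 'fbank_out', 40, True) for wav in ['utt1.wav', 'utt2.wav']]
with Pool(4) as pool:
    pool.map(wav2fbank, jobs)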
Example 2: get_features
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
import numpy as np
import python_speech_features as sf  # assumed alias; it also exposes dct and lifter
from scikits.audiolab import Sndfile  # legacy audio-I/O dependency

def get_features(filename, numcep, numfilt, winlen, winstep, grad):
    f = Sndfile(filename, 'r')
    frames = f.nframes
    samplerate = f.samplerate
    data = f.read_frames(frames)
    data = np.asarray(data)
    # calc mfcc
    feat_raw, energy = sf.fbank(data, samplerate, winlen, winstep, nfilt=numfilt)
    feat = np.log(feat_raw)
    feat = sf.dct(feat, type=2, axis=1, norm='ortho')[:, :numcep]
    feat = sf.lifter(feat, L=22)
    feat = np.asarray(feat)
    # calc log energy
    log_energy = np.log(energy)  # np.log( np.sum(feat_raw**2, axis=1) )
    log_energy = log_energy.reshape([log_energy.shape[0], 1])
    mat = (feat - np.mean(feat, axis=0)) / (0.5 * np.std(feat, axis=0))
    mat = np.concatenate((mat, log_energy), axis=1)
    # calc first order derivatives
    if grad >= 1:
        gradf = np.gradient(mat)[0]
        mat = np.concatenate((mat, gradf), axis=1)
    # calc second order derivatives
    if grad == 2:
        grad2f = np.gradient(gradf)[0]
        mat = np.concatenate((mat, grad2f), axis=1)
    return mat, frames, samplerate
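A hedged usage sketch, assuming the legacy scikits.audiolab dependency is available and 'sample.wav' is a placeholder: with 13 cepstra plus log energy and both derivative orders, the feature matrix is 42 columns wide.

feats, n_frames, rate = get_features('sample.wav', numcep=13, numfilt=26,
                                     winlen=0.025, winstep=0.01, grad=2)
print(feats.shape)  # (num_frames, (13 + 1) * 3) == (num_frames, 42)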
Example 3: mk_MFB
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
import librosa
import numpy as np
from python_speech_features import fbank, delta
# `c` (project constants) and normalize_frames come from the surrounding project.

def mk_MFB(filename, sample_rate=c.SAMPLE_RATE, use_delta=c.USE_DELTA,
           use_scale=c.USE_SCALE, use_logscale=c.USE_LOGSCALE):
    audio, sr = librosa.load(filename, sr=sample_rate, mono=True)
    # audio = audio.flatten()
    filter_banks, energies = fbank(audio, samplerate=sample_rate,
                                   nfilt=c.FILTER_BANK, winlen=0.025)
    if use_logscale:
        # Convert filter-bank energies to a dB-like log scale, floored at 1e-5.
        filter_banks = 20 * np.log10(np.maximum(filter_banks, 1e-5))
    if use_delta:
        delta_1 = delta(filter_banks, N=1)
        delta_2 = delta(delta_1, N=1)
        filter_banks = normalize_frames(filter_banks, Scale=use_scale)
        delta_1 = normalize_frames(delta_1, Scale=use_scale)
        delta_2 = normalize_frames(delta_2, Scale=use_scale)
        frames_features = np.hstack([filter_banks, delta_1, delta_2])
    else:
        filter_banks = normalize_frames(filter_banks, Scale=use_scale)
        frames_features = filter_banks
    np.save(filename.replace('.wav', '.npy'), frames_features)
    return
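A hedged usage sketch; the path is a placeholder and the output width depends on the project's constants module `c`.

mk_MFB('speaker1/utt1.wav')
features = np.load('speaker1/utt1.npy')
print(features.shape)  # (num_frames, c.FILTER_BANK * 3) when USE_DELTA is set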
Example 4: pre_process_inputs
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
import random
import numpy as np
from python_speech_features import fbank, delta
# `c` (project constants) and normalize_frames come from the surrounding project.

def pre_process_inputs(signal=np.random.uniform(size=32000), target_sample_rate=8000,
                       use_delta=c.USE_DELTA):
    filter_banks, energies = fbank(signal, samplerate=target_sample_rate,
                                   nfilt=c.FILTER_BANK, winlen=0.025)
    delta_1 = delta(filter_banks, N=1)
    delta_2 = delta(delta_1, N=1)
    filter_banks = normalize_frames(filter_banks)
    delta_1 = normalize_frames(delta_1)
    delta_2 = normalize_frames(delta_2)
    if use_delta:
        frames_features = np.hstack([filter_banks, delta_1, delta_2])
    else:
        frames_features = filter_banks
    num_frames = len(frames_features)
    network_inputs = []
    """Too complicated
    for j in range(c.NUM_PREVIOUS_FRAME, num_frames - c.NUM_NEXT_FRAME):
        frames_slice = frames_features[j - c.NUM_PREVIOUS_FRAME:j + c.NUM_NEXT_FRAME]
        #network_inputs.append(np.reshape(frames_slice, (32, 20, 3)))
        network_inputs.append(frames_slice)
    """
    # Pick one random context window of NUM_PREVIOUS_FRAME + NUM_NEXT_FRAME frames.
    j = random.randrange(c.NUM_PREVIOUS_FRAME, num_frames - c.NUM_NEXT_FRAME)
    frames_slice = frames_features[j - c.NUM_PREVIOUS_FRAME:j + c.NUM_NEXT_FRAME]
    network_inputs.append(frames_slice)
    return np.array(network_inputs)
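A hedged usage sketch: with the default 4-second, 8 kHz random signal, the function returns a single randomly positioned context window; the exact shape depends on the project constants in `c`.

inputs = pre_process_inputs()
# (1, c.NUM_PREVIOUS_FRAME + c.NUM_NEXT_FRAME, c.FILTER_BANK * 3) when USE_DELTA is set.
print(inputs.shape)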
Example 5: SpeechFeaturesPreprocessor
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
from typing import Callable
import numpy as np
from python_speech_features import delta
# `Audio` and FEATURE_TYPES are defined in the surrounding module.

def SpeechFeaturesPreprocessor(feature_type: str = "mfcc",
                               delta_order: int = 0,
                               delta_window: int = 2,
                               **kwargs) -> Callable:
    """Calculate speech features.

    First, the given type of features (e.g. MFCC) is computed using a window
    of length `winlen` and step `winstep`; for additional keyword arguments
    (specific to each feature type), see
    http://python-speech-features.readthedocs.io/. Then, delta features up to
    `delta_order` are added.

    By default, 13 MFCCs per frame are computed. To add delta and delta-delta
    features (resulting in 39 coefficients per frame), set `delta_order=2`.

    Arguments:
        feature_type: mfcc, fbank, logfbank or ssc (default is mfcc)
        delta_order: maximum order of the delta features (default is 0)
        delta_window: window size for delta features (default is 2)
        **kwargs: keyword arguments for the appropriate function from
            python_speech_features

    Returns:
        A preprocessing function that maps an audio object to a numpy array
        of shape [num_frames, num_features].
    """
    if feature_type not in FEATURE_TYPES:
        raise ValueError(
            "Unknown speech feature type '{}'".format(feature_type))

    def preprocess(audio: Audio) -> np.ndarray:
        features = [FEATURE_TYPES[feature_type](
            audio.data, samplerate=audio.rate, **kwargs)]
        for _ in range(delta_order):
            features.append(delta(features[-1], delta_window))
        return np.concatenate(features, axis=1)

    return preprocess
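A hedged usage sketch; `audio` stands for an Audio instance loaded elsewhere, and winlen/winstep are forwarded to the underlying python_speech_features function.

preprocess = SpeechFeaturesPreprocessor(feature_type='mfcc', delta_order=2,
                                        winlen=0.025, winstep=0.01)
features = preprocess(audio)
print(features.shape)  # (num_frames, 13 * 3)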
Example 6: _fbank
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
def _fbank(*args, **kwargs) -> np.ndarray:
    # Thin wrapper that discards the per-frame energies returned by fbank.
    feat, _ = fbank(*args, **kwargs)
    return feat
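A hedged usage sketch with one second of synthetic 16 kHz noise and 40 Mel filters.

sig = np.random.uniform(-1, 1, 16000)
feat = _fbank(sig, samplerate=16000, nfilt=40)
print(feat.shape)  # (num_frames, 40)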
Example 7: mfcc_fbank
# Required imports: import python_speech_features [as alias]
# Or: from python_speech_features import fbank [as alias]
def mfcc_fbank(signal: np.array, sample_rate: int):  # 1D signal array.
    # With the delta stacking below commented out, the result has shape
    # (num_frames, n_filters); re-enabling it would give (num_frames, n_filters, 3).
    filter_banks, energies = fbank(signal, samplerate=sample_rate, nfilt=NUM_FBANKS)
    frames_features = normalize_frames(filter_banks)
    # delta_1 = delta(filter_banks, N=1)
    # delta_2 = delta(delta_1, N=1)
    # frames_features = np.transpose(np.stack([filter_banks, delta_1, delta_2]), (1, 2, 0))
    return np.array(frames_features, dtype=np.float32)  # Float32 precision is enough here.
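A hedged usage sketch; NUM_FBANKS and normalize_frames are defined in the surrounding project, and the one-second 16 kHz signal here is synthetic.

signal = np.random.uniform(-1, 1, 16000).astype(np.float32)
features = mfcc_fbank(signal, sample_rate=16000)
print(features.shape)  # (num_frames, NUM_FBANKS)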