This page collects typical usage examples of the python_speech_features.mfcc method in Python. If you are wondering how exactly to use python_speech_features.mfcc, the curated code examples below may help. You can also explore further usage examples of the python_speech_features module itself.
The following presents 15 code examples of the python_speech_features.mfcc method, sorted by popularity by default.
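Before the examples, here is a minimal sketch of the core call: python_speech_features.mfcc takes a 1-D signal array and a sample rate and returns a 2-D array of shape (num_frames, numcep). The file name below is hypothetical.

import scipy.io.wavfile as wav
from python_speech_features import mfcc

rate, signal = wav.read("example.wav")    # hypothetical mono WAV file
features = mfcc(signal, samplerate=rate)  # defaults: 13 coefficients, 25 ms window, 10 ms step
print(features.shape)                     # (num_frames, 13)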
Example 1: get_file_features
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def get_file_features(wav_fname, num_ceps):
"""
Extract mfcc features from a file.
"""
# read wave
fs, sig = scipy.io.wavfile.read(wav_fname)
# get mfccs
mfccs = psf.mfcc(sig, samplerate=fs, winlen=0.025, winstep=0.01,
numcep=num_ceps, nfilt=26, nfft=512, lowfreq=0,
highfreq=None, preemph=0.97, ceplifter=22,
appendEnergy=False)
# compute mfcc means
mfcc_means = np.round(mfccs.mean(axis=0), 3)
return mfcc_means
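A quick usage sketch for the helper above, assuming the imports implied by the snippet (scipy.io.wavfile, numpy as np, python_speech_features as psf) and a hypothetical 16 kHz mono file speech.wav:

import numpy as np
import scipy.io.wavfile
import python_speech_features as psf

feature_vector = get_file_features("speech.wav", num_ceps=13)  # hypothetical file
print(feature_vector.shape)  # (13,) -- one rounded per-coefficient mean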
Example 2: _mfcc_and_labels
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def _mfcc_and_labels(audio, labels):
""" Convert to MFCC features and corresponding (interpolated) labels.
Returns:
A tuple, `(mfcc_features, mfcc_labels)`. A 1-D float array and a 1-D int
array, both with the same shape.
"""
mfcc_sample_rate = 100.0
    winfunc = np.hamming  # equivalent to the original lambda wrapper
mfcc_features = python_speech_features.mfcc(audio, samplerate=timit.SAMPLE_RATE, winlen=0.025,
winstep=1.0/mfcc_sample_rate, lowfreq=85.0,
highfreq=timit.SAMPLE_RATE/2, winfunc=winfunc)
t_audio = np.linspace(0.0, audio.shape[0] * 1.0 / timit.SAMPLE_RATE, audio.size, endpoint=False)
t_mfcc = np.linspace(0.0, mfcc_features.shape[0] * 1.0 / mfcc_sample_rate, mfcc_features.shape[0], endpoint=False)
interp_func = scipy.interpolate.interp1d(t_audio, labels, kind='nearest')
mfcc_labels = interp_func(t_mfcc)
return mfcc_features, mfcc_labels
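A hedged smoke test for the function above, assuming timit.SAMPLE_RATE is 16000; the synthetic audio and labels are made up for illustration:

import numpy as np

audio = np.random.randn(16000)    # 1 second of fake audio
labels = np.repeat([3, 7], 8000)  # two phoneme regions, one label per sample
feats, frame_labels = _mfcc_and_labels(audio, labels)
assert feats.shape[0] == frame_labels.shape[0]  # one label per 10 ms frame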
Example 3: export_test_mfcc
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def export_test_mfcc(self):
    # this is used to export data, e.g. into iOS
    testset = next(self.next_batch())[0]
    mfcc = testset['the_input'][0:self.batch_size]  # export all MFCCs in the batch
words = testset['source_str'][0:self.batch_size]
labels = testset['the_labels'][0:self.batch_size]
print("exporting:", type(mfcc))
print(mfcc.shape)
print(words.shape)
print(labels.shape)
    # we save each mfcc/words/label array as its own CSV file
    for i in range(mfcc.shape[0]):
        np.savetxt('./Archive/test_spectro/test_spectro_{}.csv'.format(i), mfcc[i, :, :], delimiter=',')
print(words)
print(labels)
return
Example 4: audiofile_to_input_vector
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def audiofile_to_input_vector(audio_filename, numcep, numcontext):
r"""
Given a WAV audio file at ``audio_filename``, calculates ``numcep`` MFCC features
at every 0.01s time step with a window length of 0.025s. Appends ``numcontext``
context frames to the left and right of each time step, and returns this data
in a numpy array.
"""
# Load wav files
fs, audio = wav.read(audio_filename)
# Get mfcc coefficients
features = mfcc(audio, samplerate=fs, numcep=numcep, winlen=0.032, winstep=0.02, winfunc=np.hamming)
# Add empty initial and final contexts
empty_context = np.zeros((numcontext, numcep), dtype=features.dtype)
features = np.concatenate((empty_context, features, empty_context))
return features
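The function above only pads with empty frames; the per-step context windows still have to be assembled. A minimal sketch of that follow-up step (this continuation is an assumption, not part of the original example):

import numpy as np

def add_context_windows(features, numcontext):
    # features: [num_steps + 2*numcontext, numcep], already padded as above
    num_steps = features.shape[0] - 2 * numcontext
    window = 2 * numcontext + 1
    # each output row concatenates a frame with its left and right context
    return np.stack([features[i:i + window].flatten() for i in range(num_steps)])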
Example 5: convert_inputs_to_ctc_format
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def convert_inputs_to_ctc_format(audio, fs, target_text, num_features):
# print(target_text)
inputs = mfcc(audio, samplerate=fs, numcep=num_features)
    # Transform into a 3-D array (batch of one)
train_inputs = np.asarray(inputs[np.newaxis, :])
train_inputs = (train_inputs - np.mean(train_inputs)) / np.std(train_inputs)
train_seq_len = [train_inputs.shape[1]]
    # Keep only the lowercase words and strip punctuation
    original = (' '.join(target_text.strip().lower().split(' '))
                .replace('.', '').replace('?', '').replace(',', '')
                .replace("'", '').replace('!', '').replace('-', ''))
    # print(original)
    # Double each space so that splitting yields an empty token per word gap
    targets = original.replace(' ', '  ')
    targets = targets.split(' ')
    # Replace empty tokens with SPACE_TOKEN
targets = np.hstack([SPACE_TOKEN if x == '' else list(x) for x in targets])
# Transform char into index
targets = np.asarray([SPACE_INDEX if x == SPACE_TOKEN else ord(x) - FIRST_INDEX
for x in targets])
return train_inputs, targets, train_seq_len, original
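SPACE_TOKEN, SPACE_INDEX, and FIRST_INDEX are referenced but never defined in this snippet. A commonly used set of definitions (an assumption here) and a quick check of the encoding:

import numpy as np

SPACE_TOKEN = '<space>'     # assumed constants; not defined in the snippet
SPACE_INDEX = 0
FIRST_INDEX = ord('a') - 1  # so 'a' maps to 1, 'b' to 2, ...

text = "hi there"
chars = np.hstack([SPACE_TOKEN if w == '' else list(w)
                   for w in text.replace(' ', '  ').split(' ')])
indices = np.asarray([SPACE_INDEX if c == SPACE_TOKEN else ord(c) - FIRST_INDEX
                      for c in chars])
print(indices)  # [ 8  9  0 20  8  5 18  5]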
Example 6: extract_features
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def extract_features(folder, num_ceps, fname, augmented=False):
"""
Extract features from files.
"""
# collect paths to wave files
wave_fnames = [os.path.join(root, file)
for root, dirs, files in os.walk(folder) for file in files]
# init features & errors and column names
features = []
errors_caused = []
# in case augmented data is processed
    if augmented:
        wave_fnames = [fname for fname in wave_fnames if "augment" in fname]
    else:
        wave_fnames = [fname for fname in wave_fnames if "augment" not in fname]
# get voice features
    for wave_fname in wave_fnames:
        try:
            feats = get_file_features(wave_fname, num_ceps)
            features.append([wave_fname] + list(feats) + [wave_fname.split("/")[-2]])
        except Exception:
            print("Error: an error occurred when processing", wave_fname)
            errors_caused.append(wave_fname)
# define column names for csv
column_names = ["file_name"] + ["mfcc" + str(i) for i in range(num_ceps)] + ["emotion"]
# export results to file
data = pd.DataFrame(features, columns=column_names)
data.to_csv(fname)
return errors_caused
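A hypothetical call for the function above: walk ./data, extract 13 MFCC means per file, write them to features.csv, and collect the files that failed:

failed = extract_features("./data", num_ceps=13, fname="features.csv")
print(len(failed), "files could not be processed")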
Example 7: extract_features
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def extract_features(audio, rate):
    """Extract 20-dim MFCC features from an audio signal, apply CMS
    (cepstral mean subtraction) via scaling, and stack delta features
    to form a 40-dim feature vector."""
    mfcc_feature = mfcc.mfcc(audio, rate, 0.025, 0.01, 20, nfft=1200, appendEnergy=True)
    mfcc_feature = preprocessing.scale(mfcc_feature)
    delta = calculate_delta(mfcc_feature)
    combined = np.hstack((mfcc_feature, delta))
    return combined
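calculate_delta is not defined in this snippet. A minimal stand-in built on the library's own delta function, assuming the helper computes first-order deltas over a two-frame span:

from python_speech_features import delta

def calculate_delta(features):
    # first-order delta coefficients computed over +/- 2 neighboring frames
    return delta(features, 2)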
Example 8: get_feature
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def get_feature(fs, signal):
mfcc_feature = mfcc(signal, fs)
    if len(mfcc_feature) == 0:
        print("ERROR.. failed to extract mfcc feature:", len(signal), file=sys.stderr)
return mfcc_feature
Example 9: extract_energy
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def extract_energy(rate, sig):
""" Extracts the energy of frames. """
mfcc = python_speech_features.mfcc(sig, rate, appendEnergy=True)
energy_row_vec = mfcc[:, 0]
energy_col_vec = energy_row_vec[:, np.newaxis]
return energy_col_vec
Example 10: mfcc
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def mfcc(wav_path):
""" Grabs MFCC features with energy and derivates. """
(rate, sig) = wav.read(wav_path)
feat = python_speech_features.mfcc(sig, rate, appendEnergy=True)
delta_feat = python_speech_features.delta(feat, 2)
all_feats = [feat, delta_feat]
all_feats = np.array(all_feats)
# Make time the first dimension for easy length normalization padding later.
all_feats = np.swapaxes(all_feats, 0, 1)
all_feats = np.swapaxes(all_feats, 1, 2)
feat_fn = wav_path[:-3] + "mfcc13_d.npy"
np.save(feat_fn, all_feats)
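A quick shape walk-through of the two swaps above, with hypothetical sizes (100 frames, 13 coefficients):

import numpy as np

feat = np.zeros((100, 13))
delta_feat = np.zeros((100, 13))
all_feats = np.array([feat, delta_feat])  # (2, 100, 13)
all_feats = np.swapaxes(all_feats, 0, 1)  # (100, 2, 13)
all_feats = np.swapaxes(all_feats, 1, 2)  # (100, 13, 2) -- time first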
Example 11: load
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def load(data_dir=DEFAULT_DATA_DIR, mfcc=True):
""" Load all standardized TIMIT data with folded phoneme labels.
Args:
data_dir: A string. The data directory.
      mfcc: A boolean. If True, return MFCC sequences and their corresponding
        label sequences. Otherwise, return raw audio sequences and their
        associated label sequences.
Returns:
A tuple with 6 elements: train inputs, train labels, val inputs,
val labels, test inputs, test labels. Each entry is a list of sequences.
All input sequences are 2-D float arrays with shape
`[length, values_per_step]` and all label sequences are 1-D int8 arrays
with shape `[length]`.
"""
types = ['mfcc', 'mfcc_labels'] if mfcc else ['audio', 'labels']
ret = []
for name in ['train', 'val', 'test']:
for type in types:
path = os.path.join(data_dir, name + '_' + type + '.npy')
if not os.path.exists(path):
raise ValueError('Data not found in %s. Run timit.py and timitphonemerec.py.' % data_dir)
data = np.load(path)
if type == 'audio':
data = [seq[:, np.newaxis] for seq in data]
ret.append(data)
return tuple(ret)
Example 12: load_split
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def load_split(data_dir=DEFAULT_DATA_DIR, val=True, mfcc=True, normalize=True):
""" Load a standardized-TIMIT train, test split.
Args:
data_dir: A string. The data directory.
val: A boolean. If True, return the validation set as the test set.
      mfcc: A boolean. If True, return MFCC sequences and their corresponding
        label sequences. Otherwise, return raw audio sequences and their
        associated label sequences.
normalize: A boolean. If True, normalize each sequence individually by
centering / scaling.
Returns:
A tuple, `(train_inputs, train_labels, test_inputs, test_labels)`. Each is
a list of sequences. All inputs are 2-D float arrays with shape
`[length, values_per_step]` and all labels are 1-D int8 arrays with shape
`[length]`.
"""
sequence_lists = load(data_dir=data_dir, mfcc=mfcc)
train_inputs, train_labels, val_inputs, val_labels, test_inputs, test_labels = sequence_lists
if val:
test_inputs = val_inputs
test_labels = val_labels
if normalize:
train_inputs = [seq - np.mean(seq, axis=0, keepdims=True) for seq in train_inputs]
train_inputs = [seq / np.std(seq, axis=0, keepdims=True) for seq in train_inputs]
test_inputs = [seq - np.mean(seq, axis=0, keepdims=True) for seq in test_inputs]
test_inputs = [seq / np.std(seq, axis=0, keepdims=True) for seq in test_inputs]
return train_inputs, train_labels, test_inputs, test_labels
Example 13: mfcc_features_extraction
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def mfcc_features_extraction(wav):
    inputWav, wav = readWavFile(wav)
    print(inputWav)
    rate, signal = wavv.read(inputWav)
    mfcc_features = mfcc(signal, rate)
    return mfcc_features, wav
Example 14: mfcc_features_extraction
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def mfcc_features_extraction(wav):
    inputWav, wav = readWavFile(wav)
    rate, signal = wavv.read(inputWav)
    mfcc_features = mfcc(signal, rate)
    # a numpy array with one row per frame; each row holds one feature vector
    return mfcc_features, wav
Example 15: mean_features
# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
def mean_features(mfcc_features, wav):
    # make a numpy array whose length equals the number of MFCC coefficients
    mean_features = np.zeros(len(mfcc_features[0]))
    # for each coefficient, sum over all frames, then divide by the frame count
    for x in range(len(mfcc_features)):
        for y in range(len(mfcc_features[x])):
            mean_features[y] += mfcc_features[x][y]
    mean_features = mean_features / len(mfcc_features)
    print(mean_features)
    writeFeatures(mean_features, wav)
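The double loop above is just a column-wise mean; an equivalent vectorized form with numpy:

import numpy as np

mean_feats = np.asarray(mfcc_features).mean(axis=0)  # average each coefficient over all frames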