当前位置: 首页>>代码示例>>Python>>正文


Python python_speech_features.mfcc方法代码示例

本文整理汇总了Python中python_speech_features.mfcc方法的典型用法代码示例。如果您正苦于以下问题：Python python_speech_features.mfcc方法的具体用法？Python python_speech_features.mfcc怎么用？Python python_speech_features.mfcc使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块python_speech_features的用法示例。


在下文中一共展示了python_speech_features.mfcc方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_file_features

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def get_file_features(wav_fname, num_ceps):
    """
    Compute the per-coefficient mean of a wave file's MFCCs.

    Args:
        wav_fname: path of the wave file to read.
        num_ceps: number of cepstral coefficients requested from the
            extractor (passed on as ``numcep``).

    Returns:
        The MFCC matrix averaged over its first axis, rounded to 3 decimals.
    """
    sample_rate, samples = scipy.io.wavfile.read(wav_fname)

    # framing / filterbank settings handed to the extractor
    mfcc_settings = dict(
        samplerate=sample_rate,
        winlen=0.025,
        winstep=0.01,
        numcep=num_ceps,
        nfilt=26,
        nfft=512,
        lowfreq=0,
        highfreq=None,
        preemph=0.97,
        ceplifter=22,
        appendEnergy=False,
    )
    cepstra = psf.mfcc(samples, **mfcc_settings)

    # average over the first axis, keep 3 decimals
    return np.round(cepstra.mean(axis=0), 3)
开发者ID:SuperKogito,项目名称:pydiogment,代码行数:18,代码来源:_3_get_features.py

示例2: _mfcc_and_labels

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def _mfcc_and_labels(audio, labels):
  """ Compute MFCC features and resample the labels onto the MFCC frame times.

  Every MFCC frame receives the label of the audio sample that is nearest
  in time.

  Args:
    audio: The audio samples (an array; its `.shape[0]` and `.size` are used
      to build the sample time axis).
    labels: Labels aligned with the audio time axis -- presumably one per
      audio sample; verify against caller.

  Returns:
    A tuple, `(mfcc_features, mfcc_labels)`: the output of
    `python_speech_features.mfcc` and the interpolated labels, one per MFCC
    frame (`mfcc_features.shape[0]` entries).
  """
  frames_per_second = 100.0
  audio_rate = timit.SAMPLE_RATE

  mfcc_features = python_speech_features.mfcc(
      audio,
      samplerate=audio_rate,
      winlen=0.025,
      winstep=1.0/frames_per_second,
      lowfreq=85.0,
      highfreq=audio_rate/2,
      winfunc=lambda n: np.hamming(n))
  num_frames = mfcc_features.shape[0]

  # Time stamps (in seconds) of every audio sample and of every MFCC frame.
  sample_times = np.linspace(0.0, audio.shape[0] * 1.0 / audio_rate, audio.size, endpoint=False)
  frame_times = np.linspace(0.0, num_frames * 1.0 / frames_per_second, num_frames, endpoint=False)

  nearest_label = scipy.interpolate.interp1d(sample_times, labels, kind='nearest')
  return mfcc_features, nearest_label(frame_times)
开发者ID:rdipietro,项目名称:mist-rnns,代码行数:19,代码来源:timitphonemerec.py

示例3: export_test_mfcc

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def export_test_mfcc(self):
        """Dump the MFCC inputs of one batch to CSV files (e.g. for use on iOS).

        Pulls one batch from ``self.next_batch()``, prints the type and shapes
        of its inputs, source strings and labels, and writes every MFCC matrix
        of the batch to ``./Archive/test_spectro/test_spectro_<i>.csv``.
        """
        first_batch = next(self.next_batch())[0]
        in_batch = slice(0, self.batch_size)

        # keep at most batch_size entries of each field
        mfcc_batch = first_batch['the_input'][in_batch]
        words = first_batch['source_str'][in_batch]
        labels = first_batch['the_labels'][in_batch]

        print("exporting:", type(mfcc_batch))
        for field in (mfcc_batch, words, labels):
            print(field.shape)

        # one csv file per mfcc matrix of the batch
        csv_template = './Archive/test_spectro/test_spectro_{}.csv'
        for idx in range(mfcc_batch.shape[0]):
            np.savetxt(csv_template.format(idx), mfcc_batch[idx, :, :], delimiter=',')

        print(words)
        print(labels)

        return
开发者ID:robmsmt,项目名称:KerasDeepSpeech,代码行数:23,代码来源:generator.py

示例4: audiofile_to_input_vector

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def audiofile_to_input_vector(audio_filename, numcep, numcontext):
    r"""
    Given a WAV audio file at ``audio_filename``, calculates ``numcep`` MFCC features
    at every 0.02s time step with a Hamming window of length 0.032s. Pads the
    resulting sequence with ``numcontext`` all-zero frames at the start and at the
    end, and returns this data in a numpy array.

    Args:
        audio_filename: path of the WAV file to read.
        numcep: number of cepstral coefficients per frame.
        numcontext: number of zero frames added on each side of the sequence.

    Returns:
        Array with ``numcontext`` zero rows, then the MFCC frames, then
        ``numcontext`` zero rows; the padding has ``numcep`` columns and the
        dtype of the MFCC output.
    """
    # Load wav files
    fs, audio = wav.read(audio_filename)

    # Get mfcc coefficients (32 ms Hamming windows, one every 20 ms)
    features = mfcc(audio, samplerate=fs, numcep=numcep, winlen=0.032, winstep=0.02, winfunc=np.hamming)

    # Zero padding at both ends only: per-time-step context windows are not built here
    empty_context = np.zeros((numcontext, numcep), dtype=features.dtype)
    features = np.concatenate((empty_context, features, empty_context), axis=0)

    return features
开发者ID:AASHISHAG,项目名称:deepspeech-german,代码行数:20,代码来源:audio.py

示例5: convert_inputs_to_ctc_format

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def convert_inputs_to_ctc_format(audio, fs, target_text, num_features):
    """Turn one utterance into the arrays used for CTC training.

    Args:
        audio: audio samples handed to ``mfcc``.
        fs: sample rate of ``audio``.
        target_text: transcript of the utterance.
        num_features: number of cepstral coefficients (passed as ``numcep``).

    Returns:
        ``(train_inputs, targets, train_seq_len, original)``: the MFCCs with a
        leading axis added, normalized with their global mean and standard
        deviation; the transcript as an array of label indices; a one-element
        list holding ``train_inputs.shape[1]``; and the cleaned transcript.
    """
    # Features: add a leading axis, then normalize over all values at once
    features = np.asarray(mfcc(audio, samplerate=fs, numcep=num_features)[np.newaxis, :])
    train_inputs = (features - np.mean(features)) / np.std(features)
    train_seq_len = [train_inputs.shape[1]]

    # Transcript: trim, lower-case and drop the punctuation . ? , ' ! -
    original = target_text.strip().lower()
    for mark in ('.', '?', ',', "'", '!', '-'):
        original = original.replace(mark, '')

    # Doubling the spaces makes split(' ') emit an empty string between words
    pieces = original.replace(' ', '  ').split(' ')

    # Empty strings become the space token, words are split into characters
    symbols = np.hstack([list(piece) if piece != '' else SPACE_TOKEN for piece in pieces])

    # Characters -> label indices
    targets = np.asarray([ord(symbol) - FIRST_INDEX if symbol != SPACE_TOKEN else SPACE_INDEX
                          for symbol in symbols])

    return train_inputs, targets, train_seq_len, original
开发者ID:philipperemy,项目名称:tensorflow-ctc-speech-recognition,代码行数:26,代码来源:utils.py

示例6: extract_features

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def extract_features(folder, num_ceps, fname, augmented=False):
    """
    Extract the MFCC features of the files under a folder and export them
    to a csv file.

    Args:
        folder    : root directory, walked recursively; every file found is
                    treated as a wave file.
        num_ceps  : number of cepstral coefficients per file.
        fname     : path of the csv file to write.
        augmented : if true only paths containing "augment" are processed,
                    otherwise only paths that do not contain it.

    Returns:
        List of the paths that could not be processed.
    """
    # collect paths to wave files
    wave_fnames = [os.path.join(root, file_name)
                   for root, _, files in os.walk(folder) for file_name in files]

    # keep either the augmented or the original files; the loop variable is
    # deliberately not called `fname`, so the output path can never be clobbered
    keep_augmented = bool(augmented)
    wave_fnames = [path for path in wave_fnames if ("augment" in path) == keep_augmented]

    # init features & errors
    features = []
    errors_caused = []

    # get voice features
    for wave_fname in wave_fnames:
        try:
            feats = get_file_features(wave_fname, num_ceps)
            # the label is the name of the directory holding the file
            emotion = os.path.basename(os.path.dirname(wave_fname))
            features.append([wave_fname] + list(feats) + [emotion])
        except Exception:
            # best effort per file, but let KeyboardInterrupt/SystemExit through
            print("Error: error occured when processing ", wave_fname)
            errors_caused.append(wave_fname)

    # define column names for csv
    column_names = ["file_name"] + ["mfcc" + str(i) for i in range(num_ceps)] + ["emotion"]

    # export results to file
    data = pd.DataFrame(features, columns=column_names)
    data.to_csv(fname)
    return errors_caused
开发者ID:SuperKogito,项目名称:pydiogment,代码行数:34,代码来源:_3_get_features.py

示例7: extract_features

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def extract_features(audio,rate):
    """Build the feature matrix of an audio signal: 20 MFCCs per frame,
    standardized with ``preprocessing.scale`` and stacked side by side with
    their deltas from ``calculate_delta`` (a 40-dim vector per frame,
    according to the original author)."""
    cepstra = mfcc.mfcc(audio, rate, 0.025, 0.01, 20, nfft=1200, appendEnergy=True)
    scaled = preprocessing.scale(cepstra)
    return np.hstack((scaled, calculate_delta(scaled)))
开发者ID:Atul-Anand-Jha,项目名称:Speaker-Identification-Python,代码行数:11,代码来源:featureextraction.py

示例8: get_feature

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def get_feature(fs, signal):
    """Compute the MFCC features of a signal.

    Args:
        fs: sample rate of ``signal``.
        signal: audio samples.

    Returns:
        The result of ``mfcc(signal, fs)``. When that result is empty an
        error is reported on stderr and the empty result is still returned.
    """
    mfcc_feature = mfcc(signal, fs)
    if len(mfcc_feature) == 0:
        # print() with file=...: the Python 2 "print >> stream" statement
        # raises TypeError on Python 3 instead of writing the message.
        print("ERROR.. failed to extract mfcc feature:", len(signal), file=sys.stderr)
    return mfcc_feature
开发者ID:crouchred,项目名称:speaker-recognition-py3,代码行数:7,代码来源:features.py

示例9: extract_energy

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def extract_energy(rate, sig):
    """ Returns the first MFCC coefficient of every frame (computed with
    appendEnergy=True) as a column vector. """

    feats = python_speech_features.mfcc(sig, rate, appendEnergy=True)
    # Column 0 of every frame, with a trailing axis added: (n,) -> (n, 1).
    return feats[:, 0][:, np.newaxis]
开发者ID:persephone-tools,项目名称:persephone,代码行数:9,代码来源:feat_extract.py

示例10: mfcc

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def mfcc(wav_path):
    """ Computes MFCCs (with energy) and their deltas for a wav file and
    saves them to the wav path with its last three characters replaced by
    "mfcc13_d.npy". Returns nothing. """

    rate, sig = wav.read(wav_path)
    cepstra = python_speech_features.mfcc(sig, rate, appendEnergy=True)
    deltas = python_speech_features.delta(cepstra, 2)

    # Stacked as (kind, time, coefficient); the two swaps reorder the axes to
    # (time, coefficient, kind) so that time comes first, which eases length
    # normalization padding later.
    stacked = np.array([cepstra, deltas]).swapaxes(0, 1).swapaxes(1, 2)

    np.save(wav_path[:-3] + "mfcc13_d.npy", stacked)
开发者ID:persephone-tools,项目名称:persephone,代码行数:16,代码来源:feat_extract.py

示例11: load

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def load(data_dir=DEFAULT_DATA_DIR, mfcc=True):
  """ Load the train, val and test arrays stored as `.npy` files in `data_dir`.

  Args:
    data_dir: A string. The directory holding `<split>_<kind>.npy` files.
    mfcc: A boolean. If True, load the `mfcc` / `mfcc_labels` files;
      otherwise load the `audio` / `labels` files.

  Returns:
    A tuple with 6 elements: train inputs, train labels, val inputs,
    val labels, test inputs, test labels. Raw-audio sequences get a trailing
    axis added to each sequence; everything else is returned as loaded.

  Raises:
    ValueError: If any of the expected files is missing.
  """
  if mfcc:
    kinds = ['mfcc', 'mfcc_labels']
  else:
    kinds = ['audio', 'labels']

  loaded = []
  for split in ('train', 'val', 'test'):
    for kind in kinds:
      path = os.path.join(data_dir, split + '_' + kind + '.npy')
      if not os.path.exists(path):
        raise ValueError('Data not found in %s. Run timit.py and timitphonemerec.py.' % data_dir)
      data = np.load(path)
      if kind == 'audio':
        # Give each raw-audio sequence a trailing axis.
        data = [seq[:, np.newaxis] for seq in data]
      loaded.append(data)
  return tuple(loaded)
开发者ID:rdipietro,项目名称:mist-rnns,代码行数:30,代码来源:timitphonemerec.py

示例12: load_split

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def load_split(data_dir=DEFAULT_DATA_DIR, val=True, mfcc=True, normalize=True):
  """ Load a train / test split built on top of `load`.

  Args:
    data_dir: A string. The data directory.
    val: A boolean. If True, return the validation set as the test set.
    mfcc: A boolean. If True, return MFCC sequences and their corresponding
      label sequences. Otherwise, return raw audio sequences and their
      associated label sequences.
    normalize: A boolean. If True, normalize each input sequence individually
      by centering it and dividing by its standard deviation (both along
      axis 0).

  Returns:
    A tuple, `(train_inputs, train_labels, test_inputs, test_labels)`. Each is
    a list of sequences.
  """
  def _standardize(seq):
    # Center first; the scale is then measured on the centered sequence.
    centered = seq - np.mean(seq, axis=0, keepdims=True)
    return centered / np.std(centered, axis=0, keepdims=True)

  (train_inputs, train_labels,
   val_inputs, val_labels,
   test_inputs, test_labels) = load(data_dir=data_dir, mfcc=mfcc)

  if val:
    test_inputs, test_labels = val_inputs, val_labels

  if normalize:
    train_inputs = [_standardize(seq) for seq in train_inputs]
    test_inputs = [_standardize(seq) for seq in test_inputs]

  return train_inputs, train_labels, test_inputs, test_labels
开发者ID:rdipietro,项目名称:mist-rnns,代码行数:31,代码来源:timitphonemerec.py

示例13: mfcc_features_extraction

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def mfcc_features_extraction(wav):
	"""Read a wav file and compute its MFCC features.

	`readWavFile(wav)` supplies the path that is actually read and a second
	value that is handed back to the caller unchanged.

	Returns:
		A tuple `(mfcc_features, wav)`: the output of `mfcc(signal, rate)`
		and the second value returned by `readWavFile`.
	"""
	inputWav,wav = readWavFile(wav)
	# print() call form: same output as the Python 2 statement for a single
	# argument, and valid syntax on Python 3 (the bare statement is not).
	print(inputWav)
	rate,signal = wavv.read(inputWav)
	mfcc_features = mfcc(signal,rate)
	return mfcc_features,wav
开发者ID:gionanide,项目名称:Speech_Signal_Processing_and_Classification,代码行数:8,代码来源:mfcc_pca_feature.py

示例14: mfcc_features_extraction

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def mfcc_features_extraction(wav):
	"""Read the wav file resolved by `readWavFile` and compute its MFCCs.

	Returns:
		A tuple `(mfcc_features, wav)`: the output of `mfcc` and the second
		value returned by `readWavFile`.
	"""
	wav_path, wav = readWavFile(wav)
	sample_rate, samples = wavv.read(wav_path)
	# per the original note: a numpy array with one row per frame, each row
	# holding one feature vector
	return mfcc(samples, sample_rate), wav
开发者ID:gionanide,项目名称:Speech_Signal_Processing_and_Classification,代码行数:8,代码来源:mfcc.py

示例15: mean_features

# 需要导入模块: import python_speech_features [as 别名]
# 或者: from python_speech_features import mfcc [as 别名]
def mean_features(mfcc_features,wav):
	"""Average the MFCC features over all frames and write the result.

	Args:
		mfcc_features: frames of MFCC features, one row per frame.
		wav: passed through to `writeFeatures` together with the means.

	Raises:
		IndexError: if `mfcc_features` contains no frames.

	Prints the per-feature means, then hands them to `writeFeatures`.
	Returns nothing.
	"""
	if len(mfcc_features) == 0:
		# the original indexed mfcc_features[0] and failed the same way
		raise IndexError("mfcc_features is empty: no frames to average")
	# Column-wise mean over the frame axis replaces the manual double loop.
	# The local is not named `mean_features`, so it no longer shadows the function.
	feature_means = np.mean(np.asarray(mfcc_features, dtype=float), axis=0)
	# print() call form: valid on Python 3, same output on Python 2
	print(feature_means)
	writeFeatures(feature_means,wav)
开发者ID:gionanide,项目名称:Speech_Signal_Processing_and_Classification,代码行数:12,代码来源:mfcc.py


注：本文中的python_speech_features.mfcc方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。