

Python python_speech_features.mfcc Method Code Examples

This article collects typical usage examples of the python_speech_features.mfcc method in Python. If you are wondering what python_speech_features.mfcc does, how to call it, or what it looks like in practice, the curated code examples below may help. You can also explore further usage examples from the python_speech_features package.


Below are 15 code examples of the python_speech_features.mfcc method, ordered by popularity.

Example 1: get_file_features

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import numpy as np
import scipy.io.wavfile
import python_speech_features as psf

def get_file_features(wav_fname, num_ceps):
    """
    Extract MFCC features from a file.
    """
    # read wave
    fs, sig = scipy.io.wavfile.read(wav_fname)

    # get mfccs
    mfccs = psf.mfcc(sig, samplerate=fs, winlen=0.025, winstep=0.01,
                     numcep=num_ceps, nfilt=26, nfft=512, lowfreq=0,
                     highfreq=None, preemph=0.97, ceplifter=22,
                     appendEnergy=False)

    # compute per-coefficient means over all frames
    mfcc_means = np.round(mfccs.mean(axis=0), 3)
    return mfcc_means
Developer: SuperKogito, Project: pydiogment, Lines: 18, Source: _3_get_features.py
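
A minimal usage sketch for the function above (the file path and coefficient count are hypothetical, not from the project):

# Hypothetical usage: per-file mean of 13 MFCCs ("speech.wav" is a placeholder).
means = get_file_features("speech.wav", num_ceps=13)
print(means.shape)  # (13,): one mean per cepstral coefficient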

Example 2: _mfcc_and_labels

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import numpy as np
import scipy.interpolate
import python_speech_features
import timit  # project-local module providing SAMPLE_RATE

def _mfcc_and_labels(audio, labels):
  """ Convert to MFCC features and corresponding (interpolated) labels.

  Returns:
    A tuple, `(mfcc_features, mfcc_labels)`. A 2-D float array with shape
      `[num_frames, num_coefficients]` and a 1-D int array with length
      `num_frames`.
  """
  mfcc_sample_rate = 100.0
  winfunc = lambda x: np.hamming(x)
  mfcc_features = python_speech_features.mfcc(audio, samplerate=timit.SAMPLE_RATE, winlen=0.025,
                                              winstep=1.0/mfcc_sample_rate, lowfreq=85.0,
                                              highfreq=timit.SAMPLE_RATE/2, winfunc=winfunc)
  t_audio = np.linspace(0.0, audio.shape[0] * 1.0 / timit.SAMPLE_RATE, audio.size, endpoint=False)
  t_mfcc = np.linspace(0.0, mfcc_features.shape[0] * 1.0 / mfcc_sample_rate, mfcc_features.shape[0], endpoint=False)
  interp_func = scipy.interpolate.interp1d(t_audio, labels, kind='nearest')
  mfcc_labels = interp_func(t_mfcc)
  return mfcc_features, mfcc_labels
Developer: rdipietro, Project: mist-rnns, Lines: 19, Source: timitphonemerec.py
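
A toy illustration of the nearest-neighbor label alignment used above; all values here are synthetic, made up for the demonstration:

import numpy as np
import scipy.interpolate

# 8 audio samples at 4 Hz, mapped onto 2 frame times at 1 Hz.
t_audio = np.linspace(0.0, 2.0, 8, endpoint=False)   # per-sample times
labels = np.array([0, 0, 0, 0, 1, 1, 1, 1])          # per-sample labels
t_frames = np.array([0.0, 1.0])                      # frame times
f = scipy.interpolate.interp1d(t_audio, labels, kind='nearest')
print(f(t_frames))  # [0. 1.]: each frame takes the label nearest in time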

Example 3: export_test_mfcc

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import numpy as np

def export_test_mfcc(self):
    # this is used to export data, e.g. into iOS
    # (method excerpt; `self` is the data generator it belongs to)

    testset = next(self.next_batch())[0]
    mfcc = testset['the_input'][0:self.batch_size]  # export all MFCCs in the batch
    words = testset['source_str'][0:self.batch_size]
    labels = testset['the_labels'][0:self.batch_size]

    print("exporting:", type(mfcc))
    print(mfcc.shape)
    print(words.shape)
    print(labels.shape)

    # we save each mfcc/words/label as its own csv file
    for i in range(0, mfcc.shape[0]):
        np.savetxt('./Archive/test_spectro/test_spectro_{}.csv'.format(i), mfcc[i, :, :], delimiter=',')

    print(words)
    print(labels)

    return
Developer: robmsmt, Project: KerasDeepSpeech, Lines: 23, Source: generator.py
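
A round-trip check for the exporter above; the path follows the format string used in export_test_mfcc and is otherwise hypothetical:

import numpy as np

# Reload one exported feature matrix (file produced by export_test_mfcc).
mfcc_0 = np.loadtxt('./Archive/test_spectro/test_spectro_0.csv', delimiter=',')
print(mfcc_0.shape)  # (time_steps, num_features)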

Example 4: audiofile_to_input_vector

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import numpy as np
import scipy.io.wavfile as wav
from python_speech_features import mfcc

def audiofile_to_input_vector(audio_filename, numcep, numcontext):
    r"""
    Given a WAV audio file at ``audio_filename``, calculates ``numcep`` MFCC features
    at every 0.02s time step with a window length of 0.032s. Appends ``numcontext``
    empty context frames to the left and right of each time step, and returns this
    data in a numpy array.
    """
    # Load wav file
    fs, audio = wav.read(audio_filename)

    # Get mfcc coefficients
    features = mfcc(audio, samplerate=fs, numcep=numcep, winlen=0.032, winstep=0.02, winfunc=np.hamming)

    # Add empty initial and final contexts
    empty_context = np.zeros((numcontext, numcep), dtype=features.dtype)
    features = np.concatenate((empty_context, features, empty_context))

    return features
Developer: AASHISHAG, Project: deepspeech-german, Lines: 20, Source: audio.py
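
A minimal usage sketch (the path and parameter values are illustrative, not from the project):

# 26 cepstral coefficients with 9 empty context frames on each side.
feats = audiofile_to_input_vector("utterance.wav", numcep=26, numcontext=9)
print(feats.shape)  # (num_frames + 2 * 9, 26)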

Example 5: convert_inputs_to_ctc_format

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import numpy as np
from python_speech_features import mfcc
# SPACE_TOKEN, SPACE_INDEX and FIRST_INDEX are module-level constants
# defined elsewhere in the project.

def convert_inputs_to_ctc_format(audio, fs, target_text, num_features):
    inputs = mfcc(audio, samplerate=fs, numcep=num_features)
    # Transform into a 3-D array and normalize
    train_inputs = np.asarray(inputs[np.newaxis, :])
    train_inputs = (train_inputs - np.mean(train_inputs)) / np.std(train_inputs)
    train_seq_len = [train_inputs.shape[1]]

    # Keep only lowercase words; strip punctuation
    original = ' '.join(target_text.strip().lower().split(' '))
    for char in ".?,'!-":
        original = original.replace(char, '')

    # Double the spaces so that split yields an empty token between words
    targets = original.replace(' ', '  ')
    targets = targets.split(' ')

    # Insert the space token between words
    targets = np.hstack([SPACE_TOKEN if x == '' else list(x) for x in targets])

    # Transform chars into indices
    targets = np.asarray([SPACE_INDEX if x == SPACE_TOKEN else ord(x) - FIRST_INDEX
                          for x in targets])

    return train_inputs, targets, train_seq_len, original
Developer: philipperemy, Project: tensorflow-ctc-speech-recognition, Lines: 26, Source: utils.py
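
A worked trace of the character-to-index encoding above. The module constants are not shown in the snippet, so the values below are assumptions based on the conventional choices for this kind of CTC script:

import numpy as np

# Assumed constant values (defined elsewhere in the project).
SPACE_TOKEN = '<space>'
SPACE_INDEX = 0
FIRST_INDEX = ord('a') - 1  # 'a' -> 1, 'b' -> 2, ...

targets = 'hi you'.replace(' ', '  ').split(' ')  # ['hi', '', 'you']
targets = np.hstack([SPACE_TOKEN if x == '' else list(x) for x in targets])
# ['h' 'i' '<space>' 'y' 'o' 'u']
indices = np.asarray([SPACE_INDEX if x == SPACE_TOKEN else ord(x) - FIRST_INDEX
                      for x in targets])
print(indices)  # [ 8  9  0 25 15 21]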

Example 6: extract_features

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import os
import pandas as pd

def extract_features(folder, num_ceps, fname, augmented=False):
    """
    Extract features from files.
    """
    # collect paths to wave files
    wave_fnames = [os.path.join(root, file)
                   for root, dirs, files in os.walk(folder) for file in files]

    # init features & errors
    features = []
    errors_caused = []

    # keep either augmented or original data
    if augmented:
        wave_fnames = [f for f in wave_fnames if "augment" in f]
    else:
        wave_fnames = [f for f in wave_fnames if "augment" not in f]

    # get voice features
    for wave_fname in wave_fnames:
        try:
            feats = get_file_features(wave_fname, num_ceps)
            features.append([wave_fname] + list(feats) + [wave_fname.split("/")[-2]])
        except Exception:
            print("Error: error occurred when processing", wave_fname)
            errors_caused.append(wave_fname)

    # define column names for csv
    column_names = ["file_name"] + ["mfcc" + str(i) for i in range(num_ceps)] + ["emotion"]

    # export results to file
    data = pd.DataFrame(features, columns=column_names)
    data.to_csv(fname)
    return errors_caused
Developer: SuperKogito, Project: pydiogment, Lines: 34, Source: _3_get_features.py
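
A hypothetical call (placeholder folder and file names) showing how this function and get_file_features from Example 1 fit together:

# Process non-augmented wavs under "data/emotions/" and write 13 mean
# MFCCs per file to features.csv; returns the files that failed.
failed = extract_features("data/emotions/", num_ceps=13, fname="features.csv")
print(len(failed), "files could not be processed")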

Example 7: extract_features

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import numpy as np
from sklearn import preprocessing
import python_speech_features as mfcc  # module aliased so that mfcc.mfcc(...) works
# calculate_delta is a project helper defined elsewhere (see the sketch below)

def extract_features(audio, rate):
    """Extract 20-dim MFCC features from audio, perform cepstral mean
    subtraction (CMS) and append deltas to obtain a 40-dim feature vector."""

    mfcc_feature = mfcc.mfcc(audio, rate, 0.025, 0.01, 20, nfft=1200, appendEnergy=True)
    mfcc_feature = preprocessing.scale(mfcc_feature)
    delta = calculate_delta(mfcc_feature)
    combined = np.hstack((mfcc_feature, delta))
    return combined
Developer: Atul-Anand-Jha, Project: Speaker-Identification-Python, Lines: 11, Source: featureextraction.py
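
calculate_delta is a project helper that is not shown on this page. A minimal stand-in built on python_speech_features.delta (an assumption, not the project's actual implementation) could look like:

from python_speech_features import delta

def calculate_delta(features):
    # First-order delta coefficients over 2 frames on each side;
    # output has the same shape as the input feature matrix.
    return delta(features, 2)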

Example 8: get_feature

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import sys
from python_speech_features import mfcc

def get_feature(fs, signal):
    mfcc_feature = mfcc(signal, fs)
    if len(mfcc_feature) == 0:
        print("ERROR.. failed to extract mfcc feature:", len(signal), file=sys.stderr)
    return mfcc_feature
Developer: crouchred, Project: speaker-recognition-py3, Lines: 7, Source: features.py

Example 9: extract_energy

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import numpy as np
import python_speech_features

def extract_energy(rate, sig):
    """ Extracts the energy of frames. """

    # with appendEnergy=True, the 0th coefficient is the log frame energy
    mfcc = python_speech_features.mfcc(sig, rate, appendEnergy=True)
    energy_row_vec = mfcc[:, 0]
    energy_col_vec = energy_row_vec[:, np.newaxis]
    return energy_col_vec
Developer: persephone-tools, Project: persephone, Lines: 9, Source: feat_extract.py

Example 10: mfcc

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import numpy as np
import scipy.io.wavfile as wav
import python_speech_features

def mfcc(wav_path):
    """ Grabs MFCC features with energy and derivatives. """

    (rate, sig) = wav.read(wav_path)
    feat = python_speech_features.mfcc(sig, rate, appendEnergy=True)
    delta_feat = python_speech_features.delta(feat, 2)
    all_feats = [feat, delta_feat]
    all_feats = np.array(all_feats)
    # Make time the first dimension for easy length normalization padding later.
    all_feats = np.swapaxes(all_feats, 0, 1)
    all_feats = np.swapaxes(all_feats, 1, 2)

    feat_fn = wav_path[:-3] + "mfcc13_d.npy"
    np.save(feat_fn, all_feats)
Developer: persephone-tools, Project: persephone, Lines: 16, Source: feat_extract.py
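
A hedged round trip for the function above; "utt.wav" is a placeholder, and the .npy name follows the function's wav_path[:-3] pattern:

import numpy as np

mfcc("utt.wav")                      # writes utt.mfcc13_d.npy
feats = np.load("utt.mfcc13_d.npy")
print(feats.shape)  # (num_frames, 13, 2): static and delta per frame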

Example 11: load

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import os
import numpy as np
# DEFAULT_DATA_DIR is a module-level constant defined elsewhere in the project.

def load(data_dir=DEFAULT_DATA_DIR, mfcc=True):
  """ Load all standardized TIMIT data with folded phoneme labels.

  Args:
    data_dir: A string. The data directory.
    mfcc: A boolean. If True, return MFCC sequences and their corresponding
      label sequences. Otherwise, return raw audio sequences and their
      associated label sequences.

  Returns:
    A tuple with 6 elements: train inputs, train labels, val inputs,
    val labels, test inputs, test labels. Each entry is a list of sequences.
    All input sequences are 2-D float arrays with shape
    `[length, values_per_step]` and all label sequences are 1-D int8 arrays
    with shape `[length]`.
  """
  types = ['mfcc', 'mfcc_labels'] if mfcc else ['audio', 'labels']
  ret = []
  for name in ['train', 'val', 'test']:
    for kind in types:
      path = os.path.join(data_dir, name + '_' + kind + '.npy')
      if not os.path.exists(path):
        raise ValueError('Data not found in %s. Run timit.py and timitphonemerec.py.' % data_dir)
      data = np.load(path)
      if kind == 'audio':
        data = [seq[:, np.newaxis] for seq in data]
      ret.append(data)
  return tuple(ret)
Developer: rdipietro, Project: mist-rnns, Lines: 30, Source: timitphonemerec.py

Example 12: load_split

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import numpy as np
# `load` is the function from the previous example (same source file).

def load_split(data_dir=DEFAULT_DATA_DIR, val=True, mfcc=True, normalize=True):
  """ Load a standardized-TIMIT train, test split.

  Args:
    data_dir: A string. The data directory.
    val: A boolean. If True, return the validation set as the test set.
    mfcc: A boolean. If True, return MFCC sequences and their corresponding
      label sequences. Otherwise, return raw audio sequences and their
      associated label sequences.
    normalize: A boolean. If True, normalize each sequence individually by
      centering / scaling.

  Returns:
    A tuple, `(train_inputs, train_labels, test_inputs, test_labels)`. Each is
    a list of sequences. All inputs are 2-D float arrays with shape
    `[length, values_per_step]` and all labels are 1-D int8 arrays with shape
    `[length]`.
  """
  sequence_lists = load(data_dir=data_dir, mfcc=mfcc)
  train_inputs, train_labels, val_inputs, val_labels, test_inputs, test_labels = sequence_lists
  if val:
    test_inputs = val_inputs
    test_labels = val_labels
  if normalize:
    train_inputs = [seq - np.mean(seq, axis=0, keepdims=True) for seq in train_inputs]
    train_inputs = [seq / np.std(seq, axis=0, keepdims=True) for seq in train_inputs]
    test_inputs = [seq - np.mean(seq, axis=0, keepdims=True) for seq in test_inputs]
    test_inputs = [seq / np.std(seq, axis=0, keepdims=True) for seq in test_inputs]
  return train_inputs, train_labels, test_inputs, test_labels
Developer: rdipietro, Project: mist-rnns, Lines: 31, Source: timitphonemerec.py

Example 13: mfcc_features_extraction

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import scipy.io.wavfile as wavv  # assumed alias, based on the wavv.read call
from python_speech_features import mfcc
# readWavFile is a project helper defined elsewhere.

def mfcc_features_extraction(wav):
    inputWav, wav = readWavFile(wav)
    print(inputWav)
    rate, signal = wavv.read(inputWav)
    mfcc_features = mfcc(signal, rate)
    return mfcc_features, wav
Developer: gionanide, Project: Speech_Signal_Processing_and_Classification, Lines: 8, Source: mfcc_pca_feature.py

Example 14: mfcc_features_extraction

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
# imports and readWavFile as in the previous example (same project)

def mfcc_features_extraction(wav):
    inputWav, wav = readWavFile(wav)
    rate, signal = wavv.read(inputWav)
    mfcc_features = mfcc(signal, rate)
    # a numpy array with one row (feature vector) per frame
    return mfcc_features, wav
Developer: gionanide, Project: Speech_Signal_Processing_and_Classification, Lines: 8, Source: mfcc.py

Example 15: mean_features

# Required import: import python_speech_features [as alias]
# Or: from python_speech_features import mfcc [as alias]
import numpy as np
# writeFeatures is a project helper defined elsewhere.

def mean_features(mfcc_features, wav):
    # make a numpy array with length equal to the number of mfcc features
    mean_features = np.zeros(len(mfcc_features[0]))
    # for each feature, sum over all frames and divide by the number of frames
    for x in range(len(mfcc_features)):
        for y in range(len(mfcc_features[x])):
            mean_features[y] += mfcc_features[x][y]
    mean_features = mean_features / len(mfcc_features)
    print(mean_features)
    writeFeatures(mean_features, wav)
Developer: gionanide, Project: Speech_Signal_Processing_and_Classification, Lines: 12, Source: mfcc.py
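
The double loop above is a hand-rolled column mean; as a sketch (not part of the original project), numpy collapses it to a single call:

import numpy as np

def mean_features_vectorized(mfcc_features):
    # Equivalent to the loop in mean_features: average over the frame axis.
    return np.mean(mfcc_features, axis=0)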


Note: The python_speech_features.mfcc examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are taken from open-source projects contributed by their respective developers, and copyright remains with the original authors. Please consult the corresponding project's license before distributing or using them; do not reproduce without permission.