This article collects and summarizes typical usage examples of the librosa.logamplitude method in Python. If you are wondering exactly what librosa.logamplitude does, how to call it, or where to find examples of it in use, the hand-picked code samples below should help. You can also explore further usage examples from the librosa module, where this method lives.
Below are 15 code examples of librosa.logamplitude, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
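Note: recent librosa releases deprecated and later removed librosa.logamplitude in favor of librosa.amplitude_to_db / librosa.power_to_db (see the comment in Example 10 below). As a rough migration sketch, assuming a power mel spectrogram S computed the same way as in the examples (the synthetic test tone is only there to make the snippet self-contained):

import librosa
import numpy as np

# Synthetic one-second 440 Hz tone so the snippet runs without an audio file
sr = 12000
y = np.sin(2 * np.pi * 440.0 * np.arange(sr) / sr).astype(np.float32)

S = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=256, n_mels=96)
# Old API:     log_S = librosa.logamplitude(S, ref_power=np.max)
# Current API: power_to_db replaces it for power spectrograms (ref_power became ref)
log_S = librosa.power_to_db(S, ref=np.max)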
Example 1: __extract_melspec
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def __extract_melspec(audio_fpath, audio_fname):
    """
    Using librosa to calculate log mel spectrogram values
    and scipy.misc to draw and store them (in grayscale).
    :param audio_fpath:
    :param audio_fname:
    :return:
    """
    # Load sound file
    y, sr = librosa.load(audio_fpath, sr=12000)

    # Let's make and display a mel-scaled power (energy-squared) spectrogram
    S = librosa.feature.melspectrogram(y, sr=sr, hop_length=256, n_mels=96)

    # Convert to log scale (dB). We'll use the peak power as reference.
    log_S = librosa.logamplitude(S, ref_power=np.max)

    spectr_fname = audio_fname + '.png'
    subdir_path = __get_subdir(spectr_fname)

    # Draw log values matrix in grayscale
    scipy.misc.toimage(log_S).save(subdir_path.format(spectr_fname))
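scipy.misc.toimage has been removed from newer SciPy releases, so the final save step above no longer runs there. A possible replacement sketch using Pillow (the helper name and the min-max scaling are my own, not from the original project):

from PIL import Image
import numpy as np

def save_grayscale_png(log_S, out_path):
    # Scale the dB matrix to 0..255 and store it as an 8-bit grayscale PNG
    scaled = (log_S - log_S.min()) / (log_S.max() - log_S.min() + 1e-8) * 255.0
    Image.fromarray(scaled.astype(np.uint8)).save(out_path)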
Example 2: process_file
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def process_file(self, file_path):
    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256

    src, sr = librosa.load(file_path, sr=SR)  # whole signal
    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    mel_spectrogram = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                                    n_fft=N_FFT, n_mels=N_MELS) ** 2,
                            ref_power=1.0)
    mel_spectrogram = np.expand_dims(mel_spectrogram, -1)
    # for 10 secs the shape is (96, 469, 1)
    return mel_spectrogram
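The shape noted in the comment follows from the framing parameters: at a 12 kHz sample rate with a hop of 256 samples, a 10-second clip gives 1 + ⌊120000 / 256⌋ = 469 frames over 96 mel bins. A quick sanity check (the 10-second duration is the assumption taken from the comment above):

SR, HOP_LEN, N_MELS = 12000, 256, 96
duration_s = 10                               # duration assumed in the comment above
n_frames = 1 + (SR * duration_s) // HOP_LEN   # centered STFT framing: 1 + 468 = 469
print((N_MELS, n_frames, 1))                  # (96, 469, 1), matching the noted shape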
Example 3: plot_spectrum
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def plot_spectrum(sound_files, sound_names):
    """plot log power spectrum"""
    i = 1
    fig = plt.figure(figsize=(20, 64))
    for f, n in zip(sound_files, sound_names):
        y, sr = librosa.load(os.path.join('./data/esc10/audio/', f))
        plt.subplot(10, 1, i)
        D = librosa.logamplitude(np.abs(librosa.stft(y)) ** 2, ref_power=np.max)
        librosa.display.specshow(D, sr=sr, y_axis='log')
        plt.title(n + ' - ' + 'Spectrum')
        i += 1
    plt.tight_layout(pad=10)
    plt.show()
Example 4: compute_spectrograms
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def compute_spectrograms(filename):
    out_rate = 22050

    frames, rate = librosa.load(filename, sr=out_rate, mono=True)
    if len(frames) < out_rate:
        # if less than 1 second - can't process
        raise Exception("Audio duration is too short")

    normalized_audio = _normalize(frames)
    melspectr = librosa.feature.melspectrogram(y=normalized_audio, sr=out_rate,
                                               n_mels=N_MEL_BANDS, fmax=out_rate / 2)
    logmelspectr = librosa.logamplitude(melspectr ** 2, ref_power=1.0)

    # now going through the spectrogram with a stride of the segment duration
    for start_idx in range(0, logmelspectr.shape[1] - SEGMENT_DUR + 1, SEGMENT_DUR):
        yield logmelspectr[:, start_idx:start_idx + SEGMENT_DUR]
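N_MEL_BANDS and SEGMENT_DUR are module-level constants in the original project and are not shown here; the values below are placeholders only. A hypothetical usage sketch of the generator:

N_MEL_BANDS = 96    # placeholder value: number of mel bands
SEGMENT_DUR = 128   # placeholder value: segment length in spectrogram frames

for segment in compute_spectrograms('example_track.wav'):   # placeholder path
    print(segment.shape)   # (N_MEL_BANDS, SEGMENT_DUR) for each yielded segment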
Example 5: __extract_hpss_melspec
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def __extract_hpss_melspec(audio_fpath, audio_fname):
    """
    Extension of :func:`__extract_melspec`.
    Not used as it's about ten times slower, but
    if you have resources, try it out.
    :param audio_fpath:
    :param audio_fname:
    :return:
    """
    y, sr = librosa.load(audio_fpath, sr=44100)

    # Harmonic-percussive source separation
    y_harmonic, y_percussive = librosa.effects.hpss(y)

    S_h = librosa.feature.melspectrogram(y_harmonic, sr=sr, n_mels=128)
    S_p = librosa.feature.melspectrogram(y_percussive, sr=sr, n_mels=128)

    log_S_h = librosa.logamplitude(S_h, ref_power=np.max)
    log_S_p = librosa.logamplitude(S_p, ref_power=np.max)

    spectr_fname_h = (audio_fname + '_h.png')
    spectr_fname_p = (audio_fname + '_p.png')
    subdir_path = __get_subdir(audio_fname)

    scipy.misc.toimage(log_S_h).save(subdir_path.format(spectr_fname_h))
    scipy.misc.toimage(log_S_p).save(subdir_path.format(spectr_fname_p))
Example 6: plotSpectrogram
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def plotSpectrogram(self, mels=512, maxfreq=30000):
    # Plot the Mel power-scaled frequency spectrum, with any factor of 128
    # frequency bins and 512 frames (frame default)
    mel = librosa.feature.melspectrogram(y=self.wav, sr=self.samplefreq, n_mels=mels, fmax=maxfreq)
    librosa.display.specshow(librosa.logamplitude(mel, ref_power=np.max),
                             y_axis='mel', fmax=maxfreq, x_axis='time')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Mel Power-Scaled Frequency Spectrogram')
    plt.tight_layout()
    plt.show()
    return mel
Developer: nlinc1905, Project: Convolutional-Autoencoder-Music-Similarity, Code lines: 11, Source file: 02_wav_features_and_spectrogram.py
Example 7: readFile
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def readFile(filenbr):
    # Load data as an array, noting that the log amplitude must be taken to scale the values
    spec = librosa.logamplitude(np.loadtxt(str(filenbr) + '.csv', delimiter=','), ref_power=np.max)
    x_train = spec.astype('float32') / 255.
    x_train = np.reshape(x_train, (512, 2584, 1))
    # Test data will be the same as training data
    return x_train
Developer: nlinc1905, Project: Convolutional-Autoencoder-Music-Similarity, Code lines: 9, Source file: 03_autoencoding_and_tsne.py
Example 8: compute_melgram
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def compute_melgram(audio_path):
    '''Compute a mel-spectrogram and return it in a shape of (1, 1, 96, 1366), where
    96 == #mel-bins and 1366 == #time frames

    Parameters
    ----------
    audio_path: path to the audio file.
        Any format supported by audioread will work.
        More info: http://librosa.github.io/librosa/generated/librosa.core.load.html#librosa.core.load
    '''
    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12  # to make it 1366 frames

    src, sr = librosa.load(audio_path, sr=SR)  # whole signal
    n_sample = src.shape[0]
    n_sample_fit = int(DURA * SR)

    if n_sample < n_sample_fit:  # if too short
        src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,))))
    elif n_sample > n_sample_fit:  # if too long
        # integer division so the slice indices stay ints
        src = src[(n_sample - n_sample_fit) // 2:(n_sample + n_sample_fit) // 2]
    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    ret = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                        n_fft=N_FFT, n_mels=N_MELS) ** 2,
                ref_power=1.0)
    ret = ret[np.newaxis, np.newaxis, :]
    return ret
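The constant DURA = 29.12 is chosen so that the padded or trimmed signal produces exactly 1366 frames: 29.12 s × 12000 Hz = 349440 samples, and 1 + ⌊349440 / 256⌋ = 1366, the time dimension of the (1, 1, 96, 1366) output. A small check of that arithmetic:

SR, HOP_LEN, DURA = 12000, 256, 29.12
n_sample_fit = int(DURA * SR)            # 349440 samples after padding/trimming
n_frames = 1 + n_sample_fit // HOP_LEN   # 1 + 1365 = 1366 time frames
print(n_frames)                          # 1366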
Example 9: preprocess_dataset
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def preprocess_dataset(inpath="Samples/", outpath="Preproc/"):

    if not os.path.exists(outpath):
        os.mkdir(outpath, 0o755)   # make a new directory for preproc'd files

    class_names = get_class_names(path=inpath)   # get the names of the subdirectories
    nb_classes = len(class_names)
    print("class_names = ", class_names)
    for idx, classname in enumerate(class_names):   # go through the subdirs

        if not os.path.exists(outpath + classname):
            os.mkdir(outpath + classname, 0o755)   # make a new subdirectory for the preproc class

        class_files = os.listdir(inpath + classname)
        n_files = len(class_files)
        n_load = n_files
        print(' class name = {:14s} - {:3d}'.format(classname, idx),
              ", ", n_files, " files in this class", sep="")

        printevery = 20
        for idx2, infilename in enumerate(class_files):
            audio_path = inpath + classname + '/' + infilename
            if (0 == idx2 % printevery):
                print('\r Loading class: {:14s} ({:2d} of {:2d} classes)'.format(classname, idx + 1, nb_classes),
                      ", file ", idx2 + 1, " of ", n_load, ": ", audio_path, sep="")
            #start = timer()
            aud, sr = librosa.load(audio_path, sr=None)
            melgram = librosa.logamplitude(librosa.feature.melspectrogram(aud, sr=sr, n_mels=96),
                                           ref_power=1.0)[np.newaxis, np.newaxis, :, :]
            outfile = outpath + classname + '/' + infilename + '.npy'
            np.save(outfile, melgram)
Example 10: make_melgram
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def make_melgram(mono_sig, sr, n_mels=128):   # @keunwoochoi upgraded from 96 to 128 mel bins in kapre
    #melgram = librosa.logamplitude(librosa.feature.melspectrogram(mono_sig,  # latest librosa deprecated logamplitude in favor of amplitude_to_db
    #    sr=sr, n_mels=96), ref_power=1.0)[np.newaxis, np.newaxis, :, :]

    melgram = librosa.amplitude_to_db(librosa.feature.melspectrogram(mono_sig,
        sr=sr, n_mels=n_mels))[np.newaxis, :, :, np.newaxis]   # last newaxis is b/c tensorflow wants 'channels_last' order

    '''
    # librosa docs also include a perceptual CQT example:
    CQT = librosa.cqt(mono_sig, sr=sr, fmin=librosa.note_to_hz('A1'))
    freqs = librosa.cqt_frequencies(CQT.shape[0], fmin=librosa.note_to_hz('A1'))
    perceptual_CQT = librosa.perceptual_weighting(CQT**2, freqs, ref=np.max)
    melgram = perceptual_CQT[np.newaxis, np.newaxis, :, :]
    '''
    return melgram
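A hypothetical usage sketch (the file path is a placeholder), showing the channels-last layout produced by the trailing np.newaxis:

import librosa

y, sr = librosa.load('some_clip.wav', mono=True)   # placeholder input file
melgram = make_melgram(y, sr, n_mels=128)
print(melgram.shape)   # (1, 128, n_frames, 1): batch, mel bins, time, channel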
Example 11: prepossessingAudio
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def prepossessingAudio(audioPath, ppFilePath):
    print('Prepossessing ' + audioPath)

    featuresArray = []
    for i in range(0, SOUND_SAMPLE_LENGTH, HAMMING_STRIDE):
        if i + HAMMING_SIZE <= SOUND_SAMPLE_LENGTH - 1:
            y, sr = librosa.load(audioPath, offset=i / 1000.0, duration=HAMMING_SIZE / 1000.0)

            # Let's make and display a mel-scaled power (energy-squared) spectrogram
            S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

            # Convert to log scale (dB). We'll use the peak power as reference.
            log_S = librosa.logamplitude(S, ref_power=np.max)

            mfcc = librosa.feature.mfcc(S=log_S, sr=sr, n_mfcc=13)
            featuresArray.append(mfcc)
            # featuresArray.append(S)

            if len(featuresArray) == 599:
                break

    print('storing pp file: ' + ppFilePath)
    f = open(ppFilePath, 'wb')   # binary mode: pickle.dumps returns bytes
    f.write(pickle.dumps(featuresArray))
    f.close()
Example 12: prepossessingAudio
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def prepossessingAudio(audioPath, ppFilePath):
    print('Prepossessing ' + audioPath)

    featuresArray = []
    for i in range(0, SOUND_SAMPLE_LENGTH, HAMMING_STRIDE):
        if i + HAMMING_SIZE <= SOUND_SAMPLE_LENGTH - 1:
            y, sr = librosa.load(audioPath, offset=i / 1000.0, duration=HAMMING_SIZE / 1000.0)

            # Let's make and display a mel-scaled power (energy-squared) spectrogram
            S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)

            # Convert to log scale (dB). We'll use the peak power as reference.
            log_S = librosa.logamplitude(S, ref_power=np.max)

            mfcc = librosa.feature.mfcc(S=log_S, sr=sr, n_mfcc=13)
            # featuresArray.append(mfcc)
            featuresArray.append(S)

            if len(featuresArray) == 599:
                break

    print('storing pp file: ' + ppFilePath)
    f = open(ppFilePath, 'wb')   # binary mode: pickle.dumps returns bytes
    f.write(pickle.dumps(featuresArray))
    f.close()
Example 13: extract_features
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def extract_features(basedir, extension='.au'):
    features = []
    labels = []
    # iterate over all files in all subdirectories of the base directory
    for root, dirs, files in os.walk(basedir):
        files = glob.glob(os.path.join(root, '*' + extension))
        # apply function to all files
        for f in files:
            genre = f.split('/')[4].split('.')[0]
            if (genre == 'hiphop' or genre == 'rock' or genre == 'pop' or genre == 'country'):
                print(genre)
                # Extract the mel-spectrogram
                y, sr = librosa.load(f)
                # Let's make and display a mel-scaled power (energy-squared) spectrogram
                mel_spec = librosa.feature.melspectrogram(y, sr=sr, n_mels=128, hop_length=1024, n_fft=2048)
                # Convert to log scale (dB). We'll use the peak power as reference.
                log_mel_spec = librosa.logamplitude(mel_spec, ref_power=np.max)
                # make the array dimensions fixed at 128x644
                log_mel_spec = np.resize(log_mel_spec, (128, 644))
                print(log_mel_spec.shape)
                # store into the feature array
                features.append(log_mel_spec.flatten())
                # print(len(np.array(log_mel_spec.T.flatten())))
                # Extract label
                label = genreDict.get(genre)
                labels.append(label)
            else:
                pass

    features = np.asarray(features).reshape(len(features), 82432)
    print(features.shape)
    print(len(labels))
    return (features, one_hot_encode(labels))
Example 14: extract_one_file
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def extract_one_file(videofile, audiofile):
    print(" --- " + videofile)
    ### return mfcc, fbank
    # get video FPS
    nFrames, fps = get_fps(videofile)
    # load audio
    data, sr = librosa.load(audiofile, sr=44100)  # data is np.float32
    # number of audio samples per video frame
    nSamPerFrame = int(math.floor(float(sr) / fps))
    # number of samples per 0.025s
    n25sSam = int(math.ceil(float(sr) * 0.025))
    # number of samples per step
    nSamPerStep = 512  # int(math.floor(float(sr) * 0.01))
    # number of steps per frame
    nStepsPerFrame = 3  # int(math.floor(float(nSamPerFrame) / float(nSamPerStep)))
    # real frame size
    nFrameSize = (nStepsPerFrame - 1) * nSamPerStep + n25sSam

    # initial position in the sound stream
    # initPos negative means we need zero padding at the front.
    curPos = nSamPerFrame - nFrameSize

    mfccs = []
    melspecs = []
    chromas = []
    for f in range(0, nFrames):
        # extract features
        frameData, nextPos = extract_one_frame_data(data, curPos, nFrameSize, nSamPerFrame)
        curPos = nextPos
        S = librosa.feature.melspectrogram(frameData, sr, n_mels=128, hop_length=nSamPerStep)
        # 1st is log mel spectrogram
        log_S = librosa.logamplitude(S, ref_power=np.max)
        # 2nd is MFCC and its deltas
        mfcc = librosa.feature.mfcc(y=frameData, sr=sr, hop_length=nSamPerStep, n_mfcc=13)
        delta_mfcc = librosa.feature.delta(mfcc)
        delta2_mfcc = librosa.feature.delta(delta_mfcc)
        # 3rd is chroma
        chroma = librosa.feature.chroma_cqt(frameData, sr, hop_length=nSamPerStep)

        full_mfcc = np.concatenate([mfcc[:, 0:3].flatten(), delta_mfcc[:, 0:3].flatten(), delta2_mfcc[:, 0:3].flatten()])

        mfccs.append(full_mfcc.tolist())
        melspecs.append(log_S[:, 0:3].flatten().tolist())
        chromas.append(chroma[:, 0:3].flatten().tolist())

    return (mfccs, melspecs, chromas)
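As a worked example of the framing arithmetic above, assuming a 25 fps video (in practice fps is read from the video file):

import math

sr = 44100
fps = 25                                                    # assumed frame rate
nSamPerFrame = int(math.floor(float(sr) / fps))             # 1764 audio samples per video frame
n25sSam = int(math.ceil(float(sr) * 0.025))                 # 1103 samples in a 25 ms window
nSamPerStep = 512
nStepsPerFrame = 3
nFrameSize = (nStepsPerFrame - 1) * nSamPerStep + n25sSam   # 2 * 512 + 1103 = 2127
curPos = nSamPerFrame - nFrameSize                          # 1764 - 2127 = -363, negative => zero-pad the front
print(nFrameSize, curPos)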
Example 15: preprocess_input
# Required import: import librosa [as alias]
# Or: from librosa import logamplitude [as alias]
def preprocess_input(audio_path, dim_ordering='default'):
    '''Reads an audio file and outputs a Mel-spectrogram.'''
    if dim_ordering == 'default':
        dim_ordering = K.image_dim_ordering()
    assert dim_ordering in {'tf', 'th'}

    if librosa_exists():
        import librosa
    else:
        raise RuntimeError('Librosa is required to process audio files.\n' +
                           'Install it via `pip install librosa` \nor visit ' +
                           'http://librosa.github.io/librosa/ for details.')

    # mel-spectrogram parameters
    SR = 12000
    N_FFT = 512
    N_MELS = 96
    HOP_LEN = 256
    DURA = 29.12

    src, sr = librosa.load(audio_path, sr=SR)
    n_sample = src.shape[0]
    n_sample_wanted = int(DURA * SR)

    # trim the signal at the center
    if n_sample < n_sample_wanted:  # if too short
        src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,))))
    elif n_sample > n_sample_wanted:  # if too long
        # integer division so the slice indices stay ints
        src = src[(n_sample - n_sample_wanted) // 2:
                  (n_sample + n_sample_wanted) // 2]

    logam = librosa.logamplitude
    melgram = librosa.feature.melspectrogram
    x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN,
                      n_fft=N_FFT, n_mels=N_MELS) ** 2,
              ref_power=1.0)

    if dim_ordering == 'th':
        x = np.expand_dims(x, axis=0)
    elif dim_ordering == 'tf':
        x = np.expand_dims(x, axis=3)
    return x
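One caveat: since x is still two-dimensional at that point, np.expand_dims(x, axis=3) in the 'tf' branch only worked on older NumPy versions that silently clipped out-of-range axes; current NumPy raises an AxisError there, and axis=-1 would give the intended channels-last shape. A hypothetical usage sketch for the 'th' ordering (the MP3 path is a placeholder):

x = preprocess_input('some_track.mp3', dim_ordering='th')
print(x.shape)   # (1, 96, 1366): one channel, 96 mel bins, 1366 time frames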