本文整理汇总了Python中librosa.load方法的典型用法代码示例。如果您正苦于以下问题：Python librosa.load方法的具体用法？Python librosa.load怎么用？Python librosa.load使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块librosa的用法示例。
在下文中一共展示了librosa.load方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: demo_plot
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def demo_plot():
    """Plot one audio clip next to its time-stretched and pitch-shifted versions.

    Loads a fixed sample file at 44.1 kHz, applies ``librosa.effects``
    time stretching (rate 1.2) and pitch shifting (6 steps), and shows the
    three waveforms stacked in a single figure with identical axis limits.
    """
    audio = './data/esc10/audio/Dog/1-30226-A.ogg'
    y, sr = librosa.load(audio, sr=44100)
    # ``sr`` and ``n_steps`` are keyword-only in librosa >= 0.10; passing them
    # by keyword also works on older releases.
    y_ps = librosa.effects.pitch_shift(y, sr=sr, n_steps=6)  # n_steps sets the size of the pitch change
    y_ts = librosa.effects.time_stretch(y, rate=1.2)  # rate sets the scaling along the time axis

    panels = (
        (311, y, 'Original waveform'),
        (312, y_ts, 'Time Stretch transformed waveform'),
        (313, y_ps, 'Pitch Shift transformed waveform'),
    )
    for position, signal, title in panels:
        plt.subplot(position)
        plt.plot(signal)
        plt.title(title)
        # Same limits on every panel so the waveforms are directly comparable.
        plt.axis([0, 200000, -0.4, 0.4])
    plt.tight_layout()
    plt.show()
示例2: read_audio
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def read_audio(file_path):
    """Load an audio file, trim silence and guarantee a minimum length.

    The file is loaded at ``config.sampling_rate`` and trimmed with
    ``librosa.effects.trim``. If the trimmed signal is shorter than
    ``config.min_seconds``, a window of that length centred on the
    non-silent interval is cut from the untrimmed signal instead, and
    zero-padded on both sides when the file itself is too short.

    :param file_path: path of the audio file to read
    :return: 1-D array with at least ``min_samples`` samples; an all-zero
        float32 array of ``min_samples`` samples if reading fails
    """
    min_samples = int(config.min_seconds * config.sampling_rate)
    try:
        y, _ = librosa.load(file_path, sr=config.sampling_rate)
        trim_y, trim_idx = librosa.effects.trim(y)  # default top_db (60)
        if len(trim_y) < min_samples:
            # Midpoint of the non-silent interval. The previous code used
            # (end - start) // 2, which is half the interval length, not
            # its position in ``y``.
            center = (trim_idx[0] + trim_idx[1]) // 2
            left_idx = max(0, center - min_samples // 2)
            # Derive the right edge from the left one so the window is
            # exactly min_samples long whenever the file has enough audio.
            right_idx = min(len(y), left_idx + min_samples)
            trim_y = y[left_idx:right_idx]
            if len(trim_y) < min_samples:
                padding = min_samples - len(trim_y)
                offset = padding // 2
                trim_y = np.pad(trim_y, (offset, padding - offset), 'constant')
        return trim_y
    except Exception as e:
        # Best effort: report and fall back to silence. ``Exception`` (not
        # ``BaseException``) so Ctrl-C and SystemExit still propagate.
        print(f"Exception while reading file {e}")
        return np.zeros(min_samples, dtype=np.float32)
示例3: compute_spectrograms
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def compute_spectrograms(filename):
    """Yield consecutive, non-overlapping log-mel spectrogram segments.

    The file is loaded as mono at 12 kHz. A mel spectrogram (512-point FFT,
    hop 256, ``N_MEL_BANDS`` bands) is squared and converted to dB with
    reference 1.0, then sliced along the frame axis into pieces that are
    ``SEGMENT_DUR`` frames wide. A trailing partial segment is dropped.

    Raises ``Exception`` if the audio is shorter than three seconds.
    """
    sample_rate = 12000
    fft_size = 512
    hop = 256

    samples, _ = librosa.load(filename, sr=sample_rate, mono=True)
    if len(samples) < sample_rate * 3:
        # fewer than 3 seconds of audio cannot be processed
        raise Exception("Audio duration is too short")

    mel = librosa.feature.melspectrogram(
        y=samples,
        sr=sample_rate,
        hop_length=hop,
        n_fft=fft_size,
        n_mels=N_MEL_BANDS,
    )
    log_mel = librosa.power_to_db(mel ** 2, ref=1.0)

    # Walk the frame axis with a stride of one segment.
    frame_count = log_mel.shape[1]
    for offset in range(0, frame_count - SEGMENT_DUR + 1, SEGMENT_DUR):
        yield log_mel[:, offset:offset + SEGMENT_DUR]
示例4: phase_enhance_pred
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def phase_enhance_pred(mix_STFT, pred_file, mode='STFT'):
    """Rebuild a waveform from the predicted magnitude and the mixture phase.

    Both STFTs are indexed as ``[:, :, 0]`` and ``[:, :, 1]``. The result is
    written as ``M * cos(P)`` into channel 0 and ``M * sin(P)`` into
    channel 1, so the two channels are treated as real and imaginary parts.

    :param mix_STFT: STFT of the mixture; only its phase is used
    :param pred_file: path of a wav file when ``mode == 'wav'``, or the
        predicted STFT array itself when ``mode == 'STFT'``
    :param mode: ``'wav'`` or ``'STFT'``
    :return: the output of ``fast_istft`` applied to the enhanced STFT
    :raises ValueError: if ``mode`` is neither ``'wav'`` nor ``'STFT'``
    """
    if mode == 'wav':
        T_pred, _ = librosa.load(pred_file, sr=16000)
        F_pred = fast_stft(T_pred)
    elif mode == 'STFT':
        F_pred = pred_file
    else:
        # Previously an unknown mode fell through and failed later with an
        # UnboundLocalError on F_pred.
        raise ValueError("mode must be 'wav' or 'STFT', got {!r}".format(mode))

    M = np.sqrt(np.square(F_pred[:, :, 0]) + np.square(F_pred[:, :, 1]))  # magnitude
    print('shape M:', M.shape)
    # Phase is atan2(imag, real). The previous arctan(real / imag) used the
    # inverted ratio, lost the quadrant and divided by zero when imag == 0.
    P = np.arctan2(mix_STFT[:, :, 1], mix_STFT[:, :, 0])  # phase
    print('shape p:', P.shape)
    F_enhance = np.zeros_like(F_pred)
    print('shape enhance', F_enhance.shape)
    F_enhance[:, :, 0] = np.multiply(M, np.cos(P))
    F_enhance[:, :, 1] = np.multiply(M, np.sin(P))
    print('shape enhance', F_enhance.shape)
    T_enhance = fast_istft(F_enhance)
    return T_enhance
## test code part
示例5: load_generic_audio
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def load_generic_audio(directory, sample_rate):
    '''Yield ``(audio, filename, category_id)`` for each file under a directory.

    Files come from ``find_files`` and are visited in the order produced by
    ``randomize_files``. ``audio`` is the mono waveform loaded at
    ``sample_rate`` and reshaped into a single column. ``category_id`` is
    the integer taken from the first ``FILE_PATTERN`` match in the file
    name, or ``None`` when the name does not match.
    '''
    files = find_files(directory)
    id_pattern = re.compile(FILE_PATTERN)
    print("files length: {}".format(len(files)))
    for filename in randomize_files(files):
        matches = id_pattern.findall(filename)
        # No match means the file name carries no id.
        category_id = int(matches[0][0]) if matches else None
        waveform, _ = librosa.load(filename, sr=sample_rate, mono=True)
        yield waveform.reshape(-1, 1), filename, category_id
示例6: extract_features
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def extract_features(files=None):
    """Compute MFCC features and labels for a list of (audio, subtitle) pairs.

    :param files: iterable of ``(wav, srt)`` pairs; when ``None`` the pairs
        are obtained from ``transcode_audio()``
    :return: ``(audio, labels)`` - two parallel lists holding, per input
        pair, the MFCC matrix and the result of ``extract_labels``
    """
    if files is None:
        files = transcode_audio()

    features, targets = [], []
    for wav_path, srt_path in files:
        print("Processing audio:", wav_path)
        signal, rate = librosa.load(wav_path, sr=FREQ)
        coeffs = librosa.feature.mfcc(
            y=signal,
            sr=rate,
            hop_length=int(HOP_LEN),
            n_mfcc=int(N_MFCC),
        )
        # Labels are requested for as many entries as the first MFCC row has.
        target = extract_labels(srt_path, len(coeffs[0]))
        features.append(coeffs)
        targets.append(target)
    return features, targets
示例7: __extract_melspec
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def __extract_melspec(audio_fpath, audio_fname):
    """
    Using librosa to calculate log mel spectrogram values
    and scipy.misc to draw and store them (in grayscale).

    :param audio_fpath: path of the sound file to load (resampled to 12 kHz)
    :param audio_fname: base name for the output image; '.png' is appended
    :return: None - the image is written to disk
    """
    # Load sound file
    y, sr = librosa.load(audio_fpath, sr=12000)
    # Mel-scaled power spectrogram. The signal is passed as ``y=`` because
    # it is keyword-only in librosa >= 0.10.
    S = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=256, n_mels=96)
    # Convert to log scale (dB) with the peak power as reference.
    # ``librosa.logamplitude(S, ref_power=...)`` was removed in librosa 0.6;
    # ``power_to_db(S, ref=...)`` is its replacement.
    log_S = librosa.power_to_db(S, ref=np.max)
    spectr_fname = audio_fname + '.png'
    # presumably __get_subdir returns a path template with a '{}' placeholder
    # for the file name - TODO confirm against its definition
    subdir_path = __get_subdir(spectr_fname)
    # Draw log values matrix in grayscale
    # NOTE(review): scipy.misc.toimage was removed in SciPy 1.2. Replacing it
    # needs an imaging library this snippet does not import, so it is left as is.
    scipy.misc.toimage(log_S).save(subdir_path.format(spectr_fname))
示例8: triangle
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def triangle(t, randfunc=np.random.rand, t0_fac=None):
    """Return a noisy triangular pulse (linear ramp up, then down) sampled on ``t``.

    :param t: array of time values; ``t[-1]`` is used as the total duration
    :param randfunc: zero-argument callable supplying the random draws for
        height, half-width, centre and noise amplitude
    :param t0_fac: if given, the pulse centre is ``t0_fac * t[-1]``;
        otherwise the centre is drawn at random
    :return: the pulse plus ``pinknoise`` scaled by a random amplitude
    """
    magnitude = 0.4 * randfunc() + 0.4
    height = magnitude * np.random.choice([-1, 1])  # random sign
    half_width = randfunc() / 4 * t[-1]
    if t0_fac is None:
        t0 = 2 * half_width + 0.4 * randfunc() * t[-1]
    else:
        t0 = t0_fac * t[-1]

    x = height * (1 - np.abs(t - t0) / half_width)
    # Zero everything outside the base of the triangle.
    x[t < (t0 - half_width)] = 0
    x[t > (t0 + half_width)] = 0

    noise_amp = 0.1 * randfunc() + 0.02  # add noise
    return x + noise_amp * pinknoise(t.shape[0])
# Prelude to read_audio_file
# Tried lots of ways of doing this.. most are slow.
#signal, rate = librosa.load(filename, sr=sr, mono=True, res_type='kaiser_fast') # Librosa's reader is incredibly slow. do not use
#signal, rate = torchaudio.load(filename)#, normalization=True) # Torchaudio's reader is pretty fast but normalization is a problem
#signal = signal.numpy().flatten()
#reader = io_methods.AudioIO # Stylios' file reader. Haven't gotten it working yet
#signal, rate = reader.audioRead(filename, mono=True)
#signal, rate = sf.read('existing_file.wav')
示例9: __call__
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def __call__(self, pkg, cached_file=None):
    """Add LPC features to ``pkg`` under the key ``self.name``.

    :param pkg: input package; it is passed through ``format_package`` and
        must then provide ``'chunk'`` (plus ``'chunk_beg_i'`` and
        ``'chunk_end_i'`` when ``cached_file`` is used)
    :param cached_file: optional path to pre-computed features, loaded with
        ``torch.load``; when given, the features are sliced to the chunk
        instead of being recomputed
    :return: the package, with ``pkg[self.name]`` set and
        ``pkg['dec_resolution']`` overwritten with ``self.hop``
    """
    pkg = format_package(pkg)
    if cached_file is not None:
        # load pre-computed data and keep only the columns covering this
        # chunk (sample indices converted to frame indices via the hop)
        X = torch.load(cached_file)
        beg_i = pkg['chunk_beg_i'] // self.hop
        end_i = pkg['chunk_end_i'] // self.hop
        pkg[self.name] = X[:, beg_i:end_i]
    else:
        # The waveform is only needed on this path, so it is converted here
        # rather than unconditionally.
        wav = pkg['chunk']
        if torch.is_tensor(wav):
            wav = wav.data.numpy().astype(np.float32)
        wav = self.frame_signal(wav, self.window)
        lpc = pysptk.sptk.lpc(wav, order=self.order)
        pkg[self.name] = torch.FloatTensor(lpc)
    # Overwrite resolution to hop length
    pkg['dec_resolution'] = self.hop
    return pkg
示例10: load_IR
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def load_IR(self, ir_file, ir_fmt):
    """Load an impulse response, peak-normalise it and locate its peak.

    Results are memoised in ``self.cache`` (keyed by the joined path) when
    the instance has such an attribute.

    :param ir_file: file path relative to ``self.data_root``
    :param ir_fmt: one of ``'mat'``, ``'imp'``, ``'txt'``, ``'npy'``, ``'wav'``
    :return: tuple ``(IR, p_max)`` - the response truncated to
        ``self.max_reverb_len`` entries and scaled so its largest absolute
        value is 1 (left unscaled if it is all zeros), and the index of
        that largest absolute value
    :raises TypeError: if ``ir_fmt`` is not a recognised format
    """
    ir_file = os.path.join(self.data_root, ir_file)
    has_cache = hasattr(self, 'cache')
    if has_cache and ir_file in self.cache:
        return self.cache[ir_file]

    if ir_fmt == 'mat':
        IR = loadmat(ir_file, squeeze_me=True, struct_as_record=False)
        IR = IR['risp_imp']
    elif ir_fmt in ('imp', 'txt'):
        IR = np.loadtxt(ir_file)
    elif ir_fmt == 'npy':
        IR = np.load(ir_file)
    elif ir_fmt == 'wav':
        IR, _ = sf.read(ir_file)
    else:
        # One formatted message instead of two positional args, which
        # rendered as a tuple.
        raise TypeError('Unrecognized IR format: {}'.format(ir_fmt))

    IR = IR[:self.max_reverb_len]
    # Normalise by the largest magnitude. The previous abs(max(IR)) ignored
    # a dominant negative peak and skipped responses with no positive sample.
    peak = np.max(np.abs(IR))
    if peak > 0:
        IR = IR / peak
    p_max = np.argmax(np.abs(IR))
    if has_cache:
        self.cache[ir_file] = (IR, p_max)
    return IR, p_max
示例11: load_filter
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def load_filter(self, filt_file, filt_fmt):
    """Load filter coefficients and scale them to a peak magnitude of 1.

    :param filt_file: file path relative to ``self.data_root``
    :param filt_fmt: one of ``'mat'``, ``'imp'``, ``'txt'``, ``'npy'``
    :return: the coefficients divided by their largest absolute value
        (returned unscaled if they are all zeros)
    :raises TypeError: if ``filt_fmt`` is not a recognised format
    """
    filt_file = os.path.join(self.data_root, filt_file)
    if filt_fmt == 'mat':
        filt_coeff = loadmat(filt_file, squeeze_me=True, struct_as_record=False)
        filt_coeff = filt_coeff['filt_coeff']
    elif filt_fmt in ('imp', 'txt'):
        filt_coeff = np.loadtxt(filt_file)
    elif filt_fmt == 'npy':
        filt_coeff = np.load(filt_file)
    else:
        # One formatted message instead of two positional args, which
        # rendered as a tuple.
        raise TypeError('Unrecognized filter format: {}'.format(filt_fmt))

    # Normalise by the largest magnitude. The previous abs(max(...))
    # mis-scaled filters whose dominant coefficient is negative and produced
    # NaN/inf when the maximum was 0.
    peak = np.max(np.abs(filt_coeff))
    if peak > 0:
        filt_coeff = filt_coeff / peak
    return filt_coeff
示例12: read_file_pair
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def _samples_by_channels(waveform):
    """Return ``waveform`` laid out as (samples, channels)."""
    if waveform.ndim == 1:
        # Single-channel signal: one column.
        return waveform.reshape((-1, 1))
    # librosa returns multi-channel audio as (channels, samples). A plain
    # reshape would scramble the samples across channels, so transpose.
    return waveform.T


def read_file_pair(filename_pair, mono=True):
    """
    given a pair of file names, read in both waveforms and upsample (through
    librosa's default interpolation) the downsampled waveform
    assumes the file name pair is of the form ("original", "downsampled")
    mono selects whether to read in mono or stereo formatted waveforms
    returns a pair of numpy arrays representing the original and upsampled
    waveform, each laid out as (samples, channels)
    """
    # Load the original at its native rate, then load the downsampled file
    # resampled to that same rate.
    true_waveform, true_br = librosa.load(filename_pair[0], sr=None,
                                          mono=mono)
    ds_waveform, _ = librosa.load(filename_pair[1], sr=true_br, mono=mono)
    # truth, example
    return _samples_by_channels(true_waveform), \
        _samples_by_channels(ds_waveform)
示例13: test_compute
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def test_compute(self):
    """The Tempogram step over pre-framed audio must match librosa's tempogram."""
    wav_path = resources.sample_wav_file('wav_1.wav')
    samples, rate = librosa.load(wav_path, sr=None)

    # Reference: power STFT -> mel (dB) -> onset strength -> tempogram.
    power_spec = np.abs(librosa.stft(samples, center=False, n_fft=2048, hop_length=1024)) ** 2
    mel_db = librosa.power_to_db(
        librosa.feature.melspectrogram(S=power_spec, n_mels=128, sr=rate)
    )
    onset_env = librosa.onset.onset_strength(S=mel_db, center=False)
    expected = librosa.feature.tempogram(
        onset_envelope=onset_env, sr=rate, win_length=11, center=True
    ).T

    # Step under test, fed the same audio as frames.
    framed = librosa.util.frame(samples, frame_length=2048, hop_length=1024).T
    step = pipeline.Tempogram(win_length=11)
    actual = step.process_frames(framed, rate, last=True)

    assert np.allclose(actual, expected)
示例14: test_compute_online
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def test_compute_online(self):
    """Chunked online processing must reproduce librosa's tempogram."""
    # Data: 41523 samples, 16 kHz
    # yields 40 frames with frame-size 2048 and hop-size 1024
    wav_path = resources.sample_wav_file('wav_1.wav')
    samples, rate = librosa.load(wav_path, sr=None)

    # Reference, computed on the signal zero-padded by one hop at the end.
    padded = np.pad(samples, (0, 1024), mode='constant', constant_values=0)
    power_spec = np.abs(librosa.stft(padded, center=False, n_fft=2048, hop_length=1024)) ** 2
    mel_db = librosa.power_to_db(
        librosa.feature.melspectrogram(S=power_spec, n_mels=128, sr=rate)
    )
    onset_env = librosa.onset.onset_strength(S=mel_db, center=False)
    expected = librosa.feature.tempogram(
        onset_envelope=onset_env, sr=rate, win_length=4, center=True
    ).T

    # Step under test: stream the track in chunks and stack the pieces.
    track = tracks.FileTrack('idx', wav_path)
    step = pipeline.Tempogram(win_length=4)
    pieces = list(step.process_track_online(track, 2048, 1024, chunk_size=5))
    actual = np.vstack(pieces)

    assert np.allclose(actual, expected)
示例15: test_compute_cleanup_after_one_utterance
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import load [as 别名]
def test_compute_cleanup_after_one_utterance(self):
    """Running the same Tempogram step twice must give the same result both times."""
    wav_path = resources.sample_wav_file('wav_1.wav')
    samples, rate = librosa.load(wav_path, sr=None)
    framed = librosa.util.frame(samples, frame_length=2048, hop_length=1024).T

    # Reference: power STFT -> mel (dB) -> onset strength -> tempogram.
    power_spec = np.abs(librosa.stft(samples, center=False, n_fft=2048, hop_length=1024)) ** 2
    mel_db = librosa.power_to_db(
        librosa.feature.melspectrogram(S=power_spec, n_mels=128, sr=rate)
    )
    onset_env = librosa.onset.onset_strength(S=mel_db, center=False)
    expected = librosa.feature.tempogram(
        onset_envelope=onset_env, sr=rate, win_length=11, center=True
    ).T

    step = pipeline.Tempogram(win_length=11)
    # First and second run on the same step instance: the second pass checks
    # that nothing left over from the first changes the output.
    for _ in range(2):
        actual = step.process_frames(framed, rate, last=True)
        assert np.allclose(actual, expected)