本文整理汇总了Python中librosa.magphase方法的典型用法代码示例。如果您正苦于以下问题:Python librosa.magphase方法的具体用法?Python librosa.magphase怎么用?Python librosa.magphase使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块librosa的用法示例。
在下文中一共展示了librosa.magphase方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: griffin_lim
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def griffin_lim(mag, phase_angle, n_fft, hop, num_iters):
    """Recover audio from a magnitude spectrogram by iteratively refining phase.

    Each pass inverts the current complex spectrogram to audio. On every pass
    except the last, the audio is re-analysed, only its phase is kept, and that
    phase is recombined with the fixed magnitude ``mag``.

    Args:
      mag: Magnitude spectrogram that every iteration is constrained to.
      phase_angle: Initial phase angles paired with ``mag``.
      n_fft: FFT size; also used as the window length for STFT and ISTFT.
      hop: Hop length used for both STFT and ISTFT.
      num_iters: Number of inverse-STFT passes. ``audio`` is only assigned
        inside the loop, so this must be at least 1.

    Returns:
      audio: The result of the final inverse STFT.
    """
    # Settings shared by analysis and synthesis; the forward STFT additionally
    # needs the FFT size.
    synthesis_kwargs = dict(win_length=n_fft, hop_length=hop, center=True)
    analysis_kwargs = dict(n_fft=n_fft, **synthesis_kwargs)

    spec = inv_magphase(mag, phase_angle)
    final_step = num_iters - 1
    for step in range(num_iters):
        audio = librosa.istft(spec, **synthesis_kwargs)
        if step == final_step:
            # No need to re-estimate phase after the last synthesis.
            continue
        _, unit_phase = librosa.magphase(librosa.stft(audio, **analysis_kwargs))
        spec = inv_magphase(mag, np.angle(unit_phase))
    return audio
示例2: __getitem__
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def __getitem__(self, idx):
    """Build one noisy/clean example; the returned tuple depends on ``self.mode``.

    The clean utterance at ``self.clean_f_paths[idx]`` is loaded at 16 kHz, a
    random SNR and a random noise clip are drawn, and ``synthesis_noisy_y``
    mixes them.

    Args:
        idx: Index into ``self.clean_f_paths``.

    Returns:
        * ``"train"``: ``(noisy_mag, clean_mag, mask, n_frames)`` where the
          magnitudes come from a 320-point STFT with hop 160 and ``mask`` is
          ``clean_mag / (clean_mag + noise_mag)``.
        * ``"validation"``: ``(noisy_y, clean_y, name)``.
        * any other mode: ``(noisy_y, name)``.
    """
    clean_y, _ = librosa.load(self.clean_f_paths[idx], sr=16000)
    snr = random.choice(self.snr_list)
    noise_data = random.choice(self.all_noise_data)
    noise_name = noise_data["name"]
    noise_y = noise_data["y"]
    name = f"{str(idx).zfill(5)}_{noise_name}_{snr}"
    clean_y, noise_y, noisy_y = synthesis_noisy_y(clean_y, noise_y, snr)

    if self.mode == "train":
        # One STFT configuration for all three signals so the frames line up.
        stft_kwargs = dict(n_fft=320, hop_length=160, win_length=320)
        clean_mag, _ = librosa.magphase(librosa.stft(clean_y, **stft_kwargs))
        noise_mag, _ = librosa.magphase(librosa.stft(noise_y, **stft_kwargs))
        noisy_mag, _ = librosa.magphase(librosa.stft(noisy_y, **stft_kwargs))

        # Magnitudes are non-negative, so sqrt(c**2 / (c + n)**2) == c / (c + n).
        # Dividing directly avoids float underflow from squaring tiny values,
        # and the `where` guard leaves the mask at 0 in bins where both
        # magnitudes are zero instead of producing NaN from 0/0.
        total_mag = clean_mag + noise_mag
        mask = np.divide(
            clean_mag,
            total_mag,
            out=np.zeros_like(clean_mag),
            where=total_mag > 0,
        )
        n_frames = clean_mag.shape[-1]
        return noisy_mag, clean_mag, mask, n_frames
    elif self.mode == "validation":
        return noisy_y, clean_y, name
    else:
        return noisy_y, name
示例3: split_vocal
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def split_vocal(self, y):
    """Isolate the foreground of ``y`` with a nearest-neighbour soft mask.

    The STFT magnitude is compared against a median-aggregated
    nearest-neighbour filtered copy of itself; a soft mask built from the
    difference is applied to the magnitude and the result is handed to
    ``griffinlim``.

    Args:
        y: Audio signal passed to ``librosa.stft``.

    Returns:
        Whatever ``griffinlim`` returns for the masked magnitude spectrogram.
    """
    spec_mag, _ = librosa.magphase(librosa.stft(y))

    # To avoid being biased by local continuity, similar frames must be
    # separated by at least ``self._constrained`` (a time, converted here to a
    # frame count at ``self._sr``).
    min_separation = int(librosa.time_to_frames(self._constrained, sr=self._sr))
    background = librosa.decompose.nn_filter(
        spec_mag,
        aggregate=np.median,
        metric='cosine',
        width=min_separation,
    )
    # The filtered estimate must never exceed the observed magnitude.
    background = np.minimum(spec_mag, background)

    foreground_mask = librosa.util.softmask(
        spec_mag - background,
        10 * background,
        power=2,
    )
    return griffinlim(foreground_mask * spec_mag)
示例4: griffin_lim
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def griffin_lim(magnitudes, n_iters=50, n_fft=1024):
    """
    Griffin-Lim: estimate an audio signal from a magnitude spectrogram.

    Starts from uniformly random phase, then repeatedly re-analyses the
    synthesized signal and reuses only its phase with the given magnitudes.
    If the first synthesis contains non-finite samples, a warning is printed
    and ``np.array([0])`` is returned instead.
    """
    def _synthesize(unit_phase):
        # Combine the fixed magnitudes with a phase estimate and invert.
        return librosa.istft(magnitudes * unit_phase)

    random_phase = np.exp(2j * np.pi * np.random.rand(*magnitudes.shape))
    signal = _synthesize(random_phase)
    if not np.isfinite(signal).all():
        print("WARNING: audio was not finite, skipping audio saving")
        return np.array([0])

    for _ in range(n_iters):
        _, estimated_phase = librosa.magphase(librosa.stft(signal, n_fft=n_fft))
        signal = _synthesize(estimated_phase)
    return signal
示例5: griffin_lim
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def griffin_lim(magnitudes, n_iters=50, n_fft=1024):
    """
    Convert a magnitude spectrogram to audio with the Griffin-Lim algorithm.

    The phase is initialised at random and refined for ``n_iters`` rounds of
    STFT (size ``n_fft``) followed by inverse STFT. A non-finite first
    synthesis is logged as a warning and yields ``np.array([0])``.
    """
    initial_phase = np.exp(2j * np.pi * np.random.rand(*magnitudes.shape))
    audio = librosa.istft(magnitudes * initial_phase)

    all_finite = np.isfinite(audio).all()
    if all_finite:
        remaining = n_iters
        while remaining > 0:
            # Keep only the phase of the re-analysed signal.
            refined_phase = librosa.magphase(librosa.stft(audio, n_fft=n_fft))[1]
            audio = librosa.istft(magnitudes * refined_phase)
            remaining -= 1
        return audio

    logging.warning("audio was not finite, skipping audio saving")
    return np.array([0])
示例6: parse_audio
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def parse_audio(self, audio_path):
    """Load an audio file and return its log-magnitude spectrogram tensor.

    The audio is loaded (with random augmentation when ``self.augment`` is
    set) and, if a noise injector is configured, noise is added when a
    Bernoulli draw with probability ``self.noise_prob`` succeeds. The STFT
    magnitude is then passed through ``log1p`` and optionally standardised to
    zero mean and unit standard deviation.

    Args:
        audio_path: Path handed to the audio loader.

    Returns:
        A ``torch.FloatTensor`` holding the (optionally normalised) spectrogram.
    """
    if self.augment:
        y = load_randomly_augmented_audio(audio_path, self.sample_rate)
    else:
        y = load_audio(audio_path)

    # The random draw only happens when an injector is configured.
    if self.noiseInjector and np.random.binomial(1, self.noise_prob):
        y = self.noiseInjector.inject_noise(y)

    # Window size and stride are multiplied by the sample rate to get counts
    # in samples; the window length equals the FFT size.
    window_samples = int(self.sample_rate * self.window_size)
    stride_samples = int(self.sample_rate * self.window_stride)
    stft_matrix = librosa.stft(
        y,
        n_fft=window_samples,
        hop_length=stride_samples,
        win_length=window_samples,
        window=self.window,
    )
    magnitude, _ = librosa.magphase(stft_matrix)

    # S = log(S + 1)
    spect = torch.FloatTensor(np.log1p(magnitude))
    if self.normalize:
        # Compute both statistics before mutating the tensor in place.
        mean, std = spect.mean(), spect.std()
        spect.add_(-mean)
        spect.div_(std)
    return spect
示例7: extract_features
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def extract_features(audio_path, sample_rate, truncate, window_size,
                     window_stride, window, normalize_audio):
    """Load an audio file and return its log-magnitude spectrogram tensor.

    Args:
        audio_path: File passed to ``torchaudio.load``.
        sample_rate: Expected sample rate; a mismatch fails an assertion.
        truncate: If truthy and positive, keep at most this many entries
            along the first axis of the loaded tensor.
        window_size: Multiplied by ``sample_rate`` to get the FFT/window size.
        window_stride: Multiplied by ``sample_rate`` to get the hop length.
        window: Window specification forwarded to ``librosa.stft``.
        normalize_audio: If truthy, standardise the spectrogram in place.

    Returns:
        A ``torch.FloatTensor`` with ``log1p`` of the STFT magnitude.
    """
    # Imports are performed lazily and published as module globals.
    global torchaudio, librosa, np
    import torchaudio
    import librosa
    import numpy as np

    sound, sample_rate_ = torchaudio.load(audio_path)
    if truncate and truncate > 0 and sound.size(0) > truncate:
        sound = sound[:truncate]

    assert sample_rate_ == sample_rate, \
        'Sample rate of %s != -sample_rate (%d vs %d)' \
        % (audio_path, sample_rate_, sample_rate)

    sound = sound.numpy()
    # NOTE(review): the code treats axis 0 as samples and axis 1 as channels;
    # confirm this matches the torchaudio version in use.
    if sound.ndim > 1:
        single_channel = sound.shape[1] == 1
        # Multi-channel audio is averaged down to one channel.
        sound = sound.squeeze() if single_channel else sound.mean(axis=1)

    fft_size = int(sample_rate * window_size)
    stride = int(sample_rate * window_stride)
    stft_matrix = librosa.stft(sound, n_fft=fft_size, hop_length=stride,
                               win_length=fft_size, window=window)
    magnitude, _ = librosa.magphase(stft_matrix)
    spect = torch.FloatTensor(np.log1p(magnitude))

    if normalize_audio:
        # Compute both statistics before mutating the tensor in place.
        mean, std = spect.mean(), spect.std()
        spect.add_(-mean)
        spect.div_(std)
    return spect
示例8: compute_mfcc_features
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def compute_mfcc_features(y, sr):
    """Return per-frame MFCCs with an RMS energy column appended.

    Twelve MFCCs are computed from a 12-band mel spectrogram using a hop of
    ``sr / 100`` samples and an FFT size of ``sr / 40`` samples. RMS energy is
    derived from the magnitude of a default-size STFT taken with the same hop.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y``.

    Returns:
        Array with one row per frame: the 12 MFCCs followed by the RMS value.
    """
    hop_length = int(sr / 100)
    # Pass y and sr by keyword: librosa >= 0.10 made them keyword-only, and
    # keywords are accepted by older releases as well.
    mfcc_feat = librosa.feature.mfcc(
        y=y,
        sr=sr,
        n_mfcc=12,
        n_mels=12,
        hop_length=hop_length,
        n_fft=int(sr / 40),
    ).T
    S, _ = librosa.magphase(librosa.stft(y, hop_length=hop_length))
    rms = librosa.feature.rms(S=S).T
    return np.hstack([mfcc_feat, rms])
示例9: parse_audio
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def parse_audio(self, audio_path):
    """Turn an audio file into a log-magnitude spectrogram tensor.

    Loads the audio (randomly augmented when ``self.augment`` is set). When a
    noise injector is configured, an "inject noise" message is logged and
    noise is added with probability ``self.noise_prob``. The STFT magnitude is
    compressed with ``log1p`` and, if ``self.normalize`` is set, standardised.

    Args:
        audio_path: Path handed to the audio loader.

    Returns:
        A ``torch.FloatTensor`` holding the spectrogram.
    """
    y = (
        load_randomly_augmented_audio(audio_path, self.sample_rate)
        if self.augment
        else load_audio(audio_path)
    )

    if self.noiseInjector:
        # The message is logged whenever an injector exists, even if the draw
        # below decides not to add noise.
        logging.info("inject noise")
        if np.random.binomial(1, self.noise_prob):
            y = self.noiseInjector.inject_noise(y)

    # Short-time Fourier transform (STFT); window length equals the FFT size.
    fft_size = int(self.sample_rate * self.window_size)
    stride = int(self.sample_rate * self.window_stride)
    stft_matrix = librosa.stft(y, n_fft=fft_size, hop_length=stride,
                               win_length=fft_size, window=self.window)
    magnitude, _ = librosa.magphase(stft_matrix)

    # S = log(S + 1)
    spect = torch.FloatTensor(np.log1p(magnitude))
    if not self.normalize:
        return spect

    # Compute both statistics before mutating the tensor in place.
    mean, std = spect.mean(), spect.std()
    spect.add_(-mean)
    spect.div_(std)
    return spect
示例10: transform_audio
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def transform_audio(self, y):
    '''Compute STFT magnitude and phase features.

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray
            STFT magnitude, transposed and indexed by ``self.idx``;
            converted to dB relative to its maximum when ``self.log`` is set
        data['phase'] : np.ndarray
            STFT phase angle, transposed and indexed the same way
    '''
    target_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    # NOTE(review): if fix_length is librosa.util.fix_length, librosa >= 0.10
    # requires the size as a keyword argument - confirm against the import.
    spectrum = fix_length(
        stft(y, hop_length=self.hop_length, n_fft=self.n_fft),
        target_frames,
    )

    magnitude, unit_phase = magphase(spectrum)
    if self.log:
        magnitude = amplitude_to_db(magnitude, ref=np.max)

    features = {}
    features['mag'] = to_dtype(magnitude.T[self.idx], self.dtype)
    features['phase'] = to_dtype(np.angle(unit_phase.T)[self.idx], self.dtype)
    return features
示例11: transform_audio
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def transform_audio(self, y):
    '''Compute constant-Q transform magnitude and phase features.

    Parameters
    ----------
    y : np.ndarray
        The audio buffer

    Returns
    -------
    data : dict
        data['mag'] : np.ndarray
            CQT magnitude, transposed and indexed by ``self.idx``;
            converted to dB relative to its maximum when ``self.log`` is set
        data['phase'] : np.ndarray
            CQT phase angle, transposed and indexed the same way
    '''
    target_frames = self.n_frames(get_duration(y=y, sr=self.sr))

    # Bin layout: ``over_sample`` bins per semitone across ``n_octaves``.
    bins_per_octave = self.over_sample * 12
    total_bins = self.n_octaves * self.over_sample * 12
    transform = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                    fmin=self.fmin,
                    n_bins=total_bins,
                    bins_per_octave=bins_per_octave)

    # NOTE(review): if fix_length is librosa.util.fix_length, librosa >= 0.10
    # requires the size as a keyword argument - confirm against the import.
    transform = fix_length(transform, target_frames)

    magnitude, unit_phase = magphase(transform)
    if self.log:
        magnitude = amplitude_to_db(magnitude, ref=np.max)

    features = {}
    features['mag'] = to_dtype(magnitude.T[self.idx], self.dtype)
    features['phase'] = to_dtype(np.angle(unit_phase).T[self.idx], self.dtype)
    return features
示例12: phase
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def phase(y):
    """Return the phase component of the STFT of ``y``.

    The STFT uses a 512-point FFT, a hop of 256 samples and a Hamming window;
    the second element of ``librosa.magphase`` is returned.
    """
    spectrum = librosa.stft(y, n_fft=512, hop_length=256, window='hamming')
    return librosa.magphase(spectrum)[1]
开发者ID:haoxiangsnr,项目名称:SNR-Based-Progressive-Learning-of-Deep-Neural-Network-for-Speech-Enhancement,代码行数:6,代码来源:utils.py
示例13: process_segment
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def process_segment(self, audio_segment):
    """Augment an audio segment and return its log-magnitude spectrogram.

    ``self.augmentor.perturb`` is applied to the segment first. The STFT of
    ``audio_segment.samples`` is then taken with sizes derived from
    ``self.cfg`` ('sample_rate', 'window_size', 'window_stride'); its
    magnitude goes through ``log1p`` and, when ``self.cfg['normalize']`` is
    set, is standardised in place.

    Args:
        audio_segment: Object exposing ``samples``; passed to the augmentor.

    Returns:
        A ``torch.FloatTensor`` spectrogram.
    """
    self.augmentor.perturb(audio_segment)

    fft_size = int(self.cfg['sample_rate'] * self.cfg['window_size'])
    stride = int(self.cfg['sample_rate'] * self.cfg['window_stride'])
    stft_matrix = librosa.stft(
        audio_segment.samples,
        n_fft=fft_size,
        hop_length=stride,
        win_length=fft_size,
        window=self.window,
    )
    magnitude = librosa.magphase(stft_matrix)[0]
    spect = torch.FloatTensor(magnitude).log1p()

    if not self.cfg['normalize']:
        return spect

    # Compute both statistics before mutating the tensor in place.
    mean, std = spect.mean(), spect.std()
    spect.add_(-mean)
    spect.div_(std)
    return spect
示例14: parse_audio
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def parse_audio(self, audio_path, frame_start=0, frame_end=-1):
    """Load part of an audio file and return its log-magnitude spectrogram.

    Args:
        audio_path: Path handed to the audio loader.
        frame_start: Forwarded unchanged to the loader.
        frame_end: Forwarded unchanged to the loader.

    Returns:
        A ``torch.FloatTensor`` with ``log1p`` of the STFT magnitude,
        standardised in place when ``self.normalize`` is set.
    """
    span = dict(frame_start=frame_start, frame_end=frame_end)
    if self.augment:
        y = load_randomly_augmented_audio(audio_path, self.sample_rate, **span)
    else:
        y = load_audio(audio_path, **span)

    # Noise is added with probability ``self.noise_prob``; the draw only
    # happens when an injector is configured.
    if self.noiseInjector and np.random.binomial(1, self.noise_prob):
        y = self.noiseInjector.inject_noise(y)

    # STFT with the window length equal to the FFT size.
    fft_size = int(self.sample_rate * self.window_size)
    stride = int(self.sample_rate * self.window_stride)
    stft_matrix = librosa.stft(y, n_fft=fft_size, hop_length=stride,
                               win_length=fft_size, window=self.window)
    magnitude = librosa.magphase(stft_matrix)[0]

    # S = log(S + 1)
    spect = torch.FloatTensor(np.log1p(magnitude))
    if self.normalize:
        # Compute both statistics before mutating the tensor in place.
        mean, std = spect.mean(), spect.std()
        spect.add_(-mean)
        spect.div_(std)
    return spect
示例15: extract_one_file
# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def extract_one_file(videofile, audiofile):
    """Build one flattened, scaled dB spectrogram per video frame.

    The video's frame count and FPS come from ``get_fps``; the audio is
    loaded at 44100 Hz. For each video frame, ``extract_one_frame_data``
    supplies an audio window whose STFT magnitude is converted to dB
    relative to its maximum, scaled by 1/80, stripped of its last row and
    flattened.

    Args:
        videofile: Video file passed to ``get_fps``.
        audiofile: Audio file passed to ``librosa.load``.

    Returns:
        A list with one flat list of floats per video frame.
    """
    # NOTE(review): nFrameSize, NFFT and FREQ_DIM are not defined in this
    # function; presumably they are module-level constants - confirm. The
    # original carried a commented-out formula for the frame size:
    #   nFrameSize = (nStepsPerFrame - 1) * nSamPerStep + nSamPerFFTWindow
    print (" --- " + audiofile)

    nFrames, fps = get_fps(videofile)
    data, sr = librosa.load(audiofile, sr=44100)  # data is np.float32

    # Number of audio samples per video frame.
    nSamPerFrame = int(math.floor(float(sr) / fps))

    # Initial position in the sound stream; a negative value means zero
    # padding is needed at the front.
    curPos = nSamPerFrame - nFrameSize

    dbspecs = []
    for _ in range(nFrames):
        frameData, curPos = extract_one_frame_data(data, curPos, nFrameSize, nSamPerFrame)

        # Spectrogram transform: magnitude -> dB relative to the frame maximum.
        magnitude, _ = librosa.magphase(
            librosa.core.stft(y=frameData, n_fft=NFFT, hop_length=FREQ_DIM)
        )
        DB = librosa.core.amplitude_to_db(magnitude, ref=np.max)

        # Scale the absolute dB values by 80, then drop the last row.
        scaled = np.divide(np.absolute(DB), 80.0)
        dbspecs.append(scaled[:-1, :].flatten().tolist())
    return dbspecs