当前位置: 首页>>代码示例>>Python>>正文


Python librosa.magphase方法代码示例

本文整理汇总了Python中librosa.magphase方法的典型用法代码示例。如果您正苦于以下问题:Python librosa.magphase方法的具体用法是什么?Python librosa.magphase怎么用?有哪些Python librosa.magphase的使用例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块librosa的用法示例。


在下文中一共展示了librosa.magphase方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: griffin_lim

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def griffin_lim(mag, phase_angle, n_fft, hop, num_iters):
  """Iterative algorithm for phase retrieval from a magnitude spectrogram.

  Every iteration inverts the current complex spectrogram to audio. All
  iterations except the last then re-analyse that audio, keep only its phase,
  and recombine it with the fixed target magnitude `mag`.

  Args:
    mag: Magnitude spectrogram.
    phase_angle: Initial condition for phase.
    n_fft: Size of the FFT; also used as the window length.
    hop: Stride of FFT (hop length). Required; this function has no default.
    num_iters: Griffin-Lim iterations to perform. Values below 1 are treated
      like 1: the initial `mag`/`phase_angle` estimate is inverted once
      without any refinement (previously this raised UnboundLocalError).

  Returns:
    audio: 1-D array of float32 sound samples.
  """
  fft_config = dict(n_fft=n_fft, win_length=n_fft, hop_length=hop, center=True)
  ifft_config = dict(win_length=n_fft, hop_length=hop, center=True)
  complex_specgram = inv_magphase(mag, phase_angle)
  # The first num_iters - 1 passes refine the phase estimate; the final
  # inversion happens after the loop so a result always exists.
  for _ in range(num_iters - 1):
    audio = librosa.istft(complex_specgram, **ifft_config)
    complex_specgram = librosa.stft(audio, **fft_config)
    _, phase = librosa.magphase(complex_specgram)
    # Keep the re-estimated phase, but force the magnitude back to the target.
    complex_specgram = inv_magphase(mag, np.angle(phase))
  return librosa.istft(complex_specgram, **ifft_config)
开发者ID:magenta,项目名称:magenta,代码行数:26,代码来源:utils.py

示例2: __getitem__

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def __getitem__(self, idx):
        """Build one sample by mixing the clean utterance at ``idx`` with random noise.

        The clean file is loaded at 16 kHz, then an SNR and a noise clip are
        drawn at random and handed to ``synthesis_noisy_y``.

        Returns:
            In ``"train"`` mode: ``(noisy_mag, clean_mag, mask, n_frames)``,
            where the magnitudes are STFT magnitudes (n_fft=320, hop=160) and
            ``mask = sqrt(clean_mag**2 / (clean_mag + noise_mag)**2)``.
            In ``"validation"`` mode: ``(noisy_y, clean_y, name)``.
            In any other mode: ``(noisy_y, name)``.
        """
        clean_y, _ = librosa.load(self.clean_f_paths[idx], sr=16000)

        # Draw order (SNR first, noise clip second) is kept unchanged so that
        # seeded runs consume the random stream in the same sequence.
        snr = random.choice(self.snr_list)
        noise_entry = random.choice(self.all_noise_data)
        noise_name = noise_entry["name"]
        noise_y = noise_entry["y"]

        name = f"{str(idx).zfill(5)}_{noise_name}_{snr}"
        clean_y, noise_y, noisy_y = synthesis_noisy_y(clean_y, noise_y, snr)

        if self.mode == "validation":
            return noisy_y, clean_y, name
        if self.mode != "train":
            return noisy_y, name

        def stft_magnitude(wave):
            # Magnitude half of the STFT; the phase is not needed for training.
            spectrum = librosa.stft(wave, n_fft=320, hop_length=160, win_length=320)
            return librosa.magphase(spectrum)[0]

        clean_mag = stft_magnitude(clean_y)
        noise_mag = stft_magnitude(noise_y)
        noisy_mag = stft_magnitude(noisy_y)
        # NOTE(review): bins where clean_mag + noise_mag == 0 divide by zero here.
        mask = np.sqrt(clean_mag ** 2 / (clean_mag + noise_mag) ** 2)
        return noisy_mag, clean_mag, mask, clean_mag.shape[-1]
开发者ID:haoxiangsnr,项目名称:IRM-based-Speech-Enhancement-using-LSTM,代码行数:24,代码来源:irm_dataset.py

示例3: split_vocal

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def split_vocal(self, y):
        """Extract the foreground component of ``y`` and resynthesise it as audio.

        A nearest-neighbour median filter over the STFT magnitude estimates the
        repeating background. A soft mask then keeps what stands out from that
        background, and ``griffinlim`` turns the masked magnitude back into a
        waveform.

        Args:
            y: Audio signal passed to ``librosa.stft``.

        Returns:
            Whatever ``griffinlim`` returns for the masked magnitude.
        """
        magnitude, _ = librosa.magphase(librosa.stft(y))

        # To avoid being biased by local continuity, similar frames must be at
        # least `self._constrained` apart (converted to frames at `self._sr`).
        min_gap_frames = int(librosa.time_to_frames(self._constrained, sr=self._sr))
        background = librosa.decompose.nn_filter(
            magnitude,
            aggregate=np.median,
            metric='cosine',
            width=min_gap_frames,
        )
        # The background estimate may never exceed the observed magnitude.
        background = np.minimum(magnitude, background)

        # Soft mask with margin 10 and power 2 on the foreground residual.
        vocal_mask = librosa.util.softmask(magnitude - background,
                                           10 * background,
                                           power=2)

        return griffinlim(vocal_mask * magnitude)
开发者ID:RayanWang,项目名称:Speech_emotion_recognition_BLSTM,代码行数:24,代码来源:audio.py

示例4: griffin_lim

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def griffin_lim(magnitudes, n_iters=50, n_fft=1024):
  """Reconstruct an audio signal from a magnitude spectrogram via Griffin-Lim.

  Starts from a random phase, then alternates between analysing the current
  signal and re-imposing ``magnitudes`` on the analysed phase.

  Args:
    magnitudes: Magnitude spectrogram to invert.
    n_iters: Number of refinement iterations.
    n_fft: FFT size used for the analysis STFT inside the loop.

  Returns:
    The reconstructed signal, or ``np.array([0])`` when the very first
    inversion already contains non-finite samples.
  """
  # Random unit-modulus phase as the starting estimate.
  angles = np.exp(2j * np.pi * np.random.rand(*magnitudes.shape))
  signal = librosa.istft(magnitudes * angles)

  if np.isfinite(signal).all():
    for _ in range(n_iters):
      spectrum = librosa.stft(signal, n_fft=n_fft)
      angles = librosa.magphase(spectrum)[1]
      signal = librosa.istft(magnitudes * angles)
    return signal

  print("WARNING: audio was not finite, skipping audio saving")
  return np.array([0])
开发者ID:NVIDIA,项目名称:OpenSeq2Seq,代码行数:19,代码来源:text2speech.py

示例5: griffin_lim

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def griffin_lim(magnitudes, n_iters=50, n_fft=1024):
    """Run Griffin-Lim phase reconstruction on a magnitude spectrogram.

    Args:
        magnitudes: Magnitude spectrogram to turn into audio.
        n_iters: How many analysis/resynthesis rounds to run.
        n_fft: FFT size for the analysis STFT in each round.

    Returns:
        The reconstructed signal. If the initial random-phase inversion is not
        finite, a warning is logged and ``np.array([0])`` is returned instead.
    """
    random_phase = np.exp(2j * np.pi * np.random.rand(*magnitudes.shape))
    estimate = librosa.istft(magnitudes * random_phase)
    all_finite = np.isfinite(estimate).all()
    if not all_finite:
        logging.warning("audio was not finite, skipping audio saving")
        return np.array([0])

    for _ in range(n_iters):
        # Keep only the phase of the re-analysed estimate; the magnitude is
        # always reset to the target.
        _, current_phase = librosa.magphase(librosa.stft(estimate, n_fft=n_fft))
        estimate = librosa.istft(magnitudes * current_phase)
    return estimate
开发者ID:NVIDIA,项目名称:NeMo,代码行数:18,代码来源:tts_infer.py

示例6: parse_audio

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def parse_audio(self, audio_path):
        """Load ``audio_path`` and return its log-magnitude spectrogram tensor.

        The audio is optionally augmented and optionally mixed with injected
        noise (one Bernoulli draw with probability ``self.noise_prob``). The
        STFT window and hop are ``self.window_size`` and ``self.window_stride``
        multiplied by ``self.sample_rate``.

        Returns:
            A ``torch.FloatTensor`` holding ``log(1 + |STFT|)``, shifted by its
            mean and divided by its standard deviation when ``self.normalize``
            is set.
        """
        samples = (load_randomly_augmented_audio(audio_path, self.sample_rate)
                   if self.augment else load_audio(audio_path))

        # The random draw only happens when an injector is configured.
        if self.noiseInjector and np.random.binomial(1, self.noise_prob):
            samples = self.noiseInjector.inject_noise(samples)

        window_samples = int(self.sample_rate * self.window_size)
        stride_samples = int(self.sample_rate * self.window_stride)
        stft_matrix = librosa.stft(samples, n_fft=window_samples,
                                   hop_length=stride_samples,
                                   win_length=window_samples,
                                   window=self.window)
        magnitude, _ = librosa.magphase(stft_matrix)

        # S = log(S+1)
        spect = torch.FloatTensor(np.log1p(magnitude))
        if self.normalize:
            # Both statistics are taken before the in-place update.
            mean, std = spect.mean(), spect.std()
            spect.add_(-mean).div_(std)

        return spect
开发者ID:joseph-zhong,项目名称:LipReading,代码行数:28,代码来源:data_loader.py

示例7: extract_features

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def extract_features(audio_path, sample_rate, truncate, window_size,
                         window_stride, window, normalize_audio):
        """Load an audio file and return its log-magnitude spectrogram tensor.

        Args:
            audio_path: File handed to ``torchaudio.load``.
            sample_rate: Expected sample rate; an assertion fails if the file
                reports a different one.
            truncate: When truthy and positive, keep at most this many entries
                along the first dimension of the loaded tensor.
            window_size: Multiplied by ``sample_rate`` to get the FFT/window size.
            window_stride: Multiplied by ``sample_rate`` to get the hop length.
            window: Window specification forwarded to ``librosa.stft``.
            normalize_audio: When truthy, shift by the mean and divide by the
                standard deviation of the spectrogram.

        Returns:
            A ``torch.FloatTensor`` holding ``log(1 + |STFT|)``.
        """
        # Imports are deferred to call time and published as module globals.
        # NOTE(review): `torch` itself is not imported here; it is presumably
        # available at module level.
        global torchaudio, librosa, np
        import torchaudio
        import librosa
        import numpy as np

        waveform, file_rate = torchaudio.load(audio_path)
        if truncate and truncate > 0 and waveform.size(0) > truncate:
            waveform = waveform[:truncate]

        assert file_rate == sample_rate, \
            'Sample rate of %s != -sample_rate (%d vs %d)' \
            % (audio_path, file_rate, sample_rate)

        waveform = waveform.numpy()
        if waveform.ndim > 1:
            # assumes channels lie on axis 1 - TODO confirm against the
            # torchaudio version in use
            if waveform.shape[1] == 1:
                waveform = waveform.squeeze()
            else:
                waveform = waveform.mean(axis=1)  # average multiple channels

        window_samples = int(sample_rate * window_size)
        stride_samples = int(sample_rate * window_stride)
        stft_matrix = librosa.stft(waveform, n_fft=window_samples,
                                   hop_length=stride_samples,
                                   win_length=window_samples, window=window)
        magnitude = librosa.magphase(stft_matrix)[0]
        spect = torch.FloatTensor(np.log1p(magnitude))
        if normalize_audio:
            # Both statistics are taken before the in-place update.
            mean, std = spect.mean(), spect.std()
            spect.add_(-mean).div_(std)
        return spect
开发者ID:lizekang,项目名称:ITDD,代码行数:40,代码来源:audio_dataset.py

示例8: compute_mfcc_features

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def compute_mfcc_features(y,sr):
    """Stack MFCCs and RMS energy into one per-frame feature matrix.

    Both features use a hop of ``int(sr/100)`` samples, so their frames line up
    and can be concatenated column-wise.

    Args:
        y: Audio time series.
        sr: Sample rate of ``y``.

    Returns:
        Array with one row per frame: 12 MFCC columns followed by one RMS
        column computed from the STFT magnitude.
    """
    hop_length = int(sr/100)
    # `y` and `sr` are passed by keyword: librosa >= 0.10 made them
    # keyword-only, and keywords also work on older releases.
    mfcc_feat = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=12, n_mels=12,
                                     hop_length=hop_length,
                                     n_fft=int(sr/40)).T
    S, _ = librosa.magphase(librosa.stft(y, hop_length=hop_length))
    rms = librosa.feature.rms(S=S).T
    return np.hstack([mfcc_feat, rms])
开发者ID:jrgillick,项目名称:laughter-detection,代码行数:7,代码来源:compute_features.py

示例9: parse_audio

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def parse_audio(self, audio_path):
        """Turn the audio file at ``audio_path`` into a log-magnitude spectrogram.

        Steps: load (augmented when ``self.augment`` is set), optionally inject
        noise, take the STFT, apply ``log(1 + magnitude)``, and optionally
        normalise.

        Returns:
            A ``torch.FloatTensor`` spectrogram; mean/std normalised when
            ``self.normalize`` is set.
        """
        y = (load_randomly_augmented_audio(audio_path, self.sample_rate)
             if self.augment else load_audio(audio_path))

        injector = self.noiseInjector
        if injector:
            # Logged whenever an injector is configured, before the random
            # draw decides whether noise is actually added.
            logging.info("inject noise")
            if np.random.binomial(1, self.noise_prob):
                y = injector.inject_noise(y)

        # Window and hop are given as multiples of the sample rate.
        n_fft = int(self.sample_rate * self.window_size)
        hop_length = int(self.sample_rate * self.window_stride)

        # Short-time Fourier transform (STFT); only the magnitude is kept.
        stft_matrix = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                                   win_length=n_fft, window=self.window)
        magnitude = librosa.magphase(stft_matrix)[0]

        # S = log(S+1)
        spect = torch.FloatTensor(np.log1p(magnitude))

        if self.normalize:
            # Both statistics are taken before the in-place update.
            mean, std = spect.mean(), spect.std()
            spect.add_(-mean).div_(std)

        return spect
开发者ID:gentaiscool,项目名称:end2end-asr-pytorch,代码行数:34,代码来源:data_loader.py

示例10: transform_audio

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def transform_audio(self, y):
        '''Compute the magnitude and phase of the STFT of an audio buffer.

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT magnitude (converted with ``amplitude_to_db`` relative to
                its maximum when ``self.log`` is set)

            data['phase'] : np.ndarray, shape=(n_frames, 1 + n_fft//2)
                STFT phase angle
        '''
        # The expected frame count is derived from the duration so the
        # spectrogram can be fixed to exactly that length.
        target_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        spectrum = fix_length(
            stft(y, hop_length=self.hop_length, n_fft=self.n_fft),
            target_frames)

        mag, phase = magphase(spectrum)
        if self.log:
            mag = amplitude_to_db(mag, ref=np.max)

        # Transpose to frame-major order before indexing with self.idx.
        mag_out = to_dtype(mag.T[self.idx], self.dtype)
        phase_out = to_dtype(np.angle(phase.T)[self.idx], self.dtype)
        return {'mag': mag_out, 'phase': phase_out}
开发者ID:bmcfee,项目名称:pumpp,代码行数:32,代码来源:fft.py

示例11: transform_audio

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def transform_audio(self, y):
        '''Compute the magnitude and phase of the CQT of an audio buffer.

        Parameters
        ----------
        y : np.ndarray
            The audio buffer

        Returns
        -------
        data : dict
            data['mag'] : np.ndarray, shape = (n_frames, n_bins)
                The CQT magnitude (converted with ``amplitude_to_db`` relative
                to its maximum when ``self.log`` is set)

            data['phase']: np.ndarray, shape = mag.shape
                The CQT phase angle
        '''
        target_frames = self.n_frames(get_duration(y=y, sr=self.sr))

        # Twelve semitones per octave, scaled by the over-sampling factor.
        bins_per_octave = (self.over_sample * 12)
        transform = cqt(y=y, sr=self.sr, hop_length=self.hop_length,
                        fmin=self.fmin,
                        n_bins=(self.n_octaves * self.over_sample * 12),
                        bins_per_octave=bins_per_octave)
        transform = fix_length(transform, target_frames)

        cqtm, phase = magphase(transform)
        if self.log:
            cqtm = amplitude_to_db(cqtm, ref=np.max)

        # Transpose to frame-major order before indexing with self.idx.
        mag_out = to_dtype(cqtm.T[self.idx], self.dtype)
        phase_out = to_dtype(np.angle(phase).T[self.idx], self.dtype)
        return {'mag': mag_out, 'phase': phase_out}
开发者ID:bmcfee,项目名称:pumpp,代码行数:34,代码来源:cqt.py

示例12: phase

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def phase(y):
    """Return the phase component of the STFT of ``y``.

    The STFT uses a 512-point FFT, a hop of 256 samples and a Hamming window.
    The result is the second element returned by ``librosa.magphase``.
    """
    spectrum = librosa.stft(y, n_fft=512, hop_length=256, window='hamming')
    return librosa.magphase(spectrum)[1]
开发者ID:haoxiangsnr,项目名称:SNR-Based-Progressive-Learning-of-Deep-Neural-Network-for-Speech-Enhancement,代码行数:6,代码来源:utils.py

示例13: process_segment

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def process_segment(self, audio_segment):
        """Perturb an audio segment and convert it to a log-magnitude spectrogram.

        ``self.augmentor.perturb`` is called on the segment first, then
        ``audio_segment.samples`` is analysed with an STFT. The window and hop
        are ``cfg['window_size']`` and ``cfg['window_stride']`` multiplied by
        ``cfg['sample_rate']``.

        Returns:
            A ``torch.FloatTensor`` holding ``log(1 + |STFT|)``, mean/std
            normalised when ``cfg['normalize']`` is truthy.
        """
        self.augmentor.perturb(audio_segment)

        cfg = self.cfg
        window_samples = int(cfg['sample_rate'] * cfg['window_size'])
        stride_samples = int(cfg['sample_rate'] * cfg['window_stride'])
        stft_matrix = librosa.stft(audio_segment.samples,
                                   n_fft=window_samples,
                                   hop_length=stride_samples,
                                   win_length=window_samples,
                                   window=self.window)
        magnitude = librosa.magphase(stft_matrix)[0]

        spect = torch.FloatTensor(magnitude).log1p()
        if cfg['normalize']:
            # Both statistics are taken before the in-place update.
            mean, std = spect.mean(), spect.std()
            spect.add_(-mean).div_(std)
        return spect
开发者ID:ryanleary,项目名称:patter,代码行数:16,代码来源:features.py

示例14: parse_audio

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def parse_audio(self, audio_path, frame_start=0, frame_end=-1):
        """Load part of an audio file and return its log-magnitude spectrogram.

        Args:
            audio_path: File to load.
            frame_start: Forwarded unchanged to the loader.
            frame_end: Forwarded unchanged to the loader.

        Returns:
            A ``torch.FloatTensor`` holding ``log(1 + |STFT|)``, shifted by its
            mean and divided by its standard deviation when ``self.normalize``
            is set.
        """
        span = dict(frame_start=frame_start, frame_end=frame_end)
        if self.augment:
            y = load_randomly_augmented_audio(audio_path, self.sample_rate, **span)
        else:
            y = load_audio(audio_path, **span)

        # One Bernoulli draw decides whether noise is mixed in; the draw is
        # skipped entirely when no injector is configured.
        if self.noiseInjector and np.random.binomial(1, self.noise_prob):
            y = self.noiseInjector.inject_noise(y)

        n_fft = int(self.sample_rate * self.window_size)
        hop_length = int(self.sample_rate * self.window_stride)
        # STFT with the window length equal to the FFT size.
        stft_matrix = librosa.stft(y, n_fft=n_fft, hop_length=hop_length,
                                   win_length=n_fft, window=self.window)
        magnitude, _ = librosa.magphase(stft_matrix)

        # S = log(S+1)
        spect = torch.FloatTensor(np.log1p(magnitude))
        if self.normalize:
            # Both statistics are taken before the in-place update.
            mean, std = spect.mean(), spect.std()
            spect.add_(-mean).div_(std)

        return spect
开发者ID:mlperf,项目名称:inference,代码行数:28,代码来源:data_loader.py

示例15: extract_one_file

# 需要导入模块: import librosa [as 别名]
# 或者: from librosa import magphase [as 别名]
def extract_one_file(videofile, audiofile):
    """Compute one flattened dB spectrogram per video frame of an audio track.

    For each of the ``nFrames`` reported by ``get_fps(videofile)``, a window of
    audio is cut with ``extract_one_frame_data``, transformed with an STFT,
    converted to dB relative to its own maximum, scaled by 1/80, and stored
    without its last row.

    NOTE(review): ``nFrameSize``, ``NFFT`` and ``FREQ_DIM`` are not defined in
    this function; they are expected to exist at module level.

    Args:
        videofile: Video used only to obtain the frame count and FPS.
        audiofile: Audio file loaded at 44100 Hz.

    Returns:
        A list with one entry per video frame, each a flat list of floats.
    """
    print (" --- " + audiofile)
    nFrames, fps = get_fps(videofile)
    data, sr = librosa.load(audiofile, sr=44100)  # data is np.float32
    # Number of audio samples that correspond to one video frame.
    samples_per_frame = int(math.floor(float(sr) / fps))
    # Initial position in the sound stream; a negative value means zero
    # padding is needed at the front.
    position = samples_per_frame - nFrameSize

    dbspecs = []
    for _ in range(nFrames):
        frame_samples, position = extract_one_frame_data(
            data, position, nFrameSize, samples_per_frame)
        # Spectrogram transform: keep only the magnitude.
        spectrum = librosa.core.stft(y=frame_samples, n_fft=NFFT, hop_length=FREQ_DIM)
        magnitude, _ = librosa.magphase(spectrum)
        decibels = librosa.core.amplitude_to_db(magnitude, ref=np.max)
        # Scale the dB spectrogram by 1/80 (meant to land in [0, 1]).
        scaled = np.divide(np.absolute(decibels), 80.0)
        # Drop the last row, then store the frame as a flat list.
        dbspecs.append(scaled[0:-1, :].flatten().tolist())
    return dbspecs
开发者ID:haixpham,项目名称:end2end_AU_speech,代码行数:36,代码来源:extract_spectrogram.py


注:本文中的librosa.magphase方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。