This page collects typical usage examples of the Python method hparams.hparams.silence_threshold. If you are wondering what hparams.silence_threshold does and how to use it, the curated code examples below may help. You can also look further into the enclosing class hparams.hparams.
Four code examples of the hparams.silence_threshold method are shown below, ordered by popularity by default.
Example 1: adjust_time_resolution
# Required import: from hparams import hparams [as alias]
# Or: from hparams.hparams import silence_threshold [as alias]
import numpy as np

def adjust_time_resolution(quantized, mel):
    """Adjust time resolution by repeating features.

    Args:
        quantized (ndarray): (T,)
        mel (ndarray): (N, D)

    Returns:
        tuple: Tuple of (T,) and (T, D)
    """
    assert len(quantized.shape) == 1
    assert len(mel.shape) == 2

    # Upsample the mel frames so both streams share one time axis
    upsample_factor = quantized.size // mel.shape[0]
    mel = np.repeat(mel, upsample_factor, axis=0)
    n_pad = quantized.size - mel.shape[0]
    if n_pad != 0:
        assert n_pad > 0
        mel = np.pad(mel, [(0, n_pad), (0, 0)], mode="constant", constant_values=0)

    # Trim leading and trailing silence
    start, end = start_and_end_indices(quantized, hparams.silence_threshold)
    return quantized[start:end], mel[start:end, :]
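As a quick sanity check, here is a minimal sketch of how adjust_time_resolution might be exercised on toy data. The array values and the 80-dimensional mel frames are illustrative assumptions, start_and_end_indices is the helper from Example 3 below, and hparams.silence_threshold is assumed to be 2.

import numpy as np

# 1,000 mu-law-quantized samples: with 8-bit mu-law, silence sits near 127,
# so the flat edges of this toy signal will be trimmed away.
quantized = np.full(1000, 127, dtype=np.int16)
quantized[200:800] = 200  # pretend this region is speech
mel = np.random.randn(4, 80).astype(np.float32)  # 4 frames, upsampled 250x

q, m = adjust_time_resolution(quantized, mel)
assert q.shape[0] == m.shape[0]  # both streams now share one time axis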
Example 2: trim
# Required import: from hparams import hparams [as alias]
# Or: from hparams.hparams import silence_threshold [as alias]
def trim(quantized):
    # Drop leading and trailing silence from a mu-law-quantized signal
    start, end = start_and_end_indices(quantized, hparams.silence_threshold)
    return quantized[start:end]
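A quick illustrative call with toy values, again assuming hparams.silence_threshold is 2 (that default is an assumption, not something this page states):

import numpy as np

quantized = np.array([127, 127, 180, 90, 200, 127, 127], dtype=np.int16)
print(trim(quantized))  # -> [180  90]
# Note: start_and_end_indices returns the index of the last non-silent sample,
# and the exclusive slice quantized[start:end] drops that final sample (200).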
Example 3: start_and_end_indices
# Required import: from hparams import hparams [as alias]
# Or: from hparams.hparams import silence_threshold [as alias]
def start_and_end_indices(quantized, silence_threshold=2):
    # 127 is the midpoint of 8-bit mu-law quantization, i.e. digital silence.
    # Scan forward for the first sample deviating by more than the threshold...
    for start in range(quantized.size):
        if abs(quantized[start] - 127) > silence_threshold:
            break
    # ...and backward for the last such sample.
    for end in range(quantized.size - 1, 1, -1):
        if abs(quantized[end] - 127) > silence_threshold:
            break

    assert abs(quantized[start] - 127) > silence_threshold
    assert abs(quantized[end] - 127) > silence_threshold

    return start, end
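A toy demonstration of the index semantics (the values are illustrative only):

import numpy as np

quantized = np.array([127, 126, 128, 50, 210, 127, 125, 127], dtype=np.int16)
print(start_and_end_indices(quantized, silence_threshold=2))  # -> (3, 4)
# Index 3 (value 50) is the first sample more than 2 away from 127;
# index 4 (value 210) is the last.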
Example 4: _process_utterance
# Required import: from hparams import hparams [as alias]
# Or: from hparams.hparams import silence_threshold [as alias]
# Besides hparams, this example also uses os, numpy (np), the project's audio
# module, and the mu-law helpers P, is_mulaw_quantize and is_mulaw.
def _process_utterance(out_dir, index, wav_path, text):
    # Load the audio to a numpy array:
    wav = audio.load_wav(wav_path)

    # Rescale the waveform to a target peak amplitude
    if hparams.rescaling:
        wav = wav / np.abs(wav).max() * hparams.rescaling_max

    # Mu-law quantize
    if is_mulaw_quantize(hparams.input_type):
        # [0, quantize_channels)
        out = P.mulaw_quantize(wav, hparams.quantize_channels)

        # Trim silences
        start, end = audio.start_and_end_indices(out, hparams.silence_threshold)
        wav = wav[start:end]
        out = out[start:end]
        constant_values = P.mulaw_quantize(0, hparams.quantize_channels)
        out_dtype = np.int16
    elif is_mulaw(hparams.input_type):
        # [-1, 1]
        out = P.mulaw(wav, hparams.quantize_channels)
        constant_values = P.mulaw(0.0, hparams.quantize_channels)
        out_dtype = np.float32
    else:
        # [-1, 1]
        out = wav
        constant_values = 0.0
        out_dtype = np.float32

    # Compute a mel-scale spectrogram from the trimmed wav: (N, D)
    mel_spectrogram = audio.melspectrogram(wav).astype(np.float32).T

    # lws pads zeros internally before performing the STFT;
    # this is needed to adjust time resolution between audio and mel-spectrogram.
    l, r = audio.lws_pad_lr(wav, hparams.fft_size, audio.get_hop_size())

    # Zero-pad the quantized signal accordingly
    out = np.pad(out, (l, r), mode="constant", constant_values=constant_values)
    N = mel_spectrogram.shape[0]
    assert len(out) >= N * audio.get_hop_size()

    # Time resolution adjustment: ensure the length of the raw audio is a
    # multiple of hop_size so that a transposed convolution can upsample it.
    out = out[:N * audio.get_hop_size()]
    assert len(out) % audio.get_hop_size() == 0

    timesteps = len(out)

    # Write the audio and spectrogram to disk:
    audio_filename = 'ljspeech-audio-%05d.npy' % index
    mel_filename = 'ljspeech-mel-%05d.npy' % index
    np.save(os.path.join(out_dir, audio_filename),
            out.astype(out_dtype), allow_pickle=False)
    np.save(os.path.join(out_dir, mel_filename),
            mel_spectrogram.astype(np.float32), allow_pickle=False)

    # Return a tuple describing this training example:
    return (audio_filename, mel_filename, timesteps, text)
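For orientation, here is a hypothetical driver loop showing how the tuples returned by _process_utterance might be collected into a train.txt manifest. The metadata.csv layout, file names, and preprocess signature follow common LJSpeech preprocessing conventions and are assumptions, not something this page specifies.

import os

def preprocess(metadata_path, wav_dir, out_dir):
    # Hypothetical: each line of LJSpeech's metadata.csv is
    # "id|raw text|normalized text".
    os.makedirs(out_dir, exist_ok=True)
    results = []
    with open(metadata_path, encoding="utf-8") as f:
        for index, line in enumerate(f, start=1):
            parts = line.strip().split("|")
            wav_path = os.path.join(wav_dir, "%s.wav" % parts[0])
            results.append(_process_utterance(out_dir, index, wav_path, parts[2]))
    # One "audio_file|mel_file|timesteps|text" row per utterance.
    with open(os.path.join(out_dir, "train.txt"), "w", encoding="utf-8") as f:
        for row in results:
            f.write("|".join(map(str, row)) + "\n")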