This article collects typical usage examples of the pyworld.cheaptrick method in Python. If you are wondering what pyworld.cheaptrick does, how to call it, or what real code using it looks like, the curated method examples below may help. You can also explore further usage examples from the pyworld module, in which this method is defined.
The following shows 8 code examples of pyworld.cheaptrick, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
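Before the examples, here is a minimal sketch of the basic call pattern: pyworld.cheaptrick takes a contiguous float64 waveform, an F0 contour, the matching time axis and the sampling rate, and returns a smoothed spectral envelope of shape (number of frames, fft_size // 2 + 1). The file path 'speech.wav' and the use of soundfile for loading are placeholders for illustration, not part of the examples below.
import numpy as np
import soundfile as sf   # assumed here only for loading audio; any loader that yields a mono array works
import pyworld

x, fs = sf.read('speech.wav')                   # 'speech.wav' is a placeholder path (mono file assumed)
x = np.ascontiguousarray(x, dtype=np.float64)   # WORLD expects a contiguous float64 signal
f0, t = pyworld.harvest(x, fs)                  # F0 contour and its time axis
sp = pyworld.cheaptrick(x, f0, t, fs)           # smoothed spectral envelope, shape (len(f0), fft_size // 2 + 1)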
Example 1: world_decompose
# Required module: import pyworld [as alias]
# Or: from pyworld import cheaptrick [as alias]
# (this example also assumes numpy is imported as np)
def world_decompose(wav, fs, frame_period=5.0):
    # Decompose a speech signal into F0, spectral envelope and aperiodicity using WORLD
    wav = wav.astype(np.float64)
    f0, timeaxis = pyworld.harvest(wav, fs, frame_period=frame_period, f0_floor=71.0, f0_ceil=800.0)
    sp = pyworld.cheaptrick(wav, f0, timeaxis, fs)
    ap = pyworld.d4c(wav, f0, timeaxis, fs)
    return f0, timeaxis, sp, ap
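A hedged usage sketch for the function above; 'sample.wav', the 16 kHz target rate and the use of librosa are placeholders, not part of the original project.
import librosa

wav, fs = librosa.load('sample.wav', sr=16000, mono=True)   # placeholder path and rate; returns float32, cast inside the function
f0, timeaxis, sp, ap = world_decompose(wav, fs, frame_period=5.0)
print(f0.shape, sp.shape, ap.shape)   # with the defaults at 16 kHz the envelope and aperiodicity have 513 bins per frame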
Example 2: analyze
# Required module: import pyworld [as alias]
# Or: from pyworld import cheaptrick [as alias]
def analyze(self, x):
    """Analyze acoustic features based on WORLD

    Analyzes F0, spectral envelope, and aperiodicity.

    Parameters
    ----------
    x : array, shape (`T`,)
        Monaural speech signal in the time domain

    Returns
    -------
    f0 : array, shape (`T`,)
        F0 sequence
    spc : array, shape (`T`, `fftl / 2 + 1`)
        Spectral envelope sequence
    ap : array, shape (`T`, `fftl / 2 + 1`)
        Aperiodicity sequence
    """
    f0, time_axis = pyworld.harvest(x, self.fs, f0_floor=self.minf0,
                                    f0_ceil=self.maxf0, frame_period=self.shiftms)
    spc = pyworld.cheaptrick(x, f0, time_axis, self.fs,
                             fft_size=self.fftl)
    ap = pyworld.d4c(x, f0, time_axis, self.fs, fft_size=self.fftl)
    assert spc.shape == ap.shape
    return f0, spc, ap
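The method above expects an object that carries the WORLD analysis settings as attributes. A minimal, hypothetical wrapper is sketched below; the class name and the default values are assumptions, not part of the original project.
class WorldAnalyzer(object):
    analyze = analyze   # bind the function shown above as a method

    def __init__(self, fs, minf0=40.0, maxf0=700.0, shiftms=5.0, fftl=1024):
        self.fs = fs            # sampling rate of the input signal
        self.minf0 = minf0      # F0 search floor [Hz]
        self.maxf0 = maxf0      # F0 search ceiling [Hz]
        self.shiftms = shiftms  # frame shift [ms]
        self.fftl = fftl        # FFT length used by CheapTrick / D4C

# f0, spc, ap = WorldAnalyzer(fs=16000).analyze(x)   # x: contiguous float64 mono signal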
Example 3: cal_mcep
# Required module: import pyworld [as alias]
# Or: from pyworld import cheaptrick [as alias]
# (SAMPLE_RATE, FEATURE_DIM, FFTSIZE, FRAMES and pad_wav_to_get_fixed_frames are defined elsewhere in the source project)
def cal_mcep(wav_ori, fs=SAMPLE_RATE, ispad=False, frame_period=0.005, dim=FEATURE_DIM, fft_size=FFTSIZE):
    '''Compute WORLD features (F0, spectral envelope, aperiodicity and a low-dimensional coded envelope) for a waveform.

    frame_period is only used by pad_wav_to_get_fixed_frames.
    '''
    if ispad:
        wav, pad_length = pad_wav_to_get_fixed_frames(
            wav_ori, frames=FRAMES, frame_period=frame_period, sr=fs)
    else:
        wav = wav_ori
    # Harvest F0 extraction algorithm.
    f0, timeaxis = pyworld.harvest(wav, fs)
    # CheapTrick harmonic spectral envelope estimation algorithm.
    sp = pyworld.cheaptrick(wav, f0, timeaxis, fs, fft_size=fft_size)
    # D4C aperiodicity estimation algorithm.
    ap = pyworld.d4c(wav, f0, timeaxis, fs, fft_size=fft_size)
    # Dimensionality reduction of the envelope: (n, fft_size // 2 + 1) -> (n, dim)
    coded_sp = pyworld.code_spectral_envelope(sp, fs, dim)
    coded_sp = coded_sp.T   # (dim, n)
    res = {
        'f0': f0,              # (n,)
        'ap': ap,              # (n, fft_size // 2 + 1)
        'sp': sp,              # (n, fft_size // 2 + 1)
        'coded_sp': coded_sp,  # (dim, n)
    }
    return res
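Example 3 reduces the envelope with pyworld.code_spectral_envelope; the coded form can be expanded back with pyworld.decode_spectral_envelope. Below is a self-contained, hedged sketch of that round trip; the 16 kHz rate, 1024-point FFT and 36 dimensions are assumptions standing in for the project's SAMPLE_RATE, FFTSIZE and FEATURE_DIM, and a synthetic tone stands in for real speech.
import numpy as np
import pyworld

fs, fft_size, dim = 16000, 1024, 36   # assumed settings for illustration

# One second of a synthetic tone stands in for real speech.
t = np.arange(fs) / fs
wav = np.ascontiguousarray(0.5 * np.sin(2 * np.pi * 220.0 * t), dtype=np.float64)

f0, timeaxis = pyworld.harvest(wav, fs)
sp = pyworld.cheaptrick(wav, f0, timeaxis, fs, fft_size=fft_size)
ap = pyworld.d4c(wav, f0, timeaxis, fs, fft_size=fft_size)

coded_sp = pyworld.code_spectral_envelope(sp, fs, dim)                   # (n, dim)
sp_restored = pyworld.decode_spectral_envelope(coded_sp, fs, fft_size)   # back to (n, fft_size // 2 + 1)
y = pyworld.synthesize(f0, sp_restored, ap, fs, 5.0)                     # resynthesis from the decoded envelope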
Example 4: __call__
# Required module: import pyworld [as alias]
# Or: from pyworld import cheaptrick [as alias]
# (numpy, pysptk, world4py and the project's Wave / AcousticFeature classes are also required)
def __call__(self, data: Wave, test=None):
    x = data.wave.astype(numpy.float64)
    fs = data.sampling_rate
    if self._f0_estimating_method == 'dio':
        _f0, t = pyworld.dio(
            x,
            fs,
            frame_period=self._frame_period,
            f0_floor=self._f0_floor,
            f0_ceil=self._f0_ceil,
        )
    else:
        from world4py.np import apis
        _f0, t = apis.harvest(
            x,
            fs,
            frame_period=self._frame_period,
            f0_floor=self._f0_floor,
            f0_ceil=self._f0_ceil,
        )
    f0 = pyworld.stonemask(x, _f0, t, fs)                 # StoneMask F0 refinement
    spectrogram = pyworld.cheaptrick(x, f0, t, fs)        # CheapTrick spectral envelope
    aperiodicity = pyworld.d4c(x, f0, t, fs)              # D4C aperiodicity
    mfcc = pysptk.sp2mc(spectrogram, order=self._order, alpha=self._alpha)  # mel-cepstral coefficients
    voiced = ~(f0 == 0)  # type: numpy.ndarray
    feature = AcousticFeature(
        f0=f0[:, None].astype(self._dtype),
        spectrogram=spectrogram.astype(self._dtype),
        aperiodicity=aperiodicity.astype(self._dtype),
        mfcc=mfcc.astype(self._dtype),
        voiced=voiced[:, None],
    )
    feature.validate()
    return feature
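pysptk.sp2mc, used above, compresses the CheapTrick envelope into mel-cepstral coefficients; the mapping can be approximately inverted with pysptk.mc2sp when the same alpha and the CheapTrick FFT size are used. A hedged, self-contained sketch follows; the 24 kHz rate, order 24 and alpha 0.466 are assumptions, not the values of the project above, and a synthetic tone stands in for data.wave.
import numpy as np
import pysptk
import pyworld

fs, order, alpha = 24000, 24, 0.466   # assumed analysis settings

t = np.arange(fs) / fs
x = np.ascontiguousarray(0.3 * np.sin(2 * np.pi * 150.0 * t), dtype=np.float64)

f0, time_axis = pyworld.dio(x, fs)
f0 = pyworld.stonemask(x, f0, time_axis, fs)
spectrogram = pyworld.cheaptrick(x, f0, time_axis, fs)

fft_size = pyworld.get_cheaptrick_fft_size(fs)        # FFT size CheapTrick used with its defaults
mc = pysptk.sp2mc(spectrogram, order=order, alpha=alpha)
sp_back = pysptk.mc2sp(mc, alpha, fft_size)           # approximate envelope, shape (n, fft_size // 2 + 1)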
Example 5: world_decompose
# Required module: import pyworld [as alias]
# Or: from pyworld import cheaptrick [as alias]
# (this example also assumes numpy is imported as np)
def world_decompose(wav, fs, frame_period=5.0):
    # Decompose a speech signal into F0, spectral envelope and aperiodicity using WORLD
    wav = wav.astype(np.float64)
    f0, timeaxis = pyworld.harvest(wav, fs, frame_period=frame_period, f0_floor=71.0, f0_ceil=800.0)
    sp = pyworld.cheaptrick(wav, f0, timeaxis, fs)
    ap = pyworld.d4c(wav, f0, timeaxis, fs)
    return f0, timeaxis, sp, ap
Example 6: extract
# Required module: import pyworld [as alias]
# Or: from pyworld import cheaptrick [as alias]
# (numpy, pysptk and the project's Wave / AcousticFeature classes are also required)
def extract(cls, wave: Wave, frame_period, f0_floor, f0_ceil, fft_length, order, alpha, dtype):
    x = wave.wave.astype(numpy.float64)
    fs = wave.sampling_rate
    f0, t = cls.extract_f0(x=x, fs=fs, frame_period=frame_period, f0_floor=f0_floor, f0_ceil=f0_ceil)
    sp = pyworld.cheaptrick(x, f0, t, fs, fft_size=fft_length)
    ap = pyworld.d4c(x, f0, t, fs, fft_size=fft_length)
    mc = pysptk.sp2mc(sp, order=order, alpha=alpha)   # mel-cepstrum of the spectral envelope
    coded_ap = pyworld.code_aperiodicity(ap, fs)      # band aperiodicity
    voiced: numpy.ndarray = ~(f0 == 0)
    # Drop the trailing frame when the signal length is not a multiple of fft_length
    if len(x) % fft_length > 0:
        f0 = f0[:-1]
        t = t[:-1]
        sp = sp[:-1]
        ap = ap[:-1]
        mc = mc[:-1]
        coded_ap = coded_ap[:-1]
        voiced = voiced[:-1]
    feature = AcousticFeature(
        f0=f0[:, None],
        sp=sp,
        ap=ap,
        coded_ap=coded_ap,
        mc=mc,
        voiced=voiced[:, None],
    )
    feature = feature.astype_only_float(dtype)
    return feature
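pyworld.code_aperiodicity, used above, compresses the D4C aperiodicity into a few band values; pyworld.decode_aperiodicity restores a full-band matrix for synthesis. A hedged, self-contained sketch of that pair follows; the 48 kHz rate and 2048-point FFT are assumptions, and a synthetic tone stands in for wave.wave.
import numpy as np
import pyworld

fs, fft_length = 48000, 2048   # assumed values standing in for wave.sampling_rate and fft_length

t = np.arange(fs) / fs
x = np.ascontiguousarray(0.3 * np.sin(2 * np.pi * 200.0 * t), dtype=np.float64)

f0, time_axis = pyworld.harvest(x, fs)
ap = pyworld.d4c(x, f0, time_axis, fs, fft_size=fft_length)

coded_ap = pyworld.code_aperiodicity(ap, fs)                     # (n, n_bands)
ap_restored = pyworld.decode_aperiodicity(
    np.ascontiguousarray(coded_ap), fs, fft_length)              # back to (n, fft_length // 2 + 1)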
Example 7: main
# Required module: import pyworld [as alias]
# Or: from pyworld import cheaptrick [as alias]
# (this demo also uses os, shutil.rmtree, soundfile as sf, pyworld as pw, and a savefig helper defined in the same script)
def main(args):
    if os.path.isdir('test'):
        rmtree('test')
    os.mkdir('test')

    x, fs = sf.read('utterance/vaiueo2d.wav')
    # x, fs = librosa.load('utterance/vaiueo2d.wav', dtype=np.float64)

    # 1. A convenient way
    f0, sp, ap = pw.wav2world(x, fs)    # use default options
    y = pw.synthesize(f0, sp, ap, fs, pw.default_frame_period)

    # 2. Step by step
    # 2-1 Without F0 refinement
    _f0, t = pw.dio(x, fs, f0_floor=50.0, f0_ceil=600.0,
                    channels_in_octave=2,
                    frame_period=args.frame_period,
                    speed=args.speed)
    _sp = pw.cheaptrick(x, _f0, t, fs)
    _ap = pw.d4c(x, _f0, t, fs)
    _y = pw.synthesize(_f0, _sp, _ap, fs, args.frame_period)
    # librosa.output.write_wav('test/y_without_f0_refinement.wav', _y, fs)
    sf.write('test/y_without_f0_refinement.wav', _y, fs)

    # 2-2 DIO with F0 refinement (using StoneMask)
    f0 = pw.stonemask(x, _f0, t, fs)
    sp = pw.cheaptrick(x, f0, t, fs)
    ap = pw.d4c(x, f0, t, fs)
    y = pw.synthesize(f0, sp, ap, fs, args.frame_period)
    # librosa.output.write_wav('test/y_with_f0_refinement.wav', y, fs)
    sf.write('test/y_with_f0_refinement.wav', y, fs)

    # 2-3 Harvest with F0 refinement (using StoneMask)
    _f0_h, t_h = pw.harvest(x, fs)
    f0_h = pw.stonemask(x, _f0_h, t_h, fs)
    sp_h = pw.cheaptrick(x, f0_h, t_h, fs)
    ap_h = pw.d4c(x, f0_h, t_h, fs)
    y_h = pw.synthesize(f0_h, sp_h, ap_h, fs, pw.default_frame_period)
    # librosa.output.write_wav('test/y_harvest_with_f0_refinement.wav', y_h, fs)
    sf.write('test/y_harvest_with_f0_refinement.wav', y_h, fs)

    # Comparison plots
    savefig('test/wavform.png', [x, _y, y])
    savefig('test/sp.png', [_sp, sp])
    savefig('test/ap.png', [_ap, ap], log=False)
    savefig('test/f0.png', [_f0, f0])

    print('Please check "test" directory for output files')
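The demo calls a savefig helper that is not part of pyworld and is not shown on this page; it is defined in the demo script itself. A hypothetical sketch of such a helper, plotting 1-D arrays as curves and 2-D arrays as log-scaled images, might look like this (the subplot layout and color map are assumptions):
import matplotlib
matplotlib.use('Agg')            # write image files without a display
import matplotlib.pyplot as plt
import numpy as np

def savefig(filename, figlist, log=True):
    # Stack every array in figlist as its own subplot and save the figure to filename.
    n = len(figlist)
    plt.figure()
    for i, f in enumerate(figlist, start=1):
        plt.subplot(n, 1, i)
        if f.ndim == 1:
            plt.plot(f)
        else:
            plt.imshow(np.log(f + 1e-10).T if log else f.T,
                       origin='lower', aspect='auto', cmap='magma')
        plt.xlim(0, len(f))
    plt.savefig(filename)
    plt.close()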
Example 8: analysis_spec
# Required module: import pyworld [as alias]
# Or: from pyworld import cheaptrick [as alias]
# (numpy as np, warnings and the project's sigproc / sp modules are also required)
def analysis_spec(wav, fs, f0s,
                  shift=0.005,   # usually 5 ms
                  dftlen=4096,   # you can adapt this one to your pipeline
                  verbose=1):
    '''
    Estimate the amplitude spectral envelope.
    '''
    if sp.pystraight.isanalysiseavailable():  # pragma: no cover
        # Cannot be tested since STRAIGHT is not openly available.
        warnings.warn('''\n\nWARNING: straight_mcep is available,
            so the STRAIGHT vocoder will be used instead of WORLD.
            Note that PML-related publications present results using the STRAIGHT vocoder.
            ''', RuntimeWarning)
        # Use STRAIGHT's envelope if available (as in PML's publications)
        SPEC = sigproc.pystraight.analysis_spec(wav, fs, f0s, shift, dftlen, keeplen=True)
    elif sigproc.interfaces.worldvocoder_is_available():
        # Then try the WORLD vocoder
        import pyworld
        wav = np.ascontiguousarray(wav)
        # _f0, ts = pyworld.dio(x, fs, frame_period=shift*1000)  # raw pitch extractor; REAPER is used instead
        pwts = np.ascontiguousarray(f0s[:, 0])
        pwf0 = pyworld.stonemask(wav, np.ascontiguousarray(f0s[:, 1]), pwts, fs)  # pitch refinement
        SPEC = pyworld.cheaptrick(wav, pwf0, pwts, fs, fft_size=dftlen)           # extract the smoothed spectrogram
        SPEC = 10.0 * np.sqrt(SPEC)  # TODO Best gain correction found so far; hard to find a good one between the different PML and WORLD syntheses
    else:  # pragma: no cover
        # This is a safeguard that should never happen, since WORLD is embedded in pulsemodel.
        # Estimate the sinusoidal parameters at regular intervals in order
        # to build the amplitude spectral envelope.
        sinsreg, _ = sp.sinusoidal.estimate_sinusoidal_params(wav, fs, f0s, nbper=3, quadraticfit=True, verbose=verbose-1)
        warnings.warn('''\n\nWARNING: Neither straight_mcep nor WORLD's CheapTrick spectral envelope estimator is available.
            A SIMPLISTIC linear interpolation will therefore be used for the spectral envelope.
            Do _NOT_ use this envelope for speech synthesis!
            Please use a better one (e.g. STRAIGHT's or WORLD's).
            If you use this simplistic envelope, the TTS quality will
            be lower than that in the reported results.
            ''', RuntimeWarning)
        SPEC = sp.multi_linear(sinsreg, fs, dftlen)
        SPEC = np.exp(SPEC) * np.sqrt(float(dftlen))

    return SPEC
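A hypothetical call of the function above. The indexing f0s[:, 0] and f0s[:, 1] implies that f0s is an (N, 2) array whose first column holds frame times in seconds and whose second column holds F0 values in Hz; that layout, the 'speech.wav' path and the 5 ms frame grid are assumptions made here for illustration.
import numpy as np
import soundfile as sf

# wav, fs = sf.read('speech.wav')                     # mono file, placeholder path
# wav = np.ascontiguousarray(wav, dtype=np.float64)
# times = np.arange(0.0, len(wav) / fs, 0.005)        # 5 ms frame times
# f0_values = ...                                     # one F0 value per frame, e.g. from an external pitch tracker
# f0s = np.column_stack([times, f0_values])
# SPEC = analysis_spec(wav, fs, f0s, shift=0.005, dftlen=4096)
# SPEC.shape                                          # (len(times), 4096 // 2 + 1)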