本文整理汇总了Python中kaldi_io.write_mat方法的典型用法代码示例。如果您正苦于以下问题:Python kaldi_io.write_mat方法的具体用法?Python kaldi_io.write_mat怎么用?Python kaldi_io.write_mat使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块kaldi_io的用法示例。
在下文中一共展示了kaldi_io.write_mat方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _run_kaldi
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import write_mat [as 别名]
def _run_kaldi(command, input_type, input_value):
    """Run the provided Kaldi command, feed it input on stdin and return its output tensor.

    The command is started with both stdin and stdout connected to pipes.
    The input is written to stdin under a fixed utterance key, stdin is
    closed, and the matrix stored under that same key is read back from the
    ark stream on stdout.

    Arguments:
        command: passed unchanged to ``subprocess.Popen``.
        input_type: str
            'ark' or 'scp'
        input_value:
            Tensor for 'ark'
            string for 'scp' (path to an audio file)

    Returns:
        A torch tensor built from a copy of the matrix read from stdout.

    Raises:
        NotImplementedError: if ``input_type`` is neither 'ark' nor 'scp'.
    """
    key = 'foo'
    # The context manager closes the pipes and waits for the child on every
    # exit path, so no zombie process or open descriptor is left behind.
    with subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE) as process:
        if input_type == 'ark':
            kaldi_io.write_mat(process.stdin, input_value.cpu().numpy(), key=key)
        elif input_type == 'scp':
            process.stdin.write(f'{key} {input_value}'.encode('utf8'))
        else:
            raise NotImplementedError('Unexpected type')
        # Close stdin before reading so the child sees end-of-input.
        process.stdin.close()
        # Look the result up with the same key that was written, not a
        # duplicated literal.
        result = dict(kaldi_io.read_mat_ark(process.stdout))[key]
    return torch.from_numpy(result.copy())  # copy suppresses some torch warning
示例2: testPipeReadWrite
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import write_mat [as 别名]
def testPipeReadWrite(self):
    """
    Test read/write for pipes.

    Reads float matrices through a 'copy-feats' pipe, writes them back through
    another pipe, re-reads the saved archive and checks every matrix is
    unchanged. Finally reads an int-vector and a float-vector archive through
    pipes as well.

    Note: make sure the "os.environ['KALDI_ROOT']" in "kaldi_io/kaldi_io.py" is correct.
    """
    # 'sys.stderr' is rebound to /dev/null for the duration of the test (per
    # the original note this disables 'stderr' forwarding; bypass it for DEBUG).
    # The previous stream is restored afterwards, otherwise 'sys.stderr' would
    # be left pointing at a closed file once the 'with' block exits.
    saved_stderr = sys.stderr
    try:
        with open("/dev/null", "w") as sys.stderr:
            # read,
            flt_mat4 = {k: m for k, m in kaldi_io.read_mat_ark('ark:copy-feats ark:tests/data/feats.ark ark:- |')}
            # write to pipe,
            with kaldi_io.open_or_fd('ark:| copy-feats ark:- ark:tests/data_re-saved/mat_pipe.ark', 'wb') as f:
                for k, m in flt_mat4.items():
                    kaldi_io.write_mat(f, m, k)
            # read it again and compare,
            for k, m in kaldi_io.read_mat_ark('tests/data_re-saved/mat_pipe.ark'):
                self.assertTrue(np.array_equal(m, flt_mat4[k]), "flt. matrix same after read/write via pipe")
            # read some other formats from pipe (results are not inspected,
            # the reads themselves are the check),
            i32_vec3 = {k: v for k, v in kaldi_io.read_vec_int_ark('ark:copy-int-vector ark:tests/data/ali.ark ark:- |')}
            flt_vec4 = {k: v for k, v in kaldi_io.read_vec_flt_ark('ark:copy-vector ark:tests/data/conf.ark ark:- |')}
    finally:
        sys.stderr = saved_stderr
示例3: forwardXXreverse
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import write_mat [as 别名]
def forwardXXreverse(args, cpc_model, device, data_loader, output_ark, output_scp):
    """Run the model on each (data, data_r) pair and write the outputs as Kaldi matrices.

    Arguments:
        args: not used by this function.
        cpc_model: model providing ``eval``, ``init_hidden1``, ``init_hidden2``
            and ``predict(data, data_r, hidden1, hidden2)``.
        device: target passed to ``.to(device)`` for both inputs.
        data_loader: iterable yielding ``[utt_id, data, data_r]`` triples.
        output_ark: path string spliced into the 'copy-feats' ark,scp output spec.
        output_scp: path string spliced into the 'copy-feats' ark,scp output spec.

    Each output is written under ``utt_id[0]``; presumably the loader yields
    batches of size one (the output is squeezed on axis 0) - TODO confirm.
    """
    logger.info("Starting Forward Passing")
    cpc_model.eval()  # not training the CPC model
    ark_scp_output = 'ark:| copy-feats --compress=true ark:- ark,scp:' + output_ark + ',' + output_scp
    with torch.no_grad():
        with ko.open_or_fd(ark_scp_output, 'wb') as f:
            for [utt_id, data, data_r] in data_loader:
                data = data.float().unsqueeze(1).to(device)  # add channel dimension
                data_r = data_r.float().unsqueeze(1).to(device)  # add channel dimension
                data = data.contiguous()
                # Bug fix: this used to be `data.contiguous()`, which replaced
                # the second input with a copy of the first one.
                data_r = data_r.contiguous()
                hidden1 = cpc_model.init_hidden1(len(data))
                hidden2 = cpc_model.init_hidden2(len(data))
                output = cpc_model.predict(data, data_r, hidden1, hidden2)
                mat = output.squeeze(0).cpu().numpy()  # kaldi io does not accept torch tensor
                ko.write_mat(f, mat, key=utt_id[0])
示例4: forward_dct
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import write_mat [as 别名]
def forward_dct(args, cpc_model, device, data_loader, output_ark, output_scp, dct_dim=24):
    """Forward pass followed by a DCT; results are written as Kaldi matrices.

    For every ``[utt_id, data]`` item of ``data_loader`` the model prediction
    is converted to a numpy array, passed through ``fft.dct(type=2, n=dct_dim)``
    and written to a 'copy-feats' pipe under the key ``utt_id[0]``.

    Arguments:
        args: not used by this function.
        cpc_model: model providing ``eval``, ``init_hidden`` and ``predict``.
        device: target passed to ``.to(device)`` for the input.
        data_loader: iterable yielding ``[utt_id, data]`` pairs.
        output_ark: path string spliced into the ark,scp output spec.
        output_scp: path string spliced into the ark,scp output spec.
        dct_dim: value forwarded as ``n`` to ``fft.dct``.
    """
    logger.info("Starting Forward Passing")
    cpc_model.eval()  # inference only, the model is not being trained
    pipe_spec = 'ark:| copy-feats --compress=true ark:- ark,scp:' + output_ark + ',' + output_scp
    with torch.no_grad(), ko.open_or_fd(pipe_spec, 'wb') as sink:
        for utt_id, batch in data_loader:
            # float -> add channel dimension -> move to device -> contiguous
            batch = batch.float().unsqueeze(1).to(device).contiguous()
            state = cpc_model.init_hidden(len(batch))
            prediction, state = cpc_model.predict(batch, state)
            # kaldi io does not accept torch tensors, so convert to numpy first
            features = prediction.squeeze(0).cpu().numpy()
            transformed = fft.dct(features, type=2, n=dct_dim)  # apply dct
            ko.write_mat(sink, transformed, key=utt_id[0])
示例5: write_kaldi
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import write_mat [as 别名]
def write_kaldi(orig_feat_scp, ark_scp_output, max_len):
    """Write the sliced feature matrices to ark_scp_output.

    Every ``(key, matrix)`` pair read from ``orig_feat_scp`` is passed through
    ``tensor_cnn_utt(matrix, max_len)`` and the result is written under the
    same key. When the second dimension of a result differs from ``max_len``
    its shape is printed; the result is written regardless.
    """
    with ko.open_or_fd(ark_scp_output, 'wb') as sink:
        for utt_key, feats in ko.read_mat_scp(orig_feat_scp):
            sliced = tensor_cnn_utt(feats, max_len)
            if sliced.shape[1] != max_len:
                # diagnostic only, the matrix is still written below
                print(sliced.shape)
            ko.write_mat(sink, sliced, key=utt_key)
示例6: testMatrixReadWrite
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import write_mat [as 别名]
def testMatrixReadWrite(self):
    """
    Test read/write for float matrices.

    Loads matrices from an scp and two ark files, re-saves the last set to
    'tests/data_re-saved/mat.ark', then reads that file back and checks each
    matrix equals the one that was written.
    """
    # Load; the first two results are not inspected, reading them without an
    # error is the check.
    from_ascii_scp = dict(kaldi_io.read_mat_scp('tests/data/feats_ascii.scp'))  # ascii-scp
    from_ascii_ark = dict(kaldi_io.read_mat_ark('tests/data/feats_ascii.ark'))  # ascii-ark
    # NOTE(review): originally also labelled "ascii-ark"; presumably this is
    # the non-ascii archive - confirm.
    reference = dict(kaldi_io.read_mat_ark('tests/data/feats.ark'))
    # Store.
    with kaldi_io.open_or_fd('tests/data_re-saved/mat.ark', 'wb') as out:
        for key, mat in reference.items():
            kaldi_io.write_mat(out, mat, key)
    # Read back and compare against what was written.
    for key, mat in kaldi_io.read_mat_ark('tests/data_re-saved/mat.ark'):
        unchanged = np.array_equal(mat, reference[key])
        self.assertTrue(unchanged, msg="flt. matrix same after re-saving")
示例7: forward
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import write_mat [as 别名]
def forward(cpc_model, device, data_loader, output_ark, output_scp):
    """Forward pass; each model output is written as a Kaldi matrix.

    For every ``[utt_id, data]`` item of ``data_loader`` the model prediction
    is converted to a numpy array and written to a 'copy-feats' pipe under the
    key ``utt_id[0]``.

    Arguments:
        cpc_model: model providing ``eval``, ``init_hidden`` and ``predict``.
        device: target passed to ``.to(device)`` for the input.
        data_loader: iterable yielding ``[utt_id, data]`` pairs.
        output_ark: path string spliced into the ark,scp output spec.
        output_scp: path string spliced into the ark,scp output spec.
    """
    logger.info("Starting Forward Passing")
    cpc_model.eval()  # inference only, the model is not being trained
    pipe_spec = 'ark:| copy-feats --compress=true ark:- ark,scp:' + output_ark + ',' + output_scp
    with torch.no_grad(), ko.open_or_fd(pipe_spec, 'wb') as sink:
        for utt_id, batch in data_loader:
            # float -> add channel dimension -> move to device -> contiguous
            batch = batch.float().unsqueeze(1).to(device).contiguous()
            state = cpc_model.init_hidden(len(batch), use_gpu=False)
            prediction, state = cpc_model.predict(batch, state)
            # kaldi io does not accept torch tensors, so convert to numpy first
            features = prediction.squeeze(0).cpu().numpy()
            ko.write_mat(sink, features, key=utt_id[0])
示例8: main
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import write_mat [as 别名]
def main():
    """Convert the preprocessed TIMIT pickles into Kaldi ark/scp files.

    Steps:
      1. Check that KALDI_ROOT, INPUT_PATH and SOURCE_DIR are directories
         (print a hint and exit otherwise); create OUTPUT_PATH if missing.
      2. Load the ``<set>_x.pkl`` / ``<set>_id.pkl`` pickles for every entry
         of INPUT_SETS and index the features by ``<DIR>_<FILE>`` in upper
         case, built from the last two components of each id path.
      3. For every entry of OUTPUT_SETS, keep the utterances listed in
         ``SOURCE_DIR/<set>/feats.scp`` and write them through a
         'copy-feats' pipe into OUTPUT_PATH.

    Raises:
        NotADirectoryError: if SOURCE_DIR is not a directory while the
            output sets are being processed.
        AssertionError: if the feature and id lists differ in length, or if
            the number of utterances found for a set differs from the number
            of lines in its ``feats.scp``.
    """
    if not os.path.isdir(KALDI_ROOT):
        print('CHANGE THIS TO YOUR OWN KALDI ROOT: ', KALDI_ROOT)
        exit()
    if not os.path.isdir(INPUT_PATH):
        print('Invalid path for the preprocessed timit dataset: ', INPUT_PATH)
        print('Please run \'preprocess_timit.py\' first!')
        exit()
    if not os.path.isdir(SOURCE_DIR):
        print('Invalid path for the source directory: ', SOURCE_DIR)
        print('Please read the Wiki page for instructions!')
        exit()
    if not os.path.isdir(OUTPUT_PATH):
        os.mkdir(OUTPUT_PATH)

    # read train and test from the preprocessed directory
    # NOTE: pickle.load executes arbitrary code from the file; only point
    # INPUT_PATH at pickles produced by a trusted preprocessing run.
    x, ids = [], []
    for s in INPUT_SETS:
        with open(os.path.join(INPUT_PATH, s + '_x.pkl'), 'rb') as fp:
            x += pickle.load(fp)
        with open(os.path.join(INPUT_PATH, s + '_id.pkl'), 'rb') as fp:
            ids += pickle.load(fp)
    assert len(x) == len(ids)
    print('[TIMIT-to-ARK] - ', 'Total Dataset len:', len(x))

    # construct all input dict
    all_inputs = {}
    for utt_path, feats in zip(ids, x):
        name = str(utt_path)
        # Remove the '.wav' suffix only. str.strip('.wav') would strip any of
        # the characters '.', 'w', 'a', 'v' from BOTH ends of the string and
        # could corrupt the id.
        if name.endswith('.wav'):
            name = name[:-len('.wav')]
        parts = name.split('/')
        utt_key = parts[-2].upper() + '_' + parts[-1].upper()
        all_inputs[utt_key] = np.asarray(feats)

    # filter all input with kaldi generated files
    for s in OUTPUT_SETS:
        if not os.path.isdir(SOURCE_DIR):
            raise NotADirectoryError('Source directory does not exist!', SOURCE_DIR)
        set_dir = OUTPUT_PATH + '/' + str(s)
        if not os.path.isdir(set_dir):
            os.mkdir(set_dir)

        # read train / dev / test from the kaldi generated directory
        partial_outputs = {}
        with open(os.path.join(SOURCE_DIR, s + '/feats.scp'), 'r') as f:
            lines = f.readlines()
        for line in lines:
            # the utterance key is the first space-separated field
            utt_key = line.split(' ')[0]
            if utt_key in all_inputs:
                partial_outputs[utt_key] = all_inputs[utt_key]
        assert len(lines) == len(partial_outputs)

        # writing output with kaldi_io
        ark_scp_output = 'ark:| copy-feats --compress=true ark:- ark,scp:{}/raw_mel_{}.ark,{}/{}/feats.scp'.format(OUTPUT_PATH, str(s), OUTPUT_PATH, str(s))
        with kaldi_io.open_or_fd(ark_scp_output, 'wb') as f:
            for key, mat in tqdm(partial_outputs.items()):
                kaldi_io.write_mat(f, mat, key=key)

    print('[TIMIT-to-ARK] - All done, saved at \'' + str(OUTPUT_PATH) + '\' exit.')
开发者ID:andi611,项目名称:Self-Supervised-Speech-Pretraining-and-Representation-Learning,代码行数:62,代码来源:timit2ark.py