本文整理汇总了Python中kaldi_io.open_or_fd方法的典型用法代码示例。如果您正苦于以下问题：Python kaldi_io.open_or_fd方法的具体用法？Python kaldi_io.open_or_fd怎么用？Python kaldi_io.open_or_fd使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类kaldi_io的用法示例。
在下文中一共展示了kaldi_io.open_or_fd方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testFloatVectorReadWrite
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def testFloatVectorReadWrite(self):
    """
    Test read/write for float vectors.

    Loads the reference vectors from an scp and from two arks, re-saves the
    scp-loaded vectors into a new ark, then reads that ark back and asserts
    that every vector is identical to the one that was written.
    """
    # read,
    flt_vec = { k:v for k,v in kaldi_io.read_vec_flt_scp('tests/data/conf.scp') } # scp,
    # The two ark reads are not compared against anything; loading them only
    # verifies that the ark readers run through without raising.
    flt_vec2 = { k:v for k,v in kaldi_io.read_vec_flt_ark('tests/data/conf.ark') } # binary-ark,
    flt_vec3 = { k:v for k,v in kaldi_io.read_vec_flt_ark('tests/data/conf_ascii.ark') } # ascii-ark,
    # store,
    with kaldi_io.open_or_fd('tests/data_re-saved/conf.ark','wb') as f:
        for k,v in flt_vec.items():
            kaldi_io.write_vec_flt(f, v, k)
    # read and compare,
    for k,v in kaldi_io.read_vec_flt_ark('tests/data_re-saved/conf.ark'):
        self.assertTrue(np.array_equal(v,flt_vec[k]), msg="flt. vector same after re-saving")
示例2: testPipeReadWrite
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def testPipeReadWrite(self):
    """
    Test read/write for pipes.

    Reads matrices through a `copy-feats` input pipe, writes them through a
    `copy-feats` output pipe, reads the written ark back and compares, then
    reads an int-vector ark and a float-vector ark through pipes as well.

    Note: make sure the "os.environ['KALDI_ROOT']" in "kaldi_io/kaldi_io.py" is correct.
    """
    # `with ... as sys.stderr` rebinds the global sys.stderr and closes the
    # file on exit, so remember the real stream and put it back afterwards;
    # otherwise everything that runs later would write to a closed file.
    saved_stderr = sys.stderr
    try:
        # the following line disables 'stderr' forwarding, comment it for DEBUG,
        with open("/dev/null","w") as sys.stderr:
            # read,
            flt_mat4 = { k:m for k,m in kaldi_io.read_mat_ark('ark:copy-feats ark:tests/data/feats.ark ark:- |') }
            # write to pipe,
            with kaldi_io.open_or_fd('ark:| copy-feats ark:- ark:tests/data_re-saved/mat_pipe.ark','wb') as f:
                for k,m in flt_mat4.items():
                    kaldi_io.write_mat(f, m, k)
            # read it again and compare,
            for k,m in kaldi_io.read_mat_ark('tests/data_re-saved/mat_pipe.ark'):
                self.assertTrue(np.array_equal(m, flt_mat4[k]),"flt. matrix same after read/write via pipe")
            # read some other formats from pipe (results are not inspected,
            # the reads only have to complete without raising),
            i32_vec3 = { k:v for k,v in kaldi_io.read_vec_int_ark('ark:copy-int-vector ark:tests/data/ali.ark ark:- |') }
            flt_vec4 = { k:v for k,v in kaldi_io.read_vec_flt_ark('ark:copy-vector ark:tests/data/conf.ark ark:- |') }
    finally:
        sys.stderr = saved_stderr
示例3: testWriteReadPosteriors
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def testWriteReadPosteriors(self):
    """
    Round-trip a single posterior entry through an ark file.

    Writes one keyed posterior (a list of frames, each a list of
    (index, probability) pairs), reads the ark back and checks that exactly
    one entry with the same key, frame count, indices and (approximately)
    the same probabilities comes out.
    """
    ark_path = 'tests/data_re-saved/posterior_tests.ark'
    key = 'posterior_test1'
    data = [[(0, 0.0), (1, 0.1), (2, 0.2)],
            [(0, 0.00), (1, 0.11), (2, 0.22)],
            [(0, 0.000), (1, 0.111), (3, 0.333)]]

    # write,
    with kaldi_io.open_or_fd(ark_path,'wb') as w:
        kaldi_io.write_post(w, data, key=key)

    # read everything back,
    with kaldi_io.open_or_fd(ark_path, 'rb') as r:
        posts = [(utt, frames) for utt, frames in kaldi_io.read_post_ark(r)]

    # one entry, stored under the key we used,
    self.assertEqual(len(posts), 1)
    read_key, rdata = posts[0][0], posts[0][1]
    self.assertEqual(read_key, key)

    # same number of frames, and frame by frame the same content,
    self.assertEqual(len(rdata), len(data))
    for got_frame, want_frame in zip(rdata, data):
        self.assertEqual(len(got_frame), len(want_frame))
        for got, want in zip(got_frame, want_frame):
            got_idx, got_prob = got
            want_idx, want_prob = want
            self.assertEqual(got_idx, want_idx)
            self.assertAlmostEqual(got_prob, want_prob)
# if stand-alone, run this...
示例4: forwardXXreverse
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def forwardXXreverse(args, cpc_model, device, data_loader, output_ark, output_scp):
    """Run `cpc_model.predict` on two-input batches and write the outputs as Kaldi features.

    Every batch from `data_loader` provides an utterance id, an input `data`
    and a second input `data_r` (presumably the reversed counterpart of
    `data`, going by the function name -- confirm against the data loader).
    The model output for each batch is written under `utt_id[0]` through a
    `copy-feats --compress=true` pipe into `output_ark` / `output_scp`.

    Args:
        args: not used by this function.
        cpc_model: model exposing `eval`, `init_hidden1`, `init_hidden2` and `predict`.
        device: device the inputs are moved to before prediction.
        data_loader: iterable of `[utt_id, data, data_r]` batches.
        output_ark: path of the ark file produced by `copy-feats`.
        output_scp: path of the scp file produced by `copy-feats`.
    """
    logger.info("Starting Forward Passing")
    cpc_model.eval() # not training cdc model
    ark_scp_output='ark:| copy-feats --compress=true ark:- ark,scp:' + output_ark + ',' + output_scp
    with torch.no_grad():
        with ko.open_or_fd(ark_scp_output,'wb') as f:
            for [utt_id, data, data_r] in data_loader:
                data = data.float().unsqueeze(1).to(device) # add channel dimension
                data_r = data_r.float().unsqueeze(1).to(device) # add channel dimension
                data = data.contiguous()
                # Must be derived from data_r itself; taking it from `data`
                # would feed the same tensor to both inputs of predict().
                data_r = data_r.contiguous()
                hidden1 = cpc_model.init_hidden1(len(data))
                hidden2 = cpc_model.init_hidden2(len(data))
                output = cpc_model.predict(data, data_r, hidden1, hidden2)
                mat = output.squeeze(0).cpu().numpy() # kaldi io does not accept torch tensor
                ko.write_mat(f, mat, key=utt_id[0])
示例5: forward_dct
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def forward_dct(args, cpc_model, device, data_loader, output_ark, output_scp, dct_dim=24):
    '''Forward every batch through the model, apply a DCT and write the result.

    For each `[utt_id, data]` batch the model prediction is converted to a
    numpy array, transformed with `fft.dct(..., type=2, n=dct_dim)` and
    written under `utt_id[0]` through a `copy-feats --compress=true` pipe
    into `output_ark` / `output_scp`. `args` is not used.
    '''
    logger.info("Starting Forward Passing")
    cpc_model.eval()  # inference only
    wspecifier = ''.join((
        'ark:| copy-feats --compress=true ark:- ark,scp:',
        output_ark,
        ',',
        output_scp,
    ))
    with torch.no_grad(), ko.open_or_fd(wspecifier, 'wb') as sink:
        for utt_id, batch in data_loader:
            # add the channel dimension, move to the target device
            batch = batch.float().unsqueeze(1).to(device).contiguous()
            state = cpc_model.init_hidden(len(batch))
            prediction, state = cpc_model.predict(batch, state)
            # kaldi io only takes numpy arrays, not torch tensors
            features = prediction.squeeze(0).cpu().numpy()
            transformed = fft.dct(features, type=2, n=dct_dim)
            ko.write_mat(sink, transformed, key=utt_id[0])
示例6: write_kaldi
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def write_kaldi(orig_feat_scp, ark_scp_output, max_len):
    """Slice every matrix listed in `orig_feat_scp` and write it to `ark_scp_output`.

    Each matrix is passed through `tensor_cnn_utt(mat, max_len)`; when the
    second dimension of the result is not `max_len` its shape is printed.
    The result is written under the original key either way.
    """
    with ko.open_or_fd(ark_scp_output, 'wb') as sink:
        for utt_key, feats in ko.read_mat_scp(orig_feat_scp):
            sliced = tensor_cnn_utt(feats, max_len)
            width = sliced.shape[1]
            if width != max_len:
                # report slices that did not come out at the requested length
                print(sliced.shape)
            ko.write_mat(sink, sliced, key=utt_key)
示例7: read_all_key
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def read_all_key(file):
    """return all keys/utterances of a kaldi scp file

    Args:
        file: scp file name or an already opened file descriptor, handed to
            `ko.open_or_fd`.

    Returns:
        list of the keys (text before the first space of every line), in
        file order.

    Raises:
        ValueError: if a line contains no space at all.
    """
    key_list = []
    fd = ko.open_or_fd(file)
    try:
        for line in fd:
            # Split on the first space only: the part after the key may
            # itself contain spaces (e.g. a piped command), which would
            # otherwise break the two-way unpacking.
            (key,_) = line.decode().split(' ', 1)
            key_list.append(key)
    finally:
        # only close what was opened here, not a descriptor owned by the caller
        if fd is not file:
            fd.close()
    return key_list
示例8: read_mat_key
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def read_mat_key(file, target_key):
    """read the matrix of the target key/utterance from a kaldi scp file

    Args:
        file: scp file name or an already opened file descriptor, handed to
            `ko.open_or_fd`.
        target_key: key/utterance whose matrix should be loaded.

    Returns:
        the result of `ko.read_mat` for the first line whose key equals
        `target_key`, or None when no line matches.

    Raises:
        ValueError: if a scanned line contains no space at all.
    """
    fd = ko.open_or_fd(file)
    try:
        for line in fd:
            # Split on the first space only, so a location that contains
            # spaces (e.g. a piped command) stays in one piece.
            (key,rxfile) = line.decode().split(' ', 1)
            if key == target_key:
                # drop the line terminator before using the text as a location
                return ko.read_mat(rxfile.strip())
    finally:
        # only close what was opened here, not a descriptor owned by the caller
        if fd is not file:
            fd.close()
    return None
示例9: __execute_command__
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def __execute_command__(self, datain, cmd):
    """Feed `datain` as a wav to `cmd` and return the first matrix it emits, transposed.

    `kio.open_or_fd(cmd, 'wb')` is expected to hand back a pair of handles:
    one to write into and one to read from. The wav is written with sample
    rate `self.sr` under the key 'utt', the write handle is closed, and the
    first matrix read from the other handle is returned as `feats.T`.

    Returns None when nothing can be read; the read handle is left open in
    that case.
    """
    nothing = object()  # marks "the reader produced no entry at all"
    to_cmd, from_cmd = kio.open_or_fd(cmd, 'wb')
    kio.write_wav(to_cmd, datain, self.sr, key='utt')
    to_cmd.close()  # so it's clear nothing new arrives
    first_entry = next(iter(kio.read_mat_ark(from_cmd)), nothing)
    if first_entry is nothing:
        return None
    _, feats = first_entry
    from_cmd.close()
    return feats.T  # there is only one to read
示例10: testInt32VectorReadWrite
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def testInt32VectorReadWrite(self):
    """
    Round-trip check for int32 vectors.

    Loads the vectors from two reference arks, re-saves the first set into a
    new ark and asserts that reading it back yields identical vectors.
    """
    resaved_ark = 'tests/data_re-saved/ali.ark'
    # load the reference data,
    i32_vec = dict(kaldi_io.read_vec_int_ark('tests/data/ali.ark')) # binary,
    # loaded only to check that the reader copes with this file,
    i32_vec2 = dict(kaldi_io.read_vec_int_ark('tests/data/ali_ascii.ark')) # ascii,
    # write the first set out again,
    with kaldi_io.open_or_fd(resaved_ark,'wb') as out:
        for utt, vec in i32_vec.items():
            kaldi_io.write_vec_int(out, vec, utt)
    # what comes back has to match what went in,
    for utt, vec in kaldi_io.read_vec_int_ark(resaved_ark):
        unchanged = np.array_equal(vec, i32_vec[utt])
        self.assertTrue(unchanged, msg="int32 vector same after re-saving")
示例11: testMatrixReadWrite
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def testMatrixReadWrite(self):
    """
    Round-trip check for float matrices.

    Loads matrices from an scp and two arks, re-saves the ones read from
    'tests/data/feats.ark' into a new ark and asserts that reading it back
    yields identical matrices.
    """
    resaved_ark = 'tests/data_re-saved/mat.ark'
    # the first two reads are not compared, they only have to succeed,
    flt_mat = dict(kaldi_io.read_mat_scp('tests/data/feats_ascii.scp')) # ascii-scp,
    flt_mat2 = dict(kaldi_io.read_mat_ark('tests/data/feats_ascii.ark')) # ascii-ark,
    # NOTE(review): presumably the binary counterpart of feats_ascii.ark -- confirm,
    flt_mat3 = dict(kaldi_io.read_mat_ark('tests/data/feats.ark'))
    # write the third set out again,
    with kaldi_io.open_or_fd(resaved_ark,'wb') as out:
        for utt, mat in flt_mat3.items():
            kaldi_io.write_mat(out, mat, utt)
    # what comes back has to match what went in,
    for utt, mat in kaldi_io.read_mat_ark(resaved_ark):
        unchanged = np.array_equal(mat, flt_mat3[utt])
        self.assertTrue(unchanged, msg="flt. matrix same after re-saving")
示例12: forward
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def forward(cpc_model, device, data_loader, output_ark, output_scp):
    """Forward every batch through the model and write the outputs as Kaldi features.

    For each `[utt_id, data]` batch the model prediction is converted to a
    numpy array and written under `utt_id[0]` through a
    `copy-feats --compress=true` pipe into `output_ark` / `output_scp`.
    The hidden state is re-initialised for every batch with `use_gpu=False`.
    """
    logger.info("Starting Forward Passing")
    cpc_model.eval()  # inference only
    wspecifier = ''.join((
        'ark:| copy-feats --compress=true ark:- ark,scp:',
        output_ark,
        ',',
        output_scp,
    ))
    with torch.no_grad(), ko.open_or_fd(wspecifier, 'wb') as sink:
        for utt_id, batch in data_loader:
            # add the channel dimension, move to the target device
            batch = batch.float().unsqueeze(1).to(device).contiguous()
            state = cpc_model.init_hidden(len(batch), use_gpu=False)
            prediction, state = cpc_model.predict(batch, state)
            # kaldi io only takes numpy arrays, not torch tensors
            features = prediction.squeeze(0).cpu().numpy()
            ko.write_mat(sink, features, key=utt_id[0])
示例13: eval_dnn
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def eval_dnn(args):
    """ The main function for doing evaluation on a trained network.

    Embeddings are written to temporary files first (`<ark>.tmp.ark`,
    `<scp>.tmp.scp`); once `make_embedding` has finished, the ark is renamed
    to its final name and a corrected copy of the scp, pointing at the
    renamed ark, is written to the final scp path.

    Args:
        args: a Namespace object with the required parameters
            obtained from the function process_args()
    """
    model_dir = args.model_dir
    use_gpu = args.use_gpu == 'yes'
    min_chunk_size = args.min_chunk_size
    chunk_size = args.chunk_size
    # First change the output files temp ones and at the end rename them
    wspecifier, ark, scp = process_wspecifier(args.vector_wspecifier)
    if ark is not None and os.path.exists(ark) and scp is not None and os.path.exists(scp):
        logger.info('Both output ark and scp files exist. Return from this call.')
        return
    model = Model()
    with kaldi_io.open_or_fd(args.feature_rspecifier) as input_fid:
        with kaldi_io.open_or_fd(wspecifier) as output_fid:
            model.make_embedding(input_fid, output_fid, model_dir, min_chunk_size, chunk_size, use_gpu, logger)
    # rename output files
    if ark is not None:
        os.rename(ark + '.tmp.ark', ark)
    # first load scp and correct them to point to renamed ark file.
    if scp is not None:
        with open(scp + '.tmp.scp', 'rt') as fid_in:
            # 'wt' rather than 'wr': Python 3 rejects a mode that names both
            # reading and writing.
            with open(scp + '.tmp', 'wt') as fid_out:
                text = fid_in.read()
                text = text.replace('ark.tmp.ark', 'ark')
                # Sometimes there is no \n at the end of the file, which causes a Kaldi error.
                # Append one if it is missing; an empty file is left empty
                # (endswith also avoids indexing into an empty string).
                if text and not text.endswith('\n'):
                    text += '\n'
                fid_out.write(text)
        os.rename(scp + '.tmp', scp)
        # the temporary scp (scp + '.tmp.scp') is intentionally left in place
示例14: main
# 需要导入模块: import kaldi_io [as 别名]
# 或者: from kaldi_io import open_or_fd [as 别名]
def main():
    """Convert the preprocessed TIMIT pickles into Kaldi ark/scp files.

    Steps:
      1. Check that KALDI_ROOT, INPUT_PATH and SOURCE_DIR exist (print a hint
         and exit otherwise) and create OUTPUT_PATH if needed.
      2. Load `<set>_x.pkl` / `<set>_id.pkl` for every set in INPUT_SETS.
      3. Key every feature array by `<PARENT-DIR>_<FILE-STEM>` (upper-cased),
         derived from its id path.
      4. For every set in OUTPUT_SETS, keep the entries listed in
         `SOURCE_DIR/<set>/feats.scp` and write them through
         `copy-feats --compress=true` into OUTPUT_PATH.

    Raises:
        NotADirectoryError: if SOURCE_DIR is missing when a set is processed.
        AssertionError: if the pickles disagree in length, or a key listed in
            a feats.scp has no matching input.
    """
    if not os.path.isdir(KALDI_ROOT):
        print('CHANGE THIS TO YOUR OWN KALDI ROOT: ', KALDI_ROOT)
        exit()
    if not os.path.isdir(INPUT_PATH):
        print('Invalid path for the preprocessed timit dataset: ', INPUT_PATH)
        print('Please run \'preprocess_timit.py\' first!')
        exit()
    if not os.path.isdir(SOURCE_DIR):
        print('Invalid path for the source directory: ', SOURCE_DIR)
        print('Please read the Wiki page for instructions!')
        exit()
    if not os.path.isdir(OUTPUT_PATH):
        os.mkdir(OUTPUT_PATH)

    # read train and test from the preprocessed directory
    # NOTE(review): pickle.load can execute arbitrary code; only point
    # INPUT_PATH at files produced by your own preprocessing run.
    x, ids = [], []
    for s in INPUT_SETS:
        with open(os.path.join(INPUT_PATH, s + '_x.pkl'), 'rb') as fp:
            x += pickle.load(fp)
        with open(os.path.join(INPUT_PATH, s + '_id.pkl'), 'rb') as fp:
            ids += pickle.load(fp)
    assert len(x)==len(ids)
    print('[TIMIT-to-ARK] - ', 'Total Dataset len:', len(x))

    # construct all input dict
    all_inputs = {}
    for idx, i in enumerate(ids):
        i = str(i)
        # Remove the '.wav' suffix only. str.strip('.wav') would strip any of
        # the characters '.', 'w', 'a', 'v' from BOTH ends and could eat part
        # of the utterance name.
        if i.endswith('.wav'):
            i = i[:-len('.wav')]
        i = i.split('/')
        i = i[-2].upper() + '_' + i[-1].upper()
        all_inputs[i] = np.asarray(x[idx])

    # filter all input with kaldi generated files
    for s in OUTPUT_SETS:
        if not os.path.isdir(SOURCE_DIR):
            raise NotADirectoryError('Source directory does not exist!', SOURCE_DIR)
        if not os.path.isdir(OUTPUT_PATH + '/' + str(s)):
            os.mkdir(OUTPUT_PATH + '/' + str(s))

        # read train / dev / test from the kaldi generated directory
        partial_outputs = {}
        with open(os.path.join(SOURCE_DIR, s + '/feats.scp'), 'r') as f:
            lines = f.readlines()
        for line in lines:
            line = line.split(' ')[0]  # the key is the first field
            if line in all_inputs:
                partial_outputs[line] = all_inputs[line]
        # every key listed by kaldi must have been found among the inputs
        assert len(lines) == len(partial_outputs)

        # writing output with kaldi_io
        ark_scp_output = 'ark:| copy-feats --compress=true ark:- ark,scp:{}/raw_mel_{}.ark,{}/{}/feats.scp'.format(OUTPUT_PATH, str(s), OUTPUT_PATH, str(s))
        with kaldi_io.open_or_fd(ark_scp_output, 'wb') as f:
            for key, mat in tqdm(partial_outputs.items()):
                kaldi_io.write_mat(f, mat, key=key)

    print('[TIMIT-to-ARK] - All done, saved at \'' + str(OUTPUT_PATH) + '\' exit.')
开发者ID:andi611,项目名称:Self-Supervised-Speech-Pretraining-and-Representation-Learning,代码行数:62,代码来源:timit2ark.py