本文整理汇总了Python中util.convert_tokens方法的典型用法代码示例。如果您正苦于以下问题:Python util.convert_tokens方法的具体用法?Python util.convert_tokens怎么用?Python util.convert_tokens使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块util的用法示例。
在下文中一共展示了util.convert_tokens方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: evaluate_batch
# Required module: import util
# Or: from util import convert_tokens
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    """Run ``num_batches`` evaluation batches and collect loss/F1/EM.

    Feeds ``str_handle`` through the feedable-iterator ``handle``, converts
    predicted span indices to answer text with ``convert_tokens``, and scores
    the accumulated answers with ``evaluate``.

    Returns:
        (metrics, summaries): the metrics dict (with mean 'loss' added) and a
        list of three tf.Summary protos tagged "<data_type>/loss|f1|em".
    """
    predictions = {}
    batch_losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, batch_loss, start_idx, end_idx = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2],
            feed_dict={handle: str_handle})
        batch_answers, _ = convert_tokens(
            eval_file, qa_id.tolist(), start_idx.tolist(), end_idx.tolist())
        predictions.update(batch_answers)
        batch_losses.append(batch_loss)
    metrics = evaluate(eval_file, predictions)
    metrics["loss"] = np.mean(batch_losses)
    # One scalar summary per metric, all under the data_type namespace.
    summaries = [
        tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=val)])
        for tag, val in (
            ("{}/loss".format(data_type), metrics["loss"]),
            ("{}/f1".format(data_type), metrics["f1"]),
            ("{}/em".format(data_type), metrics["exact_match"]),
        )
    ]
    return metrics, summaries
示例2: evaluate_batch
# Required module: import util
# Or: from util import convert_tokens
def evaluate_batch(model, num_batches, eval_file, sess, data_type, handle, str_handle):
    """Evaluate ``num_batches`` batches, print loss/F1/EM, and return metrics.

    Same contract as the summary-building variant, but also prints a one-line
    report of the mean loss, F1 and exact-match score.

    Returns:
        (metrics, summaries): metrics dict plus [loss, f1, em] tf.Summary list.
    """
    predictions = {}
    per_batch_loss = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, batch_loss, start_idx, end_idx = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2],
            feed_dict={handle: str_handle})
        batch_answers, _ = convert_tokens(
            eval_file, qa_id.tolist(), start_idx.tolist(), end_idx.tolist())
        predictions.update(batch_answers)
        per_batch_loss.append(batch_loss)
    loss = np.mean(per_batch_loss)
    metrics = evaluate(eval_file, predictions)
    metrics["loss"] = loss

    def _scalar_summary(tag, val):
        # Wrap one scalar into a tf.Summary proto for the event writer.
        return tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=val)])

    loss_sum = _scalar_summary("{}/loss".format(data_type), loss)
    f1_sum = _scalar_summary("{}/f1".format(data_type), metrics["f1"])
    em_sum = _scalar_summary("{}/em".format(data_type), metrics["exact_match"])
    print("[{}] loss:{}, f1:{}, em:{}".format(data_type, loss, metrics["f1"], metrics["exact_match"]))
    return metrics, [loss_sum, f1_sum, em_sum]
示例3: evaluate_batch
# Required module: import util
# Or: from util import convert_tokens
def evaluate_batch(data_source, model, max_batches, eval_file, config):
    """Evaluate ``model`` on up to ``max_batches`` batches from ``data_source``.

    For each batch the joint loss (answer-span start/end + answer type,
    normalized by batch size, plus the supporting-fact loss weighted by
    ``config.sp_lambda``) is accumulated, and the predicted spans/types are
    converted to answer strings with ``convert_tokens``.  A non-positive
    ``max_batches`` means "consume the whole data source".

    Returns:
        The dict from ``evaluate(eval_file, answer_dict)`` with the mean
        per-batch loss added under 'loss'.

    Fixes over the original: the loop no longer shadows the builtin ``iter``,
    and the never-used ``sp_dict`` local was removed.
    """
    answer_dict = {}
    total_loss, step_cnt = 0, 0
    for step, data in enumerate(data_source):
        if step >= max_batches and max_batches > 0:
            break
        # volatile=True is the legacy (pre-0.4) PyTorch way to disable
        # autograd at inference; on modern PyTorch wrap in torch.no_grad().
        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        y1 = Variable(data['y1'], volatile=True)
        y2 = Variable(data['y2'], volatile=True)
        q_type = Variable(data['q_type'], volatile=True)
        is_support = Variable(data['is_support'], volatile=True)
        start_mapping = Variable(data['start_mapping'], volatile=True)
        end_mapping = Variable(data['end_mapping'], volatile=True)
        all_mapping = Variable(data['all_mapping'], volatile=True)
        logit1, logit2, predict_type, predict_support, yp1, yp2 = model(
            context_idxs, ques_idxs, context_char_idxs, ques_char_idxs,
            context_lens, start_mapping, end_mapping, all_mapping,
            return_yp=True)
        loss = (nll_sum(predict_type, q_type) + nll_sum(logit1, y1)
                + nll_sum(logit2, y2)) / context_idxs.size(0) \
            + config.sp_lambda * nll_average(predict_support.view(-1, 2),
                                             is_support.view(-1))
        answer_dict_ = convert_tokens(
            eval_file, data['ids'],
            yp1.data.cpu().numpy().tolist(), yp2.data.cpu().numpy().tolist(),
            np.argmax(predict_type.data.cpu().numpy(), 1))
        answer_dict.update(answer_dict_)
        # .data[0] is legacy 0-dim tensor indexing (use .item() on >=0.4).
        total_loss += loss.data[0]
        step_cnt += 1
    loss = total_loss / step_cnt
    metrics = evaluate(eval_file, answer_dict)
    metrics['loss'] = loss
    return metrics
示例4: predict
# Required module: import util
# Or: from util import convert_tokens
def predict(data_source, model, eval_file, config, prediction_file):
    """Run inference over ``data_source`` and dump predictions as JSON.

    Produces answer strings (via ``convert_tokens``) and supporting-fact
    sentence ids (sigmoid score above ``config.sp_threshold``), then writes
    {'answer': ..., 'sp': ...} to ``prediction_file``.
    """
    answer_dict = {}
    sp_dict = {}
    threshold = config.sp_threshold
    for data in tqdm(data_source):
        # volatile=True: legacy pre-0.4 PyTorch inference mode.
        context_idxs = Variable(data['context_idxs'], volatile=True)
        ques_idxs = Variable(data['ques_idxs'], volatile=True)
        context_char_idxs = Variable(data['context_char_idxs'], volatile=True)
        ques_char_idxs = Variable(data['ques_char_idxs'], volatile=True)
        context_lens = Variable(data['context_lens'], volatile=True)
        start_mapping = Variable(data['start_mapping'], volatile=True)
        end_mapping = Variable(data['end_mapping'], volatile=True)
        all_mapping = Variable(data['all_mapping'], volatile=True)
        logit1, logit2, predict_type, predict_support, yp1, yp2 = model(
            context_idxs, ques_idxs, context_char_idxs, ques_char_idxs,
            context_lens, start_mapping, end_mapping, all_mapping,
            return_yp=True)
        batch_answers = convert_tokens(
            eval_file, data['ids'],
            yp1.data.cpu().numpy().tolist(), yp2.data.cpu().numpy().tolist(),
            np.argmax(predict_type.data.cpu().numpy(), 1))
        answer_dict.update(batch_answers)
        # Probability that each sentence is a supporting fact.
        support_probs = torch.sigmoid(predict_support[:, :, 1]).data.cpu().numpy()
        for i in range(support_probs.shape[0]):
            cur_id = data['ids'][i]
            sent_ids = eval_file[cur_id]['sent2title_ids']
            picked = []
            for j in range(support_probs.shape[1]):
                if j >= len(sent_ids):
                    break
                if support_probs[i, j] > threshold:
                    picked.append(sent_ids[j])
            sp_dict[cur_id] = picked
    prediction = {'answer': answer_dict, 'sp': sp_dict}
    with open(prediction_file, 'w') as f:
        json.dump(prediction, f)
示例5: test
# Required module: import util
# Or: from util import convert_tokens
def test(config):
    """Restore the latest checkpoint and score the model on the test split.

    Loads embeddings and eval metadata from the paths in ``config``, runs the
    whole test record file through the restored graph, writes the remapped
    answers to ``config.answer_file`` and prints EM/F1.
    """
    def _read_json(path):
        # Small helper: parse one JSON file from disk.
        with open(path, "r") as fh:
            return json.load(fh)

    word_mat = np.array(_read_json(config.word_emb_file), dtype=np.float32)
    char_mat = np.array(_read_json(config.char_emb_file), dtype=np.float32)
    eval_file = _read_json(config.test_eval_file)
    total = _read_json(config.test_meta)["total"]
    print("Loading model...")
    test_batch = get_dataset(
        config.test_record_file,
        get_record_parser(config, is_test=True),
        config).make_one_shot_iterator()
    model = Model(config, test_batch, word_mat, char_mat, trainable=False)
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        batch_losses, answer_dict, remapped_dict = [], {}, {}
        # One extra step covers the final partial batch.
        for _ in tqdm(range(total // config.batch_size + 1)):
            qa_id, batch_loss, start_idx, end_idx = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answers, remapped = convert_tokens(
                eval_file, qa_id.tolist(), start_idx.tolist(), end_idx.tolist())
            answer_dict.update(answers)
            remapped_dict.update(remapped)
            batch_losses.append(batch_loss)
        loss = np.mean(batch_losses)  # mean test loss (computed, not printed)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1']))
示例6: test
# Required module: import util
# Or: from util import convert_tokens
def test(config):
    """Restore the latest checkpoint and evaluate the model on the test set.

    Pins the GPU via ``config.choose_gpu``, loads embeddings and eval
    metadata, runs every test batch through the restored graph, writes the
    remapped answers to ``config.answer_file`` and prints EM/F1.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)
    total = meta["total"]
    print("Loading model...")
    test_batch = get_dataset(config.test_record_file, get_record_parser(
        config, is_test=True), config).make_one_shot_iterator()
    model = Model(config, test_batch, word_mat, char_mat, trainable=False)
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    # Was a bare `except:` that swallowed everything (including
    # KeyboardInterrupt/SystemExit); getattr states the actual intent —
    # default to 0.5 when the config has no gpu_memory_fraction attribute.
    sess_config.gpu_options.per_process_gpu_memory_fraction = getattr(
        config, "gpu_memory_fraction", 0.5)
    with tf.Session(config=sess_config) as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        remapped_dict = {}
        # One extra step covers the final partial batch.
        for _ in tqdm(range(total // config.batch_size + 1)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict_, remapped_dict_ = convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist())
            answer_dict.update(answer_dict_)
            remapped_dict.update(remapped_dict_)
            losses.append(loss)
        loss = np.mean(losses)  # mean test loss (computed, not printed)
        metrics = evaluate(eval_file, answer_dict)
        with open(config.answer_file, "w") as fh:
            json.dump(remapped_dict, fh)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1']))
示例7: test
# Required module: import util
# Or: from util import convert_tokens
def test(config):
    """Restore a QANet checkpoint and score it on the test split.

    Builds the eval graph in its own tf.Graph, optionally swaps in EMA
    weights (``config.decay < 1.0``), writes remapped answers to
    ``config.answer_file`` and prints EM/F1.
    """
    os.environ["CUDA_VISIBLE_DEVICES"] = config.choose_gpu

    def _read_json(path):
        # Small helper: parse one JSON file from disk.
        with open(path, "r") as fh:
            return json.load(fh)

    word_mat = np.array(_read_json(config.word_emb_file), dtype=np.float32)
    char_mat = np.array(_read_json(config.char_emb_file), dtype=np.float32)
    eval_file = _read_json(config.test_eval_file)
    total = _read_json(config.test_meta)["total"]
    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(
            config.test_record_file,
            get_record_parser(config, is_test=True),
            config).make_one_shot_iterator()
        model = QANet(config, test_batch, word_mat, char_mat,
                      trainable=False, graph = g)
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        sess_config.gpu_options.per_process_gpu_memory_fraction = \
            config.gpu_memory_fraction
        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                # Swap in the exponential-moving-average weights.
                sess.run(model.assign_vars)
            batch_losses, answer_dict, remapped_dict = [], {}, {}
            for _ in tqdm(range(total // config.batch_size + 1)):
                qa_id, batch_loss, start_idx, end_idx = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answers, remapped = convert_tokens(
                    eval_file, qa_id.tolist(), start_idx.tolist(), end_idx.tolist())
                answer_dict.update(answers)
                remapped_dict.update(remapped)
                batch_losses.append(batch_loss)
            loss = np.mean(batch_losses)  # mean test loss (computed, not printed)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(
                metrics['exact_match'], metrics['f1']))
示例8: test
# Required module: import util
# Or: from util import convert_tokens
def test(config):
    """Restore the latest checkpoint in a fresh graph and evaluate on test.

    Optionally applies EMA weights (``config.decay < 1.0``), writes remapped
    answers to ``config.answer_file`` and prints EM/F1.
    """
    def _read_json(path):
        # Small helper: parse one JSON file from disk.
        with open(path, "r") as fh:
            return json.load(fh)

    word_mat = np.array(_read_json(config.word_emb_file), dtype=np.float32)
    char_mat = np.array(_read_json(config.char_emb_file), dtype=np.float32)
    eval_file = _read_json(config.test_eval_file)
    total = _read_json(config.test_meta)["total"]
    graph = tf.Graph()
    print("Loading model...")
    with graph.as_default() as g:
        test_batch = get_dataset(
            config.test_record_file,
            get_record_parser(config, is_test=True),
            config).make_one_shot_iterator()
        model = Model(config, test_batch, word_mat, char_mat,
                      trainable=False, graph = g)
        sess_config = tf.ConfigProto(allow_soft_placement=True)
        sess_config.gpu_options.allow_growth = True
        with tf.Session(config=sess_config) as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
            if config.decay < 1.0:
                # Swap in the exponential-moving-average weights.
                sess.run(model.assign_vars)
            batch_losses, answer_dict, remapped_dict = [], {}, {}
            for _ in tqdm(range(total // config.batch_size + 1)):
                qa_id, batch_loss, start_idx, end_idx = sess.run(
                    [model.qa_id, model.loss, model.yp1, model.yp2])
                answers, remapped = convert_tokens(
                    eval_file, qa_id.tolist(), start_idx.tolist(), end_idx.tolist())
                answer_dict.update(answers)
                remapped_dict.update(remapped)
                batch_losses.append(batch_loss)
            loss = np.mean(batch_losses)  # mean test loss (computed, not printed)
            metrics = evaluate(eval_file, answer_dict)
            with open(config.answer_file, "w") as fh:
                json.dump(remapped_dict, fh)
            print("Exact Match: {}, F1: {}".format(
                metrics['exact_match'], metrics['f1']))
示例9: evaluate
# Required module: import util
# Or: from util import convert_tokens
def evaluate(model, data_loader, device, eval_file, max_len, use_squad_v2):
    """Score ``model`` on ``data_loader`` against the gold answers in ``eval_file``.

    Runs the model under ``torch.no_grad`` in eval mode, tracks the average
    NLL, discretizes span probabilities with ``util.discretize``, and maps
    predicted spans back to text with ``util.convert_tokens``.  The model is
    switched back to train mode before returning.

    Returns:
        (results, pred_dict): an OrderedDict with NLL/F1/EM (plus AvNA when
        ``use_squad_v2``) and the dict of predicted answer strings.
    """
    nll_meter = util.AverageMeter()
    model.eval()
    pred_dict = {}
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(data_loader.dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Move inputs to the target device (char idxs unused by model).
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)
            # Forward pass: per-token log-probabilities for start/end.
            log_p1, log_p2 = model(cw_idxs, qw_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)
            # Pick the best legal span from the probability matrices.
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, max_len, use_squad_v2)
            progress_bar.update(batch_size)
            progress_bar.set_postfix(NLL=nll_meter.avg)
            batch_preds, _ = util.convert_tokens(
                gold_dict, ids.tolist(), starts.tolist(), ends.tolist(),
                use_squad_v2)
            pred_dict.update(batch_preds)
    model.train()
    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    metric_pairs = [('NLL', nll_meter.avg),
                    ('F1', results['F1']),
                    ('EM', results['EM'])]
    if use_squad_v2:
        metric_pairs.append(('AvNA', results['AvNA']))
    return OrderedDict(metric_pairs), pred_dict