This article collects typical usage examples of the Python method data_utils.print_out. If you are wondering what data_utils.print_out does, how to call it, or want to see it used in context, the selected code examples below should help. You can also explore further usage examples from the data_utils module in which the method lives.
The following shows 9 code examples of data_utils.print_out, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
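All of the snippets below import data_utils under the alias data and call data.print_out to report progress; they appear to be drawn from Neural-GPU-style TensorFlow training code. The implementation of print_out itself is not shown on this page. As a rough mental model only, a minimal stand-in with the commonly seen signature print_out(s, newline=True) might look like the sketch below; the real helper in these projects typically also appends the message to a training log file.

import sys

def print_out(s, newline=True):
  """Minimal stand-in: write a message to stdout and flush immediately."""
  sys.stdout.write(s + ("\n" if newline else ""))
  sys.stdout.flush()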
Example 1: print_vectors
# Required import: import data_utils [as data]
# Or: from data_utils import print_out [as alias]
def print_vectors(embedding_key, vocab_path, word_vector_file):
  """Print vectors from the given variable."""
  _, rev_vocab = wmt.initialize_vocabulary(vocab_path)
  vectors_variable = [v for v in tf.trainable_variables()
                      if embedding_key == v.name]
  if len(vectors_variable) != 1:
    data.print_out("Word vector variable not found or too many.")
    sys.exit(1)
  vectors_variable = vectors_variable[0]
  vectors = vectors_variable.eval()
  l, s = vectors.shape[0], vectors.shape[1]
  data.print_out("Printing %d word vectors from %s to %s."
                 % (l, embedding_key, word_vector_file))
  with tf.gfile.GFile(word_vector_file, mode="w") as f:
    # Lines have format: dog 0.045123 -0.61323 0.413667 ...
    for i in xrange(l):
      f.write(rev_vocab[i])
      for j in xrange(s):
        f.write(" %.8f" % vectors[i][j])
      f.write("\n")
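A hypothetical call site for this helper; the variable name, file paths, and build_model constructor below are placeholders rather than names from the original project. Note that entering a tf.Session context also makes it the default session, which the .eval() call inside print_vectors relies on.

with tf.Session() as sess:   # entering the context makes `sess` the default session
  model = build_model()      # placeholder for whatever constructs the graph
  model.saver.restore(sess, "/tmp/model.ckpt")   # assumes a trained checkpoint on disk
  print_vectors("embedding:0", "/tmp/vocab.txt", "/tmp/word_vectors.txt")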
Example 2: multi_test
# Required import: import data_utils [as data]
# Or: from data_utils import print_out [as alias]
def multi_test(l, model, sess, task, nprint, batch_size, offset=None,
               ensemble=None):
  """Run multiple tests at lower batch size to save memory."""
  errors, seq_err = 0.0, 0.0
  to_print = nprint
  low_batch = FLAGS.low_batch_size
  low_batch = min(low_batch, batch_size)
  for mstep in xrange(batch_size / low_batch):
    cur_offset = None if offset is None else offset + mstep * low_batch
    err, sq_err, _ = single_test(l, model, sess, task, to_print, low_batch,
                                 False, cur_offset, ensemble=ensemble)
    to_print = max(0, to_print - low_batch)
    errors += err
    seq_err += sq_err
    if FLAGS.mode > 0:
      cur_errors = float(low_batch * errors) / ((mstep+1) * low_batch)
      cur_seq_err = float(low_batch * seq_err) / ((mstep+1) * low_batch)
      data.print_out(" %s multitest current errors %.2f sequence-errors %.2f"
                     % (task, 100*cur_errors, 100*cur_seq_err))
  errors = float(low_batch) * float(errors) / batch_size
  seq_err = float(low_batch) * float(seq_err) / batch_size
  data.print_out(" %s len %d errors %.2f sequence-errors %.2f"
                 % (task, l, 100*errors, 100*seq_err))
  return errors, seq_err
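Why the last two lines rescale by low_batch / batch_size: each single_test call returns an error rate for one low-batch chunk, so summing those rates and multiplying by low_batch / batch_size gives the average rate over the full batch. A small numeric illustration with made-up values:

batch_size, low_batch = 128, 32                 # evaluated in 4 chunks of 32
chunk_error_rates = [0.10, 0.20, 0.10, 0.20]    # what single_test would return per chunk
errors = float(low_batch) * sum(chunk_error_rates) / batch_size
print(errors)                                   # 0.15, the mean rate over the whole batch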
Example 3: read_data
# Required import: import data_utils [as data]
# Or: from data_utils import print_out [as alias]
def read_data(source_path, target_path, buckets, max_size=None, print_out=True):
  """Read data from source and target files and put into buckets.

  Args:
    source_path: path to the file with token-ids for the source language.
    target_path: path to the file with token-ids for the target language;
      it must be aligned with the source file: the n-th line contains the
      desired output for the n-th line of source_path.
    buckets: the buckets to use.
    max_size: maximum number of lines to read; all others will be ignored.
      If 0 or None, data files will be read completely (no limit).
      If set to 1, no data will be returned (empty lists of the right form).
    print_out: whether to print out status or not.

  Returns:
    data_set: a list of length len(buckets); data_set[n] contains a list of
      (source, target) pairs read from the provided data files that fit
      into the n-th bucket, i.e., such that both lengths fit within
      buckets[n]; source and target are lists of token-ids.
  """
  data_set = [[] for _ in buckets]
  counter = 0
  if max_size != 1:
    with tf.gfile.GFile(source_path, mode="r") as source_file:
      with tf.gfile.GFile(target_path, mode="r") as target_file:
        source, target = source_file.readline(), target_file.readline()
        while source and target and (not max_size or counter < max_size):
          counter += 1
          if counter % 100000 == 0 and print_out:
            print " reading data line %d" % counter
            sys.stdout.flush()
          source_ids = [int(x) for x in source.split()]
          target_ids = [int(x) for x in target.split()]
          source_ids, source_len = zero_split(source_ids)
          target_ids, target_len = zero_split(target_ids, append=wmt.EOS_ID)
          for bucket_id, size in enumerate(buckets):
            if source_len <= size and target_len <= size:
              data_set[bucket_id].append([source_ids, target_ids])
              break
          source, target = source_file.readline(), target_file.readline()
  return data_set
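A hypothetical call, assuming token-id files already exist on disk and the scalar bucket sizes this variant expects; the paths and sizes below are illustrative, not values from the original project.

buckets = [10, 20, 40]   # maximum source/target length per bucket (illustrative)
train_set = read_data("train.ids.src", "train.ids.tgt", buckets, max_size=100000)
for bucket_id, pairs in enumerate(train_set):
  data.print_out("bucket %d (max len %d): %d sentence pairs"
                 % (bucket_id, buckets[bucket_id], len(pairs)))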
Example 4: read_data_into_global
# Required import: import data_utils [as data]
# Or: from data_utils import print_out [as alias]
def read_data_into_global(source_path, target_path, buckets,
                          max_size=None, print_out=True):
  """Read data into the global variables (can be in a separate thread)."""
  # pylint: disable=global-variable-not-assigned
  global global_train_set, train_buckets_scale
  # pylint: enable=global-variable-not-assigned
  data_set = read_data(source_path, target_path, buckets, max_size, print_out)
  global_train_set["wmt"].append(data_set)
  train_total_size = calculate_buckets_scale(data_set, buckets, "wmt")
  if print_out:
    print " Finished global data reading (%d)." % train_total_size
Example 5: single_test
# Required import: import data_utils [as data]
# Or: from data_utils import print_out [as alias]
def single_test(bin_id, model, sess, nprint, batch_size, dev, p, print_out=True,
                offset=None, beam_model=None):
  """Test model on test data of length l using the given session."""
  if not dev[p][bin_id]:
    data.print_out(" bin %d (%d)\t%s\tppl NA errors NA seq-errors NA"
                   % (bin_id, data.bins[bin_id], p))
    return 1.0, 1.0, 0.0
  inpt, target = data.get_batch(
      bin_id, batch_size, dev[p], FLAGS.height, offset)
  if FLAGS.beam_size > 1 and beam_model:
    loss, res, new_tgt, scores = m_step(
        model, beam_model, sess, batch_size, inpt, target, bin_id,
        FLAGS.eval_beam_steps, p)
    score_avgs = [sum(s) / float(len(s)) for s in scores]
    score_maxs = [max(s) for s in scores]
    score_str = ["(%.2f, %.2f)" % (score_avgs[i], score_maxs[i])
                 for i in xrange(FLAGS.eval_beam_steps)]
    data.print_out(" == scores (avg, max): %s" % "; ".join(score_str))
    errors, total, seq_err = data.accuracy(inpt, res, target, batch_size,
                                           nprint, new_tgt, scores[-1])
  else:
    loss, res, _, _ = model.step(sess, inpt, target, False)
    errors, total, seq_err = data.accuracy(inpt, res, target, batch_size,
                                           nprint)
  seq_err = float(seq_err) / batch_size
  if total > 0:
    errors = float(errors) / total
  if print_out:
    data.print_out(" bin %d (%d)\t%s\tppl %.2f errors %.2f seq-errors %.2f"
                   % (bin_id, data.bins[bin_id], p, data.safe_exp(loss),
                      100 * errors, 100 * seq_err))
  return (errors, seq_err, loss)
Example 6: assign_vectors
# Required import: import data_utils [as data]
# Or: from data_utils import print_out [as alias]
def assign_vectors(word_vector_file, embedding_key, vocab_path, sess):
  """Assign the embedding_key variable from the given word vectors file."""
  # For words in the word vector file, set their embedding at start.
  if not tf.gfile.Exists(word_vector_file):
    data.print_out("Word vector file does not exist: %s" % word_vector_file)
    sys.exit(1)
  vocab, _ = wmt.initialize_vocabulary(vocab_path)
  vectors_variable = [v for v in tf.trainable_variables()
                      if embedding_key == v.name]
  if len(vectors_variable) != 1:
    data.print_out("Word vector variable not found or too many.")
    sys.exit(1)
  vectors_variable = vectors_variable[0]
  vectors = vectors_variable.eval()
  data.print_out("Pre-setting word vectors from %s" % word_vector_file)
  with tf.gfile.GFile(word_vector_file, mode="r") as f:
    # Lines have format: dog 0.045123 -0.61323 0.413667 ...
    for line in f:
      line_parts = line.split()
      # The first part is the word.
      word = line_parts[0]
      if word in vocab:
        # Remaining parts are components of the vector.
        word_vector = np.array(map(float, line_parts[1:]))
        if len(word_vector) != FLAGS.vec_size:
          data.print_out("Warn: Word '%s', Expecting vector size %d, "
                         "found %d" % (word, FLAGS.vec_size,
                                       len(word_vector)))
        else:
          vectors[vocab[word]] = word_vector
  # Assign the modified vectors to the vectors_variable in the graph.
  sess.run([vectors_variable.initializer],
           {vectors_variable.initializer.inputs[1]: vectors})
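The final sess.run is the interesting part: instead of adding an explicit assign op, it reruns the variable's initializer and feeds the modified array in place of the initializer's original value input. A small standalone illustration of that TF 1.x pattern; the variable name and shape are made up for the sketch.

import numpy as np
import tensorflow as tf   # TF 1.x graph/session API, matching the snippets above

emb = tf.get_variable("emb_demo", shape=[4, 3], initializer=tf.zeros_initializer())
new_values = np.random.rand(4, 3).astype(np.float32)

with tf.Session() as sess:
  # The initializer is an Assign op; feeding its second input (the initial
  # value tensor) overrides it, so no extra node is added to the graph.
  sess.run([emb.initializer], {emb.initializer.inputs[1]: new_values})
  print(sess.run(emb))    # the variable now holds new_values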
Example 7: read_data
# Required import: import data_utils [as data]
# Or: from data_utils import print_out [as alias]
def read_data(source_path, target_path, buckets, max_size=None, print_out=True):
  """Read data from source and target files and put into buckets.

  Args:
    source_path: path to the file with token-ids for the source language.
    target_path: path to the file with token-ids for the target language;
      it must be aligned with the source file: the n-th line contains the
      desired output for the n-th line of source_path.
    buckets: the buckets to use.
    max_size: maximum number of lines to read; all others will be ignored.
      If 0 or None, data files will be read completely (no limit).
      If set to 1, no data will be returned (empty lists of the right form).
    print_out: whether to print out status or not.

  Returns:
    data_set: a list of length len(buckets); data_set[n] contains a list of
      (source, target) pairs read from the provided data files that fit
      into the n-th bucket, i.e., such that both lengths fit within
      buckets[n]; source and target are lists of token-ids.
  """
  data_set = [[] for _ in buckets]
  counter = 0
  if max_size != 1:
    with tf.gfile.GFile(source_path, mode="r") as source_file:
      with tf.gfile.GFile(target_path, mode="r") as target_file:
        source, target = source_file.readline(), target_file.readline()
        while source and target and (not max_size or counter < max_size):
          counter += 1
          if counter % 100000 == 0 and print_out:
            print(" reading data line %d" % counter)
            sys.stdout.flush()
          source_ids = [int(x) for x in source.split()]
          target_ids = [int(x) for x in target.split()]
          source_ids, source_len = zero_split(source_ids)
          target_ids, target_len = zero_split(target_ids, append=wmt.EOS_ID)
          for bucket_id, size in enumerate(buckets):
            if source_len <= size and target_len <= size:
              data_set[bucket_id].append([source_ids, target_ids])
              break
          source, target = source_file.readline(), target_file.readline()
  return data_set
Example 8: read_data_into_global
# Required import: import data_utils [as data]
# Or: from data_utils import print_out [as alias]
def read_data_into_global(source_path, target_path, buckets,
                          max_size=None, print_out=True):
  """Read data into the global variables (can be in a separate thread)."""
  # pylint: disable=global-variable-not-assigned
  global global_train_set, train_buckets_scale
  # pylint: enable=global-variable-not-assigned
  data_set = read_data(source_path, target_path, buckets, max_size, print_out)
  global_train_set["wmt"].append(data_set)
  train_total_size = calculate_buckets_scale(data_set, buckets, "wmt")
  if print_out:
    print(" Finished global data reading (%d)." % train_total_size)
Example 9: single_test
# Required import: import data_utils [as data]
# Or: from data_utils import print_out [as alias]
def single_test(l, model, sess, task, nprint, batch_size, print_out=True,
                offset=None, ensemble=None, get_steps=False):
  """Test model on test data of length l using the given session."""
  inpt, target = data.get_batch(l, batch_size, False, task, offset)
  _, res, _, steps = model.step(sess, inpt, target, False, get_steps=get_steps)
  errors, total, seq_err = data.accuracy(inpt, res, target, batch_size, nprint)
  seq_err = float(seq_err) / batch_size
  if total > 0:
    errors = float(errors) / total
  if print_out:
    data.print_out(" %s len %d errors %.2f sequence-errors %.2f"
                   % (task, l, 100*errors, 100*seq_err))
  # Ensemble eval.
  if ensemble:
    results = []
    for m in ensemble:
      model.saver.restore(sess, m)
      _, result, _, _ = model.step(sess, inpt, target, False)
      m_errors, m_total, m_seq_err = data.accuracy(inpt, result, target,
                                                   batch_size, nprint)
      m_seq_err = float(m_seq_err) / batch_size
      if total > 0:
        m_errors = float(m_errors) / m_total
      data.print_out(" %s len %d m-errors %.2f m-sequence-errors %.2f"
                     % (task, l, 100*m_errors, 100*m_seq_err))
      results.append(result)
    ens = [sum(o) for o in zip(*results)]
    errors, total, seq_err = data.accuracy(inpt, ens, target,
                                           batch_size, nprint)
    seq_err = float(seq_err) / batch_size
    if total > 0:
      errors = float(errors) / total
    if print_out:
      data.print_out(" %s len %d ens-errors %.2f ens-sequence-errors %.2f"
                     % (task, l, 100*errors, 100*seq_err))
  return errors, seq_err, (steps, inpt, [np.argmax(o, axis=1) for o in res])
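The ensemble branch restores each checkpoint in turn, collects its per-position output scores, sums them element-wise across models, and takes the argmax. A toy numpy illustration of that summing step; the shapes and numbers are made up (one output position, batch of 2, 2 output symbols per model).

import numpy as np

logits_model_a = [np.array([[0.1, 0.9], [0.8, 0.2]])]   # list over output positions
logits_model_b = [np.array([[0.4, 0.6], [0.3, 0.7]])]
results = [logits_model_a, logits_model_b]

ens = [sum(o) for o in zip(*results)]          # element-wise sum across the two models
print([np.argmax(o, axis=1) for o in ens])     # [array([1, 0])]: the ensemble predictions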