This page collects typical usage examples of the Python attribute tensorflow.models.rnn.translate.data_utils.EOS_ID. If you are wondering what data_utils.EOS_ID is for, how it is used, or what real code that uses it looks like, the examples selected here may help. You can also look further into usage examples of the module this attribute belongs to, tensorflow.models.rnn.translate.data_utils.
Below are 7 code examples of the data_utils.EOS_ID attribute, sorted by popularity by default.
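All seven examples revolve around the same two uses of EOS_ID: it is appended to each target token-id sequence when building training buckets, and greedy decoder output is cut at the first EOS_ID when translating. The minimal standalone sketch below illustrates just that pattern; the constant value is hard-coded to match what data_utils defines (PAD_ID = 0, GO_ID = 1, EOS_ID = 2, UNK_ID = 3) only so the snippet runs without TensorFlow, and the token ids are made up for illustration.

EOS_ID = 2  # hard-coded stand-in for data_utils.EOS_ID

# Data preparation: every target sentence gets an end-of-sentence marker.
target_ids = [17, 5, 42]       # hypothetical token ids for one target sentence
target_ids.append(EOS_ID)      # -> [17, 5, 42, 2]

# Decoding: truncate the greedy output at the first EOS symbol.
outputs = [31, 8, 2, 0, 0]     # hypothetical argmax ids from the decoder
if EOS_ID in outputs:
  outputs = outputs[:outputs.index(EOS_ID)]
print(outputs)                 # [31, 8]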
Example 1: read_data
# Required import: from tensorflow.models.rnn.translate import data_utils [as alias]
# Or: from tensorflow.models.rnn.translate.data_utils import EOS_ID [as alias]
def read_data(source_path, target_path, max_size=None):
  """Read data from source and target files and put into buckets.

  Args:
    source_path: path to the files with token-ids for the source language.
    target_path: path to the file with token-ids for the target language;
      it must be aligned with the source file: n-th line contains the desired
      output for n-th line from the source_path.
    max_size: maximum number of lines to read, all other will be ignored;
      if 0 or None, data files will be read completely (no limit).

  Returns:
    data_set: a list of length len(_buckets); data_set[n] contains a list of
      (source, target) pairs read from the provided data files that fit
      into the n-th bucket, i.e., such that len(source) < _buckets[n][0] and
      len(target) < _buckets[n][1]; source and target are lists of token-ids.
  """
  data_set = [[] for _ in _buckets]
  with tf.gfile.GFile(source_path, mode="r") as source_file:
    with tf.gfile.GFile(target_path, mode="r") as target_file:
      source, target = source_file.readline(), target_file.readline()
      counter = 0
      while source and target and (not max_size or counter < max_size):
        counter += 1
        if counter % 100000 == 0:
          print(" reading data line %d" % counter)
          sys.stdout.flush()
        source_ids = [int(x) for x in source.split()]
        target_ids = [int(x) for x in target.split()]
        target_ids.append(data_utils.EOS_ID)
        for bucket_id, (source_size, target_size) in enumerate(_buckets):
          if len(source_ids) < source_size and len(target_ids) < target_size:
            data_set[bucket_id].append([source_ids, target_ids])
            break
        source, target = source_file.readline(), target_file.readline()
  return data_set
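As a quick orientation, here is a hedged usage sketch for read_data. It assumes a module-level _buckets list (the sizes below are the ones commonly used in the TensorFlow translate tutorial) and aligned token-id files whose paths are placeholders; neither is part of the example above.

_buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]   # assumed bucket sizes

# Hypothetical aligned token-id files, e.g. produced earlier in the data pipeline.
train_set = read_data("train.ids.en", "train.ids.fr", max_size=100000)
dev_set = read_data("dev.ids.en", "dev.ids.fr")       # no limit: read the whole files

print([len(bucket) for bucket in train_set])          # (source, target) pairs per bucket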
Example 2: decode
# Required import: from tensorflow.models.rnn.translate import data_utils [as alias]
# Or: from tensorflow.models.rnn.translate.data_utils import EOS_ID [as alias]
def decode():
  with tf.Session() as sess:
    # Create model and load parameters.
    model = create_model(sess, True)
    model.batch_size = 1  # We decode one sentence at a time.

    # Load vocabularies.
    en_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.en" % FLAGS.en_vocab_size)
    fr_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.fr" % FLAGS.fr_vocab_size)
    en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
    _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

    # Decode from standard input.
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
      # Get token-ids for the input sentence.
      token_idsgb = data_utils.sentence_to_token_ids(sentence, en_vocab)
      token_ids = token_idsgb[0:99]
      # Which bucket does it belong to?
      bucket_id = min([b for b in xrange(len(_buckets))
                       if _buckets[b][0] > len(token_ids)])
      # Get a 1-element batch to feed the sentence to the model.
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
      # This is a greedy decoder - outputs are just argmaxes of output_logits.
      outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
      # If there is an EOS symbol in outputs, cut them at that point.
      if data_utils.EOS_ID in outputs:
        outputs = outputs[:outputs.index(data_utils.EOS_ID)]
      # Print out French sentence corresponding to outputs.
      print(" ".join([rev_fr_vocab[output] for output in outputs]))
      print("> ", end="")
      sys.stdout.flush()
      sentence = sys.stdin.readline()
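One detail worth noting as a follow-up to this example: the min(...) bucket selection used here (and in Examples 3 and 4) raises ValueError when the input sentence is longer than the largest bucket, because the list comprehension is then empty. Examples 5 and 6 fall back to the largest bucket with a warning, and Example 7 simply skips such sentences. A minimal sketch of the fallback, with assumed bucket sizes and a made-up over-long input:

_buckets = [(5, 10), (10, 15), (20, 25), (40, 50)]   # assumed bucket sizes
token_ids = list(range(60))                          # hypothetical over-long sentence
candidates = [b for b in range(len(_buckets)) if _buckets[b][0] > len(token_ids)]
bucket_id = min(candidates) if candidates else len(_buckets) - 1  # fall back to the largest bucket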
Example 3: decode
# Required import: from tensorflow.models.rnn.translate import data_utils [as alias]
# Or: from tensorflow.models.rnn.translate.data_utils import EOS_ID [as alias]
def decode():
  with tf.Session() as sess:
    # Create model and load parameters.
    model = create_model(sess, True)
    model.batch_size = 1  # We decode one sentence at a time.

    # Load vocabularies.
    src_lang_vocab_path = PATH_TO_DATA_FILES + FLAGS.src_lang + "_mapping%d.txt" % FLAGS.src_lang_vocab_size
    dst_lang_vocab_path = PATH_TO_DATA_FILES + FLAGS.dst_lang + "_mapping%d.txt" % FLAGS.dst_lang_vocab_size
    src_lang_vocab, _ = data_utils.initialize_vocabulary(src_lang_vocab_path)
    _, rev_dst_lang_vocab = data_utils.initialize_vocabulary(dst_lang_vocab_path)

    # Decode from standard input.
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
      # Get token-ids for the input sentence.
      token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), src_lang_vocab)
      # Which bucket does it belong to?
      bucket_id = min([b for b in xrange(len(_buckets))
                       if _buckets[b][0] > len(token_ids)])
      # Get a 1-element batch to feed the sentence to the model.
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
      # This is a greedy decoder - outputs are just argmaxes of output_logits.
      outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
      # If there is an EOS symbol in outputs, cut them at that point.
      if data_utils.EOS_ID in outputs:
        outputs = outputs[:outputs.index(data_utils.EOS_ID)]
      # Print out the translated sentence corresponding to outputs.
      print(" ".join([tf.compat.as_str(rev_dst_lang_vocab[output]) for output in outputs]))
      print("> ", end="")
      sys.stdout.flush()
      sentence = sys.stdin.readline()
Example 4: decode
# Required import: from tensorflow.models.rnn.translate import data_utils [as alias]
# Or: from tensorflow.models.rnn.translate.data_utils import EOS_ID [as alias]
def decode():
  with tf.Session() as sess:
    # Create model and load parameters.
    model = create_model(sess, True)
    model.batch_size = 1  # We decode one sentence at a time.

    # Load vocabularies.
    en_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.en" % FLAGS.en_vocab_size)
    fr_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.fr" % FLAGS.fr_vocab_size)
    en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
    _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

    # Decode from standard input.
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
      # Get token-ids for the input sentence.
      token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), en_vocab)
      # Which bucket does it belong to?
      bucket_id = min([b for b in xrange(len(_buckets))
                       if _buckets[b][0] > len(token_ids)])
      # Get a 1-element batch to feed the sentence to the model.
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
      # This is a greedy decoder - outputs are just argmaxes of output_logits.
      outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
      # If there is an EOS symbol in outputs, cut them at that point.
      if data_utils.EOS_ID in outputs:
        outputs = outputs[:outputs.index(data_utils.EOS_ID)]
      # Print out French sentence corresponding to outputs.
      print(" ".join([tf.compat.as_str(rev_fr_vocab[output]) for output in outputs]))
      print("> ", end="")
      sys.stdout.flush()
      sentence = sys.stdin.readline()
Example 5: decode
# Required import: from tensorflow.models.rnn.translate import data_utils [as alias]
# Or: from tensorflow.models.rnn.translate.data_utils import EOS_ID [as alias]
def decode():
  with tf.Session() as sess:
    # Create model and load parameters.
    model = create_model(sess, True)
    model.batch_size = 1  # We decode one sentence at a time.

    # Load vocabularies.
    en_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.en" % FLAGS.en_vocab_size)
    fr_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.fr" % FLAGS.fr_vocab_size)
    en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
    _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

    # Decode from standard input.
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
      # Get token-ids for the input sentence.
      token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), en_vocab)
      # Which bucket does it belong to?
      bucket_id = len(_buckets) - 1
      for i, bucket in enumerate(_buckets):
        if bucket[0] >= len(token_ids):
          bucket_id = i
          break
      else:
        logging.warning("Sentence truncated: %s", sentence)
      # Get a 1-element batch to feed the sentence to the model.
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
      # This is a greedy decoder - outputs are just argmaxes of output_logits.
      outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
      # If there is an EOS symbol in outputs, cut them at that point.
      if data_utils.EOS_ID in outputs:
        outputs = outputs[:outputs.index(data_utils.EOS_ID)]
      # Print out French sentence corresponding to outputs.
      print(" ".join([tf.compat.as_str(rev_fr_vocab[output]) for output in outputs]))
      print("> ", end="")
      sys.stdout.flush()
      sentence = sys.stdin.readline()
Example 6: decode
# Required import: from tensorflow.models.rnn.translate import data_utils [as alias]
# Or: from tensorflow.models.rnn.translate.data_utils import EOS_ID [as alias]
def decode():
  with tf.Session() as sess:
    # Create model and load parameters.
    model = create_model(sess, True)
    model.batch_size = 1  # We decode one sentence at a time.

    # Load vocabularies.
    en_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.en" % FLAGS.en_vocab_size)
    fr_vocab_path = os.path.join(FLAGS.data_dir,
                                 "vocab%d.fr" % FLAGS.fr_vocab_size)
    en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
    _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

    # Decode from standard input.
    sys.stdout.write("> ")
    sys.stdout.flush()
    sentence = sys.stdin.readline()
    while sentence:
      # Get token-ids for the input sentence.
      token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(sentence), en_vocab)
      # Which bucket does it belong to?
      bucket_id = len(_buckets) - 1
      for i, bucket in enumerate(_buckets):
        if bucket[0] >= len(token_ids):
          bucket_id = i
          break
      else:
        logging.warning("Sentence truncated: %s", sentence)
      # Get a 1-element batch to feed the sentence to the model.
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs,
                                       target_weights, bucket_id, True)
      # This is a greedy decoder - outputs are just argmaxes of output_logits.
      outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
      # If there is an EOS symbol in outputs, cut them at that point.
      if data_utils.EOS_ID in outputs:
        outputs = outputs[:outputs.index(data_utils.EOS_ID)]
      # Print out French sentence corresponding to outputs.
      print(" ".join([tf.compat.as_str(rev_fr_vocab[output]) for output in outputs]))
      print("> ", end="")
      sys.stdout.flush()
      sentence = sys.stdin.readline()
Example 7: test
# Required import: from tensorflow.models.rnn.translate import data_utils [as alias]
# Or: from tensorflow.models.rnn.translate.data_utils import EOS_ID [as alias]
def test():
  """Test the translation model."""
  nltk.download('punkt')
  with tf.Session() as sess:
    model = create_model(sess, True)
    model.batch_size = 1  # We decode one sentence at a time.

    # Load vocabularies.
    src_lang_vocab_path = PATH_TO_DATA_FILES + FLAGS.src_lang + "_mapping%d.txt" % FLAGS.src_lang_vocab_size
    dst_lang_vocab_path = PATH_TO_DATA_FILES + FLAGS.dst_lang + "_mapping%d.txt" % FLAGS.dst_lang_vocab_size
    src_lang_vocab, _ = data_utils.initialize_vocabulary(src_lang_vocab_path)
    _, rev_dst_lang_vocab = data_utils.initialize_vocabulary(dst_lang_vocab_path)

    weights = [0.25, 0.25, 0.25, 0.25]
    first_lang_file = open(generate_src_lang_sentences_file_name(FLAGS.src_lang))
    second_lang_file = open(generate_src_lang_sentences_file_name(FLAGS.dst_lang))
    total_bleu_value = 0.0
    computing_bleu_iterations = 0
    for first_lang_raw in first_lang_file:
      second_lang_gold_raw = second_lang_file.readline()
      # Get token-ids for the input sentence.
      token_ids = data_utils.sentence_to_token_ids(tf.compat.as_bytes(first_lang_raw), src_lang_vocab)
      # Which bucket does it belong to?
      try:
        bucket_id = min([b for b in xrange(len(_buckets))
                         if _buckets[b][0] > len(token_ids)])
      except ValueError:
        continue
      # Get a 1-element batch to feed the sentence to the model.
      encoder_inputs, decoder_inputs, target_weights = model.get_batch(
          {bucket_id: [(token_ids, [])]}, bucket_id)
      # Get output logits for the sentence.
      _, _, output_logits = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True)
      # This is a greedy decoder - outputs are just argmaxes of output_logits.
      outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
      # If there is an EOS symbol in outputs, cut them at that point.
      if data_utils.EOS_ID in outputs:
        outputs = outputs[:outputs.index(data_utils.EOS_ID)]
      # Print out sentence corresponding to outputs.
      model_tran_res = " ".join([tf.compat.as_str(rev_dst_lang_vocab[output]) for output in outputs])
      second_lang_gold_tokens = word_tokenize(second_lang_gold_raw)
      model_tran_res_tokens = word_tokenize(model_tran_res)
      try:
        current_bleu_value = sentence_bleu([model_tran_res_tokens], second_lang_gold_tokens, weights)
        total_bleu_value += current_bleu_value
        computing_bleu_iterations += 1
      except ZeroDivisionError:
        pass
      # Report progress every 10 scored sentences; the > 0 check avoids dividing
      # by zero before any sentence has been scored.
      if computing_bleu_iterations > 0 and computing_bleu_iterations % 10 == 0:
        print("BLEU value after %d iterations: %.2f"
              % (computing_bleu_iterations, total_bleu_value / computing_bleu_iterations))
    final_bleu_value = total_bleu_value / computing_bleu_iterations
    print("Final BLEU value after %d iterations: %.2f" % (computing_bleu_iterations, final_bleu_value))
  return
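A brief follow-up note on the BLEU computation above, not part of the original example: NLTK's signature is sentence_bleu(references, hypothesis, weights, ...), so the gold sentence is normally passed as the reference and the model output as the hypothesis, and a smoothing function avoids the zero-count cases that the ZeroDivisionError handler above works around. A hedged sketch with made-up sentences (variable names mirror Example 7):

from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

smoothie = SmoothingFunction().method1  # assumed smoothing choice; adds epsilon to zero n-gram counts
second_lang_gold_tokens = word_tokenize("the gold translation")   # hypothetical gold sentence
model_tran_res_tokens = word_tokenize("the model translation")    # hypothetical model output
current_bleu_value = sentence_bleu([second_lang_gold_tokens],     # references come first
                                   model_tran_res_tokens,         # hypothesis second
                                   weights=(0.25, 0.25, 0.25, 0.25),
                                   smoothing_function=smoothie)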