本文整理汇总了Python中data.show_abs_oovs方法的典型用法代码示例。如果您正苦于以下问题：Python data.show_abs_oovs方法的具体用法？Python data.show_abs_oovs怎么用？Python data.show_abs_oovs使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块data的用法示例。
在下文中一共展示了data.show_abs_oovs方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: decode
# 需要导入模块: import data [as 别名]
# 或者: from data import show_abs_oovs [as 别名]
def decode(self):
    """Decode one batch with beam search and write the summary to stdout.

    Fetches a single batch from the batcher, runs beam search on it, maps the
    best hypothesis' token ids (all but the first) back to words, cuts the
    word list at the first [STOP] token when one is present, and writes the
    space-joined words to ``sys.stdout``.
    """
    batch = self._batcher.next_batch()  # 1 example repeated across batch
    source_text = batch.original_articles[0]  # string
    reference_text = batch.original_abstracts[0]  # string

    # OOV-annotated views of the inputs; computed but not consumed below.
    article_withunks = data.show_art_oovs(source_text, self._vocab)  # string
    # The article OOV list is only passed along when FLAGS.pointer_gen is set.
    article_oovs = batch.art_oovs[0] if FLAGS.pointer_gen else None
    abstract_withunks = data.show_abs_oovs(reference_text, self._vocab, article_oovs)  # string

    # Run beam search to get the best hypothesis.
    top_hypothesis = beam_search.run_beam_search(self._sess, self._model, self._vocab, batch)

    # Turn the hypothesis tokens (skipping the first one) back into words.
    token_ids = [int(token) for token in top_hypothesis.tokens[1:]]
    words = data.outputids2words(token_ids, self._vocab, article_oovs)

    # Keep only the words that precede the first [STOP] symbol, if any.
    if data.STOP_DECODING in words:
        words = words[:words.index(data.STOP_DECODING)]

    sys.stdout.write(' '.join(words))
示例2: process_one_article
# 需要导入模块: import data [as 别名]
# 或者: from data import show_abs_oovs [as 别名]
def process_one_article(self, original_article_sents, original_abstract_sents,
                        original_selected_ids, output_ids, oovs, attn_dists_norescale,
                        attn_dists, p_gens, log_probs, sent_probs, counter):
    """Post-process one decoded article and write the requested outputs.

    Converts ``output_ids`` back to words and drops everything from the first
    [STOP] token onwards. Output is only produced when ``FLAGS.single_pass``
    is set: the ROUGE files are always written, the attention-visualization
    data is written for beam decoding with ``FLAGS.save_vis``, and the result
    file is written when ``FLAGS.save_pkl`` is set.

    NOTE(review): block nesting was reconstructed from a listing whose
    indentation had been stripped; the ``save_pkl`` branch is assumed to sit
    directly under the single-pass branch — confirm against upstream.

    Args:
        original_article_sents: article sentences (strings); each is split on
            single spaces to count its words.
        original_abstract_sents: reference abstract sentences (strings).
        original_selected_ids: forwarded unchanged to ``save_result``.
        output_ids: decoded token ids, passed to ``data.outputids2words``.
        oovs: article OOV words, passed to ``data.outputids2words`` and
            ``data.show_abs_oovs``.
        attn_dists_norescale: forwarded unchanged to ``write_for_attnvis``.
        attn_dists: forwarded unchanged to ``write_for_attnvis``.
        p_gens: forwarded unchanged to ``write_for_attnvis``.
        log_probs: forwarded unchanged to ``write_for_attnvis``.
        sent_probs: per-sentence values, indexed by sentence position.
        counter: example index forwarded to the writer methods.
    """
    decoded_words = data.outputids2words(output_ids, self._vocab, oovs)

    # Remove the [STOP] token (and anything after it) from decoded_words.
    if data.STOP_DECODING in decoded_words:
        decoded_words = decoded_words[:decoded_words.index(data.STOP_DECODING)]
    decoded_sents = data.words2sents(decoded_words)

    if not FLAGS.single_pass:
        return

    verbose = FLAGS.mode != 'eval'
    # Write ref summary and decoded summary to file, to eval with pyrouge later.
    self.write_for_rouge(original_abstract_sents, decoded_sents, counter, verbose)

    if FLAGS.decode_method == 'beam' and FLAGS.save_vis:
        # Repeat each sentence's probability once per word of that sentence;
        # sentences at or beyond FLAGS.max_art_len contribute 0 instead.
        sent_probs_per_word = []
        for sent_id, sent in enumerate(original_article_sents):
            prob = sent_probs[sent_id] if sent_id < FLAGS.max_art_len else 0
            # str.split(' ') never returns an empty list, so every sentence
            # contributes at least one entry.
            sent_probs_per_word.extend([prob] * len(sent.split(' ')))

        original_article = ' '.join(original_article_sents)
        original_abstract = ' '.join(original_abstract_sents)
        article_withunks = data.show_art_oovs(original_article, self._vocab)  # string
        abstract_withunks = data.show_abs_oovs(original_abstract, self._vocab, oovs)
        self.write_for_attnvis(article_withunks, abstract_withunks, decoded_words,
                               attn_dists_norescale, attn_dists, p_gens, log_probs,
                               sent_probs_per_word, counter, verbose)

    if FLAGS.save_pkl:
        self.save_result(original_article_sents, original_abstract_sents,
                         original_selected_ids, decoded_sents, counter, verbose)
示例3: process_one_article
# 需要导入模块: import data [as 别名]
# 或者: from data import show_abs_oovs [as 别名]
def process_one_article(self, original_article_sents, original_abstract_sents,
                        original_selected_ids, output_ids, oovs,
                        attn_dists, p_gens, log_probs, counter):
    """Post-process one decoded article and write the requested outputs.

    Converts ``output_ids`` back to words and drops everything from the first
    [STOP] token onwards. Output is only produced when ``FLAGS.single_pass``
    is set: the ROUGE files are always written, the attention-visualization
    data is written for beam decoding with ``FLAGS.save_vis``, and the result
    file is written when ``FLAGS.save_pkl`` is set.

    NOTE(review): block nesting was reconstructed from a listing whose
    indentation had been stripped; the ``save_pkl`` branch is assumed to sit
    directly under the single-pass branch — confirm against upstream.

    Args:
        original_article_sents: article sentences (strings), joined with
            spaces for the visualization output.
        original_abstract_sents: reference abstract sentences (strings).
        original_selected_ids: forwarded unchanged to ``save_result``.
        output_ids: decoded token ids, passed to ``data.outputids2words``.
        oovs: article OOV words, passed to ``data.outputids2words`` and
            ``data.show_abs_oovs``.
        attn_dists: forwarded unchanged to ``write_for_attnvis``.
        p_gens: forwarded unchanged to ``write_for_attnvis``.
        log_probs: forwarded unchanged to ``write_for_attnvis``.
        counter: example index forwarded to the writer methods.
    """
    decoded_words = data.outputids2words(output_ids, self._vocab, oovs)

    # Remove the [STOP] token (and anything after it) from decoded_words.
    if data.STOP_DECODING in decoded_words:
        decoded_words = decoded_words[:decoded_words.index(data.STOP_DECODING)]
    decoded_sents = data.words2sents(decoded_words)

    if not FLAGS.single_pass:
        return

    verbose = FLAGS.mode != 'eval'
    # Write ref summary and decoded summary to file, to eval with pyrouge later.
    self.write_for_rouge(original_abstract_sents, decoded_sents, counter, verbose)

    if FLAGS.decode_method == 'beam' and FLAGS.save_vis:
        original_article = ' '.join(original_article_sents)
        original_abstract = ' '.join(original_abstract_sents)
        article_withunks = data.show_art_oovs(original_article, self._vocab)  # string
        abstract_withunks = data.show_abs_oovs(original_abstract, self._vocab, oovs)
        self.write_for_attnvis(article_withunks, abstract_withunks, decoded_words,
                               attn_dists, p_gens, log_probs, counter, verbose)

    if FLAGS.save_pkl:
        self.save_result(original_article_sents, original_abstract_sents,
                         original_selected_ids, decoded_sents, counter, verbose)
示例4: decode
# 需要导入模块: import data [as 别名]
# 或者: from data import show_abs_oovs [as 别名]
def decode(self):
    """Decode batches in a loop, either once over the dataset or indefinitely.

    With ``FLAGS.single_pass`` every decoded example is written out for ROUGE
    scoring, and once the batcher returns ``None`` the ROUGE evaluation is run
    and logged before returning. Otherwise the results are printed and dumped
    for the attention visualizer, and the checkpoint is reloaded whenever more
    than ``SECS_UNTIL_NEW_CKPT`` seconds have passed since the last reload.
    Examples whose abstract sentence list is empty are skipped.

    NOTE(review): block nesting was reconstructed from a listing whose
    indentation had been stripped; the checkpoint-reload check is assumed to
    belong to the non-single-pass branch — confirm against upstream.
    """
    last_ckpt_time = time.time()
    counter = FLAGS.decode_after
    while True:
        tf.reset_default_graph()
        batch = self._batcher.next_batch()  # 1 example repeated across batch

        if batch is None:  # finished decoding dataset in single_pass mode
            assert FLAGS.single_pass, "Dataset exhausted, but we are not in single_pass mode"
            tf.logging.info("Decoder has finished reading dataset for single_pass.")
            tf.logging.info("Output has been saved in %s and %s. Now starting ROUGE eval...", self._rouge_ref_dir, self._rouge_dec_dir)
            rouge_log(rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir), self._decode_dir)
            return

        article_text = batch.original_articles[0]  # string
        abstract_text = batch.original_abstracts[0]  # string
        abstract_sents = batch.original_abstracts_sents[0]  # list of strings
        if len(abstract_sents) == 0:
            print("NOOOOO!!!!, An empty abstract :(")
            continue

        article_withunks = data.show_art_oovs(article_text, self._vocab)  # string
        # The article OOV list is only passed along when FLAGS.pointer_gen is set.
        article_oovs = batch.art_oovs[0] if FLAGS.pointer_gen else None
        abstract_withunks = data.show_abs_oovs(abstract_text, self._vocab, article_oovs)  # string

        # Run beam search to get the best hypothesis; actor-critic training
        # passes the DQN objects through as extra arguments.
        search_args = [self._sess, self._model, self._vocab, batch]
        if FLAGS.ac_training:
            search_args += [self._dqn, self._dqn_sess, self._dqn_graph]
        best_hyp = beam_search.run_beam_search(*search_args)

        # Turn the hypothesis tokens (skipping the first one) back into words.
        token_ids = [int(token) for token in best_hyp.tokens[1:]]
        decoded_words = data.outputids2words(token_ids, self._vocab, article_oovs)

        # Keep only the words that precede the first [STOP] symbol, if any.
        if data.STOP_DECODING in decoded_words:
            decoded_words = decoded_words[:decoded_words.index(data.STOP_DECODING)]
        decoded_output = ' '.join(decoded_words)  # single string

        if FLAGS.single_pass:
            # Write ref summary and decoded summary to file, to eval with pyrouge later.
            self.write_for_rouge(abstract_sents, decoded_words, counter)
            counter += 1  # this is how many examples we've decoded
            continue

        print_results(article_withunks, abstract_withunks, decoded_output)  # log output to screen
        # Write info to .json file for visualization tool.
        self.write_for_attnvis(article_withunks, abstract_withunks, decoded_words, best_hyp.attn_dists, best_hyp.p_gens)

        # Reload the checkpoint once SECS_UNTIL_NEW_CKPT has elapsed.
        now = time.time()
        if now - last_ckpt_time > SECS_UNTIL_NEW_CKPT:
            tf.logging.info('We\'ve been decoding with same checkpoint for %i seconds. Time to load new checkpoint', now - last_ckpt_time)
            util.load_ckpt(self._saver, self._sess, FLAGS.decode_from)
            last_ckpt_time = time.time()
示例5: decode
# 需要导入模块: import data [as 别名]
# 或者: from data import show_abs_oovs [as 别名]
def decode(self):
    """Decode batches in a loop, either once over the dataset or indefinitely.

    With ``FLAGS.single_pass`` every decoded example is written out for ROUGE
    scoring, and once the batcher returns ``None`` the ROUGE evaluation is run
    and logged before returning. Otherwise the results are printed and dumped
    for the attention visualizer, and the checkpoint is reloaded whenever more
    than ``SECS_UNTIL_NEW_CKPT`` seconds have passed since the last reload.

    NOTE(review): block nesting was reconstructed from a listing whose
    indentation had been stripped; the checkpoint-reload check is assumed to
    belong to the non-single-pass branch — confirm against upstream.
    """
    last_ckpt_time = time.time()
    counter = FLAGS.decode_after
    while True:
        tf.reset_default_graph()
        batch = self._batcher.next_batch()  # 1 example repeated across batch

        if batch is None:  # finished decoding dataset in single_pass mode
            assert FLAGS.single_pass, "Dataset exhausted, but we are not in single_pass mode"
            tf.logging.info("Decoder has finished reading dataset for single_pass.")
            tf.logging.info("Output has been saved in %s and %s. Now starting ROUGE eval...", self._rouge_ref_dir, self._rouge_dec_dir)
            rouge_log(rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir), self._decode_dir)
            return

        article_text = batch.original_articles[0]  # string
        abstract_text = batch.original_abstracts[0]  # string
        abstract_sents = batch.original_abstracts_sents[0]  # list of strings

        article_withunks = data.show_art_oovs(article_text, self._vocab)  # string
        # The article OOV list is only passed along when FLAGS.pointer_gen is set.
        article_oovs = batch.art_oovs[0] if FLAGS.pointer_gen else None
        abstract_withunks = data.show_abs_oovs(abstract_text, self._vocab, article_oovs)  # string

        # Run beam search to get the best hypothesis; actor-critic training
        # passes the DQN objects through as extra arguments.
        search_args = [self._sess, self._model, self._vocab, batch]
        if FLAGS.ac_training:
            search_args += [self._dqn, self._dqn_sess, self._dqn_graph]
        best_hyp = beam_search.run_beam_search(*search_args)

        # Turn the hypothesis tokens (skipping the first one) back into words.
        token_ids = [int(token) for token in best_hyp.tokens[1:]]
        decoded_words = data.outputids2words(token_ids, self._vocab, article_oovs)

        # Keep only the words that precede the first [STOP] symbol, if any.
        if data.STOP_DECODING in decoded_words:
            decoded_words = decoded_words[:decoded_words.index(data.STOP_DECODING)]
        decoded_output = ' '.join(decoded_words)  # single string

        if FLAGS.single_pass:
            # Write ref summary and decoded summary to file, to eval with pyrouge later.
            self.write_for_rouge(abstract_sents, decoded_words, counter)
            counter += 1  # this is how many examples we've decoded
            continue

        print_results(article_withunks, abstract_withunks, decoded_output)  # log output to screen
        # Write info to .json file for visualization tool.
        self.write_for_attnvis(article_withunks, abstract_withunks, decoded_words, best_hyp.attn_dists, best_hyp.p_gens)

        # Reload the checkpoint once SECS_UNTIL_NEW_CKPT has elapsed.
        now = time.time()
        if now - last_ckpt_time > SECS_UNTIL_NEW_CKPT:
            tf.logging.info('We\'ve been decoding with same checkpoint for %i seconds. Time to load new checkpoint', now - last_ckpt_time)
            util.load_ckpt(self._saver, self._sess, FLAGS.decode_from)
            last_ckpt_time = time.time()
示例6: decode
# 需要导入模块: import data [as 别名]
# 或者: from data import show_abs_oovs [as 别名]
def decode(self):
    """Decode batches in a loop, either once over the dataset or indefinitely.

    With ``FLAGS.single_pass`` each decoded example is written out for ROUGE
    scoring, printed, stored in a per-article dictionary and dumped for the
    attention visualizer; once the batcher returns ``None`` the ROUGE
    evaluation is run and logged, the collected dictionary is written via
    ``write_all_for_attnvis`` and the method returns. Otherwise the results
    are printed and dumped for the visualizer, and the checkpoint is reloaded
    whenever more than ``SECS_UNTIL_NEW_CKPT`` seconds have passed.

    NOTE(review): block nesting was reconstructed from a listing whose
    indentation had been stripped; the checkpoint-reload check is assumed to
    belong to the non-single-pass branch — confirm against upstream.
    """
    last_ckpt_time = time.time()
    counter = 0
    all_decoded = {}  # a dictionary keeping the decoded files to be written for visualization
    while True:
        batch = self._batcher.next_batch()  # 1 example repeated across batch

        if batch is None:  # finished decoding dataset in single_pass mode
            assert FLAGS.single_pass, "Dataset exhausted, but we are not in single_pass mode"
            tf.logging.info("Decoder has finished reading dataset for single_pass.")
            tf.logging.info("Output has been saved in %s and %s. Now starting ROUGE eval...", self._rouge_ref_dir, self._rouge_dec_dir)
            rouge_log(rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir), self._decode_dir)
            if FLAGS.single_pass:
                self.write_all_for_attnvis(all_decoded)
            return

        article_text = batch.original_articles[0]  # string
        abstract_text = batch.original_abstracts[0]  # string
        abstract_sents = batch.original_abstracts_sents[0]  # list of strings
        article_id = batch.article_ids[0]  # string

        article_withunks = data.show_art_oovs(article_text, self._vocab)  # string
        # The article OOV list is only passed along when FLAGS.pointer_gen is set.
        article_oovs = batch.art_oovs[0] if FLAGS.pointer_gen else None
        abstract_withunks = data.show_abs_oovs(abstract_text, self._vocab, article_oovs)  # string

        # Run beam search to get the best hypothesis.
        best_hyp = beam_search.run_beam_search(self._sess, self._model, self._vocab, batch)

        # Turn the hypothesis tokens (skipping the first one) back into words.
        token_ids = [int(token) for token in best_hyp.tokens[1:]]
        decoded_words = data.outputids2words(token_ids, self._vocab, article_oovs)

        # Keep only the words that precede the first [STOP] symbol, if any.
        if data.STOP_DECODING in decoded_words:
            decoded_words = decoded_words[:decoded_words.index(data.STOP_DECODING)]
        decoded_output = ' '.join(decoded_words)  # single string

        if FLAGS.single_pass:
            # Write ref summary and decoded summary to file, to eval with pyrouge later.
            self.write_for_rouge(abstract_sents, decoded_words, article_id)
            print_results(article_withunks, abstract_withunks, decoded_output, article_id)  # log output to screen
            all_decoded[article_id] = self.prepare_for_attnvis(
                article_withunks, abstract_withunks, decoded_words,
                best_hyp.attn_dists, best_hyp.p_gens, best_hyp.attn_dists_sec)
            counter += 1  # this is how many examples we've decoded
            # Write info to .json file for visualization tool.
            self.write_for_attnvis(
                article_withunks, abstract_withunks, decoded_words,
                best_hyp.attn_dists, best_hyp.p_gens, best_hyp.attn_dists_sec)
            continue

        print_results(article_withunks, abstract_withunks, decoded_output, article_id)  # log output to screen
        # Write info to .json file for visualization tool.
        self.write_for_attnvis(
            article_withunks, abstract_withunks, decoded_words,
            best_hyp.attn_dists, best_hyp.p_gens, best_hyp.attn_dists_sec)

        # Reload the checkpoint once SECS_UNTIL_NEW_CKPT has elapsed.
        now = time.time()
        if now - last_ckpt_time > SECS_UNTIL_NEW_CKPT:
            tf.logging.info('We\'ve been decoding with same checkpoint for %i seconds. Time to load new checkpoint', now - last_ckpt_time)
            util.load_ckpt(self._saver, self._sess)
            last_ckpt_time = time.time()
示例7: decode
# 需要导入模块: import data [as 别名]
# 或者: from data import show_abs_oovs [as 别名]
def decode(self):
    """Decode batches in a loop, either once over the dataset or indefinitely.

    With ``FLAGS.single_pass`` every decoded example is written out for ROUGE
    scoring, and once the batcher returns ``None`` the ROUGE evaluation is run
    and logged before returning. Otherwise the results are printed and dumped
    for the attention visualizer, and the checkpoint is reloaded whenever more
    than ``SECS_UNTIL_NEW_CKPT`` seconds have passed since the last reload.

    NOTE(review): block nesting was reconstructed from a listing whose
    indentation had been stripped; the checkpoint-reload check is assumed to
    belong to the non-single-pass branch — confirm against upstream.
    """
    last_ckpt_time = time.time()
    counter = 0
    while True:
        batch = self._batcher.next_batch()  # 1 example repeated across batch

        if batch is None:  # finished decoding dataset in single_pass mode
            assert FLAGS.single_pass, "Dataset exhausted, but we are not in single_pass mode"
            tf.logging.info("Decoder has finished reading dataset for single_pass.")
            tf.logging.info("Output has been saved in %s and %s. Now starting ROUGE eval...", self._rouge_ref_dir, self._rouge_dec_dir)
            rouge_log(rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir), self._decode_dir)
            return

        article_text = batch.original_articles[0]  # string
        abstract_text = batch.original_abstracts[0]  # string
        abstract_sents = batch.original_abstracts_sents[0]  # list of strings

        article_withunks = data.show_art_oovs(article_text, self._vocab)  # string
        # The article OOV list is only passed along when FLAGS.pointer_gen is set.
        article_oovs = batch.art_oovs[0] if FLAGS.pointer_gen else None
        abstract_withunks = data.show_abs_oovs(abstract_text, self._vocab, article_oovs)  # string

        # Run beam search to get the best hypothesis.
        best_hyp = beam_search.run_beam_search(self._sess, self._model, self._vocab, batch)

        # Turn the hypothesis tokens (skipping the first one) back into words.
        token_ids = [int(token) for token in best_hyp.tokens[1:]]
        decoded_words = data.outputids2words(token_ids, self._vocab, article_oovs)

        # Keep only the words that precede the first [STOP] symbol, if any.
        if data.STOP_DECODING in decoded_words:
            decoded_words = decoded_words[:decoded_words.index(data.STOP_DECODING)]
        decoded_output = ' '.join(decoded_words)  # single string

        if FLAGS.single_pass:
            # Write ref summary and decoded summary to file, to eval with pyrouge later.
            self.write_for_rouge(abstract_sents, decoded_words, counter)
            counter += 1  # this is how many examples we've decoded
            continue

        print_results(article_withunks, abstract_withunks, decoded_output)  # log output to screen
        # Write info to .json file for visualization tool.
        self.write_for_attnvis(article_withunks, abstract_withunks, decoded_words, best_hyp.attn_dists, best_hyp.p_gens)

        # Reload the checkpoint once SECS_UNTIL_NEW_CKPT has elapsed.
        now = time.time()
        if now - last_ckpt_time > SECS_UNTIL_NEW_CKPT:
            tf.logging.info('We\'ve been decoding with same checkpoint for %i seconds. Time to load new checkpoint', now - last_ckpt_time)
            util.load_ckpt(self._saver, self._sess)
            last_ckpt_time = time.time()