This article collects typical usage examples of the Python data.SENTENCE_END attribute. If you are wondering what data.SENTENCE_END is for and how it is used in practice, the curated code examples below should help; you can also explore the other members of the data module.
The sections below show 5 code examples that use data.SENTENCE_END, sorted by popularity by default.
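For context, all of the examples below come from TensorFlow textsum-style summarization code, where the sentence boundary markers live in the data module as plain string constants. The following is a minimal sketch of what those definitions typically look like; the exact token values are an assumption based on the textsum data.py and may differ in your copy of the module.

# Sketch of the relevant constants in a textsum-style data module.
# Token values are assumed from the TensorFlow textsum data.py; verify against your copy.
PAD_TOKEN = '<PAD>'        # padding token
UNKNOWN_TOKEN = '<UNK>'    # out-of-vocabulary token
SENTENCE_START = '<s>'     # marks the start of a sentence
SENTENCE_END = '</s>'      # marks the end of a sentence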
Example 1: _DecodeBatch
# Required imports: import data [as alias]
# Or: from data import SENTENCE_END [as alias]
def _DecodeBatch(self, article, abstract, output_ids):
  """Convert ids to words and write the results.

  Args:
    article: The original article string.
    abstract: The human (correct) abstract string.
    output_ids: The abstract word ids output by the machine.
  """
  decoded_output = ' '.join(data.Ids2Words(output_ids, self._vocab))
  # Truncate the decoded text at the first end-of-sentence token, if present.
  end_p = decoded_output.find(data.SENTENCE_END, 0)
  if end_p != -1:
    decoded_output = decoded_output[:end_p]
  tf.logging.info('article: %s', article)
  tf.logging.info('abstract: %s', abstract)
  tf.logging.info('decoded: %s', decoded_output)
  self._decode_io.Write(abstract, decoded_output.strip())
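The role of data.SENTENCE_END in this example is to cut the decoded word sequence at the first end-of-sentence marker. Below is a standalone sketch of just that truncation step, with SENTENCE_END assumed to be the string '</s>' and truncate_at_sentence_end a purely illustrative helper name.

# Minimal sketch of the truncation step in _DecodeBatch; the SENTENCE_END value is assumed.
SENTENCE_END = '</s>'

def truncate_at_sentence_end(decoded_output):
  # Keep only the text before the first end-of-sentence token, if one is present.
  end_p = decoded_output.find(SENTENCE_END, 0)
  return decoded_output[:end_p] if end_p != -1 else decoded_output

print(truncate_at_sentence_end('the cat sat on the mat </s> <PAD> <PAD>'))
# -> 'the cat sat on the mat '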
Example 2: _AddSentenceBoundary
# Required imports: import data [as alias]
# Or: from data import SENTENCE_END [as alias]
def _AddSentenceBoundary(self, text):
  """Pads text with start and end of sentence tokens iff needed.

  Args:
    text: text to be padded.

  Returns:
    The text with start and end tokens.
  """
  if not text.startswith(data.SENTENCE_START):
    text = data.SENTENCE_START + ' ' + text
  if not text.endswith(data.SENTENCE_END):
    text = text + ' ' + data.SENTENCE_END
  return text
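Because _AddSentenceBoundary only checks the string prefix and suffix, it is idempotent: calling it on already-padded text changes nothing. The sketch below shows that behavior with a plain-function version, add_sentence_boundary, using the assumed token values '<s>' and '</s>'.

# Illustrative behavior of the boundary padding; the token values are assumptions.
SENTENCE_START = '<s>'
SENTENCE_END = '</s>'

def add_sentence_boundary(text):
  # Add the start/end markers only when they are not already present.
  if not text.startswith(SENTENCE_START):
    text = SENTENCE_START + ' ' + text
  if not text.endswith(SENTENCE_END):
    text = text + ' ' + SENTENCE_END
  return text

print(add_sentence_boundary('hello world'))           # '<s> hello world </s>'
print(add_sentence_boundary('<s> hello world </s>'))  # already padded, unchanged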
Example 3: _Decode
# Required imports: import data [as alias]
# Or: from data import SENTENCE_END [as alias]
def _Decode(self, saver, sess):
  """Restore a checkpoint and decode it.

  Args:
    saver: Tensorflow checkpoint saver.
    sess: Tensorflow session.

  Returns:
    True on success, False otherwise.
  """
  ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root)
  if not (ckpt_state and ckpt_state.model_checkpoint_path):
    tf.logging.info('No model to decode yet at %s', FLAGS.log_root)
    return False

  tf.logging.info('checkpoint path %s', ckpt_state.model_checkpoint_path)
  ckpt_path = os.path.join(
      FLAGS.log_root, os.path.basename(ckpt_state.model_checkpoint_path))
  tf.logging.info('renamed checkpoint path %s', ckpt_path)
  saver.restore(sess, ckpt_path)

  self._decode_io.ResetFiles()
  for _ in xrange(FLAGS.decode_batches_per_ckpt):
    (article_batch, _, _, article_lens, _, _, origin_articles,
     origin_abstracts) = self._batch_reader.NextBatch()
    for i in xrange(self._hps.batch_size):
      # The beam search starts from the id of SENTENCE_START and stops a
      # hypothesis when it emits the id of SENTENCE_END.
      bs = beam_search.BeamSearch(
          self._model, self._hps.batch_size,
          self._vocab.WordToId(data.SENTENCE_START),
          self._vocab.WordToId(data.SENTENCE_END),
          self._hps.dec_timesteps)

      article_batch_cp = article_batch.copy()
      article_batch_cp[:] = article_batch[i:i+1]
      article_lens_cp = article_lens.copy()
      article_lens_cp[:] = article_lens[i:i+1]
      best_beam = bs.BeamSearch(sess, article_batch_cp, article_lens_cp)[0]
      decode_output = [int(t) for t in best_beam.tokens[1:]]
      self._DecodeBatch(
          origin_articles[i], origin_abstracts[i], decode_output)
  return True
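In _Decode, data.SENTENCE_END matters because its vocabulary id is handed to the beam search as the stop symbol: decoding starts from the id of SENTENCE_START, a hypothesis is finished once it emits the id of SENTENCE_END, and the leading start token is then dropped before the ids are decoded back to words. The sketch below illustrates this with a made-up toy vocabulary; the real code looks ids up with data.Vocab.WordToId and runs beam_search.BeamSearch.

# Hypothetical toy vocabulary; illustration only.
SENTENCE_START = '<s>'
SENTENCE_END = '</s>'
toy_vocab = {'<PAD>': 0, '<UNK>': 1, SENTENCE_START: 2, SENTENCE_END: 3, 'good': 4, 'news': 5}

start_id = toy_vocab[SENTENCE_START]  # handed to BeamSearch as the start symbol
end_id = toy_vocab[SENTENCE_END]      # BeamSearch ends a hypothesis at this symbol

# A made-up best beam always begins with the start id, so the code drops tokens[0].
best_beam_tokens = [start_id, 4, 5, end_id]
decode_output = [int(t) for t in best_beam_tokens[1:]]
print(decode_output)  # [4, 5, 3]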
Example 4: main
# Required imports: import data [as alias]
# Or: from data import SENTENCE_END [as alias]
def main(unused_argv):
  vocab = data.Vocab(FLAGS.vocab_path, 1000000)
  # Check for presence of the required special tokens.
  assert vocab.CheckVocab(data.PAD_TOKEN) > 0
  assert vocab.CheckVocab(data.UNKNOWN_TOKEN) >= 0
  assert vocab.CheckVocab(data.SENTENCE_START) > 0
  assert vocab.CheckVocab(data.SENTENCE_END) > 0

  batch_size = 4
  if FLAGS.mode == 'decode':
    batch_size = FLAGS.beam_size

  hps = seq2seq_attention_model.HParams(
      mode=FLAGS.mode,  # train, eval, decode
      min_lr=0.01,  # min learning rate.
      lr=0.15,  # learning rate
      batch_size=batch_size,
      enc_layers=4,
      enc_timesteps=120,
      dec_timesteps=30,
      min_input_len=2,  # discard articles/summaries shorter than this
      num_hidden=256,  # for rnn cell
      emb_dim=128,  # If 0, don't use embedding
      max_grad_norm=2,
      num_softmax_samples=4096)  # If 0, no sampled softmax.

  batcher = batch_reader.Batcher(
      FLAGS.data_path, vocab, hps, FLAGS.article_key,
      FLAGS.abstract_key, FLAGS.max_article_sentences,
      FLAGS.max_abstract_sentences, bucketing=FLAGS.use_bucketing,
      truncate_input=FLAGS.truncate_input)
  tf.set_random_seed(FLAGS.random_seed)

  if hps.mode == 'train':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Train(model, batcher)
  elif hps.mode == 'eval':
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        hps, vocab, num_gpus=FLAGS.num_gpus)
    _Eval(model, batcher, vocab=vocab)
  elif hps.mode == 'decode':
    # Only the first decoder step needs to be restored and reused, since
    # the state for each step's output is kept and fed back in.
    decode_mdl_hps = hps._replace(dec_timesteps=1)
    model = seq2seq_attention_model.Seq2SeqAttentionModel(
        decode_mdl_hps, vocab, num_gpus=FLAGS.num_gpus)
    decoder = seq2seq_attention_decode.BSDecoder(model, batcher, hps, vocab)
    decoder.DecodeLoop()
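In main, data.SENTENCE_END only appears in the sanity checks: the special tokens must exist in the vocabulary before training or decoding starts, so a missing marker fails fast rather than causing silent decoding errors later. A toy version of the same idea is sketched below with a plain dict; the return convention of the real Vocab.CheckVocab may differ from this sketch.

# Toy sanity check; the real code relies on data.Vocab.CheckVocab(), whose return
# convention may differ from this sketch.
SENTENCE_START = '<s>'
SENTENCE_END = '</s>'
toy_word_to_id = {'<PAD>': 0, '<UNK>': 1, SENTENCE_START: 2, SENTENCE_END: 3}

for token in (SENTENCE_START, SENTENCE_END):
  assert token in toy_word_to_id, 'special token %s missing from vocab' % token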
Example 5: _fill_example_queue
# Required imports: import data [as alias]
# Or: from data import SENTENCE_END [as alias]
def _fill_example_queue(self):
  """Reads data from file and processes it into Examples, which are then placed into the example queue."""
  input_gen = self.text_generator(
      data.example_generator(self._data_path, self._single_pass))
  cnt = 0
  fail = 0
  while True:
    try:
      # Read the next example from file. article and abstract are both strings.
      (article_id, article_text, abstract_sents, labels,
       section_names, sections) = six.next(input_gen)
    except StopIteration:  # there are no more examples
      tf.logging.info(
          "The example generator for this example queue filling thread has exhausted data.")
      if self._single_pass:
        tf.logging.info(
            "single_pass mode is on, so we've finished reading the dataset. This thread is stopping.")
        self._finished_reading = True
        break
      else:
        raise Exception(
            "single_pass mode is off but the example generator is out of data; error.")

    # Use the <s> and </s> tags in the abstract to get a list of sentences,
    # stripping the markers themselves out.
    abstract_sentences = [e.replace(data.SENTENCE_START, '').replace(data.SENTENCE_END, '').strip()
                          for e in abstract_sents]

    # Skip articles that contain this long run of underscores (malformed input).
    if "_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ __ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _" in article_text:
      continue
    # Require at least 2 sections; some articles do not have sections at all.
    if not sections or len(sections) <= 1:
      continue
    # Do not process articles that are too long.
    if len(article_text) > self._hps.max_article_sents:
      continue
    # Do not process documents with unusually long or short abstracts.
    abst_len = len(' '.join(abstract_sentences).split())
    if (abst_len > self._hps.max_abstract_len or
        abst_len < self._hps.min_abstract_len):
      continue

    # Process into an Example.
    example = Example(article_text, abstract_sentences, article_id, sections,
                      section_names, labels, self._vocab, self._hps)
    if example.discard:
      fail += 1
    cnt += 1
    # Place the Example in the example queue.
    if not example.discard:
      self._example_queue.put(example)
    if cnt % 100 == 0:
      print('total in queue: {} of {}'.format(cnt - fail, cnt))
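Example 5 uses data.SENTENCE_START and data.SENTENCE_END in the opposite direction: the abstract arrives as sentences that already carry the markers, and they are stripped out before the Example is built. Below is a standalone sketch of that cleanup step, again assuming the '<s>' and '</s>' token values.

# Sketch of the marker-stripping step; the token values are assumptions.
SENTENCE_START = '<s>'
SENTENCE_END = '</s>'

abstract_sents = ['<s> first summary sentence . </s>', '<s> second one . </s>']
abstract_sentences = [
    s.replace(SENTENCE_START, '').replace(SENTENCE_END, '').strip()
    for s in abstract_sents
]
print(abstract_sentences)  # ['first summary sentence .', 'second one .']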