This article collects typical usage examples of the Python method syntaxnet.ops.gen_parser_ops.document_source. If you have been wondering exactly what gen_parser_ops.document_source does or how to use it, the hand-picked examples below may help. You can also explore further usage of its containing module, syntaxnet.ops.gen_parser_ops.
The following presents 8 code examples of gen_parser_ops.document_source, sorted by popularity by default.
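Before working through the examples, it may help to see the read-loop pattern they all share. The sketch below is distilled from the examples on this page rather than from official documentation: the task context path 'task_context.pbtxt' is a placeholder, the import lines are the usual SyntaxNet paths and may need adjusting to your checkout, and corpus_name (passed explicitly in Examples 4 and 5) is omitted here.

import tensorflow as tf

from syntaxnet import sentence_pb2
from syntaxnet.ops import gen_parser_ops

with tf.Session() as sess:
  # document_source yields a pair of tensors: a batch of serialized
  # Sentence protos and a flag that turns True at the end of the corpus.
  src = gen_parser_ops.document_source(
      task_context='task_context.pbtxt',  # placeholder path
      batch_size=32)
  while True:
    documents, finished = sess.run(src)
    for serialized in documents:
      sentence = sentence_pb2.Sentence()
      sentence.ParseFromString(serialized)
      # ... inspect sentence.text, sentence.token, etc. ...
    if finished:
      break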
Example 1: ValidateDocuments
# Required import: from syntaxnet.ops import gen_parser_ops [as alias]
# Or: from syntaxnet.ops.gen_parser_ops import document_source [as alias]
def ValidateDocuments(self):
  doc_source = gen_parser_ops.document_source(
      task_context=self.context_file, batch_size=1)
  with self.test_session() as sess:
    logging.info('Reading document1')
    doc, last = self.ReadNextDocument(sess, doc_source)
    self.assertEqual(len(doc.token), 12)
    self.assertEqual(u'लाजमी', doc.token[9].word)
    self.assertFalse(last)
    logging.info('Reading document2')
    doc, last = self.ReadNextDocument(sess, doc_source)
    self.assertEqual(len(doc.token), 13)
    self.assertEqual(u'भंग', doc.token[9].word)
    self.assertFalse(last)
    logging.info('Hitting end of the dataset')
    doc, last = self.ReadNextDocument(sess, doc_source)
    self.assertTrue(doc is None)
    self.assertTrue(last)
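Note the end-of-stream protocol this test exercises: each call to ReadNextDocument returns a (doc, last) pair, and once the corpus is exhausted doc is None while last is True.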
Example 2: CheckUntokenizedDoc
# Required import: from syntaxnet.ops import gen_parser_ops [as alias]
# Or: from syntaxnet.ops.gen_parser_ops import document_source [as alias]
def CheckUntokenizedDoc(self, sentence, words, starts, ends):
  self.WriteContext('untokenized-text')
  logging.info('Writing text file to: %s', self.corpus_file)
  with open(self.corpus_file, 'w') as f:
    f.write(sentence)
  sentence, _ = gen_parser_ops.document_source(
      task_context=self.context_file, batch_size=1)
  with self.test_session() as sess:
    sentence_doc = self.ReadNextDocument(sess, sentence)
    self.assertEqual(len(sentence_doc.token), len(words))
    self.assertEqual(len(sentence_doc.token), len(starts))
    self.assertEqual(len(sentence_doc.token), len(ends))
    for i, token in enumerate(sentence_doc.token):
      self.assertEqual(token.word.encode('utf-8'), words[i])
      self.assertEqual(token.start, starts[i])
      self.assertEqual(token.end, ends[i])
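With the 'untokenized-text' context, the test verifies not only each token's word string but also the start and end byte offsets that document_source records for it.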
Example 3: CheckSegmentationTrainingData
# Required import: from syntaxnet.ops import gen_parser_ops [as alias]
# Or: from syntaxnet.ops.gen_parser_ops import document_source [as alias]
def CheckSegmentationTrainingData(self, doc_lines, doc_text, doc_words,
                                  break_levels):
  # Prepare context.
  self.WriteContext('segment-train-data')

  # Prepare test sentence.
  with open(self.corpus_file, 'w') as f:
    f.write(''.join(doc_lines))

  # Test converted sentence.
  sentence, _ = gen_parser_ops.document_source(
      task_context=self.context_file, batch_size=1)
  with self.test_session() as sess:
    sentence_doc = self.ReadNextDocument(sess, sentence)
    self.assertEqual(doc_text.decode('utf-8'), sentence_doc.text)
    self.assertEqual([t.decode('utf-8') for t in doc_words],
                     [t.word for t in sentence_doc.token])
    self.assertEqual(break_levels,
                     [t.break_level for t in sentence_doc.token])
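Here the 'segment-train-data' context converts the raw corpus into segmentation training data, and the test checks the reconstructed text, the token words, and each token's break_level.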
Example 4: main
# Required import: from syntaxnet.ops import gen_parser_ops [as alias]
# Or: from syntaxnet.ops.gen_parser_ops import document_source [as alias]
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name=FLAGS.corpus_name,
                                         task_context=FLAGS.task_context)
    sentence = sentence_pb2.Sentence()
    while True:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      for d in documents:
        sentence.ParseFromString(d)
        tr = asciitree.LeftAligned()
        d = to_dict(sentence)
        print 'Input: %s' % sentence.text
        print 'Parse:'
        tr_str = tr(d)
        pat = re.compile(r'\s*@\d+$')
        for tr_ln in tr_str.splitlines():
          print pat.sub('', tr_ln)
      if finished:
        break
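This standalone driver (written in Python 2, hence the print statements) streams batches of 32 serialized Sentence protos, parses each into a protocol buffer, and pretty-prints the dependency tree with asciitree, stripping the trailing '@N' token indices from each line. The to_dict helper, defined elsewhere in the source script, converts the parsed sentence into the nested dictionary that asciitree renders.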
Example 5: main
# Required import: from syntaxnet.ops import gen_parser_ops [as alias]
# Or: from syntaxnet.ops.gen_parser_ops import document_source [as alias]
def main(unused_argv):
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name=FLAGS.corpus_name,
                                         task_context=FLAGS.task_context)
    sentence = sentence_pb2.Sentence()
    while True:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      for d in documents:
        sentence.ParseFromString(d)
        tr = asciitree.LeftAligned()
        d = to_dict(sentence)
        print('Input: %s' % sentence.text)
        print('Parse:')
        tr_str = tr(d)
        pat = re.compile(r'\s*@\d+$')
        for tr_ln in tr_str.splitlines():
          print(pat.sub('', tr_ln))
      if finished:
        break
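Example 5 is the same driver as Example 4, updated to use Python 3 print functions.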
Example 6: ValidateDocuments
# Required import: from syntaxnet.ops import gen_parser_ops [as alias]
# Or: from syntaxnet.ops.gen_parser_ops import document_source [as alias]
def ValidateDocuments(self):
  doc_source = gen_parser_ops.document_source(self.context_file, batch_size=1)
  with self.test_session() as sess:
    logging.info('Reading document1')
    doc, last = self.ReadNextDocument(sess, doc_source)
    self.assertEqual(len(doc.token), 12)
    self.assertEqual(u'लाजमी', doc.token[9].word)
    self.assertFalse(last)
    logging.info('Reading document2')
    doc, last = self.ReadNextDocument(sess, doc_source)
    self.assertEqual(len(doc.token), 13)
    self.assertEqual(u'भंग', doc.token[9].word)
    self.assertFalse(last)
    logging.info('Hitting end of the dataset')
    doc, last = self.ReadNextDocument(sess, doc_source)
    self.assertTrue(doc is None)
    self.assertTrue(last)
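Examples 6 through 8 mirror Examples 1 through 3; the only difference is that the task context file is passed to document_source as a positional argument rather than via the task_context keyword.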
Example 7: CheckUntokenizedDoc
# Required import: from syntaxnet.ops import gen_parser_ops [as alias]
# Or: from syntaxnet.ops.gen_parser_ops import document_source [as alias]
def CheckUntokenizedDoc(self, sentence, words, starts, ends):
  self.WriteContext('untokenized-text')
  logging.info('Writing text file to: %s', self.corpus_file)
  with open(self.corpus_file, 'w') as f:
    f.write(sentence)
  sentence, _ = gen_parser_ops.document_source(
      self.context_file, batch_size=1)
  with self.test_session() as sess:
    sentence_doc = self.ReadNextDocument(sess, sentence)
    self.assertEqual(len(sentence_doc.token), len(words))
    self.assertEqual(len(sentence_doc.token), len(starts))
    self.assertEqual(len(sentence_doc.token), len(ends))
    for i, token in enumerate(sentence_doc.token):
      self.assertEqual(token.word.encode('utf-8'), words[i])
      self.assertEqual(token.start, starts[i])
      self.assertEqual(token.end, ends[i])
Example 8: CheckSegmentationTrainingData
# Required import: from syntaxnet.ops import gen_parser_ops [as alias]
# Or: from syntaxnet.ops.gen_parser_ops import document_source [as alias]
def CheckSegmentationTrainingData(self, doc_lines, doc_text, doc_words,
                                  break_levels):
  # Prepare context.
  self.WriteContext('segment-train-data')

  # Prepare test sentence.
  with open(self.corpus_file, 'w') as f:
    f.write(''.join(doc_lines))

  # Test converted sentence.
  sentence, _ = gen_parser_ops.document_source(
      self.context_file, batch_size=1)
  with self.test_session() as sess:
    sentence_doc = self.ReadNextDocument(sess, sentence)
    self.assertEqual(doc_text.decode('utf-8'), sentence_doc.text)
    self.assertEqual([t.decode('utf-8') for t in doc_words],
                     [t.word for t in sentence_doc.token])
    self.assertEqual(break_levels,
                     [t.break_level for t in sentence_doc.token])