本文整理汇总了Python中syntaxnet.sentence_pb2.Sentence方法的典型用法代码示例。如果您正苦于以下问题:Python sentence_pb2.Sentence方法的具体用法?Python sentence_pb2.Sentence怎么用?Python sentence_pb2.Sentence使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类syntaxnet.sentence_pb2
的用法示例。
在下文中一共展示了sentence_pb2.Sentence方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def main(unused_argv):
  """Reads batches of serialized Sentence protos and prints their parse trees.

  Pulls documents from the corpus configured via FLAGS until the source is
  exhausted, rendering each sentence's dependency tree as ASCII art.

  Args:
    unused_argv: Unused command-line arguments (required by the app runner).
  """
  logging.set_verbosity(logging.INFO)
  # Compile once, outside the document loop: the pattern strips the '@<index>'
  # disambiguation suffix that to_dict() appends to every node label.
  pat = re.compile(r'\s*@\d+$')
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name=FLAGS.corpus_name,
                                         task_context=FLAGS.task_context)
    sentence = sentence_pb2.Sentence()
    while True:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      for d in documents:
        sentence.ParseFromString(d)
        tr = asciitree.LeftAligned()
        d = to_dict(sentence)
        # print() works on both Python 2 and 3 (consistent with the other
        # example in this file that uses print functions).
        print('Input: %s' % sentence.text)
        print('Parse:')
        tr_str = tr(d)
        for tr_ln in tr_str.splitlines():
          print(pat.sub('', tr_ln))
      if finished:
        break
示例2: print_output
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def print_output(output_file, use_text_format, use_gold_segmentation, output):
  """Writes a set of sentences in CoNLL format.

  Args:
    output_file: The file to write to.
    use_text_format: Whether this computation used text-format input.
    use_gold_segmentation: Whether this computation used gold segmentation.
    output: A list of sentences to write to the output file.
  """
  with gfile.GFile(output_file, 'w') as out:
    # Two header lines record how the computation was configured.
    out.write('## tf:{}\n'.format(use_text_format))
    out.write('## gs:{}\n'.format(use_gold_segmentation))
    for blob in output:
      parsed = sentence_pb2.Sentence()
      parsed.ParseFromString(blob)
      out.write('# text = {}\n'.format(parsed.text.encode('utf-8')))
      for idx, tok in enumerate(parsed.token):
        # CoNLL rows are 1-based; proto head indices are 0-based (-1 = root).
        out.write('%s\t%s\t_\t_\t_\t_\t%d\t%s\t_\t_\n' %
                  (idx + 1, tok.word.encode('utf-8'), tok.head + 1,
                   tok.label.encode('utf-8')))
      out.write('\n')
示例3: main
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def main(unused_argv):
  """Prints the ASCII dependency tree of every document in the corpus."""
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name=FLAGS.corpus_name,
                                         task_context=FLAGS.task_context)
    sentence = sentence_pb2.Sentence()
    finished = False
    # Process batches until the source reports the final one; the final
    # batch itself is still rendered before the loop exits.
    while not finished:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      for serialized in documents:
        sentence.ParseFromString(serialized)
        renderer = asciitree.LeftAligned()
        tree_dict = to_dict(sentence)
        print('Input: %s' % sentence.text)
        print('Parse:')
        # Strip the '@<index>' suffixes that to_dict() adds for uniqueness.
        suffix_re = re.compile(r'\s*@\d+$')
        for line in renderer(tree_dict).splitlines():
          print(suffix_re.sub('', line))
示例4: annotate_text
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def annotate_text(text):
  """Segment and parse input text using syntaxnet models.

  Args:
    text: Raw input string to annotate.

  Returns:
    A pair of the parsed sentence_pb2.Sentence and its annotation trace.
  """
  # Wrap the raw text in a single-token Sentence; start/end of -1 marks the
  # token as not yet segmented.
  seed = sentence_pb2.Sentence(
      text=text,
      token=[sentence_pb2.Token(word=text, start=-1, end=-1)]
  )
  # Preprocess: expand into character tokens inside a throwaway graph.
  with tf.Session(graph=tf.Graph()) as session:
    char_tokens = gen_parser_ops.char_token_generator(
        [seed.SerializeToString()])
    char_corpus = session.run(char_tokens)[0]
  segmented, _ = SEGMENTER_MODEL(char_corpus)
  annotations, traces = PARSER_MODEL(segmented[0])
  assert len(annotations) == 1
  assert len(traces) == 1
  return sentence_pb2.Sentence.FromString(annotations[0]), traces[0]
示例5: calculate_parse_metrics
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def calculate_parse_metrics(gold_corpus, annotated_corpus):
  """Calculate POS/UAS/LAS accuracy based on gold and annotated sentences.

  Args:
    gold_corpus: List of serialized gold Sentence protos.
    annotated_corpus: List of serialized annotated Sentence protos, aligned
      one-to-one with gold_corpus.

  Returns:
    A (pos, uas, las) tuple of accuracy percentages.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')
  num_tokens = 0
  num_correct_pos = 0
  num_correct_uas = 0
  num_correct_las = 0
  for gold_str, annotated_str in zip(gold_corpus, annotated_corpus):
    gold = sentence_pb2.Sentence()
    annotated = sentence_pb2.Sentence()
    gold.ParseFromString(gold_str)
    annotated.ParseFromString(annotated_str)
    check.Eq(gold.text, annotated.text, 'Text is not aligned')
    check.Eq(len(gold.token), len(annotated.token), 'Tokens are not aligned')
    # Materialize as a list: on Python 3, zip() returns an iterator that has
    # no len() and would be exhausted after the first sum() below, silently
    # zeroing the UAS/LAS counts. list() is a no-op on Python 2.
    tokens = list(zip(gold.token, annotated.token))
    num_tokens += len(tokens)
    num_correct_pos += sum(1 for x, y in tokens if x.tag == y.tag)
    num_correct_uas += sum(1 for x, y in tokens if x.head == y.head)
    num_correct_las += sum(1 for x, y in tokens
                           if x.head == y.head and x.label == y.label)
  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total num tokens: %d', num_tokens)
  # NOTE(review): an empty corpus still divides by zero here — presumably the
  # callers never pass one; confirm before hardening.
  pos = num_correct_pos * 100.0 / num_tokens
  uas = num_correct_uas * 100.0 / num_tokens
  las = num_correct_las * 100.0 / num_tokens
  tf.logging.info('POS: %.2f%%', pos)
  tf.logging.info('UAS: %.2f%%', uas)
  tf.logging.info('LAS: %.2f%%', las)
  return pos, uas, las
示例6: _add_sentence
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def _add_sentence(self, tags, heads, labels, corpus):
  """Appends one serialized sentence built from parallel token attributes.

  Args:
    tags: POS tag for each token.
    heads: Head index for each token.
    labels: Dependency label for each token.
    corpus: List that receives the serialized Sentence proto.
  """
  proto = sentence_pb2.Sentence()
  # Every token gets the placeholder word 'x' at offset 0; only the
  # tag/head/label fields vary.
  for tag, head, label in zip(tags, heads, labels):
    proto.token.add(word='x', start=0, end=0, tag=tag, head=head, label=label)
  corpus.append(proto.SerializeToString())
示例7: testCalculateSegmentationMetrics
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def testCalculateSegmentationMetrics(self):
  """Checks precision/recall/F1 from calculate_segmentation_metrics.

  A predicted token counts as correct only when both its start and end byte
  offsets exactly match a gold token's offsets.
  """
  self._gold_corpus = []
  self._test_corpus = []
  def add_sentence_for_segment_eval(starts, ends, corpus):
    """Adds a sentence to the corpus."""
    sentence = sentence_pb2.Sentence()
    for start, end in zip(starts, ends):
      sentence.token.add(word='x', start=start, end=end)
    corpus.append(sentence.SerializeToString())
  # A test case with 5 gold words, 4 test words and 3 are correct.
  # -gold tokens: 'This is a gold sentence'
  # -test tokens: 'Thisis a gold sentence'
  add_sentence_for_segment_eval(
      [0, 5, 8, 10, 15], [3, 6, 8, 13, 22], self._gold_corpus)
  add_sentence_for_segment_eval(
      [0, 8, 10, 15], [6, 8, 13, 22], self._test_corpus)
  # Another test case with 3 gold words, 5 test words and 2 correct words.
  # -gold tokens: 'another gold sentence'
  # -test tokens: 'another gold sen tence'
  add_sentence_for_segment_eval([0, 8, 13], [6, 11, 20], self._gold_corpus)
  add_sentence_for_segment_eval([0, 8, 13, 17, 21], [6, 11, 15, 19, 22],
                                self._test_corpus)
  # Totals: 9 test tokens with 5 correct -> precision 5/9 = 55.56%;
  # 8 gold tokens with 5 correct -> recall 5/8 = 62.50%; F1 = 58.82%.
  prec, rec, f1 = evaluation.calculate_segmentation_metrics(self._gold_corpus,
                                                            self._test_corpus)
  self.assertEqual(55.56, prec)
  self.assertEqual(62.50, rec)
  self.assertEqual(58.82, f1)
  # The summaries dict must expose the same numbers, with F1 doubling as the
  # overall eval metric.
  summaries = evaluation.segmentation_summaries(self._gold_corpus,
                                                self._test_corpus)
  self.assertEqual({
      'precision': 55.56,
      'recall': 62.50,
      'f1': 58.82,
      'eval_metric': 58.82
  }, summaries)
示例8: testGiveMeAName
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def testGiveMeAName(self):
  """Renders a two-token parse tree and spot-checks the generated markup."""
  doc = sentence_pb2.Sentence()
  doc.token.add(start=0, end=0, word='hi', head=1, label='something')
  doc.token.add(start=1, end=1, word='there')
  rendered = render_parse_tree_graphviz.parse_tree_graph(doc)
  # The render should embed an SVG polygon inside a base64 HTML data URI and
  # mention every word and label from the input sentence.
  for fragment in ('<polygon', 'text/html;charset=utf-8;base64',
                   'something', 'hi', 'there'):
    self.assertIn(fragment, rendered)
示例9: assertParseable
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def assertParseable(self, reader, expected_num, expected_last):
  """Asserts the reader yields the expected batch of parseable sentences.

  Args:
    reader: Sentence reader to pull a batch from.
    expected_num: Expected number of serialized sentences in the batch.
    expected_last: Expected value of the reader's last-batch flag.
  """
  sentences, last = reader.read()
  self.assertEqual(expected_num, len(sentences))
  self.assertEqual(expected_last, last)
  # Every serialized entry must round-trip into a non-empty Sentence proto.
  for serialized in sentences:
    proto = sentence_pb2.Sentence()
    proto.ParseFromString(serialized)
    self.assertGreater(len(proto.token), 0)
示例10: testReadFirstSentence
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def testReadFirstSentence(self):
  """Reading with batch size 1 returns exactly the corpus's first sentence."""
  reader = sentence_io.ConllSentenceReader(self.filepath, 1)
  sentences, last = reader.read()
  self.assertEqual(1, len(sentences))
  # One sentence into the file cannot be the last batch.
  self.assertFalse(last)
  first = sentence_pb2.Sentence()
  first.ParseFromString(sentences[0])
  self.assertEqual(
      u'I knew I could do it properly if given the right kind of support .',
      first.text)
示例11: _create_fake_corpus
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def _create_fake_corpus():
  """Returns a list of fake serialized sentences for tests."""
  corpus = []
  # Four sentences with 1..4 single-character tokens; token i spans [i, i]
  # and the text is the matching run of 'x' characters.
  for num_tokens in range(1, 5):
    sentence = sentence_pb2.Sentence()
    sentence.text = 'x' * num_tokens
    for position in range(num_tokens):
      sentence.token.add(word='x', start=position, end=position)
    corpus.append(sentence.SerializeToString())
  return corpus
示例12: ReadNextDocument
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def ReadNextDocument(self, sess, doc_source):
  """Runs the document source op once and decodes the result.

  Args:
    sess: TensorFlow session to run the op in.
    doc_source: Document source op returning (serialized docs, last flag).

  Returns:
    A (Sentence or None, last) pair; None when the source yielded nothing.
  """
  doc_str, last = sess.run(doc_source)
  if not doc_str:
    return None, last
  doc = sentence_pb2.Sentence()
  doc.ParseFromString(doc_str[0])
  return doc, last
示例13: to_dict
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def to_dict(sentence):
  """Builds a dictionary representing the parse tree of a sentence.

  Note that the suffix "@id" (where 'id' is a number) is appended to each
  element to handle the sentence that has multiple elements with identical
  representation. Those suffix needs to be removed after the asciitree is
  rendered.

  Args:
    sentence: Sentence protocol buffer to represent.

  Returns:
    Dictionary mapping tokens to children.
  """
  token_str = []
  children = [[] for _ in sentence.token]
  root = -1
  for i, token in enumerate(sentence.token):
    # '@<1-based index>' keeps nodes unique when word/tag/label collide.
    token_str.append('%s %s %s @%d' %
                     (token.word, token.tag, token.label, i + 1))
    if token.head == -1:
      root = i  # head == -1 marks the root token
    else:
      children[token.head].append(i)

  def _get_dict(i):
    """Recursively builds the ordered subtree dict rooted at token i."""
    d = collections.OrderedDict()
    for c in children[i]:
      d[token_str[c]] = _get_dict(c)
    return d

  # NOTE(review): if no token has head == -1, root stays -1 and token_str[-1]
  # silently picks the final token — presumably parsed input always has a
  # root; confirm before hardening.
  tree = collections.OrderedDict()
  tree[token_str[root]] = _get_dict(root)
  return tree
示例14: ReadNextDocument
# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def ReadNextDocument(self, sess, sentence):
  """Evaluates the sentence tensor and decodes it into a Sentence proto.

  Args:
    sess: TensorFlow session to run the tensor in.
    sentence: Tensor yielding a batch of serialized sentences.

  Returns:
    The decoded Sentence proto, or None when the batch was empty.
  """
  sentence_str, = sess.run([sentence])
  if not sentence_str:
    return None
  decoded = sentence_pb2.Sentence()
  decoded.ParseFromString(sentence_str[0])
  return decoded