当前位置: 首页>>代码示例>>Python>>正文


Python sentence_pb2.Sentence方法代码示例

本文整理汇总了Python中syntaxnet.sentence_pb2.Sentence类的典型用法代码示例。如果您正苦于以下问题:Python sentence_pb2.Sentence的具体用法?Python sentence_pb2.Sentence怎么用?Python sentence_pb2.Sentence使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块syntaxnet.sentence_pb2的用法示例。


在下文中一共展示了sentence_pb2.Sentence方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def main(unused_argv):
  """Reads sentences from the configured corpus and prints them as ASCII trees.

  Batches of serialized documents are pulled from
  `gen_parser_ops.document_source` (configured through FLAGS.corpus_name and
  FLAGS.task_context) until the source reports that it is finished. Each
  document is parsed into a Sentence, converted with `to_dict`, rendered with
  asciitree and printed to stdout.

  Args:
    unused_argv: Ignored.
  """
  logging.set_verbosity(logging.INFO)
  # Strips the trailing " @<number>" marker from each rendered line (to_dict is
  # expected to append it so that identical tokens stay distinct). Compiled
  # once instead of once per document.
  id_suffix = re.compile(r'\s*@\d+$')
  with tf.Session() as sess:
    src = gen_parser_ops.document_source(batch_size=32,
                                         corpus_name=FLAGS.corpus_name,
                                         task_context=FLAGS.task_context)
    # A single message object is reused; ParseFromString overwrites it.
    sentence = sentence_pb2.Sentence()
    while True:
      documents, finished = sess.run(src)
      logging.info('Read %d documents', len(documents))
      for serialized in documents:
        sentence.ParseFromString(serialized)
        tr = asciitree.LeftAligned()
        tree = to_dict(sentence)
        # Single-argument print(...) behaves identically on Python 2 and 3,
        # unlike the Python 2-only print statement.
        print('Input: %s' % sentence.text)
        print('Parse:')
        for tr_ln in tr(tree).splitlines():
          print(id_suffix.sub('', tr_ln))

      if finished:
        break
开发者ID:ringringyi,项目名称:DOTA_models,代码行数:25,代码来源:conll2tree.py

示例2: print_output

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def print_output(output_file, use_text_format, use_gold_segmentation, output):
  """Dumps serialized sentences to a file, one CoNLL-style row per token.

  The file starts with two header lines recording `use_text_format` and
  `use_gold_segmentation`. Each sentence then contributes a `# text = ...`
  line, one tab-separated row per token and a terminating blank line.

  Args:
    output_file: Path of the file to write.
    use_text_format: Whether this computation used text-format input.
    use_gold_segmentation: Whether this computation used gold segmentation.
    output: Iterable of serialized Sentence protos to write.
  """
  row_template = '%s\t%s\t_\t_\t_\t_\t%d\t%s\t_\t_\n'
  with gfile.GFile(output_file, 'w') as f:
    f.write('## tf:{}\n'.format(use_text_format))
    f.write('## gs:{}\n'.format(use_gold_segmentation))
    for serialized_sentence in output:
      sentence = sentence_pb2.Sentence()
      sentence.ParseFromString(serialized_sentence)
      text_bytes = sentence.text.encode('utf-8')
      f.write('# text = {}\n'.format(text_bytes))
      # Token ids are written 1-based; heads are shifted by one as well, so a
      # stored head of -1 is written as 0.
      for token_id, token in enumerate(sentence.token, 1):
        row = row_template % (token_id,
                              token.word.encode('utf-8'),
                              token.head + 1,
                              token.label.encode('utf-8'))
        f.write(row)
      f.write('\n')
开发者ID:rky0930,项目名称:yolo_v2,代码行数:24,代码来源:parse_to_conll.py

示例3: main

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def main(unused_argv):
  """Prints every sentence of the configured corpus as an ASCII parse tree.

  Serialized documents are fetched in batches from
  `gen_parser_ops.document_source` until the op signals completion; each one
  is printed as its input text followed by the rendered tree.

  Args:
    unused_argv: Ignored.
  """
  logging.set_verbosity(logging.INFO)
  with tf.Session() as sess:
    document_op = gen_parser_ops.document_source(
        batch_size=32,
        corpus_name=FLAGS.corpus_name,
        task_context=FLAGS.task_context)
    parsed = sentence_pb2.Sentence()
    finished = False
    # At least one batch is always fetched; the loop ends after the batch for
    # which the source reports completion has been printed.
    while not finished:
      batch, finished = sess.run(document_op)
      logging.info('Read %d documents', len(batch))
      for serialized in batch:
        parsed.ParseFromString(serialized)
        renderer = asciitree.LeftAligned()
        tree = to_dict(parsed)
        print('Input: %s' % parsed.text)
        print('Parse:')
        rendered = renderer(tree)
        # Removes the trailing " @<number>" marker from each rendered line.
        suffix_pattern = re.compile(r'\s*@\d+$')
        for line in rendered.splitlines():
          print(suffix_pattern.sub('', line))
开发者ID:itsamitgoel,项目名称:Gun-Detector,代码行数:25,代码来源:conll2tree.py

示例4: annotate_text

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def annotate_text(text):
    """Segment and parse `text` with the module-level syntaxnet models.

    The text is wrapped in a Sentence holding one token that carries the whole
    text (start=-1, end=-1), run through `char_token_generator` in a throwaway
    graph/session, and then passed through SEGMENTER_MODEL and PARSER_MODEL.

    Args:
        text: The raw text to annotate.

    Returns:
        A pair `(sentence, trace)`: the first parser annotation deserialized
        into a Sentence, and the first trace returned by the parser.

    Raises:
        AssertionError: If the parser does not return exactly one annotation
            and exactly one trace.
    """
    whole_text_token = sentence_pb2.Token(word=text, start=-1, end=-1)
    sentence = sentence_pb2.Sentence(text=text, token=[whole_text_token])
    serialized_input = sentence.SerializeToString()

    # Preprocess in a fresh graph so the op does not land in another graph.
    with tf.Session(graph=tf.Graph()) as session:
        char_tokens = gen_parser_ops.char_token_generator([serialized_input])
        preprocessed = session.run(char_tokens)[0]

    segmented, _ = SEGMENTER_MODEL(preprocessed)
    annotations, traces = PARSER_MODEL(segmented[0])
    assert len(annotations) == 1
    assert len(traces) == 1

    parsed = sentence_pb2.Sentence.FromString(annotations[0])
    return parsed, traces[0]
开发者ID:hltcoe,项目名称:PredPatt,代码行数:21,代码来源:ParseyPredFace.py

示例5: calculate_parse_metrics

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def calculate_parse_metrics(gold_corpus, annotated_corpus):
  """Calculate POS/UAS/LAS accuracy based on gold and annotated sentences.

  Both corpora must have the same length, and each aligned pair of sentences
  must have identical text and the same number of tokens; these conditions
  are enforced through `check.Eq`.

  Args:
    gold_corpus: Sequence of serialized Sentence protos with gold annotations.
    annotated_corpus: Sequence of serialized Sentence protos to score, aligned
      index-by-index with `gold_corpus`.

  Returns:
    A tuple `(pos, uas, las)` of percentages over all tokens: matching tags,
    matching heads, and matching heads together with matching labels.

  Raises:
    ZeroDivisionError: If the corpora contain no tokens at all.
  """
  check.Eq(len(gold_corpus), len(annotated_corpus), 'Corpora are not aligned')
  num_tokens = 0
  num_correct_pos = 0
  num_correct_uas = 0
  num_correct_las = 0
  for gold_str, annotated_str in zip(gold_corpus, annotated_corpus):
    gold = sentence_pb2.Sentence()
    annotated = sentence_pb2.Sentence()
    gold.ParseFromString(gold_str)
    annotated.ParseFromString(annotated_str)
    check.Eq(gold.text, annotated.text, 'Text is not aligned')
    check.Eq(len(gold.token), len(annotated.token), 'Tokens are not aligned')
    # Materialize the pairs: on Python 3 zip() returns a one-shot iterator,
    # which has no len() and would be exhausted after the first sum() below.
    token_pairs = list(zip(gold.token, annotated.token))
    num_tokens += len(token_pairs)
    num_correct_pos += sum(1 for x, y in token_pairs if x.tag == y.tag)
    num_correct_uas += sum(1 for x, y in token_pairs if x.head == y.head)
    num_correct_las += sum(1 for x, y in token_pairs
                           if x.head == y.head and x.label == y.label)

  tf.logging.info('Total num documents: %d', len(annotated_corpus))
  tf.logging.info('Total num tokens: %d', num_tokens)
  pos = num_correct_pos * 100.0 / num_tokens
  uas = num_correct_uas * 100.0 / num_tokens
  las = num_correct_las * 100.0 / num_tokens
  tf.logging.info('POS: %.2f%%', pos)
  tf.logging.info('UAS: %.2f%%', uas)
  tf.logging.info('LAS: %.2f%%', las)
  return pos, uas, las
开发者ID:ringringyi,项目名称:DOTA_models,代码行数:32,代码来源:evaluation.py

示例6: _add_sentence

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def _add_sentence(self, tags, heads, labels, corpus):
    """Appends one serialized sentence built from parallel annotation lists.

    One token (word 'x', start 0, end 0) is created per aligned
    (tag, head, label) triple; the serialized sentence is appended to `corpus`.
    """
    sentence = sentence_pb2.Sentence()
    add_token = sentence.token.add
    for token_tag, token_head, token_label in zip(tags, heads, labels):
        add_token(word='x', start=0, end=0,
                  tag=token_tag, head=token_head, label=token_label)
    serialized = sentence.SerializeToString()
    corpus.append(serialized)
开发者ID:ringringyi,项目名称:DOTA_models,代码行数:9,代码来源:evaluation_test.py

示例7: testCalculateSegmentationMetrics

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def testCalculateSegmentationMetrics(self):
    """Checks segmentation precision/recall/F1 on two hand-built sentences."""
    gold_corpus = self._gold_corpus = []
    test_corpus = self._test_corpus = []

    def append_segmented_sentence(corpus, spans):
        """Appends a serialized sentence with one token per (start, end)."""
        sentence = sentence_pb2.Sentence()
        for begin, finish in spans:
            sentence.token.add(word='x', start=begin, end=finish)
        corpus.append(sentence.SerializeToString())

    # A test case with 5 gold words, 4 test words and 3 are correct.
    #  -gold tokens: 'This is a gold sentence'
    #  -test tokens: 'Thisis  a gold sentence'
    append_segmented_sentence(
        gold_corpus, [(0, 3), (5, 6), (8, 8), (10, 13), (15, 22)])
    append_segmented_sentence(
        test_corpus, [(0, 6), (8, 8), (10, 13), (15, 22)])

    # Another test case with 3 gold words, 5 test words and 2 correct words.
    #  -gold tokens: 'another gold sentence'
    #  -test tokens: 'another gold sen tence'
    append_segmented_sentence(gold_corpus, [(0, 6), (8, 11), (13, 20)])
    append_segmented_sentence(
        test_corpus, [(0, 6), (8, 11), (13, 15), (17, 19), (21, 22)])

    metrics = evaluation.calculate_segmentation_metrics(self._gold_corpus,
                                                        self._test_corpus)
    prec, rec, f1 = metrics
    self.assertEqual(55.56, prec)
    self.assertEqual(62.50, rec)
    self.assertEqual(58.82, f1)

    expected_summaries = {
        'precision': 55.56,
        'recall': 62.50,
        'f1': 58.82,
        'eval_metric': 58.82
    }
    summaries = evaluation.segmentation_summaries(self._gold_corpus,
                                                  self._test_corpus)
    self.assertEqual(expected_summaries, summaries)
开发者ID:ringringyi,项目名称:DOTA_models,代码行数:41,代码来源:evaluation_test.py

示例8: testGiveMeAName

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def testGiveMeAName(self):
    """Checks that a rendered two-token parse tree contains expected content."""
    document = sentence_pb2.Sentence()
    add_token = document.token.add
    add_token(start=0, end=0, word='hi', head=1, label='something')
    add_token(start=1, end=1, word='there')

    contents = render_parse_tree_graphviz.parse_tree_graph(document)

    # Checked in order: an SVG polygon, the embedded data header, then the
    # label and both words.
    expected_fragments = (
        '<polygon',
        'text/html;charset=utf-8;base64',
        'something',
        'hi',
        'there',
    )
    for fragment in expected_fragments:
        self.assertIn(fragment, contents)
开发者ID:ringringyi,项目名称:DOTA_models,代码行数:12,代码来源:render_parse_tree_graphviz_test.py

示例9: assertParseable

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def assertParseable(self, reader, expected_num, expected_last):
    """Asserts that one read() yields the expected, non-empty sentences.

    Args:
        reader: Object whose read() returns `(serialized_sentences, last)`.
        expected_num: Expected number of sentences returned by the read.
        expected_last: Expected value of the `last` flag.
    """
    serialized_sentences, is_last = reader.read()
    self.assertEqual(expected_num, len(serialized_sentences))
    self.assertEqual(expected_last, is_last)
    for serialized in serialized_sentences:
        parsed = sentence_pb2.Sentence()
        parsed.ParseFromString(serialized)
        token_count = len(parsed.token)
        self.assertGreater(token_count, 0)
开发者ID:ringringyi,项目名称:DOTA_models,代码行数:10,代码来源:sentence_io_test.py

示例10: testReadFirstSentence

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def testReadFirstSentence(self):
    """Reads a single sentence from the fixture and checks its text."""
    expected_text = (
        u'I knew I could do it properly if given the right kind of support .')
    reader = sentence_io.ConllSentenceReader(self.filepath, 1)
    batch, is_last = reader.read()
    self.assertEqual(1, len(batch))
    first = sentence_pb2.Sentence()
    first.ParseFromString(batch[0])
    self.assertFalse(is_last)
    self.assertEqual(expected_text, first.text)
开发者ID:ringringyi,项目名称:DOTA_models,代码行数:12,代码来源:sentence_io_test.py

示例11: _create_fake_corpus

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def _create_fake_corpus():
  """Builds four serialized fake sentences containing 1, 2, 3 and 4 tokens.

  Sentence k has text 'x' * k and k tokens whose word is 'x' and whose start
  and end both equal the token's index.

  Returns:
    List of serialized Sentence protos.
  """
  num_docs = 4

  def _serialized_sentence(length):
    sentence = sentence_pb2.Sentence()
    sentence.text = 'x' * length
    for position in range(length):
      sentence.token.add(word='x', start=position, end=position)
    return sentence.SerializeToString()

  return [_serialized_sentence(length) for length in range(1, num_docs + 1)]
开发者ID:ringringyi,项目名称:DOTA_models,代码行数:16,代码来源:bulk_component_test.py

示例12: ReadNextDocument

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def ReadNextDocument(self, sess, doc_source):
    """Runs `doc_source` once and returns its first document, if any.

    Returns:
        A pair `(doc, last)`. `doc` is the first returned string parsed into a
        Sentence, or None when the run produced a falsy document batch; `last`
        is the second value produced by the run.
    """
    doc_str, last = sess.run(doc_source)
    if not doc_str:
        return None, last
    doc = sentence_pb2.Sentence()
    doc.ParseFromString(doc_str[0])
    return doc, last
开发者ID:ringringyi,项目名称:DOTA_models,代码行数:10,代码来源:lexicon_builder_test.py

示例13: to_dict

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def to_dict(sentence):
  """Converts a sentence's dependency parse into nested ordered dictionaries.

  Every node key has the form "<word> <tag> <label> @<n>", where n is the
  token's 1-based position. The "@<n>" suffix keeps tokens with identical
  word/tag/label distinct and should be stripped after the tree is rendered.

  A token whose head is -1 is taken as the root (the last such token wins);
  every other token is attached to the token at index `head`.

  Args:
    sentence: Sentence protocol buffer to represent.
  Returns:
    An OrderedDict with a single entry mapping the root's key to the
    OrderedDict of its children, recursively, in token order.
  """
  node_keys = []
  dependents = [[] for _ in sentence.token]
  root_index = -1
  for position, token in enumerate(sentence.token):
    node_keys.append('%s %s %s @%d' %
                     (token.word, token.tag, token.label, position + 1))
    if token.head == -1:
      root_index = position
    else:
      dependents[token.head].append(position)

  def _subtree(index):
    # Children are emitted in the order in which their tokens appear.
    return collections.OrderedDict(
        (node_keys[child], _subtree(child)) for child in dependents[index])

  return collections.OrderedDict(
      [(node_keys[root_index], _subtree(root_index))])
开发者ID:ringringyi,项目名称:DOTA_models,代码行数:36,代码来源:conll2tree.py

示例14: ReadNextDocument

# 需要导入模块: from syntaxnet import sentence_pb2 [as 别名]
# 或者: from syntaxnet.sentence_pb2 import Sentence [as 别名]
def ReadNextDocument(self, sess, sentence):
    """Runs the `sentence` op once and returns its first document, if any.

    Returns:
        The first returned string parsed into a Sentence, or None when the run
        produced a falsy result.
    """
    (sentence_str,) = sess.run([sentence])
    if not sentence_str:
        return None
    sentence_doc = sentence_pb2.Sentence()
    sentence_doc.ParseFromString(sentence_str[0])
    return sentence_doc
开发者ID:rky0930,项目名称:yolo_v2,代码行数:10,代码来源:text_formats_test.py


注:本文中的syntaxnet.sentence_pb2.Sentence方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。