本文整理汇总了Python中nltk.Tree.leaves方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.leaves方法的具体用法?Python Tree.leaves怎么用?Python Tree.leaves使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.Tree
的用法示例。
在下文中一共展示了Tree.leaves方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_sentence_and_indexes
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import leaves [as 别名]
def get_sentence_and_indexes(parsed_sentence):
    """Parse a bracketed sentence string and return its reordered surface
    form together with the per-leaf reordering indexes.

    parsed_sentence -- a bracketed (Penn-style) parse tree string.

    Returns a tuple (reordered_sentence, index_list) where
    reordered_sentence is a space-joined string and index_list holds one
    integer per leaf of the tree.
    """
    tree = Tree(parsed_sentence)
    # Drop the artificial root symbol that BitPar prepends, if present.
    if tree.node == bitpar_top:
        tree = tree[0]
    leaf_count = len(tree.leaves())
    index_list = [0] * leaf_count
    word_slots = [""] * leaf_count
    # The recursive helper fills both lists in place, one entry per leaf.
    get_sentence_and_indexes_rec_helper(tree, index_list, word_slots)
    return " ".join(word_slots), index_list
示例2: extract_itg
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import leaves [as 别名]
def extract_itg(alignments_file_name, parses_file_name, inv_extension):
    """Extract an inversion transduction grammar (ITG) from the given files.

    Keyword arguments:
    alignments_file_name -- name of file containing alignments
        between sentences in l1_file_name and l2_file_name
    parses_file_name -- name of file containing parse trees
        of the sentences in l1_file_name
    inv_extension -- extension denoting whether a node is inverted

    Returns a Counter of binary ITG rules and a Counter of unary rules.
    Each ITG rule is represented as the tuple (lhs, rhs), where rhs is a
    tuple of nodes."""
    binary_itg = Counter()
    unary_itg = Counter()
    num_lines = number_of_lines(parses_file_name)
    # Guard against modulo-by-zero when the parse file has < 100 lines;
    # also use '==' rather than 'is' — identity comparison on ints is an
    # implementation detail and not reliable.
    progress_step = max(num_lines // 100, 1)
    # 'with' guarantees both files are closed even if an error is raised.
    with open(alignments_file_name) as alignments_file, \
            open(parses_file_name) as parses_file:
        for i, l1_parse in enumerate(parses_file):
            if i % progress_step == 0:
                sys.stdout.write('\r%d%%' % (i * 100 // num_lines,))
                sys.stdout.flush()
            # Pre-bind so the except-handler below can log it even when
            # reading the alignment line itself is what failed.
            reordered_indexes = None
            try:  # TODO remove try/catch
                reordered_indexes = str_to_reordered_indexes(next(alignments_file))
                # remove outer brackets from Berkeley parse
                l1_parse = l1_parse.strip()
                l1_parse = l1_parse[1:len(l1_parse) - 1]
                l1_parse = l1_parse.strip()
                parse_tree = Tree(l1_parse)
                parse_forest = generate_forest(parse_tree,
                    reordered_indexes, inv_extension)
            except Exception:
                # Log the offending line, then re-raise so the caller sees
                # the original error.  Catch Exception (not bare except) so
                # KeyboardInterrupt/SystemExit still propagate untouched.
                with open('error.log', 'a') as error_log:
                    error_log.write('%s -- in extract_itg/3\n' % time.asctime())
                    error_log.write('line: %s\n' % i)
                    error_log.write('%s\n' % l1_parse.strip())
                    error_log.write('%s\n' % reordered_indexes)
                    error_log.write('\n')
                print('Error in extract_itg/3. See error.log')
                raise

            binary_rules, unary_rules = extract_rules(parse_forest,
                parse_tree.leaves())
            for rule in binary_rules:
                binary_itg[rule] += 1
            for rule in unary_rules:
                unary_itg[rule] += 1
    return binary_itg, unary_itg
示例3: _build_hierplane_tree
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import leaves [as 别名]
def _build_hierplane_tree(self, tree: Tree, index: int, is_root: bool) -> JsonDict:
"""
Recursively builds a JSON dictionary from an NLTK ``Tree`` suitable for
rendering trees using the `Hierplane library<https://allenai.github.io/hierplane/>`.
Parameters
----------
tree : ``Tree``, required.
The tree to convert into Hierplane JSON.
index : int, required.
The character index into the tree, used for creating spans.
is_root : bool
An indicator which allows us to add the outer Hierplane JSON which
is required for rendering.
Returns
-------
A JSON dictionary render-able by Hierplane for the given tree.
"""
children = []
for child in tree:
if isinstance(child, Tree):
# If the child is a tree, it has children,
# as NLTK leaves are just strings.
children.append(self._build_hierplane_tree(child, index, is_root=False))
else:
# We're at a leaf, so add the length of
# the word to the character index.
index += len(child)
label = tree.label()
span = " ".join(tree.leaves())
hierplane_node = {
"word": span,
"nodeType": label,
"attributes": [label],
"link": label
}
if children:
hierplane_node["children"] = children
# TODO(Mark): Figure out how to span highlighting to the leaves.
if is_root:
hierplane_node = {
"linkNameToLabel": LINK_TO_LABEL,
"nodeTypeToStyle": NODE_TYPE_TO_STYLE,
"text": span,
"root": hierplane_node
}
return hierplane_node
示例4: convert_psd_sent_2_segmentation_2
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import leaves [as 别名]
def convert_psd_sent_2_segmentation_2(parsed_corpus):
    """Convert parsed sentences into word segmentations.

    parsed_corpus -- an iterable where each item holds the top-k parses of
        one sentence as (score, parse_tree_string) pairs, best parse first.

    Returns a list with one segmentation per sentence; each segmentation is
    a list of words obtained by concatenating the leaves (characters) of
    the subtrees two levels below the root of the best parse tree.
    """
    seg_corpus = []
    for top_k_psd_of_the_sent in parsed_corpus:
        segmentation = []
        best_score, best_parse_tree_str = top_k_psd_of_the_sent[0]
        tree = Tree(best_parse_tree_str)
        # tree=ROOT, tree[0]=S, tree[0, ] is the subtrees of S, i.e. POS tags;
        # we can use alternative methods.
        # NOTE: this is highly dependent on the format of the parser outputs!
        for subtree in tree.subtrees(lambda t: t.height() == tree.height() - 2):
            segmentation.append(''.join(subtree.leaves()))
        seg_corpus.append(segmentation)
        # Sanity check: the segmentation must cover exactly the tree's leaves.
        if ''.join(segmentation) != ''.join(tree.leaves()):
            print('Error! Leaves/characters in the segmentation != total characters in the tree (as leaves). Double check the format and/or code!')
            break
    return seg_corpus
示例5: print
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import leaves [as 别名]
# Load the hand-annotated parse trees (one bracketed tree per line) and
# index each (word, POS-tag) pair to its original tree-string line.
# NOTE(review): 'rU' mode is deprecated/removed in modern Python 3 —
# confirm this still runs on the target interpreter.
f=codecs.open(path_annotation, 'rU', 'utf-8')
lines=f.readlines()
f.close()
Production=[]
count=0
# One progress "tick" per 10% of the input.
# NOTE(review): total_nth is 0 when len(lines) < 10, which makes the
# modulo below raise ZeroDivisionError — assumes a reasonably large file.
total_nth=int(len(lines)/10)
for line in lines:
    if count%total_nth==0:
        print(count/total_nth*10, '% finished')
    count +=1
    # Tree(str) / tree.node is the NLTK 2.x API (NLTK 3 uses
    # Tree.fromstring(...) / tree.label()) — presumably this file targets
    # the older NLTK; verify against the installed version.
    tree=Tree(line.strip())
    # Split the root label into the bare tag and its subscript.
    tag, subscript=decompose_tag(tree.node)
    # The word is the concatenation of the tree's leaf characters.
    word=''.join(tree.leaves())
    word_pos2tree_str[(word, tag)]=line.strip()
print('done!')
#
# gen single-char annotation from the corpus
#
print('\n\ngenerating rules for single-char words from corpus')
#---> one needs to run 2a_gen_tag_set_for_word_type.py to gen word2newtag.pickle before using it
path_word2newtag='../working_data/word2newtag.pickle'