当前位置: 首页>>代码示例>>Python>>正文


Python nltk.Tree类代码示例

本文整理汇总了Python中nltk.Tree的典型用法代码示例。如果您正苦于以下问题:Python Tree类的具体用法?Python Tree怎么用?Python Tree使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Tree类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: read_treefile

def read_treefile(hyptreefile,reftreefile):
    hfile = codecs.open(hyptreefile,"r",encoding='utf-8')
    rfile = codecs.open(reftreefile,"r",encoding='utf-8')
    scoredic = {}
    #store rtree into rtreelist suppose there are more than one reference
    rtreel = []
    for i in rfile:
        refl = []
        if i.strip() != "":
            refl.append(i.strip())
            rstr = " ".join(refl)
            rtree = Tree.fromstring(rstr)
        rtreel.append(rtree)
    #store hyptree into hyplist    
    htreel = []
    senl = []
    for i in hfile:
        if i.strip() != "":
            senl.append(i.strip())
        else:
            htreel.append(Tree.fromstring(" ".join(senl)))
            senl = []
            
    #loop and score
    for r in rtreel:
        for h in htreel:
            score,hword,rword= score_similarity(h,r)
            scoredic[" ".join(hword)] = score
            
    return scoredic     
开发者ID:rillaha,项目名称:similarit,代码行数:30,代码来源:score_similarity.py

示例2: test_pcfg

    def test_pcfg(self):
        o = pcfg.PCFG()
        tree = Tree('S', (Tree('NP', ('foo',)), Tree('VP', ('bar',))))

        o.update_counts(tree)
        self.assertSetEqual(
                set([(p, 1) for p in tree.productions()]),
                set(o.production_counts.items()))
        self.assertSetEqual(set([(p.lhs(), 1) for p in tree.productions()]),
                set(o.lhs_counts.items()))
        o.update_counts(tree)

        tree = Tree('S', (Tree('VP', ('foo',)), Tree('NP', ('bar',))))
        o.update_counts(tree)
        o.update_counts(tree)
        self.assertEqual(6, len(o.production_counts))
        for count in o.production_counts.values():
            self.assertEqual(2, count)
        self.assertEqual(3, len(o.lhs_counts))
        for count in o.lhs_counts.values():
            self.assertEqual(4, count)

        o.compute_scores()
        for production, score in o.scored_productions.items():
            self.assertAlmostEqual(-0.69314718055, score, msg='%s' % production)
开发者ID:divyag9,项目名称:2018-summer-main,代码行数:25,代码来源:pcfg_test.py

示例3: _muc_read_text

def _muc_read_text(s, top_node):
    # The tokenizer sometimes splits within coref tags.
    def __fix_tokenization(sents):
        for index in range(len(sents)):
            next = 1
            while sents[index].count('<COREF') != sents[index].count('</COREF>'):
                sents[index] += ' '
                sents[index] += sents[index + next]
                sents[index + next] = ''
                next += 1
        sents = filter(None, sents)
        return sents
    if s:
        tree = Tree(top_node, [])        
        if _MUC6_PARA_RE.match(s):
            for para in _MUC6_PARA_RE.findall(s):
                if para and para[0] and para[0].strip():
                    tree.append(Tree('P', []))
                    for sent in _MUC6_SENT_RE.findall(para[0]):
                        words = _MUC6_SENT_RE.match(sent[0]).group('sent').strip()
                        # There are empty sentences <s></s> in the MUC6 corpus.
                        if words:
                            tree[-1].append(_muc_read_words(words, 'S'))                
        elif _MUC7_PARA_RE.match(s):
            for para in _MUC7_PARA_SPLIT_RE.split(s):
                if para and para.strip():
                    tree.append(Tree('P', []))
                    for sent in __fix_tokenization(_SENT_TOKENIZER.tokenize(para)):
                        tree[-1].append(_muc_read_words(sent, 'S'))
        return tree
开发者ID:knowlp,项目名称:nltk_contrib,代码行数:30,代码来源:muc.py

示例4: match

    def match(self, tree):
        try:
            if tree.label() != 'ROOT':
                raise IndexError
            if tree[0].label() != 'SBARQ':
                raise IndexError
            if tree[0][0][0].label() != 'WRB':
                raise IndexError
            if tree[0][0][0][0].lower() != 'when':
                raise IndexError
            if tree[0][1].label() != 'SQ':
                raise IndexError
            if tree[0][1][0].label() != 'VBD':
                raise IndexError
            if tree[0][1][1].label() != 'NP':
                raise IndexError
            if tree[0][1][2].label() != 'VP':
                raise IndexError

            part = Pattern.Part()
            part.object = ParentedTree.fromstring(str(tree[0][1][1]))
            part.property = ParentedTree.fromstring(str(Tree('VP', [
                Tree.fromstring(str(tree[0][0][0])),
                Tree.fromstring(str(tree[0][1][0])),
                Tree.fromstring(str(tree[0][1][2]))
            ])))

            return [part]
        except IndexError:
            return []
开发者ID:EIFSDB,项目名称:search-engine,代码行数:30,代码来源:PatternWhenWasObjectAction.py

示例5: parser_output_to_parse_deriv_trees

def parser_output_to_parse_deriv_trees(output):
    lines = output.strip().split("\n")
    deriv_tree_lines = lines[::2]
    parse_tree_lines = lines[1::2]

    parse_trees = [Tree.fromstring(line.replace('\x06', 'epsilon_')) for line in parse_tree_lines if line != '']
    deriv_trees = [Tree.fromstring(line) for line in deriv_tree_lines if line != '']
    return parse_trees, deriv_trees
开发者ID:jonpiffle,项目名称:ltag_parser,代码行数:8,代码来源:investigate_parser_output.py

示例6: test_evalb_correctly_scores_identical_trees

 def test_evalb_correctly_scores_identical_trees(self):
     tree1 = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
     tree2 = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
     evalb_scorer = EvalbBracketingScorer()
     evalb_scorer([tree1], [tree2])
     metrics = evalb_scorer.get_metric()
     assert metrics["evalb_recall"] == 1.0
     assert metrics["evalb_precision"] == 1.0
     assert metrics["evalb_f1_measure"] == 1.0
开发者ID:apmoore1,项目名称:allennlp,代码行数:9,代码来源:evalb_bracketing_scorer_test.py

示例7: add_top_to_tree

def add_top_to_tree(treebank_file):
    f = open(treebank_file, "r")
    root_set = set([])
    for sentence in f:
        t = Tree.fromstring(sentence, remove_empty_top_bracketing=False)
        top_node = Tree("TOP", [])
        top_node.append(t)
        print NewTree.flat_print(top_node)
    f.close()
开发者ID:srush,项目名称:PhraseDep,代码行数:9,代码来源:PTBRoot.py

示例8: test_evalb_correctly_calculates_bracketing_metrics_over_multiple_trees

 def test_evalb_correctly_calculates_bracketing_metrics_over_multiple_trees(self):
     tree1 = Tree.fromstring("(S (VP (D the) (NP dog)) (VP (V chased) (NP (D the) (N cat))))")
     tree2 = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
     evalb_scorer = EvalbBracketingScorer()
     evalb_scorer([tree1, tree2], [tree2, tree2])
     metrics = evalb_scorer.get_metric()
     assert metrics["evalb_recall"] == 0.875
     assert metrics["evalb_precision"] == 0.875
     assert metrics["evalb_f1_measure"] == 0.875
开发者ID:apmoore1,项目名称:allennlp,代码行数:9,代码来源:evalb_bracketing_scorer_test.py

示例9: _construct_node_from_actions

    def _construct_node_from_actions(self,
                                     current_node: Tree,
                                     remaining_actions: List[List[str]],
                                     add_var_function: bool) -> List[List[str]]:
        """
        Given a current node in the logical form tree, and a list of actions in an action sequence,
        this method fills in the children of the current node from the action sequence, then
        returns whatever actions are left.

        For example, we could get a node with type ``c``, and an action sequence that begins with
        ``c -> [<r,c>, r]``.  This method will add two children to the input node, consuming
        actions from the action sequence for nodes of type ``<r,c>`` (and all of its children,
        recursively) and ``r`` (and all of its children, recursively).  This method assumes that
        action sequences are produced `depth-first`, so all actions for the subtree under ``<r,c>``
        appear before actions for the subtree under ``r``.  If there are any actions in the action
        sequence after the ``<r,c>`` and ``r`` subtrees have terminated in leaf nodes, they will be
        returned.
        """
        if not remaining_actions:
            logger.error("No actions left to construct current node: %s", current_node)
            raise ParsingError("Incomplete action sequence")
        left_side, right_side = remaining_actions.pop(0)
        if left_side != current_node.label():
            logger.error("Current node: %s", current_node)
            logger.error("Next action: %s -> %s", left_side, right_side)
            logger.error("Remaining actions were: %s", remaining_actions)
            raise ParsingError("Current node does not match next action")
        if right_side[0] == '[':
            # This is a non-terminal expansion, with more than one child node.
            for child_type in right_side[1:-1].split(', '):
                if child_type.startswith("'lambda"):
                    # We need to special-case the handling of lambda here, because it's handled a
                    # bit weirdly in the action sequence.  This is stripping off the single quotes
                    # around something like `'lambda x'`.
                    child_type = child_type[1:-1]
                child_node = Tree(child_type, [])
                current_node.append(child_node)  # you add a child to an nltk.Tree with `append`
                if not self.is_terminal(child_type):
                    remaining_actions = self._construct_node_from_actions(child_node,
                                                                          remaining_actions,
                                                                          add_var_function)
        elif self.is_terminal(right_side):
            # The current node is a pre-terminal; we'll add a single terminal child.  We need to
            # check first for whether we need to add a (var _) around the terminal node, though.
            if add_var_function and right_side in self._lambda_variables:
                right_side = f"(var {right_side})"
            if add_var_function and right_side == 'var':
                raise ParsingError('add_var_function was true, but action sequence already had var')
            current_node.append(Tree(right_side, []))  # you add a child to an nltk.Tree with `append`
        else:
            # The only way this can happen is if you have a unary non-terminal production rule.
            # That is almost certainly not what you want with this kind of grammar, so we'll crash.
            # If you really do want this, open a PR with a valid use case.
            raise ParsingError(f"Found a unary production rule: {left_side} -> {right_side}. "
                               "Are you sure you want a unary production rule in your grammar?")
        return remaining_actions
开发者ID:pyknife,项目名称:allennlp,代码行数:56,代码来源:world.py

示例10: drawTrees

def drawTrees(chart):
	for state in chart[-1]:
		if state.isParse(grammar):
			treeString = buildTreeString(state,'')
			tree = Tree(treeString)
			print 'Showing parse tree. Close window to continue.'
			tree.draw()
			ans = raw_input('Do you want to see another parse tree?(y/n): ')
			if ans == 'n': return
	print 'No more valid parses'
开发者ID:hillelweintraub,项目名称:NLP,代码行数:10,代码来源:parse.py

示例11: extract_itg

def extract_itg(alignments_file_name, parses_file_name, inv_extension):
    """Extract a inversion transduction grammar (ITG)
    from the given files.
    
    Keyword arguments:
    alignments_file_name -- name of file containing alignments
        between sentences in l1_file_name and l2_file_name
    parses_file_name -- name of file containing parse trees
        of the sentences in l1_file_name
    inv_extension -- extension denoting whether a node is inverted
        
    Returns a Counter of binary ITG rules and unary rules. Each ITG rule is 
    represented as the tuple (lhs, rhs), where rhs is a tuple of nodes."""
    binary_itg = Counter()
    unary_itg = Counter()
    num_lines = number_of_lines(parses_file_name)
    alignments_file = open(alignments_file_name)
    parses_file = open(parses_file_name)
    
    for i, l1_parse in enumerate(parses_file):
        if i % (num_lines/100) is 0:
            sys.stdout.write('\r%d%%' % (i*100/num_lines,))
            sys.stdout.flush()

        try: # TODO remove try/catch
            reordered_indexes = str_to_reordered_indexes(alignments_file.next())
            # remove outer brackets from Berkeley parse
            l1_parse = l1_parse.strip()
            l1_parse = l1_parse[1:len(l1_parse)-1]
            l1_parse = l1_parse.strip()
            parse_tree = Tree(l1_parse)            
            parse_forest = generate_forest(parse_tree, 
                reordered_indexes, inv_extension)
        except:
            error_log = open('error.log', 'a')
            error_log.write('%s -- in extract_itg/3\n' % time.asctime())
            error_log.write('line: %s\n' % i)
            error_log.write('%s\n' % l1_parse.strip())
            error_log.write('%s\n' % reordered_indexes)
            error_log.write('\n')
            error_log.close()
            print 'Error in extract_itg/3. See error.log'
            raise

        binary_rules, unary_rules = extract_rules(parse_forest, 
                                                  parse_tree.leaves())
        for rule in binary_rules:
            binary_itg[rule] += 1

        for rule in unary_rules:
            unary_itg[rule] += 1

    alignments_file.close()
    parses_file.close()
    return binary_itg, unary_itg
开发者ID:macabot,项目名称:SRITG,代码行数:55,代码来源:sritg.py

示例12: test_evalb_correctly_scores_imperfect_trees

 def test_evalb_correctly_scores_imperfect_trees(self):
     # Change to constiutency label (VP ... )should effect scores, but change to POS
     # tag (NP dog) should have no effect.
     tree1 = Tree.fromstring("(S (VP (D the) (NP dog)) (VP (V chased) (NP (D the) (N cat))))")
     tree2 = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
     evalb_scorer = EvalbBracketingScorer()
     evalb_scorer([tree1], [tree2])
     metrics = evalb_scorer.get_metric()
     assert metrics["evalb_recall"] == 0.75
     assert metrics["evalb_precision"] == 0.75
     assert metrics["evalb_f1_measure"] == 0.75
开发者ID:apmoore1,项目名称:allennlp,代码行数:11,代码来源:evalb_bracketing_scorer_test.py

示例13: get_sentence_and_indexes

def get_sentence_and_indexes(parsed_sentence):

  sentence_tree = Tree(parsed_sentence)
  if sentence_tree.node == bitpar_top: #remove designated TOP-symbol    
    sentence_tree = sentence_tree[0]
    
  rlist = [0]*len(sentence_tree.leaves())
  slist = [""]*len(sentence_tree.leaves())
  get_sentence_and_indexes_rec_helper(sentence_tree, rlist, slist)
  reordered_sentence = " ".join(slist)
  
  return reordered_sentence, rlist
开发者ID:agnesvanbelle,项目名称:SSLP2,代码行数:12,代码来源:testing.py

示例14: test_evalb_with_terrible_trees_handles_nan_f1

 def test_evalb_with_terrible_trees_handles_nan_f1(self):
     # If precision and recall are zero, evalb returns nan f1.
     # This checks that we handle the zero division.
     tree1 = Tree.fromstring("(PP (VROOT (PP That) (VROOT (PP could) "
                             "(VROOT (PP cost) (VROOT (PP him))))) (PP .))")
     tree2 = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
     evalb_scorer = EvalbBracketingScorer()
     evalb_scorer([tree1], [tree2])
     metrics = evalb_scorer.get_metric()
     assert metrics["evalb_recall"] == 0.0
     assert metrics["evalb_precision"] == 0.0
     assert metrics["evalb_f1_measure"] == 0.0
开发者ID:apmoore1,项目名称:allennlp,代码行数:12,代码来源:evalb_bracketing_scorer_test.py

示例15: _build_hierplane_tree

    def _build_hierplane_tree(self, tree: Tree, index: int, is_root: bool) -> JsonDict:
        """
        Recursively builds a JSON dictionary from an NLTK ``Tree`` suitable for
        rendering trees using the `Hierplane library<https://allenai.github.io/hierplane/>`.

        Parameters
        ----------
        tree : ``Tree``, required.
            The tree to convert into Hierplane JSON.
        index : int, required.
            The character index into the tree, used for creating spans.
        is_root : bool
            An indicator which allows us to add the outer Hierplane JSON which
            is required for rendering.

        Returns
        -------
        A JSON dictionary render-able by Hierplane for the given tree.
        """
        children = []
        for child in tree:
            if isinstance(child, Tree):
                # If the child is a tree, it has children,
                # as NLTK leaves are just strings.
                children.append(self._build_hierplane_tree(child, index, is_root=False))
            else:
                # We're at a leaf, so add the length of
                # the word to the character index.
                index += len(child)

        label = tree.label()
        span = " ".join(tree.leaves())
        hierplane_node = {
                "word": span,
                "nodeType": label,
                "attributes": [label],
                "link": label
        }
        if children:
            hierplane_node["children"] = children
        # TODO(Mark): Figure out how to span highlighting to the leaves.
        if is_root:
            hierplane_node = {
                    "linkNameToLabel": LINK_TO_LABEL,
                    "nodeTypeToStyle": NODE_TYPE_TO_STYLE,
                    "text": span,
                    "root": hierplane_node
            }
        return hierplane_node
开发者ID:apmoore1,项目名称:allennlp,代码行数:49,代码来源:constituency_parser.py


注:本文中的nltk.Tree类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。