Python Tree.parse方法代码示例

本文整理汇总了Python中nltk.Tree.parse方法的典型用法代码示例。如果您正苦于以下问题：Python Tree.parse方法的具体用法？Python Tree.parse怎么用？Python Tree.parse使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.Tree的用法示例。

在下文中一共展示了Tree.parse方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: tag

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
    def tag(self, input_tree):
        """
        Tag an input tree using the rules in parsed grammars.

        The tree is passed through ``self.clean`` and converted to text with
        ``self.from_tree_to_text``.  Every rule regex is matched
        (case-insensitively) against that text; each matched span is replaced
        by a ``<RULE_NAME>`` placeholder and the matched text is appended to
        ``self.stack``.  The rewritten text is finally wrapped in ``(S ... )``
        and parsed back into a tree.

        :param input_tree: tree to tag.
        :return: tree parsed from the rewritten text, with leaves built by
            ``self.from_string_token_to_tuple``.
        """
        input_tree = self.clean(input_tree)
        text = self.from_tree_to_text(input_tree)

        for rule in self.rules:
            # Presumably each rule is a single-entry ``name -> regex`` mapping
            # (only its first item is used).  Taking the first item this way
            # works on Python 2 and 3 alike, whereas indexing ``keys()`` /
            # ``values()`` only works on Python 2.
            rule_name, pattern = next(iter(rule.items()))

            # ``finditer`` scans the string it was handed, so rebinding
            # ``text`` inside the loop does not disturb the iteration.
            for match in re.finditer(pattern, text, re.I):
                # Strip leading and trailing spaces from the matched text so
                # that each <NAME> subtree is cleanly delimited in the
                # resulting text (optional spaces are not swallowed).
                match_text = match.group(rule_name).strip()
                # ``str.replace`` substitutes every occurrence and exists on
                # both Python 2 and 3 (``string.replace`` is Python 2 only).
                text = text.replace(match_text, "<" + rule_name + ">")
                self.stack.append(match_text)

        output_tree_str = "(S " + self.from_text_to_tree_str(text) + " )"
        return Tree.parse(output_tree_str,
                          parse_leaf=self.from_string_token_to_tuple)

开发者ID:azizur77，项目名称:ruleparser，代码行数:31，代码来源:ruleparser.py

示例2: test_simple_tags

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
    def test_simple_tags(self):
        """Tagging ``self.text`` with a rule on the ANIMAL tag yields the expected tree."""
        parser = ruleparser.RuleParser("ANIMAL : {<ANIMAL>}")

        expected_str = "(S el/DT (ANIMAL perro/NN/ANIMAL) ladra/VB al/DT (ANIMAL gato/NN/ANIMAL))"
        expected = Tree.parse(expected_str,
                              parse_leaf=parser.from_string_token_to_tuple)

        # The tagger output must equal the hand-written tree.
        self.assertEqual(parser.tag(self.text), expected)

开发者ID:azizur77，项目名称:ruleparser，代码行数:9，代码来源:test.py

示例3: negra_tree_iter

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def negra_tree_iter(corpus_root):
  """Yield one parsed tree per ``%``-delimited chunk of a corpus file.

  Lines starting with ``%`` act as separators: the lines collected since the
  previous separator are joined, stripped and, when non-empty, parsed with
  ``Tree.parse``.  Whatever follows the last separator is treated the same
  way.

  :param corpus_root: path of the corpus file to read.
  :return: generator of parsed trees.
  """
  pieces = []

  # The context manager closes the file when the generator is exhausted or
  # closed, instead of leaving the handle to the garbage collector.
  with open(corpus_root) as corpus:
    for line in corpus:
      if line.startswith('%'):
        s = ''.join(pieces).strip()
        if s:
          yield Tree.parse(s)
        pieces = []
      else:
        pieces.append(line)

  # Test the stripped text rather than the raw line list, so that trailing
  # whitespace-only lines never reach the parser as an empty string.
  s = ''.join(pieces).strip()
  if s:
    yield Tree.parse(s)

开发者ID:OMARI1988，项目名称:upparse，代码行数:18，代码来源:util.py

示例4: parse_ccgbank_tree

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def parse_ccgbank_tree(s):
    """Parse the string *s* with the ccgbank node/leaf settings.

    ``Tree.parse`` is called with the module's ccgbank node and leaf parsers
    and patterns; the resulting tree is passed through
    ``excise_empty_nodes`` and that result is returned.
    """
    parsed = Tree.parse(
        s,
        parse_node=parse_ccgbank_node,
        parse_leaf=parse_ccgbank_leaf,
        node_pattern=ccgbank_node_pattern,
        leaf_pattern=ccgbank_leaf_pattern,
    )
    return excise_empty_nodes(parsed)

开发者ID:AlexWang90，项目名称:openccg，代码行数:9，代码来源:ccg_draw_tree.py

示例5: treebank_bracket_parse

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def treebank_bracket_parse(t):
    """Parse the bracketed tree string *t*.

    ``Tree.parse`` is tried first with ``remove_empty_top_bracketing=True``.
    If it raises ``IndexError``, the first and last characters of the
    stripped string are dropped and the remainder is handed to
    ``tree.bracket_parse``.
    """
    try:
        parsed = Tree.parse(t, remove_empty_top_bracketing=True)
    except IndexError:
        # Probably the real treebank format: remove the outermost bracket
        # pair before parsing.
        unwrapped = t.strip()[1:-1]
        return tree.bracket_parse(unwrapped)
    return parsed

开发者ID:cerisara，项目名称:dmvccm，代码行数:10，代码来源:wsj.py

示例6: test_cascaded_rules_2

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
 def test_cascaded_rules_2(self):
     """A rule (PARTIDO) may refer to the output of an earlier rule (EQUIPOS)."""
     grammar = """
               EQUIPOS : {<Equipo_Futbol> <CONJ> <Equipo_Futbol>}
               PARTIDO : {<EQUIPOS> <VB>}
               """
     parser = ruleparser.RuleParser(grammar)

     expected_str = "(S (PARTIDO (EQUIPOS Real_Madrid/NN/NE/Equipo_Futbol y/CONJ F.C._Barcelona/NN/NE/Equipo_Futbol) disputan/VB) hoy/ADV la/DT final/NN de/PP la/DT Copa_del_Rey/NN/NE/Evento)"
     expected = Tree.parse(expected_str,
                           parse_leaf=parser.from_string_token_to_tuple)

     # The tagger output must equal the hand-written tree.
     self.assertEqual(parser.tag(self.text), expected)

开发者ID:azizur77，项目名称:ruleparser，代码行数:11，代码来源:test.py

示例7: test_simple_words

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
    def test_simple_words(self):
        """Rules written with quoted words group those words into named subtrees."""
        grammar = """
                     PERRO : {"el" "perro"}
                     GATO : {"al" "gato"}
                  """
        parser = ruleparser.RuleParser(grammar)

        expected_str = "(S (PERRO el/DT  perro/NN/ANIMAL) ladra/VB (GATO al/DT gato/NN/ANIMAL))"
        expected = Tree.parse(expected_str,
                              parse_leaf=parser.from_string_token_to_tuple)

        # The tagger output must equal the hand-written tree.
        self.assertEqual(parser.tag(self.text), expected)

开发者ID:azizur77，项目名称:ruleparser，代码行数:12，代码来源:test.py

示例8: test_cascaded_rules

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
    def test_cascaded_rules(self):
        grammar = """
                  NP : {<DT>? <NN>+}
	          VP : {<VB> <ADV>}
                  """

        rp = ruleparser.RuleParser(grammar)
        expected = Tree.parse("(S (NP Real_Madrid/NN/NE/Equipo_Futbol) y/CONJ (NP F.C._Barcelona/NN/NE/Equipo_Futbol) (VP disputan/VB hoy/ADV) (NP la/DT final/NN) de/PP (NP la/DT Copa_del_Rey/NN/NE/Evento))", parse_leaf=rp.from_string_token_to_tuple)
        result = rp.tag(self.text)
        self.assertEqual(result,expected)

开发者ID:azizur77，项目名称:ruleparser，代码行数:12，代码来源:test.py

示例9: test_repetitive_rules

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
 def test_repetitive_rules(self):
      """Several rules sharing the name UNIVERSIDAD produce nested UNIVERSIDAD subtrees."""
      # Tagged input sentence: (word, [tags]) pairs.
      self.text = [
          ('He', ['VB']),
          ('estudiado', ['VB']),
          ('en', ['ADV']),
          ('la', ['DT']),
          ('Universidad', ['NN']),
          ('Complutense', ['NN']),
          ('y', ['CONJ']),
          ('he', ['VB']),
          ('trabajado', ['VB']),
          ('en', ['ADV']),
          ('Yahoo!', ['NN']),
          ('durante', ['ADV']),
          ('2', ['NN']),
          ('años', ['NN']),
      ]
      grammar = """
                   UNIVERSIDAD : {"universidad"}
                   UNIVERSIDAD : {"complutense"}
                   UNIVERSIDAD : {<UNIVERSIDAD> <UNIVERSIDAD>}
                """
      parser = ruleparser.RuleParser(grammar)

      expected_str = "(S He/VB estudiado/VB en/ADV la/DT (UNIVERSIDAD (UNIVERSIDAD Universidad/NN) (UNIVERSIDAD Complutense/NN)) y/CONJ he/VB trabajado/VB en/ADV Yahoo!/NN durante/ADV 2/NN años/NN)"
      expected = Tree.parse(expected_str,
                            parse_leaf=parser.from_string_token_to_tuple)

      # The tagger output must equal the hand-written tree.
      self.assertEqual(parser.tag(self.text), expected)

开发者ID:azizur77，项目名称:ruleparser，代码行数:13，代码来源:test.py

示例10: parse

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
    def parse(self):
        """
        Return the parse tree for the S-expression parse string in the XML.

        The tree is built with ``Tree.parse`` the first time it is needed and
        stored in ``self._parse``; afterwards the stored value is returned.
        When there is no parse string, the stored value is returned as is.

        :getter: Returns the NLTK parse tree
        :type: nltk.Tree

        """
        # Nothing to build: either no parse string or a tree already cached.
        if self.parse_string is None or self._parse is not None:
            return self._parse

        self._parse = Tree.parse(self._parse_string)
        return self._parse

开发者ID:interrogator，项目名称:corenlp-xml-lib，代码行数:13，代码来源:document.py

示例11: test_context_rules

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
 def test_context_rules(self):
      """Rules with context outside the braces wrap only the braced part in a subtree."""
      # Tagged input sentence: (word, [tags]) pairs.
      self.text = [
          ('He', ['VB']),
          ('estudiado', ['VB']),
          ('en', ['ADV']),
          ('la', ['DT']),
          ('Universidad', ['NN']),
          ('Complutense', ['NN']),
          ('y', ['CONJ']),
          ('he', ['VB']),
          ('trabajado', ['VB']),
          ('en', ['ADV']),
          ('Yahoo!', ['NN']),
          ('durante', ['ADV']),
          ('2', ['NN']),
          ('años', ['NN']),
      ]
      grammar = """
                   EMPRESA : "trabajado" "en" {<NN>+}
                   UNIVERSIDAD : "estudiado" "en" <DT>? {<NN>+}
                   TECNOLOGIA : "trabajado" "con" {<.*>}
                """
      parser = ruleparser.RuleParser(grammar)

      expected_str = "(S He/VB estudiado/VB en/ADV la/DT (UNIVERSIDAD Universidad/NN Complutense/NN) y/CONJ he/VB trabajado/VB en/ADV (EMPRESA Yahoo!/NN) durante/ADV 2/NN años/NN)"
      expected = Tree.parse(expected_str,
                            parse_leaf=parser.from_string_token_to_tuple)

      # The tagger output must equal the hand-written tree.
      self.assertEqual(parser.tag(self.text), expected)

开发者ID:azizur77，项目名称:ruleparser，代码行数:13，代码来源:test.py

示例12: parse_rst_dt_tree

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def parse_rst_dt_tree(tstr, context=None):
    """
    Build a single RST tree from its RST DT string representation.

    The string goes through ``_preprocess``, is parsed by ``Tree.parse``
    using ``_LEAF_PATTERN`` for leaves, and the result goes through
    ``_postprocess``.  When a truthy `context` is given, the tree is also
    aligned with it.

    Passing a context (see `RSTContext`) is strongly recommended; leaving it
    as None is meant for testing or for cases where no original text is
    available.
    """
    preprocessed = _preprocess(tstr)
    parsed = _postprocess(Tree.parse(preprocessed, leaf_pattern=_LEAF_PATTERN))
    if not context:
        return parsed
    return _align_with_context(parsed, context)

开发者ID:chloebt，项目名称:educe，代码行数:16，代码来源:parse.py

示例13: parse_lightweight_tree

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def parse_lightweight_tree(tstr):
    """
    Build a SimpleRSTTree from the lightweight RST debug syntax, eg. ::

        (R:attribution
           (N:elaboration (N foo) (S bar)
           (S quux)))

    This is mostly useful for debugging or for knocking out quick
    examples
    """
    # Node labels are a nuclearity letter, optionally followed by
    # ``:relation``.
    label_re = re.compile(r'(?P<nuc>[RSN])(:(?P<rel>.*)|$)')
    # Maps the nuclearity letter to its full name.
    nuc_names = {name[0]: name for name in ("Root", "Nucleus", "Satellite")}
    # pylint: disable=C0103
    PosInfo = collections.namedtuple("PosInfo", "text edu")
    # pylint: enable=C0103

    def walk(subtree, posinfo=PosInfo(text=0, edu=0)):
        """
        Convert `subtree` recursively, threading the running text offset
        and EDU counter through the traversal.

        Returns the converted node together with the updated position.
        """
        if not isinstance(subtree, Tree):
            # Leaf: the text becomes an EDU covering the next len(text)
            # characters, and the EDU counter moves forward by one.
            begin = posinfo.text
            finish = begin + len(subtree)
            advanced = PosInfo(text=finish, edu=posinfo.edu + 1)
            return EDU(posinfo.edu, Span(begin, finish), subtree), advanced

        # Internal node: remember where it starts, then convert the kids
        # left to right, each one picking up where the previous ended.
        origin = copy.copy(posinfo)
        converted_kids = []
        for child in subtree:
            converted, posinfo = walk(child, posinfo)
            converted_kids.append(converted)

        label = label_re.match(treenode(subtree))
        if not label:
            raise RSTTreeException("Missing nuclearity annotation in ",
                                   subtree)
        nuclearity = nuc_names[label.group("nuc")]
        relation = label.group("rel") or "leaf"
        edu_span = (origin.edu, posinfo.edu - 1)
        text_span = Span(origin.text, posinfo.text)
        node = Node(nuclearity, edu_span, text_span, relation)
        return SimpleRSTTree(node, converted_kids), posinfo

    converted_root, _ = walk(Tree.parse(tstr))
    return converted_root

开发者ID:chloebt，项目名称:educe，代码行数:50，代码来源:parse.py

示例14: read_trees

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def read_trees(iterable):
    """Read an iterable of lines and build one syntactic tree per sentence.

    Each line is split on whitespace.  A line with at most one field ends
    the current sentence; any other line contributes the word, POS tag and
    parse fragment found at the ``ConllPos.word`` / ``ConllPos.pos`` /
    ``ConllPos.parse`` columns.  The parse fragments of a sentence are
    joined and parsed with ``Tree.parse`` using ``[]`` as brackets.

    :param iterable: lines of the input, either UTF-8 encoded byte strings
        or already-decoded text.
    :return: list of the parsed trees, in input order.
    """
    from nltk import Tree
    tree_strings = []
    trees = []

    def build_tree(fragments):
        """Parse the joined fragments of one sentence into a tree."""
        return Tree.parse(' '.join(fragments), brackets='[]')

    for line in iterable:
        # Decode raw bytes (``str`` on Python 2) and accept text that is
        # already decoded; this works on both Python 2 and 3.
        if isinstance(line, bytes):
            line = line.decode('utf-8')
        data = line.split()

        if len(data) <= 1:
            # Sentence boundary.  Consecutive boundaries carry no
            # fragments, so there is nothing to parse for them.
            if tree_strings:
                trees.append(build_tree(tree_strings))
                tree_strings = []
            continue

        word = data[ConllPos.word]
        pos = data[ConllPos.pos]
        parse = data[ConllPos.parse]

        # a little workaround.
        # to avoid messing nltk.Tree string parser, we use [] as tree brackets
        # instead of the default (). This is done because "(" and ")" appear as
        # separate tokens, while "[" and "]" do not.
        tree_string = parse.replace('(', '[').replace(')', ']')
        # treat "broken" constituents like VP- and -VP as normal VPs
        tree_string = tree_string.replace('-', '')

        # treat multiwords and concatenate their POS with #
        words = [' %s#%s ' % (part, pos) for part in word.split('_')]
        words_string = ' '.join(words)
        tree_string = tree_string.replace('*', words_string)

        tree_strings.append(tree_string)

    # Input that does not end with a boundary line still holds a pending
    # sentence; keep it instead of dropping it.
    if tree_strings:
        trees.append(build_tree(tree_strings))

    return trees

开发者ID:attardi，项目名称:nlpnet，代码行数:38，代码来源:read_data.py

示例15: ctb_tree_iter_f

# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import parse [as 别名]
def ctb_tree_iter_f(corpus_root):
  """Yield the parsed trees found inside ``<S ...>`` ... ``</S>`` blocks of a file.

  Lines between a line starting with ``<s `` and the next line starting
  with ``</s>`` (both compared case-insensitively after stripping) are
  collected, joined and stripped.  Non-empty text is split with
  ``split_separate_setences`` and each part is parsed with ``Tree.parse``.

  :param corpus_root: path of the file to read; it is also written to
      standard error before reading starts.
  :return: generator of parsed trees.
  """
  in_s = False
  pieces = []
  # Report which file is being read.  An explicit write works on both
  # Python 2 and 3, unlike the ``print >>stream`` statement.
  sys.stderr.write('%s\n' % (corpus_root,))

  # The context manager closes the file when the generator is exhausted or
  # closed, instead of leaving the handle to the garbage collector.
  with open(corpus_root) as corpus:
    for line in corpus:
      lowered = line.strip().lower()

      if lowered.startswith('<s '):
        in_s = True

      elif lowered.startswith('</s>'):
        s = ''.join(pieces).strip()
        if s:
          # In a couple instances of the CTB, there are two sentences
          # contained in a single <S> node. Deal with that here
          for s1 in split_separate_setences(s):
            yield Tree.parse(s1)

        in_s = False
        pieces = []

      elif in_s:
        pieces.append(line)

开发者ID:OMARI1988，项目名称:upparse，代码行数:26，代码来源:util.py

注：本文中的nltk.Tree.parse方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。