当前位置: 首页>>代码示例>>Python>>正文


Python Tree.parse方法代码示例

本文整理汇总了Python中nltk.tree.Tree.parse方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.parse方法的具体用法?Python Tree.parse怎么用?Python Tree.parse使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.tree.Tree的用法示例。


在下文中一共展示了Tree.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def main():
    """Read one bracketed parse tree per line from stdin and print
    corpus-level averages of three syntactic-complexity measures
    (Yngve depth, Frazier score, node count) plus average word count.

    NOTE(review): Python 2 code (print statement); also uses the
    NLTK 2.x ``Tree.parse`` API, renamed ``Tree.fromstring`` in NLTK 3.
    """
    sents = 0         # number of non-blank input lines (sentences)
    words_tot = 0     # total words over all sentences
    yngve_tot = 0     # running sum of per-sentence Yngve averages
    frazier_tot = 0   # running sum of per-sentence Frazier averages
    nodes_tot = 0     # running sum of per-sentence node-count averages
    for line in sys.stdin:
        if line.strip() == "":
            continue  # skip blank separator lines
        t = Tree.parse(line)
        words = calc_words(t)
        words_tot += words
        sents += 1
        yngve = calc_yngve(t, 0)
        yngve_avg = float(yngve)/words
        yngve_tot += yngve_avg
        nodes = calc_nodes(t)
        nodes_avg = float(nodes)/words
        nodes_tot += nodes_avg
        frazier = calc_frazier(t, 0, "")
        frazier_avg = float(frazier)/words
        frazier_tot += frazier_avg
        # print "Sentence=%d\twords=%d\tyngve=%f\tfrazier=%f\tnodes=%f" % (sents, words, yngve_avg, frazier_avg, nodes_avg)
    # NOTE(review): raises ZeroDivisionError when the input had no sentences.
    yngve_avg = float(yngve_tot)/sents
    frazier_avg = float(frazier_tot)/sents
    nodes_avg = float(nodes_tot)/sents
    words_avg = float(words_tot)/sents
    print "Total\tsents=%d\twords=%f\tyngve=%f\tfrazier=%f\tnodes=%f" % (sents, words_avg, yngve_avg, frazier_avg, nodes_avg)
开发者ID:MorinoseiMorizo,项目名称:util-scripts,代码行数:30,代码来源:syntactic-complexity.py

示例2: parse_trees

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
 def parse_trees(self, flatten=False):
   """Return an NLTK Tree for every sentence in self.result.

   When *flatten* is true, each tree is passed through
   flatten_deeptree() before being collected.
   """
   parsed = []
   for sentence in self.result['sentences']:
     tree = Tree.parse(sentence['parsetree'])
     parsed.append(flatten_deeptree(tree) if flatten else tree)
   return parsed
开发者ID:jcccf,项目名称:cs4740,代码行数:10,代码来源:CoreNLPParser.py

示例3: loadHeadTrees

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
 def loadHeadTrees(self,filename):
     """Load head-annotated trees produced by ps2ds.

     Args:
         filename: path to a UTF-8 file holding one bracketed tree per line.

     Returns:
         list of Tree objects, each wrapped under a TOP node so the
         shape matches the original tree structure.
     """
     trees = []
     # BUG FIX: the file handle was opened but never closed; a context
     # manager guarantees cleanup even if Tree.parse raises.
     with codecs.open(filename,'r','utf-8') as inf:
         for s in inf:
             head_tree = Tree.parse(s)
             head_tree = Tree('TOP',[head_tree]) # coordinate with original tree structure
             trees.append(head_tree)
     return trees
开发者ID:Juicechuan,项目名称:AMR_graph,代码行数:11,代码来源:chn_head.py

示例4: get_semantics_from_parse_tree

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def get_semantics_from_parse_tree(parse_tree_string):
    """Take a string representing the parse tree as input, and print the
    semantic parse. The result list consists of a list of tuples, with each
    tuple containing the VerbNet frame and its associated tree."""
    # Regex for finding verbs in the bracketed tree text (hoisted out of
    # the loops; behavior is unchanged).
    verb_pattern = re.compile(r"(?<=VB[ DGNPZ]) *\w*(?=\))")

    root = Tree.parse(parse_tree_string)
    # root.draw()

    clauses = split_clauses(root)

    # Activize every clause in place.
    for pos, (clause, conj) in clauses.items():
        clauses[pos] = (activize_clause(clause), conj)

    results = []

    for _, (clause, conj) in clauses.items():
        conjunction_splits = split_conjunctions(clause)

        if conj != "":
            results.append(conj)

        for _, (subtrees, sub_conj) in conjunction_splits.items():
            if sub_conj != "":
                results.append(sub_conj)

            for subtree in subtrees:
                # Normalize the clause before matching against VerbNet.
                subtree = existential_there_insertion(subtree)
                subtree = invert_clause(subtree)
                subtree = wh_movement(subtree)

                subtree.draw()

                # Get the lemma of each verb for searching VerbNet.
                found = verb_pattern.findall(str(subtree))
                for verb in (w.strip().lower() for w in found):
                    lemma = lemmatizer.lemmatize(verb, "v")

                    matches = [m for m in
                               (vfo.match_parse(subtree)
                                for vfo in create_VerbFrameObjects(lemma))
                               if m]

                    best = pick_best_match(matches)
                    if best is not None:
                        results.append((best, subtree))

    return results
开发者ID:amareknight,项目名称:SLURP,代码行数:58,代码来源:parsing_nltk.py

示例5: _parse_trees_output

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
 def _parse_trees_output(output_):
     """Split *output_* on blank lines and parse each chunk of lines
     as one bracketed tree; returns the list of parsed Trees.

     Note: a trailing chunk not followed by a blank line is dropped,
     matching the original behavior.
     """
     trees = []
     pending = []
     for raw in output_.splitlines(False):
         if raw:
             pending.append(raw)
         else:
             trees.append(Tree.parse('\n'.join(pending)))
             pending = []
     return trees
开发者ID:osu-ling5802-2016,项目名称:Sigmorphon2016,代码行数:12,代码来源:stanford.py

示例6: _parse

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
    def _parse(self, t):
        """Parse the normalized bracketed string *t* into a Tree,
        attempting recovery from malformed input.

        Recovery strategy: on 'mismatched parens', retry with up to 4
        extra closing parens appended; otherwise fall back to a flat
        S tree over the tagged tokens.

        NOTE(review): Python 2 syntax (``except ValueError, e``).
        """
        try:
            return Tree.parse(self._normalize(t))

        except ValueError, e:
            sys.stderr.write("Bad tree detected; trying to recover...\n")
            # Try to recover, if we can:
            if e.args == ('mismatched parens',):
                # Append 1..4 closing parens, returning the first success.
                for n in range(1, 5):
                    try:
                        v = Tree.parse(self._normalize(t+')'*n))
                        sys.stderr.write("  Recovered by adding %d close "
                                         "paren(s)\n" % n)
                        return v
                    except ValueError: pass
            # Try something else: degrade to a flat parse so callers
            # always get a Tree back.
            sys.stderr.write("  Recovered by returning a flat parse.\n")
            #sys.stderr.write(' '.join(t.split())+'\n')
            return Tree('S', self._tag(t))
开发者ID:AnthonyNystrom,项目名称:nltk,代码行数:21,代码来源:bracket_parse.py

示例7: load_parse_doc

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def load_parse_doc(parse_path):
    """Read a file with one bracketed parse per line and return the
    corresponding list of Trees; blank lines are skipped."""
    trees = []
    with open(os.path.abspath(parse_path), 'r') as handle:
        for raw in handle:
            stripped = raw.strip()
            if stripped:
                trees.append(Tree.parse(stripped))
    return trees
开发者ID:yaocheng-cs,项目名称:misc,代码行数:13,代码来源:coref.py

示例8: build_tagged_sents

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def build_tagged_sents(files):
    """Build the corpus of tagged sentences from the files of the
    sequoia corpus.

    Args:
        files: iterable of paths to UTF-8 files, one bracketed tree
            per line.

    Returns:
        list of tagged sentences, each a list of (word, tag) pairs
        as produced by Tree.pos().
    """
    sents = []
    for fname in files:
        # BUG FIX: the original only closed the file on the success
        # path; `with` guarantees closure even if Tree.parse raises.
        with codecs.open(fname, "r", "utf-8") as fin:
            for line in fin:
                sents.append(Tree.parse(line).pos())
    return sents
开发者ID:grdscarabe,项目名称:nlp-lessons,代码行数:14,代码来源:train-brill-on-sequoia.py

示例9: _load_sent_token

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
 def _load_sent_token(self):
     """Walk the cTAKES XML for Sentence elements, reconstruct each
     sentence's token sequence by scanning character offsets, attach a
     parse tree read sequentially from self.p_fp, and populate
     self.tokens and self.sents.

     NOTE(review): Python 2 code (print statements, ``file.next()``).
     """
     print "Loading sentences and tokens..."
     sent_elmts = self.c_root.findall(CTAKES_PREFIX + 'textspan.Sentence')
     t_counter = 0
     for sent_elmt in sent_elmts:
         sent_begin = int(sent_elmt.get('begin'))
         sent_end = int(sent_elmt.get('end'))
         sent_num = int(sent_elmt.get('sentenceNumber'))
         cursor = sent_begin
         sent_span = []
         token_offset = 0
         # Scan character positions left to right, looking for a token
         # element that begins exactly at the cursor.
         while cursor < sent_end:
             buf = self._find_token_elmt_with_attrib_of_val('begin', cursor)
             if len(buf) == 0:
                 cursor = cursor + 1
                 continue
             elif len(buf) > 1:
                 # Ambiguous token offsets: abort the whole load.
                 print 'More than one token appear to begin at ' + str(cursor) + \
                     '\nLoading ctakes xml file terminated'
                 return
             else:
                 token_elmt = buf[0]
                 t = Token(self.ds_id + '_t_' + str(t_counter))
                 # Tag ends in 'Token'; [:-5] strips that suffix.
                 t.type = token_elmt.tag.split('.')[-1][:-5]
                 # skipping 'newline' token when counting up tid
                 t_num = int(token_elmt.get('tokenNumber')) - sent_num
                 if t_num != t_counter:
                     print 'CAUTION: t_num does not equal to counter t_counter'
                 t.offset = token_offset
                 t.begin = int(token_elmt.get('begin'))
                 t.end = int(token_elmt.get('end'))
                 t.pos = token_elmt.get('partOfSpeech')
                 t.n_form = token_elmt.get('normalizedForm')
                 #t.c_form = token_elmt.get('canonicalForm')
                 #t.cap = int(token_elmt.get('capitalization'))
                 #t.num_p = int(token_elmt.get('numPosition'))
                 self.tokens.append(t)
             sent_span.append(t)
             cursor = t.end + 1
             token_offset = token_offset + 1
             t_counter += 1

         s = Sentence(self.ds_id + '_s_' + str(sent_num))
         s.span = sent_span
         s.num = sent_num
         #s.begin = sent_begin
         #s.end = sent_end
         # One parse per sentence, consumed in order from self.p_fp.
         s.parse = Tree.parse(self.p_fp.next())
         for t in s.span:
             t.sent = s
         self.sents.append(s)
     return
开发者ID:yaocheng-cs,项目名称:tlink,代码行数:54,代码来源:data.py

示例10: __init__

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
    def __init__(self, json_file):
        """Load a JSON document from the open file *json_file* and expose
        every top-level key as an attribute of this instance.

        NOTE(review): Python 2 code (``dict.iteritems``).
        """
        data = json.load(json_file)
        # Mirror each top-level JSON field onto the instance.
        for k, v in data.iteritems():
            self.__setattr__(k, v)
        self.__raw_data = data # for future reference

        #print data
        # Build a span-annotated tree and a plain NLTK tree from the
        # same bracketed gold-parse string; goldparse is rebound from
        # str to Tree here.
        self.spantree = SpanTree.parse(self.goldparse)
        self.spantree.convert()
        self.goldparse = Tree.parse(self.goldparse)

        self.text = data['text'].split()
        self.treebank_sentence = data['treebank_sentence'].split()

示例11: read

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
    def read(klass, path=KNOWLEDGE_PATH):
        """Load the verb-frame knowledge base at *path* and construct an
        instance whose kwargs map each frame name to its mappings.

        Raises:
            Exception: if no path is supplied.
        """
        if not path:
            raise Exception("Specify a path to the verbframes.json as $WIMKB")

        kwargs = {}
        with open(path, 'rb') as kbfile:
            data = json.load(kbfile, encoding="utf8")

            for frame in data['frames']:
                name = frame['frame']
                for mapping in frame['mappings']:
                    # Attach the owning frame, and convert the string
                    # representations of trees into Tree objects.
                    mapping['frame'] = name
                    mapping['verbmap'] = Tree.parse(mapping['verbmap'])
                    if 'parse' in mapping:
                        mapping['parse'] = Tree.parse(mapping['parse'])

                kwargs[name] = frame['mappings']

        return klass(**kwargs)
开发者ID:mindcrime,项目名称:python-openwims.org,代码行数:26,代码来源:frame.py

示例12: findAmbiguities

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
 def findAmbiguities(self,line):
     """Parse *line* and return the maximum ambiguity score found by
     exploreSubTree over the children of every sentence's root."""
     parsed = self.parse(line)

     collected = []
     score = 0
     for sentence in parsed['sentences']:
         tree = Tree.parse(sentence['parsetree'])
         collected.append(tree)
         # tree[0] is an S node, so walk the root's immediate children.
         for child in tree:
             score = max(score, self.exploreSubTree(child))
     return score
开发者ID:suryaveer,项目名称:REST-server-segmentation,代码行数:17,代码来源:corenlp.py

示例13: tag_ptree

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def tag_ptree(ptree, coreflist):
    """Tags given parse tree with coreferences

    Args:
        ptree: string, parenthesized str represenation of parse tree
        coreflist: list of tuples, [('1', {'text': 'dog', 'ref': None})]

    Returns:
        string, tagged parse tree

    >>> ptree = '(S NP( (NN He)) VP( (V ran)))'
    >>> coreflist = [('1', {'text': 'He', 'ref': None})]
    >>> tag_ptree(ptree, coreflist)
    '(S NP( COREF_TAG_1( (NN He))) VP( (V ran)))'

    """
    pattern = r"""(?P<lp>\(?\s*)       # left parenthesis
                  (?P<tg>[a-zA-Z$]+)?  # POS tag
                  (?P<data>\s*%s)      # subtree of tag
                  (?P<rp>(?:\s*\))*)   # right parenthesis
               """
    # Process mentions in reverse list order, as the original did.
    for cid, coref in reversed(coreflist):
        target = ''.join(word_tokenize(coref['text']))

        nltktree = Tree.parse(ptree)
        nltktree.reverse()  # perform search right to left
        match_data = None
        for subtree in nltktree.subtrees():
            # Compare leaves with all whitespace removed.
            if ''.join(subtree.leaves()) == target:
                match_data = subtree.pprint()
                break

        if match_data:
            # Found via tree search: splice the tag in directly.
            ptree = ptree.replace(match_data,
                                  '( COREF_TAG_%s%s)' % (cid, match_data))
        else:
            # Fall back to regex matching over the raw string.
            dpattern = r'\s*'.join([r'\(\s*[a-zA-Z$]+\s+%s\s*\)' % word
                                    for word in word_tokenize(coref['text'])])
            found = re.findall(pattern % dpattern, ptree, re.X)
            if found:
                lp, tg, data, rp = found[0]
                repl = '%s%s ( COREF_TAG_%s%s) %s' % (lp, tg, cid, data, rp)
                ptree = re.sub(pattern % dpattern, repl, ptree, 1, re.X)

    return ptree
开发者ID:charmainekeck,项目名称:PyCoref,代码行数:50,代码来源:data.py

示例14: parseQuestion

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
 def parseQuestion(self, text):
     """Tokenize *text*, build a Question from its non-punctuation
     tokens, parse it, and print the POCs found in the parse tree.

     NOTE(review): Python 2 code (print statements). The punctuation
     filter compares strings with ``is``, which tests identity rather
     than equality and only works by accident of string interning;
     also ``i`` is counted but never used.
     """
     question = Question()
     print "RECEIVED DATA IS\n" + text
     wordList = nltk.word_tokenize(text)
     i = 0
     tokens = list()
     for word in wordList:
         print "WORD: "+str(word)
         if not str(word).strip() is "" and not str(word).strip() is "." and not str(word).strip() is "?" and not str(word).strip() is "!" and not str(word).strip() is ",":
             tokens.append(word)
         i+=1
     print tokens
     question.setTokens(tokens)
     result = self.parse(text)
     tree = Tree.parse(result['sentences'][0]['parsetree'])
     print TreeUtils.findPocs(tree)

示例15: create_trees_nltk

# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def create_trees_nltk(filename):
    """Read bracketed parse trees from *filename* (a tree may span
    several lines; blank lines separate trees), parse each with NLTK,
    and convert them to Chomsky normal form.

    Args:
        filename: path to the text file of bracketed trees.

    Returns:
        list of Tree objects, each in CNF.
    """
    valid_tree_texts = []
    tree_text = ''
    # BUG FIX: use a context manager so the handle is closed even if
    # reading raises (the original closed it manually on success only).
    with open(filename, "r") as f:
        for line in f:
            line = line.strip()
            if line == "":
                valid_tree_texts.append(tree_text)
                tree_text = ""
            else:
                tree_text += line + " "
    # BUG FIX: a final tree not followed by a blank line used to be
    # silently dropped; keep it.
    if tree_text:
        valid_tree_texts.append(tree_text)

    trees = [Tree.parse(text) for text in valid_tree_texts]
    for tree in trees:
        tree.chomsky_normal_form()

    return trees
开发者ID:5idaidai,项目名称:MVRNN,代码行数:21,代码来源:utils.py


注:本文中的nltk.tree.Tree.parse方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。