当前位置: 首页>>代码示例>>Python>>正文


Python Tree.parse方法代码示例

本文整理汇总了Python中tree.Tree.parse方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.parse方法的具体用法?Python Tree.parse怎么用?Python Tree.parse使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在tree.Tree的用法示例。


在下文中一共展示了Tree.parse方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: readkparses

# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
def readkparses(f, k):
	'''Lazily read k (feature vector, tree) pairs from the open file f.

	Every pair takes up two consecutive lines of f: the first is handed
	to FVector.parse, the second to Tree.parse (with trunc=True and
	lower=True).  Both lines are stripped before parsing.
	'''
	for _ in xrange(k):
		fvector = FVector.parse(f.readline().strip())
		tree = Tree.parse(f.readline().strip(), trunc=True, lower=True)
		yield fvector, tree
开发者ID:rupenp,项目名称:transforest,代码行数:9,代码来源:readkbest.py

示例2: readonebest

# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
def readonebest(f):
	'''Yield one tree per non-blank line (1-best output, or gold).

	f is passed through getfile() first.  Reading stops at end of file;
	lines consisting of a bare newline are skipped.
	'''
	f = getfile(f)
	# iter(callable, sentinel) keeps calling readline() until it returns ''
	for line in iter(f.readline, ''):
		if line != '\n':
			yield Tree.parse(line.strip(), trunc=True, lower=True)
开发者ID:rupenp,项目名称:transforest,代码行数:14,代码来源:readkbest.py

示例3: readkbest

# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
def readkbest(f, read_gold=False):
	'''Yield an NBestForest for every k-best block found in f.

	f is passed through getfile() first.  Each block starts with a header
	line of the form "<k>\\t<tag>", followed by the k parses consumed by
	readkparses().  When read_gold is true, the line right after the
	parses is parsed as the gold tree; otherwise the gold tree is None.

	Reading stops at end of file or at the first line that is not a valid
	header; blank lines between blocks are skipped.
	'''
	f = getfile(f)
	while True:
		line = f.readline()
		if line == '':
			break  # end of file
		if line == '\n':
			continue  # blank separator line
		try:
			k, tag = line.strip().split("\t")
			k = int(k)
		except ValueError:
			# Wrong number of fields or a non-integer k: not a header.
			break  ## can finish earlier

		kparses = list(readkparses(f, k))

		if read_gold:
			goldtree = Tree.parse(f.readline().strip(), trunc=True, lower=True)
		else:
			goldtree = None
		yield NBestForest(k, tag, kparses, goldtree)
开发者ID:rupenp,项目名称:transforest,代码行数:24,代码来源:readkbest.py

示例4: load

# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
    def load(filename, lower=True, sentid=0):
        '''now return a generator! use load().next() for singleton.
           and read the last line as the gold tree -- TODO: optional!
           and there is an empty line at the end
        '''

        file = getfile(filename)
        line = None
        total_time = 0
        num_sents = 0
        
        while True:            
            
            start_time = time.time()
            ##'\tThe complicated language in ...\n"
            ## tag is often missing
            try:
                if line is None or line == "\n":
                    line = "\n"
                    while line == "\n":
                        line = file.readline()  # emulate seek                    
                tag, sent = line.split("\t")
            except:
                ## no more forests
                break

            num_sents += 1
            
            sent = sent.split()
            cased_sent = sent [:]
            if lower:
                sent = [w.lower() for w in sent]   # mark johnson: lowercase all words
            num = int(file.readline())

            forest = Forest(num, sent, cased_sent, tag)
            forest.labelspans = {}
            forest.short_edges = {}

            delta = num_spu = 0
            for i in xrange(1, num+1):

                ## '2\tDT* [0-1]\t1 ||| 1232=2 ...\n'
                ## node-based features here: wordedges, greedyheavy, word(1), [word(2)], ...
                line = file.readline()
                try:
                    keys, fields = line.split(" ||| ")
                except:
                    keys = line
                    fields = ""


                iden, labelspan, size = keys.split("\t") ## iden can be non-ints
                size = int(size)

                fvector = FVector.parse(fields)
                node = Node(iden, labelspan, size, fvector, sent)
                forest.add_node(node)

                if cache_same:
                    if labelspan in forest.labelspans:
                        node.same = forest.labelspans[labelspan]
                        node.fvector = node.same.fvector
                    else:
                        forest.labelspans[labelspan] = node

                for j in xrange(size):
                    is_oracle = False

                    ## '\t1 ||| 0=8.86276 1=2 3\n'
                    tails, fields = file.readline().strip().split(" ||| ")
                    
                    if tails[0] == "*":  #oracle edge
                        is_oracle = True
                        tails = tails[1:]
                        
                    tails = tails.split() ## could be non-integers
                    tailnodes = []

                    for x in tails:
                        assert x in forest.nodes, "BAD TOPOL ORDER: node #%s is referred to " % x + \
                               "(in a hyperedge of node #%s) before being defined" % iden
                        ## topological ordering
                        tail = forest.nodes[x]
                        tailnodes.append(tail)

                    use_same = False
                    if fields[-1] == "~":
                        use_same = True
                        fields = fields[:-1]
                        
                    fvector = FVector.parse(fields)
                    edge = Hyperedge(node, tailnodes, fvector)

                    if cache_same:

                        short_edge = edge.shorter()
                        if short_edge in forest.short_edges:
                            edge.same = forest.short_edges[short_edge]
                            if use_same:
                                edge.fvector += edge.same.fvector
#.........这里部分代码省略.........
开发者ID:rupenp,项目名称:transforest,代码行数:103,代码来源:forest.py

示例5: treebank

# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
#!/usr/bin/python

'''
Binarize a treebank read from standard input.

Each input line is expected to hold one parse tree (not necessarily
binary).  Every tree is parsed, binarized and printed to standard output.
'''


from tree import Tree
import sys

for raw_line in sys.stdin:
    parsed = Tree.parse(raw_line.strip())

    # binarize() is called for its effect on the tree, which is then printed
    parsed.binarize()
    print(parsed)
开发者ID:saliahmed86,项目名称:PCFG-trainer-CYK-parser,代码行数:20,代码来源:binarize.py

示例6: open

# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
logs = sys.stderr

import collections
import itertools
from tree import Tree

if __name__ == "__main__":
    # Exactly two arguments are expected after the script name; any other
    # count makes the tuple unpacking raise ValueError.
    try:
        _, parsefilename, goldfilename = sys.argv
    except ValueError:
        print >> logs, "usage: evalb.py <parse-file> <gold-file>\n"
        sys.exit(1)

    # NOTE(review): matchcount is accumulated below but never reported in
    # the visible code -- confirm whether a precision/recall line is missing.
    matchcount = parsecount = goldcount = 0

    # The with-blocks close both files even if a tree fails to parse.
    with open(parsefilename) as parsefile:
        with open(goldfilename) as goldfile:
            # Parse file and gold file are read in lockstep, line by line.
            for parseline, goldline in itertools.izip(parsefile, goldfile):
                goldtree = Tree.parse(goldline)
                goldbrackets = goldtree.label_span_counts()
                goldcount += len(goldbrackets)

                # A failed parse still counts its gold brackets (above).
                if parseline.strip() == "NONE": # parsing failure
                    continue

                parsetree = Tree.parse(parseline)
                parsebrackets = parsetree.label_span_counts()
                parsecount += len(parsebrackets)

                # A bracket matches at most as often as it occurs in gold.
                for bracket, count in parsebrackets.iteritems():
                    matchcount += min(count, goldbrackets[bracket])

    print("%s\t%d brackets" % (parsefilename, parsecount))
    print("%s\t%d brackets" % (goldfilename, goldcount))
开发者ID:saliahmed86,项目名称:PCFG-trainer-CYK-parser,代码行数:33,代码来源:evalb.py

示例7: int

# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
        
    print features.prep_features(sys.argv[1:])

    f = sys.stdin
    while True:
        line = f.readline()
        if line == '':
            break
        if line == '\n':
            continue
        try:
            k, tag = line.strip().split("\t")
        except:
            break  ## can finish earlier
         print k, tag
        k = int(k)
        best_w = None
        for j in xrange(k):
            logprob = float(f.readline().strip())
            parse = f.readline().strip()
            tree = Tree.parse(parse)
            ##print tree

            if j < maxk:
##                print tree
                features.evaluate(tree, tree.get_sent(), j)
                ##print features.pp_fv(fvector, j)
                ##fvector = features.extract(tree, tree.get_sent())
                
                
开发者ID:rupenp,项目名称:transforest,代码行数:30,代码来源:extract_features_nbest.py

示例8: load

# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]

#.........这里部分代码省略.........
                    tailnodes = []
                    lhsstr = [] # 123 "thank" 456

                    lmstr = []
                    lmscore = 0
                    lmlhsstr = []
                    
                    for x in tails:
                        if x[0]=='"': # word
                            word = desymbol(x[1:-1])
                            lhsstr.append(word)  ## desymbol here and only here; ump will call quoteattr
                            
                            if lm is not None:
                                this = lm.word2index(word)
                                lmscore += lm.ngram.wordprob(this, lmstr)
                                lmlhsstr.append(this)
                                lmstr += [this,]
                                
                        else: # variable

                            assert x in forest.nodes, "BAD TOPOL ORDER: node #%s is referred to " % x + \
                                         "(in a hyperedge of node #%s) before being defined" % iden
                            tail = forest.nodes[x]
                            tailnodes.append(tail)
                            lhsstr.append(tail)                            

                            if lm is not None:
                                lmstr = []  # "..." "..." x0 "..."
                                lmlhsstr.append(tail) # sync with lhsstr

                    fvector = Vector(fields)
                    if lm is not None:
                        fvector["lm1"] = lmscore # hack

                    edge = Hyperedge(node, tailnodes, fvector, lhsstr)
                    edge.lmlhsstr = lmlhsstr

                    ## new
                    x = rule.split()
                    edge.ruleid = int(x[0])
                    if len(x) > 1:
                        edge.rule = Rule.parse(" ".join(x[1:]) + " ### " + fields)
                        forest.rules[edge.ruleid] = edge.rule #" ".join(x[1:]) #, None)
                    else:
                        edge.rule = forest.rules[edge.ruleid] # cahced rule

                    node.add_edge(edge)
                    if is_oracle:
                        node.oracle_edge = edge
                    
                if node.sp_terminal():
                    node.word = node.edges[0].subs[0].word

            ## splitted nodes 12-3-4 => (12, 3, 4)
            tmp = sorted([(map(int, x.iden.split("-")), x) for x in forest.nodeorder])   
            forest.nodeorder = [x for (_, x) in tmp]

            forest.rehash()
            sentid += 1
            
##            print >> logs, "sent #%d %s, %d words, %d nodes, %d edges, loaded in %.2lf secs" \
##                  % (sentid, forest.tag, forest.len, num, forest.num_edges, time.time() - basetime)

            forest.root = node
            node.set_root(True)
            line = file.readline()

            if line is not None and line.strip() != "":
                if line[0] == "(":
                    forest.goldtree = Tree.parse(line.strip(), trunc=True, lower=False)
                    line = file.readline()
            else:
                line = None

            forest.number_nodes()
            #print forest.root.position_id
          

            total_time += time.time() - start_time

            if num_sents % 100 == 0:
                print >> logs, "... %d sents loaded (%.2lf secs per sent) ..." \
                      % (num_sents, total_time/num_sents)

            forest.subtree() #compute the subtree string for each node

            yield forest

            if first is not None and num_sents >= first:
                break                

        # better check here instead of zero-division exception
        if num_sents == 0:
            print >> logs, "NO FORESTS FOUND!!! (empty input file?)"
            sys.exit(1)            
#            yield None # new: don't halt -- WHY?
        
        Forest.load_time = total_time
        print >> logs, "%d forests loaded in %.2lf secs (avg %.2lf per sent)" \
              % (num_sents, total_time, total_time/(num_sents+0.001))
开发者ID:srush,项目名称:tf-fork,代码行数:104,代码来源:forest.py

示例9: get_semantics_from_parse_tree

# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
def get_semantics_from_parse_tree(parse_tree_string):
    """Return the semantic parse of a parse tree given as a string.

    The tree is split into clauses, each clause is activized and split on
    conjunctions, and every verb found in a resulting tree is matched
    against its verb frame objects.

    The returned list mixes two kinds of entries, in processing order:
    non-empty conjunction values, and tuples of
    (best_match, tree, tag_list, sense) for every verb that had a match.
    tag_list contains 'ex' when the tree was detected as an existential.
    """
    parse_tree = Tree.parse(parse_tree_string)

    # Regex for finding verbs; constant, so compiled once up front rather
    # than once per tree.
    verb_finder = re.compile(r'(?<=VB[ DGNPZ]) *\w*(?=\))')

    # Split clauses to handle them separately
    split_clause_dict = frames.split_clauses(parse_tree)

    # Activize clauses
    for key, (clause, conjunction) in split_clause_dict.items():
        activized_clause = frames.activize_clause(clause)
        split_clause_dict[key] = (activized_clause, conjunction)

    result_list = []

    for (clause, conjunction) in split_clause_dict.values():
        # Split conjunctions and duplicate arguments if necessary
        split_tree_dict = frames.split_conjunctions(clause)

        if conjunction != '':
            result_list.append(conjunction)

        for (split_tree, conjunction) in split_tree_dict.values():
            if conjunction != '':
                result_list.append(conjunction)

            for tree in split_tree:
                tag_list = []

                # Store whether there was an existential there; this must
                # be checked before the tree is transformed below.
                if frames.is_existential(str(tree)):
                    tag_list.append('ex')

                # Transformational grammar stuff
                tree = frames.existential_there_insertion(tree)
                tree = frames.invert_clause(tree)
                tree = frames.wh_movement(tree)

                # Get the lemma of the verb for searching verbnet
                verbs = (word.strip().lower() for word in
                         verb_finder.findall(str(tree)))

                # Create VFOs for each verb, then match them to the parse tree
                for verb in verbs:
                    lemmatized_verb = morphy(verb, 'v')
                    vfo_list = frames.create_VerbFrameObjects(lemmatized_verb)

                    match_list = []

                    for vfo in vfo_list:
                        match = vfo.match_parse(tree)

                        if match:
                            match_list.append((match, vfo.classid))

                    (best_match, sense) = frames.pick_best_match(match_list)
                    if best_match is not None:
                        result_list.append((best_match, tree, tag_list, sense))

    return result_list
开发者ID:amareknight,项目名称:SLURP,代码行数:69,代码来源:parsing.py

示例10: extract_frames_from_parse

# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
def extract_frames_from_parse(parse_tree_string):
    """Take a string representing the parse tree as input, and print the
    semantic parse. The result list consists of a list of tuples, with each
    tuple containing the VerbNet frame and its associated tree."""
    result_list = []

    # In case we're handed an bad string, bail somewhat gracefully
    try:
        parse_tree = Tree.parse(parse_tree_string)
    except ValueError:
        print "Warning: semantics could not parse tree", repr(parse_tree_string)
        return result_list

    # Split clauses to handle them separately
    split_clause_dict = frames.split_clauses(parse_tree)

    # Activize clauses
    for key, (clause, conjunction) in split_clause_dict.items():
        activized_clause = frames.activize_clause(clause)
        split_clause_dict[key] = (activized_clause, conjunction)

    for (clause, conjunction) in split_clause_dict.values():
        # Split conjunctions and duplicate arguments if necessary
        split_tree_dict = frames.split_conjunctions(clause)

        if conjunction != '':
            result_list.append(conjunction)

        for (split_tree, conjunction) in split_tree_dict.values():
            if conjunction != '':
                result_list.append(conjunction)

            for tree in split_tree:
                tag_list = []

                # Store whether there was an existential there
                if frames.is_existential(str(tree)):
                    tag_list.append('ex')

                # Transformational grammar stuff
                tree = frames.existential_there_insertion(tree)
                tree = frames.invert_clause(tree)
                tree = frames.wh_movement(tree)

                if EXTRACT_DEBUG:
                    print 'Transformed tree:'
                    print str(tree)

                verbs = frames.find_verbs(tree)

                # Create VFOs for each verb, then match them to the parse tree
                for verb, negation in verbs:
                    lemmatized_verb = morphy(verb, 'v')
                    vfo_list = frames.create_VerbFrameObjects(lemmatized_verb)
                    match_list = []

                    if EXTRACT_DEBUG:
                        print 'VFO list for %s:' % verb
                        print '\n'.join(str(vfo.frame_list) for vfo in vfo_list)

                    for vfo in vfo_list:
                        match = vfo.match_parse(tree)

                        if match:
                            if EXTRACT_DEBUG:
                                print 'Matched:'
                                print '\t', str(vfo.frame_list)
                                print 'with'
                                print '\t', str(tree)
                            match_list.append((match, vfo.classid))

                    if EXTRACT_DEBUG:
                        print 'Match list:'

                        for m in match_list:
                            print 'Sense:', m[1]
                            for a, b in m[0].items():
                                print a, str(b)
                            print '\n\n'

                    (best_match, sense) = frames.pick_best_match(match_list)

                    if EXTRACT_DEBUG:
                        print 'Chose: '
                        if best_match:
                            for a, b in best_match.items():
                                print a, str(b)
                        else:
                            print str(None)
                        print '\n\n'
                    if not best_match is None:
                        result_list.append((best_match, tree, tag_list, sense, verb, negation))

    return result_list
开发者ID:uml-robotics,项目名称:SLURP,代码行数:96,代码来源:parsing.py


注:本文中的tree.Tree.parse方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。