本文整理汇总了Python中tree.Tree.parse方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.parse方法的具体用法?Python Tree.parse怎么用?Python Tree.parse使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类tree.Tree的用法示例。
在下文中一共展示了Tree.parse方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: readkparses
# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
def readkparses(f, k):
    '''Yield k (fvector, tree) pairs read from the open file f.

    Each parse occupies two consecutive lines of f: the first is handed
    to FVector.parse, the second (a bracketed parse) to Tree.parse with
    trunc=True and lower=True.  Both lines are stripped before parsing.
    '''
    for _ in xrange(k):
        fvector = FVector.parse(f.readline().strip())
        tree = Tree.parse(f.readline().strip(), trunc=True, lower=True)
        yield (fvector, tree)
示例2: readonebest
# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
def readonebest(f):
    '''1-best output, or gold: yield one Tree per non-blank line.

    f is passed through getfile() (defined elsewhere) and then read line
    by line until end of file.  Lines that are exactly a newline are
    skipped; every other line is stripped and parsed with
    Tree.parse(..., trunc=True, lower=True).
    '''
    f = getfile(f)
    # readline() returns '' only at end of file, which ends the iteration.
    for line in iter(f.readline, ''):
        if line == '\n':
            continue
        yield Tree.parse(line.strip(), trunc=True, lower=True)
示例3: readkbest
# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
def readkbest(f, read_gold=False):
    '''Yield one NBestForest per k-best list found in f.

    Each list starts with a header line "<k>\\t<tag>", followed by the k
    parses consumed by readkparses() and, when read_gold is true, one
    extra line holding the gold tree.  Lines that are exactly a newline
    between lists are skipped.  Reading stops at end of file or at the
    first header line that is not "<int>\\t<tag>".
    '''
    f = getfile(f)
    # readline() (not file iteration) is used because readkparses() also
    # pulls lines from f with readline().
    for line in iter(f.readline, ''):
        if line == '\n':
            continue
        try:
            k, tag = line.strip().split("\t")
            k = int(k)
        except ValueError:
            # Wrong number of tab-separated fields or a non-integer k.
            break  ## can finish earlier
        kparses = list(readkparses(f, k))
        goldtree = None
        if read_gold:
            goldtree = Tree.parse(f.readline().strip(), trunc=True, lower=True)
        yield NBestForest(k, tag, kparses, goldtree)
示例4: load
# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
def load(filename, lower=True, sentid=0):
'''now return a generator! use load().next() for singleton.
and read the last line as the gold tree -- TODO: optional!
and there is an empty line at the end
'''
# NOTE(review): the indentation of this excerpt was lost and the body is
# truncated after the last line shown; comments describe only visible code.
# getfile() is defined elsewhere; its result is read with readline() below.
file = getfile(filename)
line = None
total_time = 0
num_sents = 0
while True:
start_time = time.time()
##'\tThe complicated language in ...\n"
## tag is often missing
# Skip blank lines, then split the header line into tag and sentence.
# Any failure here (e.g. the line does not split into exactly two
# tab-separated fields, as happens for '' at end of file) ends the loop.
try:
if line is None or line == "\n":
line = "\n"
while line == "\n":
line = file.readline() # emulate seek
tag, sent = line.split("\t")
except:
## no more forests
break
num_sents += 1
sent = sent.split()
# keep an untouched copy of the words before optional lowercasing
cased_sent = sent [:]
if lower:
sent = [w.lower() for w in sent] # mark johnson: lowercase all words
# the next line holds the number of nodes in this forest
num = int(file.readline())
forest = Forest(num, sent, cased_sent, tag)
forest.labelspans = {}
forest.short_edges = {}
delta = num_spu = 0
# one iteration per node; i itself is not used in the visible code
for i in xrange(1, num+1):
## '2\tDT* [0-1]\t1 ||| 1232=2 ...\n'
## node-based features here: wordedges, greedyheavy, word(1), [word(2)], ...
line = file.readline()
# a node line that does not split into exactly two parts on " ||| "
# is treated as having no feature fields
try:
keys, fields = line.split(" ||| ")
except:
keys = line
fields = ""
iden, labelspan, size = keys.split("\t") ## iden can be non-ints
size = int(size)
fvector = FVector.parse(fields)
node = Node(iden, labelspan, size, fvector, sent)
forest.add_node(node)
# cache_same is not defined in this block -- presumably a module-level
# flag (TODO confirm).  When set, a node whose labelspan was already
# seen shares the fvector of the first such node.
if cache_same:
if labelspan in forest.labelspans:
node.same = forest.labelspans[labelspan]
node.fvector = node.same.fvector
else:
forest.labelspans[labelspan] = node
# the node line is followed by `size` hyperedge lines
for j in xrange(size):
is_oracle = False
## '\t1 ||| 0=8.86276 1=2 3\n'
tails, fields = file.readline().strip().split(" ||| ")
if tails[0] == "*": #oracle edge
is_oracle = True
tails = tails[1:]
tails = tails.split() ## could be non-integers
tailnodes = []
for x in tails:
assert x in forest.nodes, "BAD TOPOL ORDER: node #%s is referred to " % x + \
"(in a hyperedge of node #%s) before being defined" % iden
## topological ordering
tail = forest.nodes[x]
tailnodes.append(tail)
# a trailing "~" on the fields is stripped and recorded in use_same
use_same = False
if fields[-1] == "~":
use_same = True
fields = fields[:-1]
fvector = FVector.parse(fields)
edge = Hyperedge(node, tailnodes, fvector)
# with cache_same, an edge whose shorter() form was already seen is
# linked to that earlier edge; if use_same was set, the earlier edge's
# fvector is added to this one
if cache_same:
short_edge = edge.shorter()
if short_edge in forest.short_edges:
edge.same = forest.short_edges[short_edge]
if use_same:
edge.fvector += edge.same.fvector
#.........这里部分代码省略.........
示例5: treebank
# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
#!/usr/bin/python
'''
Reads parse trees from a treebank (each line contains one parse tree)
Converts that tree into a binary tree (input is not necessarily binary)
'''
from tree import Tree
import sys
# Read one bracketed parse tree per line of standard input, binarize it
# in place, and print the result.
for line in sys.stdin:
    line = line.strip()
    t = Tree.parse(line)
    # convert to binary and print
    t.binarize()
    # Single-argument print(...) behaves exactly like `print t` in Python 2.
    print(t)
示例6: open
# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
logs = sys.stderr
import itertools, collections
from tree import Tree
# Bracket-count evaluation: compares a file of parser output with a file of
# gold trees, line by line, and prints the bracket totals of each file.
if __name__ == "__main__":
    try:
        _, parsefilename, goldfilename = sys.argv
    except ValueError:
        # Wrong number of command-line arguments.
        logs.write("usage: evalb.py <parse-file> <gold-file>\n" + "\n")
        sys.exit(1)

    matchcount = parsecount = goldcount = 0

    # Both files are closed when the block exits (they were left open before).
    with open(parsefilename) as parsefile, open(goldfilename) as goldfile:
        for parseline, goldline in itertools.izip(parsefile, goldfile):
            goldtree = Tree.parse(goldline)
            goldbrackets = goldtree.label_span_counts()
            goldcount += len(goldbrackets)

            # Gold brackets are counted even when the parser failed.
            if parseline.strip() == "NONE":  # parsing failure
                continue

            parsetree = Tree.parse(parseline)
            parsebrackets = parsetree.label_span_counts()
            parsecount += len(parsebrackets)

            # NOTE(review): goldbrackets[bracket] assumes label_span_counts()
            # returns a mapping that yields 0 for missing keys -- confirm.
            for bracket, count in parsebrackets.iteritems():
                matchcount += min(count, goldbrackets[bracket])

    # NOTE(review): matchcount is accumulated above but never reported in the
    # visible code.
    print("%s\t%d brackets" % (parsefilename, parsecount))
    print("%s\t%d brackets" % (goldfilename, goldcount))
示例7: int
# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
# Script fragment: `features` and `maxk` are defined outside this excerpt.
print(features.prep_features(sys.argv[1:]))

# Read k-best lists from standard input.  Each list is a "<k>\t<tag>" header
# followed by k (log-probability line, parse line) pairs.
f = sys.stdin
while True:
    line = f.readline()
    if line == '':
        # end of input
        break
    if line == '\n':
        continue
    try:
        k, tag = line.strip().split("\t")
    except ValueError:
        # Header does not have exactly two tab-separated fields.
        break  ## can finish earlier
    # Same output as the Python 2 statement `print k, tag`.
    print("%s %s" % (k, tag))
    k = int(k)
    best_w = None
    for j in xrange(k):
        # The log-probability is read (and must be a valid float) but is not
        # used further in the visible code.
        logprob = float(f.readline().strip())
        parse = f.readline().strip()
        tree = Tree.parse(parse)
        # Every parse is read and parsed; only the first maxk are evaluated.
        if j < maxk:
            features.evaluate(tree, tree.get_sent(), j)
示例8: load
# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
#.........这里部分代码省略.........
# NOTE(review): this excerpt starts in the middle of load(); its indentation
# was lost, and names such as tails, fields, rule, node, forest, lm, iden,
# is_oracle, first, logs and the enclosing loops are defined above it.
tailnodes = []
lhsstr = [] # 123 "thank" 456
lmstr = []
lmscore = 0
lmlhsstr = []
# Each tail is either a quoted word (first character is '"') or a node id.
for x in tails:
if x[0]=='"': # word
# x[1:-1] drops the first and last character (the surrounding quotes)
word = desymbol(x[1:-1])
lhsstr.append(word) ## desymbol here and only here; ump will call quoteattr
if lm is not None:
this = lm.word2index(word)
lmscore += lm.ngram.wordprob(this, lmstr)
lmlhsstr.append(this)
lmstr += [this,]
else: # variable
assert x in forest.nodes, "BAD TOPOL ORDER: node #%s is referred to " % x + \
"(in a hyperedge of node #%s) before being defined" % iden
tail = forest.nodes[x]
tailnodes.append(tail)
lhsstr.append(tail)
if lm is not None:
# a variable resets the running word history used for wordprob()
lmstr = [] # "..." "..." x0 "..."
lmlhsstr.append(tail) # sync with lhsstr
fvector = Vector(fields)
if lm is not None:
fvector["lm1"] = lmscore # hack
edge = Hyperedge(node, tailnodes, fvector, lhsstr)
edge.lmlhsstr = lmlhsstr
## new
# The first token of `rule` is an integer rule id.  If more tokens follow,
# they define the rule, which is parsed and stored in forest.rules;
# otherwise the rule is looked up there by id.
x = rule.split()
edge.ruleid = int(x[0])
if len(x) > 1:
edge.rule = Rule.parse(" ".join(x[1:]) + " ### " + fields)
forest.rules[edge.ruleid] = edge.rule #" ".join(x[1:]) #, None)
else:
edge.rule = forest.rules[edge.ruleid] # cached rule
node.add_edge(edge)
if is_oracle:
node.oracle_edge = edge
if node.sp_terminal():
node.word = node.edges[0].subs[0].word
## splitted nodes 12-3-4 => (12, 3, 4)
# reorder nodes by the integer components of their dash-separated ids
tmp = sorted([(map(int, x.iden.split("-")), x) for x in forest.nodeorder])
forest.nodeorder = [x for (_, x) in tmp]
forest.rehash()
sentid += 1
## print >> logs, "sent #%d %s, %d words, %d nodes, %d edges, loaded in %.2lf secs" \
## % (sentid, forest.tag, forest.len, num, forest.num_edges, time.time() - basetime)
# the last node read for this forest becomes its root
forest.root = node
node.set_root(True)
# Look at the line after the forest: a line starting with "(" is parsed as
# the gold tree and one more line is read; any other non-blank line is kept
# in `line`; a blank line resets `line` to None.
line = file.readline()
if line is not None and line.strip() != "":
if line[0] == "(":
forest.goldtree = Tree.parse(line.strip(), trunc=True, lower=False)
line = file.readline()
else:
line = None
forest.number_nodes()
#print forest.root.position_id
total_time += time.time() - start_time
# progress report every 100 sentences
if num_sents % 100 == 0:
print >> logs, "... %d sents loaded (%.2lf secs per sent) ..." \
% (num_sents, total_time/num_sents)
forest.subtree() #compute the subtree string for each node
yield forest
# `first`, when not None, caps the number of forests yielded
if first is not None and num_sents >= first:
break
# better check here instead of zero-division exception
if num_sents == 0:
print >> logs, "NO FORESTS FOUND!!! (empty input file?)"
sys.exit(1)
# yield None # new: don't halt -- WHY?
Forest.load_time = total_time
# NOTE(review): the +0.001 looks redundant given the num_sents == 0 exit
# above -- confirm before removing.
print >> logs, "%d forests loaded in %.2lf secs (avg %.2lf per sent)" \
% (num_sents, total_time, total_time/(num_sents+0.001))
示例9: get_semantics_from_parse_tree
# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
def get_semantics_from_parse_tree(parse_tree_string):
    """Return the semantic parse of a parse tree given as a string.

    The tree is split into clauses, each clause is activized and split on
    conjunctions, and every resulting tree is matched against the VerbNet
    frames of each verb found in it.

    The returned list contains, in order of discovery, the non-empty
    conjunction values and, for every verb with a best match, a tuple
    (best_match, tree, tag_list, sense).  tag_list holds 'ex' when the
    tree was an existential before transformation.
    """
    parse_tree = Tree.parse(parse_tree_string)

    # Split clauses to handle them separately
    split_clause_dict = frames.split_clauses(parse_tree)

    # Activize clauses (only values of existing keys are replaced)
    for key, (clause, conjunction) in split_clause_dict.items():
        activized_clause = frames.activize_clause(clause)
        split_clause_dict[key] = (activized_clause, conjunction)

    # Regex for finding verbs; compiled once rather than once per tree
    verb_finder = re.compile(r'(?<=VB[ DGNPZ]) *\w*(?=\))')

    result_list = []
    for (clause, conjunction) in split_clause_dict.values():
        # Split conjunctions and duplicate arguments if necessary
        split_tree_dict = frames.split_conjunctions(clause)

        if conjunction != '':
            result_list.append(conjunction)

        for (split_tree, conjunction) in split_tree_dict.values():
            if conjunction != '':
                result_list.append(conjunction)

            for tree in split_tree:
                tag_list = []

                # Store whether there was an existential there
                if frames.is_existential(str(tree)):
                    tag_list.append('ex')

                # Transformational grammar stuff
                tree = frames.existential_there_insertion(tree)
                tree = frames.invert_clause(tree)
                tree = frames.wh_movement(tree)

                # Get the lemma of the verb for searching verbnet
                verbs = (word.strip().lower() for word in
                         verb_finder.findall(str(tree)))

                # Create VFOs for each verb, then match them to the parse tree
                for verb in verbs:
                    lemmatized_verb = morphy(verb, 'v')
                    vfo_list = frames.create_VerbFrameObjects(lemmatized_verb)

                    match_list = []
                    for vfo in vfo_list:
                        match = vfo.match_parse(tree)
                        if match:
                            match_list.append((match, vfo.classid))

                    (best_match, sense) = frames.pick_best_match(match_list)
                    if best_match is not None:
                        result_list.append((best_match, tree, tag_list, sense))

    return result_list
示例10: extract_frames_from_parse
# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import parse [as 别名]
def extract_frames_from_parse(parse_tree_string):
    """Return the semantic parse of a parse tree given as a string.

    The tree is split into clauses, each clause is activized and split on
    conjunctions, and every resulting tree is matched against the VerbNet
    frames of each verb returned by frames.find_verbs().

    The returned list contains, in order of discovery, the non-empty
    conjunction values and, for every verb with a best match, a tuple
    (best_match, tree, tag_list, sense, verb, negation).  If Tree.parse
    raises ValueError, a warning is printed and an empty list is returned.

    When EXTRACT_DEBUG is true, intermediate results are printed.  The
    single-argument print(...) calls below produce the same output as the
    Python 2 print statements they replace.
    """
    result_list = []

    # In case we're handed a bad string, bail somewhat gracefully
    try:
        parse_tree = Tree.parse(parse_tree_string)
    except ValueError:
        print("%s %s" % ("Warning: semantics could not parse tree",
                         repr(parse_tree_string)))
        return result_list

    # Split clauses to handle them separately
    split_clause_dict = frames.split_clauses(parse_tree)

    # Activize clauses (only values of existing keys are replaced)
    for key, (clause, conjunction) in split_clause_dict.items():
        activized_clause = frames.activize_clause(clause)
        split_clause_dict[key] = (activized_clause, conjunction)

    for (clause, conjunction) in split_clause_dict.values():
        # Split conjunctions and duplicate arguments if necessary
        split_tree_dict = frames.split_conjunctions(clause)

        if conjunction != '':
            result_list.append(conjunction)

        for (split_tree, conjunction) in split_tree_dict.values():
            if conjunction != '':
                result_list.append(conjunction)

            for tree in split_tree:
                tag_list = []

                # Store whether there was an existential there
                if frames.is_existential(str(tree)):
                    tag_list.append('ex')

                # Transformational grammar stuff
                tree = frames.existential_there_insertion(tree)
                tree = frames.invert_clause(tree)
                tree = frames.wh_movement(tree)

                if EXTRACT_DEBUG:
                    print('Transformed tree:')
                    print(str(tree))

                verbs = frames.find_verbs(tree)

                # Create VFOs for each verb, then match them to the parse tree
                for verb, negation in verbs:
                    lemmatized_verb = morphy(verb, 'v')
                    vfo_list = frames.create_VerbFrameObjects(lemmatized_verb)

                    match_list = []

                    if EXTRACT_DEBUG:
                        print('VFO list for %s:' % verb)
                        print('\n'.join(str(vfo.frame_list) for vfo in vfo_list))

                    for vfo in vfo_list:
                        match = vfo.match_parse(tree)

                        if match:
                            if EXTRACT_DEBUG:
                                print('Matched:')
                                # Python 2 `print '\t', x` emits no space
                                # after the tab, hence the concatenation.
                                print('\t' + str(vfo.frame_list))
                                print('with')
                                print('\t' + str(tree))
                            match_list.append((match, vfo.classid))

                    if EXTRACT_DEBUG:
                        print('Match list:')
                        for m in match_list:
                            print('%s %s' % ('Sense:', m[1]))
                            for a, b in m[0].items():
                                print('%s %s' % (a, str(b)))
                            # NOTE(review): the nesting of this separator was
                            # lost with the indentation; assumed to follow
                            # each match -- confirm.
                            print('\n\n')

                    (best_match, sense) = frames.pick_best_match(match_list)

                    if EXTRACT_DEBUG:
                        print('Chose: ')
                        if best_match:
                            for a, b in best_match.items():
                                print('%s %s' % (a, str(b)))
                        else:
                            print(str(None))
                        print('\n\n')

                    if best_match is not None:
                        result_list.append((best_match, tree, tag_list, sense,
                                            verb, negation))

    return result_list