本文整理汇总了Python中nltk.tree.Tree.parse方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.parse方法的具体用法?Python Tree.parse怎么用?Python Tree.parse使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.tree.Tree
的用法示例。
在下文中一共展示了Tree.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def main():
sents = 0
words_tot = 0
yngve_tot = 0
frazier_tot = 0
nodes_tot = 0
for line in sys.stdin:
if line.strip() == "":
continue
t = Tree.parse(line)
words = calc_words(t)
words_tot += words
sents += 1
yngve = calc_yngve(t, 0)
yngve_avg = float(yngve)/words
yngve_tot += yngve_avg
nodes = calc_nodes(t)
nodes_avg = float(nodes)/words
nodes_tot += nodes_avg
frazier = calc_frazier(t, 0, "")
frazier_avg = float(frazier)/words
frazier_tot += frazier_avg
# print "Sentence=%d\twords=%d\tyngve=%f\tfrazier=%f\tnodes=%f" % (sents, words, yngve_avg, frazier_avg, nodes_avg)
yngve_avg = float(yngve_tot)/sents
frazier_avg = float(frazier_tot)/sents
nodes_avg = float(nodes_tot)/sents
words_avg = float(words_tot)/sents
print "Total\tsents=%d\twords=%f\tyngve=%f\tfrazier=%f\tnodes=%f" % (sents, words_avg, yngve_avg, frazier_avg, nodes_avg)
示例2: parse_trees
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def parse_trees(self, flatten=False):
    """Return an nltk Tree for each sentence in self.result.

    If *flatten* is true, every tree is passed through flatten_deeptree()
    before being collected.
    """
    parsed = []
    for sent in self.result['sentences']:
        tree = Tree.parse(sent['parsetree'])
        parsed.append(flatten_deeptree(tree) if flatten else tree)
    return parsed
示例3: loadHeadTrees
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def loadHeadTrees(self, filename):
    """Load trees with heads annotated by ps2ds from *filename*.

    The UTF-8 file holds one bracketed tree per line.  Each parsed tree is
    wrapped in a TOP node so its shape coordinates with the original tree
    structure.  Returns a list of nltk.tree.Tree objects.
    """
    trees = []
    inf = codecs.open(filename, 'r', 'utf-8')
    try:
        # Iterate lazily instead of readlines() -- same order, less memory.
        for line in inf:
            head_tree = Tree.parse(line)
            # coordinate with original tree structure
            trees.append(Tree('TOP', [head_tree]))
    finally:
        inf.close()  # fix: the original never closed the file handle
    return trees
示例4: get_semantics_from_parse_tree
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def get_semantics_from_parse_tree(parse_tree_string):
    """Take a string representing the parse tree as input and build the
    semantic parse.

    Returns a list whose items are conjunction strings and/or tuples of
    (best VerbNet frame match, clause tree).
    """
    parse_tree = Tree.parse(parse_tree_string)
    # Split into clauses and activize (passive -> active) each one.
    split_clause_dict = split_clauses(parse_tree)
    for key, (clause, conjunction) in split_clause_dict.items():
        activized_clause = activize_clause(clause)
        split_clause_dict[key] = (activized_clause, conjunction)
    # Regex for verb leaves such as "(VBD ran)".  Hoisted out of the loops
    # because it is loop-invariant (the original recompiled it per tree).
    verb_finder = re.compile(r"(?<=VB[ DGNPZ]) *\w*(?=\))")
    result_list = []
    for position, (clause, conjunction) in split_clause_dict.items():
        split_tree_dict = split_conjunctions(clause)
        if conjunction != "":
            result_list.append(conjunction)
        for split, (split_tree, conjunction) in split_tree_dict.items():
            if conjunction != "":
                result_list.append(conjunction)
            for tree in split_tree:
                tree = existential_there_insertion(tree)
                tree = invert_clause(tree)
                tree = wh_movement(tree)
                # NOTE(review): opens a GUI window for every tree -- looks
                # like debug leftover; kept to preserve behavior.
                tree.draw()
                # Get the lemma of each verb for searching VerbNet.
                verbs = (word.strip().lower()
                         for word in verb_finder.findall(str(tree)))
                for verb in verbs:
                    lemmatized_verb = lemmatizer.lemmatize(verb, "v")
                    vfo_list = create_VerbFrameObjects(lemmatized_verb)
                    match_list = []
                    for vfo in vfo_list:
                        match = vfo.match_parse(tree)
                        if match:
                            match_list.append(match)
                    best_match = pick_best_match(match_list)
                    if best_match is not None:  # fix: was "not ... is None"
                        result_list.append((best_match, tree))
    return result_list
示例5: _parse_trees_output
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def _parse_trees_output(output_):
    """Split raw parser output into a list of nltk Trees.

    *output_* contains one bracketed tree per paragraph; paragraphs are
    separated by blank lines.
    """
    res = []
    cur_lines = []
    for line in output_.splitlines(False):
        if line == '':
            # fix: skip runs of blank lines instead of calling
            # Tree.parse('') on an empty buffer
            if cur_lines:
                res.append(Tree.parse('\n'.join(cur_lines)))
                cur_lines = []
        else:
            cur_lines.append(line)
    # fix: don't silently drop the final tree when the output does not end
    # with a trailing blank line
    if cur_lines:
        res.append(Tree.parse('\n'.join(cur_lines)))
    return res
示例6: _parse
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def _parse(self, t):
    """Parse the tree string *t* (after normalization), recovering from
    common damage.

    On a ValueError whose message is 'mismatched parens', tries to repair
    the string by appending 1..4 closing parens; if that fails, falls back
    to a flat parse: a single S node over the POS-tagged tokens of *t*.
    """
    try:
        return Tree.parse(self._normalize(t))
    except ValueError, e:
        sys.stderr.write("Bad tree detected; trying to recover...\n")
        # Try to recover, if we can:
        if e.args == ('mismatched parens',):
            for n in range(1, 5):
                try:
                    v = Tree.parse(self._normalize(t+')'*n))
                    sys.stderr.write(" Recovered by adding %d close "
                                     "paren(s)\n" % n)
                    return v
                except ValueError: pass
        # Try something else: give up on structure entirely.
        sys.stderr.write(" Recovered by returning a flat parse.\n")
        #sys.stderr.write(' '.join(t.split())+'\n')
        return Tree('S', self._tag(t))
示例7: load_parse_doc
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def load_parse_doc(parse_path):
    """Read one bracketed parse tree per non-blank line of the file at
    *parse_path* and return the parsed trees as a list."""
    full_path = os.path.abspath(parse_path)
    trees = []
    with open(full_path, 'r') as handle:
        for raw in handle:
            stripped = raw.strip()
            if stripped:
                trees.append(Tree.parse(stripped))
    return trees
示例8: build_tagged_sents
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def build_tagged_sents(files):
    """
    Build the corpus of tagged sentences from the files of the sequoia corpus.

    Each line of each UTF-8 file is a bracketed parse tree; the (word, tag)
    pairs of every tree are collected into one list.
    """
    sents = []
    for fname in files:
        # fix: with-statement guarantees the file is closed even when
        # Tree.parse raises (the original leaked the handle on error)
        with codecs.open(fname, "r", "utf-8") as fin:
            for line in fin:
                t = Tree.parse(line)
                sents.append(t.pos())
    return sents
示例9: _load_sent_token
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def _load_sent_token(self):
    """Populate self.tokens and self.sents from the cTAKES XML root.

    Walks every textspan.Sentence element of self.c_root, scans the
    sentence's character span for token elements, builds a Token per hit,
    and groups them into Sentence objects.  One parse tree per sentence is
    consumed from self.p_fp.  Aborts early (plain return) if two tokens
    claim the same begin offset.
    """
    print "Loading sentences and tokens..."
    sent_elmts = self.c_root.findall(CTAKES_PREFIX + 'textspan.Sentence')
    t_counter = 0  # running token id across ALL sentences
    for sent_elmt in sent_elmts:
        sent_begin = int(sent_elmt.get('begin'))
        sent_end = int(sent_elmt.get('end'))
        sent_num = int(sent_elmt.get('sentenceNumber'))
        cursor = sent_begin
        sent_span = []
        token_offset = 0  # token position within this sentence
        # Advance cursor through the sentence's character range, looking
        # for a token element that begins at each position.
        while cursor < sent_end:
            buf = self._find_token_elmt_with_attrib_of_val('begin', cursor)
            if len(buf) == 0:
                cursor = cursor + 1
                continue
            elif len(buf) > 1:
                print 'More than one token appear to begin at ' + str(cursor) + \
                    '\nLoading ctakes xml file terminated'
                return
            else:
                token_elmt = buf[0]
            t = Token(self.ds_id + '_t_' + str(t_counter))
            # Tag looks like 'ns.SomethingToken'; [:-5] presumably strips a
            # trailing 'Token' suffix -- TODO confirm against the XML schema.
            t.type = token_elmt.tag.split('.')[-1][:-5]
            # skipping 'newline' token when counting up tid
            t_num = int(token_elmt.get('tokenNumber')) - sent_num
            if t_num != t_counter:
                print 'CAUTION: t_num does not equal to counter t_counter'
            t.offset = token_offset
            t.begin = int(token_elmt.get('begin'))
            t.end = int(token_elmt.get('end'))
            t.pos = token_elmt.get('partOfSpeech')
            t.n_form = token_elmt.get('normalizedForm')
            #t.c_form = token_elmt.get('canonicalForm')
            #t.cap = int(token_elmt.get('capitalization'))
            #t.num_p = int(token_elmt.get('numPosition'))
            self.tokens.append(t)
            sent_span.append(t)
            cursor = t.end + 1  # jump past the matched token
            token_offset = token_offset + 1
            t_counter += 1
        s = Sentence(self.ds_id + '_s_' + str(sent_num))
        s.span = sent_span
        s.num = sent_num
        #s.begin = sent_begin
        #s.end = sent_end
        # Next tree in the parse file corresponds to this sentence.
        s.parse = Tree.parse(self.p_fp.next())
        for t in s.span:
            t.sent = s  # back-link every token to its sentence
        self.sents.append(s)
    return
示例10: __init__
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def __init__(self, json_file):
    """Load one annotated example from an open JSON file object.

    Every top-level JSON key becomes an instance attribute.  The gold
    parse string is additionally materialized as a SpanTree (converted in
    place) and as an nltk Tree, and the 'text' / 'treebank_sentence'
    fields are whitespace-tokenized.
    """
    data = json.load(json_file)
    for k, v in data.iteritems():
        self.__setattr__(k, v)
    self.__raw_data = data # for future reference
    #print data
    self.spantree = SpanTree.parse(self.goldparse)
    self.spantree.convert()
    # Replace the raw string attribute set above with the parsed tree.
    self.goldparse = Tree.parse(self.goldparse)
    self.text = data['text'].split()
    self.treebank_sentence = data['treebank_sentence'].split()
示例11: read
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def read(klass, path=KNOWLEDGE_PATH):
    """Build a knowledge-base instance from the verbframes JSON at *path*.

    Each frame name maps to its list of mapping dicts; every mapping gets
    a back-reference to its frame, and its 'verbmap' / 'parse' strings are
    converted to nltk Trees.  Raises when *path* is falsy.
    """
    if not path:
        raise Exception("Specify a path to the verbframes.json as $WIMKB")
    with open(path, 'rb') as kbfile:
        data = json.load(kbfile, encoding="utf8")
    kwargs = {}
    for frame in data['frames']:
        frame_name = frame['frame']
        for mapping in frame['mappings']:
            # Update mapping with frame object
            mapping['frame'] = frame_name
            # Convert string reprs of Trees
            mapping['verbmap'] = Tree.parse(mapping['verbmap'])
            if 'parse' in mapping:
                mapping['parse'] = Tree.parse(mapping['parse'])
        kwargs[frame_name] = frame['mappings']
    return klass(**kwargs)
示例12: findAmbiguities
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def findAmbiguities(self, line):
    """Parse *line* and return the highest ambiguity score found among the
    top-level constituents of any sentence (0 when none is found)."""
    result = self.parse(line)
    #if 'coref' in result:
    #    return 1
    trees = []
    worst = 0
    for sentence in result['sentences']:
        parsed = Tree.parse(sentence['parsetree'])
        trees.append(parsed)
        # The root's children are examined because tree[0] is an S node.
        for constituent in parsed:
            worst = max(worst, self.exploreSubTree(constituent))
    return worst
示例13: tag_ptree
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def tag_ptree(ptree, coreflist):
    """Tags given parse tree with coreferences

    Args:
        ptree: string, parenthesized str represenation of parse tree
        coreflist: list of tuples, [('1', {'text': 'dog', 'ref': None})]

    Returns:
        string, tagged parse tree

    >>> ptree = '(S NP( (NN He)) VP( (V ran)))'
    >>> coreflist = [('1', {'text': 'He', 'ref': None})]
    >>> tag_ptree(ptree, coreflist)
    '(S NP( COREF_TAG_1( (NN He))) VP( (V ran)))'
    """
    # Verbose (re.X) template; %s is filled with a per-coref subpattern.
    pattern = r"""(?P<lp>\(?\s*)          # left parenthesis
                  (?P<tg>[a-zA-Z$]+)?     # POS tag
                  (?P<data>\s*%s)         # subtree of tag
                  (?P<rp>(?:\s*\))*)      # right parenthesis
               """
    # Iterate corefs in reverse so earlier replacements don't disturb
    # text that later corefs still need to match.
    for cid, coref in coreflist[::-1]:
        words = ''.join(word_tokenize(coref['text']))
        nltktree = Tree.parse(ptree)
        nltktree.reverse()  # perform search right to left
        data = None
        for subtree in nltktree.subtrees():  # BFS
            if ''.join(subtree.leaves()) == words:  # equal ignoring whitespace
                data = subtree.pprint()
                break
        # If found via breadth-first search of parse tree
        if data:
            ptree = ptree.replace(data, '( COREF_TAG_%s%s)' % (cid, data))
        else:  # Try finding via regex matching instead
            # Whitespace-tolerant pattern matching each token's
            # "(TAG word)" node in sequence.
            dpattern = r'\s*'.join([r'\(\s*[a-zA-Z$]+\s+%s\s*\)' % word
                                    for word in word_tokenize(coref['text'])])
            found = re.findall(pattern % dpattern, ptree, re.X)
            if found:
                repl = '%s%s ( COREF_TAG_%s%s) %s' % (found[0][0],
                                                      found[0][1],
                                                      cid,
                                                      found[0][2],
                                                      found[0][3])
                # Replace only the first occurrence.
                ptree = re.sub(pattern % dpattern, repl, ptree, 1, re.X)
    return ptree
示例14: parseQuestion
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def parseQuestion(self, text):
question = Question()
print "RECEIVED DATA IS\n" + text
wordList = nltk.word_tokenize(text)
i = 0
tokens = list()
for word in wordList:
print "WORD: "+str(word)
if not str(word).strip() is "" and not str(word).strip() is "." and not str(word).strip() is "?" and not str(word).strip() is "!" and not str(word).strip() is ",":
tokens.append(word)
i+=1
print tokens
question.setTokens(tokens)
result = self.parse(text)
tree = Tree.parse(result['sentences'][0]['parsetree'])
print TreeUtils.findPocs(tree)
示例15: create_trees_nltk
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import parse [as 别名]
def create_trees_nltk(filename):
    """Read blank-line-separated bracketed trees from *filename*, parse
    them with nltk, convert each to Chomsky normal form in place, and
    return the list of trees.
    """
    # with-statement replaces the manual open/readlines/close sequence.
    with open(filename, "r") as f:
        response = f.readlines()
    valid_tree_texts = []
    tree_text = ''
    for line in response:
        line = line.strip()
        if line == "":
            # fix: only flush a non-empty buffer, so leading or repeated
            # blank lines no longer enqueue empty (unparseable) texts
            if tree_text:
                valid_tree_texts.append(tree_text)
                tree_text = ""
        else:
            tree_text += line + " "
    # fix: keep the final tree even when the file has no trailing blank line
    if tree_text:
        valid_tree_texts.append(tree_text)
    trees = [Tree.parse(text) for text in valid_tree_texts]
    for tree in trees:  # direct iteration instead of range(len(...))
        tree.chomsky_normal_form()
    return trees