本文整理汇总了Python中nltk.tree.Tree.append方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.append方法的具体用法?Python Tree.append怎么用?Python Tree.append使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.tree.Tree的用法示例。
在下文中一共展示了Tree.append方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: BP_tree_to_nltk_tree
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def BP_tree_to_nltk_tree(tree):
    """Recursively convert a BP-tree node into an ``nltk`` ``Tree``.

    Every produced node is labelled with ``str(tree.keys)``.  Children are
    only followed when ``tree`` is a ``BPnode`` or a ``Node``; any other
    object is converted into a childless node.
    """
    converted = Tree(str(tree.keys), children=[])
    if not isinstance(tree, (BPnode, Node)):
        # Not an inner-node type: nothing to descend into.
        return converted
    for subtree in tree.children:
        converted.append(BP_tree_to_nltk_tree(subtree))
    return converted
示例2: load_ace_file
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def load_ace_file(textfile, fmt):
    """Yield a single ``Tree`` of tokens and named-entity chunks for one file.

    The annotation file is expected next to ``textfile`` under the name
    ``textfile + '.tmx.rdc.xml'``.  Only entity mentions whose ``TYPE``
    attribute is ``'NAME'`` are used; their head character offsets are
    read from ``head/charseq/start`` and ``head/charseq/end``.

    :param textfile: path of the SGML-like text file containing a
        ``<TEXT>...</TEXT>`` section.
    :param fmt: ``'binary'`` to label every entity chunk ``'NE'``, or
        ``'multiclass'`` to label each chunk with its entity type.
    :return: a generator yielding one ``Tree('S', ...)`` whose children are
        word tokens and entity subtrees.
    :raises ValueError: if ``fmt`` is neither ``'binary'`` nor
        ``'multiclass'`` (raised after both files have been read, when the
        generator is first advanced).
    """
    # Single-argument call form prints identically on Python 2 and 3.
    print(' - %s' % os.path.split(textfile)[1])
    annfile = textfile + '.tmx.rdc.xml'

    # Read the xml file, and get a list of entities
    entities = []
    with open(annfile) as ann_stream:
        xml = ET.parse(ann_stream).getroot()
    for entity in xml.findall('document/entity'):
        typ = entity.find('entity_type').text
        for mention in entity.findall('entity_mention'):
            if mention.get('TYPE') != 'NAME':
                continue  # only NEs
            s = int(mention.find('head/charseq/start').text)
            # The annotated end offset is inclusive; make it exclusive.
            e = int(mention.find('head/charseq/end').text) + 1
            entities.append((s, e, typ))

    # Read the text file, and mark the entities.
    with open(textfile) as text_stream:
        text = text_stream.read()

    # Strip XML tags, since they don't count towards the indices
    text = re.sub(r'<(?!/?TEXT)[^>]+>', '', text)

    # Blank out anything before/after <TEXT>.  The prefix is replaced by
    # spaces so offsets stay aligned; 6 == len('<TEXT>'), which does not
    # count towards the indices either.
    def subfunc(m):
        return ' ' * (m.end() - m.start() - 6)
    text = re.sub(r'[\s\S]*<TEXT>', subfunc, text)
    text = re.sub(r'</TEXT>[\s\S]*', '', text)

    # Simplify quotes
    text = re.sub("``", ' "', text)
    text = re.sub("''", '" ', text)

    if fmt not in ('binary', 'multiclass'):
        raise ValueError('bad fmt value')

    # 'binary' collapses every entity type to NE; 'multiclass' keeps it.
    i = 0
    toks = Tree('S', [])
    for (s, e, typ) in sorted(entities):
        if s < i:
            s = i  # Overlapping! Deal with this better?
        if e <= s:
            continue
        toks.extend(word_tokenize(text[i:s]))
        label = 'NE' if fmt == 'binary' else typ
        toks.append(Tree(label, text[s:e].split()))
        i = e
    toks.extend(word_tokenize(text[i:]))
    yield toks
示例3: unfolded_decoding
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def unfolded_decoding(W_d, b_d, tree, encoded):
    """Recursively decode ``encoded`` along the structure of ``tree``.

    A node labelled with ``encoded`` is created for ``tree``.  If ``tree``
    is a non-empty ``nltk.tree.Tree``, ``encoded`` is passed through
    ``decode(W_d, b_d, encoded)`` and the result is cut into consecutive
    slices of width ``m`` (the second dimension of ``W_d``), one per child,
    which are decoded recursively.

    NOTE: the number of branchings is NOT assumed, but it IS assumed to be
    uniform and that len(input layer) = n * len(encoding).

    :param W_d: decoder weights; only its 2-D ``shape`` is read here.
    :param b_d: decoder bias, forwarded to ``decode``.
    :param tree: tree (or leaf) whose shape drives the recursion.
    :param encoded: encoding used as the label of the produced node.
    :return: a ``Tree`` mirroring ``tree``, labelled with encodings; its
        ``span`` attribute is copied from ``tree`` when ``tree`` has one.
    """
    _, m = W_d.shape

    # store all a_e results in tree structure
    decoding_tree = Tree(encoded, [])
    try:
        decoding_tree.span = tree.span
    except AttributeError:
        # Leaves (and trees that were never annotated) carry no span.
        pass

    # if the given node (root) has children, decode the node's encoding,
    # split it, and use this as the children's encoding (output) to recurse
    # back, until terminal nodes are reached
    if isinstance(tree, nltk.tree.Tree) and len(tree) > 0:
        decoded = decode(W_d, b_d, encoded)
        for i, child in enumerate(tree):
            child_encoding = decoded[i * m:(i + 1) * m]
            decoding_tree.append(
                unfolded_decoding(W_d, b_d, child, child_encoding))
    return decoding_tree
示例4: __str2BguTree
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def __str2BguTree(self,text):
    """Build a flat ``Tree('s', ...)`` from tab-separated tagger output.

    ``text`` is split on newlines and empty lines are skipped.  For each
    remaining line, the first tab-separated field is the word and the
    second is handed to ``bguTag``; the ``(word, tag)`` pair becomes a leaf.
    """
    rows = text.split('\n')
    sentence = Tree('s', [])
    for row in rows:
        if row == '':
            continue
        fields = row.split("\t")
        sentence.append((fields[0], bguTag(fields[1])))
    return sentence
示例5: __build_tree
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def __build_tree(self, node_num):
    """Return the subtree rooted at word ``node_num``.

    The node label is element ``1`` of ``self.words[node_num]``.  Each entry
    found under ``node_num`` in ``self.dependencies`` contributes one child,
    built recursively from the entry's first element.
    """
    subtree = Tree(self.words[node_num][1], [])
    dependents = self.dependencies.get(node_num)
    if dependents is None:
        # No outgoing dependencies: this node is a leaf of the structure.
        return subtree
    for dependent in dependents:
        subtree.append(self.__build_tree(dependent[0]))
    return subtree
示例6: postag_tree
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def postag_tree(tree):
    """Return a copy of a two-level chunk tree with POS-tagged leaves.

    All leaves of ``tree`` are tagged in one ``pos_tag`` call; the tags are
    then consumed in order while rebuilding the tree under a new ``'S'``
    root.  Subtrees keep their label, and every leaf becomes a
    ``(word, tag)`` pair.  Only one level of nesting is rebuilt.
    """
    # Part-of-speech tagging.
    tags = (tag for (_, tag) in pos_tag(tree.leaves()))
    tagged = Tree('S', [])
    for node in tree:
        if not isinstance(node, Tree):
            tagged.append((node, next(tags)))
            continue
        chunk = Tree(node.label(), [])
        tagged.append(chunk)
        for leaf in node:
            chunk.append((leaf, next(tags)))
    return tagged
示例7: conlltags2tree
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def conlltags2tree(sentence, chunk_types=('NP','PP','VP'),
                   root_label='S', strict=False):
    """
    Convert the CoNLL IOB format to a tree.

    :param sentence: iterable of ``(word, postag, chunktag)`` triples.
    :param chunk_types: accepted but not consulted by this implementation.
    :param root_label: label of the returned root node.
    :param strict: when true, a ``None`` tag or an ``I-`` tag that does not
        continue the preceding chunk raises instead of being repaired.
    :raises ValueError: on an unrecognised tag, or on a bad sequence when
        ``strict`` is set.
    """
    root = Tree(root_label, [])
    for (word, postag, chunktag) in sentence:
        token = (word, postag)

        if chunktag is None:
            if strict:
                raise ValueError("Bad conll tag sequence")
            # Treat as O
            root.append(token)
            continue

        if chunktag.startswith('B-'):
            root.append(Tree(chunktag[2:], [token]))
            continue

        if chunktag.startswith('I-'):
            label = chunktag[2:]
            continues_chunk = (len(root) > 0 and
                               isinstance(root[-1], Tree) and
                               root[-1].label() == label)
            if continues_chunk:
                root[-1].append(token)
            elif strict:
                raise ValueError("Bad conll tag sequence")
            else:
                # Treat as B-*
                root.append(Tree(label, [token]))
            continue

        if chunktag == 'O':
            root.append(token)
            continue

        raise ValueError("Bad conll tag {0!r}".format(chunktag))
    return root
示例8: tags2tree
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def tags2tree(sentence, root_label='S', strict=False):
    """Convert ``(word, postag, chunktag)`` triples into a chunk tree.

    Tags starting with ``B`` open a new chunk and tags starting with ``I``
    continue the previous chunk when its label matches; in both cases the
    chunk label is ``chunktag[2:]``.  ``'O'`` (and, unless ``strict``,
    ``None``) adds the token directly under the root.

    :raises ValueError: on an unrecognised tag, or when ``strict`` is set
        and a ``None`` tag or a dangling ``I`` tag is met.
    """
    tree = Tree(root_label, [])
    for word, postag, chunktag in sentence:
        leaf = (word, postag)

        if chunktag is None and strict:
            raise ValueError("Bad tag sequence")

        if chunktag is None or chunktag == 'O':
            # A missing tag is treated as O in lenient mode.
            tree.append(leaf)
        elif chunktag.startswith('B'):
            tree.append(Tree(chunktag[2:], [leaf]))
        elif chunktag.startswith('I'):
            label = chunktag[2:]
            previous = tree[-1] if len(tree) != 0 else None
            if isinstance(previous, Tree) and previous.label() == label:
                previous.append(leaf)
            elif strict:
                raise ValueError("Bad tag sequence")
            else:
                # Treat as B-*
                tree.append(Tree(label, [leaf]))
        else:
            raise ValueError("Bad tag %r" % chunktag)
    return tree
示例9: binarize
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def binarize(tree):
    """Return a copy of ``tree`` in which no node has more than two children.

    Leaves (anything that is not a ``Tree``) are returned unchanged.  When a
    node has more than two children, its two right-most children are
    repeatedly folded into a new node labelled ``<label>bar`` until at most
    two remain.

    Nodes are built with an explicit label and child list.  The
    one-argument form ``Tree('(X)')`` relied on the constructor parsing a
    bracketed string, which the ``label()``-era NLTK API no longer does
    (it raises ``TypeError``).
    """
    if not isinstance(tree, Tree):
        return tree
    children = [binarize(ch) for ch in tree]
    bar_label = tree.label() + 'bar'
    while len(children) > 2:
        # Fold the last two children into a single intermediate node.
        children[-2:] = [Tree(bar_label, children[-2:])]
    return Tree(tree.label(), children)
示例10: split_tree_tokens
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def split_tree_tokens(tree):
    """Process a chunk-parse Tree, splitting nodes in the form "token/POS".

    Returns a similar tree in which the leaves are PoS tagged tokens in the
    form:

        ("token", "TAG")

    The tag is taken after the *last* slash, so a token that itself
    contains a slash (e.g. ``"and/or/CC"``) still yields a 2-tuple.  Only
    one level of nesting is rebuilt; chunks are assumed to be flat.
    """
    token_iter = (tuple(token.rsplit('/', 1)) for token in tree.leaves())
    newtree = NLTKParseTree(tree.node, [])
    for child in tree:
        if isinstance(child, NLTKParseTree):
            subtree = NLTKParseTree(child.node, [])
            newtree.append(subtree)
            for _ in child:
                # Builtin next() works on Python 2.6+ and Python 3.
                subtree.append(next(token_iter))
        else:
            newtree.append(next(token_iter))
    return newtree
示例11: simplify
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def simplify(tree):
    """Simplify ``tree`` bottom-up by applying the ``RULES`` table.

    String leaves are returned as they are.  Otherwise the children are
    simplified first (children that simplify to ``None`` are dropped, and a
    node left without children gets the single leaf ``'None'``).  Then each
    ``(cond, modif)`` pair of ``RULES`` whose condition holds rewrites the
    node.  As soon as a rewrite yields ``None`` the remaining rules are
    skipped and ``None`` is returned.
    """
    if isinstance(tree, str):
        return tree

    node = Tree(tree.label(), [])
    for child in tree:
        reduced = simplify(child)
        if reduced is not None:
            node.append(reduced)
    if not len(node):
        node.append('None')

    for applies, rewrite in RULES:
        if not applies(node):
            continue
        node = rewrite(node)
        if node is None:
            break
    return node
示例12: _tagged_to_parse
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def _tagged_to_parse(tagged_tokens):
    """Convert a list of tagged tokens to a chunk-parse Tree.

    ``tagged_tokens`` holds ``((token, pos), tag)`` items with IOB tags.
    Tokens are grouped into ``'S'`` subtrees under a ``'TEXT'`` root; a
    sentence is closed when an ``'O'``-tagged token has the POS ``'.'``.
    Tags other than ``'O'``, ``'B-*'`` and ``'I-*'`` are ignored.
    """
    document = NLTKParseTree('TEXT', [])
    sentence = NLTKParseTree('S', [])
    for (token, pos), tag in tagged_tokens:
        leaf = (token, pos)
        if tag == 'O':
            sentence.append(leaf)
            if pos != '.':
                continue
            # End of sentence, add to main tree and start a new subtree
            document.append(sentence)
            sentence = NLTKParseTree('S', [])
        elif tag.startswith('B-'):
            sentence.append(NLTKParseTree(tag[2:], [leaf]))
        elif tag.startswith('I-'):
            label = tag[2:]
            extends_previous = (len(sentence) > 0 and
                                isinstance(sentence[-1], NLTKParseTree) and
                                sentence[-1].node == label)
            if extends_previous:
                sentence[-1].append(leaf)
            else:
                # Dangling I- tag: open a new chunk instead.
                sentence.append(NLTKParseTree(label, [leaf]))
    # Keep a trailing sentence that was not closed by a '.' token.
    if len(sentence) > 0:
        document.append(sentence)
    return document
示例13: _tagged_to_parse
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def _tagged_to_parse(self, tagged_tokens):
    """
    Convert a list of tagged tokens to a chunk-parse tree.

    Each item is a ``(tok, tag)`` pair with an IOB tag.  ``"O"`` tokens go
    directly under the ``"S"`` root, ``"B-X"`` opens a chunk ``X``, and
    ``"I-X"`` extends the preceding chunk when it is labelled ``X`` and
    otherwise opens a new one.  Any other tag is ignored.
    """
    root = Tree("S", [])
    for tok, tag in tagged_tokens:
        if tag == "O":
            root.append(tok)
            continue
        if not (tag.startswith("B-") or tag.startswith("I-")):
            continue
        label = tag[2:]
        can_extend = (
            tag.startswith("I-")
            and len(root) > 0
            and isinstance(root[-1], Tree)
            and root[-1].label() == label
        )
        if can_extend:
            root[-1].append(tok)
        else:
            root.append(Tree(label, [tok]))
    return root
示例14: _tagged_to_parse
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def _tagged_to_parse(self, tagged_tokens):
    """
    Convert a list of tagged tokens to a chunk-parse tree.

    ``tagged_tokens`` yields ``(tok, tag)`` pairs carrying IOB tags.  The
    result is an ``'S'`` tree whose children are plain tokens (tag ``'O'``)
    and chunk subtrees (tags ``'B-*'`` / ``'I-*'``).  Other tags are
    skipped.
    """
    sentence = Tree('S', [])
    for (tok, tag) in tagged_tokens:
        if tag == 'O':
            sentence.append(tok)
        elif tag.startswith('B-'):
            sentence.append(Tree(tag[2:], [tok]))
        elif tag.startswith('I-'):
            chunk_label = tag[2:]
            last = sentence[-1] if len(sentence) > 0 else None
            if isinstance(last, Tree) and last.node == chunk_label:
                last.append(tok)
            else:
                # No matching open chunk: start one.
                sentence.append(Tree(chunk_label, [tok]))
    return sentence
示例15: chunk
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import append [as 别名]
def chunk(self, tree, rule, depth):
    """Apply ``rule`` to ``tree`` in place, recursing at most ``depth`` times.

    The first group of sibling positions matched by the rule is wrapped in
    a new subtree named after ``rule.type``.  The wrapped children are
    marked by surrounding their label (for subtrees) or their last element
    (for tuple leaves) with ``<...>``.  The method then calls itself with
    ``depth - 1`` if further matches look possible.

    :param tree: tree to rewrite; it is mutated and also returned.
    :param rule: object providing ``contents`` (whitespace-separated rule
        body), ``type`` (name of the new node) and
        ``find_brothers(positions, parent)``.
    :param depth: remaining recursion budget; ``0`` returns ``tree`` as is.
    :return: the (mutated) ``tree``.

    NOTE(review): a genuine leaf equal to the string ``"REMOVE"`` would be
    deleted by the placeholder clean-up below.
    """
    ruleContents = rule.contents
    ruleName = rule.type
    if depth == 0:  # maximum recursion set by depth
        return tree

    # get tuples for all locations in tree
    children = tree.treepositions('postorder')
    # presumably filled in by find_brothers (position -> parent position);
    # it is read below but never written in this method
    parent = {}
    # dict of those children in the tree who have the same parent, such
    # that a rule MIGHT apply to them
    dictBrothers = rule.find_brothers(children, parent)
    if dictBrothers == dict():  # no possible application of rule
        return tree

    # Check which list of brothers corresponds to ruleContents, i.e. where
    # the tree needs to be altered.
    for child in children:
        # look for a child that both (1) has brothers and (2) the rule
        # applies to; otherwise just continue
        if parent[child] not in dictBrothers:
            continue
        tempBrothers = dictBrothers[parent[child]]
        span = self.rule_to_children(tree, tempBrothers)
        if span == (-1, -1):
            continue

        # Found children the rule applies to: build the new subtree,
        # re-arrange the tree to fit, then recurse with depth - 1.
        start = span[0]
        end = span[1]
        newTree = Tree("(" + ruleName + ")")
        for offset in range(end - start):  # set up new tree
            newChild = tempBrothers[start + offset]
            if isinstance(tree[newChild], Tree):
                tree[newChild].node = "<" + tree[newChild].node + ">"
            else:
                # Tuple leaf: keep the first element, mark the last one.
                tree[newChild] = (tree[newChild][0],
                                  "<" + str(tree[newChild][-1]) + ">")
            newTree.append(tree[newChild])

        # attach new tree at left-most child (start)
        tree[tempBrothers[start]] = newTree
        # then mark the other old children for removal
        for offset in range(1, end - start):
            tree[tempBrothers[start + offset]] = "REMOVE"

        # Drop every placeholder.  A single remove() per subtree is not
        # enough: merging three or more siblings leaves several
        # placeholders under the same parent.
        while "REMOVE" in tree:
            tree.remove("REMOVE")
        for subtree in tree.subtrees():
            while "REMOVE" in subtree:
                subtree.remove("REMOVE")

        # now recursively chunk if there are more brothers to whom the
        # rule applies
        if (len(dictBrothers) > 1 or
                len(dictBrothers[parent[child]]) > len(ruleContents.split())):
            return self.chunk(tree, rule, depth - 1)
        return tree

    # found no children for whom rule applies, so just return tree
    return tree