本文整理汇总了Python中nltk.tree.Tree.label方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.label方法的具体用法?Python Tree.label怎么用?Python Tree.label使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.tree.Tree的用法示例。
在下文中一共展示了Tree.label方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: attach_tree
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import label [as 别名]
def attach_tree(head, dep, attachment, chain, indexes, flag, coindex=None):
    # head, dep: trees; flag: 'right'/'left'
    """Attach dep's projection chain to head's projection chain.

    Parameters
    ----------
    head : tree-like
        Tree that receives ``dep``; modified in place. When its label is
        ``'PRN'`` the walk starts from ``head[0]`` instead of ``head``.
    dep : tree-like
        Subtree to insert. When ``coindex`` is an int it is replaced by a
        new ``Tree`` carrying ``dep``'s label and the single leaf
        ``'*-<coindex>'``.
    attachment : dict
        Read for ``'label'``/``'offset'`` (or ``'label2'``/``'offset2'``
        when ``coindex`` is an int) and, optionally, ``'f_tag'``.
    chain : sequence
        ``chain[0]`` is a sequence of entries whose item 0 is a label and
        whose item 1 is a number; the numbers up to and including the entry
        matching the attachment label are summed (minus the offset) to get
        the depth of the attachment level.
    indexes : list of int
        Child index to follow at each depth; ``indexes[depth - 1]`` is
        bumped by one in place for a ``'left'`` attachment.
        # presumably these track where head's own projection sits at each
        # level -- confirm against the caller
    flag : str
        ``'right'`` inserts just after position ``indexes[depth - 1]``,
        ``'left'`` inserts at that position.
    coindex : int, optional
        See ``dep``.

    Returns
    -------
    tuple or str
        ``(head, indexes)`` on success, or the string ``"Invalid flag!"``
        when ``flag`` is neither ``'right'`` nor ``'left'`` (kept as a
        return value for backward compatibility with existing callers).
    """
    if isinstance(coindex, int):  # handle coindex tag
        label = attachment['label2']
        offset = attachment['offset2']
        dep = Tree(dep.label(), ['*-' + str(coindex)])
    else:
        label = attachment['label']
        offset = attachment['offset']

    labels = [level[0] for level in chain[0]]
    counts = [level[1] for level in chain[0]]
    l_index = labels.index(label)
    count = sum(counts[:l_index + 1]) - offset

    if flag == 'right':
        a_index = indexes[count - 1] + 1
    elif flag == 'left':
        a_index = indexes[count - 1]
        indexes[count - 1] += 1
    else:
        return "Invalid flag!"

    # Walk down to the node that receives dep by plain indexing; the
    # previous implementation assembled the same path as source text and
    # ran it through eval().
    target = head[0] if head.label() == 'PRN' else head
    for depth in range(count - 1):
        target = target[indexes[depth]]
    target.insert(a_index, dep)  # insert() vs pop()

    if 'f_tag' in attachment:
        if attachment['f_tag'] in {'PRD', 'PRDs'}:
            # For PRD/PRDs the tag goes one level further down, on the
            # child at indexes[count - 1] (read after any 'left' bump).
            target = target[indexes[count - 1]]
        target.set_label(target.label() + "-" + attachment["f_tag"])
    return head, indexes
示例2: munge
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import label [as 别名]
def munge(t):
    """Rebuild ``t`` so that every node, including each leaf, is a ``Tree``.

    A ``Tree`` input is copied recursively into a new ``Tree`` with the same
    label, and the copy gets a ``tokens`` attribute holding the leaves of
    the original subtree. Anything else is treated as a leaf and wrapped as
    a childless ``Tree`` whose label is the leaf value.

    ``isinstance`` is used (rather than an exact type comparison) so that
    ``Tree`` subclasses are recursed into instead of being wrapped whole as
    the label of a childless node.
    """
    if isinstance(t, Tree):
        # Capture the leaves before rebuilding: in the rebuilt tree every
        # leaf has itself become a childless Tree.
        toks = t.leaves()
        rebuilt = Tree(t.label(), [munge(child) for child in t])
        rebuilt.tokens = toks
        return rebuilt
    return Tree(t, [])
示例3: _strip_functional_tags
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import label [as 别名]
def _strip_functional_tags(self, tree: Tree) -> None:
    """
    Drop functional tags from the constituency labels of an NLTK tree.

    Each visited label is cut at its first ``=``, then at its first ``-``,
    then at its first ``|``; only the part before those characters is kept.
    The tree is modified in place and nothing is returned.

    Children whose first element is a string (a word leaf) are not visited,
    so their labels are left as they are.
    """
    label = tree.label()
    for separator in ("=", "-", "|"):
        label = label.partition(separator)[0]
    tree.set_label(label)

    for child in tree:
        if isinstance(child[0], str):
            continue
        self._strip_functional_tags(child)
示例4: build_tree
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import label [as 别名]
def build_tree(node, chain):  # -> handle function tags
    """Return the phrase-structure tree of node's projection chain.

    The preterminal is labelled with ``node['tag']`` (plus ``'-WH'`` for a
    wh-lemma that is not tagged ``'CONJ'``) and dominates ``node['word']``.
    It is then wrapped once per unit of item 1 of every ``chain[0]`` entry,
    using item 0 as the label and walking ``chain[0]`` from last to first.
    A truthy ``chain[1]`` is finally applied: ``'PRN'`` adds one more
    enclosing node, any other value is appended to the top label after a
    ``'-'``.
    """
    tag = node['tag']
    is_wh = (
        'lemma' in node  # trace nodes carry no lemma
        and node['lemma'].lower() in wh_lemmas
        and node['tag'] != 'CONJ'
    )
    if is_wh:  # WH feature
        tag += '-WH'

    projection = Tree(tag, [node['word']])
    for level in reversed(chain[0]):
        for _ in range(level[1]):
            projection = Tree(level[0], [projection])

    suffix = chain[1]
    if not suffix:
        return projection
    if suffix == 'PRN':
        return Tree(suffix, [projection])
    projection.set_label(projection.label() + '-' + suffix)
    return projection
示例5: _get_gold_spans
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import label [as 别名]
def _get_gold_spans(self,  # pylint: disable=arguments-differ
                    tree: Tree,
                    index: int,
                    typed_spans: Dict[Tuple[int, int], str]) -> int:
    """
    Walk an nltk ``Tree`` and record a label for every constituent span.

    Spans are ``(start, end)`` pairs with an inclusive end. When nested
    constituents cover exactly the same span, their labels are joined with
    ``-`` in parent-child order: ``(S (NP (D the) (N man)))`` yields the
    label ``S-NP`` for the outer span, since both ``S`` and ``NP`` cover it.

    Nodes whose first child is a string (NLTK leaves are strings) get no
    entry of their own; they only advance the running index.

    TODO(Mark): If we encounter a gold nested labelling at test time
    which we haven't encountered, we won't be able to run the model
    at all.

    Parameters
    ----------
    tree : ``Tree``, required.
        An NLTK parse tree to extract spans from.
    index : ``int``, required.
        The index at which ``tree`` starts in the sentence being considered.
    typed_spans : ``Dict[Tuple[int, int], str]``, required.
        A dictionary mapping spans to span labels; it is updated in place.

    Returns
    -------
    end : ``int``
        The index one past the last position covered by ``tree``.
    """
    if isinstance(tree[0], str):
        # NLTK defines the length of a tree as its number of children, so
        # this moves the index past the word(s) under this node.
        return index + len(tree)

    cursor = index
    for subtree in tree:
        # Each recursive call records the spans below it and reports where
        # the next sibling begins.
        cursor = self._get_gold_spans(subtree, cursor, typed_spans)

    # The stored span is inclusive, hence the - 1.
    key = (index, cursor - 1)
    inner_label = typed_spans.get(key)
    if inner_label is None:
        typed_spans[key] = tree.label()
    else:
        # A descendant already claimed this span; since the walk is bottom
        # up, the current (outer) label is put in front.
        typed_spans[key] = tree.label() + "-" + inner_label
    return cursor
示例6: traverse
# 需要导入模块: from nltk.tree import Tree [as 别名]
# 或者: from nltk.tree.Tree import label [as 别名]
def traverse(node):
    """Recursively convert a DOM element into an NLTK ``Tree``.

    ``self`` and ``Tree`` are taken from the enclosing scope, which lies
    outside this excerpt; the code reads ``self._pos_map``,
    ``self._join_clitics``, ``self._join_verb_parts`` and
    ``self._tokenizer`` (``before_verbs``, ``after_verbs``, ``verbe``).

    Parameters
    ----------
    node : DOM element
        Must expose ``childNodes`` and ``tagName``; a node whose first child
        has ``tagName == 'w'`` becomes a one-leaf tree.

    Returns
    -------
    Tree or None
        ``None`` when ``node`` has no child nodes, otherwise a ``Tree``
        labelled with ``node.tagName`` whose leaves are ``(word, tag)``
        tuples.
    """

    def extract_tags(W):
        """Collect the tag-related attributes of a word element, in order."""
        pos = [W.getAttribute('lc') or None]
        if W.getAttribute('clitic') in {'ezafe', 'pronominal', 'verb', 'prep', 'adv', 'det'}:
            pos.append(W.getAttribute('clitic'))
        for attribute in ('ne_sort', 'n_type', 'ya_type', 'ke_type', 'type', 'kind'):
            value = W.getAttribute(attribute)
            if value:
                pos.append(value)
        return pos

    def clitic_join(tree, clitic):
        """Glue the clitic's word onto the right-most leaf under ``tree``."""
        if isinstance(tree[-1], Tree):
            return clitic_join(tree[-1], clitic)
        if clitic[0][0][0] == 'ا':
            # NOTE(review): as written this prepends an empty string, which
            # is a no-op; presumably a zero-width non-joiner was intended
            # here -- confirm against the upstream source.
            clitic[0] = ('' + clitic[0][0], clitic[0][1])
        # The merged leaf keeps the clitic's tag, and the node that directly
        # holds it is relabelled.
        tree[-1] = (tree[-1][0] + clitic[0][0], clitic[0][1])
        tree.set_label('CLITICS')
        return None

    if not node.childNodes:
        return None

    first = node.childNodes[0]
    if first.tagName == 'w':
        pos = extract_tags(first)
        # NOTE(review): the replacement below drops the space after the
        # prefix as written; upstream may use a zero-width non-joiner in
        # the replacement string -- confirm.
        return Tree(node.tagName, [(first.childNodes[0].data.replace('می ', 'می'), self._pos_map(pos))])

    candidates = node.childNodes[2:] if node.tagName == 'S' else node.childNodes
    # Build a filtered copy instead of calling remove() on the list being
    # iterated: in-place removal skips the element that follows each removed
    # one and, for non-'S' nodes, also mutated the DOM's own childNodes.
    childs = [child for child in candidates if len(child.childNodes)]
    tree = Tree(node.tagName, [traverse(child) for child in childs])

    if self._join_clitics and len(tree) > 1 and isinstance(tree[1], Tree) and tree[1].label() == 'CLITIC' and tree[1][0][1] not in {'P', 'V'}:
        clitic = tree[-1]
        # Replace the node by the children of its first subtree, then merge
        # the clitic into the right-most leaf of that rebuilt node.
        tree = Tree(tree.label(), list(tree[0]))
        clitic_join(tree, clitic)

    if self._join_verb_parts and len(tree) > 1 and isinstance(tree[1], Tree) and isinstance(tree[0], Tree) and tree[0].label() == 'AUX' and tree[0][0][0] in self._tokenizer.before_verbs:
        # Prefix the auxiliary's word onto the following leaf and drop the
        # auxiliary subtree.
        tree[1][0] = (tree[0][0][0] + ' ' + tree[1][0][0], tree[1][0][1])
        tree.remove(tree[0])

    # The original code contains this pass twice, back to back, with the
    # condition re-evaluated each time; that behavior is kept.
    # NOTE(review): presumably the second pass absorbs a second stacked
    # verb part -- confirm against upstream.
    for _ in range(2):
        if self._join_verb_parts and len(tree.leaves()) > 1 and tree.leaves()[-1][0] in self._tokenizer.after_verbs and tree.leaves()[-2][0] in self._tokenizer.verbe:
            tree[1][0] = (tree[0].leaves()[-1][0] + ' ' + tree[1][0][0], tree[1][0][1])
            path = tree.leaf_treeposition(len(tree.leaves())-2)
            removingtree = tree
            # Follow the leaf's tree position until two path components
            # remain, then remove the matching subtree from that node.
            while len(path) > 2:
                removingtree = removingtree[path[0]]
                path = path[1:]
            removingtree.remove(Tree(tree.pos()[-2][1], [tree.pos()[-2][0]]))
    return tree