本文整理汇总了Python中nltk.tree模块的典型用法代码示例。如果您正苦于以下问题：Python nltk.tree的具体用法？Python nltk.tree怎么用？Python nltk.tree使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该模块所在的包nltk的用法示例。
在下文中一共展示了nltk.tree模块的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: tree2conlltags
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def tree2conlltags(t):
    """
    Return a list of 3-tuples containing ``(word, tag, IOB-tag)``.
    Convert a tree to the CoNLL IOB tag format.

    Children of ``t`` that expose ``label()`` are treated as chunks: their
    first leaf is tagged ``B-<label>`` and the remaining leaves
    ``I-<label>``.  Every other child is treated as a ``(word, tag)`` pair
    outside any chunk and tagged ``O``.

    :param t: The tree to be converted.
    :type t: Tree
    :rtype: list(tuple)
    :raise ValueError: if a chunk itself contains a ``Tree``.
    """
    tags = []
    for child in t:
        # Only the label lookup decides "chunk or plain token".  Keeping the
        # ``try`` this narrow means an AttributeError raised while reading a
        # chunk's leaves is reported instead of being mistaken for a token.
        try:
            category = child.label()
        except AttributeError:
            tags.append((child[0], child[1], "O"))
            continue

        prefix = "B-"
        for contents in child:
            if isinstance(contents, Tree):
                raise ValueError("Tree is too deeply nested to be printed in CoNLL format")
            tags.append((contents[0], contents[1], prefix + category))
            prefix = "I-"  # every leaf after the first continues the chunk
    return tags
示例2: ieer_headlines
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def ieer_headlines():
    """Print the document number and headline of the first 20 IEER documents."""
    from nltk.corpus import ieer
    # Tree is not referenced below; the import is retained so the function's
    # import side effects stay exactly as they were.
    from nltk.tree import Tree

    print("IEER: First 20 Headlines")
    print("=" * 45)

    # Collect (docno, headline) for every parsed document of every file.
    headlines = []
    for fileid in ieer.fileids():
        for doc in ieer.parsed_docs(fileid):
            headlines.append((doc.docno, doc.headline))

    for docno, headline in headlines[:20]:
        print()
        print("%s:\n%s" % (docno, headline))
#############################################
## Dutch CONLL2002: take_on_role(PER, ORG
#############################################
示例3: tree2conlltags
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def tree2conlltags(t):
    """
    Return a list of 3-tuples containing ``(word, tag, IOB-tag)``.
    Convert a tree to the CoNLL IOB tag format.

    Children of ``t`` that carry a node label are treated as chunks: their
    first leaf is tagged ``B-<label>`` and the remaining leaves
    ``I-<label>``.  Every other child is treated as a ``(word, tag)`` pair
    outside any chunk and tagged ``O``.

    :param t: The tree to be converted.
    :type t: Tree
    :rtype: list(tuple)
    :raise ValueError: if a chunk itself contains a ``Tree``.
    """
    tags = []
    for child in t:
        # Only the label lookup decides "chunk or plain token", so the
        # ``try`` covers nothing else.
        try:
            # NLTK 3 exposes the label through ``label()``; earlier releases
            # stored it in the ``node`` attribute.  Accept either.
            get_label = getattr(child, "label", None)
            category = get_label() if callable(get_label) else child.node
        except AttributeError:
            tags.append((child[0], child[1], "O"))
            continue

        prefix = "B-"
        for contents in child:
            if isinstance(contents, Tree):
                # Call form of ``raise`` is valid on both Python 2 and 3.
                raise ValueError("Tree is too deeply nested to be printed in CoNLL format")
            tags.append((contents[0], contents[1], prefix + category))
            prefix = "I-"  # every leaf after the first continues the chunk
    return tags
示例4: _trace_production
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def _trace_production(self, production, p, span, width):
    """
    Print trace output indicating that a given production has been
    applied at a given location.

    The line is a diagram of ``width`` cells between two ``|`` delimiters,
    with ``=`` marking the cells inside ``span`` and ``.`` the rest,
    followed by the production.  When ``self._trace`` is greater than 2 the
    probability ``p`` is appended in a fixed-width column.

    :param production: The production that has been applied
    :type production: Production
    :param p: The probability of the tree produced by the production.
    :type p: float
    :param span: The span of the production
    :type span: tuple
    :param width: The number of cells drawn in the diagram.
    :type width: int
    :rtype: None
    """
    # Named ``line`` rather than ``str`` so the builtin is not shadowed.
    line = '|' + '.' * span[0]
    line += '=' * (span[1] - span[0])
    line += '.' * (width - span[1]) + '| '
    line += '%s' % production
    if self._trace > 2:
        line = '%-40s %12.10f ' % (line, p)
    # Function-call form works on both Python 2 and Python 3.
    print(line)
示例5: ieer_headlines
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def ieer_headlines():
    """Print the document number and headline of the first 20 IEER documents."""
    from nltk.corpus import ieer
    from nltk.tree import Tree

    print("IEER: First 20 Headlines")
    print("=" * 45)

    # Keep each headline paired with its own docno.  Reading ``doc`` after
    # the comprehension labelled every headline with the *last* document's
    # number on Python 2 and is a NameError on Python 3.
    headlines = [
        (doc.docno, doc.headline)
        for fileid in ieer.fileids()
        for doc in ieer.parsed_docs(fileid)
    ]
    for docno, headline in headlines[:20]:
        print("")  # blank separator line; same output on Python 2 and 3
        print("%s:\n%s" % (docno, headline))
#############################################
## Dutch CONLL2002: take_on_role(PER, ORG
#############################################
示例6: tree2conlltags
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def tree2conlltags(t):
    """
    Return a list of 3-tuples containing ``(word, tag, IOB-tag)``.
    Convert a tree to the CoNLL IOB tag format.

    A child of ``t`` that has a ``label()`` method is a chunk; its leaves
    are tagged ``B-<label>`` (first leaf) and ``I-<label>`` (the rest).
    Any other child is taken to be a ``(word, tag)`` pair and tagged ``O``.

    :param t: The tree to be converted.
    :type t: Tree
    :rtype: list(tuple)
    :raise ValueError: if a chunk itself contains a ``Tree``.
    """
    tags = []
    for child in t:
        # Guard only the label lookup: an AttributeError from anywhere else
        # is a genuine error and must not be reinterpreted as an "O" token.
        try:
            category = child.label()
        except AttributeError:
            tags.append((child[0], child[1], "O"))
            continue

        prefix = "B-"
        for contents in child:
            if isinstance(contents, Tree):
                raise ValueError(
                    "Tree is too deeply nested to be printed in CoNLL format"
                )
            tags.append((contents[0], contents[1], prefix + category))
            prefix = "I-"  # subsequent leaves are inside the same chunk
    return tags
示例7: ieer_headlines
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def ieer_headlines():
    """Print a banner followed by docno/headline for the first 20 IEER docs."""
    from nltk.corpus import ieer
    # Not referenced in this function; kept so import behaviour is unchanged.
    from nltk.tree import Tree

    title = "IEER: First 20 Headlines"
    print(title)
    print("=" * 45)

    entries = [
        (document.docno, document.headline)
        for fileid in ieer.fileids()
        for document in ieer.parsed_docs(fileid)
    ]
    first_twenty = entries[:20]
    for entry in first_twenty:
        print()
        # ``entry`` is a 2-tuple, so it fills both ``%s`` placeholders.
        print("%s:\n%s" % entry)
#############################################
## Dutch CONLL2002: take_on_role(PER, ORG
#############################################
示例8: accuracy
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def accuracy(chunker, gold):
    """
    Score the accuracy of the chunker against the gold standard.

    Each gold tree is flattened, re-chunked with ``chunker``, and both the
    gold tree and the re-chunked tree are converted to CoNLL tags; the two
    accumulated tag sequences are then compared with ``_accuracy``.

    :type chunker: ChunkParserI
    :param chunker: The chunker being evaluated.
    :type gold: tree
    :param gold: The chunk structures to score the chunker on.
    :rtype: float
    """
    reference_tags, predicted_tags = [], []
    for gold_tree in gold:
        rechunked = chunker.parse(gold_tree.flatten())
        reference_tags.extend(tree2conlltags(gold_tree))
        predicted_tags.extend(tree2conlltags(rechunked))
    return _accuracy(reference_tags, predicted_tags)
# Patched for increased performance by Yoav Goldberg <yoavg@cs.bgu.ac.il>, 2006-01-13
# -- statistics are evaluated only on demand, instead of at every sentence evaluation
#
# SB: use nltk.metrics for precision/recall scoring?
#
示例9: conlltags2tree
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def conlltags2tree(sentence, chunk_types=('NP','PP','VP'),
                   root_label='S', strict=False):
    """
    Convert the CoNLL IOB format to a tree.

    :param sentence: iterable of ``(word, postag, chunktag)`` triples.
    :param chunk_types: accepted for interface compatibility; this
        implementation does not consult it.
    :param root_label: label given to the root of the returned tree.
    :param strict: if true, a ``None`` chunk tag or an ``I-`` tag that does
        not continue the preceding chunk raises instead of being repaired.
    :return: the tree built from ``sentence``.
    :raise ValueError: on an unrecognised chunk tag, or on a bad tag
        sequence when ``strict`` is true.
    """
    tree = Tree(root_label, [])
    for (word, postag, chunktag) in sentence:
        token = (word, postag)

        if chunktag is None:
            if strict:
                raise ValueError("Bad conll tag sequence")
            # Lenient mode: a missing tag is treated as O.
            tree.append(token)
        elif chunktag.startswith('B-'):
            tree.append(Tree(chunktag[2:], [token]))
        elif chunktag.startswith('I-'):
            chunk_label = chunktag[2:]
            continues_chunk = (
                len(tree) != 0
                and isinstance(tree[-1], Tree)
                and tree[-1].label() == chunk_label
            )
            if continues_chunk:
                tree[-1].append(token)
            elif strict:
                raise ValueError("Bad conll tag sequence")
            else:
                # Lenient mode: an orphaned I- tag opens a new chunk (as B-).
                tree.append(Tree(chunk_label, [token]))
        elif chunktag == 'O':
            tree.append(token)
        else:
            raise ValueError("Bad conll tag %r" % chunktag)
    return tree
示例10: _untag
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def _untag(self, tree):
    """
    Replace every tuple child of ``tree`` with its first element, in place.

    Nested ``Tree`` children are processed recursively.

    :param tree: the tree to modify.
    :return: the same ``tree`` object, after modification.
    :raise ValueError: if a child is neither a ``Tree`` nor a tuple.
    """
    for position, node in enumerate(tree):
        if isinstance(node, Tree):
            self._untag(node)
            continue
        if not isinstance(node, tuple):
            raise ValueError('expected child to be Tree or tuple')
        tree[position] = node[0]
    return tree
示例11: tree2semi_rel
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def tree2semi_rel(tree):
    """
    Group a chunk structure into a list of 'semi-relations' of the form (list(str), ``Tree``).

    In order to facilitate the construction of (``Tree``, string, ``Tree``) triples, this
    identifies pairs whose first member is a list (possibly empty) of terminal
    strings, and whose second member is a ``Tree`` of the form (NE_label, terminals).

    Terminals that follow the last ``Tree`` child are not part of any
    returned pair.

    :param tree: a chunk tree
    :return: a list of pairs (list(str), ``Tree``)
    :rtype: list of tuple
    """
    from nltk.tree import Tree

    pairs = []
    pending = []  # terminals seen since the previous Tree child
    for dtr in tree:
        if isinstance(dtr, Tree):
            pairs.append([pending, dtr])
            pending = []
        else:
            pending.append(dtr)
    return pairs
示例12: get_object
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def get_object(self, sub_tree):
"""
Return ``{'object': leaves}`` for a matching subtree of ``sub_tree``.

``leaves`` is the ``leaves()`` of a subtree whose label is in
``self.noun_types`` or ``self.adjective_types``, or ``None`` if no such
subtree is found.  ``self.pred_verb_phrase_siblings`` is reset to
``None`` before returning.

NOTE(review): the indentation of this snippet has been lost, so the
nesting of the two consecutive ``break`` statements and of the ``else``
cannot be determined from the text alone -- confirm against the upstream
source before relying on any finer-grained description of the search.
"""
# Read but never used afterwards in this function.
siblings = self.pred_verb_phrase_siblings
Object = None
for each_tree in sub_tree:
# Children labelled NP or PP are searched for a noun-type subtree.
if each_tree.label() in ["NP", "PP"]:
sub_nodes = each_tree.subtrees()
# Keep only subtrees whose pos() result is truthy.
sub_nodes = [each for each in sub_nodes if each.pos()]
for each in sub_nodes:
if each.label() in self.noun_types:
Object = each.leaves()
break
break
else:
# This branch searches for an adjective-type subtree instead.
sub_nodes = each_tree.subtrees()
# Keep only subtrees whose pos() result is truthy.
sub_nodes = [each for each in sub_nodes if each.pos()]
for each in sub_nodes:
if each.label() in self.adjective_types:
Object = each.leaves()
break
# Get first noun in the tree
self.pred_verb_phrase_siblings = None
return {'object': Object}
示例13: conlltags2tree
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def conlltags2tree(sentence, chunk_types=('NP','PP','VP'),
                   top_node='S', strict=False):
    """
    Convert the CoNLL IOB format to a tree.

    :param sentence: iterable of ``(word, postag, chunktag)`` triples.
    :param chunk_types: accepted for interface compatibility; this
        implementation does not consult it.
    :param top_node: label given to the root of the returned tree.
    :param strict: if true, a ``None`` chunk tag or an ``I-`` tag that does
        not continue the preceding chunk raises instead of being repaired.
    :return: the tree built from ``sentence``.
    :raise ValueError: on an unrecognised chunk tag, or on a bad tag
        sequence when ``strict`` is true.
    """
    tree = Tree(top_node, [])
    for (word, postag, chunktag) in sentence:
        token = (word, postag)

        if chunktag is None:
            if strict:
                raise ValueError("Bad conll tag sequence")
            # Lenient mode: a missing tag is treated as O.
            tree.append(token)
        elif chunktag.startswith('B-'):
            tree.append(Tree(chunktag[2:], [token]))
        elif chunktag.startswith('I-'):
            chunk_label = chunktag[2:]
            continues_chunk = False
            if len(tree) != 0 and isinstance(tree[-1], Tree):
                previous = tree[-1]
                # NLTK 3 exposes the label through ``label()``; earlier
                # releases stored it in the ``node`` attribute.  Accept
                # either so the function works with both APIs.
                get_label = getattr(previous, "label", None)
                previous_label = get_label() if callable(get_label) else previous.node
                continues_chunk = previous_label == chunk_label

            if continues_chunk:
                tree[-1].append(token)
            elif strict:
                raise ValueError("Bad conll tag sequence")
            else:
                # Lenient mode: an orphaned I- tag opens a new chunk (as B-).
                tree.append(Tree(chunk_label, [token]))
        elif chunktag == 'O':
            tree.append(token)
        else:
            raise ValueError("Bad conll tag %r" % chunktag)
    return tree
示例14: parse
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def parse(self, tokens):
    """
    Build the most-likely-constituents table for ``tokens`` and return the
    entry that spans the whole text with the grammar's start symbol.

    :param tokens: iterable of tokens; it is copied into a list first.
    :return: the constituent stored under
        ``(0, len(tokens), self._grammar.start())``, or ``None`` if the
        table has no such entry.
    """
    # Inherit docs from ParserI
    tokens = list(tokens)
    self._grammar.check_coverage(tokens)

    # The most likely constituent table.  This table specifies the
    # most likely constituent for a given span and type.
    # Constituents can be either Trees or tokens.  For Trees,
    # the "type" is the Nonterminal for the tree's root node
    # value.  For Tokens, the "type" is the token's type.
    # The table is stored as a dictionary, since it is sparse.
    constituents = {}

    # Initialize the constituents dictionary with the words from
    # the text.
    if self._trace:
        print('Inserting tokens into the most likely' +
              ' constituents table...')
    for index, token in enumerate(tokens):
        constituents[index, index + 1, token] = token
        if self._trace > 1:
            self._trace_lexical_insertion(token, index, len(tokens))

    # Consider each span of length 1, 2, ..., n; and add any trees
    # that might cover that span to the constituents dictionary.
    for length in range(1, len(tokens) + 1):
        if self._trace:
            print('Finding the most likely constituents' +
                  ' spanning %d text elements...' % length)
        for start in range(len(tokens) - length + 1):
            span = (start, start + length)
            self._add_constituents_spanning(span, constituents, tokens)

    # Return the tree that spans the entire text & have the right cat
    return constituents.get((0, len(tokens), self._grammar.start()))
示例15: _find_instantiations
# 需要导入模块: import nltk [as 别名]
# 或者: from nltk import tree [as 别名]
def _find_instantiations(self, span, constituents):
    """
    :return: a list of the production instantiations that cover a
        given span of the text.  A "production instantiation" is
        a tuple containing a production and a list of children,
        where the production's right hand side matches the list of
        children; and the children cover ``span``.
    :rtype: list of ``pair`` of (``Production``, list of
        (``ProbabilisticTree`` or token))

    :type span: tuple(int, int)
    :param span: The section of the text for which we are
        trying to find production instantiations.  The span is
        specified as a pair of integers, where the first integer
        is the index of the first token that should be covered by
        the production instantiation; and the second integer is
        the index of the first token that should not be covered by
        the production instantiation.
    :type constituents: dict(tuple(int,int,Nonterminal) -> ProbabilisticToken or ProbabilisticTree)
    :param constituents: The most likely constituents table.  This
        table records the most probable tree representation for
        any given span and node value.  See the module
        documentation for more information.
    """
    # One (production, childlist) pair per child list that
    # ``_match_rhs`` reports for each production, in grammar order.
    return [
        (production, childlist)
        for production in self._grammar.productions()
        for childlist in self._match_rhs(production.rhs(), span, constituents)
    ]