本文整理汇总了 Python 中 nltk.Tree.fromstring 方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.fromstring 方法的具体用法?Python Tree.fromstring 怎么用?Python Tree.fromstring 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 nltk.Tree 的用法示例。
在下文中一共展示了Tree.fromstring方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: leaves
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def leaves(self):
    """
    Collect every leaf of this tree, left to right.

        >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        >>> t.leaves()
        ['the', 'dog', 'chased', 'the', 'cat']

    :return: a list of the tree's leaves, in the order in which they
        occur in the tree's hierarchical structure.
    :rtype: list
    """
    collected = []
    for node in self:
        if not isinstance(node, Tree):
            # Anything that is not a Tree is itself a leaf.
            collected.append(node)
            continue
        # Subtrees contribute their own leaves, recursively.
        collected.extend(node.leaves())
    return collected
示例2: subtrees
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def subtrees(self, filter=None):
    """
    Yield this tree and all of its descendant subtrees (pre-order),
    keeping only those accepted by ``filter`` when one is given.

        >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        >>> for s in t.subtrees(lambda t: t.height() == 2):
        ...     print(s)
        (D the)
        (N dog)
        (V chased)
        (D the)
        (N cat)

    :type filter: function
    :param filter: predicate applied to each subtree; a falsy ``filter``
        (the default) accepts everything.
    """
    accepted = True if not filter else filter(self)
    if accepted:
        yield self
    # Only Tree children can contain further subtrees; leaves are skipped.
    branches = (node for node in self if isinstance(node, Tree))
    for branch in branches:
        for descendant in branch.subtrees(filter):
            yield descendant
示例3: productions
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def productions(self):
    """
    Build the list of productions for the non-terminal nodes of this tree.

    A subtree of the form (P: C1 C2 ... Cn) contributes the production
    P -> C1 C2 ... Cn; the production for this node comes first, followed
    by those of its subtrees in order.

        >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        >>> t.productions()
        [S -> NP VP, NP -> D N, D -> 'the', N -> 'dog', VP -> V NP, V -> 'chased',
        NP -> D N, D -> 'the', N -> 'cat']

    :rtype: list(Production)
    :raise TypeError: if this node's label is not a string.
    """
    node_label = self._label
    if not isinstance(node_label, string_types):
        raise TypeError('Productions can only be generated from trees having node labels that are strings')
    result = [Production(Nonterminal(node_label), _child_names(self))]
    # Leaves produce nothing on their own; only Tree children recurse.
    for subtree in (node for node in self if isinstance(node, Tree)):
        result.extend(subtree.productions())
    return result
示例4: pos
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def pos(self):
    """
    Extract the (word, tag) pairs of this tree.

        >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        >>> t.pos()
        [('the', 'D'), ('dog', 'N'), ('chased', 'V'), ('the', 'D'), ('cat', 'N')]

    :return: a list of tuples pairing each leaf with the label of its
        parent node (the pre-terminal, i.e. part-of-speech tag), in the
        order the leaves occur in the tree's hierarchical structure.
    :rtype: list(tuple)
    """
    tagged = []
    for node in self:
        if not isinstance(node, Tree):
            # A leaf is tagged with the label of the node that holds it.
            tagged.append((node, self._label))
            continue
        tagged.extend(node.pos())
    return tagged
示例5: main
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def main():
    """
    Parse every ``*.edu.txt`` file found in ``args.edu_file_dir`` and write
    the pretty-printed predicted tree to ``<basename>.parse`` inside
    ``args.output_dir``.
    """
    args = parse_args()
    parser = RstParser()
    parser.load('../data/model')
    with gzip.open('../data/resources/bc3200.pickle.gz') as fin:
        print('Load Brown clusters for creating features ...')
        brown_clusters = pickle.load(fin)
    core_nlp = StanfordCoreNLP('http://localhost:9000')

    def annotate(x):
        # A fresh properties dict is built for every call.
        return core_nlp.annotate(x, properties={
            'annotators': 'tokenize,ssplit,pos,lemma,parse,depparse',
            'outputFormat': 'json',
            'ssplit.isOneSentence': True
        })

    edu_file_list = []
    for fname in os.listdir(args.edu_file_dir):
        if fname.endswith('.edu.txt'):
            edu_file_list.append(os.path.join(args.edu_file_dir, fname))

    for edu_file in edu_file_list:
        print('Parsing {}...'.format(edu_file))
        doc = create_doc_from_edu_file(edu_file, annotate_func=annotate)
        pred_rst = parser.sr_parse(doc, brown_clusters)
        tree_str = pred_rst.get_parse()
        # Round-trip through nltk's Tree only to obtain a readable layout.
        pprint_tree_str = Tree.fromstring(tree_str).pformat(margin=150)
        out_path = os.path.join(args.output_dir, os.path.basename(edu_file) + '.parse')
        with open(out_path, 'w') as fout:
            fout.write(pprint_tree_str)
示例6: add_words
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def add_words(self, file_name):
    """
    Add the words of both parsed sentences of every record in a file to
    ``self.dictionary``.

    Each non-blank line of the file is evaluated into a mapping that must
    provide the bracketed parse strings under ``'sentence1_parse'`` and
    ``'sentence2_parse'``. Each parse is turned into a tree, reduced to a
    word list by ``self.filter_words``, wrapped in ``<s>`` / ``</s>``
    markers and added word by word.

    :param file_name: path of the text file to read, one record per line.
    """
    # The context manager closes the file even when a record fails to
    # evaluate or parse (the original leaked the handle in that case).
    with open(file_name, 'r') as f_in:
        for line in f_in:
            if line.strip() == '':
                continue
            # SECURITY: eval() executes arbitrary code contained in the
            # file. Only use this on trusted data; consider json.loads or
            # ast.literal_eval if the record format allows it.
            data = eval(line)
            # sentence1 is fully processed before sentence2 is parsed,
            # preserving the original order of dictionary insertions.
            for parse_key in ('sentence1_parse', 'sentence2_parse'):
                sen_tree = Tree.fromstring(data[parse_key])
                words = ['<s>'] + self.filter_words(sen_tree) + ['</s>']
                for word in words:
                    self.dictionary.add_word(word)
示例7: main
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def main():
    """
    Parse the sample input file with the generated ANTLR lexer/parser,
    print the parse tree, and save its ``TreePrettyPrinter`` text
    rendering to the output file next to this script.
    """
    base_dir = os.path.dirname(__file__)
    infilename = os.path.join(base_dir, 'வெண்பாinput.txt')
    outfilename = os.path.join(base_dir, 'வெண்பாoutput.txt')
    # Read through a context manager so the handle is closed promptly, and
    # decode explicitly instead of relying on the platform default.
    # NOTE(review): assumes the input is UTF-8, like the output — confirm.
    with open(infilename, encoding='utf8') as f:
        data = f.read()
    input_stream = antlr4.InputStream(data)
    lexer = வெண்பாLexer(input_stream)
    stream = CommonTokenStream(lexer)
    parser = வெண்பாParser(stream)
    tree = parser.வெண்பா()
    strtree = Trees.toStringTree(tree, None, parser)
    print(strtree)
    # Rebuild the bracketed string as an nltk tree so it can be rendered.
    t = nltkTree.fromstring(strtree)
    a = TreePrettyPrinter(t).text()
    print(a)
    with open(outfilename, 'w', encoding='utf8') as f:
        f.write(a)
示例8: productions
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def productions(self):
    """
    Return the productions corresponding to the non-terminal nodes of
    this tree.

    Every subtree of the form (P: C1 C2 ... Cn) yields one production of
    the form P -> C1 C2 ... Cn. This node's production is listed first,
    then those of its subtrees, left to right.

        >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        >>> t.productions()
        [S -> NP VP, NP -> D N, D -> 'the', N -> 'dog', VP -> V NP, V -> 'chased',
        NP -> D N, D -> 'the', N -> 'cat']

    :rtype: list(Production)
    :raise TypeError: if this node's label is not a string.
    """
    if isinstance(self._label, string_types):
        own_production = Production(Nonterminal(self._label), _child_names(self))
        collected = [own_production]
        for node in self:
            if not isinstance(node, Tree):
                # Leaves have no production of their own.
                continue
            collected += node.productions()
        return collected
    raise TypeError(
        'Productions can only be generated from trees having node labels that are strings'
    )
示例9: label
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def label(self):
    """
    Give back the label stored on this tree's root node.

        >>> t = Tree.fromstring('(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))')
        >>> t.label()
        'S'

    :return: the node label (typically a string)
    :rtype: any
    """
    node_label = self._label
    return node_label
示例10: set_label
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def set_label(self, label):
    """
    Replace the label stored on this tree's root node.

        >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        >>> t.set_label("T")
        >>> print(t)
        (T (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))

    :param label: the node label (typically a string)
    :type label: any
    """
    setattr(self, '_label', label)
示例11: flatten
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def flatten(self):
    """
    Build a one-level copy of the tree in which every non-terminal other
    than the root has been removed.

        >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        >>> print(t.flatten())
        (S the dog chased the cat)

    :return: a tree whose root carries this tree's label and whose
        children are this tree's leaves, with all intervening
        non-terminal nodes omitted.
    :rtype: Tree
    """
    root_label = self.label()
    all_leaves = self.leaves()
    return Tree(root_label, all_leaves)
示例12: height
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def height(self):
    """
    Compute the height of the tree.

        >>> t = Tree.fromstring("(S (NP (D the) (N dog)) (VP (V chased) (NP (D the) (N cat))))")
        >>> t.height()
        5
        >>> print(t[0,0])
        (D the)
        >>> t[0,0].height()
        2

    :return: The height of this tree: 1 for a tree with no children,
        2 for a tree containing only leaves, and otherwise one plus the
        largest height among its children.
    :rtype: int
    """
    deepest = 0
    for node in self:
        # A leaf counts as height 1; a subtree reports its own height.
        depth = node.height() if isinstance(node, Tree) else 1
        if depth > deepest:
            deepest = depth
    return deepest + 1
示例13: sinica_parse
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def sinica_parse(s):
    """
    Convert a Sinica Treebank string into a tree. The input uses nested
    bracketings, as in the following example (X represents a Chinese
    character):
    S(goal:NP(Head:Nep:XX)|theme:NP(Head:Nhaa:X)|quantity:Dab:X|Head:VL2:X)#0(PERIODCATEGORY)

    :param s: The string to be converted
    :type s: str
    :return: A tree corresponding to the string representation.
    :rtype: Tree
    """
    # The capture group keeps the delimiters as tokens of their own.
    tokens = re.split(r'([()| ])', s)
    for idx, tok in enumerate(tokens):
        if tok == '(':
            # Swap with the preceding token so the nonterminal ends up
            # inside the parentheses.
            tokens[idx - 1], tokens[idx] = tok, tokens[idx - 1]
        elif ':' in tok:
            parts = tok.split(':')
            if len(parts) == 2:
                # role:label -> keep only the non-terminal label
                tokens[idx] = parts[1]
            else:
                # longer forms -> "(second-to-last last)"
                tokens[idx] = "({} {})".format(parts[-2], parts[-1])
        elif tok == '|':
            tokens[idx] = ''
    return Tree.fromstring(" ".join(tokens), remove_empty_top_bracketing=True)
# s = re.sub(r'^#[^\s]*\s', '', s) # remove leading identifier
# s = re.sub(r'\w+:', '', s) # remove role tags
# return s
######################################################################
## Demonstration
######################################################################
示例14: process
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def process(self, data_bundle: DataBundle):
    r"""
    Preprocess the data in the DataBundle. Each input DataSet must have at
    least a ``raw_words`` column whose content looks like

    .. csv-table:: Fields of a DataSet read with SSTLoader
        :header: "raw_words"

        "(2 (3 (3 Effective) (2 but)) (1 (1 too-tepid)..."
        "(3 (3 (2 If) (3 (2 you) (3 (2 sometimes) ..."
        "..."

    :param ~fastNLP.io.DataBundle data_bundle: the DataBundle to process
    :return: the result of the parent class's ``process`` applied to the
        rewritten, granularized bundle
    """
    # First rebuild each dataset from its parse trees. Snapshot the names
    # because set_dataset is called on the bundle inside the loop.
    for name in list(data_bundle.datasets.keys()):
        source = data_bundle.get_dataset(name)
        rebuilt = DataSet()
        expand = self.subtree or (name == 'train' and self.train_tree)
        for ins in source:
            tree = Tree.fromstring(ins[Const.RAW_WORD])
            # Either one instance per subtree, or a single instance for
            # the whole tree.
            nodes = tree.subtrees() if expand else [tree]
            for node in nodes:
                rebuilt.append(
                    Instance(raw_words=' '.join(node.leaves()), target=node.label())
                )
        data_bundle.set_dataset(rebuilt, name)

    # Then set the tags according to the configured granularity.
    data_bundle = _granularize(data_bundle, tag_map=self.tag_map)
    return super().process(data_bundle)
示例15: _get_one
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import fromstring [as 别名]
def _get_one(data, subtree):
    """
    Parse a bracketed tree string into ``(leaves, label)`` pairs.

    :param data: bracketed tree string passed to ``Tree.fromstring``
    :param subtree: when truthy, return one pair per subtree of the parsed
        tree; otherwise return a single pair for the whole tree
    :return: a list of ``(leaves, label)`` tuples
    """
    root = Tree.fromstring(data)
    nodes = root.subtrees() if subtree else [root]
    return [(node.leaves(), node.label()) for node in nodes]