This article collects typical usage examples of the Python method nltk.tree.Tree.fromstring. If you are unsure what exactly Tree.fromstring does or how to use it, the curated code examples below should help; you can also explore the containing class, nltk.tree.Tree, for further details.
The following presents 15 code examples of Tree.fromstring, sorted by popularity by default.
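Before the examples, here is a minimal, self-contained sketch of what Tree.fromstring does: it parses a Penn-Treebank-style bracketed string into an nltk.tree.Tree object whose nodes can be indexed and traversed.

from nltk.tree import Tree

# Parse a PTB-style bracketed string into a Tree object.
t = Tree.fromstring("(S (NP (DT the) (NN cat)) (VP (VBZ sleeps)))")

print(t.label())   # 'S' -- label of the root node
print(t.leaves())  # ['the', 'cat', 'sleeps']
print(t[0])        # (NP (DT the) (NN cat)) -- subtrees are indexable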
Example 1: parse_tree
# Required import: from nltk.tree import Tree
def parse_tree(self, text, binary=False, preprocessed=False):
    nlp_output = self.nlp.annotate(text, properties={
        'annotators': 'tokenize,ssplit,pos,parse',
        'outputFormat': 'json',
        'parse.binaryTrees': 'true'
    })
    if isinstance(nlp_output, str):
        nlp_output = json.loads(nlp_output, strict=False)

    if len(nlp_output['sentences']) > 1:
        # merge the trees from all sentences under a single top node
        tree_string = "(Top "
        for s in nlp_output['sentences']:
            p_tree = Tree.fromstring(s['parse'])
            tree_string += str(p_tree[0])
        tree_string += ")"
        merged_tree = Tree.fromstring(tree_string)
    else:
        # no merging required
        merged_tree = Tree.fromstring(nlp_output['sentences'][0]['parse'])

    # remove the root node
    merged_tree = merged_tree[0]

    if binary:
        nltk.treetransforms.chomsky_normal_form(merged_tree)
    if preprocessed:
        merged_tree = preprocess_parse_tree(merged_tree)
    return merged_tree
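The binary=True branch relies on NLTK's Chomsky-normal-form transform, which binarizes a tree in place by introducing artificial nodes. A minimal sketch of that standard NLTK call:

from nltk.tree import Tree
from nltk import treetransforms

t = Tree.fromstring("(S (NP John) (VP sees) (NP Mary))")
treetransforms.chomsky_normal_form(t)  # binarizes in place, right-factored by default
print(t)
# expected: (S (NP John) (S|<VP-NP> (VP sees) (NP Mary)))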
Example 2: removeNounMods
# Required import: from nltk.tree import Tree
def removeNounMods(tree):
    tree_str = tsurgeon.remove_internal_mods(tree)
    if tree_str != '':
        tree = Tree.fromstring(tree_str)
    tree_str = tsurgeon.remove_participle_mods(tree)
    if tree_str != '':
        tree = Tree.fromstring(tree_str)
    return tree
Example 3: parser_output_to_parse_deriv_trees
# Required import: from nltk.tree import Tree
def parser_output_to_parse_deriv_trees(output):
    lines = output.strip().split("\n")
    deriv_tree_lines = lines[::2]
    parse_tree_lines = lines[1::2]
    parse_trees = [Tree.fromstring(line.replace('\x06', 'epsilon_'))
                   for line in parse_tree_lines if line != '']
    deriv_trees = [Tree.fromstring(line) for line in deriv_tree_lines if line != '']
    return parse_trees, deriv_trees
Example 4: test_lbranch_parse
# Required import: from nltk.tree import Tree
def test_lbranch_parse(self):
    model = LBranch([], 'S')  # empty training set
    trees = [model.parse(s) for s in self.tagged_sents]
    trees2 = [
        Tree.fromstring("""(S (S|<> (S|<> (S|<> (D El) (N gato)) (V come)) (N pescado)) (P .))"""),
        Tree.fromstring("""(S (S|<> (S|<> (S|<> (D La) (N gata)) (V come)) (N salmón)) (P .))"""),
    ]
    self.assertEqual(trees, trees2)
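The S|<> labels in the expected trees look like the artificial nodes NLTK introduces when binarizing with no horizontal context. A short sketch, assuming the LBranch baseline mimics NLTK's markup (left-factored, horzMarkov=0):

from nltk.tree import Tree
from nltk import treetransforms

t = Tree.fromstring("(S (D El) (N gato) (V come) (N pescado) (P .))")
# horzMarkov=0 keeps no sibling history, so each new node is labelled S|<>
treetransforms.chomsky_normal_form(t, factor='left', horzMarkov=0)
print(t)
# expected: (S (S|<> (S|<> (S|<> (D El) (N gato)) (V come)) (N pescado)) (P .))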
Example 5: test_flat_parse
# Required import: from nltk.tree import Tree
def test_flat_parse(self):
    model = Flat([], 'S')  # empty training set
    trees = [model.parse(s) for s in self.tagged_sents]
    trees2 = [
        Tree.fromstring("(S (D El) (N gato) (V come) (N pescado) (P .))"),
        Tree.fromstring("(S (D La) (N gata) (V come) (N salmón) (P .))"),
    ]
    self.assertEqual(trees, trees2)
Example 6: extractParticiple
# Required import: from nltk.tree import Tree
def extractParticiple(tree):
    part_mod = tsurgeon.hasParticipleMod(tree)
    if part_mod != '':
        subject = tsurgeon.findSubject(tree)
        subject_words = Tree.fromstring(subject).leaves()
        part_tree = Tree.fromstring(part_mod)
        part_words = part_tree.leaves()
        # ignoring inflection
        result_words = subject_words + ['is'] + part_words[1:]
        sentence = ' '.join(result_words).strip() + '.'
        return sentence
Example 7: test_productions
# Required import: from nltk.tree import Tree
def test_productions(self):
    t = Tree.fromstring(
        """
            (S
                (NP (Det el) (Noun gato))
                (VP (Verb come) (NP (Noun pescado) (Adj crudo)))
            )
        """)

    # Bugfix with respect to the official test: pass start='S' explicitly
    model = UPCFG([t], start='S')

    prods = model.productions()
    prods2 = [
        ProbabilisticProduction(N('S'), [N('NP'), N('VP')], prob=1.0),
        ProbabilisticProduction(N('NP'), [N('Det'), N('Noun')], prob=0.5),
        ProbabilisticProduction(N('Det'), ['Det'], prob=1.0),
        ProbabilisticProduction(N('Noun'), ['Noun'], prob=1.0),
        ProbabilisticProduction(N('VP'), [N('Verb'), N('NP')], prob=1.0),
        ProbabilisticProduction(N('Verb'), ['Verb'], prob=1.0),
        ProbabilisticProduction(N('NP'), [N('Noun'), N('Adj')], prob=0.5),
        ProbabilisticProduction(N('Adj'), ['Adj'], prob=1.0),
    ]
    self.assertEqual(set(prods), set(prods2))
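UPCFG here is a course-specific model, but plain NLTK can induce a PCFG from tree productions in much the same way. A hedged sketch using only standard NLTK (Nonterminal and induce_pcfg are real nltk names; the tree is the one from the test, and the induced grammar is lexicalized, unlike UPCFG's):

from nltk import Tree, Nonterminal
from nltk.grammar import induce_pcfg

t = Tree.fromstring(
    "(S (NP (Det el) (Noun gato)) (VP (Verb come) (NP (Noun pescado) (Adj crudo))))")

# Tree.productions() reads one production off every internal node
grammar = induce_pcfg(Nonterminal('S'), t.productions())
print(grammar)  # ProbabilisticProductions with relative-frequency estimates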
Example 8: removeLeadingMods
# Required import: from nltk.tree import Tree
def removeLeadingMods(tree):
    tree_str = tsurgeon.remove_leading_mods(tree)
    if tree_str != '':
        new = Tree.fromstring(tree_str)
        if new != tree:
            return removeLeadingMods(new)
    return tree
Example 9: removeVerbMods
# Required import: from nltk.tree import Tree
def removeVerbMods(tree):
    tree_str = tsurgeon.remove_verb_modifiers(tree)
    if tree_str != '':
        new = Tree.fromstring(tree_str)
        if new != tree:
            return removeVerbMods(new)
    return tree
Example 10: parse
# Required import: from nltk.tree import Tree
def parse(self, text):
    """
    NOTE: since the Stanford tagger and parser libraries are case-sensitive, the casing of
    this method's output is preserved. Callers must remember to normalize the casing when
    conducting comparisons.
    :param text: text to be parsed
    :return: a SentenceParseResult object
    """
    server = jsonrpc.ServerProxy(jsonrpc.JsonRpc20(),
                                 jsonrpc.TransportTcpIp(addr=(CORENLP_SERVER_HOST, CORENLP_SERVER_PORT)))
    parsed_sentences = loads(server.parse(text))['sentences']
    if len(parsed_sentences) > 1:
        raise Exception('Multi-sentence query is not supported')
    parsed_sentence = parsed_sentences[0]
    word_tokens = [ParsedWordToken(word_wire_format) for word_wire_format in parsed_sentence['words']]
    # word_tokens = self._recover_contractions(word_tokens)
    normalized_sentence = ' '.join([word_token.text for word_token in word_tokens])
    parsed_tree = Tree.fromstring(parsed_sentence['parsetree'])
    word_dependency = SentenceWordDependency(parsed_sentence['dependencies'])
    return SentenceParseResult(word_tokens=word_tokens,
                               normalized_sentence=normalized_sentence,
                               parsed_tree=parsed_tree,
                               word_dependency=word_dependency)
Example 11: tag_var_nodes
# Required import: from nltk.tree import Tree
def tag_var_nodes(vars_dir, trees_dir, tagged_dir):
    """
    Tag variable nodes in trees.

    Tag variable nodes in trees with a "_VAR:f:n:m:e" suffix, where
    f is the name of the parse file,
    n is the tree number,
    m is the variable's node number and
    e is the name of the pattern used for extracting this variable.

    Will only output those trees containing at least two variables.
    """
    # At first I used tregex's '-f' option to print the filename,
    # but when traversing the files in a directory it prints the wrong
    # filenames (after the first one?), so now the filename is encoded
    # in the node label too.
    tagged_dir = Path(tagged_dir)
    tagged_dir.makedirs_p()

    for vars_fname in Path(vars_dir).glob('*.json'):
        d = defaultdict(list)
        # create a dict mapping each tree number to a list of
        # (nodeNumber, extractName) tuples for its variables
        for record in json.load(vars_fname.open()):
            pair = record['nodeNumber'], record['key']
            d[record['treeNumber']].append(pair)
            lemtree_fname = record['filename']

        parses = (Path(trees_dir) / lemtree_fname).lines()
        tagged_parses = []

        for tree_number, pairs in d.items():
            if len(pairs) > 1:
                # tree numbers in records count from one
                tree = Tree.fromstring(parses[tree_number - 1])
                # get NLTK-style indices for all nodes in a preorder
                # traversal of the tree
                positions = tree.treepositions()
                vars_count = 0

                for node_number, key in pairs:
                    # node numbers in records count from one
                    position = positions[node_number - 1]
                    subtree = tree[position]
                    try:
                        subtree.set_label(
                            '{}_VAR_{}'.format(subtree.label(), key))
                    except AttributeError:
                        log.error('skipping variable "{}" because it is a leaf '
                                  'node ({})'.format(subtree, key))
                    else:
                        vars_count += 1

                if vars_count > 1:
                    tagged_parses.append(tree.pformat(margin=99999))

        if tagged_parses:
            tagged_fname = derive_path(lemtree_fname, new_dir=tagged_dir)
            log.info('writing tagged trees to ' + tagged_fname)
            tagged_fname.write_lines(tagged_parses)
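The node-number-to-position mapping above leans on Tree.treepositions(), a standard NLTK method that enumerates every node of the tree in preorder. A minimal sketch of that behaviour:

from nltk.tree import Tree

t = Tree.fromstring("(S (NP John) (VP runs))")

# Preorder positions: () is the root, (0,) its first child, (0, 0) that
# child's leaf, and so on; trees are indexable by these tuples.
for pos in t.treepositions():
    print(pos, t[pos])
# ()     (S (NP John) (VP runs))
# (0,)   (NP John)
# (0, 0) John
# (1,)   (VP runs)
# (1, 0) runs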
Example 12: question
# Required import: from nltk.tree import Tree
def question(inputstr):
    entities = supersense_tag(inputstr)
    # print("Supersense-tagging done")
    entities.update(named_entities(inputstr))
    # print("NER done")
    main_tree = parser.raw_parse(inputstr).next()
    # print("Parsing done")
    '''
    main_tree_str = save_embedded_clause(main_tree_str)
    print(main_tree_str)
    '''
    main_tree_str = clean_sentence(main_tree)
    # Tree.fromstring(main_tree_str).pprint()
    # TODO: mark_unmovable_tags
    main_tree = inverse_verb(main_tree_str)
    sentence = str(' '.join(Tree.fromstring(main_tree_str).leaves()))
    sentence_inversed = str(' '.join(main_tree.leaves()))
    questions = []
    prep = []  # used to store prepositions while traversing the tree
    gen_question_recur(main_tree, sentence_inversed, sentence, questions, entities, prep)
    questions = [cleanup_question(q) for q in questions]
    questions.append(fix_output(main_tree))
    return questions
Example 13: add_tree
# Required import: from nltk.tree import Tree
def add_tree(self, datum):
    # parse the tree and binarize it
    tree = Tree.fromstring(datum["raw_tree"])
    tree.chomsky_normal_form()
    tree.collapse_unary(collapsePOS=True)
    tree = ParentedTree.convert(tree)

    # assign indices to subtrees
    indices = {}
    counter = 0
    for t in tree.subtrees():
        indices[t.treeposition()] = counter
        counter += 1

    # generate parent pointers and labels
    # (labels = one instance of sent in sents, in treelstm terminology)
    parents = [0] * (counter - 1)
    labels = []
    counter = 0
    for t in tree.subtrees():
        parent = t.parent()
        if parent is not None:
            parents[counter] = indices[parent.treeposition()]
            counter += 1
        if type(t[0]) is str or type(t[0]) is unicode:
            labels.append(t[0])

    self.parents_file.write(" ".join(map(str, parents)) + "\n")
    self.sents_file.write(" ".join(labels) + "\n")
    self.trees.append(datum)
    return len(self.trees) - 1  # ID
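Tree.chomsky_normal_form(), Tree.collapse_unary() and ParentedTree are all standard NLTK APIs. A brief sketch of the parent-pointer idea on a toy tree, mirroring the indexing scheme above:

from nltk.tree import Tree, ParentedTree

tree = ParentedTree.convert(Tree.fromstring("(S (NP John) (VP runs))"))

# A subtree's parent pointer is the index of its parent's position in a
# preorder enumeration of all subtrees.
indices = {t.treeposition(): i for i, t in enumerate(tree.subtrees())}
for t in tree.subtrees():
    parent = t.parent()
    print(t.label(), '->', 'ROOT' if parent is None else indices[parent.treeposition()])
# S -> ROOT
# NP -> 0
# VP -> 0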
Example 14: read_segtree_file
# Required import: from nltk.tree import Tree
def read_segtree_file(fn):
    """reads a string representing a discourse tree (from the seg.
    annotation) and returns a list of its child tree objects"""
    with codecs.open(fn, 'r', 'utf-8') as f:
        s = f.read()
        text_tree = Tree.fromstring(s, read_leaf=prefix_number_seg_token)
    return [segment for segment in text_tree]
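read_leaf is a real keyword argument of Tree.fromstring: a callable applied to every leaf token as the string is parsed (prefix_number_seg_token above is project-specific). A small sketch of how it works:

from nltk.tree import Tree

# Turn each leaf into a (position, token) pair while parsing.
counter = iter(range(1000))
t = Tree.fromstring("(S (NP John) (VP runs))",
                    read_leaf=lambda token: (next(counter), token))
print(t.leaves())  # [(0, 'John'), (1, 'runs')]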
Example 15: find_subtrees
# Required import: from nltk.tree import Tree
def find_subtrees(tree, depth):
    """
    Returns all subtrees at a given depth.

    Arguments
    ---------
    tree: either an nltk.tree.Tree or a PTB-formatted string
    depth: the target depth

    Returns
    -------
    list of nltk.tree.Tree objects representing the selected subtrees

    >>> ptb_str = "(ROOT (S (NP (DT The) (VBG following)) (VP (VBP are) (NP (NP (JJ major) (NN news) (NNS items)) (PP (IN in) (NP (NP (VBG leading) (JJ Turkish) (NNS newspapers)) (PP (IN on) (NP (NNP Monday))))))) (. .)))"
    >>> ptb_tree = Tree.fromstring(ptb_str)
    >>> subtrees = find_subtrees(ptb_str, 2)  # find_subtrees accepts strings ...
    >>> [t.label() for t in subtrees]  # ... and returns a list of nltk.tree.Tree objects
    ['NP', 'VP', '.']
    >>> subtrees = find_subtrees(ptb_tree, 3)  # ... as well as trees
    >>> [t.label() for t in subtrees]
    ['DT', 'VBG', 'VBP', 'NP']
    >>> subtrees = find_subtrees(ptb_tree, 4)
    >>> [t.label() for t in subtrees]
    ['NP', 'PP']
    """
    if isinstance(tree, str):
        tree = Tree.fromstring(tree)

    subtrees = []
    _find_subtrees(tree, 0, depth, subtrees)
    return subtrees
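The helper _find_subtrees is not shown in this snippet. Below is a plausible reconstruction based on how it is called above and on the expected doctest output; the recursion scheme is an assumption, not the author's verbatim code:

from nltk.tree import Tree

def _find_subtrees(tree, current_depth, target_depth, subtrees):
    # Hypothetical helper: collect the Tree children of nodes sitting
    # one level above the target depth.
    if not isinstance(tree, Tree):
        return  # leaves (plain strings) have no subtrees
    if current_depth == target_depth - 1:
        subtrees.extend(child for child in tree if isinstance(child, Tree))
    else:
        for child in tree:
            _find_subtrees(child, current_depth + 1, target_depth, subtrees)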