本文整理汇总了Python中nltk.Tree.leaves方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.leaves方法的具体用法?Python Tree.leaves怎么用?Python Tree.leaves使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.Tree
的用法示例。
在下文中一共展示了Tree.leaves方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_sentence_and_indexes
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import leaves [as 别名]
def get_sentence_and_indexes(parsed_sentence):
    """Parse a bracketed sentence string and return its reordered surface
    form together with the per-leaf reordering indexes.

    parsed_sentence -- a bracketed (Penn-style) parse tree string.

    Returns a tuple (reordered_sentence, index_list) where
    reordered_sentence is a space-joined string and index_list holds one
    integer per leaf of the tree.
    """
    tree = Tree(parsed_sentence)
    # Drop the artificial root symbol that BitPar prepends, if present.
    if tree.node == bitpar_top:
        tree = tree[0]
    leaf_count = len(tree.leaves())
    index_list = [0] * leaf_count
    word_slots = [""] * leaf_count
    # The recursive helper fills both lists in place, one entry per leaf.
    get_sentence_and_indexes_rec_helper(tree, index_list, word_slots)
    return " ".join(word_slots), index_list
示例2: extract_itg
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import leaves [as 别名]
def extract_itg(alignments_file_name, parses_file_name, inv_extension):
    """Extract an inversion transduction grammar (ITG) from the given files.

    Keyword arguments:
    alignments_file_name -- name of file containing alignments
        between sentences in l1_file_name and l2_file_name
    parses_file_name -- name of file containing parse trees
        of the sentences in l1_file_name
    inv_extension -- extension denoting whether a node is inverted

    Returns a Counter of binary ITG rules and a Counter of unary rules.
    Each ITG rule is represented as the tuple (lhs, rhs), where rhs is a
    tuple of nodes."""
    binary_itg = Counter()
    unary_itg = Counter()
    num_lines = number_of_lines(parses_file_name)
    # Guard against modulo-by-zero when the parse file has < 100 lines;
    # also use '==' rather than 'is' — identity comparison on ints is an
    # implementation detail and not reliable.
    progress_step = max(num_lines // 100, 1)
    # 'with' guarantees both files are closed even if an error is raised.
    with open(alignments_file_name) as alignments_file, \
            open(parses_file_name) as parses_file:
        for i, l1_parse in enumerate(parses_file):
            if i % progress_step == 0:
                sys.stdout.write('\r%d%%' % (i * 100 // num_lines,))
                sys.stdout.flush()
            # Pre-bind so the except-handler below can log it even when
            # reading the alignment line itself is what failed.
            reordered_indexes = None
            try:  # TODO remove try/catch
                reordered_indexes = str_to_reordered_indexes(next(alignments_file))
                # remove outer brackets from Berkeley parse
                l1_parse = l1_parse.strip()
                l1_parse = l1_parse[1:len(l1_parse) - 1]
                l1_parse = l1_parse.strip()
                parse_tree = Tree(l1_parse)
                parse_forest = generate_forest(parse_tree,
                    reordered_indexes, inv_extension)
            except Exception:
                # Log the offending line, then re-raise so the caller sees
                # the original error.  Catch Exception (not bare except) so
                # KeyboardInterrupt/SystemExit still propagate untouched.
                with open('error.log', 'a') as error_log:
                    error_log.write('%s -- in extract_itg/3\n' % time.asctime())
                    error_log.write('line: %s\n' % i)
                    error_log.write('%s\n' % l1_parse.strip())
                    error_log.write('%s\n' % reordered_indexes)
                    error_log.write('\n')
                print('Error in extract_itg/3. See error.log')
                raise

            binary_rules, unary_rules = extract_rules(parse_forest,
                parse_tree.leaves())
            for rule in binary_rules:
                binary_itg[rule] += 1
            for rule in unary_rules:
                unary_itg[rule] += 1
    return binary_itg, unary_itg
示例3: _build_hierplane_tree
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import leaves [as 别名]
def _build_hierplane_tree(self, tree: Tree, index: int, is_root: bool) -> JsonDict:
"""
Recursively builds a JSON dictionary from an NLTK ``Tree`` suitable for
rendering trees using the `Hierplane library<https://allenai.github.io/hierplane/>`.
Parameters
----------
tree : ``Tree``, required.
The tree to convert into Hierplane JSON.
index : int, required.
The character index into the tree, used for creating spans.
is_root : bool
An indicator which allows us to add the outer Hierplane JSON which
is required for rendering.
Returns
-------
A JSON dictionary render-able by Hierplane for the given tree.
"""
children = []
for child in tree:
if isinstance(child, Tree):
# If the child is a tree, it has children,
# as NLTK leaves are just strings.
children.append(self._build_hierplane_tree(child, index, is_root=False))
else:
# We're at a leaf, so add the length of
# the word to the character index.
index += len(child)
label = tree.label()
span = " ".join(tree.leaves())
hierplane_node = {
"word": span,
"nodeType": label,
"attributes": [label],
"link": label
}
if children:
hierplane_node["children"] = children
# TODO(Mark): Figure out how to span highlighting to the leaves.
if is_root:
hierplane_node = {
"linkNameToLabel": LINK_TO_LABEL,
"nodeTypeToStyle": NODE_TYPE_TO_STYLE,
"text": span,
"root": hierplane_node
}
return hierplane_node
示例4: convert_psd_sent_2_segmentation_2
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import leaves [as 别名]
def convert_psd_sent_2_segmentation_2(parsed_corpus):
    """Convert parsed sentences into word segmentations.

    parsed_corpus -- an iterable where each item holds the top-k parses of
        one sentence as (score, parse_tree_string) pairs, best parse first.

    Returns a list with one segmentation per sentence; each segmentation is
    a list of words obtained by concatenating the leaves (characters) of
    the subtrees two levels below the root of the best parse tree.
    """
    seg_corpus = []
    for top_k_psd_of_the_sent in parsed_corpus:
        segmentation = []
        best_score, best_parse_tree_str = top_k_psd_of_the_sent[0]
        tree = Tree(best_parse_tree_str)
        # tree=ROOT, tree[0]=S, tree[0, ] is the subtrees of S, i.e. POS tags;
        # we can use alternative methods.
        # NOTE: this is highly dependent on the format of the parser outputs!
        for subtree in tree.subtrees(lambda t: t.height() == tree.height() - 2):
            segmentation.append(''.join(subtree.leaves()))
        seg_corpus.append(segmentation)
        # Sanity check: the segmentation must cover exactly the tree's leaves.
        if ''.join(segmentation) != ''.join(tree.leaves()):
            print('Error! Leaves/characters in the segmentation != total characters in the tree (as leaves). Double check the format and/or code!')
            break
    return seg_corpus
示例5: print
# 需要导入模块: from nltk import Tree [as 别名]
# 或者: from nltk.Tree import leaves [as 别名]
# Load the hand-annotated parse trees (one bracketed tree per line) and
# index each (word, POS-tag) pair to its original tree-string line.
# NOTE(review): 'rU' mode is deprecated/removed in modern Python 3 —
# confirm this still runs on the target interpreter.
f=codecs.open(path_annotation, 'rU', 'utf-8')
lines=f.readlines()
f.close()
Production=[]
count=0
# One progress "tick" per 10% of the input.
# NOTE(review): total_nth is 0 when len(lines) < 10, which makes the
# modulo below raise ZeroDivisionError — assumes a reasonably large file.
total_nth=int(len(lines)/10)
for line in lines:
    if count%total_nth==0:
        print(count/total_nth*10, '% finished')
    count +=1
    # Tree(str) / tree.node is the NLTK 2.x API (NLTK 3 uses
    # Tree.fromstring(...) / tree.label()) — presumably this file targets
    # the older NLTK; verify against the installed version.
    tree=Tree(line.strip())
    # Split the root label into the bare tag and its subscript.
    tag, subscript=decompose_tag(tree.node)
    # The word is the concatenation of the tree's leaf characters.
    word=''.join(tree.leaves())
    word_pos2tree_str[(word, tag)]=line.strip()
print('done!')
#
# gen single-char annotation from the corpus
#
print('\n\ngenerating rules for single-char words from corpus')
#---> one needs to run 2a_gen_tag_set_for_word_type.py to gen word2newtag.pickle before using it
path_word2newtag='../working_data/word2newtag.pickle'