本文整理汇总了Python中tree.Tree.fromstring方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.fromstring方法的具体用法?Python Tree.fromstring怎么用?Python Tree.fromstring使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类tree.Tree的用法示例。
在下文中一共展示了Tree.fromstring方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: generate_rule
# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import fromstring [as 别名]
def generate_rule(treebank_file):
    """Extract rules from a treebank file and print them, numbered, to stdout.

    Each line of ``treebank_file`` is parsed as one bracketed tree with
    ``Tree.fromstring``.  For every tree the labels of all height-2 subtrees
    are recorded as POS tags, unary chains are optionally collapsed, the tree
    is binarized with ``NewTree.get_binarize_lex`` and the rules returned by
    ``NewTree.generate_rules`` are accumulated in a set.

    Output lines have the form ``"<index> <rule>"``; the index runs
    continuously over the full rules and, when back-off is enabled, the
    back-off rules and the ``"1 BPOS| <pos>"`` entries.

    Relies on the module-level settings ``language_setting``,
    ``unary_collapse`` and ``use_back_off_rule``.  A progress marker is
    written to stderr every 100 sentences.

    Args:
        treebank_file: path of the treebank file, one tree per line.
    """
    pos_set = set()
    full_rule_set = set()

    # if you use unicode here, there is a bug...
    # The context manager guarantees the file is closed even if parsing fails.
    with open(treebank_file, "r") as f:
        for s_ind, sentence in enumerate(f, 1):
            if language_setting == "chn":
                sentence = sentence.decode('utf-8')
            if s_ind % 100 == 0:
                sys.stderr.write(str(s_ind) + "..")

            tree = Tree.fromstring(sentence, remove_empty_top_bracketing=False)
            # Height-2 subtrees sit directly above the leaves: their labels
            # are the POS tags.
            pos_set.update(
                t.label() for t in tree.subtrees(lambda t: t.height() == 2)
            )

            # First, collapse the unary, notice that the POS tags should not
            # be affected
            if unary_collapse:
                NewTree.collapse_unary(tree)
            bt = NewTree.get_binarize_lex(tree)

            # Extract rules from the tree and add them to the full set
            full_rule_set.update(NewTree.generate_rules(bt))
    sys.stderr.write("\n")

    # Generate the back-off rules: every interior token (neither the first
    # nor the last field of the rule) that is a known POS tag is replaced,
    # one position at a time, by the placeholder "BPOS|".
    backoff_rule_set = set()
    for r in full_rule_set:
        args = r.split(" ")
        for i in xrange(1, len(args) - 1):
            if args[i] in pos_set:
                # A shallow copy suffices: the list holds only strings.
                args_copy = list(args)
                args_copy[i] = "BPOS|"
                backoff_rule_set.add(" ".join(args_copy))

    ind = 0
    for r in full_rule_set:
        print(str(ind) + " " + r)
        ind += 1
    if use_back_off_rule:
        for r in backoff_rule_set:
            print(str(ind) + " " + r)
            ind += 1
        # NOTE(review): the "BPOS| <pos>" entries are assumed to belong to the
        # back-off section and are therefore emitted only when back-off is
        # enabled -- confirm against the upstream file.
        for pos in pos_set:
            print(str(ind) + " 1 BPOS| " + pos)
            ind += 1
示例2: generate_conll
# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import fromstring [as 别名]
def generate_conll(inputf):
    """Convert a treebank file to CoNLL-style dependency output on stdout.

    Each line of ``inputf`` is parsed as one bracketed tree with
    ``Tree.fromstring``; its dependencies are obtained from
    ``NewTree.generateDep`` and written with ``NewTree.print_conll_lines``,
    followed by an empty line that separates sentences.

    Relies on the module-level setting ``language_setting``.  A progress
    marker is written to stderr every 100 sentences.

    Args:
        inputf: path of the treebank file, one tree per line.
    """
    # The context manager guarantees the file is closed even if parsing fails.
    with open(inputf, "r") as f:
        for s_ind, sentence in enumerate(f, 1):
            if language_setting == "chn":
                sentence = sentence.decode('utf-8')
            if s_ind % 100 == 0:
                sys.stderr.write(str(s_ind) + "..")

            t = Tree.fromstring(sentence, remove_empty_top_bracketing=False)
            deps = NewTree.generateDep(t)
            NewTree.print_conll_lines(deps, sys.stdout)
            # Blank line terminates the sentence block.
            sys.stdout.write("\n")
示例3: generate_part
# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import fromstring [as 别名]
def generate_part(treebank_file, rule_file):
    """Print the gold parts of every tree in a treebank to stdout.

    According to the original author's note, the output is the gold parts
    file consumed by the C++ side of the project.

    For each line of ``treebank_file`` the tree is parsed, optionally
    unary-collapsed and binarized.  Every node of the binarized tree that is
    neither a leaf string nor a height-2 (preterminal) subtree is passed to
    ``NewTree.get_span_info`` together with the rule dictionary.  Per
    sentence, the number of non-``None`` results is printed first, followed by
    one space-joined line per non-``None`` result.

    Relies on the module-level settings ``language_setting`` and
    ``unary_collapse``.  A progress marker is written to stderr every 100
    sentences.

    Args:
        treebank_file: path of the treebank file, one tree per line.
        rule_file: path handed to ``read_rule_file`` to load the rules.
    """
    # This generate the gold parts file for the use of C++
    rule_dic = read_rule_file(rule_file)

    # The context manager guarantees the file is closed even if parsing fails.
    with open(treebank_file, "r") as f:
        for s_ind, sentence in enumerate(f, 1):
            if language_setting == "chn":
                sentence = sentence.decode('utf-8')
            if s_ind % 100 == 0:
                sys.stderr.write(str(s_ind) + "..")

            t = Tree.fromstring(sentence, remove_empty_top_bracketing=False)
            if unary_collapse:
                NewTree.collapse_unary(t)
            bt = NewTree.get_binarize_lex(t)

            parts = []
            for pos in bt.treepositions(order='postorder'):
                nt = bt[pos]
                # Skip leaves (plain strings) and preterminal nodes.
                if isinstance(nt, (str, unicode)):
                    continue
                if nt.height() == 2:
                    continue
                parts.append(NewTree.get_span_info(nt, rule_dic))

            # Work on a copy so lexLabel does not modify ``t``.
            work_tree = deepcopy(t)
            NewTree.lexLabel(work_tree)
            parent_dic, dep_label_set = NewTree.getParentDic(work_tree)

            # Only parts for which get_span_info returned a value are emitted.
            gold_parts = [item for item in parts if item is not None]
            print(len(gold_parts))

            # NOTE(review): parent_list and label_list are built but never
            # used afterwards.  They are kept because the lookups fail loudly
            # when a token is missing from either mapping -- confirm whether
            # they can be removed.
            n_leaves = len(t.leaves())
            # Keys are 1-based token indices stored as strings; the parent
            # index is shifted down by one.
            parent_list = [
                str(int(parent_dic[str(ind)]) - 1)
                for ind in xrange(1, n_leaves + 1)
            ]
            label_list = [
                dep_label_set[str(ind)] for ind in xrange(1, n_leaves + 1)
            ]

            for p in gold_parts:
                print(" ".join(p))
    sys.stderr.write("\n")