当前位置: 首页>>代码示例>>Python>>正文


Python Tree.fromstring方法代码示例

本文整理汇总了Python中tree.Tree.fromstring方法的典型用法代码示例。如果您正苦于以下问题:Python Tree.fromstring方法的具体用法?Python Tree.fromstring怎么用?Python Tree.fromstring使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在tree.Tree的用法示例。


在下文中一共展示了Tree.fromstring方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: generate_rule

# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import fromstring [as 别名]
def generate_rule(treebank_file):
    # if you use unicode here, there is a bug...
    f = open(treebank_file, "r")
    pos_set = set([])
    full_rule_set = set([])
    s_ind = 0
    for sentence in f:
        if language_setting == "chn":
            sentence = sentence.decode('utf-8')
        s_ind += 1
        if s_ind % 100 == 0:
            sys.stderr.write(str(s_ind) + "..")
        tree = Tree.fromstring(sentence, remove_empty_top_bracketing=False)
        preterminals = [t.label() for t in tree.subtrees(lambda t: t.height() == 2)]
        pos_set.update(preterminals)

        # First, collapse the unary, notice that the POS tags should not be affected
        if unary_collapse:
            NewTree.collapse_unary(tree)
        bt = NewTree.get_binarize_lex(tree)
        # Extract rules from the tree
        rule_set = NewTree.generate_rules(bt)

        # Add them to the full set
        for sr in rule_set:
            full_rule_set.add(sr)
    sys.stderr.write("\n")
    f.close()

    # print core_pos_set
    # Generate the back-off rules
    backoff_rule_set = set([])
    for r in full_rule_set:
        args = r.split(" ")

        for i in xrange(1, len(args)-1):
            if args[i] in pos_set:
                args_copy = deepcopy(args)
                args_copy[i] = "BPOS|"
                backoff_rule_set.add(" ".join(args_copy))

    ind = 0
    for r in full_rule_set:
        print str(ind) + " " + r
        ind += 1
    if use_back_off_rule:
        for r in backoff_rule_set:
            print str(ind) + " " + r
            ind += 1
        for pos in pos_set:
            print str(ind) + " 1 BPOS| " + pos
            ind += 1
开发者ID:jungikim,项目名称:PAD,代码行数:54,代码来源:Generator.py

示例2: generate_conll

# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import fromstring [as 别名]
def generate_conll(inputf):
    f = open(inputf, "r")
    s_ind = 0
    for sentence in f:
        if language_setting == "chn":
            sentence = sentence.decode('utf-8')
        s_ind += 1
        if s_ind % 100 == 0:
            sys.stderr.write(str(s_ind) + "..")
        t = Tree.fromstring(sentence, remove_empty_top_bracketing=False)
        deps = NewTree.generateDep(t)
        NewTree.print_conll_lines(deps,sys.stdout)
        sys.stdout.write("\n")
    f.close()
开发者ID:jungikim,项目名称:PAD,代码行数:16,代码来源:Generator.py

示例3: generate_part

# 需要导入模块: from tree import Tree [as 别名]
# 或者: from tree.Tree import fromstring [as 别名]
def generate_part(treebank_file, rule_file):
    # This generate the gold parts file for the use of C++
    rule_dic = read_rule_file(rule_file)
    f = open(treebank_file, "r")
    s_ind = 0

    for sentence in f:
        if language_setting == "chn":
            sentence = sentence.decode('utf-8')
        s_ind += 1
        if s_ind % 100 == 0:
            sys.stderr.write(str(s_ind) + "..")
        parts = []
        t = Tree.fromstring(sentence, remove_empty_top_bracketing=False)
        if unary_collapse:
            NewTree.collapse_unary(t)
        bt = NewTree.get_binarize_lex(t)
        for pos in bt.treepositions(order='postorder'):
            nt = bt[pos]
            if isinstance(nt, str) or isinstance(nt, unicode):
                continue
            elif nt.height() == 2:
                continue
            else:
                info = NewTree.get_span_info(nt,rule_dic)
                parts.append(info)

        work_tree = deepcopy(t)
        NewTree.lexLabel(work_tree)
        parent_dic, dep_label_set = NewTree.getParentDic(work_tree)

        print len([item for item in parts if item != None])
        parent_list = []
        label_list = []
        for ind in xrange(1, (len(t.leaves()) + 1)):
            p = str(int(parent_dic[str(ind)]) - 1)
            parent_list.append(p)
        for ind in xrange(1, (len(t.leaves()) + 1)):
            l = dep_label_set[str(ind)]
            label_list.append(l)
        for p in parts:
            if p != None:
                print " ".join(p)
            else:
                pass
    sys.stderr.write("\n")
    f.close()
开发者ID:jungikim,项目名称:PAD,代码行数:49,代码来源:Generator.py


注:本文中的tree.Tree.fromstring方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。