当前位置: 首页>>代码示例>>Python>>正文


Python Corpus.test_with_opennlp方法代码示例

本文整理汇总了Python中corpus.Corpus.test_with_opennlp方法的典型用法代码示例。如果您正苦于以下问题：Python Corpus.test_with_opennlp方法的具体用法？Python Corpus.test_with_opennlp怎么用？Python Corpus.test_with_opennlp使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类corpus.Corpus的用法示例。


在下文中一共展示了Corpus.test_with_opennlp方法的9个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _process_parsed_conn

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
 def _process_parsed_conn(self, articles, which='test'):
     """
     Generate an explicit relation for each true discourse connective.

     For every article, candidate-connective features are written to
     ``tmp/conn.feat``; ``Corpus.test_with_opennlp`` is then run with the
     connective model and writes ``tmp/conn.pred``.  The last
     whitespace-separated token of each prediction line is taken as the
     label; every candidate labelled ``'1'`` becomes a new ``Relation``
     of type ``'Explicit'`` appended to ``art.exp_relations``.

     :param articles: sequence of article objects (needs ``len()``); each
         must expose ``id`` and ``exp_relations``.
     :param which: forwarded unchanged to ``Connective.print_features``.
     :raises AssertionError: if the number of prediction lines does not
         match the number of candidate connectives.
     """
     connParser = Connective()
     conn_feat_name = FILE_PATH + '/../tmp/conn.feat'
     # One entry per article: the candidates returned by print_features, in
     # the same order as their feature lines (relied upon when slicing below).
     checked_conns = []
     # ``with`` closes the feature file even if feature extraction raises.
     with codecs.open(conn_feat_name, 'w', 'utf-8') as conn_feat_file:
         for art in articles:
             checked_conns.append(connParser.print_features(art, which, conn_feat_file))

     conn_pred_name = FILE_PATH + '/../tmp/conn.pred'
     Corpus.test_with_opennlp(conn_feat_name, connParser.model_file, conn_pred_name)
     # Previously the prediction file was opened inline and never closed.
     with codecs.open(conn_pred_name, 'r', 'utf-8') as conn_pred_file:
         conn_res = [line.strip().split()[-1] for line in conn_pred_file]

     assert len(checked_conns) == len(articles), 'article size not match'
     s = 0  # offset of the current article's first label inside conn_res
     for art, cand_conns in zip(articles, checked_conns):
         length = len(cand_conns)
         cand_res = conn_res[s:s+length]
         s += length
         for conn, label in zip(cand_conns, cand_res):
             if label != '1':
                 continue
             rel = Relation()
             rel.doc_id = art.id
             rel.rel_type = 'Explicit'
             rel.article = art
             rel.conn_leaves = conn
             rel.conn_addr = [n.leaf_id for n in conn]
             art.exp_relations.append(rel)
     assert s == len(conn_res), 'conn size not match'
开发者ID:qcl6355,项目名称:conll2016,代码行数:32,代码来源:end2end.py

示例2: _process_nonexp_sense

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
    def _process_nonexp_sense(self, articles, which):
        """
        Predict a sense for every non-explicit relation of every article.

        ``self.generate_nonexp_relations(art)`` is called for each article,
        features for each resulting relation are written to
        ``tmp/nonexp.feat``, and ``Corpus.test_with_opennlp`` writes the
        predictions to ``tmp/nonexp.pred``.  The last token of each
        prediction line is stored as ``rel.sense``; ``rel.rel_type``
        becomes ``'EntRel'`` or ``'NoRel'`` when the prediction is exactly
        that label and ``'Implicit'`` otherwise.

        :param articles: iterable of articles; iterated twice.
        :param which: unused by this method; kept for interface
            compatibility with callers.
        :raises AssertionError: if there are more prediction lines than
            relations.
        """
        nonexp_feat_name = FILE_PATH + '/../tmp/nonexp.feat'
        nonexpParser = NonExplicit()  # change name later
        # ``with`` closes the feature file even if feature extraction raises.
        with codecs.open(nonexp_feat_name, 'w', 'utf-8') as nonexp_sense_file:
            for art in articles:
                self.generate_nonexp_relations(art)
                for rel in art.nonexp_relations:
                    # ['xxxxx'] looks like a placeholder label for test-time
                    # features -- TODO confirm against NonExplicit.print_features.
                    nonexpParser.print_features(rel, ['xxxxx'], nonexp_sense_file)

        nonexp_pred_name = FILE_PATH + '/../tmp/nonexp.pred'
        Corpus.test_with_opennlp(nonexp_feat_name, nonexpParser.model_file, nonexp_pred_name)
        # Previously the prediction file was opened inline and never closed.
        with codecs.open(nonexp_pred_name, 'r', 'utf-8') as nonexp_pred_file:
            nonexp_res = [line.strip().split()[-1] for line in nonexp_pred_file]

        # Predictions are consumed in the same order the features were written.
        rid = 0
        for art in articles:
            for rel in art.nonexp_relations:
                pred_sense = nonexp_res[rid]
                rel.rel_type = pred_sense if pred_sense in ('EntRel', 'NoRel') else 'Implicit'
                rel.sense = [pred_sense]
                rid += 1

        assert len(nonexp_res) == rid, 'nonexp relations size not match'
开发者ID:qcl6355,项目名称:conll2016,代码行数:30,代码来源:end2end.py

示例3: _process_parsed_argpos

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
 def _process_parsed_argpos(self, articles, which='test'):
     """
     Run the argument-position classifier over all explicit relations.

     Features for every relation in ``art.exp_relations`` are written to
     ``tmp/argpos.feat``; ``Corpus.test_with_opennlp`` then writes
     ``tmp/argpos.pred``.

     :param articles: iterable of articles exposing ``exp_relations``.
     :param which: forwarded unchanged to ``ArgPos.print_features``.
     :return: list with the last whitespace-separated token of each
         prediction line, in file order.
     """
     argpos_feat_name = FILE_PATH + '/../tmp/argpos.feat'
     argposParser = ArgPos()
     # ``with`` closes the feature file even if feature extraction raises.
     with codecs.open(argpos_feat_name, 'w', 'utf-8') as argpos_feat_file:
         for art in articles:
             for rel in art.exp_relations:
                 # The return value was only collected into a list that was
                 # never read, so it is no longer stored.
                 argposParser.print_features(rel, which, argpos_feat_file)

     argpos_pred_name = FILE_PATH + '/../tmp/argpos.pred'
     Corpus.test_with_opennlp(argpos_feat_name, argposParser.model_file, argpos_pred_name)
     # Previously the prediction file was opened inline and never closed.
     with codecs.open(argpos_pred_name, 'r', 'utf-8') as argpos_pred_file:
         return [line.strip().split()[-1] for line in argpos_pred_file]
开发者ID:qcl6355,项目名称:conll2016,代码行数:15,代码来源:end2end.py

示例4: _process_exp_sense

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
    def _process_exp_sense(self, articles, which='test'):
        """
        Predict a sense for every explicit relation of every article.

        Features are written to ``tmp/exp.feat``, ``Corpus.test_with_opennlp``
        writes ``tmp/exp.pred``, and the last token of the i-th prediction
        line is stored (wrapped in a list) as ``sense`` of the i-th relation.

        :param articles: iterable of articles exposing ``exp_relations``;
            iterated twice.
        :param which: forwarded unchanged to ``Explicit.print_features``.
        :raises IndexError: if there are fewer prediction lines than
            relations.
        """
        exp_feat_name = FILE_PATH + '/../tmp/exp.feat'
        expParser = Explicit()
        # ``with`` closes the feature file even if feature extraction raises.
        with codecs.open(exp_feat_name, 'w', 'utf-8') as exp_sense_file:
            for art in articles:
                for rel in art.exp_relations:
                    # ['xxxxx'] looks like a placeholder label for test-time
                    # features -- TODO confirm against Explicit.print_features.
                    expParser.print_features(rel, ['xxxxx'], which, exp_sense_file)

        exp_pred = FILE_PATH + '/../tmp/exp.pred'
        Corpus.test_with_opennlp(exp_feat_name, expParser.model_file, exp_pred)

        # Previously the prediction file was opened inline and never closed.
        with codecs.open(exp_pred, 'r', 'utf-8') as exp_pred_file:
            exp_res = [line.strip().split()[-1] for line in exp_pred_file]

        # Predictions are consumed in the same order the features were written.
        rid = 0
        for art in articles:
            for rel in art.exp_relations:
                rel.sense = [exp_res[rid]]
                rid += 1
开发者ID:qcl6355,项目名称:conll2016,代码行数:20,代码来源:end2end.py

示例5: _process_exp_sense

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
    def _process_exp_sense(self, articles, which='test'):
        """
        Predict a sense for every explicit relation of every article.

        Features are written to ``tmp/exp.feat`` and
        ``Corpus.test_with_opennlp`` writes ``tmp/exp.pred``.  The last
        token of each prediction line is translated through
        ``LABEL_SENSES_MAP`` and stored (wrapped in a list) as ``sense`` of
        the corresponding relation.

        :param articles: iterable of articles exposing ``exp_relations``;
            iterated twice.
        :param which: forwarded unchanged to ``Explicit.print_features``.
        :raises KeyError: if a predicted label is missing from
            ``LABEL_SENSES_MAP``.
        :raises IndexError: if there are fewer prediction lines than
            relations.
        """
        exp_feat_name = FILE_PATH + '/../tmp/exp.feat'
        expParser = Explicit()
        # ``with`` closes the feature file even if feature extraction raises.
        with open(exp_feat_name, 'w') as exp_sense_file:
            for art in articles:
                for rel in art.exp_relations:
                    expParser.print_features(rel, ['Conjunction'], which, exp_sense_file)

        exp_pred = FILE_PATH + '/../tmp/exp.pred'
        # NOTE: a disabled alternative here called Corpus.test_with_svm with a
        # feature map and an intermediate tmp/exp.vec file; only the OpenNLP
        # path is active.
        Corpus.test_with_opennlp(exp_feat_name, expParser.model_file, exp_pred)

        # Previously the prediction file was opened inline and never closed.
        with open(exp_pred, 'r') as exp_pred_file:
            exp_res = [LABEL_SENSES_MAP[line.strip().split()[-1]] for line in exp_pred_file]

        # Predictions are consumed in the same order the features were written.
        rid = 0
        for art in articles:
            for rel in art.exp_relations:
                rel.sense = [exp_res[rid]]
                rid += 1
开发者ID:qcl6355,项目名称:conll2016,代码行数:22,代码来源:end2end.py

示例6: test

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
 def test(self):
     """
     Write test features to ``self.test_file`` and run the classifier.

     ``self.prepare_data`` is called with ``DEV_PARSE_PATH``,
     ``DEV_REL_PATH`` and the mode string ``'test'``, writing into the
     freshly opened ``self.test_file``.  ``Corpus.test_with_opennlp`` is
     then run with ``self.model_file`` and ``self.predicted_file``.
     """
     # ``with`` closes the file even if prepare_data raises; the original
     # leaked the handle in that case.
     with open(self.test_file, 'w') as to_file:
         self.prepare_data(DEV_PARSE_PATH, DEV_REL_PATH, 'test', to_file)
     Corpus.test_with_opennlp(self.test_file, self.model_file, self.predicted_file)
开发者ID:qcl6355,项目名称:conll2016,代码行数:7,代码来源:nonexp.py

示例7: predict

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
 def predict(self, test_file, predicted_file):
     """
     Delegate to ``Corpus.test_with_opennlp`` using this object's model.

     :param test_file: passed through as the first argument.
     :param predicted_file: passed through as the third argument.
     :return: None (the delegate's result is not returned).
     """
     model_path = self.model_file
     Corpus.test_with_opennlp(test_file, model_path, predicted_file)
开发者ID:qcl6355,项目名称:conll2016,代码行数:4,代码来源:nonexp.py

示例8: _process_parsed_arg

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
    def _process_parsed_arg(self, articles, which='test'):
        """
        Extract Arg1/Arg2 spans for every explicit relation.

        Two classifier passes are run with the same argument model:

        1. Candidate features for each relation go to ``tmp/arg.feat``;
           the predicted labels (``tmp/arg.pred``) are merged per relation
           via ``Argument.merge``.
        2. For relations whose connective is not in the first sentence and
           whose previous sentence has a non-null tree, features for that
           previous tree's root go to ``tmp/arg.prev.feat``.  If pass 1
           produced no Arg1 leaves and the pass-2 label is ``'arg1'``, the
           whole previous sentence becomes Arg1.

        The method then fills ``arg{1,2}_leaves``, ``arg{1,2}_addr``,
        ``arg{1,2}_sid`` and ``arg{1,2}_text`` on each relation.

        :param articles: iterable of articles exposing ``exp_relations``;
            iterated three times.
        :param which: forwarded unchanged to ``Argument.print_features``.
        :raises AssertionError: if either prediction file has more lines
            than were consumed.
        """
        argParser = Argument()

        # Pass 1: candidate constituents of the connective's own sentence.
        arg_feat_name = FILE_PATH + '/../tmp/arg.feat'
        arg_checked = []  # per relation: candidates returned by print_features
        with codecs.open(arg_feat_name, 'w', 'utf-8') as arg_feat_file:
            for art in articles:
                for rel in art.exp_relations:
                    arg_checked.append(argParser.print_features(rel, which, arg_feat_file))
        arg_pred_name = FILE_PATH + '/../tmp/arg.pred'
        Corpus.test_with_opennlp(arg_feat_name, argParser.model_file, arg_pred_name)
        with codecs.open(arg_pred_name, 'r', 'utf-8') as arg_pred_file:
            arg_res = [line.strip().split()[-1] for line in arg_pred_file]

        # Pass 2: root of the previous sentence, where one exists.
        # prev_sent_ids records, per relation, the previous-sentence index for
        # which a feature line was actually written (None if none was).  The
        # merge loop uses this record rather than re-deriving the condition:
        # the original advanced its prediction index whenever conn_sid > 0,
        # even when a null previous tree meant no line had been written,
        # which shifted every later prediction.
        tmp_feat_name = FILE_PATH+'/../tmp/arg.prev.feat'
        prev_sent_ids = []
        with codecs.open(tmp_feat_name, 'w', 'utf-8') as tmp_file:
            for art in articles:
                for rel in art.exp_relations:
                    prev_sid = None
                    conn_sid = rel.conn_leaves[0].goto_tree().sent_id
                    if conn_sid > 0:
                        prev_tree = rel.article.sentences[conn_sid-1].tree
                        if not prev_tree.is_null():
                            argParser.print_features(rel, which, tmp_file, prev_tree.root)
                            prev_sid = conn_sid - 1
                    prev_sent_ids.append(prev_sid)
        tmp_pred_name = FILE_PATH + '/../tmp/arg.prev.pred'
        Corpus.test_with_opennlp(tmp_feat_name, argParser.model_file, tmp_pred_name)
        with codecs.open(tmp_pred_name, 'r', 'utf-8') as tmp_pred_file:
            arg_prev_res = [line.strip().split()[-1] for line in tmp_pred_file]

        rid = 0    # relation counter, indexes arg_checked / prev_sent_ids
        s = 0      # offset of the current relation's labels inside arg_res
        index = 0  # next unread entry of arg_prev_res
        for art in articles:
            for rel in art.exp_relations:
                args = arg_checked[rid]
                prev_sid = prev_sent_ids[rid]
                rid += 1
                labels = arg_res[s:s+len(args)]
                s += len(args)
                merge_result = argParser.merge(rel, args, labels)
                rel.arg1_leaves = merge_result['arg1'] if 'arg1' in merge_result else []

                if prev_sid is not None:
                    # If the current sentence could not resolve any arg1
                    # leaves, fall back to the previous sentence when the
                    # classifier labelled its root as arg1.
                    if len(rel.arg1_leaves) == 0 and arg_prev_res[index] == 'arg1':
                        rel.arg1_leaves = rel.article.sentences[prev_sid].leaves
                    # Always consume the prediction so later relations stay aligned.
                    index += 1

                rel.arg1_leaves = self.remove_leading_tailing_punc(rel.arg1_leaves)
                rel.arg1_addr = [n.leaf_id for n in rel.arg1_leaves]
                # Arg1's sentence id comes from its last leaf, Arg2's from its first.
                rel.arg1_sid = rel.arg1_leaves[-1].goto_tree().sent_id if len(rel.arg1_leaves) > 0 else -1
                rel.arg1_text = ' '.join(n.value for n in rel.arg1_leaves)

                rel.arg2_leaves = merge_result['arg2'] if 'arg2' in merge_result else []
                rel.arg2_leaves = self.remove_leading_tailing_punc(rel.arg2_leaves)
                rel.arg2_addr = [n.leaf_id for n in rel.arg2_leaves]
                rel.arg2_sid = rel.arg2_leaves[0].goto_tree().sent_id if len(rel.arg2_leaves) > 0 else -1
                rel.arg2_text = ' '.join(n.value for n in rel.arg2_leaves)

        assert len(arg_prev_res) == index, 'arg prev size not match'
        assert len(arg_res) == s, 'arg candidate size not match'
开发者ID:qcl6355,项目名称:conll2016,代码行数:63,代码来源:end2end.py

示例9: _post_process_nonexp_arguments

# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
    def _post_process_nonexp_arguments(self, articles, which='test'):
        """
        Prune clauses from the arguments of every ``'Implicit'`` relation.

        For each Implicit relation, one feature line per clause of the Arg1
        sentence and then of the Arg2 sentence is written to
        ``tmp/attr.feat``; ``Corpus.test_with_opennlp`` writes
        ``tmp/attr.pred``.  Each argument is rebuilt from the clauses of its
        sentence, keeping single-leaf punctuation clauses unconditionally
        and any other clause whose predicted label is not ``'1'``
        (presumably "is an attribution span" -- confirm against the
        Attribution model).  Leading/trailing punctuation is then stripped
        and ``arg{1,2}_leaves``, ``arg{1,2}_addr`` and ``arg{1,2}_text``
        are updated.

        :param articles: iterable of articles exposing ``nonexp_relations``
            and ``sentences``; iterated twice.
        :param which: forwarded unchanged to ``Attribution.print_features``.
        :raises AssertionError: if there are more prediction lines than
            clauses visited.
        """
        attrParser = Attribution()
        attr_feat_name = FILE_PATH + '/../tmp/attr.feat'

        def _print_clause_features(clauses, arg_leaves, out_file):
            # One print_features call per clause, with its neighbours
            # (None at the sentence boundaries).
            last = len(clauses) - 1
            for pos, clause in enumerate(clauses):
                prev_clause = clauses[pos-1] if pos > 0 else None
                next_clause = clauses[pos+1] if pos < last else None
                attrParser.print_features(clause, prev_clause, next_clause, arg_leaves, which, out_file)

        # ``with`` closes the feature file even if feature extraction raises.
        with codecs.open(attr_feat_name, 'w', 'utf-8') as attr_file:
            for art in articles:
                for rel in art.nonexp_relations:
                    if rel.rel_type != 'Implicit':
                        continue
                    s1_clauses = art.sentences[rel.arg1_sid].clauses
                    s2_clauses = art.sentences[rel.arg2_sid].clauses
                    _print_clause_features(s1_clauses, rel.arg1_leaves, attr_file)
                    _print_clause_features(s2_clauses, rel.arg2_leaves, attr_file)

        attr_pred_name = FILE_PATH + '/../tmp/attr.pred'
        Corpus.test_with_opennlp(attr_feat_name, attrParser.model_file, attr_pred_name)
        # Previously the prediction file was opened inline and never closed.
        with codecs.open(attr_pred_name, 'r', 'utf-8') as attr_pred_file:
            attr_res = [line.strip().split()[-1] for line in attr_pred_file]

        def _kept_leaves(clauses, start):
            # Returns (leaves kept, index of the next unread prediction).
            # A prediction is consumed for every clause, including the
            # punctuation clauses that are kept regardless of their label.
            kept = []
            pos = start
            for clause in clauses:
                if len(clause) == 1 and is_punc(clause[0].parent_node.value):
                    kept += clause
                elif attr_res[pos] != '1':
                    kept += clause
                pos += 1
            return kept, pos

        # Combine results, visiting clauses in the order their features were written.
        idx = 0
        for art in articles:
            for rel in art.nonexp_relations:
                if rel.rel_type != 'Implicit':
                    continue
                s1_clauses = art.sentences[rel.arg1_sid].clauses
                s2_clauses = art.sentences[rel.arg2_sid].clauses

                rel.arg1_leaves, idx = _kept_leaves(s1_clauses, idx)
                rel.arg2_leaves, idx = _kept_leaves(s2_clauses, idx)

                rel.arg1_leaves = self.remove_leading_tailing_punc(rel.arg1_leaves)
                rel.arg1_addr = [n.leaf_id for n in rel.arg1_leaves]
                rel.arg1_text = ' '.join(n.value for n in rel.arg1_leaves)

                rel.arg2_leaves = self.remove_leading_tailing_punc(rel.arg2_leaves)
                rel.arg2_addr = [n.leaf_id for n in rel.arg2_leaves]
                rel.arg2_text = ' '.join(n.value for n in rel.arg2_leaves)

        assert len(attr_res) == idx, 'attrbution counts not match'
开发者ID:qcl6355,项目名称:conll2016,代码行数:68,代码来源:end2end.py


注:本文中的corpus.Corpus.test_with_opennlp方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。