Python Corpus.read_parses方法代码示例

本文整理汇总了Python中corpus.Corpus.read_parses方法的典型用法代码示例。如果您正苦于以下问题：Python Corpus.read_parses方法的具体用法？Python Corpus.read_parses怎么用？Python Corpus.read_parses使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类corpus.Corpus的用法示例。

在下文中一共展示了Corpus.read_parses方法的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: output_json_format

# 需要导入模块: from corpus import Corpus [as 别名]
# 注意: read_parses 是 Corpus 类的方法，不能单独导入；请通过 Corpus.read_parses(...) 调用
    def output_json_format(self, parse_path, rel_path):
        """
        Print one JSON object per non-explicit relation to standard output.

        Predicted senses are read from ``self.predicted_file`` (the last
        whitespace-separated token of each line) and consumed in order, one
        per non-explicit relation found while iterating
        ``Corpus.read_parses(parse_path, rel_dict)``.

        @param parse_path: parse file handed to ``Corpus.read_parses``
        @param rel_path: relation file handed to ``Corpus.read_relations``
        """
        # Use a context manager so the prediction file is closed right away
        # instead of leaking the handle until garbage collection.
        with open(self.predicted_file) as pred_file:
            preds = [line.strip().split()[-1] for line in pred_file]

        rel_dict = Corpus.read_relations(rel_path)
        idx = 0
        for art in Corpus.read_parses(parse_path, rel_dict):
            for rel in art.relations:
                # Explicit relations have no entry in the prediction file.
                if rel.rel_type == 'Explicit':
                    continue
                pred_sense = preds[idx]
                idx += 1

                # EntRel / NoRel predictions double as the relation type;
                # every other predicted sense is reported as Implicit.
                if pred_sense in ('EntRel', 'NoRel'):
                    r_type = pred_sense
                else:
                    r_type = 'Implicit'

                # Connective and argument token lists are always emitted empty.
                json_dict = {
                    'DocID': rel.doc_id,
                    'Type': r_type,
                    'Sense': [pred_sense.replace('_', ' ')],
                    'Connective': {'TokenList': []},
                    'Arg1': {'TokenList': []},
                    'Arg2': {'TokenList': []},
                }
                # Single-argument print(...) behaves the same under the
                # Python 2 print statement and the Python 3 print function.
                print(json.dumps(json_dict))

开发者ID:qcl6355，项目名称:conll2016，代码行数:30，代码来源:nonexp.py

示例2: prepare_data

# 需要导入模块: from corpus import Corpus [as 别名]
# 注意: read_parses 是 Corpus 类的方法，不能单独导入；请通过 Corpus.read_parses(...) 调用
 def prepare_data(self, parse_path, rel_path, which, to_file):
     """
     Prepare every explicit relation of each article, then emit the
     article's features.

     @param parse_path: parse file handed to ``Corpus.read_parses``
     @param rel_path: relation file handed to ``Corpus.read_relations``
     @param which: forwarded unchanged to ``self.print_features``
     @param to_file: forwarded unchanged to ``self.print_features``
     """
     rel_data = Corpus.read_relations(rel_path)
     for article in Corpus.read_parses(parse_path, rel_data):
         for relation in article.relations:
             # Only explicit relations get their connective leaves resolved.
             if relation.rel_type == 'Explicit':
                 relation.article = article
                 relation.get_conn_leaves()
         # Features are printed once per article, not once per relation.
         self.print_features(article, which, to_file)

开发者ID:qcl6355，项目名称:conll2016，代码行数:11，代码来源:connective.py

示例3: prepare_data

# 需要导入模块: from corpus import Corpus [as 别名]
# 注意: read_parses 是 Corpus 类的方法，不能单独导入；请通过 Corpus.read_parses(...) 调用
    def prepare_data(self, parse_path, rel_path, which, to_file):
        """
        Emit features, together with sense labels, for each explicit relation.

        Sense names have spaces replaced by underscores and are kept only if
        they appear in ``SENSES``. When ``which == 'test'`` the surviving
        labels are collapsed into a single '|'-joined string inside a list.

        @param parse_path: parse file handed to ``Corpus.read_parses``
        @param rel_path: relation file handed to ``Corpus.read_relations``
        @param which: data split name; ``'test'`` triggers label joining
        @param to_file: forwarded unchanged to ``self.print_features``
        """
        rel_data = Corpus.read_relations(rel_path)
        for article in Corpus.read_parses(parse_path, rel_data):
            for relation in article.relations:
                if relation.rel_type != 'Explicit':
                    continue

                relation.article = article
                relation.get_conn_leaves()

                # Normalise first, then filter, in two separate passes.
                normalised = {name.replace(' ','_') for name in relation.sense}
                known = {name for name in normalised if name in SENSES}
                labels = ['|'.join(known)] if which == 'test' else known

                self.print_features(relation, labels, which, to_file)

开发者ID:qcl6355，项目名称:conll2016，代码行数:16，代码来源:explicit.py

示例4: parse

# 需要导入模块: from corpus import Corpus [as 别名]
# 注意: read_parses 是 Corpus 类的方法，不能单独导入；请通过 Corpus.read_parses(...) 调用
    def parse(self, parse_path, raw_home, output):
        """
        Run the processing stages over all parsed articles and print the
        resulting relations to `output`.

        @param parse_path: json format parse file
        @param raw_home: raw text home
        @param output: writable target of ``print >>``; receives the
                       ``output_json_format()`` result of every relation
                       whose type is not 'NoRel'
        """
        which = 'test'

        # Load each article with its raw text and article-level word ids.
        loaded = []
        for article in Corpus.read_parses(parse_path):
            article.read_raw_text(raw_home+'/'+article.id)
            article.set_article_level_word_id()
            loaded.append(article)
        articles = sorted(loaded, key=lambda a: a.id)

        print >> logs, "===read data compelete==="

        # Each stage logs its banner and then runs on the article list.
        stages = (
            ("===1. connective identification===", self._process_parsed_conn),
            ("===2. argument extraction===", self._process_parsed_arg),
            ("===3. explicit sense classification===", self._process_exp_sense),
            # generates candidate nonexp relations
            ("===4. nonexp sense classification===", self._process_nonexp_sense),
        )
        for banner, run_stage in stages:
            print >> logs, banner
            run_stage(articles, which)

        # Step 5 is only announced; the post-processing call is disabled:
        # self._post_process_nonexp_arguments(articles, which)
        print >> logs, "===5. nonexp arguments postproceesing==="

        print >> logs, "===6. convert into json format==="
        # output conll json format
        for article in articles:
            for rel in article.exp_relations + article.nonexp_relations:
                if rel.rel_type == 'NoRel':
                    continue
                print >> output, rel.output_json_format()

        print >> logs, "===done.==="

开发者ID:qcl6355，项目名称:conll2016，代码行数:46，代码来源:end2end.py

示例5: prepare_data

# 需要导入模块: from corpus import Corpus [as 别名]
# 注意: read_parses 是 Corpus 类的方法，不能单独导入；请通过 Corpus.read_parses(...) 调用
    def prepare_data(self, parse_path, rel_path, which, to_file):
        """
        Emit features for explicit relations and collect the results.

        For ``which == 'train'`` a relation is skipped unless
        ``self.need_extract(rel)`` is truthy; other splits are not filtered.
        The number of processed relations is written to ``logs``.

        @param parse_path: parse file handed to ``Corpus.read_parses``
        @param rel_path: relation file handed to ``Corpus.read_relations``
        @param which: data split name; ``'train'`` enables the filter
        @param to_file: forwarded unchanged to ``self.print_features``
        @return: list of the values returned by ``self.print_features``
        """
        results = []
        rel_data = Corpus.read_relations(rel_path)
        for article in Corpus.read_parses(parse_path, rel_data):
            for relation in article.relations:
                if relation.rel_type != 'Explicit':
                    continue

                relation.article = article
                relation.get_conn_leaves()
                relation.get_arg_leaves()

                # Filter added 2015/9/29: training instances must pass
                # need_extract; it is not consulted for other splits.
                wanted = which != 'train' or self.need_extract(relation)
                if wanted:
                    results.append(self.print_features(relation, which, to_file))

        # One result is stored per processed instance, so the list length
        # is the instance count.
        print >> logs, "processed %d instances" % len(results)
        return results

开发者ID:qcl6355，项目名称:conll2016，代码行数:23，代码来源:argument.py

示例6: prepare_data

# 需要导入模块: from corpus import Corpus [as 别名]
# 注意: read_parses 是 Corpus 类的方法，不能单独导入；请通过 Corpus.read_parses(...) 调用
    def prepare_data(self, parse_path, rel_path, which, to_file):
        """
        Emit features for non-explicit relations, then for synthetic NoRel
        relations built from adjacent sentence pairs.

        First pass: every relation gets its article and argument leaves set;
        explicit relations are then skipped and the rest are printed with
        their sense labels (spaces replaced by underscores). When
        ``which == 'test'`` the labels are collapsed into a single
        '|'-joined string inside a list.

        Second pass: for each pair of consecutive sentences (s1, s2) where
        ``art.has_inter_relation(s1.id)`` is false, a new ``Relation`` with
        s1 as Arg1 and s2 as Arg2 is printed with the label 'NoRel'.

        @param parse_path: parse file handed to ``Corpus.read_parses``
        @param rel_path: relation file handed to ``Corpus.read_relations``
        @param which: data split name; ``'test'`` triggers label joining
        @param to_file: forwarded unchanged to ``self.print_features``
        """
        rel_dict = Corpus.read_relations(rel_path)
        # Articles are kept so the NoRel pass runs after all annotated
        # relations have been printed.
        articles = []
        for art in Corpus.read_parses(parse_path, rel_dict):
            articles.append(art)
            for rel in art.relations:
                # Done for every relation, explicit ones included.
                rel.article = art
                rel.get_arg_leaves()
                if rel.rel_type == 'Explicit':
                    continue
                labels = {s.replace(' ', '_') for s in rel.sense}
                if which == 'test':
                    labels = ['|'.join(labels)]

                self.print_features(rel, labels, to_file)

        # add NoRel relations
        for art in articles:
            for s1, s2 in zip(art.sentences[:-1], art.sentences[1:]):
                if art.has_inter_relation(s1.id):
                    continue

                rel = Relation()
                rel.article = art
                rel.doc_id = art.id

                # Arg1 comes from the first sentence of the pair; its sentence
                # id is taken from the last remaining leaf (-1 if none remain).
                rel.arg1s['parsed'] = [s1.tree.root] if not s1.tree.is_null() else []
                rel.arg1_leaves = self.remove_leading_tailing_punc(s1.leaves)
                rel.arg1_addr = [n.leaf_id for n in rel.arg1_leaves]
                rel.arg1_sid = rel.arg1_leaves[-1].goto_tree().sent_id if len(rel.arg1_leaves) > 0 else -1
                rel.arg1_text = ' '.join(n.value for n in rel.arg1_leaves)

                # Arg2 comes from the second sentence; its sentence id is
                # taken from the first remaining leaf (-1 if none remain).
                rel.arg2s['parsed'] = [s2.tree.root] if not s2.tree.is_null() else []
                rel.arg2_leaves = self.remove_leading_tailing_punc(s2.leaves)
                rel.arg2_addr = [n.leaf_id for n in rel.arg2_leaves]
                rel.arg2_sid = rel.arg2_leaves[0].goto_tree().sent_id if len(rel.arg2_leaves) > 0 else -1
                rel.arg2_text = ' '.join(n.value for n in rel.arg2_leaves)

                self.print_features(rel, ['NoRel'], to_file)

开发者ID:qcl6355，项目名称:conll2016，代码行数:40，代码来源:nonexp.py

注：本文中的corpus.Corpus.read_parses方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。