This article collects typical usage examples of the Feature.extract_production_rules method from the Python module feature. If you are unsure what Feature.extract_production_rules does, how to call it, or want to see it used in real code, the curated example here may help. You can also look further into the containing class, feature.Feature, for more usage examples.
Below, 1 code example of the Feature.extract_production_rules method is shown; examples are ordered by popularity by default.
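For context, a production-rule feature records the grammar productions (e.g. S -> NP VP) found in the parse trees of a relation's arguments, a common feature for classifying non-explicit discourse relations. The snippet below is a minimal independent sketch of that idea using NLTK's Tree.productions(); it is not the feature.Feature implementation used in the example, and the parse string is invented for illustration.

# Hedged sketch of production-rule extraction with NLTK (not this project's code)
from nltk.tree import Tree

parse = Tree.fromstring("(S (NP (DT the) (NN market)) (VP (VBD fell)))")
# Tree.productions() yields one CFG rule per node, e.g. S -> NP VP, DT -> 'the'
rules = [str(p).replace(' ', '_') for p in parse.productions()]
print(rules)  # ['S_->_NP_VP', 'NP_->_DT_NN', "DT_->_'the'", ...]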
Example 1: __init__
# Required import: from feature import Feature [as alias]
# Or: from feature.Feature import extract_production_rules [as alias]
# Also used by this example (assumed to come from the surrounding project):
# Corpus, Relation, is_punc, evaluate, FILE_PATH and the *_PARSE_PATH / *_REL_PATH
# constants, plus collections.defaultdict from the standard library.
class NonExplicit:

    def __init__(self):
        self.train_file = FILE_PATH + '/../data/conll.nonexp.train'
        self.test_file = FILE_PATH + '/../data/conll.nonexp.test'
        self.model_file = FILE_PATH + '/../data/conll.nonexp.model'
        self.predicted_file = FILE_PATH + '/../data/conll.nonexp.test.predicted'
        self.feat_handle = Feature()

    def print_features(self, relation, labels, to_file):
        # Concatenate the individual feature groups into one feature vector
        # and write one training/test line per sense label.
        feat_vec = []
        feat_vec += self.feat_handle.extract_arg2_first3(relation)
        feat_vec += self.feat_handle.extract_dependency_rules(relation)
        feat_vec += self.feat_handle.extract_production_rules(relation)
        feat_vec += self.feat_handle.extract_word_pair(relation)
        feat_vec += self.feat_handle.extract_brown_cluster(relation)
        to_line = ' '.join(feat_vec)
        for label in labels:
            to_file.write('%s %s\n' % (to_line, label.encode('utf-8')))  # Python 2: encode unicode label

    def remove_leading_tailing_punc(self, leaves):
        # Drop a leading and/or trailing punctuation leaf, if present.
        if len(leaves) > 0:
            start = 0
            end = len(leaves)
            if is_punc(leaves[0].parent_node.value):
                start = 1
            if is_punc(leaves[-1].parent_node.value):
                end -= 1
            return leaves[start:end]
        else:
            return leaves

    def prepare_data(self, parse_path, rel_path, which, to_file):
        rel_dict = Corpus.read_relations(rel_path)
        articles = []
        dist = defaultdict(int)
        for art in Corpus.read_parses(parse_path, rel_dict):
            articles.append(art)
            for rel in art.relations:
                rel.article = art
                rel.get_arg_leaves()
                if rel.rel_type == 'Explicit':
                    continue
                labels = {s.replace(' ', '_') for s in rel.sense}
                for l in labels:
                    dist[l] += 1
                if which == 'test':
                    labels = ['|'.join(labels)]
                self.print_features(rel, labels, to_file)
        # add NoRel relations for adjacent sentence pairs with no inter-sentence relation
        for art in articles:
            for s1, s2 in zip(art.sentences[:-1], art.sentences[1:]):
                if not art.has_inter_relation(s1.id):
                    rel = Relation()
                    rel.article = art
                    rel.doc_id = art.id
                    rel.arg1s['parsed'] = [s1.tree.root] if not s1.tree.is_null() else []
                    rel.arg1_leaves = self.remove_leading_tailing_punc(s1.leaves)
                    rel.arg1_addr = [n.leaf_id for n in rel.arg1_leaves]
                    rel.arg1_sid = rel.arg1_leaves[-1].goto_tree().sent_id if len(rel.arg1_leaves) > 0 else -1
                    rel.arg1_text = ' '.join(n.value for n in rel.arg1_leaves)
                    rel.arg2s['parsed'] = [s2.tree.root] if not s2.tree.is_null() else []
                    rel.arg2_leaves = self.remove_leading_tailing_punc(s2.leaves)
                    rel.arg2_addr = [n.leaf_id for n in rel.arg2_leaves]
                    rel.arg2_sid = rel.arg2_leaves[0].goto_tree().sent_id if len(rel.arg2_leaves) > 0 else -1
                    rel.arg2_text = ' '.join(n.value for n in rel.arg2_leaves)
                    self.print_features(rel, ['NoRel'], to_file)

    def predict(self, test_file, predicted_file):
        Corpus.test_with_opennlp(test_file, self.model_file, predicted_file)

    def test(self):
        to_file = open(self.test_file, 'w')
        self.prepare_data(DEV_PARSE_PATH, DEV_REL_PATH, 'test', to_file)
        to_file.close()
        Corpus.test_with_opennlp(self.test_file, self.model_file, self.predicted_file)

    def train(self):
        to_file = open(self.train_file, 'w')
        # self.prepare_data(DEV_PARSE_PATH, DEV_REL_PATH, 'train', to_file)
        self.prepare_data(TRAIN_PARSE_PATH, TRAIN_REL_PATH, 'train', to_file)
        to_file.close()
        Corpus.train_with_opennlp(self.train_file, self.model_file)

    def print_performance(self):
        gold = [it.strip().split()[-1].split('|') for it in open(self.test_file)]
        pred = [it.strip().split()[-1] for it in open(self.predicted_file)]
        evaluate(gold, pred)

    def output_json_format(self, parse_path, rel_path):
        preds = [it.strip().split()[-1] for it in open(self.predicted_file)]
        rel_dict = Corpus.read_relations(rel_path)
        idx = 0
        for art in Corpus.read_parses(parse_path, rel_dict):
            for rel in art.relations:
                if rel.rel_type == 'Explicit':
                    # ......... some code omitted here .........
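A typical driver for this class, assuming the path constants and the Corpus/OpenNLP wrappers referenced above are importable from the surrounding project, might look roughly like this (hypothetical usage, not part of the original example):

clf = NonExplicit()
clf.train()              # write conll.nonexp.train and train the OpenNLP model
clf.test()               # write conll.nonexp.test and run the trained classifier
clf.print_performance()  # compare predicted senses against the gold labels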