This article collects typical usage examples of the Feature.extract_production_rules method from the Python module feature. If you are unsure what Feature.extract_production_rules does, how to call it, or want to see it used in real code, the curated example here may help. You can also look further into the containing class, feature.Feature, for more usage examples.
Below, 1 code example of the Feature.extract_production_rules method is shown; examples are ordered by popularity by default.
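For context, a production-rule feature records the grammar productions (e.g. S -> NP VP) found in the parse trees of a relation's arguments, a common feature for classifying non-explicit discourse relations. The snippet below is a minimal independent sketch of that idea using NLTK's Tree.productions(); it is not the feature.Feature implementation used in the example, and the parse string is invented for illustration.

# Hedged sketch of production-rule extraction with NLTK (not this project's code)
from nltk.tree import Tree

parse = Tree.fromstring("(S (NP (DT the) (NN market)) (VP (VBD fell)))")
# Tree.productions() yields one CFG rule per node, e.g. S -> NP VP, DT -> 'the'
rules = [str(p).replace(' ', '_') for p in parse.productions()]
print(rules)  # ['S_->_NP_VP', 'NP_->_DT_NN', "DT_->_'the'", ...]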
Example 1: __init__
# Required import: from feature import Feature [as alias]
# Or: from feature.Feature import extract_production_rules [as alias]
# Also used by this example (assumed to come from the surrounding project):
# Corpus, Relation, is_punc, evaluate, FILE_PATH and the *_PARSE_PATH / *_REL_PATH
# constants, plus collections.defaultdict from the standard library.
class NonExplicit:

    def __init__(self):
        self.train_file = FILE_PATH + '/../data/conll.nonexp.train'
        self.test_file = FILE_PATH + '/../data/conll.nonexp.test'
        self.model_file = FILE_PATH + '/../data/conll.nonexp.model'
        self.predicted_file = FILE_PATH + '/../data/conll.nonexp.test.predicted'
        self.feat_handle = Feature()

    def print_features(self, relation, labels, to_file):
        # Concatenate the individual feature groups into one feature vector
        # and write one training/test line per sense label.
        feat_vec = []
        feat_vec += self.feat_handle.extract_arg2_first3(relation)
        feat_vec += self.feat_handle.extract_dependency_rules(relation)
        feat_vec += self.feat_handle.extract_production_rules(relation)
        feat_vec += self.feat_handle.extract_word_pair(relation)
        feat_vec += self.feat_handle.extract_brown_cluster(relation)
        to_line = ' '.join(feat_vec)
        for label in labels:
            to_file.write('%s %s\n' % (to_line, label.encode('utf-8')))  # Python 2: encode unicode label

    def remove_leading_tailing_punc(self, leaves):
        # Drop a leading and/or trailing punctuation leaf, if present.
        if len(leaves) > 0:
            start = 0
            end = len(leaves)
            if is_punc(leaves[0].parent_node.value):
                start = 1
            if is_punc(leaves[-1].parent_node.value):
                end -= 1
            return leaves[start:end]
        else:
            return leaves

    def prepare_data(self, parse_path, rel_path, which, to_file):
        rel_dict = Corpus.read_relations(rel_path)
        articles = []
        dist = defaultdict(int)
        for art in Corpus.read_parses(parse_path, rel_dict):
            articles.append(art)
            for rel in art.relations:
                rel.article = art
                rel.get_arg_leaves()
                if rel.rel_type == 'Explicit':
                    continue
                labels = {s.replace(' ', '_') for s in rel.sense}
                for l in labels:
                    dist[l] += 1
                if which == 'test':
                    labels = ['|'.join(labels)]
                self.print_features(rel, labels, to_file)
        # add NoRel relations for adjacent sentence pairs with no inter-sentence relation
        for art in articles:
            for s1, s2 in zip(art.sentences[:-1], art.sentences[1:]):
                if not art.has_inter_relation(s1.id):
                    rel = Relation()
                    rel.article = art
                    rel.doc_id = art.id
                    rel.arg1s['parsed'] = [s1.tree.root] if not s1.tree.is_null() else []
                    rel.arg1_leaves = self.remove_leading_tailing_punc(s1.leaves)
                    rel.arg1_addr = [n.leaf_id for n in rel.arg1_leaves]
                    rel.arg1_sid = rel.arg1_leaves[-1].goto_tree().sent_id if len(rel.arg1_leaves) > 0 else -1
                    rel.arg1_text = ' '.join(n.value for n in rel.arg1_leaves)
                    rel.arg2s['parsed'] = [s2.tree.root] if not s2.tree.is_null() else []
                    rel.arg2_leaves = self.remove_leading_tailing_punc(s2.leaves)
                    rel.arg2_addr = [n.leaf_id for n in rel.arg2_leaves]
                    rel.arg2_sid = rel.arg2_leaves[0].goto_tree().sent_id if len(rel.arg2_leaves) > 0 else -1
                    rel.arg2_text = ' '.join(n.value for n in rel.arg2_leaves)
                    self.print_features(rel, ['NoRel'], to_file)

    def predict(self, test_file, predicted_file):
        Corpus.test_with_opennlp(test_file, self.model_file, predicted_file)

    def test(self):
        to_file = open(self.test_file, 'w')
        self.prepare_data(DEV_PARSE_PATH, DEV_REL_PATH, 'test', to_file)
        to_file.close()
        Corpus.test_with_opennlp(self.test_file, self.model_file, self.predicted_file)

    def train(self):
        to_file = open(self.train_file, 'w')
        # self.prepare_data(DEV_PARSE_PATH, DEV_REL_PATH, 'train', to_file)
        self.prepare_data(TRAIN_PARSE_PATH, TRAIN_REL_PATH, 'train', to_file)
        to_file.close()
        Corpus.train_with_opennlp(self.train_file, self.model_file)

    def print_performance(self):
        gold = [it.strip().split()[-1].split('|') for it in open(self.test_file)]
        pred = [it.strip().split()[-1] for it in open(self.predicted_file)]
        evaluate(gold, pred)

    def output_json_format(self, parse_path, rel_path):
        preds = [it.strip().split()[-1] for it in open(self.predicted_file)]
        rel_dict = Corpus.read_relations(rel_path)
        idx = 0
        for art in Corpus.read_parses(parse_path, rel_dict):
            for rel in art.relations:
                if rel.rel_type == 'Explicit':
                    # ......... some code omitted here .........
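A typical driver for this class, assuming the path constants and the Corpus/OpenNLP wrappers referenced above are importable from the surrounding project, might look roughly like this (hypothetical usage, not part of the original example):

clf = NonExplicit()
clf.train()              # write conll.nonexp.train and train the OpenNLP model
clf.test()               # write conll.nonexp.test and run the trained classifier
clf.print_performance()  # compare predicted senses against the gold labels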