本文整理汇总了Python中corpus.Corpus.test_with_opennlp方法的典型用法代码示例。如果您正苦于以下问题:Python Corpus.test_with_opennlp方法的具体用法?Python Corpus.test_with_opennlp怎么用?Python Corpus.test_with_opennlp使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类corpus.Corpus
的用法示例。
在下文中一共展示了Corpus.test_with_opennlp方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _process_parsed_conn
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
def _process_parsed_conn(self, articles, which='test'):
    """Generate an explicit relation for each connective classified as true.

    Writes connective features for every article to a temp file, runs the
    OpenNLP model over it, then for every candidate connective labelled
    '1' attaches a new Explicit ``Relation`` to its article.

    :param articles: iterable of article objects to process (mutated in place)
    :param which: dataset tag forwarded to the feature printer
    """
    connParser = Connective()
    conn_feat_name = FILE_PATH + '/../tmp/conn.feat'
    checked_conns = []
    # `with` guarantees the feature file is flushed and closed even on
    # error (the original leaked the handle if print_features raised)
    with codecs.open(conn_feat_name, 'w', 'utf-8') as conn_feat_file:
        for art in articles:
            checked_conns.append(connParser.print_features(art, which, conn_feat_file))
    conn_pred_name = FILE_PATH + '/../tmp/conn.pred'
    Corpus.test_with_opennlp(conn_feat_name, connParser.model_file, conn_pred_name)
    # last whitespace-separated token on each prediction line is the label
    with codecs.open(conn_pred_name, 'r', 'utf-8') as pred_file:
        conn_res = [l.strip().split()[-1] for l in pred_file]
    assert len(checked_conns) == len(articles), 'article size not match'
    s = 0
    for art, cand_conns in zip(articles, checked_conns):
        length = len(cand_conns)
        cand_res = conn_res[s:s+length]
        s += length
        for conn, label in zip(cand_conns, cand_res):
            if label == '1':
                rel = Relation()
                rel.doc_id = art.id
                rel.rel_type = 'Explicit'
                rel.article = art
                rel.conn_leaves = conn
                rel.conn_addr = [n.leaf_id for n in conn]
                art.exp_relations.append(rel)
    assert s == len(conn_res), 'conn size not match'
示例2: _process_nonexp_sense
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
def _process_nonexp_sense(self, articles, which):
    """Classify the sense of every non-explicit relation.

    Generates non-explicit relation candidates per article, writes their
    features, runs the OpenNLP model, and stores the predicted sense on
    each relation.  'EntRel' and 'NoRel' predictions become the relation
    type itself; any other sense marks the relation 'Implicit'.

    :param articles: iterable of article objects to process (mutated in place)
    :param which: dataset tag (unused here beyond signature parity)
    """
    nonexp_feat_name = FILE_PATH + '/../tmp/nonexp.feat'
    nonexpParser = NonExplicit()  # change name later
    # `with` closes the feature file even if feature printing raises
    with codecs.open(nonexp_feat_name, 'w', 'utf-8') as nonexp_sense_file:
        for art in articles:
            self.generate_nonexp_relations(art)
            for rel in art.nonexp_relations:
                nonexpParser.print_features(rel, ['xxxxx'], nonexp_sense_file)
    nonexp_pred_name = FILE_PATH + '/../tmp/nonexp.pred'
    Corpus.test_with_opennlp(nonexp_feat_name, nonexpParser.model_file, nonexp_pred_name)
    with codecs.open(nonexp_pred_name, 'r', 'utf-8') as pred_file:
        nonexp_res = [l.strip().split()[-1] for l in pred_file]
    # rid walks predictions in exactly the order features were written
    rid = 0
    for art in articles:
        for rel in art.nonexp_relations:
            pred_sense = nonexp_res[rid]
            rel.rel_type = pred_sense if pred_sense in ('EntRel', 'NoRel') else 'Implicit'
            rel.sense = [pred_sense]
            rid += 1
    assert len(nonexp_res) == rid, 'nonexp relations size not match'
示例3: _process_parsed_argpos
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
def _process_parsed_argpos(self, articles, which='test'):
    """Run the argument-position classifier over all explicit relations.

    Writes one feature record per explicit relation, runs the OpenNLP
    model, and returns the predicted position labels in the same
    article/relation iteration order.

    :param articles: iterable of article objects
    :param which: dataset tag forwarded to the feature printer
    :return: list of predicted labels, one per explicit relation
    """
    argpos_feat_name = FILE_PATH + '/../tmp/argpos.feat'
    argpos_checked = []
    argposParser = ArgPos()
    # `with` closes the feature file even if feature printing raises
    with codecs.open(argpos_feat_name, 'w', 'utf-8') as argpos_feat_file:
        for art in articles:
            for rel in art.exp_relations:
                argpos_checked.append(argposParser.print_features(rel, which, argpos_feat_file))
    argpos_pred_name = FILE_PATH + '/../tmp/argpos.pred'
    Corpus.test_with_opennlp(argpos_feat_name, argposParser.model_file, argpos_pred_name)
    with codecs.open(argpos_pred_name, 'r', 'utf-8') as pred_file:
        argpos_res = [l.strip().split()[-1] for l in pred_file]
    return argpos_res
示例4: _process_exp_sense
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
def _process_exp_sense(self, articles, which='test'):
    """Classify the sense of every explicit relation.

    Writes one feature record per explicit relation, runs the OpenNLP
    model, and stores the predicted sense (as a one-element list) on
    each relation.

    :param articles: iterable of article objects (mutated in place)
    :param which: dataset tag forwarded to the feature printer
    """
    exp_feat_name = FILE_PATH + '/../tmp/exp.feat'
    expParser = Explicit()
    # `with` closes the feature file even if feature printing raises
    with codecs.open(exp_feat_name, 'w', 'utf-8') as exp_sense_file:
        for art in articles:
            for rel in art.exp_relations:
                expParser.print_features(rel, ['xxxxx'], which, exp_sense_file)
    exp_pred = FILE_PATH + '/../tmp/exp.pred'
    Corpus.test_with_opennlp(exp_feat_name, expParser.model_file, exp_pred)
    with codecs.open(exp_pred, 'r', 'utf-8') as pred_file:
        exp_res = [l.strip().split()[-1] for l in pred_file]
    # rid walks predictions in exactly the order features were written
    rid = 0
    for art in articles:
        for rel in art.exp_relations:
            pred_sense = exp_res[rid]
            rel.sense = [pred_sense]
            rid += 1
示例5: _process_exp_sense
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
def _process_exp_sense(self, articles, which='test'):
    """Classify the sense of every explicit relation.

    Writes one feature record per explicit relation, runs the OpenNLP
    model, maps each raw predicted label through LABEL_SENSES_MAP, and
    stores the result (as a one-element list) on each relation.

    :param articles: iterable of article objects (mutated in place)
    :param which: dataset tag forwarded to the feature printer
    """
    exp_feat_name = FILE_PATH + '/../tmp/exp.feat'
    expParser = Explicit()
    # plain open() kept intentionally (this variant does not use codecs);
    # `with` closes the handle even if feature printing raises
    with open(exp_feat_name, 'w') as exp_sense_file:
        for art in articles:
            for rel in art.exp_relations:
                expParser.print_features(rel, ['Conjunction'], which, exp_sense_file)
    exp_vec = FILE_PATH + '/../tmp/exp.vec'
    exp_pred = FILE_PATH + '/../tmp/exp.pred'
    # alternate SVM backend kept for reference:
    # Corpus.test_with_svm(exp_feat_name, expParser.feat_map_file, exp_vec, expParser.model_file, exp_pred)
    Corpus.test_with_opennlp(exp_feat_name, expParser.model_file, exp_pred)
    with open(exp_pred, 'r') as pred_file:
        exp_res = [LABEL_SENSES_MAP[l.strip().split()[-1]] for l in pred_file]
    # rid walks predictions in exactly the order features were written
    rid = 0
    for art in articles:
        for rel in art.exp_relations:
            pred_sense = exp_res[rid]
            rel.sense = [pred_sense]
            rid += 1
示例6: test
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
def test(self):
    """Build the dev-set feature file and run the OpenNLP model on it.

    Prepares data from DEV_PARSE_PATH / DEV_REL_PATH into self.test_file,
    then writes predictions to self.predicted_file.
    """
    # `with` guarantees the feature file is flushed and closed before the
    # classifier reads it, even if prepare_data raises
    with open(self.test_file, 'w') as to_file:
        self.prepare_data(DEV_PARSE_PATH, DEV_REL_PATH, 'test', to_file)
    Corpus.test_with_opennlp(self.test_file, self.model_file, self.predicted_file)
示例7: predict
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
def predict(self, test_file, predicted_file):
    """Run this component's trained OpenNLP model on a feature file.

    :param test_file: path to the input feature file
    :param predicted_file: path the predictions are written to
    """
    Corpus.test_with_opennlp(test_file, self.model_file, predicted_file)
示例8: _process_parsed_arg
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
def _process_parsed_arg(self, articles, which='test'):
    """Identify arg1/arg2 leaf spans for every explicit relation.

    Two classifier passes: pass 1 labels candidate constituents in the
    connective's own sentence; pass 2 labels the previous sentence's root
    as a fallback arg1 when pass 1 yields no arg1 leaves.  Results are
    merged onto each relation (leaves, addresses, sentence ids, text).

    :param articles: iterable of article objects (mutated in place)
    :param which: dataset tag forwarded to the feature printers
    """
    arg_feat_name = FILE_PATH + '/../tmp/arg.feat'
    arg_feat_file = codecs.open(arg_feat_name, 'w', 'utf-8')
    arg_checked = []
    argParser = Argument()
    # pass 1: one feature group per relation, candidates from the
    # connective's sentence; print_features returns the candidate list
    for art in articles:
        for rel in art.exp_relations:
            arg_checked.append(argParser.print_features(rel, which, arg_feat_file))
    arg_feat_file.close()
    arg_pred_name = FILE_PATH + '/../tmp/arg.pred'
    Corpus.test_with_opennlp(arg_feat_name, argParser.model_file, arg_pred_name)
    arg_res = [l.strip().split()[-1] for l in codecs.open(arg_pred_name, 'r', 'utf-8')]
    # pass 2: features for the previous sentence's root, emitted only when
    # the connective is not in the first sentence AND the previous tree parses
    tmp_feat_name = FILE_PATH+'/../tmp/arg.prev.feat'
    tmp_file = codecs.open(tmp_feat_name, 'w', 'utf-8')
    for art in articles:
        for rel in art.exp_relations:
            conn_sid = rel.conn_leaves[0].goto_tree().sent_id
            if conn_sid > 0:
                prev_tree = rel.article.sentences[conn_sid-1].tree
                if not prev_tree.is_null():
                    prev_root = prev_tree.root
                    argParser.print_features(rel, which, tmp_file, prev_root)
    tmp_file.close()
    tmp_pred_name = FILE_PATH + '/../tmp/arg.prev.pred'
    Corpus.test_with_opennlp(tmp_feat_name, argParser.model_file, tmp_pred_name)
    arg_prev_res = [l.strip().split()[-1] for l in codecs.open(tmp_pred_name, 'r', 'utf-8')]
    # merge phase: rid walks relations, s walks pass-1 predictions,
    # index walks pass-2 (previous-sentence) predictions
    rid = 0
    s = 0
    index = 0
    for art in articles:
        for rel in art.exp_relations:
            args = arg_checked[rid]
            labels = arg_res[s:s+len(args)]
            rid += 1
            s += len(args)
            merge_result = argParser.merge(rel, args, labels)
            rel.arg1_leaves = merge_result['arg1'] if 'arg1' in merge_result else []
            conn_sid = args[0].goto_tree().sent_id
            # if the current sentence couldn't resolve any arg1 leaves,
            # fall back to the whole previous sentence when pass 2 said 'arg1'
            # NOTE(review): pass 2 skipped relations whose previous tree
            # is_null(), but `index` advances whenever conn_sid > 0 — these
            # can desync if a null previous tree ever occurs; verify.
            if len(rel.arg1_leaves) == 0 and conn_sid > 0 and arg_prev_res[index] == 'arg1':
                rel.arg1_leaves = rel.article.sentences[conn_sid-1].leaves
            if conn_sid > 0:
                index += 1
            rel.arg1_leaves = self.remove_leading_tailing_punc(rel.arg1_leaves)
            rel.arg1_addr = [n.leaf_id for n in rel.arg1_leaves]
            rel.arg1_sid = rel.arg1_leaves[-1].goto_tree().sent_id if len(rel.arg1_leaves) > 0 else -1
            rel.arg1_text = ' '.join(n.value for n in rel.arg1_leaves)
            rel.arg2_leaves = merge_result['arg2'] if 'arg2' in merge_result else []
            rel.arg2_leaves = self.remove_leading_tailing_punc(rel.arg2_leaves)
            rel.arg2_addr = [n.leaf_id for n in rel.arg2_leaves]
            rel.arg2_sid = rel.arg2_leaves[0].goto_tree().sent_id if len(rel.arg2_leaves) > 0 else -1
            rel.arg2_text = ' '.join(n.value for n in rel.arg2_leaves)
    assert len(arg_prev_res) == index, 'arg prev size not match'
    assert len(arg_res) == s, 'arg candidate size not match'
示例9: _post_process_nonexp_arguments
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import test_with_opennlp [as 别名]
def _post_process_nonexp_arguments(self, articles, which='test'):
    """Strip attribution clauses from the arguments of Implicit relations.

    Runs a clause-level attribution classifier over every clause of the
    arg1 and arg2 sentences of each Implicit relation, then rebuilds each
    argument keeping only clauses NOT predicted as attribution ('1');
    single-leaf punctuation clauses are always kept.

    :param articles: iterable of article objects (mutated in place)
    :param which: dataset tag forwarded to the feature printer
    """
    attrParser = Attribution()
    attr_feat_name = FILE_PATH + '/../tmp/attr.feat'
    attr_file = codecs.open(attr_feat_name, 'w', 'utf-8')
    # pass 1: emit one feature line per clause — arg1's sentence clauses
    # first, then arg2's, in article/relation order
    for art in articles:
        for rel in art.nonexp_relations:
            if rel.rel_type == 'Implicit':
                s1_clauses = art.sentences[rel.arg1_sid].clauses
                s2_clauses = art.sentences[rel.arg2_sid].clauses
                # for argument 1
                for idx, clause in enumerate(s1_clauses):
                    prev_clause = s1_clauses[idx-1] if idx > 0 else None
                    next_clause = s1_clauses[idx+1] if idx < len(s1_clauses) -1 else None
                    attrParser.print_features(clause, prev_clause, next_clause, rel.arg1_leaves, which, attr_file)
                # for argument 2
                for idx, clause in enumerate(s2_clauses):
                    prev_clause = s2_clauses[idx-1] if idx > 0 else None
                    next_clause = s2_clauses[idx+1] if idx < len(s2_clauses) -1 else None
                    attrParser.print_features(clause, prev_clause, next_clause, rel.arg2_leaves, which, attr_file)
    attr_file.close()
    attr_pred_name = FILE_PATH + '/../tmp/attr.pred'
    Corpus.test_with_opennlp(attr_feat_name, attrParser.model_file, attr_pred_name)
    attr_res = [l.strip().split()[-1] for l in codecs.open(attr_pred_name, 'r', 'utf-8')]
    # combine results: idx must replay the exact clause order used when
    # writing features above — one prediction is consumed per clause
    idx = 0
    for art in articles:
        for rel in art.nonexp_relations:
            if rel.rel_type == 'Implicit':
                s1_clauses = art.sentences[rel.arg1_sid].clauses
                s2_clauses = art.sentences[rel.arg2_sid].clauses
                # for argument 1
                arg1_leaves = []
                for clause in s1_clauses:
                    # lone punctuation clause: always keep, but still
                    # consume its prediction slot below
                    if len(clause) == 1 and is_punc(clause[0].parent_node.value):
                        arg1_leaves += clause
                    elif attr_res[idx] != '1':
                        arg1_leaves += clause
                    idx += 1
                rel.arg1_leaves = arg1_leaves
                # for argument 2
                arg2_leaves = []
                for clause in s2_clauses:
                    if len(clause) == 1 and is_punc(clause[0].parent_node.value):
                        arg2_leaves += clause
                    elif attr_res[idx] != '1':
                        arg2_leaves += clause
                    idx += 1
                rel.arg2_leaves = arg2_leaves
                rel.arg1_leaves = self.remove_leading_tailing_punc(rel.arg1_leaves)
                rel.arg1_addr = [n.leaf_id for n in rel.arg1_leaves]
                rel.arg1_text = ' '.join(n.value for n in rel.arg1_leaves)
                rel.arg2_leaves = self.remove_leading_tailing_punc(rel.arg2_leaves)
                rel.arg2_addr = [n.leaf_id for n in rel.arg2_leaves]
                rel.arg2_text = ' '.join(n.value for n in rel.arg2_leaves)
    assert len(attr_res) == idx, 'attrbution counts not match'