本文整理汇总了Python中corpus.Corpus.read_parses方法的典型用法代码示例。如果您正苦于以下问题:Python Corpus.read_parses方法的具体用法?Python Corpus.read_parses怎么用?Python Corpus.read_parses使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类corpus.Corpus的用法示例。
在下文中一共展示了Corpus.read_parses方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: output_json_format
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import read_parses [as 别名]
def output_json_format(self, parse_path, rel_path):
    """Print one JSON record per non-explicit relation with its predicted sense.

    Predictions are read from ``self.predicted_file``: the last
    whitespace-separated token of each line is taken as the predicted sense.
    They are consumed in the same order in which non-explicit relations are
    encountered while iterating the articles, so the file must contain at
    least one line per such relation.

    @param parse_path: path handed to ``Corpus.read_parses``
    @param rel_path: path handed to ``Corpus.read_relations``
    """
    # Use a context manager so the predictions file is closed promptly
    # instead of being left to the garbage collector.
    with open(self.predicted_file) as pred_file:
        preds = [line.strip().split()[-1] for line in pred_file]

    rel_dict = Corpus.read_relations(rel_path)
    idx = 0
    for art in Corpus.read_parses(parse_path, rel_dict):
        for rel in art.relations:
            # Explicit relations have no entry in the predictions file.
            if rel.rel_type == 'Explicit':
                continue
            pred_sense = preds[idx]

            # 'EntRel' and 'NoRel' predictions name their own relation type;
            # every other predicted sense is reported as 'Implicit'.
            if pred_sense in ('EntRel', 'NoRel'):
                r_type = pred_sense
            else:
                r_type = 'Implicit'

            json_dict = {}
            json_dict['DocID'] = rel.doc_id
            json_dict['Type'] = r_type
            # Predicted labels use '_' where the JSON output uses spaces.
            json_dict['Sense'] = [pred_sense.replace('_', ' ')]
            # Token lists are always emitted empty by this method.
            json_dict['Connective'] = {'TokenList': []}
            json_dict['Arg1'] = {'TokenList': []}
            json_dict['Arg2'] = {'TokenList': []}

            # Parenthesised single-argument form: same output under the
            # Python 2 print statement, and valid syntax under Python 3.
            print(json.dumps(json_dict))
            idx += 1
示例2: prepare_data
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import read_parses [as 别名]
def prepare_data(self, parse_path, rel_path, which, to_file):
    """Attach connective leaves to explicit relations and print article features.

    For every article yielded by ``Corpus.read_parses``, each relation whose
    ``rel_type`` is ``'Explicit'`` gets its ``article`` back-reference set and
    ``get_conn_leaves()`` called; the article is then passed to
    ``self.print_features``.

    @param parse_path: path handed to ``Corpus.read_parses``
    @param rel_path: path handed to ``Corpus.read_relations``
    @param which: forwarded unchanged to ``self.print_features``
    @param to_file: forwarded unchanged to ``self.print_features``
    """
    rel_dict = Corpus.read_relations(rel_path)
    for art in Corpus.read_parses(parse_path, rel_dict):
        for rel in art.relations:
            # Only explicit relations are prepared here.
            if rel.rel_type != 'Explicit':
                continue
            rel.article = art
            rel.get_conn_leaves()
        # NOTE(review): the source this was recovered from had lost its
        # indentation; the call takes the article, so it is placed once per
        # article after the relation loop -- confirm against the upstream file.
        self.print_features(art, which, to_file)
示例3: prepare_data
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import read_parses [as 别名]
def prepare_data(self, parse_path, rel_path, which, to_file):
    """Print features for every explicit relation together with its sense labels.

    Each sense in ``rel.sense`` has spaces replaced by underscores and is kept
    only if it appears in ``SENSES``. When ``which == 'test'`` the surviving
    labels are collapsed into a single ``'|'``-joined string (wrapped in a
    one-element list); otherwise the set of labels is passed as is.

    @param parse_path: path handed to ``Corpus.read_parses``
    @param rel_path: path handed to ``Corpus.read_relations``
    @param which: data split name; ``'test'`` triggers label joining
    @param to_file: forwarded unchanged to ``self.print_features``
    """
    rel_dict = Corpus.read_relations(rel_path)
    for art in Corpus.read_parses(parse_path, rel_dict):
        for rel in art.relations:
            # Only explicit relations are used here.
            if rel.rel_type != 'Explicit':
                continue
            rel.article = art
            rel.get_conn_leaves()

            # Normalise label spelling, then drop senses outside SENSES.
            normalised = (sense.replace(' ', '_') for sense in rel.sense)
            labels = {label for label in normalised if label in SENSES}

            if which == 'test':
                # The labels live in a set, so the order of the joined
                # alternatives is whatever order the set iterates in.
                labels = ['|'.join(labels)]
            self.print_features(rel, labels, which, to_file)
示例4: parse
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import read_parses [as 别名]
def parse(self, parse_path, raw_home, output):
    """
    Run the processing steps over parsed articles and write relations as JSON.

    Articles are loaded from C{parse_path}, paired with their raw text,
    sorted by id, and passed through the connective, argument, explicit-sense
    and non-explicit-sense steps in that order. Progress messages are written
    to C{logs}.

    @param parse_path: json format parse file
    @param raw_home: raw text home; each article's raw text is read from
        C{raw_home + '/' + art.id}
    @param output: file-like object receiving one line per relation whose
        C{rel_type} is not C{'NoRel'}
    """
    which = 'test'

    articles = []
    for art in Corpus.read_parses(parse_path):
        art.read_raw_text(raw_home + '/' + art.id)
        art.set_article_level_word_id()
        articles.append(art)
    articles.sort(key=lambda x: x.id)
    print >> logs, "===read data compelete==="

    # Step 1: connective identification
    print >> logs, "===1. connective identification==="
    self._process_parsed_conn(articles, which)

    # Step 2: explicit argument extraction
    print >> logs, "===2. argument extraction==="
    self._process_parsed_arg(articles, which)

    # Step 3: explicit sense classification
    print >> logs, "===3. explicit sense classification==="
    self._process_exp_sense(articles, which)

    # Step 4: non-explicit sense classification
    print >> logs, "===4. nonexp sense classification==="
    self._process_nonexp_sense(articles, which)

    # Step 5: non-explicit argument post-processing. The step is currently
    # disabled; only its log line is emitted.
    print >> logs, "===5. nonexp arguments postproceesing==="
    # self._post_process_nonexp_arguments(articles, which)

    # Step 6: emit every relation except NoRel ones.
    print >> logs, "===6. convert into json format==="
    for art in articles:
        for rel in art.exp_relations + art.nonexp_relations:
            if rel.rel_type != 'NoRel':
                print >> output, rel.output_json_format()
    print >> logs, "===done.==="
示例5: prepare_data
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import read_parses [as 别名]
def prepare_data(self, parse_path, rel_path, which, to_file):
    """Print features for explicit relations and return the collected results.

    Every explicit relation gets its ``article`` back-reference set and its
    connective and argument leaves computed. When ``which == 'train'``,
    relations rejected by ``self.need_extract`` are skipped. The number of
    processed instances is written to ``logs``.

    @param parse_path: path handed to ``Corpus.read_parses``
    @param rel_path: path handed to ``Corpus.read_relations``
    @param which: data split name; ``'train'`` enables the extraction filter
    @param to_file: forwarded unchanged to ``self.print_features``
    @return: list of the values returned by ``self.print_features``, one per
        processed relation
    """
    processed = []
    rel_dict = Corpus.read_relations(rel_path)
    for art in Corpus.read_parses(parse_path, rel_dict):
        for rel in art.relations:
            if rel.rel_type != 'Explicit':
                continue
            rel.article = art
            rel.get_conn_leaves()
            rel.get_arg_leaves()
            # Filter added 2015/9/29: applied to training data only.
            if which == 'train' and not self.need_extract(rel):
                continue
            processed.append(self.print_features(rel, which, to_file))
    # One entry is appended per processed relation, so the list length is
    # the instance count; no separate counter is needed.
    print >> logs, "processed %d instances" % len(processed)
    return processed
示例6: prepare_data
# 需要导入模块: from corpus import Corpus [as 别名]
# 或者: from corpus.Corpus import read_parses [as 别名]
def prepare_data(self, parse_path, rel_path, which, to_file):
    """Print features for non-explicit relations, then for synthetic NoRel pairs.

    First pass: every relation gets its ``article`` back-reference and
    argument leaves; explicit relations are then skipped, and the remaining
    ones are printed with their senses (spaces replaced by underscores).
    When ``which == 'test'`` the labels are collapsed into one ``'|'``-joined
    string wrapped in a list.

    Second pass: for each pair of consecutive sentences ``(s1, s2)`` in an
    article where ``art.has_inter_relation(s1.id)`` is false, a new
    ``Relation`` labelled ``'NoRel'`` is built from the two sentences and
    printed.

    @param parse_path: path handed to ``Corpus.read_parses``
    @param rel_path: path handed to ``Corpus.read_relations``
    @param which: data split name; ``'test'`` triggers label joining
    @param to_file: forwarded unchanged to ``self.print_features``
    """
    rel_dict = Corpus.read_relations(rel_path)
    articles = []
    # Per-label counts. NOTE(review): nothing in the visible code reads this
    # tally; kept in case it is inspected elsewhere -- confirm and remove.
    dist = defaultdict(int)
    for art in Corpus.read_parses(parse_path, rel_dict):
        articles.append(art)
        for rel in art.relations:
            # Done for every relation, explicit ones included.
            rel.article = art
            rel.get_arg_leaves()
            if rel.rel_type == 'Explicit':
                continue
            labels = {sense.replace(' ', '_') for sense in rel.sense}
            for label in labels:
                dist[label] += 1
            if which == 'test':
                # The labels live in a set, so the order of the joined
                # alternatives is whatever order the set iterates in.
                labels = ['|'.join(labels)]
            self.print_features(rel, labels, to_file)

    # add NoRel relations
    for art in articles:
        for s1, s2 in zip(art.sentences[:-1], art.sentences[1:]):
            if art.has_inter_relation(s1.id):
                continue
            rel = Relation()
            rel.article = art
            rel.doc_id = art.id

            # Arg1 is the first sentence of the pair; its sentence id is
            # taken from its last remaining leaf, or -1 if none remain.
            rel.arg1s['parsed'] = [s1.tree.root] if not s1.tree.is_null() else []
            rel.arg1_leaves = self.remove_leading_tailing_punc(s1.leaves)
            rel.arg1_addr = [n.leaf_id for n in rel.arg1_leaves]
            rel.arg1_sid = rel.arg1_leaves[-1].goto_tree().sent_id if len(rel.arg1_leaves) > 0 else -1
            rel.arg1_text = ' '.join(n.value for n in rel.arg1_leaves)

            # Arg2 is the second sentence of the pair; its sentence id is
            # taken from its first remaining leaf, or -1 if none remain.
            rel.arg2s['parsed'] = [s2.tree.root] if not s2.tree.is_null() else []
            rel.arg2_leaves = self.remove_leading_tailing_punc(s2.leaves)
            rel.arg2_addr = [n.leaf_id for n in rel.arg2_leaves]
            rel.arg2_sid = rel.arg2_leaves[0].goto_tree().sent_id if len(rel.arg2_leaves) > 0 else -1
            rel.arg2_text = ' '.join(n.value for n in rel.arg2_leaves)

            self.print_features(rel, ['NoRel'], to_file)