This article collects and summarizes typical usage examples of the Python method util.flatten. If you have been wondering how exactly util.flatten is used, or what it is good for, the hand-picked code examples below may help. You can also explore further usage examples from the util module it belongs to.
The following section shows 15 code examples of util.flatten, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
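Before the examples, a quick note on what flatten itself typically does in these projects: it collapses a nested list (a list of lists) into a single flat list. The exact implementation differs from repository to repository; the snippet below is only a minimal sketch of one common one-level definition, not code taken from any of the projects shown here. Some of the later examples (Example 2 and Example 5) pass scalars or arbitrarily nested data, which implies a recursive variant instead.

def flatten(list_of_lists):
    # One-level flatten: [[1, 2], [3]] -> [1, 2, 3]
    return [item for sublist in list_of_lists for item in sublist]

print(flatten([["a", "b"], ["c"]]))  # ['a', 'b', 'c']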
Example 1: issue_to_changelog
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def issue_to_changelog(issue):
    return dict(
        [
            ('key', issue.key),
            (
                'changelog',
                [
                    (u'Created', parse_date(issue.fields.created))
                ] + flatten([
                    [
                        (i.toString, parse_date(h.created))
                        for i in h.items if i.field == 'status'
                    ] for h in issue.changelog.histories
                ])
            )
        ]
    )
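The flatten call in Example 1 is what keeps the changelog a single flat list of (status, date) tuples: the inner comprehension yields one such list per history entry, so without flattening the result would be a list of lists. A made-up illustration of that shape (the real history objects come from the jira client and are not reproduced here), assuming the one-level flatten sketched above:

per_history = [[('In Progress', '2020-01-02')], [('Done', '2020-01-05'), ('Reopened', '2020-01-09')]]
flat = [item for sublist in per_history for item in sublist]
# [('In Progress', '2020-01-02'), ('Done', '2020-01-05'), ('Reopened', '2020-01-09')]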
Example 2: encode
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def encode(self, data=None):
    """
    Use a generator to yield each encoded bit. This supports being able to
    encode a list of values, where each value will be sequentially encoded.
    This function can also encode single values.
    @param data: The data to encode. If it isn't provided, the encoder's
    data is used.
    """
    if data is None:
        data = self.raw_data
    # Make a list to account for single inputs
    for x in flatten([data]):
        for bit in flatten([self._encode(x)]):
            yield bit
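Because encode wraps both the input and each per-value encoding in an extra list before flattening, the flatten used here has to tolerate scalars, which suggests a recursive variant rather than the one-level sketch above. The usage sketch below is self-contained and entirely hypothetical: ScalarEncoder, its _encode rule, and this recursive flatten are stand-ins, not the project's real classes.

def flatten(x):
    # Assumed recursive flatten: scalars pass through as single-item lists.
    if isinstance(x, (list, tuple)):
        return [item for element in x for item in flatten(element)]
    return [x]

class ScalarEncoder(object):
    def __init__(self, raw_data=None):
        self.raw_data = raw_data

    def _encode(self, value):
        # Hypothetical rule: two bits per value (sign bit, parity bit).
        return [1 if value > 0 else 0, value % 2]

    def encode(self, data=None):
        # Same body as Example 2 above.
        if data is None:
            data = self.raw_data
        for x in flatten([data]):
            for bit in flatten([self._encode(x)]):
                yield bit

print(list(ScalarEncoder().encode(3)))       # [1, 1]
print(list(ScalarEncoder().encode([3, 4])))  # [1, 1, 1, 0]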
Example 3: load_eval_data
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def load_eval_data(self):
    eval_data = []
    eval_tensors = []
    coref_eval_data = []
    with open(self.config["eval_path"]) as f:
        eval_examples = [json.loads(jsonline) for jsonline in f.readlines()]
    populate_sentence_offset(eval_examples)
    for doc_id, example in enumerate(eval_examples):
        doc_tensors = []
        num_mentions_in_doc = 0
        for e in self.split_document_example(example):
            # Because each batch = 1 document at test time, we do not need to offset cluster ids.
            e["cluster_id_offset"] = 0
            e["doc_id"] = doc_id + 1
            doc_tensors.append(self.tensorize_example(e, is_training=False))
            # num_mentions_in_doc += len(e["coref"])
        # assert num_mentions_in_doc == len(util.flatten(example["clusters"]))
        eval_tensors.append(doc_tensors)
        eval_data.extend(srl_eval_utils.split_example_for_eval(example))
        coref_eval_data.append(example)
    print("Loaded {} eval examples.".format(len(eval_data)))
    return eval_data, eval_tensors, coref_eval_data
Example 4: count
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def count(data_file):
    f = open(data_file)
    max_num_sp = 0
    overlap, total = 0, 0
    for i, line in enumerate(f):
        # print('---', line)
        data = json.loads(line)
        clusters = util.flatten(data['clusters'])
        clusters = [tuple(c) for c in clusters]
        for c1 in clusters:
            for c2 in clusters:
                if c1 == c2:
                    continue
                total += 1
                if is_overlap(c1, c2) or is_overlap(c2, c1):
                    overlap += 1
                    # print('overlap', c1, c2)
                # else:
                #     print('non-overlap', c1, c2)
    print(overlap, total, overlap * 100.0 / total)
    print('max_num_sp', max_num_sp)
Example 5: intFloor
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def intFloor(*args):
    return [int(math.floor(x)) for x in flatten(args)]
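A short hedged usage sketch for intFloor: since *args arrives as a tuple that may mix numbers and lists, the flatten it relies on must accept both, so the assumed recursive variant is repeated here to keep the snippet runnable.

import math

def flatten(x):
    # Assumed recursive flatten, repeated for self-containment.
    if isinstance(x, (list, tuple)):
        return [item for element in x for item in flatten(element)]
    return [x]

def intFloor(*args):
    return [int(math.floor(x)) for x in flatten(args)]

print(intFloor(1.9, [2.5, 3.1]))  # [1, 2, 3]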
Example 6: print_predictions
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def print_predictions(example):
    words = util.flatten(example["sentences"])
    for cluster in example["predicted_clusters"]:
        print(u"Predicted cluster: {}".format([" ".join(words[m[0]:m[1] + 1]) for m in cluster]))
Example 7: finalize
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def finalize(self):
    merged_clusters = []
    for c1 in self.clusters.values():
        existing = None
        for m in c1:
            for c2 in merged_clusters:
                if m in c2:
                    existing = c2
                    break
            if existing is not None:
                break
        if existing is not None:
            print("Merging clusters (shouldn't happen very often.)")
            existing.update(c1)
        else:
            merged_clusters.append(set(c1))
    merged_clusters = [list(c) for c in merged_clusters]
    all_mentions = util.flatten(merged_clusters)
    assert len(all_mentions) == len(set(all_mentions))
    return {
        "doc_key": self.doc_key,
        "sentences": self.sentences,
        "speakers": self.speakers,
        "constituents": self.span_dict_to_list(self.constituents),
        "ner": self.span_dict_to_list(self.ner),
        "clusters": merged_clusters
    }
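The closing assert relies on flatten to pool every mention span out of the merged clusters and then checks that none appears twice, i.e. that the merge really produced disjoint clusters. A toy version of that invariant check, assuming the one-level flatten:

def flatten(list_of_lists):
    return [item for sublist in list_of_lists for item in sublist]

merged_clusters = [[(0, 0), (4, 4)], [(2, 3)]]
all_mentions = flatten(merged_clusters)
assert len(all_mentions) == len(set(all_mentions))  # passes: no span is shared between clusters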
Example 8: papers_file
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def papers_file(self):
    for file_obj in util.flatten(self.input()):
        if 'paper' in file_obj.path:
            return file_obj
Example 9: get_children
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def get_children(self) -> List[AstNode]:
    return util.flatten(x.get_children() for x in self.children)
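Here flatten merges the per-child descendant lists into one flat list. The sketch below is hypothetical: this AstNode, and the choice that a leaf returns itself, are assumptions made only so the snippet runs end to end.

from typing import List

def flatten(list_of_lists):
    return [item for sublist in list_of_lists for item in sublist]

class AstNode:
    def __init__(self, name, children=None):
        self.name = name
        self.children = children or []

    def get_children(self) -> List["AstNode"]:
        # Hypothetical leaf behaviour: a node with no children yields itself,
        # so the recursion bottoms out with something to flatten.
        if not self.children:
            return [self]
        return flatten(x.get_children() for x in self.children)

tree = AstNode("root", [AstNode("a"), AstNode("b", [AstNode("c")])])
print([n.name for n in tree.get_children()])  # ['a', 'c']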
Example 10: evaluate
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def evaluate(fname):
    p, r, f1 = [], [], []
    pronoun_text = defaultdict(int)
    num_gold_pairs, num_pred_pairs = 0, 0
    total_gold_singletons, total_pred_singletons, total_singleton_intersection = 0, 0, 0
    with open(fname) as f:
        for line in f:
            datum = json.loads(line)
            tokens = flatten(datum['sentences'])
            # pronouns = flatten(datum['clusters'])
            pair_fn = get_mention_pairs
            # for pidx in pronouns:
            #     pronoun_text[(tokens[pidx].lower())] += 1
            gold_pronoun_mention_pairs, gold_singletons = pair_fn(datum['clusters'], flatten(datum['clusters']))
            pred_pronoun_mention_pairs, pred_singletons = pair_fn(datum['predicted_clusters'], flatten(datum['predicted_clusters']))
            total_gold_singletons += len(gold_singletons)
            total_pred_singletons += len(pred_singletons)
            total_singleton_intersection += len(gold_singletons.intersection(pred_singletons))
            intersection = gold_pronoun_mention_pairs.intersection(pred_pronoun_mention_pairs)
            num_gold_pairs += len(gold_pronoun_mention_pairs)
            num_pred_pairs += len(pred_pronoun_mention_pairs)
            this_recall = len(intersection) / len(gold_pronoun_mention_pairs) if len(gold_pronoun_mention_pairs) > 0 else 1.0
            this_prec = len(intersection) / len(pred_pronoun_mention_pairs) if len(pred_pronoun_mention_pairs) > 0 else 1.0
            this_f1 = 2 * this_recall * this_prec / (this_recall + this_prec) if this_recall + this_prec > 0 else 0
            p += [this_prec]
            r += [this_recall]
            f1 += [this_f1]
    print('gold_singletons: {}, pred_singletons: {} intersection: {}'.format(total_gold_singletons, total_pred_singletons, total_singleton_intersection))
    print('num_gold: {}, num_pred: {}, P: {}, R: {} F1: {}'.format(num_gold_pairs, num_pred_pairs, sum(p) / len(p), sum(r) / len(r), sum(f1) / len(f1)))
    # print(sum(pronoun_text.values()), sorted(list(pronoun_text.items()), key=lambda k: k[1]))
Example 11: print_clusters
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def print_clusters(data_file):
    f = open(data_file)
    for i, line in enumerate(f):
        data = json.loads(line)
        text = util.flatten(data['sentences'])
        # clusters = [[text[s:e+1] for s, e in cluster] for cluster in data['clusters']]
        # print(text)
        for ci, cluster in enumerate(data['clusters']):
            spans = [text[s:e + 1] for s, e in cluster]
            print(i, ci, spans)
        if i > 5:
            break
Example 12: compare_json
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def compare_json(json1, json2):
    json1 = read_file(json1)
    json2 = read_file(json2)
    for i, (l1, l2) in enumerate(zip(json1, json2)):
        assert l1['doc_key'] == l2['doc_key']
        if tuple(util.flatten(l1['sentences'])) != tuple(util.flatten(l2['sentences'])):
            print(i, l1['doc_key'], list(enumerate(util.flatten(l1['sentences']))), list(enumerate(util.flatten(l2['sentences']))))
            for j, (w1, w2) in enumerate(zip(util.flatten(l1['sentences']), util.flatten(l2['sentences']))):
                if w1 != w2:
                    print(j, w1, w2)
            break
Example 13: avg_len
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def avg_len(data_file):
    f = open(data_file)
    total = 0
    max_num_sp = 0
    segments = []
    for i, line in enumerate(f):
        # print('---', line)
        data = json.loads(line)
        text = util.flatten(data['sentences'])
        segments.append(len(data['sentences']))
        total += len(text)
        max_num_sp = max(max_num_sp, len(text))
    print(total / i)
    print(max_num_sp)
    print(len(segments), sum(segments) / len(segments), max(segments), sum([1 for s in segments if s == 1]))
Example 14: convert
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def convert(json_file, tsv_file):
    data = read_json(json_file)
    tsv = read_tsv_file(tsv_file) if tsv_file is not None else None
    predictions = ['\t'.join(['ID', 'A-coref', 'B-coref'])]
    for key, datum in data.items():
        prediction = data[key]
        sents = util.flatten(prediction['sentences'])
        if tsv is not None:
            print(list(enumerate(tsv[key])))
            a_offset, b_offset, pronoun_offset = tuple(map(int, tsv[key][5].split(':'))), tuple(map(int, tsv[key][8].split(':'))), tuple(map(int, tsv[key][3].split(':')))
            assert ' '.join(sents[a_offset[0]:a_offset[1]]) == tsv[key][4], (sents[a_offset[0]:a_offset[1]], tsv[key][4])
            assert ' '.join(sents[b_offset[0]:b_offset[1]]) == tsv[key][7], (sents[b_offset[0]:b_offset[1]], tsv[key][7])
            assert ' '.join(sents[pronoun_offset[0]:pronoun_offset[1]]) == tsv[key][2], (sents[pronoun_offset[0]:pronoun_offset[1]], tsv[key][2])
            # continue
        pronoun_cluster = find_pronoun_cluster(prediction, prediction['pronoun_subtoken_span'])
        a_coref, b_coref = 'FALSE', 'FALSE'
        a_text, b_text = (tsv[key][4], tsv[key][7]) if tsv is not None else (None, None)
        for span in pronoun_cluster:
            a_aligned = is_aligned(span, prediction['a_subtoken_span']) if tsv is None else is_substring_aligned(span, sents, a_text)
            b_aligned = is_aligned(span, prediction['b_subtoken_span']) if tsv is None else is_substring_aligned(span, sents, b_text)
            if a_aligned:
                a_coref = 'TRUE'
            if b_aligned:
                b_coref = 'TRUE'
        predictions += ['\t'.join([key, a_coref, b_coref])]
    # write file
    with open(json_file.replace('jsonlines', 'tsv'), 'w') as f:
        f.write('\n'.join(predictions))
Example 15: tensorize_example
# Required import: import util [as alias]
# Or: from util import flatten [as alias]
def tensorize_example(self, example, is_training):
    clusters = example["clusters"]
    gold_mentions = sorted(tuple(m) for m in util.flatten(clusters))
    gold_mention_map = {m: i for i, m in enumerate(gold_mentions)}
    cluster_ids = np.zeros(len(gold_mentions))
    for cluster_id, cluster in enumerate(clusters):
        for mention in cluster:
            cluster_ids[gold_mention_map[tuple(mention)]] = cluster_id + 1
    sentences = example["sentences"]
    num_words = sum(len(s) for s in sentences)
    speakers = util.flatten(example["speakers"])
    assert num_words == len(speakers)
    max_sentence_length = max(len(s) for s in sentences)
    max_word_length = max(max(max(len(w) for w in s) for s in sentences), max(self.config["filter_widths"]))
    text_len = np.array([len(s) for s in sentences])
    tokens = [[""] * max_sentence_length for _ in sentences]
    context_word_emb = np.zeros([len(sentences), max_sentence_length, self.context_embeddings.size])
    head_word_emb = np.zeros([len(sentences), max_sentence_length, self.head_embeddings.size])
    char_index = np.zeros([len(sentences), max_sentence_length, max_word_length])
    for i, sentence in enumerate(sentences):
        for j, word in enumerate(sentence):
            tokens[i][j] = word
            context_word_emb[i, j] = self.context_embeddings[word]
            head_word_emb[i, j] = self.head_embeddings[word]
            char_index[i, j, :len(word)] = [self.char_dict[c] for c in word]
    tokens = np.array(tokens)
    speaker_dict = {s: i for i, s in enumerate(set(speakers))}
    speaker_ids = np.array([speaker_dict[s] for s in speakers])
    doc_key = example["doc_key"]
    genre = self.genres[doc_key[:2]]
    gold_starts, gold_ends = self.tensorize_mentions(gold_mentions)
    lm_emb = self.load_lm_embeddings(doc_key)
    example_tensors = (tokens, context_word_emb, head_word_emb, lm_emb, char_index, text_len, speaker_ids, genre, is_training, gold_starts, gold_ends, cluster_ids)
    if is_training and len(sentences) > self.config["max_training_sentences"]:
        return self.truncate_example(*example_tensors)
    else:
        return example_tensors
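In this last example flatten appears twice: once to pool every mention span out of the nested cluster lists before sorting and indexing them, and once to line up the per-sentence speaker lists with the flattened word sequence. A toy illustration of the first use, with made-up cluster data and the one-level flatten assumed throughout:

def flatten(list_of_lists):
    return [item for sublist in list_of_lists for item in sublist]

clusters = [[[4, 4], [0, 0]], [[2, 3]]]
gold_mentions = sorted(tuple(m) for m in flatten(clusters))
gold_mention_map = {m: i for i, m in enumerate(gold_mentions)}
print(gold_mentions)      # [(0, 0), (2, 3), (4, 4)]
print(gold_mention_map)   # {(0, 0): 0, (2, 3): 1, (4, 4): 2}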