This article collects typical usage examples of the Python method cytoolz.compose. If you have been wondering what cytoolz.compose does and how to use it, the hand-picked code samples below may help. You can also explore further usage examples from the cytoolz module itself.
The following presents 10 code examples of cytoolz.compose, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
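Before diving into the examples, a quick refresher on the method itself: cytoolz.compose chains functions from right to left, so compose(f, g)(x) is equivalent to f(g(x)). A minimal sketch (the helper below is purely illustrative and not taken from the examples):

from cytoolz import compose

# The rightmost function runs first, the leftmost last:
# str.strip removes the surrounding whitespace, then str.upper uppercases.
shout = compose(str.upper, str.strip)
assert shout("  hello  ") == "HELLO"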
Example 1: all_of
# Required import: import cytoolz [as alias]
# Or: from cytoolz import compose [as alias]
def all_of(inners, arg):
"""All of the inner valudators must pass.
The order of inner validators matters.
Parameters
----------
inners : List[validator]
Functions are applied from right to left so allof([rule1, rule2], arg) is
the same as rule1(rule2(arg)).
arg : Any
Value to be validated.
Returns
-------
arg : Any
Value maybe coerced by inner validators to the appropiate types
"""
return compose(*inners)(arg)
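A hedged usage sketch for the validator above: the two inner validators below are hypothetical stand-ins, meant only to show that the rightmost validator runs first and that each one may coerce the value it passes along.

# Hypothetical inner validators, for illustration only.
def as_int(value):          # rightmost: runs first, coerces to int
    return int(value)

def positive(value):        # leftmost: runs second, validates the coerced value
    if value <= 0:
        raise ValueError('expected a positive value')
    return value

# all_of([positive, as_int], '3') == positive(as_int('3')) == 3
assert all_of([positive, as_int], '3') == 3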
Example 2: label
# Required import: import cytoolz [as alias]
# Or: from cytoolz import compose [as alias]
def label(split):
start = time()
print('start processing {} split...'.format(split))
data_dir = join(DATA_DIR, split)
n_data = count_data(data_dir)
for i in range(n_data):
        print('processing {}/{} ({:.2f}%)\r'.format(i, n_data, 100*i/n_data),
              end='')
with open(join(data_dir, '{}.json'.format(i))) as f:
data = json.loads(f.read())
tokenize = compose(list, _split_words)
art_sents = tokenize(data['article'])
abs_sents = tokenize(data['abstract'])
extracted, scores = get_extract_label(art_sents, abs_sents)
data['extracted'] = extracted
data['score'] = scores
with open(join(data_dir, '{}.json'.format(i)), 'w') as f:
json.dump(data, f, indent=4)
print('finished in {}'.format(timedelta(seconds=time()-start)))
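In this snippet, _split_words is defined elsewhere in the project; assuming it lazily yields one word list per sentence, compose(list, _split_words) simply materializes that lazy output. A rough sketch under that assumption:

from cytoolz import compose

# Stand-in for the project's _split_words (assumed to yield one word list per sentence).
def _split_words(sentences):
    return (sentence.split() for sentence in sentences)

tokenize = compose(list, _split_words)  # split first, then collect the generator into a list
assert tokenize(['a b', 'c d e']) == [['a', 'b'], ['c', 'd', 'e']]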
Example 3: label
# Required import: import cytoolz [as alias]
# Or: from cytoolz import compose [as alias]
def label(data_dir, split):
start = time()
print('start processing {} split...'.format(split))
data_dir = os.path.join(data_dir, split)
n_data = count_data(data_dir)
for i in range(n_data):
        print('processing {}/{} ({:.2f}%)\r'.format(i, n_data, 100*i/n_data), end='')
with open(os.path.join(data_dir, '{}.json'.format(i))) as f:
data = json.loads(f.read())
tokenize = compose(list, _split_words)
art_sents = tokenize(data['article'])
abs_sents = tokenize(data['abstract'])
extracted, scores = get_extract_label(art_sents, abs_sents)
data['extracted'] = extracted
data['score'] = scores
with open(os.path.join(data_dir, '{}.json'.format(i)), 'w') as f:
json.dump(data, f, indent=4)
print('finished in {}'.format(timedelta(seconds=time()-start)))
Example 4: build_batchers
# Required import: import cytoolz [as alias]
# Or: from cytoolz import compose [as alias]
def build_batchers(word2id, data_dir, cuda, debug):
prepro = prepro_fn(args.max_art, args.max_abs)
def sort_key(sample):
src, target = sample
return (len(target), len(src))
batchify = compose(batchify_fn_copy(PAD, START, END, cuda=cuda),
convert_batch_copy(UNK, word2id))
train_loader = DataLoader(
MatchDataset('train', data_dir), batch_size=BUCKET_SIZE,
shuffle=not debug,
num_workers=4 if cuda and not debug else 0,
collate_fn=coll_fn)
val_loader = DataLoader(
MatchDataset('val', data_dir), batch_size=BUCKET_SIZE,
shuffle=False, num_workers=4 if cuda and not debug else 0,
collate_fn=coll_fn
)
train_batcher = BucketedGenerater(train_loader, prepro, sort_key, batchify,
single_run=False, fork=False)
val_batcher = BucketedGenerater(val_loader, prepro, sort_key, batchify,
single_run=True, fork=False)
return train_batcher, val_batcher
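The batchify pipeline here is again a right-to-left composition: convert_batch_copy maps words to ids first, and batchify_fn_copy then pads and tensorizes the converted batch. The two factories come from the surrounding project and are not reproduced; the sketch below uses simplified stand-ins just to show the data flow:

from cytoolz import compose

# Simplified stand-ins for the project's pipeline factories (illustrative only).
def convert_batch(unk, word2id):
    def convert(samples):                          # runs first: words -> ids
        return [[word2id.get(w, unk) for w in s] for s in samples]
    return convert

def batchify_fn(pad):
    def batchify(id_lists):                        # runs second: pad to equal length
        width = max(len(ids) for ids in id_lists)
        return [ids + [pad] * (width - len(ids)) for ids in id_lists]
    return batchify

pipeline = compose(batchify_fn(pad=0), convert_batch(unk=1, word2id={'a': 2, 'b': 3}))
assert pipeline([['a'], ['a', 'b']]) == [[2, 0], [2, 3]]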
Example 5: process
# Required import: import cytoolz [as alias]
# Or: from cytoolz import compose [as alias]
def process(split, i):
data_dir = join(DATA_DIR, split)
with open(join(data_dir, '{}.json'.format(i))) as f:
data = json.loads(f.read())
tokenize = compose(list, _split_words)
art_sents = tokenize(data['article'])
abs_sents = tokenize(data['abstract'])
if art_sents and abs_sents: # some data contains empty article/abstract
extracted, scores = get_extract_label(art_sents, abs_sents)
else:
extracted, scores = [], []
data['extracted'] = extracted
data['score'] = scores
with open(join(data_dir, '{}.json'.format(i)), 'w') as f:
json.dump(data, f, indent=4)
Example 6: build_batchers
# Required import: import cytoolz [as alias]
# Or: from cytoolz import compose [as alias]
def build_batchers(net_type, word2id, cuda, debug):
assert net_type in ['ff', 'rnn']
prepro = prepro_fn_extract(args.max_word, args.max_sent)
def sort_key(sample):
src_sents, _ = sample
return len(src_sents)
batchify_fn = (batchify_fn_extract_ff if net_type == 'ff'
else batchify_fn_extract_ptr)
convert_batch = (convert_batch_extract_ff if net_type == 'ff'
else convert_batch_extract_ptr)
batchify = compose(batchify_fn(PAD, cuda=cuda),
convert_batch(UNK, word2id))
train_loader = DataLoader(
ExtractDataset('train'), batch_size=BUCKET_SIZE,
shuffle=not debug,
num_workers=4 if cuda and not debug else 0,
collate_fn=coll_fn_extract
)
train_batcher = BucketedGenerater(train_loader, prepro, sort_key, batchify,
single_run=False, fork=not debug)
val_loader = DataLoader(
ExtractDataset('val'), batch_size=BUCKET_SIZE,
shuffle=False, num_workers=4 if cuda and not debug else 0,
collate_fn=coll_fn_extract
)
val_batcher = BucketedGenerater(val_loader, prepro, sort_key, batchify,
single_run=True, fork=not debug)
return train_batcher, val_batcher
Example 7: build_batchers
# Required import: import cytoolz [as alias]
# Or: from cytoolz import compose [as alias]
def build_batchers(word2id, cuda, debug):
prepro = prepro_fn(args.max_art, args.max_abs)
def sort_key(sample):
src, target = sample
return (len(target), len(src))
batchify = compose(
batchify_fn_copy(PAD, START, END, cuda=cuda),
convert_batch_copy(UNK, word2id)
)
train_loader = DataLoader(
MatchDataset('train'), batch_size=BUCKET_SIZE,
shuffle=not debug,
num_workers=4 if cuda and not debug else 0,
collate_fn=coll_fn
)
train_batcher = BucketedGenerater(train_loader, prepro, sort_key, batchify,
single_run=False, fork=not debug)
val_loader = DataLoader(
MatchDataset('val'), batch_size=BUCKET_SIZE,
shuffle=False, num_workers=4 if cuda and not debug else 0,
collate_fn=coll_fn
)
val_batcher = BucketedGenerater(val_loader, prepro, sort_key, batchify,
single_run=True, fork=not debug)
return train_batcher, val_batcher
Example 8: process
# Required import: import cytoolz [as alias]
# Or: from cytoolz import compose [as alias]
def process(data_dir, split, i):
data_dir = os.path.join(data_dir, split)
with open(os.path.join(data_dir, '{}.json'.format(i))) as f:
data = json.loads(f.read())
tokenize = compose(list, _split_words)
art_sents = tokenize(data['article'])
abs_sents = tokenize(data['abstract'])
if art_sents and abs_sents: # some data contains empty article/abstract
extracted, scores = get_extract_label(art_sents, abs_sents)
else:
extracted, scores = [], []
data['extracted'] = extracted
data['score'] = scores
with open(os.path.join(data_dir, '{}.json'.format(i)), 'w') as f:
json.dump(data, f, indent=4)
Example 9: build_batchers
# Required import: import cytoolz [as alias]
# Or: from cytoolz import compose [as alias]
def build_batchers(data_dir, net_type, word2id, cuda, debug):
assert net_type in ['ff', 'rnn']
prepro = prepro_fn_extract(args.max_word, args.max_sent)
def sort_key(sample):
src_sents, _ = sample
return len(src_sents)
batchify_fn = (batchify_fn_extract_ff if net_type == 'ff'
else batchify_fn_extract_ptr)
convert_batch = (convert_batch_extract_ff if net_type == 'ff'
else convert_batch_extract_ptr)
batchify = compose(batchify_fn(PAD, cuda=cuda), convert_batch(UNK, word2id))
train_loader = DataLoader(
ExtractDataset('train', data_dir), batch_size=BUCKET_SIZE,
shuffle=not debug,
num_workers=4 if cuda and not debug else 0,
collate_fn=coll_fn_extract)
val_loader = DataLoader(
ExtractDataset('val', data_dir), batch_size=BUCKET_SIZE,
shuffle=False, num_workers=4 if cuda and not debug else 0,
collate_fn=coll_fn_extract)
train_batcher = BucketedGenerater(train_loader, prepro, sort_key, batchify,
single_run=False, fork=False)
val_batcher = BucketedGenerater(val_loader, prepro, sort_key, batchify,
single_run=True, fork=False)
return train_batcher, val_batcher
Example 10: test_extract_links
# Required import: import cytoolz [as alias]
# Or: from cytoolz import compose [as alias]
def test_extract_links():
first_link = compose(tuple, next, iter, extract_links)
assert_equal(first_link("[[foo|bar]]"), ("Foo", "bar"))
assert_equal(first_link("[[foo]]"), ("Foo", "foo"))
assert_equal(first_link("[[File:picture!]] [[foo]]"), ("Foo", "foo"))
assert_equal(first_link("[[foo]]bar."), ("Foo", "foobar"))
assert_equal(first_link("[[baz|foobar]];"), ("Baz", "foobar"))
assert_equal(first_link("[[baz#quux]]"), ("Baz", "baz#quux"))
assert_equal(first_link("[[baz#quux|bla]]"), ("Baz", "bla"))
assert_equal(first_link("[[FOO_BAR|foo bar]]"), ("FOO BAR", "foo bar"))
# Links like these commonly occur in nlwiki (and presumably dewiki and
# other compounding languages):
assert_equal(first_link("foo[[baz|bar]]"), ("Baz", "foobar"))
# MediaWiki only considers alphabetic characters outside [[]] part of the
# anchor.
assert_equal(first_link("foo-[[bar]]"), ("Bar", "bar"))
assert_equal(first_link("[[bar]]/baz"), ("Bar", "bar"))
# XXX The following are broken. They do occur in the wild, e.g.,
# -18[[Celsius|°C]] and 700[[Megabyte|MB]]-cd (found in nlwiki dump).
# assert_equal(first_link("[[bar]]0"), ("Bar", "bar"))
# assert_equal(first_link("[[bar]]_"), ("Bar", "bar"))
# We're not interested in section links
assert_equal(first_link("[[#Some section|elsewhere]] [[other_article]]"),
("Other article", "other_article"))
# This construct appears in enwiki for chemical formulae etc., but also in
# nlwiki (and dewiki?) for more general compound nouns. The current
# handling may not be exactly what we want; any fix should update the test
# accordingly.
assert_equal(list(extract_links("[[Lithium|Li]][[Fluorine|F]]")),
[("Lithium", "Li"), ("Fluorine", "F")])
assert_equal(list(extract_links("[[tera-|tera]][[becquerel]]s")),
[("Tera-", "tera"), ("Becquerel", "becquerels")])
assert_equal(list(extract_links("""[[Lord's
prayer]]
[[Dismissal
(cricket)|dismissal]] [[Badass|Chuck
Norris]]""")),
[("Lord's prayer", "Lord's prayer"),
("Dismissal (cricket)", "dismissal"),
("Badass", "Chuck Norris")])
assert_equal(list(extract_links("[[C. Stephen Evans | Evans, C. Stephen]]")),
[('C. Stephen Evans', 'Evans, C. Stephen')])
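For reference, the first_link helper at the top of this test is itself a small right-to-left pipeline: extract_links yields (target, anchor) pairs, iter and next pull out the first pair, and tuple normalizes it. An equivalent, more explicit spelling (using the same extract_links under test):

def first_link_explicit(text):
    links = extract_links(text)       # lazily yields (target, anchor) pairs
    return tuple(next(iter(links)))   # take the first pair and normalize it to a tuple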