This article collects typical usage examples of the multiprocess.Pool method in Python. If you are wondering what multiprocess.Pool does, how to use it, or want to see it in real code, the curated examples below may help. You can also explore further usage examples from the multiprocess module, where this method is defined.
Below are 12 code examples of multiprocess.Pool, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
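Before the examples, here is a minimal, self-contained sketch of the pattern most of them share: create a pool, feed it work lazily through imap (or imap_unordered), then close() and join() it. The worker function and pool size here are illustrative and not taken from any example below.

from multiprocess import Pool

def square(x):
    return x * x

if __name__ == '__main__':
    # Create 4 worker processes, stream work to them with imap,
    # then shut the pool down cleanly.
    pool = Pool(4)
    results = list(pool.imap(square, range(10)))
    pool.close()
    pool.join()
    print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]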
Example 1: format_to_bert
# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def format_to_bert(args):
    if (args.dataset != ''):
        datasets = [args.dataset]
    else:
        datasets = ['train', 'valid', 'test']
    for corpus_type in datasets:
        a_lst = []
        for json_f in glob.glob(pjoin(args.raw_path, '*' + corpus_type + '.*.json')):
            real_name = json_f.split('/')[-1]
            a_lst.append((corpus_type, json_f, args, pjoin(args.save_path, real_name.replace('json', 'bert.pt'))))
        print(a_lst)
        pool = Pool(args.n_cpus)
        for d in pool.imap(_format_to_bert, a_lst):
            pass

        pool.close()
        pool.join()
Example 2: format_to_bert
# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def format_to_bert(args):
    if (args.dataset != ''):
        datasets = [args.dataset]
    else:
        datasets = ['train', 'valid', 'test']
    for corpus_type in datasets:
        a_lst = []
        for json_f in glob.glob(pjoin(args.raw_path, '*' + corpus_type + '.*.json')):
            real_name = json_f.split('/')[-1]
            a_lst.append((json_f, args, pjoin(args.save_path, real_name.replace('json', 'bert.pt'))))
        print(a_lst)
        pool = Pool(args.n_cpus)
        for d in pool.imap(_format_to_bert, a_lst):
            pass

        pool.close()
        pool.join()
Example 3: format_xsum_to_lines
# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def format_xsum_to_lines(args):
    if (args.dataset != ''):
        datasets = [args.dataset]
    else:
        datasets = ['train', 'test', 'valid']

    corpus_mapping = json.load(open(pjoin(args.raw_path, 'XSum-TRAINING-DEV-TEST-SPLIT-90-5-5.json')))

    for corpus_type in datasets:
        mapped_fnames = corpus_mapping[corpus_type]
        root_src = pjoin(args.raw_path, 'restbody')
        root_tgt = pjoin(args.raw_path, 'firstsentence')
        # realnames = [fname.split('.')[0] for fname in os.listdir(root_src)]
        realnames = mapped_fnames

        a_lst = [(root_src, root_tgt, n) for n in realnames]
        pool = Pool(args.n_cpus)
        dataset = []
        p_ct = 0
        for d in pool.imap_unordered(_format_xsum_to_lines, a_lst):
            if (d is None):
                continue
            dataset.append(d)
            if (len(dataset) > args.shard_size):
                pt_file = "{:s}.{:s}.{:d}.json".format(args.save_path, corpus_type, p_ct)
                with open(pt_file, 'w') as save:
                    save.write(json.dumps(dataset))
                    p_ct += 1
                    dataset = []

        pool.close()
        pool.join()

        if (len(dataset) > 0):
            pt_file = "{:s}.{:s}.{:d}.json".format(args.save_path, corpus_type, p_ct)
            with open(pt_file, 'w') as save:
                save.write(json.dumps(dataset))
                p_ct += 1
                dataset = []
Example 4: update_catalog_collection
# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def update_catalog_collection(elements, max_miller, n_processes=1, mp_query=None):
    '''
    This function will enumerate and add adsorption sites to our `catalog`
    Mongo collection.

    Args:
        elements        A list of strings indicating the elements you are
                        looking for, e.g., ['Cu', 'Al']
        max_miller      An integer indicating the maximum Miller index to be
                        enumerated
        n_processes     An integer indicating how many threads you want to use
                        when running the tasks. If you do not expect many
                        updates, stick to the default of 1, or go up to 4. If
                        you are re-creating your collection from scratch, you
                        may want to increase this argument as high as you can.
        mp_query        We get our bulks from The Materials Project. This
                        dictionary argument is used as a Mongo query to The
                        Materials Project Database. If you do not supply this
                        argument, then it will automatically filter out bulks
                        whose energies above the hull are greater than 0.1 eV
                        and whose formation energies per atom are above 0 eV.
    '''
    # Python doesn't like mutable arguments
    if mp_query is None:
        mp_query = {}

    # Figure out the MPIDs we need to enumerate
    get_mpid_task = _GetMpids(elements=elements, mp_query=mp_query)
    schedule_tasks([get_mpid_task])
    mpids = get_task_output(get_mpid_task)

    # For each MPID, enumerate all the sites and then add them to our `catalog`
    # Mongo collection. Do this in parallel because it can be.
    if n_processes > 1:
        with multiprocess.Pool(n_processes) as pool:
            list(pool.imap(func=lambda mpid: __run_insert_to_catalog_task(mpid, max_miller),
                           iterable=mpids, chunksize=20))
    else:
        for mpid in mpids:
            __run_insert_to_catalog_task(mpid, max_miller)
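For context, a hypothetical invocation of the function above (the element list, Miller index, and process count are illustrative; a configured GASpy installation with its Mongo collections is assumed):

# Enumerate adsorption sites for Cu/Al bulks up to Miller index 2 and
# insert them into the `catalog` collection using 4 parallel workers.
update_catalog_collection(elements=['Cu', 'Al'], max_miller=2, n_processes=4)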
Example 5: Pool
# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def Pool(n=1):  # workaround
    class mpool():
        def map(self, f, xs):
            return [f(x) for x in xs]
        def terminate(self): return None  # dummy function
    return mpool()
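As a brief illustration (not part of the source), this serial stand-in exposes just the map and terminate methods that callers such as pcall_mp in Example 7 rely on, so code written against multiprocess.Pool keeps working, only without parallelism:

pool = Pool(4)                                 # the workaround ignores n and runs serially
print(pool.map(lambda x: x + 1, [1, 2, 3]))    # [2, 3, 4]
pool.terminate()                               # no-op kept for interface compatibility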
Example 6: set_cores
# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def set_cores(n=1):
    global cores
    cores = n

#mainpool = None
#def initialize():
#    global mainpool
#    if cores>1:
#        mainpool = Pool(cores) # create pool
#    return mainpool
#def finish(): mainpool=None # delete pool
Example 7: pcall_mp
# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def pcall_mp(fun, args, cores=cores):
    """Calls a function for every input in args"""
    mainpool = Pool(cores)  # create pool
    # print("Using", cores, "cores")
    out = mainpool.map(fun, args)  # return list
    mainpool.terminate()  # clear the pool
    del mainpool  # delete pool
    return out

#except:
#    print("Multiprocessing not found, running in a single core")
#    def pcall_mp(fun,args,cores=1): return pcall_serial(fun,args)
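A hypothetical call of pcall_mp (the lambda and inputs are illustrative). Because multiprocess serializes with dill rather than pickle, a plain lambda can be shipped to the worker processes, which is a common reason projects choose it over the standard-library multiprocessing:

squares = pcall_mp(lambda x: x ** 2, list(range(8)), cores=2)
print(squares)  # [0, 1, 4, 9, 16, 25, 36, 49]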
Example 8: format_to_lines
# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def format_to_lines(args):
    corpus_mapping = {}
    for corpus_type in ['valid', 'test', 'train']:
        temp = []
        for line in open(pjoin(args.map_path, 'mapping_' + corpus_type + '.txt')):
            temp.append(hashhex(line.strip()))
        corpus_mapping[corpus_type] = {key.strip(): 1 for key in temp}

    train_files, valid_files, test_files = [], [], []
    for f in glob.glob(pjoin(args.raw_path, '*.json')):
        real_name = f.split('/')[-1].split('.')[0]
        if (real_name in corpus_mapping['valid']):
            valid_files.append(f)
        elif (real_name in corpus_mapping['test']):
            test_files.append(f)
        elif (real_name in corpus_mapping['train']):
            train_files.append(f)
        # else:
        #     train_files.append(f)

    corpora = {'train': train_files, 'valid': valid_files, 'test': test_files}
    for corpus_type in ['train', 'valid', 'test']:
        a_lst = [(f, args) for f in corpora[corpus_type]]
        pool = Pool(args.n_cpus)
        dataset = []
        p_ct = 0
        for d in pool.imap_unordered(_format_to_lines, a_lst):
            dataset.append(d)
            if (len(dataset) > args.shard_size):
                pt_file = "{:s}.{:s}.{:d}.json".format(args.save_path, corpus_type, p_ct)
                with open(pt_file, 'w') as save:
                    # save.write('\n'.join(dataset))
                    save.write(json.dumps(dataset))
                    p_ct += 1
                    dataset = []

        pool.close()
        pool.join()

        if (len(dataset) > 0):
            pt_file = "{:s}.{:s}.{:d}.json".format(args.save_path, corpus_type, p_ct)
            with open(pt_file, 'w') as save:
                # save.write('\n'.join(dataset))
                save.write(json.dumps(dataset))
                p_ct += 1
                dataset = []
Example 9: format_to_lines
# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def format_to_lines(args):
    corpus_mapping = {}
    for corpus_type in ['valid', 'test', 'train']:
        temp = []
        for line in open(pjoin(args.map_path, 'mapping_' + corpus_type + '.txt')):
            temp.append(hashhex(line.strip()))
        corpus_mapping[corpus_type] = {key.strip(): 1 for key in temp}

    train_files, valid_files, test_files = [], [], []
    for f in glob.glob(pjoin(args.raw_path, '*.json')):
        real_name = f.split('/')[-1].split('.')[0]
        if (real_name in corpus_mapping['valid']):
            valid_files.append(f)
        elif (real_name in corpus_mapping['test']):
            test_files.append(f)
        elif (real_name in corpus_mapping['train']):
            train_files.append(f)

    corpora = {'train': train_files, 'valid': valid_files, 'test': test_files}
    for corpus_type in ['train', 'valid', 'test']:
        a_lst = [(f, args) for f in corpora[corpus_type]]
        pool = Pool(args.n_cpus)
        dataset = []
        p_ct = 0
        for d in pool.imap_unordered(_format_to_lines, a_lst):
            dataset.append(d)
            if (len(dataset) > args.shard_size):
                pt_file = "{:s}.{:s}.{:d}.json".format(args.save_path, corpus_type, p_ct)
                with open(pt_file, 'w') as save:
                    # save.write('\n'.join(dataset))
                    save.write(json.dumps(dataset))
                    p_ct += 1
                    dataset = []

        pool.close()
        pool.join()

        if (len(dataset) > 0):
            pt_file = "{:s}.{:s}.{:d}.json".format(args.save_path, corpus_type, p_ct)
            with open(pt_file, 'w') as save:
                # save.write('\n'.join(dataset))
                save.write(json.dumps(dataset))
                p_ct += 1
                dataset = []
Example 10: tabix
# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def tabix(bins, pairs_path, cool_path, metadata, assembly, nproc, zero_based, max_split, **kwargs):
    """
    Bin a tabix-indexed contact list file.

    {}

    See also: 'cooler csort' to sort and index a contact list file

    Tabix manpage: <http://www.htslib.org/doc/tabix.html>.

    """
    logger = get_logger(__name__)
    chromsizes, bins = parse_bins(bins)

    if metadata is not None:
        with open(metadata, 'r') as f:
            metadata = json.load(f)

    try:
        if nproc > 1:
            pool = Pool(nproc)
            logger.info("Using {} cores".format(nproc))
            map = pool.imap
        else:
            map = six.moves.map

        opts = {}
        if 'chrom2' in kwargs:
            opts['C2'] = kwargs['chrom2'] - 1
        if 'pos2' in kwargs:
            opts['P2'] = kwargs['pos2'] - 1

        iterator = TabixAggregator(
            pairs_path,
            chromsizes,
            bins,
            map=map,
            is_one_based=(not zero_based),
            n_chunks=max_split,
            **opts
        )

        create_cooler(
            cool_path, bins, iterator,
            metadata=metadata,
            assembly=assembly,
            ordered=True)

    finally:
        if nproc > 1:
            pool.close()
Example 11: pairix
# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def pairix(bins, pairs_path, cool_path, metadata, assembly, nproc, zero_based, max_split):
    """
    Bin a pairix-indexed contact list file.

    {}

    See also: 'cooler csort' to sort and index a contact list file

    Pairix on GitHub: <https://github.com/4dn-dcic/pairix>.

    """
    logger = get_logger(__name__)
    chromsizes, bins = parse_bins(bins)

    if metadata is not None:
        with open(metadata, 'r') as f:
            metadata = json.load(f)

    try:
        if nproc > 1:
            pool = Pool(nproc)
            logger.info("Using {} cores".format(nproc))
            map = pool.imap
        else:
            map = six.moves.map

        iterator = PairixAggregator(
            pairs_path,
            chromsizes,
            bins,
            map=map,
            is_one_based=(not zero_based),
            n_chunks=max_split)

        create_cooler(
            cool_path, bins, iterator,
            metadata=metadata,
            assembly=assembly,
            ordered=True)

    finally:
        if nproc > 1:
            pool.close()
Example 12: multimap
# Required module: import multiprocess [as alias]
# Or: from multiprocess import Pool [as alias]
def multimap(function, inputs, chunked=False, processes=32, maxtasksperchild=1,
             chunksize=1, n_calcs=None):
    '''
    This function is a wrapper to parallelize a function.

    Args:
        function            The function you want to execute
        inputs              An iterable that yields proper arguments to the
                            function
        chunked             A Boolean indicating whether your function expects
                            single arguments or "chunked" iterables, e.g.,
                            lists.
        processes           The number of threads/processes you want to be using
        maxtasksperchild    The maximum number of tasks that a child process
                            may do before terminating (and therefore clearing
                            its memory cache to avoid memory overload).
        chunksize           How many calculations you want to have each single
                            processor do per task. Smaller chunks means more
                            memory shuffling. Bigger chunks means more RAM
                            requirements.
        n_calcs             How many calculations you have. Only necessary for
                            adding a percentage timer to the progress bar.
    Returns:
        outputs     A list of the inputs mapped through the function
    '''
    # Collect garbage before we begin multiprocessing to make sure we don't
    # pass things we don't need to
    gc.collect()

    # If we have one thread, there's no use multiprocessing
    if processes == 1:
        output = [function(input_) for input_ in tqdm(inputs, total=n_calcs)]
        return output

    with Pool(processes=processes, maxtasksperchild=maxtasksperchild) as pool:
        # Use multiprocessing to perform the calculations. We use imap instead
        # of map so that we get an iterator, which we need for tqdm (the
        # progress bar) to work. imap also requires less disk memory, which
        # can be an issue for some of our large systems.
        if not chunked:
            iterator = pool.imap(function, inputs, chunksize=chunksize)
            total = n_calcs
            outputs = list(tqdm(iterator, total=total))

        # If our function expects chunks, then we have to unpack our inputs
        # appropriately
        else:
            iterator = pool.imap(function, _chunk(inputs, n=chunksize))
            total = n_calcs / chunksize
            outputs = list(np.concatenate(list(tqdm(iterator, total=total))))

    return outputs
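A hypothetical call of multimap, assuming it is importable from the module above; the worker function and the sizes are illustrative:

# Run 10,000 small calculations across 8 processes, 100 inputs per task,
# with a tqdm progress bar that knows the total count up front.
outputs = multimap(lambda x: x ** 2, range(10000), processes=8,
                   chunksize=100, n_calcs=10000)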