This article collects typical usage examples of joblib.Parallel in Python. If you are wondering how joblib.Parallel works, how to call it, or what real-world uses look like, the curated code samples below may help. You can also explore further usage examples from the joblib module it belongs to.
A total of 15 code examples of joblib.Parallel are shown below, sorted by popularity by default.
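Before the examples, here is a minimal, self-contained sketch of the basic Parallel/delayed pattern that all of the snippets below build on (the square helper is a hypothetical stand-in for any per-item task):

from joblib import Parallel, delayed

def square(x):
    # A trivial per-item task; each call is dispatched to a worker.
    return x * x

# n_jobs=-1 uses all available CPU cores; results are returned in input order.
results = Parallel(n_jobs=-1)(delayed(square)(i) for i in range(10))
print(results)  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]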
Example 1: __init__
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def __init__(self, path, split, tokenizer, bucket_size, ascending=False):
    # Setup
    self.path = path
    self.bucket_size = bucket_size

    # List all wave files
    file_list = []
    for s in split:
        split_list = list(Path(join(path, s)).rglob("*.flac"))
        assert len(split_list) > 0, "No data found @ {}".format(join(path, s))
        file_list += split_list

    # Read text
    text = Parallel(n_jobs=READ_FILE_THREADS)(
        delayed(read_text)(str(f)) for f in file_list)
    # text = Parallel(n_jobs=-1)(delayed(tokenizer.encode)(txt) for txt in text)
    text = [tokenizer.encode(txt) for txt in text]

    # Sort dataset by text length
    # file_len = Parallel(n_jobs=READ_FILE_THREADS)(delayed(getsize)(f) for f in file_list)
    self.file_list, self.text = zip(*[(f_name, txt) for f_name, txt in
                                      sorted(zip(file_list, text), reverse=not ascending, key=lambda x: len(x[1]))])
Example 2: get_graph_stats
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def get_graph_stats(graph_obj_handle, prop='degrees'):
    # if prop == 'degrees':
    num_cores = multiprocessing.cpu_count()
    inputs = [int(i * len(graph_obj_handle) / num_cores) for i in range(num_cores)] + [len(graph_obj_handle)]
    res = Parallel(n_jobs=num_cores)(delayed(get_values)(graph_obj_handle, inputs[i], inputs[i + 1], prop)
                                     for i in range(num_cores))

    stat_dict = {}

    if 'degrees' in prop:
        stat_dict['degrees'] = list(set([d for core_res in res for file_res in core_res for d in file_res['degrees']]))
    if 'edge_labels' in prop:
        stat_dict['edge_labels'] = list(set([d for core_res in res for file_res in core_res for d in file_res['edge_labels']]))
    if 'target_mean' in prop or 'target_std' in prop:
        param = np.array([file_res['params'] for core_res in res for file_res in core_res])
        if 'target_mean' in prop:
            stat_dict['target_mean'] = np.mean(param, axis=0)
        if 'target_std' in prop:
            stat_dict['target_std'] = np.std(param, axis=0)

    return stat_dict
Example 3: build_save_containers
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def build_save_containers(platforms, registry, load_cache) -> int:
    """
    Entry point to build and upload all built dockerimages in parallel
    :param platforms: List of platforms
    :param registry: Docker registry name
    :param load_cache: Load cache before building
    :return: 1 if error occurred, 0 otherwise
    """
    from joblib import Parallel, delayed
    if len(platforms) == 0:
        return 0

    platform_results = Parallel(n_jobs=len(platforms), backend="multiprocessing")(
        delayed(_build_save_container)(platform, registry, load_cache)
        for platform in platforms)

    is_error = False
    for platform_result in platform_results:
        if platform_result is not None:
            logging.error('Failed to generate %s', platform_result)
            is_error = True

    return 1 if is_error else 0
Example 4: next_minibatch
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def next_minibatch(self):
    image_filenames_minibatch = self.image_filenames[self.current_index: self.current_index + self.minibatch_size]
    label_filenames_minibatch = self.label_filenames[self.current_index: self.current_index + self.minibatch_size]
    self.current_index += self.minibatch_size
    if self.current_index >= self.dataset_size:
        self.current_index = 0

    # Multithread image processing
    # Reference: https://www.kaggle.com/inoryy/fast-image-pre-process-in-parallel
    results = Parallel(n_jobs=self.num_jobs)(
        delayed(self.process_func)(image_filename, label_filename)
        for image_filename, label_filename in zip(image_filenames_minibatch, label_filenames_minibatch))

    images, labels = zip(*results)
    images = np.asarray(images)
    labels = np.asarray(labels)

    return images, labels
Example 5: wrapper_compute_average_precision
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def wrapper_compute_average_precision(self):
    """Computes average precision for each class in the subset.
    """
    ap = np.zeros((len(self.tiou_thresholds), len(self.activity_index)))
    recall = np.zeros((len(self.tiou_thresholds), len(self.activity_index)))
    precision = np.zeros((len(self.tiou_thresholds), len(self.activity_index)))
    matched_gt_id = np.zeros((len(self.tiou_thresholds), len(self.prediction)))

    results = Parallel(n_jobs=len(self.activity_index))(
        delayed(compute_average_precision_detection)(
            ground_truth=self.ground_truth.loc[self.ground_truth['label'] == cidx].reset_index(drop=True),
            prediction=self.prediction.loc[self.prediction['label'] == cidx].reset_index(drop=True),
            tiou_thresholds=self.tiou_thresholds,
            normalize_ap=self.normalize_ap,
            average_num_instance_per_class=self.average_num_instance_per_class,
            minimum_normalized_precision_threshold_for_detection=self.minimum_normalized_precision_threshold_for_detection,
        ) for cidx in self.activity_index.values())

    for i, cidx in enumerate(self.activity_index.values()):
        ap[:, cidx], matched_this_cls_gt_id, this_cls_prediction_ids, recall[:, cidx], precision[:, cidx] = results[i]
        matched_gt_id[:, this_cls_prediction_ids] = matched_this_cls_gt_id

    return ap, matched_gt_id, recall, precision
Example 6: wrapper_analyze_fp_error_types
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def wrapper_analyze_fp_error_types(self):
    self.fp_error_types_legned = {'True Positive': 0,
                                  'Double Detection Err': 1,
                                  'Wrong Label Err': 2,
                                  'Localization Err': 3,
                                  'Confusion Err': 4,
                                  'Background Err': 5}
    # .iteritems() in the original Python 2 source; .items() under Python 3
    self.fp_error_types_inverse_legned = dict([(v, k) for k, v in self.fp_error_types_legned.items()])

    fp_error_types = Parallel(n_jobs=len(self.tiou_thresholds))(
        delayed(analyze_fp_error_types)(
            prediction=self.prediction,
            ground_truth=self.ground_truth,
            tiou_thr=tiou_thr,
            matched_gt_id_col_name=matched_gt_id_col_name,
            min_tiou_thr=self.min_tiou_thr,
            fp_error_types_legned=self.fp_error_types_legned,
        ) for tiou_thr, matched_gt_id_col_name in zip(self.tiou_thresholds, self.matched_gt_id_cols))

    return fp_error_types
Example 7: _parallel_predict
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def _parallel_predict(self, contexts: np.ndarray, is_predict: bool):
    # Total number of contexts to predict
    n_contexts = len(contexts)

    # Partition contexts by job
    n_jobs, n_contexts, starts = self._partition_contexts(n_contexts)
    total_contexts = sum(n_contexts)

    # Get seed value for each context
    seeds = self.rng.randint(np.iinfo(np.int32).max, size=total_contexts)

    # Perform parallel predictions
    predictions = Parallel(n_jobs=n_jobs, backend=self.backend)(
        delayed(self._predict_contexts)(
            contexts[starts[i]:starts[i + 1]],
            is_predict,
            seeds[starts[i]:starts[i + 1]],
            starts[i])
        for i in range(n_jobs))

    # Reduce
    predictions = list(chain.from_iterable(t for t in predictions))

    return predictions if len(predictions) > 1 else predictions[0]
Example 8: fit
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def fit(self, X):
    def func(ss):
        length = len(ss.unique())
        if length <= 1:
            return True
        else:
            return False

    df = X.data
    todo_cols = X.cat_cols + X.multi_cat_cols + X.num_cols + X.time_cols + X.binary_cols

    res = Parallel(n_jobs=CONSTANT.JOBS, require='sharedmem')(delayed(func)(df[col]) for col in todo_cols)

    drop_cols = []
    for col, unique in zip(todo_cols, res):
        if unique:
            drop_cols.append(col)

    self.drop_cols = drop_cols
Example 9: recognize_binary_col
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def recognize_binary_col(self, data, cat_cols):
    def func(ss):
        ss = ss.unique()
        if len(ss) == 3:
            if pd.isna(ss).sum() == 1:
                return True
        if len(ss) == 2:
            return True
        return False

    binary_cols = []

    res = Parallel(n_jobs=CONSTANT.JOBS, require='sharedmem')(delayed(func)(data[col]) for col in cat_cols)

    for col, is_binary in zip(cat_cols, res):
        if is_binary:
            binary_cols.append(col)

    return binary_cols
Example 10: simulate_walks
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def simulate_walks(self, num_walks, walk_length, stay_prob=0.3, workers=1, verbose=0):
    layers_adj = pd.read_pickle(self.temp_path + 'layers_adj.pkl')
    layers_alias = pd.read_pickle(self.temp_path + 'layers_alias.pkl')
    layers_accept = pd.read_pickle(self.temp_path + 'layers_accept.pkl')
    gamma = pd.read_pickle(self.temp_path + 'gamma.pkl')
    walks = []
    initialLayer = 0

    nodes = self.idx  # list(self.g.nodes())

    results = Parallel(n_jobs=workers, verbose=verbose)(
        delayed(self._simulate_walks)(nodes, num, walk_length, stay_prob, layers_adj, layers_accept, layers_alias, gamma)
        for num in partition_num(num_walks, workers))

    walks = list(itertools.chain(*results))

    return walks
Example 11: prefer_parallel_execution
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def prefer_parallel_execution(functions_to_be_called):  # pragma: no cover
    try:
        import joblib
        import multiprocessing
    except ImportError:
        print('Joblib not installed, switching to serial execution')
        [run_function(fn) for fn in functions_to_be_called]
    else:
        try:
            import tqdm
        except ImportError:
            inputs = functions_to_be_called
        else:
            inputs = tqdm.tqdm(functions_to_be_called)

        n_jobs = multiprocessing.cpu_count()
        print('Parallelizing execution using Joblib')
        joblib.Parallel(n_jobs=n_jobs)(
            joblib.delayed(run_function)(fn) for fn in inputs)
Example 12: parallelize
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def parallelize(bucket, only, _except, fn, args=(), versions=False):
    bucket = s3().Bucket(bucket)

    # use prefix for performance
    prefix = None
    if only:
        # get the first prefix before wildcard
        prefix = '/'.join(only.split('*')[0].split('/')[:-1])
        if prefix:
            prefix = prefix + '/'

    if versions:
        object_versions = bucket.object_versions.filter(Prefix=prefix) if prefix else bucket.object_versions.all()
        # delete markers have no size
        return Parallel(n_jobs=24)(delayed(fn)(bucket.name, ov.object_key, ov.id, *args)
                                   for ov in object_versions
                                   if object_matches(ov.object_key, only, _except) and not ov.is_latest and ov.size is not None)
    else:
        objects = bucket.objects.filter(Prefix=prefix) if prefix else bucket.objects.all()

        if only and not '*' in only:
            objects = [s3().Object(bucket, only)]

        return Parallel(n_jobs=24)(delayed(fn)(bucket.name, os.key, *args)
                                   for os in objects if object_matches(os.key, only, _except))
Example 13: recompute_factors_batched
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def recompute_factors_batched(Y, S, lambda_reg, W=None, X=None,
                              dtype='float32', batch_size=10000, n_jobs=4):
    m = S.shape[0]  # m = number of users
    f = Y.shape[1]  # f = number of factors

    YTY = np.dot(Y.T, Y)  # precompute this
    YTYpR = YTY + lambda_reg * np.eye(f)

    if W is not None:
        WX = lambda_reg * (X.dot(W)).T
    else:
        WX = None

    X_new = np.zeros((m, f), dtype=dtype)

    num_batches = int(np.ceil(m / float(batch_size)))

    # xrange() in the original Python 2 source; range() under Python 3
    res = Parallel(n_jobs=n_jobs)(delayed(solve_batch)(b, S, Y, WX, YTYpR,
                                                       batch_size, m, f, dtype)
                                  for b in range(num_batches))

    X_new = np.concatenate(res, axis=0)

    return X_new
Example 14: _joblib_resample_A_given_W
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def _joblib_resample_A_given_W(self, data):
    """
    Resample A given W. This must be immediately followed by an
    update of z | A, W. This version uses joblib to parallelize
    over columns of A.
    :return:
    """
    # Use the module trick to avoid copying globals
    import pyhawkes.internals.parallel_adjacency_resampling as par
    par.model = self.model
    par.data = data
    par.K = self.model.K

    if len(data) == 0:
        self.A = np.random.rand(self.K, self.K) < self.network.P
        return

    # We can naively parallelize over receiving neurons, k2
    # To avoid serializing and copying the data object, we
    # manually extract the required arrays Sk, Fk, etc.
    A_cols = Parallel(n_jobs=-1, backend="multiprocessing")(
        delayed(par._resample_column_of_A)(k2) for k2 in range(self.K))
    self.A = np.array(A_cols).T
Example 15: build_strain_specific_models
# Required module: import joblib [as alias]
# Or: from joblib import Parallel [as alias]
def build_strain_specific_models(self, joblib=False, cores=1, force_rerun=False):
    """Wrapper function for _build_strain_specific_model"""
    if len(self.df_orthology_matrix) == 0:
        raise RuntimeError('Empty orthology matrix, please calculate first!')
    ref_functional_genes = [g.id for g in self.reference_gempro.functional_genes]

    log.info('Building strain specific models...')
    if joblib:
        result = DictList(Parallel(n_jobs=cores)(
            delayed(self._build_strain_specific_model)(s, ref_functional_genes, self.df_orthology_matrix, force_rerun=force_rerun)
            for s in self.strain_ids))
    # if sc:
    #     strains_rdd = sc.parallelize(self.strain_ids)
    #     result = strains_rdd.map(self._build_strain_specific_model).collect()
    else:
        result = []
        for s in tqdm(self.strain_ids):
            result.append(self._build_strain_specific_model(s, ref_functional_genes, self.df_orthology_matrix, force_rerun=force_rerun))

    for strain_id, gp_noseqs_path in result:
        self.strain_infodict[strain_id]['gp_noseqs_path'] = gp_noseqs_path