本文整理汇总了 Python 中 joblib.delayed 方法的典型用法代码示例。如果您正苦于以下问题:Python joblib.delayed 方法的具体用法?Python joblib.delayed 怎么用?Python joblib.delayed 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 joblib 的用法示例。
在下文中一共展示了joblib.delayed方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_graph_stats
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def get_graph_stats(graph_obj_handle, prop='degrees'):
    """Gather statistics over a graph collection using one joblib job per core.

    The index range ``[0, len(graph_obj_handle))`` is cut into one contiguous
    slice per CPU core and ``get_values`` is run on every slice.

    Args:
        graph_obj_handle: Sized collection handed unchanged to ``get_values``.
        prop: Requested statistics. Each name is tested with ``in``, so either
            a string or a container of names is accepted. Names checked here:
            'degrees', 'edge_labels', 'target_mean', 'target_std'.

    Returns:
        dict: One entry per requested statistic. 'degrees' and 'edge_labels'
        are de-duplicated lists; 'target_mean' and 'target_std' are computed
        along axis 0 of the stacked per-file 'params' values.
    """
    n_workers = multiprocessing.cpu_count()
    # n_workers start offsets followed by the final end offset.
    bounds = [int(k * len(graph_obj_handle) / n_workers) for k in range(n_workers)]
    bounds.append(len(graph_obj_handle))
    per_worker = Parallel(n_jobs=n_workers)(
        delayed(get_values)(graph_obj_handle, lo, hi, prop)
        for lo, hi in zip(bounds[:-1], bounds[1:])
    )

    def _distinct(key):
        # Flatten worker -> file -> values and drop duplicates.
        return list(set(
            value
            for chunk in per_worker
            for record in chunk
            for value in record[key]
        ))

    stat_dict = {}
    if 'degrees' in prop:
        stat_dict['degrees'] = _distinct('degrees')
    if 'edge_labels' in prop:
        stat_dict['edge_labels'] = _distinct('edge_labels')

    wants_mean = 'target_mean' in prop
    wants_std = 'target_std' in prop
    if wants_mean or wants_std:
        param = np.array([record['params'] for chunk in per_worker for record in chunk])
        if wants_mean:
            stat_dict['target_mean'] = np.mean(param, axis=0)
        if wants_std:
            stat_dict['target_std'] = np.std(param, axis=0)
    return stat_dict
示例2: __init__
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def __init__(self, path, split, tokenizer, bucket_size, ascending=False):
    """Index the FLAC files of the requested splits together with their text.

    Args:
        path: Root directory; each entry of ``split`` is joined onto it.
        split: Iterable of sub-directory names to search recursively for
            ``*.flac`` files.
        tokenizer: Object whose ``encode`` method is applied to every text
            returned by ``read_text``.
        bucket_size: Stored on the instance as ``self.bucket_size``.
        ascending: When False (default) entries are ordered from longest to
            shortest encoded text; when True, shortest first.

    Raises:
        AssertionError: If a split directory contains no ``*.flac`` file.
    """
    self.path = path
    self.bucket_size = bucket_size

    # Gather every audio file of every requested split.
    audio_files = []
    for subset in split:
        subset_dir = join(path, subset)
        found = list(Path(subset_dir).rglob("*.flac"))
        assert len(found) > 0, "No data found @ {}".format(subset_dir)
        audio_files += found

    # Text is read in parallel; encoding runs serially in this process.
    raw_text = Parallel(n_jobs=READ_FILE_THREADS)(
        delayed(read_text)(str(audio)) for audio in audio_files)
    encoded = [tokenizer.encode(entry) for entry in raw_text]

    # Order (file, encoded text) pairs by encoded length, then split the
    # pairs back into two parallel tuples.
    ordered = sorted(
        zip(audio_files, encoded),
        key=lambda pair: len(pair[1]),
        reverse=not ascending,
    )
    self.file_list, self.text = zip(*ordered)
示例3: partial_fit
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def partial_fit(self, X, y, classes=None):
    """Run ``_partial_fit_single`` once per resample on a batch of data.

    Depending on ``self.partial_method`` either a weight matrix ("gamma") or a
    matrix of repetition counts ("poisson") with one column per resample is
    drawn, then every resample is fitted through joblib with shared memory.

    Args:
        X: Batch of observations; only ``X.shape[0]`` is read here.
        y: Targets, forwarded untouched to ``_partial_fit_single``.
        classes: Accepted but not used in this method.

    Raises:
        ValueError: If ``self.partial_method`` is neither "gamma" nor
            "poisson".
    """
    method = self.partial_method
    w_all = None
    appear_times = None
    rng = None

    if method == "gamma":
        draws = self.random_state.random(size=(X.shape[0], self.nsamples))
        # Clip before the logarithm so that a draw of exactly 0 stays finite.
        w_all = -np.log(draws.clip(min=1e-12, max=None))
    elif method == "poisson":
        appear_times = self.random_state.poisson(1, size = (X.shape[0], self.nsamples))
        rng = np.arange(X.shape[0])
    else:
        raise ValueError(_unexpected_err_msg)

    jobs = (
        delayed(self._partial_fit_single)(sample, w_all, appear_times, rng, X, y)
        for sample in range(self.nsamples)
    )
    Parallel(n_jobs=self.njobs, verbose=0, require="sharedmem")(jobs)
示例4: next_minibatch
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def next_minibatch(self):
    """Load the next minibatch of images and labels.

    Takes ``self.minibatch_size`` file names starting at
    ``self.current_index``, advances the index (resetting it to 0 once it
    reaches ``self.dataset_size``) and runs ``self.process_func`` on every
    (image file, label file) pair through joblib.

    Returns:
        tuple: ``(images, labels)``, each converted with ``np.asarray``.
    """
    start = self.current_index
    stop = start + self.minibatch_size
    image_batch = self.image_filenames[start:stop]
    label_batch = self.label_filenames[start:stop]

    # Advance the cursor and wrap around at the end of the dataset.
    self.current_index = stop
    if self.current_index >= self.dataset_size:
        self.current_index = 0

    # Parallel image processing
    # Reference: https://www.kaggle.com/inoryy/fast-image-pre-process-in-parallel
    processed = Parallel(n_jobs=self.num_jobs)(
        delayed(self.process_func)(image_name, label_name)
        for image_name, label_name in zip(image_batch, label_batch)
    )
    images, labels = zip(*processed)
    return np.asarray(images), np.asarray(labels)
示例5: main_kinetics400
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def main_kinetics400(v_root, f_root, dim=150):
    """Extract frames for every video of the Kinetics400 train/val splits.

    For each of ``train_split`` and ``val_split`` the class folders under
    ``v_root/<split>`` are visited in sorted order and every ``*.mp4`` inside
    is passed to ``extract_video_opencv`` through joblib.

    Args:
        v_root: Directory containing the ``train_split`` and ``val_split``
            video folders.
        f_root: Directory under which the per-split frame folders are
            created. (Previously ignored in favour of a hard-coded path.)
        dim: Forwarded to ``extract_video_opencv`` as ``dim``.

    Raises:
        SystemExit: If a split folder is missing below ``v_root``.
    """
    print('extracting Kinetics400 ... ')
    for basename in ['train_split', 'val_split']:
        v_root_real = v_root + '/' + basename
        if not os.path.exists(v_root_real):
            print('Wrong v_root'); sys.exit()
        # Honour the caller-supplied output root instead of a machine-specific
        # absolute path.
        f_root_real = f_root + '/' + basename
        print('Extract to: \nframe: %s' % f_root_real)
        if not os.path.exists(f_root_real):
            os.makedirs(f_root_real)
        v_act_root = sorted(glob.glob(os.path.join(v_root_real, '*/')))

        # If resuming, remember to delete the last (possibly partial) video
        # folder first: existing class folders are skipped entirely.
        for class_dir in tqdm(v_act_root, total=len(v_act_root)):
            v_paths = sorted(glob.glob(os.path.join(class_dir, '*.mp4')))
            # The glob pattern ends with a separator; normalise it away before
            # taking the last path component as the class name.
            v_class = os.path.basename(os.path.normpath(class_dir))
            out_dir = os.path.join(f_root_real, v_class)
            if os.path.exists(out_dir):
                print(out_dir, 'exists!')
                continue
            print('extracting: %s' % v_class)
            # dim = 150 (crop to 128 later) or 256 (crop to 224 later)
            Parallel(n_jobs=32)(
                delayed(extract_video_opencv)(p, f_root_real, dim=dim)
                for p in tqdm(v_paths, total=len(v_paths))
            )
示例6: fit
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def fit(self,X):
    """Record the columns of ``X.data`` that hold at most one distinct value.

    Every column listed in ``X.cat_cols``, ``X.multi_cat_cols``,
    ``X.num_cols``, ``X.time_cols`` and ``X.binary_cols`` is checked through
    joblib; the names that qualify are stored in ``self.drop_cols``.

    Args:
        X: Object exposing a ``data`` table and the column-name lists above.
    """
    def is_constant(series):
        # A column is droppable when ``unique()`` reports 0 or 1 values.
        return len(series.unique()) <= 1

    frame = X.data
    candidates = X.cat_cols + X.multi_cat_cols + X.num_cols + X.time_cols + X.binary_cols
    flags = Parallel(n_jobs=CONSTANT.JOBS,require='sharedmem')(
        delayed(is_constant)(frame[name]) for name in candidates)
    self.drop_cols = [name for name, flag in zip(candidates, flags) if flag]
示例7: recognize_binary_col
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def recognize_binary_col(self,data,cat_cols):
    """Return the names in ``cat_cols`` whose columns look binary.

    A column qualifies when ``unique()`` yields exactly two values, or exactly
    three values of which exactly one is missing according to ``pd.isna``.

    Args:
        data: Table indexed by column name (``data[col]``).
        cat_cols: Column names to examine.

    Returns:
        list: The qualifying column names, in the order of ``cat_cols``.
    """
    def looks_binary(series):
        distinct = series.unique()
        count = len(distinct)
        if count == 2:
            return True
        # Three distinct entries still count when one of them is the
        # missing-value marker.
        return count == 3 and bool(pd.isna(distinct).sum() == 1)

    verdicts = Parallel(n_jobs=CONSTANT.JOBS,require='sharedmem')(
        delayed(looks_binary)(data[name]) for name in cat_cols)
    return [name for name, verdict in zip(cat_cols, verdicts) if verdict]
示例8: prefer_parallel_execution
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def prefer_parallel_execution(functions_to_be_called):  # pragma: no cover
    """Run every entry through ``run_function``, in parallel when possible.

    When joblib cannot be imported the entries are processed one after the
    other. Otherwise they are dispatched with one joblib job per CPU core,
    wrapped in a tqdm progress bar if tqdm is importable.

    Args:
        functions_to_be_called: Iterable of items handed one by one to
            ``run_function``.
    """
    try:
        import joblib
        import multiprocessing
    except ImportError:
        print('Joblib not installed, switching to serial execution')
        for fn in functions_to_be_called:
            run_function(fn)
        return

    # The progress bar is optional: fall back to the bare iterable.
    try:
        import tqdm
    except ImportError:
        work_items = functions_to_be_called
    else:
        work_items = tqdm.tqdm(functions_to_be_called)

    worker_count = multiprocessing.cpu_count()
    print('Parallelizing execution using Joblib')
    runner = joblib.Parallel(n_jobs=worker_count)
    runner(joblib.delayed(run_function)(fn) for fn in work_items)
示例9: parallelize
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def parallelize(bucket, only, _except, fn, args=(), versions=False):
    """Apply ``fn`` in parallel to the matching objects (or versions) of a bucket.

    Args:
        bucket: Bucket name, passed to ``s3().Bucket(...)``.
        only: Optional pattern; the part before its first ``*`` (up to the
            last ``/``) is used as a listing prefix. Without any ``*`` it is
            treated as a single object key.
        _except: Exclusion pattern forwarded to ``object_matches``.
        fn: Callable invoked as ``fn(bucket_name, key, *args)`` or, when
            ``versions`` is true, ``fn(bucket_name, key, version_id, *args)``.
        args: Extra positional arguments appended to every ``fn`` call.
        versions: When true, iterate non-latest object versions that have a
            size instead of current objects.

    Returns:
        list: The values returned by the ``fn`` calls, as collected by joblib.
    """
    # NOTE(review): s3() is defined outside this block; presumably it returns
    # a boto3 S3 service resource -- confirm.
    s3_bucket = s3().Bucket(bucket)

    # Narrow the listing with a key prefix for performance: everything before
    # the first wildcard, cut back to the last '/'.
    prefix = None
    if only:
        prefix = '/'.join(only.split('*')[0].split('/')[:-1])
        if prefix:
            prefix = prefix + '/'

    if versions:
        if prefix:
            object_versions = s3_bucket.object_versions.filter(Prefix=prefix)
        else:
            object_versions = s3_bucket.object_versions.all()
        # Delete markers have no size, hence the ``size is not None`` filter.
        return Parallel(n_jobs=24)(
            delayed(fn)(s3_bucket.name, ov.object_key, ov.id, *args)
            for ov in object_versions
            if object_matches(ov.object_key, only, _except)
            and not ov.is_latest
            and ov.size is not None
        )

    if only and '*' not in only:
        # Exact key: address the object directly. Pass the bucket *name*; the
        # original handed over the Bucket resource object here.
        objects = [s3().Object(s3_bucket.name, only)]
    elif prefix:
        objects = s3_bucket.objects.filter(Prefix=prefix)
    else:
        objects = s3_bucket.objects.all()
    return Parallel(n_jobs=24)(
        delayed(fn)(s3_bucket.name, obj.key, *args)
        for obj in objects
        if object_matches(obj.key, only, _except)
    )
示例10: recompute_factors_batched
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def recompute_factors_batched(Y, S, lambda_reg, W=None, X=None,
                              dtype='float32', batch_size=10000, n_jobs=4):
    """Recompute one factor matrix in row batches solved through joblib.

    Args:
        Y: Fixed factor matrix; ``Y.shape[1]`` is the number of factors.
        S: Matrix with one row per entity to solve for (``S.shape[0]`` rows).
        lambda_reg: Regularisation strength added on the diagonal of
            ``Y.T @ Y``.
        W: Optional matrix; when given, ``lambda_reg * (X.dot(W)).T`` is
            passed to ``solve_batch``.
        X: Matrix multiplied with ``W``; required whenever ``W`` is given.
        dtype: dtype forwarded to ``solve_batch``.
        batch_size: Number of rows handled per batch.
        n_jobs: Number of joblib workers.

    Returns:
        numpy.ndarray: The batch results concatenated along axis 0; an empty
        ``(0, f)`` array when ``S`` has no rows.
    """
    m = S.shape[0]  # m = number of users
    f = Y.shape[1]  # f = number of factors
    YTY = np.dot(Y.T, Y)  # precompute this
    YTYpR = YTY + lambda_reg * np.eye(f)
    if W is not None:
        WX = lambda_reg * (X.dot(W)).T
    else:
        WX = None

    num_batches = int(np.ceil(m / float(batch_size)))
    # ``range`` instead of the Python-2-only ``xrange`` keeps this runnable on
    # both major versions.
    res = Parallel(n_jobs=n_jobs)(
        delayed(solve_batch)(b, S, Y, WX, YTYpR, batch_size, m, f, dtype)
        for b in range(num_batches)
    )
    if not res:
        # np.concatenate rejects an empty sequence; nothing to solve.
        return np.zeros((m, f), dtype=dtype)
    return np.concatenate(res, axis=0)
示例11: convert_video_wapper
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def convert_video_wapper(src_videos,
                         dst_videos,
                         cmd_format,
                         in_parallel=True):
    """Build one command per (source, destination) pair and execute them all.

    Args:
        src_videos: Source paths, paired positionally with ``dst_videos``.
        dst_videos: Destination paths.
        cmd_format: Format string with two positional fields, filled with the
            source and the destination in that order.
        in_parallel: When true, commands run through joblib with 24 jobs;
            otherwise they run one after another.
    """
    commands = [cmd_format.format(src, dst)
                for src, dst in zip(src_videos, dst_videos)]
    logging.info("- {} commonds to excute".format(len(commands)))

    if in_parallel:
        num_jobs = 24
        logging.info("processing videos in parallel, num_jobs={}".format(num_jobs))
        Parallel(n_jobs=num_jobs)(delayed(exe_cmd)(cmd) for cmd in commands)
        return

    for cmd in commands:
        exe_cmd(cmd=cmd)
示例12: fitEnsemble
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def fitEnsemble(self, normMean, samples, factor):
    """Fit one model per window length and keep those close to the best.

    Window lengths run from 5 up to ``getMax(samples, self.MAX_WINDOW_LENGTH)``.
    ``self.results`` is reset, ``fitIndividual`` is invoked once per window,
    and the entries found in ``self.results`` afterwards are filtered.

    Args:
        normMean: Forwarded to ``fitIndividual`` and written to the log.
        samples: Training samples, forwarded to ``fitIndividual``.
        factor: Fraction of the best ``correct`` value an entry must reach to
            be kept.

    Returns:
        tuple: ``(kept_results, best_correct)``.
    """
    minWindowLength = 5
    maxWindowLength = getMax(samples, self.MAX_WINDOW_LENGTH)
    windows = self.getWindowsBetween(minWindowLength, maxWindowLength)
    self.logger.Log("Windows: %s" % windows)

    self.results = []
    self.logger.Log("%s Fitting for a norm of %s" % (self.NAME, str(normMean)))

    # NOTE(review): ``check_pickle`` is deprecated in newer joblib releases --
    # confirm it is still accepted by the version this project pins.
    jobs = (
        delayed(self.fitIndividual, check_pickle=False)(normMean, samples, windows, i)
        for i in range(len(windows))
    )
    Parallel(n_jobs=1, backend="threading")(jobs)

    # Best training result seen; never drops below the initial 0.
    correctTraining = 0
    for entry in self.results:
        if entry.correct > correctTraining:
            correctTraining = entry.correct

    # Discard every result that falls short of the required share of the best.
    threshold = correctTraining * factor
    new_results = [entry for entry in self.results if entry.correct >= threshold]
    return new_results, correctTraining
示例13: fitEnsemble
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def fitEnsemble(self, windows, normMean, samples):
    """Fit one model per given window and keep those close to the best score.

    ``self.results`` is reset, ``fitIndividual`` is invoked once per entry of
    ``windows``, and the entries found in ``self.results`` afterwards are
    filtered against ``self.factor`` times the best ``score``.

    Args:
        windows: Sequence of windows; the sequence and each index are passed
            to ``fitIndividual``.
        normMean: Forwarded to ``fitIndividual`` and written to the log.
        samples: Training samples, forwarded to ``fitIndividual``.

    Returns:
        list: The retained result objects.
    """
    self.results = []
    self.logger.Log("%s Fitting for a norm of %s" % (self.NAME, str(normMean)))

    # NOTE(review): ``check_pickle`` is deprecated in newer joblib releases --
    # confirm it is still accepted by the version this project pins.
    jobs = (
        delayed(self.fitIndividual, check_pickle=False)(normMean, samples, windows, i)
        for i in range(len(windows))
    )
    Parallel(n_jobs=1, backend="threading")(jobs)

    # Best training score seen; never drops below the initial 0.
    correctTraining = 0
    for entry in self.results:
        if entry.score > correctTraining:
            correctTraining = entry.score

    # Keep, and log, only the results that reach the required share of the best.
    new_results = []
    self.logger.Log("Stored Models for Norm=%s" % normMean)
    for entry in self.results:
        if entry.score < (correctTraining * self.factor):
            continue
        self.logger.Log("WindowLength:%s Features:%s TrainScore:%s" % (entry.windowLength, entry.features, entry.score))
        new_results.append(entry)
    return new_results
示例14: fitEnsemble
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def fitEnsemble(self, normMean, samples, factor):
    """Fit one model per window length on all cores and keep the best ones.

    Window lengths run from 5 up to ``getMax(samples, self.MAX_WINDOW_LENGTH)``.
    ``self.results`` is reset, ``fitIndividual`` is dispatched once per window
    on the threading backend with ``n_jobs=-1``, and the entries found in
    ``self.results`` afterwards are filtered.

    Args:
        normMean: Forwarded to ``fitIndividual`` and written to the log.
        samples: Training samples, forwarded to ``fitIndividual``.
        factor: Fraction of the best ``correct`` value an entry must reach to
            be kept.

    Returns:
        tuple: ``(kept_results, best_correct)``.
    """
    minWindowLength = 5
    maxWindowLength = getMax(samples, self.MAX_WINDOW_LENGTH)
    windows = self.getWindowsBetween(minWindowLength, maxWindowLength)
    self.logger.Log("Windows: %s" % windows)

    self.results = []
    self.logger.Log("%s Fitting for a norm of %s" % (self.NAME, str(normMean)))

    # NOTE(review): ``check_pickle`` is deprecated in newer joblib releases --
    # confirm it is still accepted by the version this project pins.
    jobs = (
        delayed(self.fitIndividual, check_pickle=False)(normMean, samples, windows, i)
        for i in range(len(windows))
    )
    Parallel(n_jobs=-1, backend="threading")(jobs)

    # Best training result seen; never drops below the initial 0.
    correctTraining = 0
    for entry in self.results:
        if entry.correct > correctTraining:
            correctTraining = entry.correct

    # Discard every result that falls short of the required share of the best.
    threshold = correctTraining * factor
    new_results = [entry for entry in self.results if entry.correct >= threshold]
    return new_results, correctTraining
示例15: fitEnsemble
# 需要导入模块: import joblib [as 别名]
# 或者: from joblib import delayed [as 别名]
def fitEnsemble(self, NormMean, samples):
    """Fit one model per entry of ``self.windows`` and keep the best ones.

    ``self.results`` is reset, ``fitIndividual`` is invoked once per window
    index, and the entries found in ``self.results`` afterwards are filtered
    against ``self.factor`` times the best ``score``.

    Args:
        NormMean: Forwarded to ``fitIndividual`` and written to the log.
        samples: Training samples, forwarded to ``fitIndividual``.

    Returns:
        tuple: ``(kept_results, best_score)``.
    """
    self.results = []
    self.logger.Log("%s Fitting for a norm of %s" % (self.NAME, str(NormMean)))

    # NOTE(review): ``check_pickle`` is deprecated in newer joblib releases --
    # confirm it is still accepted by the version this project pins.
    jobs = (
        delayed(self.fitIndividual, check_pickle=False)(NormMean, samples, i)
        for i in range(len(self.windows))
    )
    Parallel(n_jobs=1, backend="threading")(jobs)

    # Best training score seen; never drops below the initial 0.
    correctTraining = 0
    for entry in self.results:
        if entry.score > correctTraining:
            correctTraining = entry.score
    # NOTE(review): this log call is assumed to run once, after the search
    # loop -- confirm against the upstream file's indentation.
    self.logger.Log("CorrectTrain for a norm of %s" % (correctTraining))

    # Keep, and log, only the results that reach the required share of the best.
    new_results = []
    self.logger.Log("Stored Models for Norm=%s" % NormMean)
    for entry in self.results:
        if entry.score < (correctTraining * self.factor):
            continue
        self.logger.Log("WindowLength:%s Features:%s TrainScore:%s" % (entry.windowLength, entry.features, entry.score))
        new_results.append(entry)
    return new_results, correctTraining