本文整理汇总了Python中sklearn.externals.joblib.delayed方法的典型用法代码示例。如果您正苦于以下问题：Python joblib.delayed方法的具体用法？Python joblib.delayed怎么用？Python joblib.delayed使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块sklearn.externals.joblib的用法示例。
在下文中一共展示了joblib.delayed方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: batch_predict
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def batch_predict(fn):
    """Decorate a single-item predictor so it also accepts a whole DataFrame.

    The returned wrapper has the signature
    ``(self, df, preprocessor=None, **kwargs)``:

    * If ``df`` is a ``pd.DataFrame``, ``fn`` is called once per row (as
      yielded by ``df.iterrows()``) and the per-row results are returned as
      a list.  When ``self.n_jobs != 1`` the rows are dispatched through a
      ``Parallel`` pool configured from ``self.n_jobs``, ``self.verbose``
      and ``self.backend``; otherwise they are processed sequentially while
      a ``tqdm`` progress bar is advanced.
    * Any other input is forwarded unchanged as ``fn(self, df, **kwargs)``.

    ``preprocessor``, when truthy, is called with the DataFrame before the
    rows are processed.  Its return value is ignored, so it only has an
    effect if it modifies the frame in place.
    """
    from functools import wraps

    @wraps(fn)  # keep fn's __name__/__doc__ on the decorated predictor
    def _predict(self, df, preprocessor=None, **kwargs):
        # Anything that is not a DataFrame is treated as a single item.
        if not isinstance(df, pd.DataFrame):
            return fn(self, df, **kwargs)

        if preprocessor:
            preprocessor(df)

        if self.n_jobs != 1:
            # NOTE(review): this path forwards only **kwargs, while the
            # sequential path below also expands the row's own fields into
            # keyword arguments -- confirm which of the two is intended.
            with Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
                          backend=self.backend) as parallel:
                return parallel([delayed(fn)(self, row, **kwargs)
                                 for _, row in df.iterrows()])

        rows = []
        with tqdm(total=df.shape[0]) as pbar:
            for _, row in df.iterrows():
                # Explicit kwargs win over row fields with the same name.
                rows.append(fn(self, row, **{**row, **kwargs}))
                pbar.update()
        return rows

    return _predict
示例2: calc_fitness
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def calc_fitness(self, X, labels, fit_choice, sel):
    """Score every program output in ``X`` against ``labels``.

    X: iterable of program outputs; each element is passed to the fitness
        function as ``yhat``.
    labels: correct outputs, passed as the first argument of the fitness
        function.
    fit_choice: key selecting the fitness function in ``self.f`` (or in
        ``self.f_vec`` for lexicase selection).
    sel: name of the selection method; any value containing ``'lexicase'``
        switches to the ``self.f_vec`` functions.

    Returns a Fortran-ordered array.  For lexicase selection each entry is
    ``self.proper(self.f_vec[fit_choice](labels, yhat))``; otherwise the
    values of ``self.f[fit_choice](labels, yhat)`` are flattened to 1-D.
    """
    if 'lexicase' not in sel:
        aggregate_scores = [self.f[fit_choice](labels, candidate)
                            for candidate in X]
        return np.asarray(aggregate_scores, order='F').reshape(-1)

    per_sample_scores = [
        self.proper(self.f_vec[fit_choice](labels, candidate))
        for candidate in X
    ]
    return np.asarray(per_sample_scores, order='F')
示例3: _generateFragments
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def _generateFragments(self):
    """Fragment the molecules in ``self.moldata`` and store the results.

    Chunks produced by ``self._produceDataChunks`` are handed to
    ``_generateMolFrags`` through one reusable ``Parallel`` pool.  Every
    worker result is merged into a single dictionary with ``dict.update``
    and the collected entries are written to ``self.moldata['fps']``.

    Index labels for which no worker returned an entry keep an empty dict.
    """
    voc = set(self.vocabulary)
    # One (initially empty) entry per index label of the molecule table.
    fpsdict = {idx: {} for idx in self.moldata.index}
    nrows = self.moldata.shape[0]
    counter = 0
    with Parallel(n_jobs=self.n_jobs, verbose=self.verbose) as parallel:
        while counter < nrows:
            # Upper bound of the rows dispatched in this round.
            nextChunk = min(counter + (self.n_jobs * self.chunksize), nrows)
            result = parallel(
                delayed(_generateMolFrags)(mollist, voc,
                                           self.fragmentMethod,
                                           self.fragIdx)
                for mollist in self._produceDataChunks(counter, nextChunk,
                                                       self.chunksize))
            # NOTE(review): progress is driven by the number of returned
            # entries; a round that returns nothing (or a non-positive
            # n_jobs * chunksize) would never advance `counter` -- confirm
            # that the helpers always return one entry per molecule.
            for r in result:
                counter += len(r)
                fpsdict.update(r)
    # Look each row up by its own index label instead of relying on
    # sorted(fpsdict.items()) happening to match the row order; this also
    # works for an empty table.
    # Assumes the worker results are keyed by index label -- TODO confirm.
    self.moldata['fps'] = [fpsdict[idx] for idx in self.moldata.index]
# construct the molecule-fragment matrix as input for the LDA algorithm
示例4: run
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def run(n_seeds, n_jobs, _run, _seed):
    """Run every experiment configuration of the sweep in parallel.

    Seven ``'sgd'`` configurations (``step_size`` from
    ``np.logspace(-3, 3, 7)``) and six ``'gram'`` configurations
    (``reduction`` in ``[1, 4, 6, 8, 12, 24]``) are built, and
    ``single_run(config_updates, rundir, i)`` is dispatched for each of them.

    Parameters
    ----------
    n_seeds : int
        Number of seeds drawn into ``seed_list``.
    n_jobs : int
        Forwarded to ``Parallel``.
    _run : object
        Its ``_id`` attribute names the output directory
        ``<basedir>/<_run._id>/run``.
    _seed : int or RandomState
        Passed to ``check_random_state``.
    """
    # NOTE(review): seed_list is computed but never used below -- confirm
    # whether the seeds were meant to be passed to single_run.
    seed_list = check_random_state(_seed).randint(np.iinfo(np.uint32).max,
                                                  size=n_seeds)
    exps = []
    exps += [{'method': 'sgd',
              'step_size': step_size}
             for step_size in np.logspace(-3, 3, 7)]
    exps += [{'method': 'gram',
              'reduction': reduction}
             for reduction in [1, 4, 6, 8, 12, 24]]

    rundir = join(basedir, str(_run._id), 'run')
    # exist_ok avoids the race between checking for the directory and
    # creating it when several runs start at the same time.
    os.makedirs(rundir, exist_ok=True)

    Parallel(n_jobs=n_jobs,
             verbose=10)(delayed(single_run)(config_updates, rundir, i)
                         for i, config_updates in enumerate(exps))
示例5: run
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def run(n_seeds, n_jobs, _run, _seed):
    """Run the two selected experiment configurations in parallel.

    One ``'sgd'`` configuration (``step_size`` from
    ``np.logspace(-7, -7, 1)``) and one ``'gram'`` configuration
    (``reduction=12``) are built, and
    ``single_run(config_updates, rundir, i)`` is dispatched for each of them.

    Parameters
    ----------
    n_seeds : int
        Number of seeds drawn into ``seed_list``.
    n_jobs : int
        Forwarded to ``Parallel``.
    _run : object
        Its ``_id`` attribute names the output directory
        ``<basedir>/<_run._id>/run``.
    _seed : int or RandomState
        Passed to ``check_random_state``.
    """
    # NOTE(review): seed_list is computed but never used below -- confirm
    # whether the seeds were meant to be passed to single_run.
    seed_list = check_random_state(_seed).randint(np.iinfo(np.uint32).max,
                                                  size=n_seeds)
    exps = []
    exps += [{'method': 'sgd',
              'step_size': step_size}
             for step_size in np.logspace(-7, -7, 1)]
    exps += [{'method': 'gram',
              'reduction': reduction}
             for reduction in [12]]

    rundir = join(basedir, str(_run._id), 'run')
    # exist_ok avoids the race between checking for the directory and
    # creating it when several runs start at the same time.
    os.makedirs(rundir, exist_ok=True)

    Parallel(n_jobs=n_jobs,
             verbose=10)(delayed(single_run)(config_updates, rundir, i)
                         for i, config_updates in enumerate(exps))
示例6: fit_transform
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def fit_transform(self, X, y=None, **fit_params):
    """
    Fit every transformer on ``X`` (and possibly ``y``) and transform ``X``.

    ``X`` and ``y`` are first checked with ``verify_x_type`` and
    ``verify_y_type``.  ``_fit_transform`` is then run for each
    ``(transformer, weight)`` pair yielded by ``self._iter()`` in a
    ``joblib.Parallel`` pool of ``self.n_jobs`` workers; ``fit_params`` are
    forwarded to every call.

    Returns:
        The per-transformer results combined by ``self.__concat``
        (the original documentation describes this as a horizontal
        :func:`pandas.concat`).
    """
    verify_x_type(X)
    verify_y_type(y)

    runner = joblib.Parallel(n_jobs=self.n_jobs)
    tasks = (
        joblib.delayed(_fit_transform)(trans, weight, X, y, **fit_params)
        for _, trans, weight in self._iter()
    )
    Xts = runner(tasks)
    return self.__concat(Xts)
示例7: _base_est_fit
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def _base_est_fit(self, X, y, **fit_params):
    """Fit a clone of every base estimator on X and y.

    The base estimators are all entries of ``self.estimator_list`` except
    the last one.  Each clone is fitted by ``_fit_est`` with the fit
    parameters that ``self._extract_fit_params`` assigned to its name, and
    the fitted result then replaces the entry of the same name in
    ``estimator_list``.
    """
    params_by_name = self._extract_fit_params(**fit_params)
    base_estimators = self.estimator_list[:-1]

    tasks = [
        delayed(_fit_est)(clone(est), X, y, **params_by_name[name])
        for name, est in base_estimators
    ]
    runner = Parallel(
        n_jobs=self.n_jobs,
        verbose=self.verbose,
        pre_dispatch=self.pre_dispatch)
    fitted = runner(tasks)

    # Results come back in submission order, so pair them up by position.
    for (name, _), fitted_est in zip(base_estimators, fitted):
        self._replace_est('estimator_list', name, fitted_est)
示例8: _run_algorithm
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def _run_algorithm(self):
    """Find neighbors for every instance and sum the SURF feature scores.

    The average of all entries of ``self._distance_array`` is passed to
    ``self._find_neighbors`` for each instance.  ``SURF_compute_scores`` is
    then evaluated per instance in a ``Parallel`` pool and the results are
    summed over instances (``axis=0``).
    """
    dist_total = 0
    dist_count = 0
    for row_idx in range(self._datalen):
        dist_row = self._distance_array[row_idx]
        dist_total += sum(dist_row)
        dist_count += len(dist_row)
    avg_dist = dist_total / float(dist_count)

    # Boolean mask of the missing (NaN) entries of the data.
    nan_entries = np.isnan(self._X)
    neighbors = [self._find_neighbors(inst, avg_dist)
                 for inst in range(self._datalen)]

    pool = Parallel(n_jobs=self.n_jobs)
    score_instance = delayed(SURF_compute_scores)
    per_instance = pool(
        score_instance(inst, self.attr, nan_entries, self._num_attributes,
                       self.mcmap, nn, self._headers, self._class_type,
                       self._X, self._y, self._labels_std, self.data_type)
        for inst, nn in enumerate(neighbors))
    scores = np.sum(per_instance, axis=0)
    return np.array(scores)
示例9: _distarray_missing
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def _distarray_missing(self, xc, xd, cdiffs):
    """Build the distance array for data that contains missing values.

    For every instance the positions of the NaN entries in ``xc[i]`` and
    ``xd[i]`` are collected (per the original comments these hold the
    continuous and discrete features respectively).  ``get_row_missing`` is
    then evaluated once per instance, through a ``Parallel`` pool unless
    ``self.n_jobs == 1``, and the rows are returned as one array.
    """
    instances = range(self._datalen)
    # Indices of the missing values, one array per instance.
    cindices = [np.where(np.isnan(xc[i]))[0] for i in instances]
    dindices = [np.where(np.isnan(xd[i]))[0] for i in instances]

    if self.n_jobs == 1:
        # Per the original comment, each row holds the distances to the
        # other instances in a non-redundant (triangular) layout.
        dist_array = [
            get_row_missing(xc, xd, cdiffs, index, cindices, dindices)
            for index in instances
        ]
    else:
        pool = Parallel(n_jobs=self.n_jobs)
        dist_array = pool(
            delayed(get_row_missing)(xc, xd, cdiffs, index, cindices, dindices)
            for index in instances)
    return np.array(dist_array)
#==================================================================#
############################# ReliefF ############################################
示例10: _run_algorithm
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def _run_algorithm(self):
    """Find neighbors for every instance and sum the ReliefF feature scores.

    ``self._find_neighbors`` is mapped over all instance numbers and
    ``ReliefF_compute_scores`` is evaluated per instance in a ``Parallel``
    pool; the results are summed over instances (``axis=0``).
    """
    # Nearest neighbors of each instance, in instance order.
    neighbors = map(self._find_neighbors, range(self._datalen))
    # Boolean mask of the missing (NaN) entries of the data.
    missing_mask = np.isnan(self._X)

    pool = Parallel(n_jobs=self.n_jobs)
    score_instance = delayed(ReliefF_compute_scores)
    per_instance = pool(
        score_instance(inst, self.attr, missing_mask, self._num_attributes,
                       self.mcmap, nn, self._headers, self._class_type,
                       self._X, self._y, self._labels_std, self.data_type)
        for inst, nn in enumerate(neighbors))
    scores = np.sum(per_instance, axis=0)
    return np.array(scores)
示例11: fit_score
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def fit_score(self, X, Y):
    """Cross-validate ``self.clf`` on ``X``/``Y`` and summarize the scores.

    When ``self.cv`` is an int it is used as the number of ``KFold``
    splits; otherwise ``self.cv`` is iterated directly and must yield
    ``(train, test)`` pairs.  A clone of ``self.clf`` is fitted and scored
    with ``self.metric`` on every split via ``_fit_and_score``.

    Returns
    -------
    tuple
        ``(mean, std)`` of the first element of every ``_fit_and_score``
        result.
    """
    # Keep the splitter local: storing the one-shot KFold.split() generator
    # on self.cv would leave an exhausted generator behind, so a second
    # call would see no splits and fail below.
    cv = self.cv
    if isinstance(cv, int):
        cv = KFold(n_splits=cv).split(X)
    # NOTE(review): a generator supplied by the caller as self.cv is still
    # single-use.

    # Formatting is kinda ugly but provides best debugging view
    out = Parallel(n_jobs=self.n_jobs,
                   verbose=self.verbose,
                   pre_dispatch=self.pre_dispatch)(
        delayed(_fit_and_score)(clone(self.clf), X, Y, self.metric,
                                train, test, self.verbose, {},
                                {}, return_parameters=False,
                                error_score='raise')
        for train, test in cv)

    # The original comment describes each result as a triplet
    # (score, estimator, n_test_samples) -- TODO confirm against the
    # _fit_and_score version in use.  Only the first element is used.
    scores = list(zip(*out))[0]
    return np.mean(scores), np.std(scores)
示例12: setupGamma
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def setupGamma(self, ranking_size):
    """Make sure ``self.gammas`` holds the entries for ``ranking_size``.

    Three cases, checked in order:

    1. ``self.gammaRankingSize`` already equals ``ranking_size``: nothing
       is recomputed.
    2. A cache file for this dataset, policy name and ranking size exists
       under ``Settings.DATA_DIR``: it is loaded with ``joblib.load``.
    3. Otherwise ``UniformGamma`` is evaluated in parallel for every
       distinct value in ``self.dataset.docsPerQuery``; the results are
       stored by their first element and dumped to the cache file.

    Progress messages are printed in every case.  Returns ``None``.
    """
    cached_size = self.gammaRankingSize
    if cached_size is not None and cached_size == ranking_size:
        print("UniformPolicy:setupGamma [INFO] Gamma has been pre-computed for this ranking_size. Size of Gamma cache:", len(self.gammas), flush=True)
        return

    gammaFile = (Settings.DATA_DIR + self.dataset.name + '_' + self.name
                 + '_' + str(ranking_size) + '.z')

    if os.path.exists(gammaFile):
        self.gammas = joblib.load(gammaFile)
        self.gammaRankingSize = ranking_size
        print("UniformPolicy:setupGamma [INFO] Using precomputed gamma", gammaFile, flush=True)
        return

    self.gammas = {}
    self.gammaRankingSize = ranking_size

    candidateSet = set(self.dataset.docsPerQuery)
    pool = joblib.Parallel(n_jobs=-2, verbose=50)
    responses = pool(
        joblib.delayed(UniformGamma)(i, ranking_size, self.allowRepetitions)
        for i in candidateSet)
    # Each response carries its key first and the value second.
    self.gammas.update((tup[0], tup[1]) for tup in responses)

    joblib.dump(self.gammas, gammaFile, compress=9, protocol=-1)
    print("", flush=True)
    print("UniformPolicy:setupGamma [INFO] Finished creating Gamma_pinv cache. Size", len(self.gammas), flush=True)
示例13: compute_splits_parallel
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def compute_splits_parallel(G, output_path, owa=True, train_frac=0.51, num_fe_train=None, num_fe_test=None,
                            num_splits=10):
    r"""
    Compute ``num_splits`` train/test splits in parallel.

    ``_compute_one_split`` is called once per split index in
    ``range(num_splits)`` using a ``'multiprocessing'`` ``Parallel`` pool
    with one job per split.  All arguments are forwarded unchanged;
    nothing is returned.  The parameter descriptions below follow the
    original author's documentation.

    Parameters
    ----------
    G : graph
        A NetworkX graph.
    output_path : string
        Path where the data will be stored. Can include a name for all
        splits to share.
    owa : bool, optional
        Whether the open world assumption holds. If True, false train
        edges can be true test edges. If False, false train edges are
        known to be false (not in G). Default is True.
    train_frac : float, optional
        Relative size (in range (0.0, 1.0]) of the train set with respect
        to the total number of edges in the graph. Default is 0.51.
    num_fe_train : int, optional
        Number of train false edges to generate. Default is the same
        number as true train edges.
    num_fe_test : int, optional
        Number of test false edges to generate. Default is the same number
        as true test edges.
    num_splits : int, optional
        Number of train/test splits to generate. Default is 10.
    """
    split_job = delayed(_compute_one_split)
    runner = Parallel(n_jobs=num_splits, verbose=True, backend='multiprocessing')
    jobs = (
        split_job(G, output_path, owa, train_frac, num_fe_train, num_fe_test, split_id)
        for split_id in range(num_splits)
    )
    runner(jobs)
示例14: transform
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def transform(self, x, inds=None, labels=None):
    """Return a transformation of ``x`` built from population outputs.

    ``self.out(I, x, labels, self.otype)`` is evaluated for every
    individual ``I`` in ``inds`` or, when ``inds`` is empty or not given,
    in ``self._best_inds``.  The outputs are stacked and transposed so
    that each individual contributes one column.  When neither collection
    has any individuals, ``x`` itself is returned unchanged.
    """
    individuals = inds or self._best_inds
    if not individuals:
        return x

    outputs = [self.out(individual, x, labels, self.otype)
               for individual in individuals]
    return np.asarray(outputs).transpose()
示例15: transform
# 需要导入模块: from sklearn.externals import joblib [as 别名]
# 或者: from sklearn.externals.joblib import delayed [as 别名]
def transform(self, X, y=None):
    """Compute ``self._get_stats`` for every row of ``X`` in parallel.

    Rows are selected as ``X[i, :]`` for ``i`` in ``range(len(X))`` and
    processed by a ``Parallel`` pool configured from ``self.n_jobs``,
    ``self.pre_dispatch`` and ``self.verbose``.  ``y`` is accepted but not
    used.

    Returns the per-row results as a NumPy array.
    """
    runner = Parallel(
        n_jobs=self.n_jobs,
        pre_dispatch=self.pre_dispatch,
        verbose=self.verbose
    )
    row_jobs = (
        delayed(self._get_stats)(X[row_idx, :])
        for row_idx in range(len(X))
    )
    return np.array(runner(row_jobs))