This article collects and organizes typical usage examples of the Python method sklearn.externals.joblib.Parallel. If you have been wondering what joblib.Parallel is for, how to call it, or what real-world usage looks like, the curated method examples below should help. You can also explore the containing module, sklearn.externals.joblib, for further usage examples.
Fifteen code examples of joblib.Parallel are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code samples.
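Before working through the examples, here is a minimal, self-contained sketch of the Parallel/delayed pattern they all build on (generic joblib usage, not taken from any example below). Note that sklearn.externals.joblib has been deprecated and was removed in scikit-learn 0.23; in current code you would import joblib directly.

from math import sqrt
from sklearn.externals.joblib import Parallel, delayed

# Evaluate sqrt over the inputs in two worker processes; results preserve input order.
results = Parallel(n_jobs=2)(delayed(sqrt)(i) for i in range(10))
print(results)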
Example 1: _do_fit
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def _do_fit(n_jobs, verbose, pre_dispatch, base_estimator,
X, y, scorer, parameter_iterable, fit_params,
error_score, cv, **kwargs):
groups = kwargs.pop('groups')
# test_score, n_samples, parameters
out = Parallel(n_jobs=n_jobs, verbose=verbose, pre_dispatch=pre_dispatch)(
delayed(_fit_and_score)(
clone(base_estimator), X, y, scorer,
train, test, verbose, parameters,
fit_params=fit_params,
return_train_score=False,
return_n_test_samples=True,
return_times=False,
return_parameters=True,
error_score=error_score)
for parameters in parameter_iterable
for train, test in cv.split(X, y, groups))
# test_score, n_samples, _, parameters
return [(mod[0], mod[1], None, mod[2]) for mod in out]
Example 2: batch_predict
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def batch_predict(fn):
def _predict(self, df, preprocessor=None, **kwargs):
# print('Is given instance a df? ', isinstance(df, pd.DataFrame))
if isinstance(df, pd.DataFrame):
if preprocessor:
preprocessor(df)
rows = []
if self.n_jobs != 1:
with Parallel(n_jobs=self.n_jobs, verbose=self.verbose, backend=self.backend) as parallel:
rows = parallel([delayed(fn)(*(self, row), **kwargs) for idx, row in df.iterrows()])
else:
with tqdm(total=df.shape[0]) as pbar:
for idx, row in df.iterrows():
                        rows.append(fn(self, row, **kwargs))  # match the call signature of the parallel branch
pbar.update()
return rows
else:
return fn(self, df, **kwargs)
return _predict
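A usage sketch for the decorator above; the Predictor class and its attributes are hypothetical, invented for illustration, and n_jobs=1 exercises the serial tqdm branch:

import pandas as pd

class Predictor:
    def __init__(self, n_jobs=1, verbose=0, backend='loky'):
        self.n_jobs, self.verbose, self.backend = n_jobs, verbose, backend

    @batch_predict
    def predict(self, row, **kwargs):
        # Toy per-row prediction: sum the row's numeric fields.
        return row.sum()

df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})
print(Predictor().predict(df))  # [4, 6]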
Example 3: calc_fitness
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def calc_fitness(self, X, labels, fit_choice, sel):
    """Computes the fitness of each program output yhat in X.
    X: outputs of the programs (one yhat per program).
    labels: correct outputs.
    fit_choice: choice of fitness function.
    sel: selection method.
    """
if 'lexicase' in sel:
# return list(map(lambda yhat: self.f_vec[fit_choice](labels,yhat),X))
return np.asarray(
[self.proper(self.f_vec[fit_choice](labels,
yhat)) for yhat in X],
order='F')
# return list(Parallel(n_jobs=-1)(delayed(self.f_vec[fit_choice])(labels,yhat) for yhat in X))
else:
# return list(map(lambda yhat: self.f[fit_choice](labels,yhat),X))
return np.asarray([self.f[fit_choice](labels,yhat) for yhat in X],
order='F').reshape(-1)
# return list(Parallel(n_jobs=-1)(delayed(self.f[fit_choice])(labels,yhat) for yhat in X))
Example 4: _generateFragments
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def _generateFragments(self):
    voc = set(self.vocabulary)
    fpsdict = dict([(idx, {}) for idx in self.moldata.index])
    nrows = self.moldata.shape[0]
    counter = 0
    with Parallel(n_jobs=self.n_jobs, verbose=self.verbose) as parallel:
        while counter < nrows:
            nextChunk = min(counter + (self.n_jobs * self.chunksize), nrows)
            result = parallel(delayed(_generateMolFrags)(mollist, voc,
                                                         self.fragmentMethod,
                                                         self.fragIdx)
                              for mollist in self._produceDataChunks(counter, nextChunk, self.chunksize))
            for r in result:
                counter += len(r)
                fpsdict.update(r)
    self.moldata['fps'] = np.array(sorted(fpsdict.items()))[:, 1]
# construct the molecule-fragment matrix as input for the LDA algorithm
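Example 4 reuses a single worker pool across many chunks via Parallel's context-manager form; here is a minimal sketch of that reuse pattern (toy data and function, unrelated to the molecule code above):

from sklearn.externals.joblib import Parallel, delayed

data = list(range(100))
chunks = [data[i:i + 10] for i in range(0, len(data), 10)]
with Parallel(n_jobs=2) as parallel:
    # The worker pool is created once and reused for every call inside the block.
    totals = [sum(parallel(delayed(abs)(x) for x in chunk)) for chunk in chunks]
print(totals)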
Example 5: fit_transform
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def fit_transform(self, X, y=None, **fit_params):
self._validate_transformers()
result = Parallel(n_jobs=self.n_jobs)(
delayed(_fit_transform_one)(
transformer=trans,
X=X,
y=y,
weight=weight,
**fit_params)
for name, trans, weight in self._iter())
if not result:
# All transformers are None
return np.zeros((X.shape[0], 0))
Xs, transformers = zip(*result)
self._update_transformer_list(transformers)
if any(sparse.issparse(f) for f in Xs):
Xs = sparse.hstack(Xs).tocsr()
else:
Xs = self.merge_dataframes_by_column(Xs)
return Xs
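_fit_transform_one is a private helper in sklearn.pipeline whose exact signature varies across scikit-learn versions; the following is only a simplified sketch of what such a helper does, not the library implementation:

def _fit_transform_one(transformer, X, y, weight, **fit_params):
    # Use the fused fit_transform when the transformer provides one.
    if hasattr(transformer, 'fit_transform'):
        res = transformer.fit_transform(X, y, **fit_params)
    else:
        res = transformer.fit(X, y, **fit_params).transform(X)
    # Apply the optional per-transformer weight; also return the fitted transformer.
    if weight is None:
        return res, transformer
    return res * weight, transformer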
Example 6: transform
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def transform(self, X):
Xs = Parallel(n_jobs=self.n_jobs)(
delayed(_transform_one)(
transformer=trans,
X=X,
y=None,
weight=weight)
for name, trans, weight in self._iter())
if not Xs:
# All transformers are None
return np.zeros((X.shape[0], 0))
if any(sparse.issparse(f) for f in Xs):
Xs = sparse.hstack(Xs).tocsr()
else:
Xs = self.merge_dataframes_by_column(Xs)
return Xs
Example 7: fit_transform
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def fit_transform(self, X, y=None, **fit_params):
"""
Fits the transformer using ``X`` (and possibly ``y``). Transforms
``X`` using the transformers, uses :func:`pandas.concat`
to horizontally concatenate the results.
Returns:
``self``
"""
verify_x_type(X)
verify_y_type(y)
Xts = joblib.Parallel(n_jobs=self.n_jobs)(
joblib.delayed(_fit_transform)(trans, weight, X, y, **fit_params) for _, trans, weight in self._iter())
return self.__concat(Xts)
Example 8: _base_est_fit
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def _base_est_fit(self, X, y, **fit_params):
"""Fit the base estimators on X and y.
"""
fit_params_ests = self._extract_fit_params(**fit_params)
_jobs = []
for name, est in self.estimator_list[:-1]:
_jobs.append(delayed(_fit_est)(
clone(est), X, y, **fit_params_ests[name]))
_out = Parallel(
n_jobs=self.n_jobs,
verbose=self.verbose,
pre_dispatch=self.pre_dispatch)(_jobs)
for name, _ in self.estimator_list[:-1]:
self._replace_est('estimator_list', name, _out.pop(0))
Example 9: _run_algorithm
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def _run_algorithm(self):
""" Runs nearest neighbor (NN) identification and feature scoring to yield SURF scores. """
sm = cnt = 0
for i in range(self._datalen):
sm += sum(self._distance_array[i])
cnt += len(self._distance_array[i])
avg_dist = sm / float(cnt)
nan_entries = np.isnan(self._X)
    NNlist = [self._find_neighbors(inst, avg_dist) for inst in range(self._datalen)]  # neighbors for each instance
scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
SURF_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap,
NN, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type)
for instance_num, NN in zip(range(self._datalen), NNlist)), axis=0)
return np.array(scores)
Example 10: _distarray_missing
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def _distarray_missing(self, xc, xd, cdiffs):
"""Distance array calculation for data with missing values"""
cindices = []
dindices = []
# Get Boolean mask locating missing values for continuous and discrete features separately. These correspond to xc and xd respectively.
for i in range(self._datalen):
cindices.append(np.where(np.isnan(xc[i]))[0])
dindices.append(np.where(np.isnan(xd[i]))[0])
if self.n_jobs != 1:
dist_array = Parallel(n_jobs=self.n_jobs)(delayed(get_row_missing)(
xc, xd, cdiffs, index, cindices, dindices) for index in range(self._datalen))
else:
# For each instance calculate distance from all other instances (in non-redundant manner) (i.e. computes triangle, and puts zeros in for rest to form square).
dist_array = [get_row_missing(xc, xd, cdiffs, index, cindices, dindices)
for index in range(self._datalen)]
return np.array(dist_array)
Example 11: _run_algorithm
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def _run_algorithm(self):
""" Runs nearest neighbor (NN) identification and feature scoring to yield ReliefF scores. """
# Find nearest neighbors
NNlist = map(self._find_neighbors, range(self._datalen))
# Feature scoring - using identified nearest neighbors
nan_entries = np.isnan(self._X) # boolean mask for missing data values
# Call the scoring method for the ReliefF algorithm
scores = np.sum(Parallel(n_jobs=self.n_jobs)(delayed(
ReliefF_compute_scores)(instance_num, self.attr, nan_entries, self._num_attributes, self.mcmap,
NN, self._headers, self._class_type, self._X, self._y, self._labels_std, self.data_type)
for instance_num, NN in zip(range(self._datalen), NNlist)), axis=0)
return np.array(scores)
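Examples 9 and 11 both reduce per-instance score vectors with np.sum over axis 0; here is the aggregation pattern in isolation (the scoring function is a toy stand-in, not the real ReliefF scorer):

import numpy as np
from sklearn.externals.joblib import Parallel, delayed

def score_one_instance(i, n_features):
    # Toy stand-in for a per-instance scorer: returns one score vector.
    rng = np.random.RandomState(i)
    return rng.rand(n_features)

# Each job yields a length-n_features vector; summing over axis 0 aggregates them.
scores = np.sum(Parallel(n_jobs=2)(
    delayed(score_one_instance)(i, 5) for i in range(100)), axis=0)
print(scores.shape)  # (5,)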
Example 12: _extract_and_write
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def _extract_and_write(self, X, neighbor_id_lists, distances_to_neighbors, fileName = "l2r_train", y = None):
labels_in_neighborhood = Parallel(n_jobs=self.n_jobs)(
delayed(_create_training_samples)(cur_doc, neighbor_list, X, y, cur_doc + 1, distances_to_neighbors,
self.count_concepts, self.count_terms, self.number_of_concepts,
self.ibm1 if self.n_jobs == 1 and self.translation_probability else None) for cur_doc, neighbor_list in enumerate(neighbor_id_lists))
doc_to_neighborhood_dict = self._merge_dicts(labels_in_neighborhood)
filenames = ["samples_" + str(qid + 1) + ".tmp" for qid in range(len(doc_to_neighborhood_dict))]
with open(fileName, 'w') as outfile:
for fname in filenames:
with open(fname) as infile:
for line in infile:
outfile.write(line)
outfile.write('\n')
return doc_to_neighborhood_dict
Example 13: fit_score
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def fit_score(self, X, Y):
    if isinstance(self.cv, int):
        n_folds = self.cv
        # Note: split() returns a one-shot generator, so fit_score can only run once per cv.
        self.cv = KFold(n_splits=n_folds).split(X)
    # The formatting below is a bit ugly but gives the best view for debugging.
out = Parallel(n_jobs=self.n_jobs,
verbose=self.verbose,
pre_dispatch=self.pre_dispatch)\
(delayed(_fit_and_score)(clone(self.clf), X, Y, self.metric,
train, test, self.verbose, {},
{}, return_parameters=False,
error_score='raise')
for train, test in self.cv)
    # out is a list of triplets: (score, estimator, n_test_samples)
scores = list(zip(*out))[0]
return np.mean(scores), np.std(scores)
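The cv handling at the top of fit_score relies on KFold.split yielding index pairs; a minimal illustration (note the generator is exhausted after a single pass):

import numpy as np
from sklearn.model_selection import KFold

X = np.arange(20).reshape(10, 2)
for train, test in KFold(n_splits=5).split(X):
    # Each iteration yields (train_indices, test_indices) as integer arrays.
    print(train.shape, test.shape)  # (8,) (2,)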
Example 14: setupGamma
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def setupGamma(self, ranking_size):
    if self.gammaRankingSize is not None and self.gammaRankingSize == ranking_size:
        print("UniformPolicy:setupGamma [INFO] Gamma has been pre-computed for this ranking_size. Size of Gamma cache:", len(self.gammas), flush=True)
        return

    gammaFile = Settings.DATA_DIR + self.dataset.name + '_' + self.name + '_' + str(ranking_size) + '.z'
    if os.path.exists(gammaFile):
        self.gammas = joblib.load(gammaFile)
        self.gammaRankingSize = ranking_size
        print("UniformPolicy:setupGamma [INFO] Using precomputed gamma", gammaFile, flush=True)
    else:
        self.gammas = {}
        self.gammaRankingSize = ranking_size
        candidateSet = set(self.dataset.docsPerQuery)
        responses = joblib.Parallel(n_jobs=-2, verbose=50)(
            joblib.delayed(UniformGamma)(i, ranking_size, self.allowRepetitions) for i in candidateSet)
        for tup in responses:
            self.gammas[tup[0]] = tup[1]
        joblib.dump(self.gammas, gammaFile, compress=9, protocol=-1)
        print("", flush=True)
        print("UniformPolicy:setupGamma [INFO] Finished creating Gamma_pinv cache. Size", len(self.gammas), flush=True)
Example 15: fit
# Required import: from sklearn.externals import joblib [as alias]
# Or: from sklearn.externals.joblib import Parallel [as alias]
def fit(self, Z, **fit_params):
"""TODO: rewrite docstring
Fit all transformers using X.
Parameters
----------
X : array-like or sparse matrix, shape (n_samples, n_features)
Input data, used to fit transformers.
"""
fit_params_steps = dict((step, {})
for step, _ in self.transformer_list)
for pname, pval in six.iteritems(fit_params):
step, param = pname.split('__', 1)
fit_params_steps[step][param] = pval
transformers = Parallel(n_jobs=self.n_jobs, backend="threading")(
delayed(_fit_one_transformer)(trans, Z, **fit_params_steps[name])
for name, trans in self.transformer_list)
self._update_transformer_list(transformers)
return self
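The step__param splitting in fit follows scikit-learn's double-underscore routing convention; the same idea in isolation (the step names here are hypothetical):

import six

fit_params = {'scaler__copy': False, 'pca__whiten': True}
fit_params_steps = {'scaler': {}, 'pca': {}}
for pname, pval in six.iteritems(fit_params):
    step, param = pname.split('__', 1)  # 'scaler__copy' -> ('scaler', 'copy')
    fit_params_steps[step][param] = pval
print(fit_params_steps)  # {'scaler': {'copy': False}, 'pca': {'whiten': True}}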