This article collects typical usage examples of the Python method sklearn.grid_search.RandomizedSearchCV. If you have been wondering what grid_search.RandomizedSearchCV does, how to call it, or where to find usage examples, the curated code samples below should help. You can also explore further usage examples for the containing module, sklearn.grid_search.
Twelve code examples of grid_search.RandomizedSearchCV are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
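Before the examples, here is a minimal, self-contained usage sketch with synthetic data. Note that sklearn.grid_search was deprecated in scikit-learn 0.18 and removed in 0.20; in modern code the same class lives in sklearn.model_selection.

from scipy.stats import randint
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.grid_search import RandomizedSearchCV  # sklearn.model_selection in >= 0.18

X, y = make_classification(n_samples=200, n_features=10, random_state=0)
# Sample 10 candidates from the parameter distributions, score each with 3-fold CV
search = RandomizedSearchCV(RandomForestClassifier(random_state=0),
                            param_distributions={'n_estimators': randint(10, 50),
                                                 'max_depth': randint(2, 8)},
                            n_iter=10, cv=3, random_state=0)
search.fit(X, y)
print(search.best_params_, search.best_score_)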
Example 1: GridParamSearch
# Required import: from sklearn import grid_search [as alias]
# Or: from sklearn.grid_search import RandomizedSearchCV [as alias]
def GridParamSearch(param_dist, clf, X, y, n_iter_search=15):
    '''
    Searches for the best model parameters using randomized CV search.
    Different parameters are searched depending on the model type.
    http://nbviewer.ipython.org/github/treycausey/thespread/blob/master/notebooks/basic_random_forest_wp_model.ipynb?create=1
    @param clf: estimator/predictor used.
    @param param_dist: grid of parameter ranges to tune for the predictor,
    using randomized CV search.
    '''
    print("Starting grid parameter search")
    random_search = RandomizedSearchCV(clf, param_distributions=param_dist,
                                       n_iter=n_iter_search, n_jobs=-1)
    start = time()
    # random_search.fit(features, target)
    random_search.fit(X, y)
    print("RandomizedSearchCV took %.2f seconds for %d candidate"
          " parameter settings." % ((time() - start), n_iter_search))
    report(random_search.grid_scores_)
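The report helper is not shown in this example. A plausible reconstruction, matching the grid_scores_ reporting helper from the scikit-learn documentation of that era (an assumption, not taken from this repository):

from operator import itemgetter
import numpy as np

def report(grid_scores, n_top=3):
    """Print the n_top best-scoring candidates from grid_scores_."""
    # Each entry is a (parameters, mean_validation_score, cv_validation_scores) tuple
    top_scores = sorted(grid_scores, key=itemgetter(1), reverse=True)[:n_top]
    for i, score in enumerate(top_scores):
        print("Model with rank: {0}".format(i + 1))
        print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
              score.mean_validation_score,
              np.std(score.cv_validation_scores)))
        print("Parameters: {0}".format(score.parameters))
        print("")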
Example 2: svm_class_and_score
# Required import: from sklearn import grid_search [as alias]
# Or: from sklearn.grid_search import RandomizedSearchCV [as alias]
def svm_class_and_score(
        X_train, y_train, X_test, y_test, labels, search_type=RandomizedSearchCV,
        parameter_space={
            'kernel': ['linear', 'rbf', 'poly'], 'gamma': ['auto', 1e-3, 1e-4],
            'C': [0.01, .1, 1, 10, 100, 1000],
            'class_weight': [
                {0: 0.01}, {1: 1}, {1: 2}, {1: 10}, {1: 50}, 'balanced']},
        score='recall_weighted', iid=True, bagged=False, svm_results=True):
    """Build an SVM and return its scoring metrics
    """
    print("# Tuning hyper-parameters for %s" % score)
    print()
    # Find the hyperparameters
    clf = search_type(SVC(C=1), parameter_space, cv=10,
                      scoring=score, iid=iid)
    # Build the SVM
    clf.fit(X_train, y_train)
    print("Hyperparameters found:")
    print(clf.best_params_)
    # Make the predictions
    y_pred = clf.predict(X_test)
    print()
    print()
    print("Results for basic SVM")
    clf_scoring(y_test, y_pred, labels)
    if bagged is True:
        bgg = BaggingClassifier(base_estimator=clf)
        bgg.fit(X_train, y_train)
        y_pred = bgg.predict(X_test)
        print()
        print()
        print("Results for bagging:")
        clf_scoring(y_test, y_pred, labels)
        return clf, bgg
    else:
        return clf
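Because parameter_space holds plain lists rather than scipy distributions, RandomizedSearchCV samples candidates uniformly from each list, so the same dictionary also works unchanged if GridSearchCV is passed as search_type. A call might look like the following (the data arrays and labels list are hypothetical, and clf_scoring is a scoring helper assumed to be defined elsewhere in the same module):

# Hypothetical usage: tune, fit, and score an SVM with randomized search
clf = svm_class_and_score(X_train, y_train, X_test, y_test,
                          labels=[0, 1], score='recall_weighted')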
Example 3: runGridSearch
# Required import: from sklearn import grid_search [as alias]
# Or: from sklearn.grid_search import RandomizedSearchCV [as alias]
def runGridSearch(self, model):
    logging.debug("run grid search on model: {}".format(model.__class__.__name__))
    logging.debug("cross validation strategy: {}".format(model.holdout_split))
    logging.debug("used features: {}".format(model.usedFeatures))
    logging.debug("tuned parameters: {}".format(model.getTunedParamterOptions()))
    features, labels, cv = model.getFeaturesLabel()
    # do grid search
    if self.do_random_gridsearch:
        estimator = RandomizedSearchCV(model.clf, model.getTunedParamterOptions(), cv=cv, n_jobs=self.n_jobs,
                                       scoring=mean_absolute_percentage_error_scoring, verbose=500, n_iter=self.n_iter_randomsearch)
    else:
        estimator = GridSearchCV(model.clf, model.getTunedParamterOptions(), cv=cv, n_jobs=self.n_jobs,
                                 fit_params=model.get_fit_params(),
                                 scoring=mean_absolute_percentage_error_scoring, verbose=500)
    estimator.fit(features, labels)
    model.clf = estimator.best_estimator_
    model.save_final_model = True
    model.save_model()
    # model.dispFeatureImportance()
    logging.debug('estimator parameters: {}'.format(estimator.get_params()))
    logging.debug('Best parameters: {}'.format(estimator.best_params_))
    logging.debug('Best Scores: {}'.format(-estimator.best_score_))
    logging.debug('Score grid: {}'.format(estimator.grid_scores_))
    for i in estimator.grid_scores_:
        logging.debug('parameters: {}'.format(i.parameters))
        logging.debug('mean_validation_score: {}'.format(np.absolute(i.mean_validation_score)))
        logging.debug('cv_validation_scores: {}'.format(np.absolute(i.cv_validation_scores)))
    return
Example 4: create_classif_search
# Required import: from sklearn import grid_search [as alias]
# Or: from sklearn.grid_search import RandomizedSearchCV [as alias]
def create_classif_search(name_clf, clf_pipeline, nb_labels,
                          search_type='random', cross_val=10,
                          eval_metric='f1', nb_iter=250, nb_workers=5):
    """ create sklearn search, either random or grid, depending on spec.

    :param str name_clf: name of classifier
    :param obj clf_pipeline: pipeline object
    :param int nb_labels: number of labels
    :param str search_type: hyper-params search type
    :param obj cross_val: CV object specifying a fixed train-test split
    :param str eval_metric: evaluation metric
    :param int nb_iter: number of tries for random search
    :param int nb_workers: number of jobs running in parallel
    :return:
    """
    score_weight = 'weighted' if nb_labels > 2 else 'binary'
    scoring = metrics.make_scorer(DICT_SCORING[eval_metric.lower()],
                                  average=score_weight)
    if search_type == 'grid':
        clf_parameters = create_clf_param_search_grid(name_clf)
        logging.info('init Grid search...')
        clf_search = GridSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, verbose=1, refit=True)
    else:
        clf_parameters = create_clf_param_search_distrib(name_clf)
        nb_iter = search_params_cut_down_max_nb_iter(clf_parameters, nb_iter)
        logging.info('init Randomized search...')
        clf_search = RandomizedSearchCV(
            clf_pipeline, clf_parameters, scoring=scoring, cv=cross_val,
            n_jobs=nb_workers, n_iter=nb_iter, verbose=1, refit=True)
    return clf_search
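DICT_SCORING, create_clf_param_search_grid, create_clf_param_search_distrib, and search_params_cut_down_max_nb_iter are defined elsewhere in the source project. For orientation only, DICT_SCORING presumably maps metric names to sklearn metric functions, roughly like this (a hypothetical sketch, not the project's actual definition):

from sklearn import metrics

# Hypothetical lookup table; each entry must accept an `average` keyword,
# since make_scorer above forwards average=score_weight to the metric
DICT_SCORING = {
    'f1': metrics.f1_score,
    'precision': metrics.precision_score,
    'recall': metrics.recall_score,
}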
Example 5: test_RandomGlobalParams
# Required import: from sklearn import grid_search [as alias]
# Or: from sklearn.grid_search import RandomizedSearchCV [as alias]
def test_RandomGlobalParams(self):
    clf = RandomizedSearchCV(
        self.__estimator__(layers=[L("Sigmoid")], n_iter=1),
        param_distributions={'learning_rate': uniform(0.001, 0.01)},
        n_iter=2)
    clf.fit(self.a_in, self.a_out)
Example 6: test_RandomLayerParams
# Required import: from sklearn import grid_search [as alias]
# Or: from sklearn.grid_search import RandomizedSearchCV [as alias]
def test_RandomLayerParams(self):
    clf = RandomizedSearchCV(
        self.__estimator__(layers=[L("Rectifier", units=12), L(self.__output__)], n_iter=1),
        param_distributions={'hidden0__units': randint(4, 12)},
        n_iter=2)
    clf.fit(self.a_in, self.a_out)
Example 7: test_RandomMultipleJobs
# Required import: from sklearn import grid_search [as alias]
# Or: from sklearn.grid_search import RandomizedSearchCV [as alias]
def test_RandomMultipleJobs(self):
    clf = RandomizedSearchCV(
        self.__estimator__(layers=[L("Sigmoid", units=12), L(self.__output__)], n_iter=1),
        param_distributions={'hidden0__units': randint(4, 12)},
        n_iter=4, n_jobs=4)
    clf.fit(self.a_in, self.a_out)
Example 8: test_example_randomized_search
# Required import: from sklearn import grid_search [as alias]
# Or: from sklearn.grid_search import RandomizedSearchCV [as alias]
def test_example_randomized_search(self):
    # The classic example from the sklearn documentation
    iris = datasets.load_iris()
    parameters = {'kernel': ('linear', 'rbf'), 'C': range(1, 10)}
    svr = svm.SVC()
    clf = grid_search.RandomizedSearchCV(svr, parameters, random_state=4)
    clf.fit(iris.data, iris.target)
    clf2 = RandomizedSearchCV(self.sc, svr, parameters, random_state=4)
    clf2.fit(iris.data, iris.target)
    b1 = clf.estimator
    b2 = clf2.estimator
    self.assertEqual(b1.get_params(), b2.get_params())
Example 9: test_cv_linreg
# Required import: from sklearn import grid_search [as alias]
# Or: from sklearn.grid_search import RandomizedSearchCV [as alias]
def test_cv_linreg(self):
    pipeline = SKL_Pipeline([
        ('lasso', SKL_Lasso(max_iter=1))
    ])
    parameters = {
        'lasso__alpha': np.linspace(0.001, 0.01, 1000)
    }
    n_iter = 10
    grid_search = RandomizedSearchCV(self.sc, pipeline, parameters, n_iter=n_iter)
    X = scipy.sparse.vstack(map(lambda x: self.list2csr([x, x + 1.0]), range(0, 100)))
    y = np.array(list(range(0, 100))).reshape((100, 1))
    skl_gs = grid_search.fit(X, y)
    assert len(skl_gs.cv_results_['params']) == n_iter
Example 10: fit_estimator
# Required import: from sklearn import grid_search [as alias]
# Or: from sklearn.grid_search import RandomizedSearchCV [as alias]
def fit_estimator(estimator,
                  positive_data_matrix=None,
                  negative_data_matrix=None,
                  target=None,
                  cv=10,
                  n_jobs=-1,
                  n_iter_search=40,
                  random_state=1):
    """fit_estimator."""
    # hyperparameter optimization
    param_dist = {"n_iter": randint(5, 100),
                  "power_t": uniform(0.1),
                  "alpha": uniform(1e-08, 1e-03),
                  "eta0": uniform(1e-03, 1),
                  "penalty": ["l1", "l2", "elasticnet"],
                  "learning_rate": ["invscaling", "constant", "optimal"]}
    scoring = 'roc_auc'
    random_search = RandomizedSearchCV(estimator,
                                       param_distributions=param_dist,
                                       n_iter=n_iter_search,
                                       cv=cv,
                                       scoring=scoring,
                                       n_jobs=n_jobs,
                                       random_state=random_state,
                                       refit=True)
    X, y = make_data_matrix(positive_data_matrix=positive_data_matrix,
                            negative_data_matrix=negative_data_matrix,
                            target=target)
    random_search.fit(X, y)
    logger.debug('\nClassifier:')
    logger.debug('%s' % random_search.best_estimator_)
    logger.debug('\nPredictive performance:')
    # assess the generalization capacity of the model via a 10-fold cross
    # validation
    scoring_strings = ['accuracy', 'precision', 'recall', 'f1',
                       'average_precision', 'roc_auc']
    for scoring in scoring_strings:
        scores = cross_validation.cross_val_score(
            random_search.best_estimator_,
            X,
            y,
            cv=cv,
            scoring=scoring,
            n_jobs=n_jobs)
        logger.debug('%20s: %.3f +- %.3f' %
                     (scoring, np.mean(scores), np.std(scores)))
    return random_search.best_estimator_
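One subtlety in param_dist above: scipy.stats.uniform takes (loc, scale) and samples from [loc, loc + scale], so uniform(1e-03, 1) draws eta0 from [0.001, 1.001] rather than [0.001, 1]. A quick check:

from scipy.stats import uniform

rv = uniform(1e-03, 1)           # loc=0.001, scale=1 -> support [0.001, 1.001]
print(rv.ppf(0.0), rv.ppf(1.0))  # prints: 0.001 1.001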
Example 11: test_large_grid
# Required import: from sklearn import grid_search [as alias]
# Or: from sklearn.grid_search import RandomizedSearchCV [as alias]
def test_large_grid():
    """In this test, we purposely overfit a RandomForest to completely random data
    in order to assert that the test error will far exceed the train error.
    """
    if not SK18:
        custom_cv = KFold(n=y_train.shape[0], n_folds=3, shuffle=True, random_state=42)
    else:
        custom_cv = KFold(n_splits=3, shuffle=True, random_state=42)
    # define the pipe
    pipe = Pipeline([
        ('scaler', SelectiveScaler()),
        ('pca', SelectivePCA(weight=True)),
        ('rf', RandomForestClassifier(random_state=42))
    ])
    # define hyper parameters
    hp = {
        'scaler__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
        'pca__whiten': [True, False],
        'pca__weight': [True, False],
        'pca__n_components': uniform(0.75, 0.15),
        'rf__n_estimators': randint(5, 10),
        'rf__max_depth': randint(5, 15)
    }
    # define the grid
    grid = RandomizedSearchCV(pipe, hp, n_iter=2, scoring='accuracy', n_jobs=1, cv=custom_cv, random_state=42)
    # this will fail because we haven't fit yet
    assert_fails(grid.score, (ValueError, AttributeError), X_train, y_train)
    # fit the grid
    grid.fit(X_train, y_train)
    # score for coverage -- this might warn...
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        grid.score(X_train, y_train)
    # coverage:
    assert grid._estimator_type == 'classifier'
    # get predictions
    tr_pred, te_pred = grid.predict(X_train), grid.predict(X_test)
    # evaluate score (SHOULD be better than random...)
    accuracy_score(y_train, tr_pred), accuracy_score(y_test, te_pred)
    # grid score reports:
    # assert fails for bad percentile
    assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 0.0})
    assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 1.0})
    # assert fails for bad y_axis
    assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'y_axis': 'bad_axis'})
    # assert passes otherwise
    report_grid_score_detail(grid, charts=True, percentile=0.95)  # just ensure percentile works
Example 12: main_local
# Required import: from sklearn import grid_search [as alias]
# Or: from sklearn.grid_search import RandomizedSearchCV [as alias]
def main_local(log_dir, ntrain=800, ntest=200, niter=5, nsplits=3,
               global_inhibition=True, ncores=4, seed=None):
    """
    Perform CV on a subset of the MNIST dataset. Performs parallelizations on
    a local machine.

    @param log_dir: The directory to store the results in.
    @param ntrain: The number of training samples to use.
    @param ntest: The number of testing samples to use.
    @param niter: The number of parameter iterations to use.
    @param nsplits: The number of splits of the data to use.
    @param global_inhibition: If True use global inhibition; otherwise, use
    local inhibition.
    @param ncores: The number of cores to use.
    @param seed: The seed for the random number generators.
    """
    # Run the initialization
    x, y, kargs, params, cv = main(log_dir, ntrain, ntest, niter, nsplits,
                                   seed)
    # Build the classifier for doing CV
    clf = RandomizedSearchCV(
        estimator=SPRegion(**kargs),
        param_distributions=params,
        n_iter=niter,             # Total runs
        n_jobs=ncores,            # Use this many cores
        pre_dispatch=1 * ncores,  # Give each core one job at a time
        iid=True,                 # Data is iid across folds
        cv=cv,                    # The CV split for the data
        refit=False,              # Disable fitting best estimator on full dataset
        random_state=seed         # Force same SP across runs
    )
    # Fit the models
    clf.fit(x, y)
    # Extract the CV results
    parameter_names = sorted(clf.grid_scores_[0].parameters.keys())
    parameter_names.pop(parameter_names.index('log_dir'))
    parameter_values = np.zeros((niter, len(parameter_names)))
    results = np.zeros((niter, nsplits))
    for i, score in enumerate(clf.grid_scores_):
        parameter_values[i] = np.array([score.parameters[k] for k in
                                        parameter_names])
        results[i] = score.cv_validation_scores
    # Save the CV results
    with open(os.path.join(log_dir, 'cv_results.pkl'), 'wb') as f:
        cPickle.dump((parameter_names, parameter_values, results), f,
                     cPickle.HIGHEST_PROTOCOL)
    with open(os.path.join(log_dir, 'cv_clf.pkl'), 'wb') as f:
        cPickle.dump((clf.grid_scores_, clf.best_score_, clf.best_params_), f,
                     cPickle.HIGHEST_PROTOCOL)
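The pickled results can later be reloaded for analysis. A minimal sketch, assuming the same log_dir and Python 2's cPickle as used above:

import os
import cPickle  # Python 2, matching the example above

with open(os.path.join(log_dir, 'cv_results.pkl'), 'rb') as f:
    parameter_names, parameter_values, results = cPickle.load(f)
# results has shape (niter, nsplits): one row of fold scores per candidate
print(parameter_names)
print(results.mean(axis=1))  # mean CV score per sampled parameter setting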