本文整理汇总了 Python 中 sklearn.model_selection.RandomizedSearchCV.fit 方法的典型用法代码示例。如果您正苦于以下问题：Python RandomizedSearchCV.fit 方法的具体用法？Python RandomizedSearchCV.fit 怎么用？Python RandomizedSearchCV.fit 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.model_selection.RandomizedSearchCV 的用法示例。
在下文中一共展示了RandomizedSearchCV.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_large_grid
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_large_grid():
    """Smoke-test a randomized search over a scaler/PCA/random-forest pipeline.

    The stated intent is to overfit a RandomForest on completely random data
    so that the test error far supersedes the train error. Note that the
    train/test accuracies are computed below but never compared or asserted.
    """
    # Two KFold constructor signatures are supported, selected by the SK18
    # flag (presumably "sklearn >= 0.18" -- confirm where SK18 is defined).
    if SK18:
        custom_cv = KFold(n_splits=3, shuffle=True, random_state=42)
    else:
        custom_cv = KFold(n=y_train.shape[0], n_folds=3, shuffle=True, random_state=42)

    # Pipeline under search: scaling -> PCA -> random forest.
    pipe = Pipeline([
        ('scaler', SelectiveScaler()),
        ('pca', SelectivePCA(weight=True)),
        ('rf', RandomForestClassifier(random_state=42)),
    ])

    # Hyper-parameter distributions sampled by the randomized search.
    hp = {
        'scaler__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
        'pca__whiten': [True, False],
        'pca__weight': [True, False],
        'pca__n_components': uniform(0.75, 0.15),
        'rf__n_estimators': randint(5, 10),
        'rf__max_depth': randint(5, 15)
    }

    grid = RandomizedSearchCV(
        pipe, hp, n_iter=2, scoring='accuracy', n_jobs=1,
        cv=custom_cv, random_state=42,
    )

    # Scoring before fitting must fail.
    assert_fails(grid.score, (ValueError, AttributeError), X_train, y_train)

    grid.fit(X_train, y_train)

    # Score for coverage only; any warnings raised here are suppressed.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        grid.score(X_train, y_train)

    # Coverage of the estimator-type passthrough.
    assert grid._estimator_type == 'classifier'

    # Predictions and accuracies are computed for coverage; results are unused.
    tr_pred = grid.predict(X_train)
    te_pred = grid.predict(X_test)
    accuracy_score(y_train, tr_pred)
    accuracy_score(y_test, te_pred)

    # The grid score report must reject out-of-range percentiles...
    for bad_percentile in (0.0, 1.0):
        assert_fails(report_grid_score_detail, ValueError,
                     random_search=grid, percentile=bad_percentile)

    # ...and an unknown y_axis.
    assert_fails(report_grid_score_detail, ValueError,
                 random_search=grid, y_axis='bad_axis')

    # A valid percentile must simply run.
    report_grid_score_detail(grid, charts=True, percentile=0.95)
示例2: fit
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def fit(x, y, estimator, dataframe, params):
    """Tune ``estimator`` with a randomized search and optionally calibrate it.

    :param x: name of the dataframe column holding the text features
    :param y: name of the label column (forwarded to ``select_features``)
    :param estimator: estimator to tune
    :param dataframe: dataframe providing the feature and label columns
    :param params: parameter distributions for ``RandomizedSearchCV``
    :return: tuple ``(fitted estimator, selector, vectorizer)``

    NOTE(review): ``method`` is not a parameter; it is read from an enclosing
    or module scope that is not visible here -- confirm it is defined.
    """
    vectorizer = CountVectorizer(stop_words=['go', '', ' '], binary=False, lowercase=True)
    vectorizer.fit(dataframe[x].values)

    # Keep an unfitted copy so calibration can start from a clean estimator.
    fresh_estimator = clone(estimator)

    x_np, y_np, _feature_names, selector = select_features(
        df=dataframe,
        vectorizer=vectorizer,
        feature_col=x,
        label_col=y,
        select_method=None,
        continuous_col=None,
    )

    fitted = RandomizedSearchCV(estimator, params, n_iter=60, cv=3, n_jobs=3, refit=True)
    fitted.fit(x_np, y_np)
    best_params = fitted.best_params_

    # Every method other than 'lr' and 'svm' is re-fit inside a calibrator
    # configured with the best parameters found by the search.
    if method not in ['lr', 'svm']:
        print("Calibrating...")
        fitted = CalibratedClassifierCV(fresh_estimator.set_params(**best_params), 'isotonic', 3)
        fitted.fit(x_np, y_np)

    # NOTE(review): ``_pprint`` is a private sklearn helper.
    from sklearn.base import _pprint
    _pprint(fitted.get_params(deep=True), offset=2)
    return fitted, selector, vectorizer
示例3: search
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def search(self, search_space, search_iter, n_estimators, x, y):
    """Run a randomized hyper-parameter search for ``self.lgbm``.

    A fixed set of baseline parameters is overlaid with ``search_space``.
    Any ``'n_estimators'`` entry in ``search_space`` is ignored in favour of
    the ``n_estimators`` argument.

    :param search_space: mapping of parameter name to candidate values;
        it is not modified by this call
    :param search_iter: number of parameter settings sampled (``n_iter``)
    :param n_estimators: candidate values used for ``'n_estimators'``
    :param x: training features
    :param y: training targets
    :return: the best parameter setting found (``best_params_``)

    Side effect: ``self.clf`` is set to the search's ``best_estimator_``.
    """
    # Work on a copy: the original deleted the key from the caller's dict,
    # silently changing the argument the caller still holds.
    search_space = dict(search_space)
    search_space.pop('n_estimators', None)

    params = {
        'boosting_type': ['gbdt'],
        'min_child_weight': [5],
        'min_split_gain': [1.0],
        'subsample': [0.8],
        'colsample_bytree': [0.6],
        'max_depth': [10],
        'n_estimators': n_estimators,
        'num_leaves': [70],
        'learning_rate': [0.04],
    }
    params.update(search_space)

    if self.verbose:
        print(params)

    folds = 3
    score_metric, skf = self.get_skf(folds)

    random_search = RandomizedSearchCV(self.lgbm, param_distributions=params, n_iter=search_iter,
                                       scoring=score_metric,
                                       n_jobs=1, cv=skf, verbose=0, random_state=1001)
    random_search.fit(x, y)

    self.clf = random_search.best_estimator_
    return random_search.best_params_
示例4: parameter_search
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def parameter_search(model, X, y, params, metric, n=10):
    """Fit a randomized hyper-parameter search and return the search object.

    The fitted ``RandomizedSearchCV`` instance itself is returned, not just
    the best parameters.

    :param model: estimator to tune
    :param X: training features
    :param y: training targets
    :param params: parameter distributions to sample from
    :param metric: scoring passed to the search
    :param n: number of parameter settings sampled
    """
    searcher = RandomizedSearchCV(
        model,
        param_distributions=params,
        scoring=metric,
        n_jobs=3,
        n_iter=n,
    )
    searcher.fit(X, y)
    return searcher
示例5: pr_curve
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def pr_curve(i):
    """Collect precision/recall/F1 per threshold for the ``i``-th label.

    For every ``(train, validation)`` fold, a randomized search is fit on the
    bag-of-words representation of the training fold, and the validation
    fold is scored at each value in ``thresholds``.

    Reads these names from the surrounding scope: ``labels``, ``folds``,
    ``seeds``, ``developement_df``, ``method``, ``balanced``, ``params``,
    ``vectorizer_method``, ``binary`` and ``thresholds``.

    :param i: index into ``labels`` and ``seeds``
    :return: the populated ``Statistics`` object
    :raises ValueError: if ``vectorizer_method`` is neither ``'tf-idf'`` nor
        ``'count'``
    """
    label = labels[i]
    statistics_l = Statistics()
    print('Doing label {}'.format(label))

    for train_idx, valid_idx in folds:
        # Re-seeded for every fold, so each fold starts from the same state.
        rng = np.random.RandomState()
        rng.seed(seeds[i])

        training_fold = developement_df.loc[train_idx, ]
        training_fold = training_fold.reset_index(drop=True)
        validation_fold = developement_df.loc[valid_idx, ]
        validation_fold = validation_fold.reset_index(drop=True)

        base_estimators = make_classifiers(method, balanced, labels, random_state=rng)

        # Find the best params, then do a final proper calibration.
        base_estimator = base_estimators[label]
        estimator = RandomizedSearchCV(
            estimator=base_estimator, param_distributions=params,
            n_iter=60, scoring='f1', cv=3, random_state=rng,
            error_score=0.0, n_jobs=1, pre_dispatch='2*n_jobs',
            refit=True
        )

        # Set up the vectorizer for the bag-of-words representation.
        if vectorizer_method == 'tf-idf':
            vectorizer = TfidfVectorizer(
                stop_words=['go', '', ' '], binary=binary, lowercase=True,
                sublinear_tf=False, max_df=1.0, min_df=0
            )
        elif vectorizer_method == 'count':
            vectorizer = CountVectorizer(
                stop_words=['go', '', ' '], binary=binary, lowercase=True
            )
        else:
            # Previously this fell through and failed later on the unbound
            # ``vectorizer`` name; fail early with a clear message instead.
            raise ValueError(
                "Unknown vectorizer_method: {!r}".format(vectorizer_method)
            )
        vectorizer.fit(training_fold['terms'].values)

        # Fit and evaluate the performance of the classifier.
        x_train = vectorizer.transform(training_fold['terms'].values)
        y_train = np.asarray(training_fold[label].values, dtype=int)
        x_valid = vectorizer.transform(validation_fold['terms'].values)
        y_valid = np.asarray(validation_fold[label].values, dtype=int)
        estimator.fit(x_train, y_train)

        # The probabilities do not depend on the threshold, so compute them
        # once per fold rather than once per threshold.
        positive_proba = [p[1] for p in estimator.predict_proba(x_valid)]

        for t in thresholds:
            y_pred = [int(proba >= t) for proba in positive_proba]
            precision = precision_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
            recall = recall_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
            f1 = f1_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
            statistics_l.update_statistics(label=t, s_type='Precision', data=precision)
            statistics_l.update_statistics(label=t, s_type='Recall', data=recall)
            statistics_l.update_statistics(label=t, s_type='F1-Score', data=f1)

    statistics_l.frame()['reaction'] = label
    return statistics_l
示例6: test_pickle
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_pickle():
    """Smoke test: a fitted grid search and randomized search can be pickled."""
    clf = MockClassifier()

    # Builders are invoked one at a time so each search is constructed,
    # fitted and pickled before the next one is created.
    search_builders = (
        lambda: GridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=True),
        lambda: RandomizedSearchCV(clf, {'foo_param': [1, 2, 3]},
                                   refit=True, n_iter=3),
    )
    for build_search in search_builders:
        fitted_search = build_search()
        fitted_search.fit(X, y)
        pickle.dumps(fitted_search)  # smoke test
示例7: test_trivial_cv_results_attr
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_trivial_cv_results_attr():
    """Check that ``cv_results_`` is set when searching a single-point "grid".

    Non-regression test: grid_scores_ wouldn't be set by GridSearchCV.
    Both the grid search and the randomized search are verified.
    """
    clf = MockClassifier()

    grid_search = GridSearchCV(clf, {'foo_param': [1]})
    grid_search.fit(X, y)
    assert_true(hasattr(grid_search, "cv_results_"))

    random_search = RandomizedSearchCV(clf, {'foo_param': [0]}, n_iter=1)
    random_search.fit(X, y)
    # Check the randomized search itself; the original re-checked
    # ``grid_search`` here, leaving ``random_search`` unverified.
    assert_true(hasattr(random_search, "cv_results_"))
示例8: build_nn
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def build_nn(x_train, y_train, x_test, y_test, n_features):
    """
    Build a regression neural network, tune it, and pickle the results.

    The hidden-layer sizes are tuned with a randomized search; the fitted
    search, the network, the regression metrics and the predictions are then
    written, in that order, to a single pickle file.

    :param x_train: features dataframe for model training
    :param y_train: target dataframe for model training
    :param x_test: features dataframe for model testing
    :param y_test: target dataframe for model testing
    :param n_features: integer used in the output file name
    :return: None
    """
    # NOTE(review): softmax on a single-unit output layer looks suspicious
    # for a regression network -- confirm this is intended.
    net = NeuralNet(layers=[('input', InputLayer),
                            ('hidden0', DenseLayer),
                            ('hidden1', DenseLayer),
                            ('output', DenseLayer)],
                    input_shape=(None, x_train.shape[1]),  # Number of i/p nodes = number of columns in x
                    hidden0_num_units=15,
                    hidden0_nonlinearity=lasagne.nonlinearities.softmax,
                    hidden1_num_units=17,
                    hidden1_nonlinearity=lasagne.nonlinearities.softmax,
                    output_num_units=1,  # Number of o/p nodes = number of columns in y
                    output_nonlinearity=lasagne.nonlinearities.softmax,
                    max_epochs=100,
                    update_learning_rate=0.01,
                    regression=True,
                    verbose=0)

    # Search over the number of units in each hidden layer.
    param_dist = {'hidden0_num_units': sp_randint(3, 30), 'hidden1_num_units': sp_randint(3, 30)}
    clf = RandomizedSearchCV(estimator=net, param_distributions=param_dist,
                             n_iter=15, n_jobs=-1)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    # Regression metrics on the held-out data.
    mean_abs = sklearn.metrics.mean_absolute_error(y_test, y_pred)
    mean_sq = sklearn.metrics.mean_squared_error(y_test, y_pred)
    median_abs = sklearn.metrics.median_absolute_error(y_test, y_pred)
    r2 = sklearn.metrics.r2_score(y_test, y_pred)  # coefficient of determination
    exp_var_score = sklearn.metrics.explained_variance_score(y_test, y_pred)

    # The dump order defines the order in which readers must load objects.
    payload = (clf, net, mean_abs, mean_sq, median_abs, r2, exp_var_score, y_pred)
    with open('../trained_networks/nn_%d_data.pkl' % n_features, 'wb') as results:
        for item in payload:
            pickle.dump(item, results, pickle.HIGHEST_PROTOCOL)
示例9: test_randomgridsearch_slm
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_randomgridsearch_slm(make_gaus_data):
    """Check that a StandardLinearModel runs inside RandomizedSearchCV."""
    X, y, Xs, ys = make_gaus_data

    slm = StandardLinearModel(LinearBasis(onescol=True))

    # Candidate values for 'var': 1/1, 1/2, ..., 1/5, each wrapped as a
    # Parameter with a Positive() bound.
    candidate_vars = [Parameter(1.0 / v, Positive()) for v in range(1, 6)]
    search = RandomizedSearchCV(slm, {'var': candidate_vars}, n_jobs=-1, n_iter=2)

    search.fit(X, y)
    predictions = search.predict(Xs)

    # We just want to make sure this all runs.
    assert len(ys) == len(predictions)
示例10: test_randomgridsearch_glm
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_randomgridsearch_glm(make_gaus_data):
    """Check that a GeneralizedLinearModel runs inside RandomizedSearchCV."""
    X, y, Xs, ys = make_gaus_data

    glm = GeneralizedLinearModel(
        Gaussian(),
        LinearBasis(onescol=True),
        random_state=1,
        maxiter=100,
    )

    # Sample the batch size from 1..10.
    search = RandomizedSearchCV(
        glm,
        {'batch_size': range(1, 11)},
        verbose=1,
        n_jobs=-1,
        n_iter=2,
    )
    search.fit(X, y)
    predictions = search.predict(Xs)

    # We just want to make sure this all runs.
    assert len(ys) == len(predictions)
示例11: test_pickle
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_pickle():
    """Check that fitted searches survive a pickle round trip.

    Predictions from the restored object must match those of the original.
    """
    clf = MockClassifier()

    # Builders are invoked one at a time so each search is constructed,
    # fitted and verified before the next one is created.
    search_builders = (
        lambda: GridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=True),
        lambda: RandomizedSearchCV(clf, {'foo_param': [1, 2, 3]},
                                   refit=True, n_iter=3),
    )
    for build_search in search_builders:
        original = build_search()
        original.fit(X, y)
        restored = pickle.loads(pickle.dumps(original))
        assert_array_almost_equal(original.predict(X),
                                  restored.predict(X))
示例12: test__extract_arfftrace
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test__extract_arfftrace(self):
    """Check the ARFF trace extracted from a fitted randomized search.

    Verifies the number of trace attributes and trace lines, that every
    ``parameter_*`` column only holds values from ``param_grid``, and that
    the remaining columns hold values of the expected type.
    """
    param_grid = {"max_depth": [3, None],
                  "max_features": [1, 2, 3, 4],
                  "bootstrap": [True, False],
                  "criterion": ["gini", "entropy"]}
    num_iters = 10
    task = openml.tasks.get_task(20)
    # n_iter is passed by keyword: newer scikit-learn releases make it
    # keyword-only, and the keyword form works on older releases too.
    clf = RandomizedSearchCV(RandomForestClassifier(), param_grid,
                             n_iter=num_iters)

    # just run the task
    train, _ = task.get_train_test_split_indices(0, 0)
    X, y = task.get_X_and_y()
    clf.fit(X[train], y[train])

    trace_attribute_list = _extract_arfftrace_attributes(clf)
    trace_list = _extract_arfftrace(clf, 0, 0)
    self.assertIsInstance(trace_attribute_list, list)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(len(trace_attribute_list), 5 + len(param_grid))
    self.assertIsInstance(trace_list, list)
    self.assertEqual(len(trace_list), num_iters)

    # found parameters
    optimized_params = set()

    for att_idx, attribute in enumerate(trace_attribute_list):
        att_name = attribute[0]
        att_type = attribute[1]
        if att_name.startswith("parameter_"):
            # add this to the found parameters
            param_name = att_name[len("parameter_"):]
            optimized_params.add(param_name)

            # Parameter values are stored JSON-encoded in the trace.
            legal_values = param_grid[param_name]
            for trace_line in trace_list:
                val = json.loads(trace_line[att_idx])
                self.assertIn(val, legal_values)
        else:
            # repeat, fold, itt, bool
            for trace_line in trace_list:
                val = trace_line[att_idx]
                if isinstance(att_type, list):
                    # Nominal attribute: value must be one of the listed ones.
                    self.assertIn(val, att_type)
                elif att_name in ['repeat', 'fold', 'iteration']:
                    self.assertIsInstance(val, int)
                else:  # att_type = real
                    self.assertIsInstance(val, float)

    self.assertEqual(set(param_grid.keys()), optimized_params)
示例13: model_param_search
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def model_param_search(estimator, X, y, param_dist, scoring,
                       n_iter=1, n_cv=5, verbose=10, random_state=1, model_id='model', save_search=True):
    """Run a randomized parameter search, report it, and return the best model.

    :param estimator: estimator to tune
    :param X: training features
    :param y: training targets
    :param param_dist: parameter distributions to sample from
    :param scoring: scoring passed to the search
    :param n_iter: number of parameter settings sampled
    :param n_cv: value passed as ``cv`` to the search
    :param verbose: verbosity passed to the search
    :param random_state: random state passed to the search
    :param model_id: label used in the timing message and as the pickle
        file stem
    :param save_search: when true, pickle the fitted search to
        ``<model_id>.pickle``
    :return: the search's ``best_estimator_``
    """
    started_at = time.time()

    searcher = RandomizedSearchCV(
        estimator,
        param_distributions=param_dist,
        n_iter=n_iter,
        scoring=scoring,
        cv=n_cv,
        verbose=verbose,
        random_state=random_state,
    )
    searcher.fit(X, y)

    print('Best param: ', searcher.best_params_)
    print('Best score: ', searcher.best_score_)
    print('Best model: ', searcher.best_estimator_)

    if save_search:
        with open(model_id+'.pickle', 'wb') as f:
            pickle.dump(searcher, f)

    # Elapsed time is reported in minutes.
    elapsed_minutes = (time.time() - started_at) / 60
    print('Time searching param for {}: {}'.format(model_id, elapsed_minutes))
    return searcher.best_estimator_
示例14: test_grid_search_with_multioutput_data
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_grid_search_with_multioutput_data():
    """Check per-split scores of grid and randomized search on multi-output data.

    For every candidate parameter setting reported in ``cv_results_``, the
    estimator is re-fit by hand on each CV split and its score is compared
    with the score recorded by the search.
    """
    X, y = make_multilabel_classification(return_indicator=True,
                                          random_state=0)

    est_parameters = {"max_depth": [1, 2, 3, 4]}
    cv = KFold(random_state=0)

    estimators = [DecisionTreeRegressor(random_state=0),
                  DecisionTreeClassifier(random_state=0)]

    def build_grid_search(est):
        return GridSearchCV(est, est_parameters, cv=cv)

    def build_random_search(est):
        return RandomizedSearchCV(est, est_parameters,
                                  cv=cv, n_iter=3)

    # Grid search is exercised for all estimators first, then the randomized
    # search, matching the order in which the estimators get mutated.
    for build_search in (build_grid_search, build_random_search):
        for est in estimators:
            fitted_search = build_search(est)
            fitted_search.fit(X, y)
            cv_results = fitted_search.cv_results_
            for cand_i, cand_params in enumerate(cv_results['params']):
                est.set_params(**cand_params)

                for i, (train, test) in enumerate(cv.split(X, y)):
                    est.fit(X[train], y[train])
                    correct_score = est.score(X[test], y[test])
                    assert_almost_equal(
                        correct_score,
                        cv_results['split%d_test_score' % i][cand_i])
示例15: test_random_search_cv_results
# 需要导入模块: from sklearn.model_selection import RandomizedSearchCV [as 别名]
# 或者: from sklearn.model_selection.RandomizedSearchCV import fit [as 别名]
def test_random_search_cv_results():
    """Check the structure of ``cv_results_`` produced by RandomizedSearchCV.

    Runs the search with ``iid=False`` and ``iid=True`` and validates the
    result keys, array types and parameter masks for both.
    """
    # Make a dataset with a lot of noise to get various kind of prediction
    # errors across CV folds and parameter settings
    X, y = make_classification(n_samples=200, n_features=100, n_informative=3,
                               random_state=0)

    # scipy.stats dists now supports `seed` but we still support scipy 0.12
    # which doesn't support the seed. Hence the assertions in the test for
    # random_search alone should not depend on randomization.
    n_splits = 3
    n_search_iter = 30
    params = dict(C=expon(scale=10), gamma=expon(scale=0.1))

    def fitted_search(iid):
        # Build and fit one search; called sequentially so the iid=False
        # search is fully fitted before the iid=True one is created.
        search = RandomizedSearchCV(SVC(), n_iter=n_search_iter,
                                    cv=n_splits, iid=iid,
                                    param_distributions=params)
        search.fit(X, y)
        return search

    iid_settings = (False, True)
    searches = [fitted_search(iid) for iid in iid_settings]

    param_keys = ('param_C', 'param_gamma')
    score_keys = ('mean_test_score', 'mean_train_score',
                  'rank_test_score',
                  'split0_test_score', 'split1_test_score',
                  'split2_test_score',
                  'split0_train_score', 'split1_train_score',
                  'split2_train_score',
                  'std_test_score', 'std_train_score',
                  'mean_fit_time', 'std_fit_time',
                  'mean_score_time', 'std_score_time')
    n_cand = n_search_iter

    for search, iid in zip(searches, iid_settings):
        assert_equal(iid, search.iid)
        cv_results = search.cv_results_
        # Check results structure
        check_cv_results_array_types(cv_results, param_keys, score_keys)
        check_cv_results_keys(cv_results, param_keys, score_keys, n_cand)
        # For random_search, all the param array vals should be unmasked
        assert_false(any(cv_results['param_C'].mask) or
                     any(cv_results['param_gamma'].mask))
        check_cv_results_grid_scores_consistency(search)