This article collects typical usage examples of the sklearn.model_selection.RandomizedSearchCV class in Python. If you have been wondering what RandomizedSearchCV is for, how to use it, or what working examples look like, the curated code samples below may help.
The following presents 15 code examples of the RandomizedSearchCV class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
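Before the collected examples, here is a minimal, self-contained sketch of the typical RandomizedSearchCV workflow (build an estimator, define parameter distributions, fit, inspect the best result). The dataset, estimator, and parameter ranges are arbitrary illustrative choices, not taken from any example below.

from scipy.stats import randint
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

X, y = load_iris(return_X_y=True)
# Values may be plain lists (sampled uniformly) or scipy.stats distributions (sampled via rvs).
param_dist = {
    'n_estimators': randint(10, 100),
    'max_depth': [2, 4, 6, None],
}
search = RandomizedSearchCV(RandomForestClassifier(random_state=0), param_dist,
                            n_iter=10, cv=3, scoring='accuracy', random_state=0)
search.fit(X, y)
print(search.best_params_, search.best_score_)

Passing scipy.stats distributions instead of plain lists lets n_iter sample from a continuous or unbounded range rather than a fixed grid.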
Example 1: search
def search(self, search_space, search_iter, n_estimators, x, y):
    if 'n_estimators' in search_space:
        del search_space['n_estimators']
    params = {
        'boosting_type': ['gbdt'],
        'min_child_weight': [5],
        'min_split_gain': [1.0],
        'subsample': [0.8],
        'colsample_bytree': [0.6],
        'max_depth': [10],
        'n_estimators': n_estimators,
        'num_leaves': [70],
        'learning_rate': [0.04],
    }
    params.update(search_space)
    if self.verbose:
        print(params)
    folds = 3
    score_metric, skf = self.get_skf(folds)
    random_search = RandomizedSearchCV(self.lgbm, param_distributions=params, n_iter=search_iter,
                                       scoring=score_metric,
                                       n_jobs=1, cv=skf, verbose=0, random_state=1001)
    random_search.fit(x, y)
    self.clf = random_search.best_estimator_
    return random_search.best_params_
Example 2: parameter_search
def parameter_search(model, X, y, params, metric, n=10):
    '''
    Returns the fitted RandomizedSearchCV object for the classification model.
    '''
    random_search = RandomizedSearchCV(model, param_distributions=params,
                                       scoring=metric, n_jobs=3, n_iter=n)
    random_search.fit(X, y)
    return random_search
Example 3: pr_curve
def pr_curve(i):
    label = labels[i]
    statistics_l = Statistics()
    print('Doing label {}'.format(label))

    for train_idx, valid_idx in folds:
        rng = np.random.RandomState()
        rng.seed(seeds[i])
        training_fold = developement_df.loc[train_idx, ]
        training_fold = training_fold.reset_index(drop=True)
        validation_fold = developement_df.loc[valid_idx, ]
        validation_fold = validation_fold.reset_index(drop=True)
        base_estimators = make_classifiers(method, balanced, labels, random_state=rng)

        # Find the best params, then do a final proper calibration.
        base_estimator = base_estimators[label]
        estimator = RandomizedSearchCV(
            estimator=base_estimator, param_distributions=params,
            n_iter=60, scoring='f1', cv=3, random_state=rng,
            error_score=0.0, n_jobs=1, pre_dispatch='2*n_jobs',
            refit=True
        )

        # Set up the vectorizer for the bag-of-words representation
        if vectorizer_method == 'tf-idf':
            vectorizer = TfidfVectorizer(
                stop_words=['go', '', ' '], binary=binary, lowercase=True,
                sublinear_tf=False, max_df=1.0, min_df=0
            )
            vectorizer.fit(training_fold['terms'].values)
        elif vectorizer_method == 'count':
            vectorizer = CountVectorizer(
                stop_words=['go', '', ' '], binary=binary, lowercase=True
            )
            vectorizer.fit(training_fold['terms'].values)

        # Fit and evaluate the performance of the classifier.
        x_train = vectorizer.transform(training_fold['terms'].values)
        y_train = np.asarray(training_fold[label].values, dtype=int)
        x_valid = vectorizer.transform(validation_fold['terms'].values)
        y_valid = np.asarray(validation_fold[label].values, dtype=int)
        estimator.fit(x_train, y_train)

        for t in thresholds:
            y_pred = [int(p[1] >= t) for p in estimator.predict_proba(x_valid)]
            precision = precision_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
            recall = recall_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
            f1 = f1_score(y_valid, y_pred, labels=[0, 1], pos_label=1)
            statistics_l.update_statistics(label=t, s_type='Precision', data=precision)
            statistics_l.update_statistics(label=t, s_type='Recall', data=recall)
            statistics_l.update_statistics(label=t, s_type='F1-Score', data=f1)

    statistics_l.frame()['reaction'] = label
    return statistics_l
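Example 3 sweeps a fixed grid of probability thresholds to build precision/recall statistics. As a side note, scikit-learn's precision_recall_curve computes the full curve over all thresholds in one call; the sketch below is illustrative only, using a synthetic dataset and a logistic-regression stand-in rather than the classifiers or Statistics helper from the example.

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_curve
from sklearn.model_selection import RandomizedSearchCV, train_test_split

X, y = make_classification(n_samples=300, random_state=0)
X_tr, X_va, y_tr, y_va = train_test_split(X, y, random_state=0)
search = RandomizedSearchCV(LogisticRegression(solver='liblinear'),
                            {'C': [0.01, 0.1, 1.0, 10.0]}, n_iter=4, cv=3, random_state=0)
search.fit(X_tr, y_tr)
# One call gives precision/recall at every distinct predicted probability,
# instead of looping over a hand-picked threshold grid.
precision, recall, thresholds = precision_recall_curve(y_va, search.predict_proba(X_va)[:, 1])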
Example 4: test_pickle
def test_pickle():
    # Test that a fit search can be pickled
    clf = MockClassifier()
    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=True)
    grid_search.fit(X, y)
    pickle.dumps(grid_search)  # smoke test
    random_search = RandomizedSearchCV(clf, {'foo_param': [1, 2, 3]},
                                       refit=True, n_iter=3)
    random_search.fit(X, y)
    pickle.dumps(random_search)  # smoke test
Example 5: test_trivial_cv_results_attr
def test_trivial_cv_results_attr():
    # Test search over a "grid" with only one point.
    # Non-regression test: grid_scores_ wouldn't be set by GridSearchCV.
    clf = MockClassifier()
    grid_search = GridSearchCV(clf, {'foo_param': [1]})
    grid_search.fit(X, y)
    assert_true(hasattr(grid_search, "cv_results_"))

    random_search = RandomizedSearchCV(clf, {'foo_param': [0]}, n_iter=1)
    random_search.fit(X, y)
    assert_true(hasattr(random_search, "cv_results_"))
Example 6: build_nn
def build_nn(x_train, y_train, x_test, y_test, n_features):
    """
    Constructing a regression neural network model from input dataframe
    :param x_train: features dataframe for model training
    :param y_train: target dataframe for model training
    :param x_test: features dataframe for model testing
    :param y_test: target dataframe for model testing
    :param n_features: number of features, used to name the output pickle file
    :return: None
    """
    net = NeuralNet(layers=[('input', InputLayer),
                            ('hidden0', DenseLayer),
                            ('hidden1', DenseLayer),
                            ('output', DenseLayer)],
                    input_shape=(None, x_train.shape[1]),  # Number of i/p nodes = number of columns in x
                    hidden0_num_units=15,
                    hidden0_nonlinearity=lasagne.nonlinearities.softmax,
                    hidden1_num_units=17,
                    hidden1_nonlinearity=lasagne.nonlinearities.softmax,
                    output_num_units=1,  # Number of o/p nodes = number of columns in y
                    output_nonlinearity=lasagne.nonlinearities.softmax,
                    max_epochs=100,
                    update_learning_rate=0.01,
                    regression=True,
                    verbose=0)

    # Finding the optimal set of params for each variable in the training of the neural network
    param_dist = {'hidden0_num_units': sp_randint(3, 30), 'hidden1_num_units': sp_randint(3, 30)}
    clf = RandomizedSearchCV(estimator=net, param_distributions=param_dist,
                             n_iter=15, n_jobs=-1)
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    # Mean absolute error regression loss
    mean_abs = sklearn.metrics.mean_absolute_error(y_test, y_pred)
    # Mean squared error regression loss
    mean_sq = sklearn.metrics.mean_squared_error(y_test, y_pred)
    # Median absolute error regression loss
    median_abs = sklearn.metrics.median_absolute_error(y_test, y_pred)
    # R^2 (coefficient of determination) regression score function
    r2 = sklearn.metrics.r2_score(y_test, y_pred)
    # Explained variance regression score function
    exp_var_score = sklearn.metrics.explained_variance_score(y_test, y_pred)

    with open('../trained_networks/nn_%d_data.pkl' % n_features, 'wb') as results:
        pickle.dump(clf, results, pickle.HIGHEST_PROTOCOL)
        pickle.dump(net, results, pickle.HIGHEST_PROTOCOL)
        pickle.dump(mean_abs, results, pickle.HIGHEST_PROTOCOL)
        pickle.dump(mean_sq, results, pickle.HIGHEST_PROTOCOL)
        pickle.dump(median_abs, results, pickle.HIGHEST_PROTOCOL)
        pickle.dump(r2, results, pickle.HIGHEST_PROTOCOL)
        pickle.dump(exp_var_score, results, pickle.HIGHEST_PROTOCOL)
        pickle.dump(y_pred, results, pickle.HIGHEST_PROTOCOL)

    return
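Example 6 depends on the nolearn/lasagne stack. For readers without that stack, the same idea (randomized search over hidden-layer sizes of a regression network) can be sketched with scikit-learn's built-in MLPRegressor; the layer sizes, data, and iteration counts below are arbitrary stand-ins, not a translation of the exact network above.

from sklearn.datasets import make_regression
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPRegressor

X, y = make_regression(n_samples=200, n_features=10, noise=0.1, random_state=0)
# Hidden-layer sizes must be tuples, so enumerate candidate (h0, h1) pairs as a list.
param_dist = {'hidden_layer_sizes': [(h0, h1) for h0 in range(3, 30) for h1 in range(3, 30)],
              'alpha': [1e-4, 1e-3, 1e-2]}
search = RandomizedSearchCV(MLPRegressor(max_iter=500, random_state=0), param_dist,
                            n_iter=15, cv=3, random_state=0)
search.fit(X, y)
print(search.best_params_)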
Example 7: test_randomgridsearch_slm
def test_randomgridsearch_slm(make_gaus_data):
    X, y, Xs, ys = make_gaus_data

    slm = StandardLinearModel(LinearBasis(onescol=True))

    param_dict = {
        'var': [Parameter(1.0 / v, Positive()) for v in range(1, 6)]
    }
    estimator = RandomizedSearchCV(slm, param_dict, n_jobs=-1, n_iter=2)

    estimator.fit(X, y)
    Ey = estimator.predict(Xs)

    assert len(ys) == len(Ey)  # we just want to make sure this all runs
Example 8: test_randomgridsearch_glm
def test_randomgridsearch_glm(make_gaus_data):
    X, y, Xs, ys = make_gaus_data

    glm = GeneralizedLinearModel(Gaussian(), LinearBasis(onescol=True),
                                 random_state=1, maxiter=100)

    param_dict = {'batch_size': range(1, 11)}
    estimator = RandomizedSearchCV(glm, param_dict, verbose=1, n_jobs=-1,
                                   n_iter=2)

    estimator.fit(X, y)
    Ey = estimator.predict(Xs)

    assert len(ys) == len(Ey)  # we just want to make sure this all runs
Example 9: test_pickle
def test_pickle():
    # Test that a fit search can be pickled
    clf = MockClassifier()
    grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, refit=True)
    grid_search.fit(X, y)
    grid_search_pickled = pickle.loads(pickle.dumps(grid_search))
    assert_array_almost_equal(grid_search.predict(X),
                              grid_search_pickled.predict(X))

    random_search = RandomizedSearchCV(clf, {'foo_param': [1, 2, 3]},
                                       refit=True, n_iter=3)
    random_search.fit(X, y)
    random_search_pickled = pickle.loads(pickle.dumps(random_search))
    assert_array_almost_equal(random_search.predict(X),
                              random_search_pickled.predict(X))
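Examples 4 and 9 pickle the entire fitted search object. When only the winning model is needed, a common alternative is to persist just best_estimator_ with joblib, which avoids carrying cv_results_ and the full search state. The sketch below assumes the standalone joblib package is installed (older scikit-learn versions expose it as sklearn.externals.joblib) and uses an arbitrary estimator and filename.

import joblib
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV

X, y = load_iris(return_X_y=True)
search = RandomizedSearchCV(LogisticRegression(solver='liblinear', max_iter=200),
                            {'C': [0.01, 0.1, 1.0, 10.0]}, n_iter=4, cv=3, random_state=0)
search.fit(X, y)
joblib.dump(search.best_estimator_, 'best_model.joblib')  # persist only the refit best model
model = joblib.load('best_model.joblib')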
Example 10: test__extract_arfftrace
def test__extract_arfftrace(self):
    param_grid = {"max_depth": [3, None],
                  "max_features": [1, 2, 3, 4],
                  "bootstrap": [True, False],
                  "criterion": ["gini", "entropy"]}
    num_iters = 10
    task = openml.tasks.get_task(20)
    clf = RandomizedSearchCV(RandomForestClassifier(), param_grid, num_iters)
    # just run the task
    train, _ = task.get_train_test_split_indices(0, 0)
    X, y = task.get_X_and_y()
    clf.fit(X[train], y[train])

    trace_attribute_list = _extract_arfftrace_attributes(clf)
    trace_list = _extract_arfftrace(clf, 0, 0)
    self.assertIsInstance(trace_attribute_list, list)
    self.assertEquals(len(trace_attribute_list), 5 + len(param_grid))
    self.assertIsInstance(trace_list, list)
    self.assertEquals(len(trace_list), num_iters)

    # found parameters
    optimized_params = set()

    for att_idx in range(len(trace_attribute_list)):
        att_type = trace_attribute_list[att_idx][1]
        att_name = trace_attribute_list[att_idx][0]
        if att_name.startswith("parameter_"):
            # add this to the found parameters
            param_name = att_name[len("parameter_"):]
            optimized_params.add(param_name)

            for line_idx in range(len(trace_list)):
                val = json.loads(trace_list[line_idx][att_idx])
                legal_values = param_grid[param_name]
                self.assertIn(val, legal_values)
        else:
            # repeat, fold, itt, bool
            for line_idx in range(len(trace_list)):
                val = trace_list[line_idx][att_idx]
                if isinstance(att_type, list):
                    self.assertIn(val, att_type)
                elif att_name in ['repeat', 'fold', 'iteration']:
                    self.assertIsInstance(trace_list[line_idx][att_idx], int)
                else:  # att_type = real
                    self.assertIsInstance(trace_list[line_idx][att_idx], float)

    self.assertEqual(set(param_grid.keys()), optimized_params)
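Example 10 turns the per-iteration record kept by RandomizedSearchCV into an OpenML ARFF trace. If you just want to inspect that record directly, the cv_results_ attribute exposes one entry per sampled candidate; the sketch below is a minimal illustration with made-up parameters, independent of the OpenML helpers above.

import pandas as pd
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV

X, y = load_iris(return_X_y=True)
search = RandomizedSearchCV(RandomForestClassifier(random_state=0),
                            {"max_depth": [3, None], "criterion": ["gini", "entropy"]},
                            n_iter=4, cv=3, random_state=0)
search.fit(X, y)
# One row per sampled candidate: its parameters, per-split test scores, mean/std, and rank.
results = pd.DataFrame(search.cv_results_)
print(results[["params", "mean_test_score", "rank_test_score"]])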
Example 11: test_large_grid
def test_large_grid():
    """In this test, we purposely overfit a RandomForest to completely random data
    in order to assert that the test error will far exceed the train error.
    """
    if not SK18:
        custom_cv = KFold(n=y_train.shape[0], n_folds=3, shuffle=True, random_state=42)
    else:
        custom_cv = KFold(n_splits=3, shuffle=True, random_state=42)

    # define the pipe
    pipe = Pipeline([
        ('scaler', SelectiveScaler()),
        ('pca', SelectivePCA(weight=True)),
        ('rf', RandomForestClassifier(random_state=42))
    ])

    # define hyper parameters
    hp = {
        'scaler__scaler': [StandardScaler(), RobustScaler(), MinMaxScaler()],
        'pca__whiten': [True, False],
        'pca__weight': [True, False],
        'pca__n_components': uniform(0.75, 0.15),
        'rf__n_estimators': randint(5, 10),
        'rf__max_depth': randint(5, 15)
    }

    # define the grid
    grid = RandomizedSearchCV(pipe, hp, n_iter=2, scoring='accuracy', n_jobs=1, cv=custom_cv, random_state=42)

    # this will fail because we haven't fit yet
    assert_fails(grid.score, (ValueError, AttributeError), X_train, y_train)

    # fit the grid
    grid.fit(X_train, y_train)

    # score for coverage -- this might warn...
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        grid.score(X_train, y_train)

    # coverage:
    assert grid._estimator_type == 'classifier'

    # get predictions
    tr_pred, te_pred = grid.predict(X_train), grid.predict(X_test)

    # evaluate score (SHOULD be better than random...)
    accuracy_score(y_train, tr_pred), accuracy_score(y_test, te_pred)

    # grid score reports:
    # assert fails for bad percentile
    assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 0.0})
    assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'percentile': 1.0})

    # assert fails for bad y_axis
    assert_fails(report_grid_score_detail, ValueError, **{'random_search': grid, 'y_axis': 'bad_axis'})

    # assert passes otherwise
    report_grid_score_detail(grid, charts=True, percentile=0.95)  # just ensure percentile works
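Example 11 uses custom skutil transformers (SelectiveScaler, SelectivePCA) and project-specific test helpers, but the 'step__parameter' naming convention it relies on works with any scikit-learn Pipeline. The sketch below is a stripped-down illustration of that convention using only standard scikit-learn components, with parameter ranges chosen arbitrarily.

from scipy.stats import randint
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X, y = load_iris(return_X_y=True)
pipe = Pipeline([('scaler', StandardScaler()),
                 ('pca', PCA()),
                 ('rf', RandomForestClassifier(random_state=42))])
# Pipeline parameters are addressed as <step name>__<parameter name>.
hp = {'pca__n_components': randint(2, 4),
      'rf__n_estimators': randint(5, 20),
      'rf__max_depth': randint(2, 8)}
search = RandomizedSearchCV(pipe, hp, n_iter=5, cv=3, scoring='accuracy', random_state=42)
search.fit(X, y)
print(search.best_params_, search.best_score_)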
Example 12: fit
def fit(x, y, estimator, dataframe, params):
    vectorizer = CountVectorizer(stop_words=['go', '', ' '], binary=False, lowercase=True)
    vectorizer.fit(dataframe[x].values)
    fresh_estimator = clone(estimator)
    x_np, y_np, feature_names, selector = \
        select_features(
            df=dataframe,
            vectorizer=vectorizer,
            feature_col=x,
            label_col=y,
            select_method=None,
            continuous_col=None
        )
    estimator = RandomizedSearchCV(estimator, params, n_iter=60, cv=3, n_jobs=3, refit=True)
    estimator.fit(x_np, y_np)
    best_params = estimator.best_params_

    if method not in ['lr', 'svm']:
        print("Calibrating...")
        estimator = CalibratedClassifierCV(fresh_estimator.set_params(**best_params), 'isotonic', 3)
        estimator.fit(x_np, y_np)

    from sklearn.base import _pprint
    _pprint(estimator.get_params(deep=True), offset=2)
    return estimator, selector, vectorizer
Example 13: model_param_search
def model_param_search(estimator, X, y, param_dist, scoring,
                       n_iter=1, n_cv=5, verbose=10, random_state=1, model_id='model', save_search=True):
    start = time.time()
    random_search = RandomizedSearchCV(estimator, param_distributions=param_dist,
                                       n_iter=n_iter, scoring=scoring, cv=n_cv,
                                       verbose=verbose, random_state=random_state)
    random_search.fit(X, y)
    print('Best param: ', random_search.best_params_)
    print('Best score: ', random_search.best_score_)
    print('Best model: ', random_search.best_estimator_)
    if save_search:
        with open(model_id + '.pickle', 'wb') as f:
            pickle.dump(random_search, f)
    print('Time searching param for {}: {}'.format(
        model_id, (time.time() - start) / 60))
    return random_search.best_estimator_
Example 14: test_grid_search_with_multioutput_data
def test_grid_search_with_multioutput_data():
    # Test search with multi-output estimator
    X, y = make_multilabel_classification(return_indicator=True,
                                          random_state=0)
    est_parameters = {"max_depth": [1, 2, 3, 4]}
    cv = KFold(random_state=0)

    estimators = [DecisionTreeRegressor(random_state=0),
                  DecisionTreeClassifier(random_state=0)]

    # Test with grid search cv
    for est in estimators:
        grid_search = GridSearchCV(est, est_parameters, cv=cv)
        grid_search.fit(X, y)
        res_params = grid_search.cv_results_['params']
        for cand_i in range(len(res_params)):
            est.set_params(**res_params[cand_i])
            for i, (train, test) in enumerate(cv.split(X, y)):
                est.fit(X[train], y[train])
                correct_score = est.score(X[test], y[test])
                assert_almost_equal(
                    correct_score,
                    grid_search.cv_results_['split%d_test_score' % i][cand_i])

    # Test with a randomized search
    for est in estimators:
        random_search = RandomizedSearchCV(est, est_parameters,
                                           cv=cv, n_iter=3)
        random_search.fit(X, y)
        res_params = random_search.cv_results_['params']
        for cand_i in range(len(res_params)):
            est.set_params(**res_params[cand_i])
            for i, (train, test) in enumerate(cv.split(X, y)):
                est.fit(X[train], y[train])
                correct_score = est.score(X[test], y[test])
                assert_almost_equal(
                    correct_score,
                    random_search.cv_results_['split%d_test_score'
                                              % i][cand_i])
Example 15: train_classifier
def train_classifier(self, trainvectors, labels, c='1.0', kernel='linear', gamma='0.1', degree='1', class_weight='balanced', jobs=1, iterations=10, scoring='f1_micro', v=2):
    if len(list(set(labels))) > 2:  # more than two classes to distinguish
        parameters = ['estimator__C', 'estimator__kernel', 'estimator__gamma', 'estimator__degree']
        multi = True
    else:  # only two classes to distinguish
        parameters = ['C', 'kernel', 'gamma', 'degree']
        multi = False
    if len(class_weight.split(':')) > 1:  # dictionary
        class_weight = dict([label_weight.split(':') for label_weight in class_weight.split()])
    c_values = [0.001, 0.005, 0.01, 0.5, 1, 5, 10, 50, 100, 500, 1000] if c == 'search' else [float(x) for x in c.split()]
    kernel_values = ['linear', 'rbf', 'poly'] if kernel == 'search' else [k for k in kernel.split()]
    gamma_values = [0.0005, 0.002, 0.008, 0.032, 0.128, 0.512, 1.024, 2.048] if gamma == 'search' else [float(x) for x in gamma.split()]
    degree_values = [1, 2, 3, 4] if degree == 'search' else [int(x) for x in degree.split()]
    grid_values = [c_values, kernel_values, gamma_values, degree_values]
    if False not in [len(x) == 1 for x in grid_values]:  # only single parameter settings
        settings = {}
        for i, parameter in enumerate(parameters):
            settings[parameter] = grid_values[i][0]
    else:
        param_grid = {}
        for i, parameter in enumerate(parameters):
            param_grid[parameter] = grid_values[i]
        model = svm.SVC(probability=True)
        if multi:
            model = OutputCodeClassifier(model)
            trainvectors = trainvectors.todense()
        paramsearch = RandomizedSearchCV(model, param_grid, cv=5, scoring=scoring, verbose=v, n_iter=iterations, n_jobs=jobs, pre_dispatch=4)
        paramsearch.fit(trainvectors, labels)
        settings = paramsearch.best_params_
    # train an SVC classifier with the settings that led to the best performance
    self.model = svm.SVC(
        probability=True,
        C=settings[parameters[0]],
        kernel=settings[parameters[1]],
        gamma=settings[parameters[2]],
        degree=settings[parameters[3]],
        class_weight=class_weight,
        cache_size=1000,
        verbose=v
    )
    self.model.fit(trainvectors, labels)