本文整理汇总了 Python 中 sklearn.model_selection.GridSearchCV.fit 方法的典型用法代码示例。如果您正苦于以下问题:Python GridSearchCV.fit 方法的具体用法?Python GridSearchCV.fit 怎么用?Python GridSearchCV.fit 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.model_selection.GridSearchCV 的用法示例。
在下文中一共展示了GridSearchCV.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: build
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def build(X, y=None):
    """
    Grid-search the ``C`` of a TF-IDF + linear SVC pipeline and fit it.

    The number of cross-validation folds is one fifth of the size of the
    rarest class in ``y``, clamped to the range 2..5.

    NOTE(review): ``self`` is not a parameter here; it is presumably
    captured from an enclosing method's scope -- confirm against the caller.

    :param X: samples handed to ``GridSearchCV.fit`` (and thus to the
        TF-IDF vectorizer)
    :param y: labels aligned with ``X``; their per-class counts drive the
        fold count
    :return: the fitted ``GridSearchCV`` object
    """
    from sklearn.model_selection import GridSearchCV

    tfidf = TfidfVectorizer(
        tokenizer=self.spacy_tokenizer, preprocessor=None, lowercase=False)
    svc = SVC(C=1, kernel="linear", probability=True,
              class_weight='balanced')
    model = Pipeline([('vectorizer', tfidf), ('clf', svc)])

    # Size of the rarest class bounds how many folds are feasible.
    _, class_counts = np.unique(y, return_counts=True)
    smallest_class = np.min(class_counts)
    cv_splits = max(2, min(5, smallest_class // 5))

    param_grid = {
        'clf__C': [0.01, 0.25, 1, 2, 5, 10, 20, 100],
        'clf__kernel': ["linear"],
    }
    grid_search = GridSearchCV(
        model,
        param_grid=param_grid,
        scoring='f1_weighted',
        cv=cv_splits,
        verbose=2,
        n_jobs=-1,
    )
    grid_search.fit(X, y)
    return grid_search
示例2: build_and_train
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def build_and_train():
    """Train a random-forest pipeline on the loan data with a grid search.

    Reads ``../data/training.csv``, drops rows missing any of the key
    columns, holds out 25% of the rows (fixed ``random_state``), encodes
    the ``Loan_Status`` target as 1/0 and runs a 3-fold ``GridSearchCV``
    over a ``PreProcessing`` + ``RandomForestClassifier`` pipeline.

    :return: the fitted ``GridSearchCV`` object
    """
    data = pd.read_csv('../data/training.csv')
    data = data.dropna(
        subset=['Gender', 'Married', 'Credit_History', 'LoanAmount'])

    pred_var = ['Gender', 'Married', 'Dependents', 'Education',
                'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome',
                'LoanAmount', 'Loan_Amount_Term', 'Credit_History',
                'Property_Area']

    # The held-out part is not used in this function; only the split
    # itself (and therefore the training subset) matters here.
    X_train, _, y_train, _ = train_test_split(
        data[pred_var], data['Loan_Status'],
        test_size=0.25, random_state=42)

    # Series.as_matrix() was removed in pandas 1.0; .values yields the same
    # ndarray and is available on every pandas version.
    y_train = y_train.replace({'Y': 1, 'N': 0}).values

    pipe = make_pipeline(PreProcessing(),
                         RandomForestClassifier())

    # NOTE(review): `min_impurity_split` was deprecated and later removed
    # from scikit-learn's tree estimators; on recent versions this key must
    # be replaced (e.g. by `min_impurity_decrease`, which has different
    # semantics) -- left unchanged here so the searched space is not
    # silently altered.
    param_grid = {
        "randomforestclassifier__n_estimators": [10, 20, 30],
        "randomforestclassifier__max_depth": [None, 6, 8, 10],
        "randomforestclassifier__max_leaf_nodes": [None, 5, 10, 20],
        "randomforestclassifier__min_impurity_split": [0.1, 0.2, 0.3],
    }

    grid = GridSearchCV(pipe, param_grid=param_grid, cv=3)
    grid.fit(X_train, y_train)
    return grid
示例3: optimize_model_regress
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def optimize_model_regress(data, tc):
    """Grid-search an SVR on a random 80/20 split and print a report.

    The target is the ``temperature`` column divided by ``tc``; the
    ``T/Tc`` and ``temperature`` columns are removed from the features.
    The best parameters, the mean/std score of every grid point and the
    mean absolute error on the held-out 20% are printed. Nothing is
    returned.

    :param data: frame providing ``sample``/``drop`` and the columns
        ``temperature`` and ``T/Tc``
    :param tc: divisor applied to ``temperature`` to form the target
    """
    def features_of(frame):
        # Everything except the raw and the scaled temperature columns.
        return frame.drop(['T/Tc', 'temperature'], axis=1)

    train_data = data.sample(frac=.8)
    test_data = data.drop(train_data.index)

    train_y = train_data['temperature'] / tc
    train_X = features_of(train_data)
    test_y = test_data['temperature'] / tc
    test_X = features_of(test_data)

    c_values = [.1, .5, 1, 5, 10, 50, 100, 500, 1000]
    tuned_parameters = [
        {'kernel': ['rbf'],
         'gamma': [1, .5, .1, 1e-2, 1e-3, 1e-4],
         'C': c_values},
        {'kernel': ['linear'], 'C': c_values},
    ]

    model = GridSearchCV(svm.SVR(), tuned_parameters, cv=5)
    model.fit(train_X, train_y)

    print()
    print("Best parameters:")
    print()
    print(model.best_params_)
    print()
    print("Grid scores:")
    print()
    results = model.cv_results_
    report_rows = zip(results['mean_test_score'],
                      results['std_test_score'],
                      results['params'])
    for mean, std, params in report_rows:
        # Spread is reported as two standard deviations.
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    y_true = test_y
    y_pred = model.predict(test_X)
    print("Mean Absolute Error : " + str(mean_absolute_error(y_pred, y_true)))
    print()
示例4: _search_param
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def _search_param(self, metric, X, y):
    '''
    Search a small grid around the user's settings with few estimators.

    Only ``max_depth``, ``colsample_bytree`` and ``reg_lambda`` vary; every
    other parameter is pinned to the value on ``self.model_instance``.
    During the search the learning rate is raised to at least 0.3 and the
    number of estimators capped at 60; both are reset to the model's own
    values in the returned dict.

    :param metric: passed to ``GridSearchCV`` as ``scoring``
    :param X: training samples
    :param y: training targets
    :return: best parameter dict found by the 2-fold search
    '''
    base = self.model_instance

    def candidates(user_value, first, second):
        # Sorted, de-duplicated grid that always contains the user's value.
        return list(np.unique([user_value, first, second]))

    pinned_names = (
        'gamma', 'min_child_weight', 'max_delta_step', 'subsample',
        'colsample_bylevel', 'reg_alpha', 'scale_pos_weight',
        'base_score', 'seed',
    )
    param_grid = {name: [getattr(base, name)] for name in pinned_names}
    param_grid['max_depth'] = candidates(base.max_depth, 5, 7)
    param_grid['colsample_bytree'] = candidates(
        base.colsample_bytree, 0.66, 0.9)
    param_grid['reg_lambda'] = candidates(base.reg_lambda, 1, 5)
    # Cheap-to-train settings, used for the search only.
    param_grid['learning_rate'] = [max(base.learning_rate, 0.3)]
    param_grid['n_estimators'] = [min(base.n_estimators, 60)]

    grid_search = GridSearchCV(
        base, param_grid, cv=2, refit=False, scoring=metric)
    grid_search.fit(X, y)

    best_params = grid_search.best_params_
    # Restore the user's original training budget.
    best_params.update(
        learning_rate=base.learning_rate,
        n_estimators=base.n_estimators,
    )
    return best_params
示例5: tweak_hyperparameters
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def tweak_hyperparameters(self, type_model):
    """Grid-search hyperparameters for the requested model and print them.

    ``"SVM"`` tunes an ``svm.SVC`` against ``self.gender``; any other
    value tunes an ``MLPClassifier`` against ``self.sentiment``. Both use
    ``self.X`` as features, 3-fold CV and all available cores. The corpus
    is loaded via ``self.load_corpus()`` first. Nothing is returned.

    :param type_model: ``"SVM"`` selects the SVC branch, anything else the
        MLP branch
    """
    self.load_corpus()

    # Pick target, estimator and grid; the search itself is shared below.
    if type_model == "SVM":
        y = self.gender
        model = svm.SVC()
        param_grid = [
            {'C': [1, 10, 100, 1000], 'kernel': ['linear']},
            {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
             'kernel': ['rbf']},
        ]
    else:
        y = self.sentiment
        model = MLPClassifier(max_iter=100)
        param_grid = {
            'hidden_layer_sizes': [(50, 50, 50), (50, 100, 50), (100,)],
            'activation': ['tanh', 'relu'],
            'solver': ['sgd', 'adam'],
            'alpha': [0.0001, 0.05],
            'learning_rate': ['constant', 'adaptive'],
        }

    clf = GridSearchCV(model, param_grid, n_jobs=-1, cv=3)
    clf.fit(self.X, y)
    # Best parameter set
    print('Best parameters found:\n', clf.best_params_)
示例6: inner_cv_loop
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def inner_cv_loop(Xtrain, Ytrain, clf, parameters,
                  oversample=None, fa_dims=20,
                  verbose=False):
    """
    Run GridSearchCV (ROC AUC) with and without factor analysis and keep
    the better variant.

    The search is run once on the raw features ("nofa") and once on the
    features reduced by ``FactorAnalysis(fa_dims)`` ("fa").

    ``oversample`` is accepted but not used in this function.

    Returns a tuple ``(best_estimator, best_score, factor_analysis)`` where
    ``factor_analysis`` is the fitted reducer of the winning variant, or
    None when the raw features won.
    """
    scores = {}
    estimators = {}
    reducers = {}

    for n_factors in (0, fa_dims):
        if n_factors > 0:
            variant = 'fa'
            reducers[variant] = FactorAnalysis(n_factors)
            features = reducers[variant].fit_transform(Xtrain)
        else:
            variant = "nofa"
            reducers[variant] = None
            features = Xtrain

        if verbose:
            print(variant)

        search = GridSearchCV(clf, parameters, scoring='roc_auc')
        search.fit(features, Ytrain)
        scores[variant] = search.best_score_
        estimators[variant] = search.best_estimator_

    bestscore = numpy.max(list(scores.values()))
    # First variant (in insertion order) that reaches the top score.
    bestclf = [name for name, value in scores.items()
               if value == bestscore][0]

    if verbose:
        print('best:', bestclf, bestscore, estimators[bestclf],
              reducers[bestclf])
    return estimators[bestclf], bestscore, reducers[bestclf]
示例7: plot_cross_val_selection
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def plot_cross_val_selection():
    """Plot per-fold and mean CV accuracy for an SVC grid search on iris.

    Runs a 5-fold ``GridSearchCV`` over ``C`` and ``gamma`` on a
    train/validation split of the iris data, then draws, for every
    parameter setting after the first 15, the individual fold scores, the
    mean score, and a highlighted marker on the best of the plotted
    settings. Draws onto a new matplotlib figure; returns nothing.
    """
    n_folds = 5
    n_skipped = 15  # leading grid points left out of the plot

    iris = load_iris()
    X_trainval, X_test, y_trainval, y_test = train_test_split(
        iris.data, iris.target, random_state=0)

    param_grid = {'C': [0.001, 0.01, 0.1, 1, 10, 100],
                  'gamma': [0.001, 0.01, 0.1, 1, 10, 100]}
    grid_search = GridSearchCV(SVC(), param_grid, cv=n_folds)
    grid_search.fit(X_trainval, y_trainval)

    # `grid_scores_` no longer exists on GridSearchCV; the same information
    # lives in `cv_results_`, one array per quantity, in grid order.
    results = grid_search.cv_results_
    mean_scores = results['mean_test_score'][n_skipped:]
    settings = results['params'][n_skipped:]
    # Row i holds the n_folds validation scores of plotted setting i.
    fold_scores = np.column_stack(
        [results['split%d_test_score' % fold][n_skipped:]
         for fold in range(n_folds)])

    best = np.argmax(mean_scores)

    plt.figure(figsize=(10, 3))
    plt.xlim(-1, len(settings))
    plt.ylim(0, 1.1)
    for i, mean_score in enumerate(mean_scores):
        marker_cv, = plt.plot([i] * n_folds, fold_scores[i], '^', c='gray',
                              markersize=5, alpha=.5)
        marker_mean, = plt.plot(i, mean_score, 'v', c='none', alpha=1,
                                markersize=10)
        if i == best:
            marker_best, = plt.plot(i, mean_score, 'o', c='red',
                                    fillstyle="none", alpha=1,
                                    markersize=20, markeredgewidth=3)

    tick_labels = [str(setting).strip("{}").replace("'", "")
                   for setting in settings]
    plt.xticks(range(len(settings)), tick_labels, rotation=90)
    plt.ylabel("validation accuracy")
    plt.xlabel("parameter settings")
    plt.legend([marker_cv, marker_mean, marker_best],
               ["cv accuracy", "mean accuracy", "best parameter setting"],
               loc=(1.05, .4))
示例8: train_sgd
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def train_sgd(data, result, scoring=None):
    """Grid-search an ``SGDClassifier`` over loss and penalty choices.

    Progress, the parameter grid, and the best parameters/score are
    printed along the way.

    :param data: training samples; ``len(data)`` is printed
    :param result: targets passed to ``fit`` together with ``data``
    :param scoring: forwarded to ``GridSearchCV`` as ``scoring``
    :return: tuple ``(scaler, estimator)`` -- ``scaler`` is None because no
        scaling is enabled; ``estimator`` is the best estimator fitted on
        ``data``/``result``
    """
    print("train SGDClassifier {}".format(len(data)))

    # The name was previously left unbound (both candidate assignments were
    # commented out), which raised NameError on the next line. Scaling is
    # off by default; swap in the line below to enable it.
    scaler = None
    #scaler = preprocessing.MinMaxScaler()
    print("Scale: {}".format(type(scaler)))
    if scaler is not None:
        data = scaler.fit_transform(data)

    # NOTE(review): some of these option names ('log', 'squared_loss',
    # penalty 'none') have been renamed or removed in newer scikit-learn
    # releases -- verify against the installed version.
    parameters = {
        'loss': ('hinge', 'log', 'modified_huber', 'squared_hinge', 'perceptron',
                 'squared_loss', 'huber', 'epsilon_insensitive',
                 'squared_epsilon_insensitive'),
        'penalty': ('none', 'l2', 'l1', 'elasticnet')
    }
    print(parameters)

    search = GridSearchCV(SGDClassifier(), parameters, scoring=scoring, n_jobs=1)
    search.fit(data, result)
    print("best params: {}".format(search.best_params_))
    print("best score: {}".format(search.best_score_))
    print()
    # fit() returns the estimator itself, so the second tuple element is the
    # best estimator (re)fitted on the full data.
    return scaler, search.best_estimator_.fit(data, result)
示例9: svr_linear
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def svr_linear(X, Y, x, y):
    """Tune a linear-kernel SVR with 10-fold CV and score it on held-out data.

    :param X: training samples
    :param Y: training targets
    :param x: samples to predict
    :param y: true targets for ``x``, used to compute the RMSE
    :return: tuple ``(rmse, y_predict)``
    """
    # `degree` only affects the polynomial kernel, so searching it with
    # kernel='linear' just repeated every fit four times; only C is tuned.
    reg = GridSearchCV(SVR(kernel='linear'), cv=10,
                       param_grid={"C": [1e0, 1e1, 1e2, 1e3]})
    reg.fit(X, Y)
    y_predict = reg.predict(x)
    rmse = RMSE(y=y, y_predict=y_predict)
    # Function-call form: the old `print "..."` statement is a syntax error
    # on Python 3.
    print("rmse: ", str(rmse))
    return rmse, y_predict
示例10: main
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def main():
    """Train and tune classifiers on the Titanic data and write a submission.

    Steps: load ``data/train.csv`` and ``data/test.csv``, keep a
    hand-picked feature subset, fill missing values, vectorize with
    ``DictVectorizer``, report 5-fold CV accuracy for a random forest and
    an XGBoost classifier, grid-search XGBoost hyperparameters, and write
    the predictions to ``data/submission/titanic_submission.csv``.
    """
    # 1. Inspect the training and test sets.
    train_data = pandas.read_csv('data/train.csv')
    test_data = pandas.read_csv('data/test.csv')
    print(train_data.info())
    print(test_data.info())

    # 2. Hand-picked features considered useful for prediction.
    selected_features = ['Pclass', 'Sex', 'Age', 'Embarked', 'SibSp', 'Parch', 'Fare']
    # Explicit copies: the frames are modified below, and writing into a
    # slice of another frame is unreliable (SettingWithCopy / copy-on-write).
    x_train = train_data[selected_features].copy()
    x_test = test_data[selected_features].copy()
    y_train = train_data['Survived']

    # 3. Fill in missing values.
    # Embarked has missing values that need to be completed.
    print(x_train['Embarked'].value_counts())
    print(x_test['Embarked'].value_counts())
    # For a categorical feature, filling with the most frequent value is one
    # way to limit the error introduced. Assigning the result back (instead
    # of `inplace=True` on a column selection) guarantees the frame is
    # actually updated.
    x_train['Embarked'] = x_train['Embarked'].fillna('S')
    x_test['Embarked'] = x_test['Embarked'].fillna('S')
    x_train['Age'] = x_train['Age'].fillna(x_train['Age'].mean())
    x_test['Age'] = x_test['Age'].fillna(x_test['Age'].mean())
    x_test['Fare'] = x_test['Fare'].fillna(x_test['Fare'].mean())
    print(x_train.info())
    print(x_test.info())

    # 4. Vectorize the features with DictVectorizer.
    dict_vectorizer = DictVectorizer(sparse=False)
    # 'records' is the documented spelling; the 'record' abbreviation is
    # rejected by recent pandas versions.
    x_train = dict_vectorizer.fit_transform(x_train.to_dict(orient='records'))
    print(dict_vectorizer.feature_names_)
    x_test = dict_vectorizer.transform(x_test.to_dict(orient='records'))

    # 5. Train baseline models.
    forest_classifier = RandomForestClassifier()
    xgb_classifier = XGBClassifier()
    # Evaluate with 5-fold cross-validation.
    forest_mean_score = cross_val_score(forest_classifier, x_train, y_train, cv=5).mean()
    print(forest_mean_score)
    xgb_mean_score = cross_val_score(xgb_classifier, x_train, y_train, cv=5).mean()
    print(xgb_mean_score)

    # 6. Parallel grid search for a better hyperparameter combination.
    params = {
        'max_depth': range(2, 8), 'n_estimators': range(100, 1200, 200),
        'learning_rate': [0.05, 0.1, 0.25, 0.5, 1.0]
    }
    xgbc_best = XGBClassifier()
    grid_search_cv = GridSearchCV(xgbc_best, params, n_jobs=-1, cv=5)
    grid_search_cv.fit(x_train, y_train)
    print(grid_search_cv.best_score_)
    print(grid_search_cv.best_params_)

    # 7. Predict and write the results to file.
    predict_result = grid_search_cv.predict(x_test)
    submission_data = pandas.DataFrame({'PassengerId': test_data['PassengerId'], 'Survived': predict_result})
    submission_data.to_csv('data/submission/titanic_submission.csv', index=False)
示例11: PipeFeauture
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def PipeFeauture(Xtrain, Ytrain):
    """Grid-search a bag-of-words + TF-IDF + SGD text pipeline.

    Prints the pipeline steps, the searched grid, the elapsed fit time,
    the best score, and the best value of every searched parameter.
    Nothing is returned.

    :param Xtrain: training inputs passed to ``GridSearchCV.fit``
    :param Ytrain: training targets passed to ``GridSearchCV.fit``
    """
    steps = [
        ('vect', CountVectorizer()),
        ('tfidf', TfidfTransformer()),
        ('clf', SGDClassifier()),
    ]
    pipeline = Pipeline(steps)

    # Enabling the commented-out entries widens the search, at the price of
    # a combinatorial increase in run time.
    parameters = {
        'vect__max_df': (0.5, 0.75, 1.0),
        #'vect__max_features': (None, 5000, 10000, 50000),
        'vect__ngram_range': ((1, 1), (1, 2)),  # unigrams or bigrams
        #'tfidf__use_idf': (True, False),
        #'tfidf__norm': ('l1', 'l2'),
        'clf__alpha': (0.00001, 0.000001),
        'clf__penalty': ('l2', 'elasticnet'),
        #'clf__n_iter': (10, 50, 80),
    }

    grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1)

    print("Performing grid search...")
    print("pipeline:", [step_name for step_name, _ in pipeline.steps])
    print("parameters:")
    pprint(parameters)

    started = time()
    grid_search.fit(Xtrain, Ytrain)
    print("done in %0.3fs" % (time() - started))
    print()

    print("Best score: %0.3f" % grid_search.best_score_)
    print("Best parameters set:")
    winning = grid_search.best_estimator_.get_params()
    for key in sorted(parameters):
        print("\t%s: %r" % (key, winning[key]))
示例12: nearest_neighbors
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def nearest_neighbors(self):
    """Tune a k-nearest-neighbours classifier and store the fitted search.

    Searches ``n_neighbors`` with 5-fold CV, F1 scoring and 5 parallel
    jobs on ``self.train_data_x`` / ``self.train_labels_y``; the fitted
    ``GridSearchCV`` is appended to ``self.models``.
    """
    search = GridSearchCV(
        KNeighborsClassifier(),
        {"n_neighbors": [11, 31, 201, 401, 601]},
        cv=5,
        n_jobs=5,
        scoring="f1",
    )
    search.fit(self.train_data_x, self.train_labels_y)
    self.models.append(search)
示例13: logistic_regression
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def logistic_regression(self):
    """Tune logistic regression's ``C`` and store the fitted search.

    ``C`` ranges over the powers of two from 2**-10 to 2**9; the search
    uses 5-fold CV with recall scoring on ``self.train_data_x`` /
    ``self.train_labels_y``. The fitted ``GridSearchCV`` is appended to
    ``self.models``.
    """
    grid = {'C': [2 ** exponent for exponent in range(-10, 10)]}
    # Built-in recall scorer; a custom make_scorer(my_scorer) was the
    # alternative noted by the original author.
    search = GridSearchCV(LogisticRegression(), grid, cv=5, scoring="recall")
    search.fit(self.train_data_x, self.train_labels_y)
    self.models.append(search)
示例14: tuner
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def tuner(clf, parameters, data):
    """Grid-search a scale -> select -> classify pipeline and print the result.

    The pipeline is ``MinMaxScaler`` -> ``SelectKBest`` -> ``clf``. It is
    scored with ``my_score_func`` over 1000 stratified shuffle splits
    (30% test, fixed seed). The best score and parameters are printed;
    nothing is returned.

    :param clf: classifier used as the final ``"classifier"`` step
    :param parameters: parameter grid for the pipeline (keys prefixed with
        the step names ``scale``/``select``/``classifier``)
    :param data: passed to ``targetFeatureSplit`` to obtain labels and
        features
    """
    from sklearn.model_selection import GridSearchCV

    labels, features = targetFeatureSplit(data)

    steps = [("scale", MinMaxScaler()),
             ("select", SelectKBest()),
             ("classifier", clf)]
    pipeline = Pipeline(steps)

    shuffle = StratifiedShuffleSplit(n_splits=1000, test_size=0.3,
                                     random_state=42)
    scoring_metric = make_scorer(my_score_func)

    grid_searcher = GridSearchCV(pipeline, param_grid=parameters,
                                 cv=shuffle, scoring=scoring_metric)

    # Feature selection is deliberately left to the pipeline's "select"
    # step. Running SelectKBest on the full data beforehand would let the
    # held-out labels of every split influence the chosen features (data
    # leakage) and would cap the features the search can ever consider.
    grid_searcher.fit(features, labels)

    print("Cross-validated {0} score: {1}".format(scoring_metric,
                                                  grid_searcher.best_score_))
    print("Params: ", grid_searcher.best_params_)
示例15: score_nestedCV
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import fit [as 别名]
def score_nestedCV(self, G1, model, param_grid, effect, nested):
    """Score ``model`` over ``self.n_folds`` folds, optionally with nested tuning.

    For each outer fold the train/test data are assembled with
    ``self._packData``. When ``nested`` is true, a ``GridSearchCV`` tunes
    ``model`` on the training part and its best parameters are recorded;
    otherwise ``model`` is fitted directly and scored with the scorer
    registered under ``self.scoring``.

    :param G1: passed through to ``self._packData``
    :param model: estimator to fit (or to tune when ``nested``)
    :param param_grid: grid for the inner search (used only when ``nested``)
    :param effect: passed through to ``self._packData``
    :param nested: whether to run the inner grid search
    :return: tuple ``(scores, params)`` -- one score per fold, and the best
        parameters per fold (empty list when not ``nested``)
    """
    n_samples = self.Y.shape[0]
    outer_folds = model_selection.KFold(n_splits=self.n_folds).split(range(n_samples))
    scores = sp.zeros(self.n_folds)
    params = []

    for i_fold, (train, test) in enumerate(outer_folds):
        trainData, trainY = self._packData(G1, train, effect)
        testData, testY = self._packData(G1, test, effect)

        if not nested:
            model.fit(trainData, trainY.flatten())
            scorer = SCORERS[self.scoring]
            scores[i_fold] = scorer(model, testData, testY.flatten())
            continue

        clf = GridSearchCV(estimator=model, param_grid=param_grid,
                           n_jobs=self.n_jobs_grid,
                           cv=self.n_folds_params, scoring=self.scoring,
                           verbose=self.verbose)
        clf.fit(trainData, trainY.flatten())
        params.append(clf.best_params_)
        scores[i_fold] = clf.score(testData, testY.flatten(), method_scorer=False)

    return scores, params