本文整理汇总了Python中sklearn.model_selection.GridSearchCV.predict方法的典型用法代码示例。如果您正苦于以下问题:Python GridSearchCV.predict方法的具体用法?Python GridSearchCV.predict怎么用?Python GridSearchCV.predict使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.model_selection.GridSearchCV的用法示例。
在下文中一共展示了GridSearchCV.predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_grid_search_sparse_scoring
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def test_grid_search_sparse_scoring():
    """Check that an F1-scored grid search agrees on dense and sparse input.

    The same ``C`` grid is searched three times: on the dense matrix with
    ``scoring="f1"``, on its CSR version with ``scoring="f1"``, and on the
    CSR version with a negated-F1 scorer declared as a loss. The selected
    ``C`` and the hold-out predictions are asserted equal across the runs.
    """
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
    split = 180  # first 180 rows are used for fitting, the rest for predict

    def _search(estimator, features, scoring):
        # Fit on the training slice; return (hold-out predictions, chosen C).
        search = GridSearchCV(estimator, {'C': [0.1, 1.0]}, scoring=scoring)
        search.fit(features[:split], y_[:split])
        return search.predict(features[split:]), search.best_estimator_.C

    # Dense baseline.
    dense_pred, dense_C = _search(LinearSVC(), X_, "f1")

    # Same search on the CSR representation of the same data.
    X_ = sp.csr_matrix(X_)
    clf = LinearSVC()
    sparse_pred, sparse_C = _search(clf, X_, "f1")

    assert_array_equal(dense_pred, sparse_pred)
    assert_equal(dense_C, sparse_C)

    # Smoke test the score
    # np.testing.assert_allclose(f1_score(cv.predict(X_[:180]), y[:180]),
    #                            cv.score(X_[:180], y[:180]))

    # Test a loss where greater is worse: negated F1 wrapped as a loss.
    def f1_loss(y_true_, y_pred_):
        return -f1_score(y_true_, y_pred_)

    F1Loss = make_scorer(f1_loss, greater_is_better=False)
    loss_pred, loss_C = _search(clf, X_, F1Loss)

    assert_equal(dense_C, loss_C)
    assert_array_equal(dense_pred, loss_pred)
示例2: model_select_rdf
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def model_select_rdf(dataset, cv=3, n_jobs=6):
    """Grid-search a RandomForestClassifier and print train/test reports.

    Args:
        dataset: 4-tuple ``(X_train, X_test, y_train, y_test)``.
        cv: Forwarded to ``GridSearchCV(cv=...)``.
        n_jobs: Forwarded to ``GridSearchCV(n_jobs=...)``.

    Returns:
        The fitted ``GridSearchCV`` object of the last scoring metric tried.
    """
    X_train, X_test, y_train, y_test = dataset

    # Set the parameters by cross-validation.
    params = dict(
        max_depth=[5, 20, None],
        n_estimators=[10, 30, 100],
        class_weight=['balanced_subsample', 'balanced'],
        # 'sqrt' replaces the former 'auto' entry: for classifiers the two
        # were the same setting, and 'auto' is rejected by recent
        # scikit-learn releases.
        max_features=[50, 300, None, 'sqrt'],
        min_samples_leaf=[1, 3],
    )

    scores = [
        # 'recall',
        'f1',
        # 'precision',
    ]

    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()

        clf = GridSearchCV(
            RandomForestClassifier(),
            param_grid=params,  # parameters to tune via cross validation
            refit=True,  # fit using all data, on the best detected classifier
            n_jobs=n_jobs,  # number of cores to use; -1 for "all cores"
            scoring=score,  # what score are we optimizing?
            cv=cv,  # what type of cross validation to use
        )
        clf.fit(X_train, y_train)

        print("Best parameters set found on training set:")
        print()
        print(clf.best_params_)

        print("Detailed classification report:")
        print()
        print("Scores on training set.")
        y_true, y_pred = y_train, clf.predict(X_train)
        print(classification_report(y_true, y_pred))
        print()

        print("Scores on test set.")
        print()
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print()

    return clf
示例3: model_select_svc2
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def model_select_svc2(dataset, cv=3, n_jobs=6):
    """Grid-search an SVC over a small grid and print train/test reports.

    Args:
        dataset: 4-tuple ``(X_train, X_test, y_train, y_test)``.
        cv: Forwarded to ``GridSearchCV(cv=...)``.
        n_jobs: Forwarded to ``GridSearchCV(n_jobs=...)``.

    Returns:
        The fitted ``GridSearchCV`` object of the last scoring metric tried.
    """
    X_train, X_test, y_train, y_test = dataset

    # The grid is a subset of the one used by model_select_svc, with kernel
    # and gamma limited to the values that always worked best.
    grid = [
        dict(
            kernel=['rbf'],
            gamma=[0.1],
            C=[0.01, 0.1, 1],
            class_weight=['balanced', None],
        )
    ]

    metrics = [
        # 'precision',
        # 'recall',
        'f1'
    ]

    for metric in metrics:
        print("# Tuning hyper-parameters for %s" % metric)
        print()

        search_options = dict(
            param_grid=grid,  # parameters to tune via cross validation
            refit=True,  # refit the best candidate on all training data
            n_jobs=n_jobs,  # number of cores; -1 for "all cores"
            scoring=metric,  # metric being optimized
            cv=cv,  # cross-validation strategy
        )
        search = GridSearchCV(SVC(), **search_options)
        search.fit(X_train, y_train)

        print("Best parameters set found on training set:")
        print()
        print(search.best_params_)

        print("Detailed classification report:")
        print()
        print("Scores on training set.")
        train_pred = search.predict(X_train)
        print(classification_report(y_train, train_pred))
        print()

        print("Scores on test set.")
        print()
        test_pred = search.predict(X_test)
        print(classification_report(y_test, test_pred))
        print()

    return search
示例4: model_select_sgd
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def model_select_sgd(dataset, cv=3, n_jobs=6):
    """Grid-search an SGDClassifier and print train/test reports.

    Args:
        dataset: 4-tuple ``(X_train, X_test, y_train, y_test)``.
        cv: Forwarded to ``GridSearchCV(cv=...)``.
        n_jobs: Forwarded to ``GridSearchCV(n_jobs=...)``.

    Returns:
        The fitted ``GridSearchCV`` object of the last scoring metric tried.
    """
    X_train, X_test, y_train, y_test = dataset

    # Set the parameters by cross-validation.
    parameters = [
        {
            'alpha': (0.01, 0.001, 0.00001),
            'penalty': ('l1', 'l2', 'elasticnet'),
            # NOTE(review): newer scikit-learn releases spell this loss
            # 'log_loss'; confirm against the installed version.
            'loss': ('hinge', 'log'),
            # 'max_iter' supersedes the old 'n_iter' parameter, which
            # SGDClassifier no longer accepts.
            'max_iter': (10, 50, 80),
        }
    ]

    scores = [
        # 'precision',
        'recall',
        # 'f1'
    ]

    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()

        clf = GridSearchCV(
            SGDClassifier(),
            param_grid=parameters,  # parameters to tune via cross validation
            refit=True,  # fit using all data, on the best detected classifier
            n_jobs=n_jobs,  # number of cores to use; -1 for "all cores"
            scoring=score,  # what score are we optimizing?
            cv=cv,  # what type of cross validation to use
        )
        clf.fit(X_train, y_train)

        print("Best parameters set found on training set:")
        print()
        print(clf.best_params_)

        print("Detailed classification report:")
        print()
        print("Scores on training set.")
        y_true, y_pred = y_train, clf.predict(X_train)
        print(classification_report(y_true, y_pred))
        print()

        print("Scores on test set.")
        print()
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print()

    return clf
示例5: model_select_dtree
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def model_select_dtree(dataset, cv=3, n_jobs=-1):
    """Grid-search a DecisionTreeClassifier and print train/test reports.

    Args:
        dataset: 4-tuple ``(X_train, X_test, y_train, y_test)``.
        cv: Forwarded to ``GridSearchCV(cv=...)``. The default ``3`` replaces
            the former ``StratifiedKFold(y_train, n_folds=3)`` call, whose
            signature belongs to the removed ``sklearn.cross_validation``
            module; with an integer, GridSearchCV picks stratified folds
            for classifiers itself.
        n_jobs: Forwarded to ``GridSearchCV(n_jobs=...)``; -1 uses all cores.
    """
    # The dataset arrives already split; just unpack it.
    X_train, X_test, y_train, y_test = dataset

    # Set the parameters by cross-validation.
    params = {
        'max_depth': [5, 10, None],
        'min_samples_split': [2, 8, 32],
        'min_samples_leaf': [1, 5, 10],
        # 'compute_importances':[True],
        # 'max_features': [25, 50, 75, 100, 150]
        # 'max_features': [5, 10, 15]
    }

    scores = [
        # 'precision',
        'recall',
        # 'f1'
    ]

    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()

        clf = GridSearchCV(
            DecisionTreeClassifier(),
            param_grid=params,  # parameters to tune via cross validation
            refit=True,  # fit using all data, on the best detected classifier
            n_jobs=n_jobs,  # number of cores to use; -1 for "all cores"
            scoring=score,  # what score are we optimizing?
            cv=cv,  # what type of cross validation to use
        )
        clf.fit(X_train, y_train)

        print("Best parameters set found on training set:")
        print()
        print(clf.best_params_)

        print("Detailed classification report:")
        print()
        print("Scores on training set.")
        y_true, y_pred = y_train, clf.predict(X_train)
        print(classification_report(y_true, y_pred))
        print()

        print("Scores on test set.")
        print()
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print()
示例6: model_select_svc
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def model_select_svc(dataset, cv=3, n_jobs=-1):
    """Grid-search an SVC over kernel/gamma/C and print train/test reports.

    Args:
        dataset: 4-tuple ``(X_train, X_test, y_train, y_test)``.
        cv: Forwarded to ``GridSearchCV(cv=...)``. The default ``3`` replaces
            the former ``StratifiedKFold(y_train, n_folds=3)`` call, whose
            signature belongs to the removed ``sklearn.cross_validation``
            module; with an integer, GridSearchCV picks stratified folds
            for classifiers itself.
        n_jobs: Forwarded to ``GridSearchCV(n_jobs=...)``; -1 uses all cores.
    """
    X_train, X_test, y_train, y_test = dataset

    # Set the parameters by cross-validation.
    parameters = [
        {
            'kernel': ['rbf', 'poly', 'sigmoid'],
            'gamma': [10, 100, 150],
            'C': [0.05, 0.1, 1],
        },
    ]

    scores = [
        # 'precision',
        'recall',
        # 'f1'
    ]

    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()

        clf = GridSearchCV(
            SVC(),
            param_grid=parameters,  # parameters to tune via cross validation
            refit=True,  # fit using all data, on the best detected classifier
            n_jobs=n_jobs,  # number of cores to use; -1 for "all cores"
            scoring=score,  # what score are we optimizing?
            cv=cv,  # what type of cross validation to use
        )
        clf.fit(X_train, y_train)

        print("Best parameters set found on training set:")
        print()
        print(clf.best_params_)

        print("Detailed classification report:")
        print()
        print("Scores on training set.")
        y_true, y_pred = y_train, clf.predict(X_train)
        print(classification_report(y_true, y_pred))
        print()

        print("Scores on test set.")
        print()
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print()
示例7: model_select_rdf
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def model_select_rdf(dataset, cv=3, n_jobs=-1):
    """Grid-search a RandomForestClassifier and print train/test reports.

    The search is repeated once per metric in ``scores`` (recall, then f1).

    Args:
        dataset: 4-tuple ``(X_train, X_test, y_train, y_test)``.
        cv: Forwarded to ``GridSearchCV(cv=...)``. The default ``3`` replaces
            the former ``StratifiedKFold(y_train, n_folds=3)`` call, whose
            signature belongs to the removed ``sklearn.cross_validation``
            module; with an integer, GridSearchCV picks stratified folds
            for classifiers itself.
        n_jobs: Forwarded to ``GridSearchCV(n_jobs=...)``; -1 uses all cores.
    """
    X_train, X_test, y_train, y_test = dataset

    # Set the parameters by cross-validation.
    params = dict(
        # Integer depths 5..10. The previous np.linspace(5, 10, 5) produced
        # 5, 6.25, 7.5, 8.75, 10 -- fractional values are not valid depths.
        max_depth=list(range(5, 11)),
        n_estimators=[3, 4, 5, 10],
        # max_features=[25, 50, 75, 100, 150]
        # max_features = [5, 10, 15]
    )

    scores = [
        # 'precision',
        'recall',
        'f1'
    ]

    for score in scores:
        print("# Tuning hyper-parameters for %s" % score)
        print()

        clf = GridSearchCV(
            RandomForestClassifier(),
            param_grid=params,  # parameters to tune via cross validation
            refit=True,  # fit using all data, on the best detected classifier
            n_jobs=n_jobs,  # number of cores to use; -1 for "all cores"
            scoring=score,  # what score are we optimizing?
            cv=cv,  # what type of cross validation to use
        )
        clf.fit(X_train, y_train)

        print("Best parameters set found on training set:")
        print()
        print(clf.best_params_)

        print("Detailed classification report:")
        print()
        print("Scores on training set.")
        y_true, y_pred = y_train, clf.predict(X_train)
        print(classification_report(y_true, y_pred))
        print()

        print("Scores on test set.")
        print()
        y_true, y_pred = y_test, clf.predict(X_test)
        print(classification_report(y_true, y_pred))
        print()
示例8: optimize_model_regress
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def optimize_model_regress(data, tc):
    """Tune an SVR with a 5-fold grid search and print its hold-out MAE.

    A random 80% sample of ``data`` is used for fitting and the remaining
    rows for evaluation. The target is ``data['temperature'] / tc``; the
    ``'T/Tc'`` and ``'temperature'`` columns are dropped from the features.

    Args:
        data: DataFrame with at least ``'temperature'`` and ``'T/Tc'`` columns.
        tc: Divisor applied to the temperature column to form the target.
    """
    non_feature_cols = ['T/Tc', 'temperature']

    def _target_and_features(frame):
        # Target first, then the feature matrix without the target columns.
        target = frame['temperature'] / tc
        features = frame.drop(non_feature_cols, axis=1)
        return target, features

    train_data = data.sample(frac=.8)
    test_data = data.drop(train_data.index)
    train_y, train_X = _target_and_features(train_data)
    test_y, test_X = _target_and_features(test_data)

    c_grid = [.1, .5, 1, 5, 10, 50, 100, 500, 1000]
    tuned_parameters = [
        {'kernel': ['rbf'], 'gamma': [1, .5, .1, 1e-2, 1e-3, 1e-4], 'C': c_grid},
        {'kernel': ['linear'], 'C': c_grid},
    ]

    model = GridSearchCV(svm.SVR(), tuned_parameters, cv=5)
    model.fit(train_X, train_y)

    print()
    print("Best parameters:")
    print()
    print(model.best_params_)
    print()
    print("Grid scores:")
    print()
    results = model.cv_results_
    rows = zip(results['mean_test_score'], results['std_test_score'], results['params'])
    for mean, std, params in rows:
        print("%0.3f (+/-%0.03f) for %r"
              % (mean, std * 2, params))
    print()

    y_pred = model.predict(test_X)
    print("Mean Absolute Error : " + str(mean_absolute_error(y_pred, test_y)))
    print()
示例9: test_grid_search_precomputed_kernel
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def test_grid_search_precomputed_kernel():
    """Check that grid search accepts a precomputed kernel matrix as input."""
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
    train_rows, test_rows = X_[:180], X_[180:]
    y_train, y_test = y_[:180], y_[180:]

    # Training kernel matrix for the linear kernel: train x train.
    K_train = np.dot(train_rows, train_rows.T)

    cv = GridSearchCV(SVC(kernel='precomputed'), {'C': [0.1, 1.0]})
    cv.fit(K_train, y_train)
    assert_true(cv.best_score_ >= 0)

    # Test kernel matrix: test x train.
    K_test = np.dot(test_rows, train_rows.T)
    y_pred = cv.predict(K_test)
    assert_true(np.mean(y_pred == y_test) >= 0)

    # An error must be raised when the precomputed kernel is not
    # array-like or sparse (here: a plain nested list).
    assert_raises(ValueError, cv.fit, K_train.tolist(), y_train)
示例10: main
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def main():
    """Train and tune classifiers on the Titanic CSVs and write a submission.

    Reads ``data/train.csv`` and ``data/test.csv``, imputes missing values,
    vectorizes the selected features, prints 5-fold CV accuracy for a random
    forest and an XGBoost classifier, grid-searches XGBoost, and writes the
    predictions to ``data/submission/titanic_submission.csv``.
    """
    # 1. Inspect the training and test sets.
    train_data = pandas.read_csv('data/train.csv')
    test_data = pandas.read_csv('data/test.csv')
    print(train_data.info())
    print(test_data.info())

    # 2. Manually pick the features expected to be useful for prediction.
    selected_features = ['Pclass', 'Sex', 'Age', 'Embarked', 'SibSp', 'Parch', 'Fare']
    x_train = train_data[selected_features]
    x_test = test_data[selected_features]
    y_train = train_data['Survived']

    # 3. Fill in missing values.
    # The Embarked feature has missing values that need to be completed.
    print(x_train['Embarked'].value_counts())
    print(x_test['Embarked'].value_counts())
    # For a categorical feature, filling with the most frequent value is one
    # way to limit the error introduced; numeric columns use their own mean.
    # DataFrame.fillna with a per-column dict returns new frames, so the
    # result does not depend on in-place edits of column selections (which
    # pandas does not guarantee to propagate back to the frame).
    x_train = x_train.fillna({
        'Embarked': 'S',
        'Age': x_train['Age'].mean(),
    })
    x_test = x_test.fillna({
        'Embarked': 'S',
        'Age': x_test['Age'].mean(),
        'Fare': x_test['Fare'].mean(),
    })
    print(x_train.info())
    print(x_test.info())

    # 4. Vectorize the features with DictVectorizer.
    dict_vectorizer = DictVectorizer(sparse=False)
    # 'records' is the full orient name; the abbreviated 'record' is not
    # accepted by current pandas.
    x_train = dict_vectorizer.fit_transform(x_train.to_dict(orient='records'))
    print(dict_vectorizer.feature_names_)
    x_test = dict_vectorizer.transform(x_test.to_dict(orient='records'))

    # 5. Train the models.
    forest_classifier = RandomForestClassifier()
    xgb_classifier = XGBClassifier()
    # Evaluate both with 5-fold cross-validation.
    forest_mean_score = cross_val_score(forest_classifier, x_train, y_train, cv=5).mean()
    print(forest_mean_score)
    xgb_mean_score = cross_val_score(xgb_classifier, x_train, y_train, cv=5).mean()
    print(xgb_mean_score)

    # 6. Use a parallel grid search to pick a better hyper-parameter set.
    params = {
        'max_depth': range(2, 8),
        'n_estimators': range(100, 1200, 200),
        'learning_rate': [0.05, 0.1, 0.25, 0.5, 1.0],
    }
    xgbc_best = XGBClassifier()
    grid_search_cv = GridSearchCV(xgbc_best, params, n_jobs=-1, cv=5)
    grid_search_cv.fit(x_train, y_train)
    print(grid_search_cv.best_score_)
    print(grid_search_cv.best_params_)

    # 7. Predict and write the results to a file.
    predict_result = grid_search_cv.predict(x_test)
    submission_data = pandas.DataFrame({'PassengerId': test_data['PassengerId'], 'Survived': predict_result})
    submission_data.to_csv('data/submission/titanic_submission.csv', index=False)
示例11: kernel_ridge_linear
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def kernel_ridge_linear(X, Y, x, y):
    """Fit a grid-searched linear KernelRidge on (X, Y) and predict for x.

    Args:
        X: Training features.
        Y: Training targets.
        x: Features to predict for.
        y: Reference targets for ``x``; used only for the printed RMSE.

    Returns:
        The predictions for ``x``.
    """
    # NOTE(review): "degree" looks irrelevant for a linear kernel, which
    # would make three quarters of this grid redundant -- confirm and trim.
    reg = GridSearchCV(
        KernelRidge(kernel='linear'),
        cv=10,
        param_grid={"alpha": [1e0, 0.1, 1e-2, 1e-3], "degree": [1, 2, 3, 4]},
    )
    reg.fit(X, Y)
    y_predict = reg.predict(x)
    rmse = RMSE(y=y, y_predict=y_predict)
    # Single-argument print() call: valid on Python 3 and emits the same
    # text the old Python 2 print statement produced.
    print("rmse:  " + str(rmse))
    return y_predict
示例12: svr_linear
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def svr_linear(X, Y, x, y):
    """Fit a grid-searched linear SVR on (X, Y) and predict for x.

    Args:
        X: Training features.
        Y: Training targets.
        x: Features to predict for.
        y: Reference targets for ``x``; used for the RMSE.

    Returns:
        Tuple ``(rmse, y_predict)``.
    """
    # NOTE(review): "degree" looks irrelevant for a linear kernel, which
    # would make three quarters of this grid redundant -- confirm and trim.
    reg = GridSearchCV(
        SVR(kernel='linear'),
        cv=10,
        param_grid={"C": [1e0, 1e1, 1e2, 1e3], "degree": [1, 2, 3, 4]},
    )
    reg.fit(X, Y)
    y_predict = reg.predict(x)
    rmse = RMSE(y=y, y_predict=y_predict)
    # Single-argument print() call: valid on Python 3 and emits the same
    # text the old Python 2 print statement produced.
    print("rmse:  " + str(rmse))
    return rmse, y_predict
示例13: gridsearch_launch
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def gridsearch_launch(gridSamples, gridTarget, nb_clusters):
    """Grid-search KMeans settings and print CV scores plus a test report.

    The samples are flattened to one row each, split 90/10 with a fixed
    seed, and a 5-fold grid search over ``init``, ``n_init`` and
    ``algorithm`` is run once for macro precision and once for macro recall.

    Args:
        gridSamples: Array of samples; reshaped to ``(len(gridSamples), -1)``.
        gridTarget: Labels passed as ``y`` to the split and the search.
        nb_clusters: ``n_clusters`` for the ``KMeans`` estimator.
    """
    features = gridSamples.reshape((len(gridSamples), -1))

    # Split the dataset.
    X_train, X_test, y_train, y_test = train_test_split(
        features, gridTarget, test_size=0.1, random_state=0)

    # One sub-grid per initialisation mode. n_clusters is fixed by the
    # caller rather than searched.
    tuned_parameters = [
        {
            'init': [init_mode],
            'n_init': [5, 10, 20, 30, 40],
            'algorithm': ['auto', 'full', 'elkan'],
        }
        for init_mode in ('k-means++', 'random')
    ]

    for metric in ('precision', 'recall'):
        print("# Tuning hyper-parameters for %s" % metric)
        print()

        search = GridSearchCV(
            cluster.KMeans(n_clusters=nb_clusters),
            tuned_parameters,
            cv=5,
            scoring='%s_macro' % metric,
        )
        search.fit(X_train, y_train)

        print("Best parameters set found on development set:")
        print()
        print(search.best_params_)
        print()
        print("Grid scores on development set:")
        print()
        results = search.cv_results_
        rows = zip(results['mean_test_score'], results['std_test_score'], results['params'])
        for mean, std, params in rows:
            print("%0.3f (+/-%0.03f) for %r"
                  % (mean, std * 2, params))
        print()

        print("Detailed classification report:")
        print()
        print("The model is trained on the full development set.")
        print("The scores are computed on the full evaluation set.")
        print()
        held_out_pred = search.predict(X_test)
        print(classification_report(y_test, held_out_pred))
        print()
示例14: test_grid_search_sparse
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def test_grid_search_sparse():
    """Check that grid search works with both dense and sparse matrices."""
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
    grid = {'C': [0.1, 1.0]}

    # Dense run.
    dense_search = GridSearchCV(LinearSVC(), grid)
    dense_search.fit(X_[:180], y_[:180])
    dense_pred = dense_search.predict(X_[180:])
    dense_C = dense_search.best_estimator_.C

    # Sparse run: fit on a COO view of the CSR rows, predict on CSR rows.
    X_ = sp.csr_matrix(X_)
    sparse_search = GridSearchCV(LinearSVC(), grid)
    sparse_search.fit(X_[:180].tocoo(), y_[:180])
    sparse_pred = sparse_search.predict(X_[180:])
    sparse_C = sparse_search.best_estimator_.C

    # Predictions need to agree on at least 90% of the hold-out rows.
    agreement = np.mean(dense_pred == sparse_pred)
    assert_true(agreement >= .9)
    assert_equal(dense_C, sparse_C)
示例15: BDT
# 需要导入模块: from sklearn.model_selection import GridSearchCV [as 别名]
# 或者: from sklearn.model_selection.GridSearchCV import predict [as 别名]
def BDT(X, Y, x, y):
    """Fit a grid-searched AdaBoost regressor over decision trees and predict.

    Args:
        X: Training features.
        Y: Training targets.
        x: Features to predict for.
        y: Reference targets for ``x``; used only for the printed RMSE.

    Returns:
        The absolute values of the predictions for ``x``.
    """
    # The grid holds a single candidate, so the search only cross-validates
    # that one configuration before refitting it.
    reg = GridSearchCV(
        en.AdaBoostRegressor(base_estimator=DecisionTreeRegressor()),
        cv=10,
        param_grid={"n_estimators": [100], "learning_rate": [0.01], "loss": ['exponential']},
    )
    reg.fit(X, Y)
    y_predict = reg.predict(x)
    # Predictions are forced non-negative before scoring and returning.
    y_predict = abs(y_predict)
    rmse = RMSE(y=y, y_predict=y_predict)
    # Single-argument print() call: valid on Python 3 and emits the same
    # text the old Python 2 print statement produced.
    print("rmse:  " + str(rmse))
    return y_predict