本文整理汇总了Python中scikits.learn.grid_search.GridSearchCV类的典型用法代码示例。如果您正苦于以下问题：Python GridSearchCV类的具体用法？Python GridSearchCV怎么用？Python GridSearchCV使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了GridSearchCV类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_grid_search
def test_grid_search():
    """Check that grid search selects the expected ``foo_param`` value.

    A ``GridSearchCV`` over ``foo_param`` in ``[1, 2, 3]`` is fitted on the
    ``X``/``y`` data taken from the enclosing scope. The test then verifies
    the winning parameter and the order of the reported grid entries.
    """
    search = GridSearchCV(MockClassifier(), {'foo_param': [1, 2, 3]})
    fitted = search.fit(X, y)
    # when several candidates tie, the smallest one is expected to win
    assert_equal(fitted.best_estimator.foo_param, 2)
    # the first item of every grid_scores_ entry is the parameter dict that
    # was evaluated; entries must follow the order of the grid values
    for position, expected in enumerate([1, 2, 3]):
        assert search.grid_scores_[position][0] == {'foo_param': expected}
示例2: do_grid_search
def do_grid_search(X,Y, gs_params):
    """Grid-search a one-step LogisticRegression pipeline.

    Parameters
    ----------
    X, Y :
        Training samples and targets, handed unchanged to
        ``GridSearchCV.fit``.
    gs_params :
        Parameter grid for the search. The classifier is registered in the
        pipeline under the step name ``'logreg'``, so grid keys are expected
        to be prefixed accordingly (e.g. ``'logreg__C'``).

    Returns
    -------
    The parameter setting whose entry in ``grid_scores_`` has the highest
    score. Both the setting and its score are also logged at INFO level.
    """
    lrpipe = Pipeline([
        ('logreg', LogisticRegression()),
    ])
    gs = GridSearchCV(lrpipe, gs_params, n_jobs=-1).fit(X, Y)
    # each grid_scores_ entry is unpacked as (parameters, score);
    # rank the entries by their score
    best_parameters, score = max(gs.grid_scores_, key=lambda entry: entry[1])
    # pass values as logging arguments so formatting only happens when the
    # record is actually emitted
    logger.info("best_parameters: %s", best_parameters)
    logger.info("expected score: %s", score)
    return best_parameters
示例3: train_svm_crossvalidated
def train_svm_crossvalidated(X, y, tuned_parameters=None):
    """
    Grid-search an SVC with stratified K-fold cross validation.

    Parameters
    ----------
    X :
        Observations; the number of folds is derived from its size along
        axis 0 via ``_size_dependent_k_split``.
    y :
        True labels for ``X``.
    tuned_parameters :
        Parameter grid for ``GridSearchCV``. When ``None`` (the default) an
        RBF-kernel grid is used with ``gamma`` in ``2.0**arange(-15, 3)`` and
        ``C`` in ``2.0**arange(-5, 15)``.

    Returns
    -------
    The fitted ``GridSearchCV`` object (scored with ``recall_score``).
    """
    if tuned_parameters is None:
        # built on every call: a dict literal in the signature would be
        # created once and shared by all calls (and by anything that keeps a
        # reference to it)
        tuned_parameters = {
            'kernel': ['rbf'],
            'gamma': 2.0**np.arange(-15, 3),
            'C': 2.0**np.arange(-5, 15),
        }
    k_fold = _size_dependent_k_split(np.size(X, 0))
    clf = GridSearchCV(SVC(C=1), tuned_parameters, score_func=recall_score)
    clf.fit(X, y, cv=StratifiedKFold(y, k_fold))
    # NOTE: the recall printed below is measured on the very data the search
    # was fitted on, not on held-out data
    y_true, y_pred = y, clf.predict(X)
    # single-argument call form prints identically on Python 2 and 3
    print("Tuned with optimal value: %0.3f" % recall_score(y_true, y_pred))
    return clf
示例4: do_grid_search
def do_grid_search(X,Y, gs_params=None):
    """Grid-search a one-step SVC pipeline and return the best parameters.

    Parameters
    ----------
    X, Y :
        Training samples and targets, handed unchanged to
        ``GridSearchCV.fit``.
    gs_params :
        Parameter grid for the search. The classifier is registered in the
        pipeline under the step name ``'rbfsvm'``, so grid keys use the
        ``'rbfsvm__<param>'`` form. When omitted or empty, a built-in grid
        over ``C`` and ``gamma`` is used.

    Returns
    -------
    The parameter setting whose entry in ``grid_scores_`` has the highest
    score. Both the setting and its score are also logged at INFO level.
    """
    svpipe = Pipeline([
        ('rbfsvm', SVC()),
    ])
    # any falsy value (None, empty dict) selects the default grid
    if not gs_params:
        gs_params = {
            'rbfsvm__C': (1.5, 2, 5, 10, 20),
            'rbfsvm__gamma': (0.01, 0.1, 0.3, 0.6, 1, 1.5, 2, 5),
        }
    gs = GridSearchCV(svpipe, gs_params, n_jobs=-1).fit(X, Y)
    # each grid_scores_ entry is unpacked as (parameters, score);
    # rank the entries by their score
    best_parameters, score = max(gs.grid_scores_, key=lambda entry: entry[1])
    # pass values as logging arguments so formatting only happens when the
    # record is actually emitted
    logger.info("best_parameters: %s", best_parameters)
    logger.info("expected score: %s", score)
    return best_parameters
示例5: ParameterGridSearch
def ParameterGridSearch(self, callback = None, nValidation = 5):
    '''
    Grid search for the best C and gamma parameters of an RBF-kernel SVC.

    Every (C, gamma) combination is scored with precision_score using
    stratified nValidation-fold cross-validation on self.svm_train_values /
    self.svm_train_labels. The search currently runs with a single worker
    (see the XXX note below).

    callback is accepted but not used by this method.

    Returns the tuple (bestC, bestGamma) taken from the grid entry with the
    highest score.
    '''
    from scikits.learn.grid_search import GridSearchCV
    from scikits.learn.metrics import precision_score
    from scikits.learn.cross_val import StratifiedKFold

    #
    # XXX: program crashes with >1 worker when running cpa.py
    #      No crash when running from classifier.py. Why?
    # Until that is understood the worker count stays at 1 instead of
    # multiprocessing.cpu_count().
    #
    n_workers = 1

    # Powers of two spanning the C and gamma ranges to explore
    search_space = {
        'C': 2**np.arange(-5,11,2, dtype=float),
        'gamma': 2**np.arange(3,-11,-2, dtype=float),
    }
    search = GridSearchCV(SVC(kernel='rbf'), search_space,
                          n_jobs=n_workers, score_func=precision_score)
    search.fit(self.svm_train_values, self.svm_train_labels,
               cv=StratifiedKFold(self.svm_train_labels, nValidation))

    # The winning grid entry is the one with the largest score (item 1);
    # item 0 of an entry holds the parameter dict it was computed for
    winner = max(search.grid_scores_, key=lambda entry: entry[1])
    settings = winner[0]
    best_c = settings['C']
    best_gamma = settings['gamma']
    summary = 'Optimal values: C=%s g=%s rate=%s' % (best_c, best_gamma, winner[1])
    logging.info(summary)
    return best_c, best_gamma
示例6: test_dense_vectorizer_pipeline_grid_selection
def test_dense_vectorizer_pipeline_grid_selection():
    """Grid-search a vectorizer + dense linear SVC pipeline on the toy corpus.

    The first and last documents are held out for prediction; the rest is
    used for the search. The test expects perfect predictions, a best score
    of 1.0 and the bigram analyzer as the selected vectorizer setting.
    """
    # raw documents
    docs = JUNK_FOOD_DOCS + NOTJUNK_FOOD_DOCS

    # simulate iterables
    train_docs = iter(docs[1:-1])
    held_out_docs = iter([docs[0], docs[-1]])

    # label junk food as -1, the others as +1
    # (assumes the first six documents are the junk-food ones -- TODO confirm)
    labels = np.ones(len(docs))
    labels[:6] = -1
    train_labels = labels[1:-1]
    held_out_labels = np.array([labels[0], labels[-1]])

    steps = [
        ('vect', CountVectorizer()),
        ('svc', DenseLinearSVC()),
    ]
    grid = {
        'vect__analyzer': (WordNGramAnalyzer(min_n=1, max_n=1),
                           WordNGramAnalyzer(min_n=1, max_n=2)),
        'svc__loss' : ('l1', 'l2')
    }

    # find the best parameters for both the feature extraction and the
    # classifier
    grid_search = GridSearchCV(Pipeline(steps), grid, n_jobs=1)

    # cross-validation doesn't work if the length of the data is not known,
    # hence use lists instead of iterators
    fitted = grid_search.fit(list(train_docs), train_labels)
    predicted = fitted.predict(list(held_out_docs))
    assert_array_equal(predicted, held_out_labels)

    # on this toy dataset bigram representation which is used in the last of
    # the grid_search is considered the best estimator since they all converge
    # to 100% accuracy models
    assert_equal(grid_search.best_score, 1.0)
    chosen_vectorizer = grid_search.best_estimator.named_steps['vect']
    assert_equal(chosen_vectorizer.analyzer.max_n, 2)
示例7: test_grid_search_sparse_score_func
def test_grid_search_sparse_score_func():
    """Check dense and sparse grid searches agree when scored with f1_score.

    The same search over ``C`` is run with ``LinearSVC`` on the dense data
    and with ``SparseLinearSVC`` on its CSR version; predictions on the last
    20 samples and the selected ``C`` must be identical.
    """
    X_, y_ = test_dataset_classif(n_samples=200, n_features=100, seed=0)

    def search_and_predict(estimator, data):
        # fit on the first 180 samples, predict the remaining ones and
        # report the C of the selected estimator
        search = GridSearchCV(estimator, {'C': [0.1, 1.0]}, score_func=f1_score)
        search.fit(data[:180], y_[:180])
        predictions = search.predict(data[180:])
        return predictions, search.best_estimator.C

    y_pred, C = search_and_predict(LinearSVC(), X_)

    sparse_X = sp.csr_matrix(X_)
    y_pred2, C2 = search_and_predict(SparseLinearSVC(), sparse_X)

    assert_array_equal(y_pred, y_pred2)
    assert_equal(C, C2)
示例8: test_grid_search_sparse
def test_grid_search_sparse():
    """Check that grid search works with both dense and sparse matrices.

    Predictions of the dense and sparse searches on the last 20 samples must
    agree on at least 90% of them, and both must select the same ``C``.
    """
    features, targets = test_dataset_classif(n_samples=200, n_features=100, seed=0)
    train_targets = targets[:180]

    # dense run
    dense_search = GridSearchCV(LinearSVC(), {'C':[0.1, 1.0]})
    dense_search.fit(features[:180], train_targets)
    dense_pred = dense_search.predict(features[180:])
    dense_C = dense_search.best_estimator.C

    # same data as a CSR matrix, fitted with the sparse estimator
    sparse_features = sp.csr_matrix(features)
    sparse_search = GridSearchCV(SparseLinearSVC(), {'C':[0.1, 1.0]})
    sparse_search.fit(sparse_features[:180], train_targets)
    sparse_pred = sparse_search.predict(sparse_features[180:])
    sparse_C = sparse_search.best_estimator.C

    assert np.mean(dense_pred == sparse_pred) >= .9
    assert_equal(dense_C, sparse_C)
示例9: test_grid_search_sparse_score_func
def test_grid_search_sparse_score_func():
    """Check dense and sparse grid searches agree when scored with f1_score.

    Data comes from ``make_classification``. Both searches are run with
    ``refit=False``; predictions on the last 20 samples and the selected
    ``C`` must be identical for the dense and the sparse run.
    """
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
    grid = {'C': [0.1, 1.0]}

    # dense run
    dense_search = GridSearchCV(LinearSVC(), grid, score_func=f1_score)
    # XXX: set refit to False due to a random bug when True (default)
    dense_configured = dense_search.set_params(refit=False)
    dense_configured.fit(X_[:180], y_[:180])
    y_pred = dense_search.predict(X_[180:])
    C = dense_search.best_estimator.C

    # sparse run on the CSR version of the same data
    X_sparse = sp.csr_matrix(X_)
    sparse_search = GridSearchCV(SparseLinearSVC(), grid, score_func=f1_score)
    # XXX: set refit to False due to a random bug when True (default)
    sparse_configured = sparse_search.set_params(refit=False)
    sparse_configured.fit(X_sparse[:180], y_[:180])
    y_pred2 = sparse_search.predict(X_sparse[180:])
    C2 = sparse_search.best_estimator.C

    assert_array_equal(y_pred, y_pred2)
    assert_equal(C, C2)
示例10: Pipeline
# Data attributes: class ids, their display names and one colour per class
targets = [0, 1, 2]
target_names = ["covered", "no alternance", "uncovered"]
target_colors = "rgb"

# Classification settings: feature extraction followed by a linear SVC
pipeline = Pipeline([
    ('extr', InfinitivesExtractor()),
    ('svc', LinearSVC(multi_class=True)),
])

# Candidate values per pipeline step, keyed as <step>__<parameter>
parameters = {
    'extr__count': (True, False),
    'extr__n': (3, 4, 5, 6),
    'svc__C': (1e-1, 1e-2, 1e9),
}

grid_search = GridSearchCV(pipeline, parameters)

print("Loading data...")
X, y = load_data()

# Time the whole search
print("Searching for the best model...")
t0 = time()
grid_search.fit(X, y)
print("Done in %0.3f" % (time() - t0))
print("Best score: %0.3f" % grid_search.best_score)

clf = grid_search.best_estimator
print(clf)

# NOTE: the report is computed on the same data the search was fitted on
yp = clf.predict(X)
print(classification_report(y, yp, targets, target_names))

#pl.figure()
#pl.title("Classification rate for 3-fold stratified CV")
示例11: iter
# split the dataset in two equal parts respecting label proportions:
# take the first (train, test) pair produced by a 2-fold stratified split
train, test = next(iter(StratifiedKFold(y, 2)))

################################################################################
# Set the parameters by cross-validation
tuned_parameters = [
    {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
]

# (display name, scoring function) pairs; one grid search is run per pair
scores = [
    ('precision', precision_score),
    ('recall', recall_score),
]

for score_name, score_func in scores:
    clf = GridSearchCV(SVC(C=1), tuned_parameters, score_func=score_func)
    clf.fit(X[train], y[train], cv=StratifiedKFold(y[train], 5))

    # evaluate the selected model on the held-out half
    y_true, y_pred = y[test], clf.predict(X[test])

    print("Classification report for the best estimator: ")
    print(clf.best_estimator)
    print("Tuned for '%s' with optimal value: %0.3f" % (
        score_name, score_func(y_true, y_pred)))
    print(classification_report(y_true, y_pred))
    print("Grid scores:")
    pprint(clf.grid_scores_)
    # blank separator line between the two runs
    print("")

# Note the problem is too easy: the hyperparameter plateau is too flat and the
# output model is the same for precision and recall with ties in quality
示例12: RandomizedPCA
# Number of principal components to keep
n_components = 150

print("Extracting the top %d eigenfaces" % n_components)
pca_sl = RandomizedPCA(n_components=n_components, whiten=True)
pca_sl.fit(X_train)

# project the input data on the eigenfaces orthonormal basis
X_train_pca = pca_sl.transform(X_train)

################################################################################
# Train a SVM classification model

print("Fitting the classifier to the training set")
param_grid = {
    'C': [1, 5, 10, 50, 100],
    'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
}
# 'class_weight' is forwarded to the estimator's fit through fit_params
clf = GridSearchCV(
    SVC(kernel='rbf'),
    param_grid,
    fit_params={'class_weight': 'auto'}
).fit(X_train_pca, y_train)

print("Best estimator found by grid search:")
print(clf.best_estimator)
示例13: Pipeline
# Targets from index `split` onwards form the test targets
y_test = dataset.target[split:]

# Build a vectorizer / classifier pipeline using the previous analyzer
pipeline = Pipeline([
    ('vect', CountVectorizer(max_features=100000)),
    ('tfidf', TfidfTransformer()),
    ('clf', LinearSVC(C=1000)),
])

# Grid over the analyzer's max_n; max_df has a single candidate value
parameters = {
    'vect__analyzer__max_n': (1, 2),
    'vect__max_df': (.95,),
}

# Fit the pipeline on the training set using grid search for the parameters;
# only the first 200 training documents are used for the search itself
grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1)
grid_search.fit(docs_train[:200], y_train[:200])

# Refit the best parameter set on the complete training set
clf = grid_search.best_estimator.fit(docs_train, y_train)

# Predict the outcome on the testing set
y_predicted = clf.predict(docs_test)

# Print the classification report
print(metrics.classification_report(y_test, y_predicted,
                                    class_names=dataset.target_names))

# Compute and print the confusion matrix
cm = metrics.confusion_matrix(y_test, y_predicted)
print(cm)
示例14: RandomizedPCA
print("Extracting the top %d eigenfaces" % n_components)
pca = RandomizedPCA(n_components=n_components, whiten=True).fit(X_train)

# view the transposed components as n_components arrays of 64 x 64
eigenfaces = pca.components_.T.reshape((n_components, 64, 64))

# project the input data on the eigenfaces orthonormal basis
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

# Train a SVM classification model
print("Fitting the classifier to the training set")
param_grid = {
    "C": [1, 5, 10, 100],
    "gamma": [0.0001, 0.001, 0.01, 0.1],
}
# 'class_weight' is forwarded to the estimator's fit through fit_params
clf = GridSearchCV(
    SVC(kernel="rbf"),
    param_grid,
    fit_params={"class_weight": "auto"},
    n_jobs=-1
).fit(X_train_pca, y_train)

print("Best estimator found by grid search:")
print(clf.best_estimator)

# Quantitative evaluation of the model quality on the test set
y_pred = clf.predict(X_test_pca)
print(classification_report(y_test, y_pred, labels=selected_target,
                            target_names=target_names[selected_target]))
print(confusion_matrix(y_test, y_pred, labels=selected_target))

# Qualitative evaluation of the predictions using matplotlib
示例15: test_GridSearch
def test_GridSearch():
    """Check that grid search over ``foo_param`` in [1, 2, 3] selects 2.

    The search is fitted on the ``X``/``y`` data from the enclosing scope.
    """
    search = GridSearchCV(MockClassifier(), {'foo_param':[1, 2, 3]})
    fitted = search.fit(X, y)
    assert_equal(fitted.best_estimator.foo_param, 2)