本文整理汇总了 Python 中 scikits.learn.datasets.samples_generator.test_dataset_classif 函数的典型用法代码示例。如果您正苦于以下问题:Python test_dataset_classif 函数的具体用法?Python test_dataset_classif 怎么用?Python test_dataset_classif 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了 test_dataset_classif 函数的 11 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_grid_search_error
def test_grid_search_error():
    """Check that grid search raises ``ValueError`` when the samples and
    the labels passed to ``fit`` have different lengths"""
    samples, labels = test_dataset_classif(n_samples=200, n_features=100,
                                           seed=0)
    param_grid = {'C': [0.1, 1.0]}
    search = GridSearchCV(LinearSVC(), param_grid)
    # The samples are truncated to 180 rows while the labels are passed
    # whole, so fit is expected to fail.
    assert_raises(ValueError, search.fit, samples[:180], labels)
示例2: test_weight
def test_weight():
    """
    Check that the sparse classifiers accept ``class_weight`` in ``fit``
    and still predict at least 11 of the held-out labels correctly
    """
    features, targets = test_dataset_classif(n_samples=200, n_features=100,
                                             param=[5, 1], seed=0)
    features = scipy.sparse.csr_matrix(features)
    estimators = (
        linear_model.sparse.LogisticRegression(),
        svm.sparse.LinearSVC(),
        svm.sparse.SVC(),
    )
    for estimator in estimators:
        # Train on the first 180 rows with class 0 weighted by 5.
        estimator.fit(features[:180], targets[:180], class_weight={0: 5})
        # Evaluate on the rows from index 180 onward.
        predicted = estimator.predict(features[180:])
        n_correct = np.sum(predicted == targets[180:])
        assert n_correct >= 11
示例3: test_f_classif_multi_class
def test_f_classif_multi_class():
    """
    Check that ``f_classif`` produces sensible F scores and p-values
    on a small simulated classification problem
    """
    X, Y = test_dataset_classif(n_samples=50, n_features=20, k=5,
                                seed=seed, param=[1, 1, 1])
    f_scores, p_values = f_classif(X, Y)
    # Every F score is strictly positive.
    assert (f_scores > 0).all()
    # Every p-value lies strictly between 0 and 1.
    assert (p_values > 0).all()
    assert (p_values < 1).all()
    # The first five features must come out significant at the 0.05 level,
    # the remaining ones must stay above 1e-5.
    leading, trailing = p_values[:5], p_values[5:]
    assert (leading < 0.05).all()
    assert (trailing > 1.e-5).all()
示例4: test_f_regression
def test_f_regression():
    """
    Check that ``f_regression`` produces sensible F scores and p-values
    on a small simulated problem
    """
    X, Y = test_dataset_classif(n_samples=50, n_features=20, k=5,
                                seed=seed)
    f_scores, p_values = f_regression(X, Y)
    # Every F score is strictly positive.
    assert (f_scores > 0).all()
    # Every p-value lies strictly between 0 and 1.
    assert (p_values > 0).all()
    assert (p_values < 1).all()
    # The first five features must come out significant at the 0.05 level,
    # the remaining ones must stay above 1e-4.
    leading, trailing = p_values[:5], p_values[5:]
    assert (leading < 0.05).all()
    assert (trailing > 1.e-4).all()
示例5: test_weight
def test_weight():
    """
    Check that class weights are taken into account by the classifiers
    """
    weighted_svc = svm.SVC()
    # Class 1 receives a small weight (X and Y are module-level data
    # defined outside this function) ...
    weighted_svc.fit(X, Y, {1: 0.1})
    # ... so every prediction is expected to be class 2.
    toy_predictions = weighted_svc.predict(X)
    assert_array_almost_equal(toy_predictions, [2] * 6)

    features, targets = test_dataset_classif(n_samples=200, n_features=100,
                                             param=[5, 1], seed=0)
    estimators = (
        linear_model.LogisticRegression(),
        svm.LinearSVC(),
        svm.SVC(),
    )
    for estimator in estimators:
        # Train on the first 180 rows with class 0 weighted by 5.
        estimator.fit(features[:180], targets[:180], class_weight={0: 5})
        # Evaluate on the rows from index 180 onward.
        predicted = estimator.predict(features[180:])
        n_correct = np.sum(predicted == targets[180:])
        assert n_correct >= 11
示例6: test_select_kbest_classif
def test_select_kbest_classif():
    """
    Check that univariate feature selection with the k best heuristic
    picks the expected features in a simple classification problem
    """
    X, Y = test_dataset_classif(n_samples=50, n_features=20, k=5,
                                seed=seed)
    kbest = SelectKBest(f_classif, k=5)
    reduced = kbest.fit(X, Y).transform(X)
    # The generic selector in 'k_best' mode must agree with SelectKBest.
    generic = GenericUnivariateSelect(f_classif, mode='k_best', param=5)
    reduced_generic = generic.fit(X, Y).transform(X)
    assert_array_equal(reduced, reduced_generic)

    # Expected mask: the first five of the twenty features are selected.
    selected = kbest.get_support()
    expected = np.zeros(20)
    expected[:5] = 1
    assert_array_equal(selected, expected)
示例7: test_select_fwe_classif
def test_select_fwe_classif():
    """
    Check that univariate feature selection with the fwe heuristic
    (``SelectFwe``) picks the expected features in a simple
    classification problem
    """
    X, Y = test_dataset_classif(n_samples=50, n_features=20, k=5,
                                seed=seed)
    fwe_filter = SelectFwe(f_classif, alpha=0.01)
    reduced = fwe_filter.fit(X, Y).transform(X)
    # The generic selector in 'fwe' mode must agree with SelectFwe.
    generic = GenericUnivariateSelect(f_classif, mode='fwe', param=0.01)
    reduced_generic = generic.fit(X, Y).transform(X)
    assert_array_equal(reduced, reduced_generic)

    # Expected mask: the first five of the twenty features are selected.
    selected = fwe_filter.get_support()
    expected = np.zeros(20)
    expected[:5] = 1
    # Fewer than two features may differ from the expected mask.
    n_mismatches = np.sum(np.abs(selected - expected))
    assert (n_mismatches < 2)
示例8: test_grid_search_sparse_score_func
def test_grid_search_sparse_score_func():
    """Check that grid search scored with ``f1_score`` gives the same
    predictions and the same best ``C`` on dense and sparse input"""
    X_, y_ = test_dataset_classif(n_samples=200, n_features=100, seed=0)

    def _search_and_predict(estimator, data):
        # Fit on the first 180 rows, predict the rest, report the best C.
        search = GridSearchCV(estimator, {'C': [0.1, 1.0]},
                              score_func=f1_score)
        search.fit(data[:180], y_[:180])
        predictions = search.predict(data[180:])
        return predictions, search.best_estimator.C

    dense_pred, dense_C = _search_and_predict(LinearSVC(), X_)

    sparse_X = sp.csr_matrix(X_)
    sparse_pred, sparse_C = _search_and_predict(SparseLinearSVC(), sparse_X)

    assert_array_equal(dense_pred, sparse_pred)
    assert_equal(dense_C, sparse_C)
示例9: test_grid_search_sparse
def test_grid_search_sparse():
    """Check that grid search behaves alike on dense and sparse matrices"""
    X_, y_ = test_dataset_classif(n_samples=200, n_features=100, seed=0)

    def _search_and_predict(estimator, data):
        # Fit on the first 180 rows, predict the rest, report the best C.
        search = GridSearchCV(estimator, {'C': [0.1, 1.0]})
        search.fit(data[:180], y_[:180])
        predictions = search.predict(data[180:])
        return predictions, search.best_estimator.C

    dense_pred, dense_C = _search_and_predict(LinearSVC(), X_)

    sparse_X = sp.csr_matrix(X_)
    sparse_pred, sparse_C = _search_and_predict(SparseLinearSVC(), sparse_X)

    # At least 90% of the dense and sparse predictions must coincide.
    agreement = np.mean(dense_pred == sparse_pred)
    assert agreement >= .9
    assert_equal(dense_C, sparse_C)
示例10: SelectKBest
"""
==========================
Pipeline Anova SVM
==========================
Simple usages of pipeline:
- ANOVA SVM-C
"""
from scikits.learn import svm
from scikits.learn.datasets import samples_generator
from scikits.learn.feature_selection.univariate_selection import SelectKBest,f_regression
from scikits.learn.pipeline import Pipeline
# Generate a dataset to play with; k=5 is forwarded to the generator
# (presumably the number of informative features -- TODO confirm).
X, y = samples_generator.test_dataset_classif(k=5)
# ANOVA SVM-C
# 1) ANOVA filter: keep the 5 best ranked features.
# NOTE(review): the scoring function is f_regression although the data comes
# from a classification generator -- confirm this is intended.
anova_filter = SelectKBest(f_regression, k=5)
# 2) SVM with a linear kernel as the final estimator.
clf = svm.SVC(kernel='linear')
# Chain the filter and the classifier: the list holds the filter step,
# the second argument is the estimator.
anova_svm = Pipeline([anova_filter], clf)
# Fit the whole pipeline, then predict on the same data it was trained on.
anova_svm.fit(X, y)
anova_svm.predict(X)
示例11: SVC
Recursive feature elimination with automatic tuning of the
number of features selected with cross-validation
"""
# Echo the module docstring when the example runs (Python 2 print statement).
print __doc__
import numpy as np
from scikits.learn.svm import SVC
from scikits.learn.cross_val import StratifiedKFold
from scikits.learn.feature_selection import RFECV
from scikits.learn.datasets import samples_generator
from scikits.learn.metrics import zero_one
################################################################################
# Loading a dataset
# 500 features are generated with a fixed seed; k=5 is forwarded to the
# generator (presumably the number of informative features -- TODO confirm).
X, y = samples_generator.test_dataset_classif(n_features=500, k=5, seed=0)
################################################################################
# Create the RFE object and compute a cross-validated score
svc = SVC(kernel='linear')
# zero_one is used as the loss function.
# NOTE(review): the exact meaning of n_features=2 and percentage=0.1 is not
# visible here -- confirm against the RFECV documentation.
rfecv = RFECV(estimator=svc, n_features=2, percentage=0.1, loss_func=zero_one)
# Cross-validate with a 2-fold StratifiedKFold built from the labels.
rfecv.fit(X, y, cv=StratifiedKFold(y, 2))
# support_.sum() counts the entries set in the selection mask.
print 'Optimal number of features : %d' % rfecv.support_.sum()
import pylab as pl
# Plot the cross-validation score against the number of features,
# with a logarithmic x axis.
# NOTE(review): no pl.show() call is visible in this excerpt.
pl.figure()
pl.semilogx(rfecv.n_features_, rfecv.cv_scores_)
pl.xlabel('Number of features selected')
pl.ylabel('Cross validation score (nb of misclassifications)')