Python datasets.make_hastie_10_2函数代码示例

本文整理汇总了Python中sklearn.datasets.make_hastie_10_2函数的典型用法代码示例。如果您正苦于以下问题：Python make_hastie_10_2函数的具体用法？Python make_hastie_10_2怎么用？Python make_hastie_10_2使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了make_hastie_10_2函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_subsample_heuristic

    def test_subsample_heuristic(self):
        """Check what ``s=auto`` resolves to for two dataset sizes."""
        # 112 samples: the resolved 'subsample' parameter must be exactly 1.0.
        small_task = RuleFitC('s=auto')
        small_X, small_y = datasets.make_hastie_10_2(n_samples=112)
        small_task._modify_parameters(small_X, small_y)
        self.assertEqual(small_task.parameters['subsample'], 1.0)

        # 500 samples: the resolved value must round to 0.49 at two decimals.
        large_task = RuleFitC('s=auto')
        large_X, large_y = datasets.make_hastie_10_2(n_samples=500)
        large_task._modify_parameters(large_X, large_y)
        resolved = large_task.parameters['subsample']
        self.assertEqual(round(resolved, 2), 0.49)

开发者ID:tkincaid，项目名称:tkincaid.github.com，代码行数:11，代码来源:test_rulefit.py

示例2: test_warm_start_smaller_n_estimators

def test_warm_start_smaller_n_estimators(Cls):
    # A warm-started estimator that is asked to refit with fewer estimators
    # than it was first fitted with (100 -> 99) must raise ValueError.
    features, labels = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    model = Cls(n_estimators=100, max_depth=1, warm_start=True)
    model.fit(features, labels)

    model.set_params(n_estimators=99)
    assert_raises(ValueError, model.fit, features, labels)

开发者ID:amueller，项目名称:scikit-learn，代码行数:7，代码来源:test_gradient_boosting.py

示例3: test_warm_start_sparse

def test_warm_start_sparse(Cls):
    # Warm-started fitting on CSR, CSC and COO input must give the same
    # OOB improvements and predictions as fitting on the dense input.
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    sparse_constructors = (csr_matrix, csc_matrix, coo_matrix)

    def fit_in_two_stages(train_X):
        # Fit 100 estimators, then grow the same model to 200 through warm
        # start. Predictions are always computed on the dense X.
        model = Cls(n_estimators=100, max_depth=1, subsample=0.5,
                    random_state=1, warm_start=True)
        model.fit(train_X, y)
        model.predict(X)
        model.set_params(n_estimators=200)
        model.fit(train_X, y)
        return model, model.predict(X)

    dense_model, dense_pred = fit_in_two_stages(X)

    for to_sparse in sparse_constructors:
        sparse_model, sparse_pred = fit_in_two_stages(to_sparse(X))

        # Only the first 100 OOB improvements are compared.
        assert_array_almost_equal(dense_model.oob_improvement_[:100],
                                  sparse_model.oob_improvement_[:100])
        assert_array_almost_equal(dense_pred, sparse_pred)

开发者ID:amueller，项目名称:scikit-learn，代码行数:26，代码来源:test_gradient_boosting.py

示例4: test_monitor_early_stopping

def test_monitor_early_stopping(Cls):
    # Check that the return value of a ``monitor`` callback is honoured by
    # ``fit``, first without and then with warm start.
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)

    def check_stopped_early(model):
        # After a monitored fit the requested size stays at 20 while the
        # fitted attributes are expected to hold only 10 stages.
        assert_equal(model.n_estimators, 20)  # this is not altered
        assert_equal(model.estimators_.shape[0], 10)
        assert_equal(model.train_score_.shape[0], 10)
        assert_equal(model.oob_improvement_.shape[0], 10)

    # Cold-start estimator, fitted with the monitor.
    cold = Cls(n_estimators=20, max_depth=1, random_state=1, subsample=0.5)
    cold.fit(X, y, monitor=early_stopping_monitor)
    check_stopped_early(cold)

    # Refit without a monitor: all 30 stages must be present.
    cold.set_params(n_estimators=30)
    cold.fit(X, y)
    assert_equal(cold.n_estimators, 30)
    assert_equal(cold.estimators_.shape[0], 30)
    assert_equal(cold.train_score_.shape[0], 30)

    # Same scenario, this time starting with warm_start enabled.
    warm = Cls(n_estimators=20, max_depth=1, random_state=1, subsample=0.5,
               warm_start=True)
    warm.fit(X, y, monitor=early_stopping_monitor)
    check_stopped_early(warm)

    # Refit with warm start switched off and no monitor.
    warm.set_params(n_estimators=30, warm_start=False)
    warm.fit(X, y)
    assert_equal(warm.n_estimators, 30)
    assert_equal(warm.train_score_.shape[0], 30)
    assert_equal(warm.estimators_.shape[0], 30)
    assert_equal(warm.oob_improvement_.shape[0], 30)

开发者ID:amueller，项目名称:scikit-learn，代码行数:33，代码来源:test_gradient_boosting.py

示例5: test_most_freq_clf_proba

 def test_most_freq_clf_proba(self):
     # The 'most_frequent' dummy classifier is expected to report the
     # positive-class prior in column 1 and its complement in column 0,
     # for every row.
     X, y = datasets.make_hastie_10_2(random_state=13, n_samples=100)
     positive_prior = (y == 1).mean()
     fitted = _DummyClassifier(strategy='most_frequent').fit(X, y)
     probabilities = fitted.predict_proba(X)

     expected_positive = np.ones(X.shape[0]) * positive_prior
     np.testing.assert_array_equal(probabilities[:, 1], expected_positive)
     expected_negative = np.ones(X.shape[0]) * (1 - positive_prior)
     np.testing.assert_array_equal(probabilities[:, 0], expected_negative)

开发者ID:tkincaid，项目名称:tkincaid.github.com，代码行数:7，代码来源:test_dummy.py

示例6: test_staged_predict_proba

def test_staged_predict_proba():
    # The last stage yielded by the staged prediction methods must agree
    # with the corresponding non-staged method.
    X, y = datasets.make_hastie_10_2(n_samples=1200,
                                     random_state=1)
    X_train, X_test = X[:200], X[200:]
    y_train, y_test = y[:200], y[200:]
    clf = GradientBoostingClassifier(n_estimators=20)

    def consume_staged_proba(data):
        # Force the staged generator to be iterated.
        return np.fromiter(clf.staged_predict_proba(data), dtype=np.float64)

    # Consuming the staged output before fitting must raise NotFittedError.
    assert_raises(NotFittedError, consume_staged_proba, X_test)

    clf.fit(X_train, y_train)

    # Every staged prediction has the shape of y_test; the last one must
    # equal ``predict``.
    for stage_pred in clf.staged_predict(X_test):
        assert_equal(y_test.shape, stage_pred.shape)

    assert_array_equal(clf.predict(X_test), stage_pred)

    # Every staged probability array has one row per test sample and two
    # columns; the last one must (almost) equal ``predict_proba``.
    for stage_proba in clf.staged_predict_proba(X_test):
        assert_equal(y_test.shape[0], stage_proba.shape[0])
        assert_equal(2, stage_proba.shape[1])

    assert_array_almost_equal(clf.predict_proba(X_test), stage_proba)

开发者ID:amueller，项目名称:scikit-learn，代码行数:26，代码来源:test_gradient_boosting.py

示例7: check_warm_start_oob

def check_warm_start_oob(name):
    # Verify that a warm-started forest exposes an OOB score when one is
    # requested, and that it equals the score of a forest fitted in one go.
    X, y = datasets.make_hastie_10_2(n_samples=20, random_state=1)
    ForestEstimator = FOREST_ESTIMATORS[name]
    shared = dict(max_depth=3, random_state=1, bootstrap=True)

    # Use 15 estimators to avoid 'some inputs do not have OOB scores' warning.
    reference = ForestEstimator(n_estimators=15, warm_start=False,
                                oob_score=True, **shared)
    reference.fit(X, y)

    # Start with 5 trees and no OOB score, then grow to 15 with warm start
    # and the OOB score switched on.
    grown = ForestEstimator(n_estimators=5, warm_start=False,
                            oob_score=False, **shared)
    grown.fit(X, y)

    grown.set_params(warm_start=True, oob_score=True, n_estimators=15)
    grown.fit(X, y)

    assert_true(hasattr(grown, 'oob_score_'))
    assert_equal(reference.oob_score_, grown.oob_score_)

    # The OOB score must also be computed when the second fit has no
    # additional trees to train.
    complete = ForestEstimator(n_estimators=15, warm_start=True,
                               oob_score=False, **shared)
    complete.fit(X, y)
    assert_true(not hasattr(complete, 'oob_score_'))

    complete.set_params(oob_score=True)
    ignore_warnings(complete.fit)(X, y)

    assert_equal(reference.oob_score_, complete.oob_score_)

开发者ID:EddieBurning，项目名称:scikit-learn，代码行数:30，代码来源:test_forest.py

示例8: test_clf_early_stop_gridsearch_weights

    def test_clf_early_stop_gridsearch_weights(self, mocklogloss):
        """Test clf passes weights to the loss function if early-stopping is in effect when doing gridsearch.

        ``mocklogloss`` is the patched loss object supplied to this test; the
        assertions at the end inspect the positional arguments of its last
        call.
        """
        def weight_loss(actual, pred, weights):
            # print() with a single argument behaves the same on Python 2
            # and Python 3; the former print statement was Python 2 only.
            print("Test")
            # NOTE(review): this checks for weights 10/1 while the weights
            # built below are 11/2 -- confirm whether this helper is ever
            # actually invoked through ``mocklogloss.method``.
            if np.all(weights[actual == 1] == 10.) and \
               np.all(weights[actual == -1] == 1.):
                raise ValueError("Weights passed successfully")
            else:
                assert False
                return np.sum(pred) - 50.0
        mocklogloss.method = weight_loss
        x, Y = make_hastie_10_2(n_samples=300, random_state=41)
        X = Container()
        X.add(x)
        Z = Partition(X.shape[0], max_reps=2, max_folds=0)
        Z.set(max_reps=1, max_folds=1)
        # Weight is 11.0 where Y == 1 and 2.0 everywhere else.
        wt = {'weight': pd.Series(2.0 + 9.0 * (Y == 1).astype(float))}

        # Add weights to container
        X.initialize(wt)
        task = ESGBC('s=1;n=10;md=[2];ls=1;lr=[0.1, 0.000001];t_m=Weighted LogLoss')

        task.fit(X, Y, Z)
        # Assert the patched loss function was passed the weights
        self.assertTrue(mocklogloss.called)
        # The third argument is weight, we should be passed two values
        passed_weights = mocklogloss.call_args[0][2]
        passed_actuals = mocklogloss.call_args[0][0]
        self.assertEqual(len(np.unique(passed_weights)), 2)
        print(passed_weights)
        self.assertTrue(np.all(passed_weights[passed_actuals == -1] == 2))
        self.assertTrue(np.all(passed_weights[passed_actuals == 1] == 11))

开发者ID:tkincaid，项目名称:tkincaid.github.com，代码行数:32，代码来源:test_gbm.py

示例9: test_estimators_samples

def test_estimators_samples():
    # Check the format of ``estimators_samples_`` and verify that the fit of
    # a single sub-estimator can be reproduced afterwards from the sample
    # and feature indices stored on the fitted ensemble.
    X, y = make_hastie_10_2(n_samples=200, random_state=1)
    ensemble = BaggingClassifier(LogisticRegression(), max_samples=0.5,
                                 max_features=0.5, random_state=1,
                                 bootstrap=False)
    ensemble.fit(X, y)

    # Attributes recorded at fit time
    all_samples = ensemble.estimators_samples_
    all_features = ensemble.estimators_features_
    fitted_estimators = ensemble.estimators_

    # Format: one entry per estimator, half of the rows each, integer dtype
    assert_equal(len(all_samples), len(fitted_estimators))
    assert_equal(len(all_samples[0]), len(X) // 2)
    assert_equal(all_samples[0].dtype.kind, 'i')

    # Rebuild the training set of the first estimator and refit it
    first = 0
    sample_idx = all_samples[first]
    feature_idx = all_features[first]
    member = fitted_estimators[first]

    rows = X[sample_idx]
    X_subset = rows[:, feature_idx]
    y_subset = y[sample_idx]

    coefs_before = member.coef_
    member.fit(X_subset, y_subset)
    coefs_after = member.coef_

    assert_array_almost_equal(coefs_before, coefs_after)

开发者ID:daniel-perry，项目名称:scikit-learn，代码行数:34，代码来源:test_bagging.py

示例10: test_max_feature_auto

def test_max_feature_auto():
    # Check the resolved ``max_features_`` for string and float settings of
    # ``max_features``.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)
    n_features = X.shape[1]

    X_train, y_train = X[:2000], y[:2000]

    # (estimator class, max_features argument, expected max_features_)
    cases = [
        (GradientBoostingClassifier, 'auto', int(np.sqrt(n_features))),
        (GradientBoostingRegressor, 'auto', n_features),
        (GradientBoostingRegressor, 0.3, int(n_features * 0.3)),
        (GradientBoostingRegressor, 'sqrt', int(np.sqrt(n_features))),
        (GradientBoostingRegressor, 'log2', int(np.log2(n_features))),
        # A very small fraction is expected to resolve to one feature.
        (GradientBoostingRegressor, 0.01 / X.shape[1], 1),
    ]

    for estimator_cls, setting, expected in cases:
        gbrt = estimator_cls(n_estimators=1, max_features=setting)
        gbrt.fit(X_train, y_train)
        assert_equal(gbrt.max_features_, expected)

开发者ID:amueller，项目名称:scikit-learn，代码行数:32，代码来源:test_gradient_boosting.py

示例11: test_warm_start_smaller_n_estimators

def test_warm_start_smaller_n_estimators():
    # A warm-started BaggingClassifier fitted with 5 estimators must raise
    # ValueError when it is refitted after n_estimators is lowered to 4.
    features, labels = make_hastie_10_2(n_samples=20, random_state=1)
    ensemble = BaggingClassifier(n_estimators=5, warm_start=True)
    ensemble.fit(features, labels)

    ensemble.set_params(n_estimators=4)
    assert_raises(ValueError, ensemble.fit, features, labels)

开发者ID:daniel-perry，项目名称:scikit-learn，代码行数:7，代码来源:test_bagging.py

示例12: test_classification_synthetic

def test_classification_synthetic():
    # Test GradientBoostingClassifier on synthetic dataset used by
    # Hastie et al. in ESLII Example 12.7.
    #
    # For each loss, a plain and a stochastic (subsample=0.5) model are
    # trained on the first 2000 samples and scored on the remaining ones.
    X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=1)

    X_train, X_test = X[:2000], X[2000:]
    y_train, y_test = y[:2000], y[2000:]

    for loss in ('deviance', 'exponential'):

        gbrt = GradientBoostingClassifier(n_estimators=100, min_samples_split=1,
                                          max_depth=1, loss=loss,
                                          learning_rate=1.0, random_state=0)
        gbrt.fit(X_train, y_train)
        error_rate = (1.0 - gbrt.score(X_test, y_test))
        assert error_rate < 0.09, \
            "GB(loss={}) failed with error {}".format(loss, error_rate)

        # Pass the loop's loss here as well: without it the stochastic model
        # always used the default loss, so only one loss was ever exercised
        # and the failure message named a loss that was not the one used.
        gbrt = GradientBoostingClassifier(n_estimators=200, min_samples_split=1,
                                          max_depth=1, loss=loss,
                                          learning_rate=1.0, subsample=0.5,
                                          random_state=0)
        gbrt.fit(X_train, y_train)
        error_rate = (1.0 - gbrt.score(X_test, y_test))
        assert error_rate < 0.08, ("Stochastic GradientBoostingClassifier(loss={}) "
                                   "failed with error {}".format(loss, error_rate))

开发者ID:BobChew，项目名称:scikit-learn，代码行数:26，代码来源:test_gradient_boosting.py

示例13: check_warm_start

def check_warm_start(name, random_state=42):
    # Growing a forest in two warm-started steps (5, then 10 trees) must
    # give a forest of the right size and the same results as fitting
    # 10 trees in one go.
    X, y = datasets.make_hastie_10_2(n_samples=20, random_state=1)
    ForestEstimator = FOREST_ESTIMATORS[name]

    def fit_and_check_size(forest, expected_size):
        forest.fit(X, y)
        assert_equal(len(forest), expected_size)

    # First step: a fresh warm-start forest with 5 trees.
    clf_ws = ForestEstimator(n_estimators=5,
                             random_state=random_state,
                             warm_start=True)
    fit_and_check_size(clf_ws, 5)

    # Second step: grow the same forest to 10 trees.
    clf_ws.set_params(n_estimators=10)
    fit_and_check_size(clf_ws, 10)

    clf_no_ws = ForestEstimator(n_estimators=10, random_state=random_state,
                                warm_start=False)
    clf_no_ws.fit(X, y)

    # Both forests must contain trees with the same set of random states.
    seeds_ws = {tree.random_state for tree in clf_ws}
    seeds_no_ws = {tree.random_state for tree in clf_no_ws}
    assert_equal(seeds_ws, seeds_no_ws)

    assert_array_equal(clf_ws.apply(X), clf_no_ws.apply(X),
                       err_msg="Failed with {0}".format(name))

开发者ID:EddieBurning，项目名称:scikit-learn，代码行数:25，代码来源:test_forest.py

示例14: test_max_samples_consistency

def test_max_samples_consistency():
    # When the user supplies a valid integer max_samples, the validated
    # value stored on the fitted ensemble must be that same integer.
    max_samples = 100
    X, y = make_hastie_10_2(n_samples=2 * max_samples, random_state=1)
    ensemble = BaggingClassifier(
        KNeighborsClassifier(),
        max_samples=max_samples,
        max_features=0.5,
        random_state=1,
    )
    ensemble.fit(X, y)
    assert_equal(ensemble._max_samples, max_samples)

开发者ID:agamemnonc，项目名称:scikit-learn，代码行数:8，代码来源:test_bagging.py

示例15: test_oob_score_consistency

def test_oob_score_consistency():
    # With random_state, base estimator and training data all fixed, fitting
    # the same ensemble twice must give identical OOB scores.
    X, y = make_hastie_10_2(n_samples=200, random_state=1)
    ensemble = BaggingClassifier(KNeighborsClassifier(), max_samples=0.5,
                                 max_features=0.5, oob_score=True,
                                 random_state=1)
    # Read each score right after its own fit, before the next fit runs.
    first_score = ensemble.fit(X, y).oob_score_
    second_score = ensemble.fit(X, y).oob_score_
    assert_equal(first_score, second_score)

开发者ID:daniel-perry，项目名称:scikit-learn，代码行数:8，代码来源:test_bagging.py

注：本文中的sklearn.datasets.make_hastie_10_2函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。