Python test_estimators.generate_classification_data函数代码示例

本文整理汇总了Python中rep.test.test_estimators.generate_classification_data函数的典型用法代码示例。如果您正苦于以下问题：Python generate_classification_data函数的具体用法？Python generate_classification_data怎么用？Python generate_classification_data使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了generate_classification_data函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_cache_classifier

def test_cache_classifier():
    """Check when CacheClassifier / CacheRegressor reuse a cached fit.

    For both wrappers the test fits repeatedly and inspects ``_used_cache``
    after every fit: equal data under the same name is expected to hit the
    cache, while a different name, different data, different labels, an
    explicit ``sample_weight`` argument or different model parameters are
    expected to miss it.

    The cache is cleared before the run and again in a ``finally`` clause, so
    a failing assertion does not leave cache entries behind for other tests.
    """
    cache_helper.clear_cache()

    try:
        for Wrapper, Model in [(CacheClassifier, LogisticRegression), (CacheRegressor, LinearRegression)]:
            X, y, _ = generate_classification_data(n_classes=2)
            # first fit: expected to be a cache miss
            clf = Wrapper('first', Model()).fit(X, y)
            assert clf._used_cache == False
            # equal-valued copies of the data (X + 0, y + 0) are expected to hit the cache
            clf = Wrapper('first', Model()).fit(X + 0, y + 0)
            assert clf._used_cache == True
            # changed name
            clf = Wrapper('second', Model()).fit(X, y)
            assert clf._used_cache == False
            # changed data
            X_new = X.copy()
            X_new.iloc[0, 0] += 1
            clf = Wrapper('first', Model()).fit(X_new, y)
            assert clf._used_cache == False
            # changed labels
            y_new = y.copy()
            y_new[0] += 1
            clf = Wrapper('first', Model()).fit(X, y_new)
            assert clf._used_cache == False
            # explicit sample_weight argument
            # NOTE(review): the original comment said "added weights" but None is
            # passed rather than the generated weights -- confirm this is intended.
            clf = Wrapper('first', Model()).fit(X, y, sample_weight=None)
            assert clf._used_cache == False
            # changed parameters
            clf = Wrapper('first', Model(n_jobs=2)).fit(X, y)
            assert clf._used_cache == False
            # fitting previous once again. Checking that overwriting is correct.
            clf = Wrapper('first', Model(n_jobs=2)).fit(X, y)
            assert clf._used_cache == True
    finally:
        # always clean up, even when one of the assertions above fails
        cache_helper.clear_cache()

开发者ID:chrinide，项目名称:rep，代码行数:33，代码来源:test_meta_caching.py

示例2: check_folding

def check_folding(classifier, check_instance=True, has_staged_pp=True, has_importances=True):
    """Fit a folding classifier on generated data and validate its predictions.

    :param classifier: estimator under test; ``fit`` must return an object
        equal to the estimator itself.
    :param check_instance: forwarded to ``check_classification_model``.
    :param has_staged_pp: forwarded to ``check_classification_model``; when
        true, ``staged_predict_proba`` is additionally iterated and checked.
    :param has_importances: forwarded to ``check_classification_model``.
    """
    def mean_vote(x):
        # vote function handed to the predict calls: average along axis 0
        return numpy.mean(x, axis=0)

    X, y, sample_weight = generate_classification_data(distance=0.6)

    fitted = classifier.fit(X, y, sample_weight=sample_weight)
    assert classifier == fitted
    assert list(classifier.features) == list(X.columns)

    check_classification_model(
        classifier, X, y,
        check_instance=check_instance,
        has_staged_pp=has_staged_pp,
        has_importances=has_importances,
    )

    predicted_labels = classifier.predict(X, mean_vote)
    probabilities = classifier.predict_proba(X, mean_vote)
    # a repeated call must return exactly the same probabilities
    assert numpy.all(probabilities == classifier.predict_proba(X, mean_vote))

    accuracy = accuracy_score(y, predicted_labels)
    print(accuracy)
    assert accuracy > 0.7
    assert numpy.allclose(probabilities.sum(axis=1), 1), 'probabilities do not sum to 1'
    assert numpy.all(probabilities >= 0.), 'negative probabilities'

    area_under_roc = roc_auc_score(y, probabilities[:, 1])
    print(area_under_roc)
    assert area_under_roc > 0.8

    if not has_staged_pp:
        return

    expected_shape = (len(X), 2)
    for stage_probabilities in classifier.staged_predict_proba(X, mean_vote):
        assert stage_probabilities.shape == expected_shape
    # the last stage has to coincide with the final predict_proba output
    assert numpy.all(stage_probabilities == probabilities)

开发者ID:AlexanderTek，项目名称:rep，代码行数:30，代码来源:test_folding.py

示例3: very_basic_xgboost_test

def very_basic_xgboost_test():
    """Smoke-test XGBoostClassifier: fit, predict, and check importance ordering."""
    X, y, _ = generate_classification_data(n_classes=2)
    model = XGBoostClassifier(n_estimators=10).fit(X, y)

    # only checking that both prediction calls run; results are discarded
    model.predict(X)
    model.predict_proba(X)

    # the importances index must list the trained features in the same order
    trained_features = model.features
    importance_index = model.get_feature_importances().index
    assert numpy.all(trained_features == importance_index)

开发者ID:jithsjoy，项目名称:rep，代码行数:7，代码来源:test_xgboost.py

示例4: test_xgboost_works_with_different_dtypes

def test_xgboost_works_with_different_dtypes():
    """Check that XGBoostClassifier trains and predicts on several numeric dtypes.

    First all inputs (features, labels, weights) are cast to one dtype at a
    time; afterwards a single DataFrame mixing all dtypes column-wise is used.
    """
    dtypes = ['float32', 'float64', 'int32', 'int64', 'uint32']

    # homogeneous case: everything cast to the same dtype
    for dtype_name in dtypes:
        X, y, weights = generate_classification_data(n_classes=2, distance=5)
        model = XGBoostClassifier(n_estimators=10)
        typed_X = X.astype(dtype=dtype_name)
        typed_y = y.astype(dtype=dtype_name)
        typed_weights = weights.astype(dtype_name)
        model.fit(typed_X, typed_y, sample_weight=typed_weights)
        model.predict_proba(X.astype(dtype_name))

    # heterogeneous case: one pandas.DataFrame whose columns have different dtypes
    X, y, weights = generate_classification_data(n_classes=2, distance=5)
    import pandas
    n_samples = len(y)
    X = pandas.DataFrame()
    for dtype_name in dtypes:
        column = numpy.random.normal(0, 10, size=n_samples)
        X[dtype_name] = column.astype(dtype_name)
    model = XGBoostClassifier(n_estimators=10)
    model.fit(X, y, sample_weight=weights)
    model.predict_proba(X)

开发者ID:arogozhnikov，项目名称:rep，代码行数:17，代码来源:test_xgboost.py

示例5: test_pybrain_reproducibility

def test_pybrain_reproducibility():
    """Train two identically configured PyBrainClassifier models and compare them.

    The probabilities of both models are computed once, their difference is
    printed for diagnostics, and the very same arrays are then compared, so
    the printed diff always matches what the assertion evaluates.
    """
    # This test fails. Because PyBrain can't reproduce training.
    X, y, _ = generate_classification_data()
    clf1 = PyBrainClassifier(layers=[4], epochs=2).fit(X, y)
    clf2 = PyBrainClassifier(layers=[4], epochs=2).fit(X, y)

    proba1 = clf1.predict_proba(X)
    proba2 = clf2.predict_proba(X)
    print(proba1 - proba2)
    assert numpy.allclose(proba1, proba2), 'different predictions'
    check_classification_reproducibility(clf1, X, y)

开发者ID:chrinide，项目名称:rep，代码行数:8，代码来源:test_pybrain.py

示例6: test_nolearn_reproducibility

def test_nolearn_reproducibility():
    """Check that NolearnClassifier gives equal predictions on refit and on a clone."""
    X, y, _ = generate_classification_data()
    classifier = NolearnClassifier()

    # the same instance fitted twice
    first_run = classifier.fit(X, y).predict(X)
    second_run = classifier.fit(X, y).predict(X)
    refit_matches = (first_run == second_run).all()
    assert refit_matches, 'fitting the classifier twice gives different predictions'

    # a clone of the instance fitted on the same data
    cloned_run = clone(classifier).fit(X, y).predict(X)
    clone_matches = (first_run == cloned_run).all()
    assert clone_matches, 'cloned classifier gives different prediction'

开发者ID:a-berdnikov，项目名称:rep，代码行数:8，代码来源:test_nolearn.py

示例7: test_factory

def test_factory():
    """Exercise ClassifiersFactory: fitting, predictions, pickling and reports.

    This is a generator test: it ends by yielding
    ``(check_report_with_mask, report, mask, X)`` tuples for three kinds of
    mask (an expression string, a callable and ``None``).
    NOTE(review): presumably consumed by a runner with yield-test support
    (e.g. nose) -- confirm.
    """
    factory = ClassifiersFactory()
    # TMVA is optional: it is only added when rep.estimators.tmva can be imported
    try:
        from rep.estimators.tmva import TMVAClassifier
        factory.add_classifier('tmva', TMVAClassifier())
    except ImportError:
        pass
    factory.add_classifier('rf', RandomForestClassifier(n_estimators=10))
    factory.add_classifier('ada', AdaBoostClassifier(n_estimators=20))

    X, y, sample_weight = generate_classification_data()
    # fit is expected to return an object equal to the factory itself
    assert factory == factory.fit(X, y, sample_weight=sample_weight, features=list(X.columns),
                                  parallel_profile='threads-4')
    # every classifier must have been trained on all columns, in order
    for cl in factory.values():
        assert list(cl.features) == list(X.columns)
    proba = factory.predict_proba(X, parallel_profile='threads-4')
    labels = factory.predict(X, parallel_profile='threads-4')
    # per-classifier accuracy on the training data
    for key, val in labels.items():
        score = accuracy_score(y, val)
        print(key, score)
        assert score > 0.7, key

    # per-classifier sanity checks on the predicted probabilities
    for key, val in proba.items():
        assert numpy.allclose(val.sum(axis=1), 1), 'probabilities do not sum to 1'
        assert numpy.all(val >= 0.), 'negative probabilities'

        auc_score = roc_auc_score(y, val[:, 1])
        print(auc_score)
        assert auc_score > 0.8

    for key, iterator in factory.staged_predict_proba(X).items():
        # no 'tmva' entry may appear among the staged predictions
        assert key != 'tmva', 'tmva does not support staged pp'
        for p in iterator:
            assert p.shape == (len(X), 2)

        # checking that last iteration coincides with previous
        # (p still holds the last staged prediction after the inner loop)
        assert numpy.all(p == proba[key])

    # testing picklability
    dump_string = cPickle.dumps(factory)
    clf_loaded = cPickle.loads(dump_string)

    assert type(factory) == type(clf_loaded)

    # the unpickled factory must reproduce the original predictions exactly
    probs1 = factory.predict_proba(X)
    probs2 = clf_loaded.predict_proba(X)
    for key, val in probs1.items():
        assert numpy.all(val == probs2[key]), 'something strange was loaded'

    # report is rebound three times below; only the last one (from test_on)
    # is passed to the yielded checks
    report = ClassificationReport({'rf': factory['rf']}, LabeledDataStorage(X, y, sample_weight))
    report.feature_importance_shuffling(roc_auc_score_mod).plot(new_plot=True, figsize=(18, 3))
    report = factory.test_on_lds(LabeledDataStorage(X, y, sample_weight))
    report = factory.test_on(X, y, sample_weight=sample_weight)
    val = numpy.mean(X['column0'])
    # masks: an expression string, a callable, and None
    yield check_report_with_mask, report, "column0 > %f" % (val / 2.), X
    yield check_report_with_mask, report, lambda x: numpy.array(x['column0']) < val * 2., X
    yield check_report_with_mask, report, None, X

开发者ID:AlexanderTek，项目名称:rep，代码行数:57，代码来源:test_factory_clf.py

示例8: test_xgboost_random_states

def test_xgboost_random_states():
    """Check how the ``random_state`` argument affects XGBoostClassifier training.

    Two classifiers are built and fitted with the same ``random_state`` value.
    When that value is a ``numpy.random.RandomState`` instance (shared by both
    classifiers) the predictions are asserted to differ; for the other values
    (an int or None) they are asserted to be close.
    """
    X, y, _ = generate_classification_data(n_classes=2, distance=5)
    candidate_states = [145, None, check_random_state(None), check_random_state(145)]
    common_params = dict(n_estimators=5, max_depth=1, subsample=0.1)

    for random_state in candidate_states:
        first = XGBoostClassifier(random_state=random_state, **common_params)
        first.fit(X, y)
        second = XGBoostClassifier(random_state=random_state, **common_params)
        second.fit(X, y)

        predictions_match = numpy.allclose(first.predict_proba(X), second.predict_proba(X))
        if isinstance(random_state, numpy.random.RandomState):
            assert not predictions_match, 'seed: {}'.format(random_state)
        else:
            assert predictions_match, 'seed: {}'.format(random_state)

开发者ID:arogozhnikov，项目名称:rep，代码行数:11，代码来源:test_xgboost.py

示例9: test_xgboost_feature_importance

def test_xgboost_feature_importance():
    """Check that XGBoostClassifier reports an importance for every input column."""
    X, y, _ = generate_classification_data(n_classes=2, distance=5)
    model = XGBoostClassifier(n_estimators=1, max_depth=1)
    model.fit(X, y)

    importance_table = model.get_feature_importances()
    expected_names = set(X.columns)
    reported_names = set(importance_table.index)
    print(expected_names, reported_names)
    # names in the importance table must be exactly the training columns
    assert expected_names == reported_names, 'feature_importances_ return something wrong'

    # and the raw importance array needs one entry per column
    assert len(expected_names) == len(model.feature_importances_)

开发者ID:arogozhnikov，项目名称:rep，代码行数:11，代码来源:test_xgboost.py

示例10: test_factory

def test_factory():
    """Exercise RegressorsFactory: fitting, predictions, pickling and reports.

    The regressors are trained on the output of
    ``generate_classification_data`` and checked by mean squared error,
    staged predictions, a pickle round-trip and several report calls.
    """
    factory = RegressorsFactory()
    # TMVA is optional: it is only added when rep.estimators.tmva can be imported
    try:
        from rep.estimators.tmva import TMVARegressor
        factory.add_regressor('tmva', TMVARegressor())
    except ImportError:
        pass
    factory.add_regressor('rf', RandomForestRegressor(n_estimators=10))
    factory.add_regressor('ada', AdaBoostRegressor(n_estimators=20))

    X, y, sample_weight = generate_classification_data()
    # fit is expected to return an object equal to the factory itself
    assert factory == factory.fit(X, y, sample_weight=sample_weight, features=list(X.columns))
    values = factory.predict(X)

    # every regressor must have been trained on all columns, in order
    for cl in factory.values():
        assert list(cl.features) == list(X.columns)

    # per-regressor mean squared error on the training data
    for key, val in values.items():
        score = mean_squared_error(y, val)
        print(score)
        assert score < 0.2

    for key, iterator in factory.staged_predict(X).items():
        # no 'tmva' entry may appear among the staged predictions
        assert key != 'tmva', 'tmva does not support staged pp'
        for p in iterator:
            assert p.shape == (len(X), )

        # checking that last iteration coincides with previous
        # (p still holds the last staged prediction after the inner loop)
        assert numpy.all(p == values[key])

    # testing picklability
    dump_string = cPickle.dumps(factory)
    clf_loaded = cPickle.loads(dump_string)

    assert type(factory) == type(clf_loaded)

    # the unpickled factory must reproduce the original predictions exactly
    probs1 = factory.predict(X)
    probs2 = clf_loaded.predict(X)
    for key, val in probs1.items():
        assert numpy.all(val == probs2[key]), 'something strange was loaded'

    # report is rebound three times below; the calls after this group use the
    # last one (from test_on)
    report = RegressionReport({'rf': factory['rf']}, LabeledDataStorage(X, y, sample_weight))
    report.feature_importance_shuffling(mean_squared_mod).plot(new_plot=True, figsize=(18, 3))
    report = factory.test_on_lds(LabeledDataStorage(X, y, sample_weight))
    report = factory.test_on(X, y, sample_weight=sample_weight)
    # return values are discarded: these calls are only run, not checked here
    report.feature_importance()
    report.features_correlation_matrix()
    report.predictions_scatter()

    val = numpy.mean(X['column0'])
    # masks: an expression string, a callable, and None
    report_mask(report, "column0 > %f" % val, X)
    report_mask(report, lambda x: numpy.array(x['column0']) < val, X)
    report_mask(report, None, X)

开发者ID:AlexanderTek，项目名称:rep，代码行数:53，代码来源:test_factory_reg.py

示例11: test_gridsearch_threads

def test_gridsearch_threads(n_threads=3):
    """Run GridOptimalSearchCV with a thread-based parallel profile.

    :param n_threads: number of threads placed into the ``'threads-N'``
        parallel profile string.

    The parameter grid is built from a list of pairs so that the OrderedDict
    really preserves the written order; constructing it from a dict literal
    leaves the order up to the interpreter's dict implementation.
    """
    # the metric is picked at random between OptimalAMS and RocAuc on every run
    scorer = FoldingScorer(numpy.random.choice([OptimalAMS(), RocAuc()]))

    grid_param = OrderedDict([
        ("n_estimators", [10, 20]),
        ("learning_rate", [0.1, 0.05]),
        ('features', [['column0', 'column1'], ['column0', 'column1', 'column2']]),
    ])
    generator = RegressionParameterOptimizer(grid_param, n_evaluations=4)

    base = SklearnClassifier(clf=AdaBoostClassifier())
    grid = GridOptimalSearchCV(base, generator, scorer, parallel_profile='threads-{}'.format(n_threads))

    X, y, sample_weight = generate_classification_data()
    grid.fit(X, y, sample_weight=sample_weight)

开发者ID:arogozhnikov，项目名称:rep，代码行数:13，代码来源:test_grid.py

示例12: test_own_classification_reports

def test_own_classification_reports():
    """
    Check that clf.test_on and clf.test_on_lds produce the same RocAuc value.
    """
    X, y, sample_weight = generate_classification_data()
    clf = SklearnClassifier(RandomForestClassifier())
    clf.fit(X, y, sample_weight=sample_weight)

    # report built directly from arrays
    direct_report = clf.test_on(X, y, sample_weight=sample_weight)
    auc_direct = direct_report.compute_metric(RocAuc())

    # report built from the same data wrapped into a LabeledDataStorage
    storage = LabeledDataStorage(X, y, sample_weight=sample_weight)
    storage_report = clf.test_on_lds(lds=storage)
    auc_from_storage = storage_report.compute_metric(RocAuc())

    assert auc_direct == auc_from_storage, 'Something wrong with test_on'

开发者ID:jithsjoy，项目名称:rep，代码行数:13，代码来源:test_factory.py

示例13: test_folding_regressor_functions

def test_folding_regressor_functions():
    """Run FoldingRegressor on DataFrame and ndarray input and compare staged output."""
    data, y, sample_weight = generate_classification_data()

    # the same checks for the original data and for its numpy.array conversion
    for X in (data, numpy.array(data)):
        base_regressor = SklearnRegressor(GradientBoostingRegressor(n_estimators=5))
        kfolder = FoldingRegressor(base_regressor, n_folds=2)
        kfolder.fit(X, y, sample_weight=sample_weight)
        final_prediction = kfolder.predict(X)

        # exhaust the staged predictions; only the last stage is compared
        for last_stage in kfolder.staged_predict(X):
            pass
        assert numpy.allclose(last_stage, final_prediction)

        # both importance accessors are only called, their values are not checked
        _ = kfolder.feature_importances_
        _ = kfolder.get_feature_importances()

开发者ID:AlexanderTek，项目名称:rep，代码行数:14，代码来源:test_folding.py

示例14: test_feature_importances

def test_feature_importances():
    """Compare the three ways of reading feature importances from XGBoostClassifier."""
    clf = XGBoostClassifier()
    X, y, sample_weight = generate_classification_data()
    clf.fit(X, y, sample_weight=sample_weight)

    # 1) the wrapped booster, 2) the wrapper's private helper, 3) the public attribute
    booster_scores = clf.xgboost_classifier.get_fscore()
    wrapper_scores = clf._get_fscore()
    importances = clf.feature_importances_

    assert booster_scores == wrapper_scores, booster_scores

    # every positive importance must equal the booster score stored under 'f<position>'
    for position, importance in enumerate(importances):
        if not importance > 0.0:
            continue
        assert importance == booster_scores['f' + str(position)]

开发者ID:0x0all，项目名称:rep，代码行数:14，代码来源:test_xgboost.py

示例15: test_feature_splitter

def test_feature_splitter():
    """Train FeatureSplitter on data shifted per split value and check its score.

    The first column is overwritten with a random split value in {0, 1, 2};
    all columns of the rows with value 1 are shifted by +4 and of the rows
    with value 2 by -4. The base XGBoostClassifier is trained on every column
    except the first one, which is used as the split feature.

    Rows are selected with ``DataFrame.loc`` rather than the ``.ix`` indexer,
    which no longer exists in current pandas releases.
    """
    # testing splitter
    from rep.metaml import FeatureSplitter

    X, y, sample_weight = generate_classification_data(n_classes=3)
    split_column = X.columns[0]
    splitters = numpy.random.randint(0, 3, size=len(X))
    X[split_column] = splitters
    # boolean row masks; the shift applies to every column, including the split column
    X.loc[splitters == 1, :] += 4
    X.loc[splitters == 2, :] -= 4
    fs = FeatureSplitter(base_estimator=XGBoostClassifier(features=list(X.columns[1:]), n_estimators=10, max_depth=3),
                         split_feature=split_column)
    fs.fit(X, y, sample_weight=sample_weight)
    assert fs.score(X, y) > 0.9

开发者ID:0x0all，项目名称:rep，代码行数:14，代码来源:test_stacking.py

注：本文中的rep.test.test_estimators.generate_classification_data函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。