本文整理汇总了Python中sklearn.ensemble.BaggingClassifier类的典型用法代码示例。如果您正苦于以下问题:Python BaggingClassifier类的具体用法?Python BaggingClassifier怎么用?Python BaggingClassifier使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了BaggingClassifier类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_warm_start_equal_n_estimators
def test_warm_start_equal_n_estimators():
    """Refitting with warm_start but an unchanged n_estimators is a no-op."""
    data, labels = make_hastie_10_2(n_samples=20, random_state=1)
    data_train, data_test, labels_train, labels_test = train_test_split(
        data, labels, random_state=43)

    ensemble = BaggingClassifier(n_estimators=5, warm_start=True,
                                 random_state=83)
    ensemble.fit(data_train, labels_train)
    first_predictions = ensemble.predict(data_test)

    # Corrupt the training data; a no-op refit must ignore it entirely.
    data_train += 1.
    assert_warns_message(
        UserWarning,
        "Warm-start fitting without increasing n_estimators does not",
        ensemble.fit, data_train, labels_train)
    assert_array_equal(first_predictions, ensemble.predict(data_test))
示例2: test_estimators_samples
def test_estimators_samples():
    """estimators_samples_ has the right format and allows exact refits.

    The per-estimator sample/feature indices stored on the fitted bagger
    must reproduce the original training subsets, so refitting a member
    estimator on them yields identical coefficients.
    """
    X, y = make_hastie_10_2(n_samples=200, random_state=1)
    bagging = BaggingClassifier(LogisticRegression(), max_samples=0.5,
                                max_features=0.5, random_state=1,
                                bootstrap=False)
    bagging.fit(X, y)

    samples_per_member = bagging.estimators_samples_
    features_per_member = bagging.estimators_features_
    members = bagging.estimators_

    # One index array per member, half the rows each, integer dtype.
    assert_equal(len(samples_per_member), len(members))
    assert_equal(len(samples_per_member[0]), len(X) // 2)
    assert_equal(samples_per_member[0].dtype.kind, 'i')

    # Refit the first member on its recorded subset: coefficients must match.
    member = members[0]
    rows = samples_per_member[0]
    cols = features_per_member[0]
    subset_X = (X[rows])[:, cols]
    subset_y = y[rows]
    coefs_before = member.coef_
    member.fit(subset_X, subset_y)
    assert_array_almost_equal(coefs_before, member.coef_)
示例3: query_by_bagging
def query_by_bagging(X, y, current_model, batch_size, rng, base_model=None, n_bags=5, method="KL", D=None):
    """Active-learning query strategy using a bagged committee (Settles, p. 17).

    :param X: feature matrix; rows selected via ``y.known`` / ``y.unknown_ids``
    :param y: label container exposing ``known`` (bool mask) and ``unknown_ids``
    :param current_model: unused here; kept for strategy-interface compatibility
    :param batch_size: number of examples to query
    :param rng: random state forwarded to the bagging ensemble
    :param base_model: model that will be **fitted every iteration**.
        Defaults to a fresh ``SVC(C=1, kernel='linear')`` built per call —
        previously the default instance was created once at function
        definition time and shared across all calls (mutable-default bug).
    :param n_bags: number of bags on which to train n_bags models
    :param method: 'entropy' or 'KL'
    :param D: unused; kept for strategy-interface compatibility
    :return: (queried example ids, fitness scores scaled so the max is 1)
    """
    assert method == 'entropy' or method == 'KL'
    if base_model is None:
        # Construct the default lazily so no instance is shared between calls.
        base_model = SVC(C=1, kernel='linear')
    eps = 0.0000001
    if method == 'KL':
        assert hasattr(base_model, 'predict_proba'), "Model with probability prediction needs to be passed to this strategy!"
    clfs = BaggingClassifier(base_model, n_estimators=n_bags, random_state=rng)
    clfs.fit(X[y.known], y[y.known])
    # Hoisted: the unlabeled-row mask was previously recomputed for pc and p.
    unknown_mask = np.invert(y.known)
    pc = clfs.predict_proba(X[unknown_mask])
    # Settles page 17
    if method == 'entropy':
        pc += eps
        fitness = np.sum(pc * np.log(pc), axis=1)
        ids = np.argsort(fitness)[:batch_size]
    elif method == 'KL':
        p = np.array([clf.predict_proba(X[unknown_mask]) for clf in clfs.estimators_])
        fitness = np.mean(np.sum(p * np.log(p / pc), axis=2), axis=0)
        ids = np.argsort(fitness)[-batch_size:]
    return y.unknown_ids[ids], fitness/np.max(fitness)
示例4: test_estimators_samples_deterministic
def test_estimators_samples_deterministic():
    """Regression test for issue #9524: member refits are reproducible.

    Even with a random step in the pipeline (SparseRandomProjection) and a
    fixed random state, the sample/feature indices saved on the fitted
    bagger must be enough to reproduce the first member's coefficients.
    """
    iris = load_iris()
    X, y = iris.data, iris.target

    projection_pipeline = make_pipeline(SparseRandomProjection(n_components=2),
                                        LogisticRegression())
    clf = BaggingClassifier(base_estimator=projection_pipeline,
                            max_samples=0.5,
                            random_state=0)
    clf.fit(X, y)

    # Snapshot the logistic-regression coefficients of the first pipeline.
    coef_snapshot = clf.estimators_[0].steps[-1][1].coef_.copy()

    member = clf.estimators_[0]
    rows = clf.estimators_samples_[0]
    cols = clf.estimators_features_[0]
    member.fit((X[rows])[:, cols], y[rows])
    assert_array_equal(member.steps[-1][1].coef_, coef_snapshot)
示例5: ADABoost
class ADABoost(Base):
    """Bagged hinge-loss SGD classifier over scaled features.

    NOTE(review): despite the name, the active code builds a
    BaggingClassifier over an SGD pipeline; the AdaBoost variants are
    commented out below.
    """

    def train(self, data = None, plugin=None):
        """ With dataframe train mllib """
        super(ADABoost, self).train(data, plugin)
        #cl = svm.SVC(gamma=0.001, C= 100, kernel='linear', probability=True)
        # Last column of X_train is the target; the rest are features.
        X = self.X_train.iloc[:,:-1]
        Y = self.X_train.iloc[:,-1]
        self.scaler = StandardScaler().fit(X)
        X = self.scaler.transform(X)
        cl = SGDClassifier(loss='hinge')
        # NOTE(review): the pipeline applies self.scaler again even though X
        # was already transformed above, so training data is scaled twice.
        # predict() does the same, so train/predict stay consistent — confirm
        # whether the double scaling is intended.
        p = Pipeline([("Scaler", self.scaler), ("svm", cl)])
        self.clf = BaggingClassifier(p, n_estimators=50)
        #self.clf = AdaBoostClassifier(p, n_estimators=10)
        #self.clf = AdaBoostClassifier(SGDClassifier(loss='hinge'),algorithm='SAMME', n_estimators=10)
        self.clf.fit(X, Y)

    def predict(self, file, plugin=None):
        """Scale the file's feature vector for `plugin`, predict, and map the
        raw prediction to a tag via self.getTag()."""
        super(ADABoost, self).predict(file, plugin)
        data = file.vector
        X = data[plugin]
        X = self.scaler.transform(X)
        guess = self.clf.predict(X)
        return self.getTag(guess)
示例6: baggedDecisionTree
def baggedDecisionTree( X_train, y_train, X_test, y_test, nEstimators ):
    """Fit a bagged decision-tree ensemble, print OOB and test scores."""
    print("\n### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###")
    print("baggedDecisionTree()\n")

    ensemble = BaggingClassifier(
        base_estimator = DecisionTreeClassifier(),
        n_estimators   = nEstimators,
        # max_samples  = X_train.shape[0],
        bootstrap      = True,
        oob_score      = True,
        n_jobs         = -1  # use all available cores
        )

    ensemble.fit(X_train,y_train)
    predictions = ensemble.predict(X_test)

    print( "nEstimators: " + str(nEstimators) )
    print( "out-of-bag score: " + str(ensemble.oob_score_) )
    print( "accuracy score: " + str(accuracy_score(y_test,predictions)) )
    print( "out-of-bag decision function:" )
    print( str(ensemble.oob_decision_function_) )

    return( None )
示例7: bagging
def bagging(X_train, X_test, y_train, y_test, n_est):
    """Plot train/test accuracy of bagged decision trees vs. ensemble size.

    Trains one BaggingClassifier for every estimator count in
    ``range(1, n_est)`` and plots both accuracy curves.

    Fixes over the previous version:
    - ``n_est`` was unconditionally overwritten with 51, silently ignoring
      the caller's argument; it is now honoured.
    - ``scores1``/``scores2`` were appended to without ever being
      initialised (NameError at runtime); they are now local lists.
    """
    estimators = range(1, n_est)
    scores1 = []  # test-set accuracies
    scores2 = []  # train-set accuracies
    decision_clf = DecisionTreeClassifier()
    for est in estimators:
        bagging_clf = BaggingClassifier(decision_clf, n_estimators=est,
                                        max_samples=0.67, max_features=0.67,
                                        bootstrap=True, random_state=9)
        bagging_clf.fit(X_train, y_train)
        # test line
        y_pred_bagging1 = bagging_clf.predict(X_test)
        score_bc_dt1 = accuracy_score(y_test, y_pred_bagging1)
        scores1.append(score_bc_dt1)
        # train line
        y_pred_bagging2 = bagging_clf.predict(X_train)
        score_bc_dt2 = accuracy_score(y_train, y_pred_bagging2)
        scores2.append(score_bc_dt2)
    plt.figure(figsize=(10, 6))
    plt.title('Bagging Info')
    plt.xlabel('Estimators')
    plt.ylabel('Scores')
    plt.plot(estimators,scores1,'g',label='test line', linewidth=3)
    plt.plot(estimators,scores2,'c',label='train line', linewidth=3)
    plt.legend()
    plt.show()
示例8: BaggingSK
class BaggingSK(PoolGenerator):
    '''
    This class should not be used, use brew.generation.bagging.Bagging instead.
    '''

    def __init__(self, base_classifier=None, n_classifiers=100, combination_rule='majority_vote'):
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        # using the sklearn implementation of bagging for now
        self.sk_bagging = BaggingClassifier(
            base_estimator=base_classifier,
            n_estimators=n_classifiers,
            max_samples=1.0,
            max_features=1.0)
        self.ensemble = Ensemble()
        self.combiner = Combiner(rule=combination_rule)

    def fit(self, X, y):
        # Delegate training to sklearn, then expose the fitted members
        # through the brew Ensemble wrapper.
        self.sk_bagging.fit(X, y)
        self.ensemble.add_classifiers(self.sk_bagging.estimators_)
        #self.classes_ = set(y)

    def predict(self, X):
        member_outputs = self.ensemble.output(X)
        return self.combiner.combine(member_outputs)
示例9: test_oob_score_classification
def test_oob_score_classification():
    """OOB score approximates the generalization error on iris.

    Also checks that a UserWarning is raised when there are too few
    estimators for a reliable out-of-bag estimate.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    for base_estimator in [DecisionTreeClassifier(), SVC()]:
        clf = BaggingClassifier(base_estimator=base_estimator,
                                n_estimators=100,
                                bootstrap=True,
                                oob_score=True,
                                random_state=rng).fit(X_train, y_train)
        # OOB estimate should be within 0.1 of the held-out accuracy.
        assert_less(abs(clf.score(X_test, y_test) - clf.oob_score_), 0.1)

        # With a single estimator the OOB estimate is unreliable: warn.
        few = BaggingClassifier(base_estimator=base_estimator,
                                n_estimators=1,
                                bootstrap=True,
                                oob_score=True,
                                random_state=rng)
        assert_warns(UserWarning, few.fit, X_train, y_train)
示例10: test_bagging_sample_weight_unsupported_but_passed
def test_bagging_sample_weight_unsupported_but_passed():
    """sample_weight must be rejected when the base estimator lacks it."""
    clf = BaggingClassifier(DummyZeroEstimator())
    rng = check_random_state(0)
    # Fitting and predicting without weights works fine.
    clf.fit(iris.data, iris.target).predict(iris.data)
    # Passing weights must raise: DummyZeroEstimator takes no sample_weight.
    weights = rng.randint(10, size=(iris.data.shape[0]))
    assert_raises(ValueError, clf.fit, iris.data, iris.target,
                  sample_weight=weights)
示例11: test_warm_start_smaller_n_estimators
def test_warm_start_smaller_n_estimators():
    """Shrinking n_estimators on a warm-started refit must raise ValueError."""
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    ensemble = BaggingClassifier(n_estimators=5, warm_start=True)
    ensemble.fit(X, y)
    # Request fewer estimators than already fitted — the next fit must fail.
    ensemble.set_params(n_estimators=4)
    assert_raises(ValueError, ensemble.fit, X, y)
示例12: test_bagging_with_pipeline
def test_bagging_with_pipeline():
    """Bagging a Pipeline base estimator seeds its final step's RNG."""
    bagger = BaggingClassifier(
        make_pipeline(SelectKBest(k=1), DecisionTreeClassifier()),
        max_features=2)
    bagger.fit(iris.data, iris.target)
    # The tree inside the first bagged pipeline must have received an
    # integer random_state from the ensemble.
    tree_random_state = bagger[0].steps[-1][1].random_state
    assert_true(isinstance(tree_random_state, int))
示例13: train_classifiers
def train_classifiers(data):
    """Train a bagged AdaBoost classifier on crime data and print accuracy.

    Expects ``data`` to be a DataFrame containing the feature columns in
    ``train_vars`` plus a ``CategoryInt`` target column. Mutates ``data``
    in place (fills Precipitationmm NaNs, ordinal-encodes Conditions).

    Fix: the trailing ``print len(...)`` statements were Python 2 syntax
    (SyntaxError on Python 3); the rest of this file uses ``print(...)``
    calls, so they are normalised to the function form.
    """
    train_vars = [
        'X', 'Y',
        'Darkness',
        'Moon',
        'Hour',
        'DayOfWeekInt',
        'Day',
        'Month',
        'Year',
        'PdDistrictInt',
        'TemperatureC',
        'Precipitationmm',
        'InPdDistrict',
        'Conditions',
        'AddressCode',
    ]
    # Ordinal encoding of rain intensity; unmapped conditions become 0.
    weather_mapping = {
        'Light Drizzle': 1,
        'Drizzle': 2,
        'Light Rain': 3,
        'Rain': 4,
        'Heavy Rain': 5,
        'Thunderstorm': 6,
    }
    data.Precipitationmm = data.Precipitationmm.fillna(-1)
    data.Conditions = data.Conditions.map(weather_mapping).fillna(0)

    train, test = split(data)
    X_train = train[train_vars]
    y_train = train.CategoryInt
    X_test = test[train_vars]
    y_test = test.CategoryInt

    bdt_real_2 = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=8),
        n_estimators=10,
        learning_rate=1
    )
    #bdt_real = DecisionTreeClassifier(max_depth=None, min_samples_split=1,
    #random_state=6065)
    bdt_real = BaggingClassifier(base_estimator=bdt_real_2,
                                 random_state=6065,
                                 n_estimators=100)
    #bdt_real = RandomForestClassifier(random_state=6065,
    #n_estimators=200)
    #bdt_real = ExtraTreesClassifier(random_state=6065,
    #min_samples_split=5,
    #n_estimators=200)
    bdt_real.fit(X_train, y_train)

    y_predict = pandas.Series(bdt_real.predict(X_test))
    # NOTE(review): y_predict gets a fresh 0..n-1 index while y_test keeps
    # the split's index; the == comparison aligns on index — confirm split()
    # resets indices, otherwise this correct-count is unreliable.
    print(len(y_predict[y_predict == y_test]))
    print(len(y_predict))
    return bdt_real
示例14: create_estimators
def create_estimators(self, X_train, y_train, X_test):
    """For every model and every point of its parameter grid, fit a bagged
    ensemble and collect the positive-class probabilities for X_test."""
    for base_model in self.models:
        grid = self.create_parameter_grid(base_model)
        for params in grid:
            configured = base_model.set_params(**params)
            bagger = BaggingClassifier(base_estimator=configured,
                                       n_estimators=self.estimators,
                                       max_samples=0.95, n_jobs=3)
            bagger.fit(X_train, y_train)
            # Column 1 holds the probability of the positive class.
            self.predictions.append(bagger.predict_proba(X_test)[:, 1])
示例15: classification
def classification(self, x_train, y_train):
    """Fit a bagged decision tree and print its predictions on the
    training data (a sanity check, not a held-out evaluation).

    Fix: the original used Python 2 ``print`` statements, which are a
    SyntaxError under Python 3; parenthesised calls work on both and match
    the print style used elsewhere in this file.
    """
    ml = BaggingClassifier(DecisionTreeClassifier())
    ml.fit(x_train, y_train)
    y_pred = ml.predict(x_train)
    print('y_train ', y_train)
    print('y_pred ', y_pred.tolist())