本文整理汇总了Python中sklearn.ensemble.BaggingClassifier类的典型用法代码示例。如果您正苦于以下问题:Python BaggingClassifier类的具体用法?Python BaggingClassifier怎么用?Python BaggingClassifier使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了BaggingClassifier类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_warm_start_equal_n_estimators
def test_warm_start_equal_n_estimators():
    """Refitting with warm_start but an unchanged n_estimators is a no-op."""
    data, labels = make_hastie_10_2(n_samples=20, random_state=1)
    data_train, data_test, labels_train, labels_test = train_test_split(
        data, labels, random_state=43)

    ensemble = BaggingClassifier(n_estimators=5, warm_start=True,
                                 random_state=83)
    ensemble.fit(data_train, labels_train)
    first_predictions = ensemble.predict(data_test)

    # Corrupt the training data; a no-op refit must ignore it entirely.
    data_train += 1.
    assert_warns_message(
        UserWarning,
        "Warm-start fitting without increasing n_estimators does not",
        ensemble.fit, data_train, labels_train)
    assert_array_equal(first_predictions, ensemble.predict(data_test))
示例2: test_estimators_samples
def test_estimators_samples():
    """estimators_samples_ has the right format and allows exact refits.

    The per-estimator sample/feature indices stored on the fitted bagger
    must reproduce the original training subsets, so refitting a member
    estimator on them yields identical coefficients.
    """
    X, y = make_hastie_10_2(n_samples=200, random_state=1)
    bagging = BaggingClassifier(LogisticRegression(), max_samples=0.5,
                                max_features=0.5, random_state=1,
                                bootstrap=False)
    bagging.fit(X, y)

    samples_per_member = bagging.estimators_samples_
    features_per_member = bagging.estimators_features_
    members = bagging.estimators_

    # One index array per member, half the rows each, integer dtype.
    assert_equal(len(samples_per_member), len(members))
    assert_equal(len(samples_per_member[0]), len(X) // 2)
    assert_equal(samples_per_member[0].dtype.kind, 'i')

    # Refit the first member on its recorded subset: coefficients must match.
    member = members[0]
    rows = samples_per_member[0]
    cols = features_per_member[0]
    subset_X = (X[rows])[:, cols]
    subset_y = y[rows]
    coefs_before = member.coef_
    member.fit(subset_X, subset_y)
    assert_array_almost_equal(coefs_before, member.coef_)
示例3: query_by_bagging
def query_by_bagging(X, y, current_model, batch_size, rng, base_model=None, n_bags=5, method="KL", D=None):
    """Active-learning query strategy using a bagged committee (Settles, p. 17).

    :param X: feature matrix; rows selected via ``y.known`` / ``y.unknown_ids``
    :param y: label container exposing ``known`` (bool mask) and ``unknown_ids``
    :param current_model: unused here; kept for strategy-interface compatibility
    :param batch_size: number of examples to query
    :param rng: random state forwarded to the bagging ensemble
    :param base_model: model that will be **fitted every iteration**.
        Defaults to a fresh ``SVC(C=1, kernel='linear')`` built per call —
        previously the default instance was created once at function
        definition time and shared across all calls (mutable-default bug).
    :param n_bags: number of bags on which to train n_bags models
    :param method: 'entropy' or 'KL'
    :param D: unused; kept for strategy-interface compatibility
    :return: (queried example ids, fitness scores scaled so the max is 1)
    """
    assert method == 'entropy' or method == 'KL'
    if base_model is None:
        # Construct the default lazily so no instance is shared between calls.
        base_model = SVC(C=1, kernel='linear')
    eps = 0.0000001
    if method == 'KL':
        assert hasattr(base_model, 'predict_proba'), "Model with probability prediction needs to be passed to this strategy!"
    clfs = BaggingClassifier(base_model, n_estimators=n_bags, random_state=rng)
    clfs.fit(X[y.known], y[y.known])
    # Hoisted: the unlabeled-row mask was previously recomputed for pc and p.
    unknown_mask = np.invert(y.known)
    pc = clfs.predict_proba(X[unknown_mask])
    # Settles page 17
    if method == 'entropy':
        pc += eps
        fitness = np.sum(pc * np.log(pc), axis=1)
        ids = np.argsort(fitness)[:batch_size]
    elif method == 'KL':
        p = np.array([clf.predict_proba(X[unknown_mask]) for clf in clfs.estimators_])
        fitness = np.mean(np.sum(p * np.log(p / pc), axis=2), axis=0)
        ids = np.argsort(fitness)[-batch_size:]
    return y.unknown_ids[ids], fitness/np.max(fitness)
示例4: test_estimators_samples_deterministic
def test_estimators_samples_deterministic():
    """Regression test for issue #9524: member refits are reproducible.

    Even with a random step in the pipeline (SparseRandomProjection) and a
    fixed random state, the sample/feature indices saved on the fitted
    bagger must be enough to reproduce the first member's coefficients.
    """
    iris = load_iris()
    X, y = iris.data, iris.target

    projection_pipeline = make_pipeline(SparseRandomProjection(n_components=2),
                                        LogisticRegression())
    clf = BaggingClassifier(base_estimator=projection_pipeline,
                            max_samples=0.5,
                            random_state=0)
    clf.fit(X, y)

    # Snapshot the logistic-regression coefficients of the first pipeline.
    coef_snapshot = clf.estimators_[0].steps[-1][1].coef_.copy()

    member = clf.estimators_[0]
    rows = clf.estimators_samples_[0]
    cols = clf.estimators_features_[0]
    member.fit((X[rows])[:, cols], y[rows])
    assert_array_equal(member.steps[-1][1].coef_, coef_snapshot)
示例5: ADABoost
class ADABoost(Base):
    """Bagged hinge-loss SGD classifier over scaled features.

    NOTE(review): despite the name, the active code builds a
    BaggingClassifier over an SGD pipeline; the AdaBoost variants are
    commented out below.
    """

    def train(self, data = None, plugin=None):
        """ With dataframe train mllib """
        super(ADABoost, self).train(data, plugin)
        #cl = svm.SVC(gamma=0.001, C= 100, kernel='linear', probability=True)
        # Last column of X_train is the target; the rest are features.
        X = self.X_train.iloc[:,:-1]
        Y = self.X_train.iloc[:,-1]
        self.scaler = StandardScaler().fit(X)
        X = self.scaler.transform(X)
        cl = SGDClassifier(loss='hinge')
        # NOTE(review): the pipeline applies self.scaler again even though X
        # was already transformed above, so training data is scaled twice.
        # predict() does the same, so train/predict stay consistent — confirm
        # whether the double scaling is intended.
        p = Pipeline([("Scaler", self.scaler), ("svm", cl)])
        self.clf = BaggingClassifier(p, n_estimators=50)
        #self.clf = AdaBoostClassifier(p, n_estimators=10)
        #self.clf = AdaBoostClassifier(SGDClassifier(loss='hinge'),algorithm='SAMME', n_estimators=10)
        self.clf.fit(X, Y)

    def predict(self, file, plugin=None):
        """Scale the file's feature vector for `plugin`, predict, and map the
        raw prediction to a tag via self.getTag()."""
        super(ADABoost, self).predict(file, plugin)
        data = file.vector
        X = data[plugin]
        X = self.scaler.transform(X)
        guess = self.clf.predict(X)
        return self.getTag(guess)
示例6: baggedDecisionTree
def baggedDecisionTree( X_train, y_train, X_test, y_test, nEstimators ):
    """Fit a bagged decision-tree ensemble, print OOB and test scores."""
    print("\n### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###")
    print("baggedDecisionTree()\n")

    ensemble = BaggingClassifier(
        base_estimator = DecisionTreeClassifier(),
        n_estimators   = nEstimators,
        # max_samples  = X_train.shape[0],
        bootstrap      = True,
        oob_score      = True,
        n_jobs         = -1  # use all available cores
        )

    ensemble.fit(X_train,y_train)
    predictions = ensemble.predict(X_test)

    print( "nEstimators: " + str(nEstimators) )
    print( "out-of-bag score: " + str(ensemble.oob_score_) )
    print( "accuracy score: " + str(accuracy_score(y_test,predictions)) )
    print( "out-of-bag decision function:" )
    print( str(ensemble.oob_decision_function_) )

    return( None )
示例7: bagging
def bagging(X_train, X_test, y_train, y_test, n_est):
    """Plot train/test accuracy of bagged decision trees vs. ensemble size.

    Trains one BaggingClassifier for every estimator count in
    ``range(1, n_est)`` and plots both accuracy curves.

    Fixes over the previous version:
    - ``n_est`` was unconditionally overwritten with 51, silently ignoring
      the caller's argument; it is now honoured.
    - ``scores1``/``scores2`` were appended to without ever being
      initialised (NameError at runtime); they are now local lists.
    """
    estimators = range(1, n_est)
    scores1 = []  # test-set accuracies
    scores2 = []  # train-set accuracies
    decision_clf = DecisionTreeClassifier()
    for est in estimators:
        bagging_clf = BaggingClassifier(decision_clf, n_estimators=est,
                                        max_samples=0.67, max_features=0.67,
                                        bootstrap=True, random_state=9)
        bagging_clf.fit(X_train, y_train)
        # test line
        y_pred_bagging1 = bagging_clf.predict(X_test)
        score_bc_dt1 = accuracy_score(y_test, y_pred_bagging1)
        scores1.append(score_bc_dt1)
        # train line
        y_pred_bagging2 = bagging_clf.predict(X_train)
        score_bc_dt2 = accuracy_score(y_train, y_pred_bagging2)
        scores2.append(score_bc_dt2)
    plt.figure(figsize=(10, 6))
    plt.title('Bagging Info')
    plt.xlabel('Estimators')
    plt.ylabel('Scores')
    plt.plot(estimators,scores1,'g',label='test line', linewidth=3)
    plt.plot(estimators,scores2,'c',label='train line', linewidth=3)
    plt.legend()
    plt.show()
示例8: BaggingSK
class BaggingSK(PoolGenerator):
    '''
    This class should not be used, use brew.generation.bagging.Bagging instead.
    '''

    def __init__(self, base_classifier=None, n_classifiers=100, combination_rule='majority_vote'):
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        # using the sklearn implementation of bagging for now
        self.sk_bagging = BaggingClassifier(
            base_estimator=base_classifier,
            n_estimators=n_classifiers,
            max_samples=1.0,
            max_features=1.0)
        self.ensemble = Ensemble()
        self.combiner = Combiner(rule=combination_rule)

    def fit(self, X, y):
        # Delegate training to sklearn, then expose the fitted members
        # through the brew Ensemble wrapper.
        self.sk_bagging.fit(X, y)
        self.ensemble.add_classifiers(self.sk_bagging.estimators_)
        #self.classes_ = set(y)

    def predict(self, X):
        member_outputs = self.ensemble.output(X)
        return self.combiner.combine(member_outputs)
示例9: test_oob_score_classification
def test_oob_score_classification():
    """OOB score approximates the generalization error on iris.

    Also checks that a UserWarning is raised when there are too few
    estimators for a reliable out-of-bag estimate.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    for base_estimator in [DecisionTreeClassifier(), SVC()]:
        clf = BaggingClassifier(base_estimator=base_estimator,
                                n_estimators=100,
                                bootstrap=True,
                                oob_score=True,
                                random_state=rng).fit(X_train, y_train)
        # OOB estimate should be within 0.1 of the held-out accuracy.
        assert_less(abs(clf.score(X_test, y_test) - clf.oob_score_), 0.1)

        # With a single estimator the OOB estimate is unreliable: warn.
        few = BaggingClassifier(base_estimator=base_estimator,
                                n_estimators=1,
                                bootstrap=True,
                                oob_score=True,
                                random_state=rng)
        assert_warns(UserWarning, few.fit, X_train, y_train)
示例10: test_bagging_sample_weight_unsupported_but_passed
def test_bagging_sample_weight_unsupported_but_passed():
    """sample_weight must be rejected when the base estimator lacks it."""
    clf = BaggingClassifier(DummyZeroEstimator())
    rng = check_random_state(0)
    # Fitting and predicting without weights works fine.
    clf.fit(iris.data, iris.target).predict(iris.data)
    # Passing weights must raise: DummyZeroEstimator takes no sample_weight.
    weights = rng.randint(10, size=(iris.data.shape[0]))
    assert_raises(ValueError, clf.fit, iris.data, iris.target,
                  sample_weight=weights)
示例11: test_warm_start_smaller_n_estimators
def test_warm_start_smaller_n_estimators():
    """Shrinking n_estimators on a warm-started refit must raise ValueError."""
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    ensemble = BaggingClassifier(n_estimators=5, warm_start=True)
    ensemble.fit(X, y)
    # Request fewer estimators than already fitted — the next fit must fail.
    ensemble.set_params(n_estimators=4)
    assert_raises(ValueError, ensemble.fit, X, y)
示例12: test_bagging_with_pipeline
def test_bagging_with_pipeline():
    """Bagging a Pipeline base estimator seeds its final step's RNG."""
    bagger = BaggingClassifier(
        make_pipeline(SelectKBest(k=1), DecisionTreeClassifier()),
        max_features=2)
    bagger.fit(iris.data, iris.target)
    # The tree inside the first bagged pipeline must have received an
    # integer random_state from the ensemble.
    tree_random_state = bagger[0].steps[-1][1].random_state
    assert_true(isinstance(tree_random_state, int))
示例13: train_classifiers
def train_classifiers(data):
    """Train a bagged AdaBoost classifier on crime data and print accuracy.

    Expects ``data`` to be a DataFrame containing the feature columns in
    ``train_vars`` plus a ``CategoryInt`` target column. Mutates ``data``
    in place (fills Precipitationmm NaNs, ordinal-encodes Conditions).

    Fix: the trailing ``print len(...)`` statements were Python 2 syntax
    (SyntaxError on Python 3); the rest of this file uses ``print(...)``
    calls, so they are normalised to the function form.
    """
    train_vars = [
        'X', 'Y',
        'Darkness',
        'Moon',
        'Hour',
        'DayOfWeekInt',
        'Day',
        'Month',
        'Year',
        'PdDistrictInt',
        'TemperatureC',
        'Precipitationmm',
        'InPdDistrict',
        'Conditions',
        'AddressCode',
    ]
    # Ordinal encoding of rain intensity; unmapped conditions become 0.
    weather_mapping = {
        'Light Drizzle': 1,
        'Drizzle': 2,
        'Light Rain': 3,
        'Rain': 4,
        'Heavy Rain': 5,
        'Thunderstorm': 6,
    }
    data.Precipitationmm = data.Precipitationmm.fillna(-1)
    data.Conditions = data.Conditions.map(weather_mapping).fillna(0)

    train, test = split(data)
    X_train = train[train_vars]
    y_train = train.CategoryInt
    X_test = test[train_vars]
    y_test = test.CategoryInt

    bdt_real_2 = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=8),
        n_estimators=10,
        learning_rate=1
    )
    #bdt_real = DecisionTreeClassifier(max_depth=None, min_samples_split=1,
    #random_state=6065)
    bdt_real = BaggingClassifier(base_estimator=bdt_real_2,
                                 random_state=6065,
                                 n_estimators=100)
    #bdt_real = RandomForestClassifier(random_state=6065,
    #n_estimators=200)
    #bdt_real = ExtraTreesClassifier(random_state=6065,
    #min_samples_split=5,
    #n_estimators=200)
    bdt_real.fit(X_train, y_train)

    y_predict = pandas.Series(bdt_real.predict(X_test))
    # NOTE(review): y_predict gets a fresh 0..n-1 index while y_test keeps
    # the split's index; the == comparison aligns on index — confirm split()
    # resets indices, otherwise this correct-count is unreliable.
    print(len(y_predict[y_predict == y_test]))
    print(len(y_predict))
    return bdt_real
示例14: create_estimators
def create_estimators(self, X_train, y_train, X_test):
    """For every model and every point of its parameter grid, fit a bagged
    ensemble and collect the positive-class probabilities for X_test."""
    for base_model in self.models:
        grid = self.create_parameter_grid(base_model)
        for params in grid:
            configured = base_model.set_params(**params)
            bagger = BaggingClassifier(base_estimator=configured,
                                       n_estimators=self.estimators,
                                       max_samples=0.95, n_jobs=3)
            bagger.fit(X_train, y_train)
            # Column 1 holds the probability of the positive class.
            self.predictions.append(bagger.predict_proba(X_test)[:, 1])
示例15: classification
def classification(self, x_train, y_train):
    """Fit a bagged decision tree and print its predictions on the
    training data (a sanity check, not a held-out evaluation).

    Fix: the original used Python 2 ``print`` statements, which are a
    SyntaxError under Python 3; parenthesised calls work on both and match
    the print style used elsewhere in this file.
    """
    ml = BaggingClassifier(DecisionTreeClassifier())
    ml.fit(x_train, y_train)
    y_pred = ml.predict(x_train)
    print('y_train ', y_train)
    print('y_pred ', y_pred.tolist())