本文整理汇总了Python中sklearn.ensemble.BaggingClassifier.fit方法的典型用法代码示例。如果您正苦于以下问题:Python BaggingClassifier.fit方法的具体用法?Python BaggingClassifier.fit怎么用?Python BaggingClassifier.fit使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.BaggingClassifier
的用法示例。
在下文中一共展示了BaggingClassifier.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_warm_start_equal_n_estimators
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
def test_warm_start_equal_n_estimators():
    """Refitting with warm_start and an unchanged n_estimators is a no-op."""
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=43)

    clf = BaggingClassifier(n_estimators=5, warm_start=True, random_state=83)
    clf.fit(X_train, y_train)
    baseline_pred = clf.predict(X_test)

    # Corrupt the training data; since no new estimators are requested,
    # the second fit must only warn and leave the ensemble untouched.
    X_train += 1.
    assert_warns_message(UserWarning,
                         "Warm-start fitting without increasing n_estimators does not",
                         clf.fit, X_train, y_train)
    assert_array_equal(baseline_pred, clf.predict(X_test))
示例2: query_by_bagging
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
def query_by_bagging(X, y, current_model, batch_size, rng, base_model=None, n_bags=5, method="KL", D=None):
    """Query-by-bagging active-learning strategy (Settles' survey, p. 17).

    Trains a committee of ``n_bags`` bagged models on the labeled examples
    and scores the unlabeled ones either by the entropy of the committee's
    averaged class probabilities or by the mean KL divergence between each
    member and the committee consensus.

    :param X: feature matrix
    :param y: masked targets exposing ``known`` (bool mask) and ``unknown_ids``
    :param current_model: unused here; kept for strategy-interface parity
    :param batch_size: number of examples to query
    :param rng: random state forwarded to the bagging ensemble
    :param base_model: model that will be **fitted every iteration**;
        defaults to ``SVC(C=1, kernel='linear')``.  The default is ``None``
        so a fresh estimator is built per call — the original default was a
        shared mutable SVC instance refit by every caller.
    :param n_bags: number of bags on which to train ``n_bags`` models
    :param method: 'entropy' or 'KL'
    :param D: unused; kept for strategy-interface parity
    :return: (queried example ids, fitness scores normalized by their max)
    """
    assert method == 'entropy' or method == 'KL'
    if base_model is None:
        base_model = SVC(C=1, kernel='linear')
    eps = 0.0000001
    if method == 'KL':
        assert hasattr(base_model, 'predict_proba'), "Model with probability prediction needs to be passed to this strategy!"

    clfs = BaggingClassifier(base_model, n_estimators=n_bags, random_state=rng)
    clfs.fit(X[y.known], y[y.known])
    unknown_mask = np.invert(y.known)
    pc = clfs.predict_proba(X[unknown_mask])

    if method == 'entropy':
        # sum(p*log p) is negative entropy, so ascending argsort puts the
        # highest-entropy (most uncertain) examples first.
        pc += eps
        fitness = np.sum(pc * np.log(pc), axis=1)
        ids = np.argsort(fitness)[:batch_size]
    elif method == 'KL':
        # Mean KL divergence of each committee member from the consensus;
        # the most-disagreed-upon examples are queried.
        p = np.array([clf.predict_proba(X[unknown_mask]) for clf in clfs.estimators_])
        fitness = np.mean(np.sum(p * np.log(p / pc), axis=2), axis=0)
        ids = np.argsort(fitness)[-batch_size:]

    return y.unknown_ids[ids], fitness / np.max(fitness)
示例3: ADABoost
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
class ADABoost(Base):
    """Classifier plugin built on a bagged linear-SVM ensemble.

    NOTE(review): despite the class name, the active configuration trains a
    BaggingClassifier over SGD hinge-loss pipelines; the AdaBoost variants
    are commented out below.
    """

    def train(self, data = None, plugin=None):
        """ With dataframe train mllib """
        super(ADABoost, self).train(data, plugin)
        #cl = svm.SVC(gamma=0.001, C= 100, kernel='linear', probability=True)
        # self.X_train: last column is the label, the rest are features.
        X = self.X_train.iloc[:,:-1]
        Y = self.X_train.iloc[:,-1]
        self.scaler = StandardScaler().fit(X)
        X = self.scaler.transform(X)
        # NOTE(review): X is scaled here AND the pipeline below contains the
        # same fitted scaler, so features are standardized twice at fit time.
        # predict() follows the same path, so train/predict are at least
        # consistent — confirm the double scaling is intentional.
        cl = SGDClassifier(loss='hinge')
        p = Pipeline([("Scaler", self.scaler), ("svm", cl)])
        self.clf = BaggingClassifier(p, n_estimators=50)
        #self.clf = AdaBoostClassifier(p, n_estimators=10)
        #self.clf = AdaBoostClassifier(SGDClassifier(loss='hinge'),algorithm='SAMME', n_estimators=10)
        self.clf.fit(X, Y)

    def predict(self, file, plugin=None):
        """Predict and return the tag for ``file``'s feature vector under ``plugin``."""
        super(ADABoost, self).predict(file, plugin)
        data = file.vector
        # presumably ``file.vector`` maps plugin name -> feature rows — TODO confirm
        X = data[plugin]
        # Same explicit transform as train(); the pipeline scales once more.
        X = self.scaler.transform(X)
        guess = self.clf.predict(X)
        return self.getTag(guess)
示例4: train_dts
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
def train_dts(observations, targets, method='bagging'):
    """Train one classifier per output column.

    :param observations: our train dataset
    :param targets: multiple target variables (one column per output)
    :param method: 'bagging', 'random_forest' or 'boosting'; anything else
        falls back to a plain decision tree
    :return: list of fitted models, one per target variable
    """
    target_matrix = np.array(targets)
    models = []

    for col in range(len(targets[0])):
        column_values = target_matrix[:, col].tolist()

        if method == 'bagging':
            model = BaggingClassifier(tree.DecisionTreeClassifier(),
                                      n_estimators=100, max_samples=0.5, max_features=1.)
        elif method == 'random_forest':
            model = RandomForestClassifier(n_estimators=100)
        elif method == 'boosting':
            model = AdaBoostClassifier(n_estimators=100)
        else:
            model = tree.DecisionTreeClassifier()

        if len(set(column_values)) > 1:
            # Balance the training set before fitting (data_manipulation helper).
            bal_observations, bal_tar = sample_balanced_dataset(observations, column_values)
            model.fit(bal_observations, bal_tar)
        else:
            # A constant output cannot train a real tree: use a stub that
            # always predicts the single observed value.
            model = FakeClassifier(column_values[0])

        models.append(model)

    return models
示例5: bagging
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
def bagging(X_train, X_test, y_train, y_test, n_est):
    """Plot train/test accuracy of a bagged decision tree vs. ensemble size.

    Fits BaggingClassifiers with 1..n_est-1 estimators and plots both curves.

    Fixes vs. the original: ``n_est`` is honored (it was unconditionally
    overwritten with 51, silently ignoring the caller's value) and the score
    lists are initialized locally (they were undefined names, so the first
    ``append`` raised NameError unless stale globals happened to exist).

    :param X_train/X_test/y_train/y_test: train/test split of the data
    :param n_est: exclusive upper bound on the number of estimators to try
    """
    estimators = range(1, n_est)
    test_scores = []
    train_scores = []
    decision_clf = DecisionTreeClassifier()

    for est in estimators:
        bagging_clf = BaggingClassifier(decision_clf, n_estimators=est,
                                        max_samples=0.67, max_features=0.67,
                                        bootstrap=True, random_state=9)
        bagging_clf.fit(X_train, y_train)
        # test line
        y_pred_test = bagging_clf.predict(X_test)
        test_scores.append(accuracy_score(y_test, y_pred_test))
        # train line
        y_pred_train = bagging_clf.predict(X_train)
        train_scores.append(accuracy_score(y_train, y_pred_train))

    plt.figure(figsize=(10, 6))
    plt.title('Bagging Info')
    plt.xlabel('Estimators')
    plt.ylabel('Scores')
    plt.plot(estimators, test_scores, 'g', label='test line', linewidth=3)
    plt.plot(estimators, train_scores, 'c', label='train line', linewidth=3)
    plt.legend()
    plt.show()
示例6: test_base
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
def test_base():
    """Exercise the BaseEnsemble plumbing through BaggingClassifier."""
    iris = load_iris()
    ensemble = BaggingClassifier(
        base_estimator=Perceptron(tol=1e-3, random_state=None), n_estimators=3)
    ensemble.fit(iris.data, iris.target)

    # Rebuild the estimator list by hand via _make_estimator.
    ensemble.estimators_ = []
    ensemble._make_estimator()
    rs = np.random.RandomState(3)
    ensemble._make_estimator(random_state=rs)
    ensemble._make_estimator(random_state=rs)
    ensemble._make_estimator(append=False)  # must not be added to the list

    assert_equal(3, len(ensemble))
    assert_equal(3, len(ensemble.estimators_))
    assert_true(isinstance(ensemble[0], Perceptron))

    # First member keeps random_state=None; seeded members get distinct ints.
    assert_equal(ensemble[0].random_state, None)
    assert_true(isinstance(ensemble[1].random_state, int))
    assert_true(isinstance(ensemble[2].random_state, int))
    assert_not_equal(ensemble[1].random_state, ensemble[2].random_state)

    # n_estimators given as a NumPy integer must also be accepted.
    np_int_ensemble = BaggingClassifier(base_estimator=Perceptron(tol=1e-3),
                                        n_estimators=np.int32(3))
    np_int_ensemble.fit(iris.data, iris.target)
示例7: BaggingSK
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
class BaggingSK(PoolGenerator):
    """Pool generator backed by scikit-learn's BaggingClassifier.

    This class should not be used, use brew.generation.bagging.Bagging instead.
    """

    def __init__(self, base_classifier=None, n_classifiers=100, combination_rule='majority_vote'):
        self.base_classifier = base_classifier
        self.n_classifiers = n_classifiers
        self.combiner = Combiner(rule=combination_rule)
        self.ensemble = Ensemble()
        # Delegate the actual bootstrap training to scikit-learn for now.
        self.sk_bagging = BaggingClassifier(base_estimator=base_classifier,
                                            n_estimators=n_classifiers,
                                            max_samples=1.0,
                                            max_features=1.0)

    def fit(self, X, y):
        """Fit the sklearn ensemble and mirror its members into self.ensemble."""
        self.sk_bagging.fit(X, y)
        self.ensemble.add_classifiers(self.sk_bagging.estimators_)

    def predict(self, X):
        """Combine member outputs with the configured combination rule."""
        member_outputs = self.ensemble.output(X)
        return self.combiner.combine(member_outputs)
示例8: test_bagging_with_pipeline
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
def test_bagging_with_pipeline():
    # Bagging over a pipeline base estimator must seed the inner estimator:
    # after fit, its random_state is a drawn int, not None.
    pipe = make_pipeline(SelectKBest(k=1), DecisionTreeClassifier())
    bagged = BaggingClassifier(pipe, max_features=2)
    bagged.fit(iris.data, iris.target)
    inner_tree = bagged[0].steps[-1][1]
    assert_true(isinstance(inner_tree.random_state, int))
示例9: test_estimators_samples
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
def test_estimators_samples():
    # estimators_samples_ must be well-formed, and the stored sample/feature
    # indices must allow exact re-fitting of any member estimator later.
    X, y = make_hastie_10_2(n_samples=200, random_state=1)
    bagging = BaggingClassifier(LogisticRegression(), max_samples=0.5,
                                max_features=0.5, random_state=1,
                                bootstrap=False)
    bagging.fit(X, y)

    samples = bagging.estimators_samples_
    features = bagging.estimators_features_
    members = bagging.estimators_

    # One integer index array per member, covering half the rows each.
    assert_equal(len(samples), len(members))
    assert_equal(len(samples[0]), len(X) // 2)
    assert_equal(samples[0].dtype.kind, 'i')

    # Refitting the first member on its recorded subset must reproduce the
    # coefficients learned during the original fit.
    first_samples = samples[0]
    first_features = features[0]
    first_member = members[0]
    subset_X = (X[first_samples])[:, first_features]
    subset_y = y[first_samples]
    coefs_before = first_member.coef_
    first_member.fit(subset_X, subset_y)
    assert_array_almost_equal(coefs_before, first_member.coef_)
示例10: test_bagging_sample_weight_unsupported_but_passed
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
def test_bagging_sample_weight_unsupported_but_passed():
    # Passing sample_weight when the base estimator does not support it
    # must raise ValueError (a plain fit/predict works fine).
    bagging = BaggingClassifier(DummyZeroEstimator())
    rng = check_random_state(0)
    bagging.fit(iris.data, iris.target).predict(iris.data)
    assert_raises(ValueError, bagging.fit, iris.data, iris.target,
                  sample_weight=rng.randint(10, size=(iris.data.shape[0])))
示例11: test_warm_start_smaller_n_estimators
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
def test_warm_start_smaller_n_estimators():
    """Shrinking n_estimators between warm-started fits must raise."""
    X, y = make_hastie_10_2(n_samples=20, random_state=1)
    model = BaggingClassifier(n_estimators=5, warm_start=True)
    model.fit(X, y)
    # Requesting fewer estimators than already fitted is an error.
    model.set_params(n_estimators=4)
    assert_raises(ValueError, model.fit, X, y)
示例12: test_estimators_samples_deterministic
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
def test_estimators_samples_deterministic():
    # Regression test for issue #9524: even with a random transform step
    # (SparseRandomProjection) in the base pipeline, refitting a member on
    # its recorded sample/feature subset with a fixed random_state must
    # reproduce the coefficients obtained at the original fit time.
    iris = load_iris()
    X, y = iris.data, iris.target

    pipe = make_pipeline(SparseRandomProjection(n_components=2),
                         LogisticRegression())
    clf = BaggingClassifier(base_estimator=pipe,
                            max_samples=0.5,
                            random_state=0)
    clf.fit(X, y)

    first_member = clf.estimators_[0]
    expected_coef = first_member.steps[-1][1].coef_.copy()
    sample_idx = clf.estimators_samples_[0]
    feature_idx = clf.estimators_features_[0]

    refit_X = (X[sample_idx])[:, feature_idx]
    first_member.fit(refit_X, y[sample_idx])
    assert_array_equal(first_member.steps[-1][1].coef_, expected_coef)
示例13: test_bagging_classifier_with_missing_inputs
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
def test_bagging_classifier_with_missing_inputs():
    # BaggingClassifier must tolerate missing/infinite values in X as long
    # as the base estimator (a pipeline whose FunctionTransformer imputes
    # via `replace`) can handle them.
    X = np.array([
        [1, 3, 5],
        [2, None, 6],
        [2, np.nan, 6],
        [2, np.inf, 6],
        [2, np.NINF, 6],
    ])
    y = np.array([3, 6, 6, 6, 6])

    imputing_pipeline = make_pipeline(
        FunctionTransformer(replace, validate=False),
        DecisionTreeClassifier(),
    )
    imputing_pipeline.fit(X, y).predict(X)

    bagging_classifier = BaggingClassifier(imputing_pipeline)
    bagging_classifier.fit(X, y)
    assert_equal(y.shape, bagging_classifier.predict(X).shape)
    bagging_classifier.predict_log_proba(X)
    bagging_classifier.predict_proba(X)

    # Without the imputing step, validation errors must propagate through
    # both the bare pipeline and the bagging wrapper.
    bare_pipeline = make_pipeline(DecisionTreeClassifier())
    assert_raises(ValueError, bare_pipeline.fit, X, y)
    bagging_classifier = BaggingClassifier(bare_pipeline)
    assert_raises(ValueError, bagging_classifier.fit, X, y)
示例14: baggedDecisionTree
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
def baggedDecisionTree( X_train, y_train, X_test, y_test, nEstimators ):
    """Fit a bagged decision-tree ensemble; print OOB and test accuracy."""
    print("\n### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###")
    print("baggedDecisionTree()\n")

    model = BaggingClassifier(
        base_estimator=DecisionTreeClassifier(),
        n_estimators=nEstimators,
        bootstrap=True,
        oob_score=True,   # requires bootstrap=True
        n_jobs=-1,        # use all available cores
    )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print( "nEstimators: " + str(nEstimators) )
    print( "out-of-bag score: " + str(model.oob_score_) )
    print( "accuracy score: " + str(accuracy_score(y_test,y_pred)) )
    print( "out-of-bag decision function:" )
    print( str(model.oob_decision_function_) )

    return None
示例15: train_classifiers
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import fit [as 别名]
def train_classifiers(data):
    """Train a bagged AdaBoost-of-decision-trees classifier on crime data.

    Mutates ``data`` in place (fills Precipitationmm, ordinal-encodes
    Conditions), splits it with the module's ``split`` helper, fits the
    ensemble and prints how many test predictions matched.

    Fixes vs. the original: the Python 2 ``print len(...)`` statements are
    now ``print(len(...))`` calls, consistent with the rest of the module,
    and the prediction Series is given ``y_test``'s index so the
    element-wise comparison pairs predictions with the right rows (a fresh
    RangeIndex would misalign against the split's original index).

    :param data: dataframe with the columns listed in ``train_vars`` plus
        ``Precipitationmm``, ``Conditions`` and ``CategoryInt``.
    :return: the fitted BaggingClassifier.
    """
    train_vars = [
        'X', 'Y',
        'Darkness',
        'Moon',
        'Hour',
        'DayOfWeekInt',
        'Day',
        'Month',
        'Year',
        'PdDistrictInt',
        'TemperatureC',
        'Precipitationmm',
        'InPdDistrict',
        'Conditions',
        'AddressCode',
    ]
    # Ordinal encoding of rain intensity; unmapped conditions become 0.
    weather_mapping = {
        'Light Drizzle': 1,
        'Drizzle': 2,
        'Light Rain': 3,
        'Rain': 4,
        'Heavy Rain': 5,
        'Thunderstorm': 6,
    }
    data.Precipitationmm = data.Precipitationmm.fillna(-1)
    data.Conditions = data.Conditions.map(weather_mapping).fillna(0)

    train, test = split(data)
    X_train = train[train_vars]
    y_train = train.CategoryInt
    X_test = test[train_vars]
    y_test = test.CategoryInt

    base_boost = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=8),
        n_estimators=10,
        learning_rate=1
    )
    bdt_real = BaggingClassifier(base_estimator=base_boost,
                                 random_state=6065,
                                 n_estimators=100)
    bdt_real.fit(X_train, y_train)

    # Align the prediction index with y_test before comparing.
    y_predict = pandas.Series(bdt_real.predict(X_test), index=y_test.index)
    print(len(y_predict[y_predict == y_test]))
    print(len(y_predict))
    return bdt_real