本文整理汇总了Python中sklearn.ensemble.BaggingClassifier.predict方法的典型用法代码示例。如果您正苦于以下问题：Python BaggingClassifier.predict方法的具体用法？Python BaggingClassifier.predict怎么用？Python BaggingClassifier.predict使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.ensemble.BaggingClassifier的用法示例。
在下文中一共展示了BaggingClassifier.predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_warm_start_equal_n_estimators
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
def test_warm_start_equal_n_estimators():
    """Refitting a warm-started ensemble with unchanged ``n_estimators``.

    The second ``fit`` call is expected to emit a ``UserWarning`` and the
    predictions on the held-out split must stay exactly the same.
    """
    features, labels = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, random_state=43
    )

    ensemble = BaggingClassifier(n_estimators=5, warm_start=True,
                                 random_state=83)
    ensemble.fit(X_train, y_train)
    baseline = ensemble.predict(X_test)

    # Shift the training data in place; if the second fit retrained anything
    # the predictions compared below could differ.
    X_train += 1.
    assert_warns_message(
        UserWarning,
        "Warm-start fitting without increasing n_estimators does not",
        ensemble.fit, X_train, y_train
    )
    assert_array_equal(baseline, ensemble.predict(X_test))
示例2: bagging
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
def bagging(X_train, X_test, y_train, y_test, n_est=51):
    """Plot train/test accuracy of bagged decision trees per ensemble size.

    One ``BaggingClassifier`` wrapping a ``DecisionTreeClassifier`` is fitted
    for every ensemble size in ``range(1, n_est)``; each estimator sees 67% of
    the samples (bootstrap) and 67% of the features.

    Args:
        X_train, y_train: training features and labels.
        X_test, y_test: evaluation features and labels.
        n_est: exclusive upper bound of the ensemble sizes to try.  The
            default of 51 reproduces the value that used to be hard-coded
            (and silently overrode whatever the caller passed).

    Returns:
        None. The two accuracy curves are shown with matplotlib.
    """
    estimators = list(range(1, n_est))
    # Accumulators are local so repeated calls never mix their results.
    test_scores = []
    train_scores = []
    decision_clf = DecisionTreeClassifier()
    for est in estimators:
        bagging_clf = BaggingClassifier(decision_clf, n_estimators=est,
                                        max_samples=0.67, max_features=0.67,
                                        bootstrap=True, random_state=9)
        bagging_clf.fit(X_train, y_train)
        # test line
        test_scores.append(
            accuracy_score(y_test, bagging_clf.predict(X_test)))
        # train line
        train_scores.append(
            accuracy_score(y_train, bagging_clf.predict(X_train)))

    plt.figure(figsize=(10, 6))
    plt.title('Bagging Info')
    plt.xlabel('Estimators')
    plt.ylabel('Scores')
    plt.plot(estimators, test_scores, 'g', label='test line', linewidth=3)
    plt.plot(estimators, train_scores, 'c', label='train line', linewidth=3)
    plt.legend()
    plt.show()
示例3: main
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
def main():
    """Fit bagged decision trees on iris and print the accuracy on two rows."""
    dataset = load_iris()
    features, targets = dataset.data, dataset.target

    ensemble = BaggingClassifier(DecisionTreeClassifier())
    ensemble.fit(features, targets)

    head_x, head_y = features[:2], targets[:2]
    # The predictions themselves are discarded; only the score is printed.
    ensemble.predict(head_x)
    print(ensemble.score(head_x, head_y))
示例4: bagging_with_base_estimator
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
def bagging_with_base_estimator(base_estimator, x_train, x_test, y_train,
                                y_test, rands=None):
    """Fit a 300-estimator bagging ensemble and report its F1 scores.

    The sample fraction (``rs``) and feature fraction (``rf``) given to the
    ``BaggingClassifier`` are either taken from ``rands`` or drawn at random.

    ARGS:
        - base_estimator: estimator wrapped by the ``BaggingClassifier``
        - x_train: :class:`pandas.DataFrame` of the x_training data
        - y_train: :class:`pandas.Series` of the y_training data
        - x_test: :class:`pandas.DataFrame` of the x_testing data
        - y_test: :class:`pandas.Series` of the y_testing data
        - rands: a :class:`tuple` of the (rs, rf) used as ``max_samples``
          and ``max_features``. If falsy, both are drawn with
          ``numpy.random.rand`` and reported in the returned `Series`

    RETURNS:
        :class:`pandas.Series` with the entries ``train-f1``, ``test-f1``,
        ``rs`` and ``rf``
    """
    # Use the fractions provided if there are any, otherwise generate them.
    if not rands:
        rs = numpy.random.rand()
        rf = numpy.random.rand()
        # Re-draw until at least 10% of the features are kept.
        while rf < 0.1:
            rf = numpy.random.rand()
        # NOTE(review): rs has no such lower bound, so a very small draw is
        # passed straight through as max_samples -- confirm this is intended.
    else:
        rs, rf = rands[0], rands[1]

    # The wrapped estimator is passed positionally: its keyword was renamed
    # from ``base_estimator`` to ``estimator`` in newer scikit-learn
    # releases, while the first positional slot works with both spellings.
    bc = BaggingClassifier(base_estimator, n_estimators=300,
                           max_samples=rs, max_features=rf, n_jobs=1)
    bc.fit(x_train, y_train)

    train_f1 = f1_score(y_train, bc.predict(x_train))
    test_f1 = f1_score(y_test, bc.predict(x_test))
    return pandas.Series({'train-f1': train_f1, 'test-f1': test_f1,
                          'rs': rs, 'rf': rf})
示例5: baggedDecisionTree
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
def baggedDecisionTree(X_train, y_train, X_test, y_test, nEstimators):
    """Fit bagged decision trees and print out-of-bag and test diagnostics.

    Args:
        X_train, y_train: training features and labels.
        X_test, y_test: evaluation features and labels.
        nEstimators: number of trees in the ensemble.

    Returns:
        None. The out-of-bag score, the test accuracy and the out-of-bag
        decision function are printed.
    """
    print("\n### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###")
    print("baggedDecisionTree()\n")

    # The wrapped estimator is passed positionally: its keyword was renamed
    # from ``base_estimator`` to ``estimator`` in newer scikit-learn
    # releases, while the first positional slot works with both spellings.
    myBaggedDecisionTree = BaggingClassifier(
        DecisionTreeClassifier(),
        n_estimators=nEstimators,
        bootstrap=True,
        oob_score=True,
        n_jobs=-1  # use all available cores
    )
    myBaggedDecisionTree.fit(X_train, y_train)
    y_pred = myBaggedDecisionTree.predict(X_test)

    print("nEstimators: " + str(nEstimators))
    print("out-of-bag score: " + str(myBaggedDecisionTree.oob_score_))
    print("accuracy score: " + str(accuracy_score(y_test, y_pred)))
    print("out-of-bag decision function:")
    print(str(myBaggedDecisionTree.oob_decision_function_))
    return None
示例6: ADABoost
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
class ADABoost(Base):
    """Classifier plugin backed by a bagged ensemble of linear SGD pipelines.

    NOTE(review): despite the class name, the model that gets fitted is a
    ``BaggingClassifier``, not an AdaBoost ensemble.
    """

    def train(self, data=None, plugin=None):
        """Fit the scaler and the bagged ensemble on ``self.X_train``.

        The last column of ``self.X_train`` is used as the target and all
        preceding columns as features.
        """
        super(ADABoost, self).train(data, plugin)
        frame = self.X_train
        features = frame.iloc[:, :-1]
        target = frame.iloc[:, -1]

        self.scaler = StandardScaler().fit(features)
        scaled = self.scaler.transform(features)

        # NOTE(review): the pipeline contains the scaler as well, so the
        # already-scaled data is standardised a second time inside the
        # ensemble -- confirm this is intended.
        linear = SGDClassifier(loss='hinge')
        pipeline = Pipeline([("Scaler", self.scaler), ("svm", linear)])
        self.clf = BaggingClassifier(pipeline, n_estimators=50)
        self.clf.fit(scaled, target)

    def predict(self, file, plugin=None):
        """Return the tag for the ``plugin`` entry of ``file.vector``."""
        super(ADABoost, self).predict(file, plugin)
        vectors = file.vector
        scaled = self.scaler.transform(vectors[plugin])
        return self.getTag(self.clf.predict(scaled))
示例7: test_bagging_classifier_with_missing_inputs
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
def test_bagging_classifier_with_missing_inputs():
    """BaggingClassifier must accept X containing missing/infinite values.

    The ensemble itself is expected not to reject such input: with a
    pipeline that first applies ``replace`` fitting and predicting succeed,
    while a pipeline made of only the tree still raises ``ValueError``.
    """
    X = np.array([
        [1, 3, 5],
        [2, None, 6],
        [2, np.nan, 6],
        [2, np.inf, 6],
        # ``np.NINF`` was removed in NumPy 2.0; ``-np.inf`` is the same value
        # and works on every NumPy version.
        [2, -np.inf, 6],
    ])
    y = np.array([3, 6, 6, 6, 6])
    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(
        FunctionTransformer(replace, validate=False),
        classifier
    )
    pipeline.fit(X, y).predict(X)
    bagging_classifier = BaggingClassifier(pipeline)
    bagging_classifier.fit(X, y)
    y_hat = bagging_classifier.predict(X)
    assert_equal(y.shape, y_hat.shape)
    bagging_classifier.predict_log_proba(X)
    bagging_classifier.predict_proba(X)

    # Verify that exceptions can be raised by wrapper classifier
    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(classifier)
    assert_raises(ValueError, pipeline.fit, X, y)
    bagging_classifier = BaggingClassifier(pipeline)
    assert_raises(ValueError, bagging_classifier.fit, X, y)
示例8: train_classifiers
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
def train_classifiers(data):
    """Train a bagging ensemble of AdaBoost trees and print its hit count.

    Args:
        data: table providing the columns listed in ``train_vars`` plus
            ``CategoryInt``. NOTE: its ``Precipitationmm`` and ``Conditions``
            columns are overwritten in place (missing values filled, weather
            conditions mapped to integer codes).

    Returns:
        The fitted ``BaggingClassifier``. The number of correct predictions
        on the test split and the size of that split are printed.
    """
    train_vars = [
        'X', 'Y',
        'Darkness',
        'Moon',
        'Hour',
        'DayOfWeekInt',
        'Day',
        'Month',
        'Year',
        'PdDistrictInt',
        'TemperatureC',
        'Precipitationmm',
        'InPdDistrict',
        'Conditions',
        'AddressCode',
    ]
    weather_mapping = {
        'Light Drizzle': 1,
        'Drizzle': 2,
        'Light Rain': 3,
        'Rain': 4,
        'Heavy Rain': 5,
        'Thunderstorm': 6,
    }
    data.Precipitationmm = data.Precipitationmm.fillna(-1)
    # Conditions missing from the mapping become 0.
    data.Conditions = data.Conditions.map(weather_mapping).fillna(0)

    train, test = split(data)
    X_train = train[train_vars]
    y_train = train.CategoryInt
    X_test = test[train_vars]
    y_test = test.CategoryInt

    bdt_real_2 = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=8),
        n_estimators=10,
        learning_rate=1
    )
    # The wrapped estimator is passed positionally: its keyword was renamed
    # from ``base_estimator`` to ``estimator`` in newer scikit-learn
    # releases, while the first positional slot works with both spellings.
    bdt_real = BaggingClassifier(bdt_real_2,
                                 random_state=6065,
                                 n_estimators=100)
    bdt_real.fit(X_train, y_train)

    predicted = bdt_real.predict(X_test)
    # Compare by position: wrapping the predictions in a fresh Series gives
    # them a 0..n-1 index that does not match y_test's own index, so a
    # Series-to-Series comparison can misalign or raise.
    # Assumes y_test is a pandas Series -- TODO confirm against ``split``.
    n_correct = int((predicted == y_test.values).sum())
    print(n_correct)
    print(len(predicted))
    return bdt_real
示例9: classification
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
def classification(self, x_train, y_train):
    """Fit bagged decision trees and print labels next to predictions.

    The model is fitted and evaluated on the same data, so the printed
    predictions are in-sample.

    Args:
        x_train: training features.
        y_train: training labels.

    Returns:
        None.
    """
    ml = BaggingClassifier(DecisionTreeClassifier())
    ml.fit(x_train, y_train)
    y_pred = ml.predict(x_train)
    # A single pre-formatted argument keeps the output identical under
    # Python 2 and Python 3 (the old ``print a, b`` statement is a syntax
    # error on Python 3).
    print('y_train  %s' % (y_train,))
    print('y_pred  %s' % (y_pred.tolist(),))
示例10: test_warm_start_equivalence
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
def test_warm_start_equivalence():
    """A warm-started 5+5 ensemble must predict like a 10-estimator one.

    Both classifiers share the same ``random_state``; the first is grown
    from 5 to 10 estimators through ``warm_start``, the second is fitted
    with 10 estimators in one go.
    """
    data, target = make_hastie_10_2(n_samples=20, random_state=1)
    X_train, X_test, y_train, y_test = train_test_split(
        data, target, random_state=43
    )

    incremental = BaggingClassifier(n_estimators=5, warm_start=True,
                                    random_state=3141)
    incremental.fit(X_train, y_train)
    incremental.set_params(n_estimators=10)
    incremental.fit(X_train, y_train)
    warm_pred = incremental.predict(X_test)

    single_shot = BaggingClassifier(n_estimators=10, warm_start=False,
                                    random_state=3141)
    single_shot.fit(X_train, y_train)
    cold_pred = single_shot.predict(X_test)

    assert_array_almost_equal(warm_pred, cold_pred)
示例11: test_sparse_classification
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
def test_sparse_classification():
    """Check classification for various parameter settings on sparse input.

    For each sparse format and parameter set, an ensemble trained on sparse
    data must predict exactly like one trained on the dense data, and every
    fitted base estimator must have received data of the sparse type.
    """
    class CustomSVC(SVC):
        """SVC variant that records the nature of the training set"""

        def fit(self, X, y):
            super(CustomSVC, self).fit(X, y)
            self.data_type_ = type(X)
            return self

    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        random_state=rng)
    parameter_sets = [
        {"max_samples": 0.5,
         "max_features": 2,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_samples": 1.0,
         "max_features": 4,
         "bootstrap": True,
         "bootstrap_features": True},
        {"max_features": 2,
         "bootstrap": False,
         "bootstrap_features": True},
        {"max_samples": 0.5,
         "bootstrap": True,
         "bootstrap_features": False},
    ]

    for sparse_format in [csc_matrix, csr_matrix]:
        X_train_sparse = sparse_format(X_train)
        X_test_sparse = sparse_format(X_test)
        for params in parameter_sets:
            # The wrapped estimator is passed positionally: its keyword was
            # renamed from ``base_estimator`` to ``estimator`` in newer
            # scikit-learn releases, while the first positional slot works
            # with both spellings.

            # Trained on sparse format
            sparse_classifier = BaggingClassifier(
                CustomSVC(),
                random_state=1,
                **params
            ).fit(X_train_sparse, y_train)
            sparse_results = sparse_classifier.predict(X_test_sparse)

            # Trained on dense format
            dense_results = BaggingClassifier(
                CustomSVC(),
                random_state=1,
                **params
            ).fit(X_train, y_train).predict(X_test)

            sparse_type = type(X_train_sparse)
            types = [i.data_type_ for i in sparse_classifier.estimators_]

            assert_array_equal(sparse_results, dense_results)
            assert all([t == sparse_type for t in types])
示例12: adaboost_train
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
def adaboost_train(train_file, test_file,
                   output_path="/home/chuangxin/Bagging_result.csv"):
    """Train a bagging ensemble on ``train_file`` and score ``test_file``.

    Train and test matrices are scaled together, the 50000 best features are
    selected with a chi-squared test, 80% of the training rows are used for
    fitting and the remaining 20% for a hold-out AUC. Class-1 probabilities
    for the test file are written as CSV.

    Args:
        train_file: path handed to ``readFile``; expected to yield
            ``(_, x, y)``.
        test_file: path handed to ``readFile``; expected to yield
            ``(ids, x)``.
        output_path: destination of the CSV with ``id``/``sentiment``
            columns. Defaults to the location that used to be hard-coded.

    Returns:
        The fitted ``BaggingClassifier``.
    """
    from sklearn.ensemble import BaggingClassifier
    from sklearn.feature_selection import SelectKBest, chi2
    from sklearn.metrics import roc_auc_score
    from sklearn.preprocessing import scale
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        # Older scikit-learn releases only ship the legacy module.
        from sklearn.cross_validation import train_test_split

    _, x, y = readFile(train_file)
    print('reading done.')
    ts = x.shape[0]
    ids, x2 = readFile(test_file)
    print(x.shape)
    print(x2.shape)

    # Scale train and test rows together, then split them apart again.
    x = np.concatenate((x, x2))
    print('concatenate done.')
    x = scale(x, with_mean=False)
    print('scale done.')
    x2 = x[ts:]
    x = x[:ts]

    # Apply the SAME fitted selector to the test rows; otherwise they keep
    # the full feature set and no longer match what the model was trained on.
    selector = SelectKBest(chi2, k=50000)
    x = selector.fit_transform(x, y)
    x2 = selector.transform(x2)

    row_ids = np.arange(x.shape[0])
    train_i, test_i = train_test_split(row_ids, train_size=0.8,
                                       random_state=500)
    train_x = x[train_i]
    test_x = x[test_i]
    train_y = y[train_i]
    test_y = y[test_i]

    # NOTE(review): ``LR`` is defined outside this block; presumably an
    # alias of LogisticRegression -- confirm that ``dual=True`` is supported
    # by the solver in use.
    bagging = BaggingClassifier(LR(penalty='l2', dual=True),
                                n_estimators=10,
                                max_samples=0.6, max_features=0.6)
    bagging.fit(train_x, train_y)
    print('train done.')

    res = bagging.predict(train_x)
    print(res)
    res = bagging.predict_proba(train_x)
    print(res)
    score = roc_auc_score(train_y, res[:, 1])
    print(score)
    print('-----------------------------------------')
    print(res[:, 1])

    res = bagging.predict_proba(test_x)
    score = roc_auc_score(test_y, res[:, 1])
    print(score)

    test_proba = bagging.predict_proba(x2)
    output = pd.DataFrame(data={"id": ids, "sentiment": test_proba[:, 1]})
    # quoting=3 is csv.QUOTE_NONE.
    output.to_csv(output_path, index=False, quoting=3)
    return bagging
示例13: BaggingLearner
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
class BaggingLearner(AbstractLearner):
    """Learner that delegates to a bagging ensemble of KNN classifiers."""

    def __init__(self):
        base = KNeighborsClassifier()
        self.learner = BaggingClassifier(base)

    def _train(self, x_train, y_train):
        """Fit the ensemble and keep whatever ``fit`` returns as the learner."""
        fitted = self.learner.fit(x_train, y_train)
        self.learner = fitted

    def _predict(self, x):
        """Return the ensemble's predictions for ``x``."""
        labels = self.learner.predict(x)
        return labels

    def _predict_proba(self, x):
        """Return the ensemble's class probabilities for ``x``."""
        probabilities = self.learner.predict_proba(x)
        return probabilities
示例14: BaggingDecisionTrees
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
class BaggingDecisionTrees(object):
    """Thin wrapper feeding pandas containers to a ``BaggingClassifier``."""

    def __init__(self, n_estimators):
        self.classifier = BaggingClassifier(n_estimators=n_estimators)

    def fit(self, xs, ys):
        """Fit on ``xs.values`` using the ``'y'`` entry of ``ys`` as target."""
        features, targets = xs.values, ys['y']
        self.classifier.fit(features, targets)

    def predict(self, xs):
        """Return the classifier's predictions for ``xs.values``."""
        return self.classifier.predict(xs.values)
示例15: SVMBag
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict [as 别名]
class SVMBag(DMCClassifier):
    """Bagging ensemble of one-vs-one SVCs operating on densified input."""

    # Base SVC; replaced by a real instance in __init__.
    classifier = None
    # Number of SVCs in the ensemble.
    estimators = 10
    # Fraction of features given to each SVC.
    max_features = .5
    # Fraction of samples given to each SVC.
    max_samples = .5

    def __init__(self, X: csr_matrix, Y: np.ndarray, tune_parameters=False):
        """Store the densified training data and build the ensemble.

        Args:
            X: sparse feature matrix; converted with ``toarray()``.
            Y: target values (annotated as ``np.ndarray``; the previous
                ``np.array`` annotation named a function, not a type).
            tune_parameters: forwarded unchanged to the parent constructor.
        """
        super().__init__(X, Y, tune_parameters)
        self.X, self.Y = X.toarray(), Y
        self.classifier = SVC(decision_function_shape='ovo')
        self.clf = BaggingClassifier(self.classifier,
                                     n_estimators=self.estimators, n_jobs=8,
                                     max_samples=self.max_samples,
                                     max_features=self.max_features)

    def predict(self, X: csr_matrix):
        """Densify ``X`` and return the ensemble's predictions for it."""
        X = X.toarray()
        return self.clf.predict(X)