本文整理汇总了 Python 中 sklearn.ensemble.BaggingClassifier.predict_proba 方法的典型用法代码示例。如果您正苦于以下问题：Python BaggingClassifier.predict_proba 方法的具体用法？Python BaggingClassifier.predict_proba 怎么用？Python BaggingClassifier.predict_proba 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 sklearn.ensemble.BaggingClassifier 的用法示例。
在下文中一共展示了BaggingClassifier.predict_proba方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_bagging_classifier_with_missing_inputs
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
def test_bagging_classifier_with_missing_inputs():
    """Check that BaggingClassifier can accept X with missing/infinite data.

    A pipeline that first passes X through ``replace`` is bagged and must
    fit and predict without error; a pipeline without that step must raise
    ``ValueError`` both on its own and when wrapped in a BaggingClassifier.
    """
    # Every row after the first carries one missing or non-finite value.
    # ``-np.inf`` is used instead of ``np.NINF``, which NumPy 2.0 removed.
    X = np.array([
        [1, 3, 5],
        [2, None, 6],
        [2, np.nan, 6],
        [2, np.inf, 6],
        [2, -np.inf, 6],
    ])
    y = np.array([3, 6, 6, 6, 6])

    classifier = DecisionTreeClassifier()
    # NOTE(review): ``replace`` is not defined in this snippet; presumably it
    # maps the non-finite entries to finite values -- confirm at its definition.
    pipeline = make_pipeline(
        FunctionTransformer(replace, validate=False),
        classifier
    )
    pipeline.fit(X, y).predict(X)

    bagging_classifier = BaggingClassifier(pipeline)
    bagging_classifier.fit(X, y)
    y_hat = bagging_classifier.predict(X)
    assert_equal(y.shape, y_hat.shape)
    bagging_classifier.predict_log_proba(X)
    bagging_classifier.predict_proba(X)

    # Verify that exceptions can be raised by wrapper classifier
    classifier = DecisionTreeClassifier()
    pipeline = make_pipeline(classifier)
    assert_raises(ValueError, pipeline.fit, X, y)
    bagging_classifier = BaggingClassifier(pipeline)
    assert_raises(ValueError, bagging_classifier.fit, X, y)
示例2: test_parallel_classification
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
def test_parallel_classification():
    """Check that bagging predictions do not depend on ``n_jobs``.

    The same ensemble is queried with different ``n_jobs`` settings, and an
    ensemble trained with ``n_jobs=1`` is compared against one trained with
    ``n_jobs=3``; this is done for ``predict_proba`` (decision trees) and
    for ``decision_function`` (SVC).
    """
    split_state = check_random_state(0)
    split = train_test_split(iris.data, iris.target, random_state=split_state)
    X_train, X_test, y_train, y_test = split

    # ---- predict_proba ----
    tree_bag = BaggingClassifier(DecisionTreeClassifier(), n_jobs=3, random_state=0)
    ensemble = tree_bag.fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    proba_one_job = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    proba_two_jobs = ensemble.predict_proba(X_test)
    assert_array_almost_equal(proba_one_job, proba_two_jobs)

    tree_bag_serial = BaggingClassifier(DecisionTreeClassifier(), n_jobs=1, random_state=0)
    ensemble = tree_bag_serial.fit(X_train, y_train)
    proba_serial_fit = ensemble.predict_proba(X_test)
    assert_array_almost_equal(proba_one_job, proba_serial_fit)

    # ---- decision_function ----
    svc_bag = BaggingClassifier(SVC(decision_function_shape="ovr"), n_jobs=3, random_state=0)
    ensemble = svc_bag.fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    scores_one_job = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    scores_two_jobs = ensemble.decision_function(X_test)
    assert_array_almost_equal(scores_one_job, scores_two_jobs)

    svc_bag_serial = BaggingClassifier(SVC(decision_function_shape="ovr"), n_jobs=1, random_state=0)
    ensemble = svc_bag_serial.fit(X_train, y_train)
    scores_serial_fit = ensemble.decision_function(X_test)
    assert_array_almost_equal(scores_one_job, scores_serial_fit)
示例3: test_probability
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
def test_probability():
    """Predict probabilities.

    For each ensemble, every row of ``predict_proba`` must sum to one and
    ``predict_proba`` must equal ``exp(predict_log_proba)``.
    """
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=rng)

    # (base estimator class, extra BaggingClassifier keyword arguments)
    scenarios = (
        # Normal case
        (DecisionTreeClassifier, {}),
        # Degenerate case, where some classes are missing
        (LogisticRegression, {"max_samples": 5}),
    )

    with np.errstate(divide="ignore", invalid="ignore"):
        for base_cls, extra_kwargs in scenarios:
            ensemble = BaggingClassifier(base_estimator=base_cls(),
                                         random_state=rng,
                                         **extra_kwargs).fit(X_train, y_train)

            row_totals = np.sum(ensemble.predict_proba(X_test), axis=1)
            assert_array_almost_equal(row_totals, np.ones(len(X_test)))

            from_log = np.exp(ensemble.predict_log_proba(X_test))
            assert_array_almost_equal(ensemble.predict_proba(X_test), from_log)
示例4: adaboost_train
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
def adaboost_train(train_file, test_file,
                   output_path="/home/chuangxin/Bagging_result.csv"):
    """Fit a bagged ``LR`` model, print AUC scores and write test predictions.

    Despite its name, this function trains a ``BaggingClassifier`` (10
    estimators, 60% of samples and 60% of features each), not AdaBoost.

    Parameters
    ----------
    train_file : passed to ``readFile``; its result is unpacked as
        ``(_, x, y)``.
    test_file : passed to ``readFile``; its result is unpacked as
        ``(ids, x2)``.
    output_path : str, optional
        Destination of the CSV with columns ``id`` and ``sentiment``.
        Defaults to the path that used to be hard-coded.

    Returns
    -------
    The fitted ``BaggingClassifier``.
    """
    from sklearn.ensemble import BaggingClassifier
    from sklearn.feature_selection import SelectKBest, chi2
    from sklearn.metrics import roc_auc_score
    from sklearn.preprocessing import scale
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:  # scikit-learn < 0.18 only ships the old module
        from sklearn.cross_validation import train_test_split

    _, x, y = readFile(train_file)
    print('reading done.')
    ts = x.shape[0]
    # Renamed from ``id`` so the builtin is not shadowed.
    test_ids, x2 = readFile(test_file)
    print(x.shape)
    print(x2.shape)

    # Scale train and test rows together, then split them apart again.
    x = np.concatenate((x, x2))
    print('concatenate done.')
    x = scale(x, with_mean=False)
    print('scale done.')
    x2 = x[ts:]
    x = x[0:ts]

    # The selector is fitted on the training rows and then applied to the
    # test rows too, so both matrices keep the same columns.  Previously only
    # ``x`` was reduced, leaving ``x2`` with a different feature count than
    # the model was trained on.
    selector = SelectKBest(chi2, k=50000)
    x = selector.fit_transform(x, y)
    x2 = selector.transform(x2)

    # 80/20 hold-out split on row indices.
    tmp_array = np.arange(x.shape[0])
    train_i, test_i = train_test_split(tmp_array, train_size=0.8, random_state=500)
    train_x = x[train_i]
    test_x = x[test_i]
    train_y = y[train_i]
    test_y = y[test_i]

    # NOTE(review): ``LR`` is imported elsewhere; presumably LogisticRegression.
    bagging = BaggingClassifier(LR(penalty='l2', dual=True), n_estimators=10,
                                max_samples=0.6, max_features=0.6)
    bagging.fit(train_x, train_y)
    print('train done.')

    res = bagging.predict(train_x)
    print(res)

    # AUC on the training part, using the probability of the second class.
    res = bagging.predict_proba(train_x)
    print(res)
    score = roc_auc_score(train_y, res[:, 1])
    print(score)
    print('-----------------------------------------')
    print(res[:, 1])

    # AUC on the held-out part.
    res = bagging.predict_proba(test_x)
    score = roc_auc_score(test_y, res[:, 1])
    print(score)

    y = bagging.predict_proba(x2)
    output = pd.DataFrame(data={"id": test_ids, "sentiment": y[:, 1]})
    # quoting=3 is csv.QUOTE_NONE
    output.to_csv(output_path, index=False, quoting=3)
    return bagging
示例5: test_parallel_classification
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
def test_parallel_classification():
    """Check that bagging predictions do not depend on ``n_jobs``.

    Covers ``predict_proba`` with decision trees and ``decision_function``
    with SVC, and additionally checks the error raised by
    ``decision_function`` when the input has one extra feature column.
    """
    split_state = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, random_state=split_state)

    # ---- predict_proba ----
    ensemble = BaggingClassifier(
        DecisionTreeClassifier(), n_jobs=3, random_state=0,
    ).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    proba_one_job = ensemble.predict_proba(X_test)
    ensemble.set_params(n_jobs=2)
    proba_two_jobs = ensemble.predict_proba(X_test)
    assert_array_almost_equal(proba_one_job, proba_two_jobs)

    ensemble = BaggingClassifier(
        DecisionTreeClassifier(), n_jobs=1, random_state=0,
    ).fit(X_train, y_train)
    proba_serial_fit = ensemble.predict_proba(X_test)
    assert_array_almost_equal(proba_one_job, proba_serial_fit)

    # ---- decision_function ----
    ensemble = BaggingClassifier(
        SVC(gamma='scale', decision_function_shape='ovr'),
        n_jobs=3, random_state=0,
    ).fit(X_train, y_train)

    ensemble.set_params(n_jobs=1)
    scores_one_job = ensemble.decision_function(X_test)
    ensemble.set_params(n_jobs=2)
    scores_two_jobs = ensemble.decision_function(X_test)
    assert_array_almost_equal(scores_one_job, scores_two_jobs)

    # Input widened by one all-zero column must be rejected.
    extra_column = np.zeros((X_test.shape[0], 1))
    X_err = np.hstack((X_test, extra_column))
    expected_message = (
        "Number of features of the model "
        "must match the input. Model n_features is {0} "
        "and input n_features is {1} "
    ).format(X_test.shape[1], X_err.shape[1])
    assert_raise_message(ValueError, expected_message,
                         ensemble.decision_function, X_err)

    ensemble = BaggingClassifier(
        SVC(gamma='scale', decision_function_shape='ovr'),
        n_jobs=1, random_state=0,
    ).fit(X_train, y_train)
    scores_serial_fit = ensemble.decision_function(X_test)
    assert_array_almost_equal(scores_one_job, scores_serial_fit)
示例6: main
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
def main():
    """Train a bagged random forest on booked rows and write a submission.

    Reads ``input/train.csv`` and ``input/test.csv`` in chunks of 100000
    rows, fits a ``BaggingClassifier`` of two random forests on the rows
    with ``is_booking == 1``, predicts class probabilities for the test set,
    reduces each probability row with ``get5Best`` and writes the result to
    ``submission_random_forest.csv``.
    """
    # The competition datafiles are in the directory /input
    # Read output csv format in case the file does not exists
    submit = pd.read_csv('sample_submission.csv')

    # Feature columns are listed once and reused for training and testing so
    # the two matrices cannot drift apart.
    feature_cols = ['site_name', 'user_location_region', 'is_package', 'srch_adults_cnt', 'srch_children_cnt', 'srch_destination_id', 'hotel_market', 'hotel_country']
    target_col = 'hotel_cluster'
    train_cols = feature_cols + [target_col]

    print("Loading training csv.")
    train_chunk = pd.read_csv('input/train.csv', chunksize=100000)
    print("Training csv loaded.")

    # Collect the booked rows of every chunk and concatenate once at the end;
    # concatenating inside the loop re-copies all earlier rows each time.
    booked_parts = []
    for chunk in train_chunk:
        booked_parts.append(chunk[chunk['is_booking'] == 1][train_cols])
        print("Chunk done")
    train = pd.concat(booked_parts)

    x_train = train[feature_cols].values
    y_train = train[target_col].values

    # Run RandomForest on training data
    print("Training RandomForest.")
    rf = RandomForestClassifier(n_estimators=50, max_depth=10, n_jobs=4)
    bclf = BaggingClassifier(rf, n_estimators=2, n_jobs=4)
    bclf.fit(x_train, y_train)
    print("Training done.")

    print("Loading testing csv.")
    test_chunk = pd.read_csv('input/test.csv', chunksize=100000)
    print("Begin testing each chunk.")

    # Same collect-then-concatenate approach for the per-chunk probabilities.
    proba_parts = []
    for i, chunk in enumerate(test_chunk):
        test_X = np.nan_to_num(chunk[feature_cols].values)
        proba_parts.append(bclf.predict_proba(test_X))
        print("Chunk id: " + str(i))
    predict = np.concatenate(proba_parts)

    # ``get5Best`` is defined elsewhere and is applied to each probability row.
    submit['hotel_cluster'] = np.apply_along_axis(get5Best, 1, predict)
    submit.to_csv('submission_random_forest.csv', index=False)
示例7: query_by_bagging
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
def query_by_bagging(X, y, current_model, batch_size, rng, base_model=SVC(C=1, kernel='linear'), n_bags=5, method="KL", D=None):
    """
    Select ``batch_size`` unlabeled samples using a bagged committee.

    A ``BaggingClassifier`` of ``n_bags`` copies of ``base_model`` is fitted
    on the samples selected by ``y.known`` and scored on the remaining ones.

    :param X: feature matrix, indexed with the boolean mask ``y.known``
    :param y: label container exposing ``known`` (boolean mask) and
        ``unknown_ids``
    :param current_model: not used by this strategy
    :param batch_size: number of samples to select
    :param rng: passed as ``random_state`` to the ``BaggingClassifier``
    :param base_model: Model that will be **fitted every iteration**
    :param n_bags: Number of bags on which train n_bags models
    :param method: 'entropy' or 'KL'
    :param D: not used by this strategy
    :return: tuple ``(selected ids taken from y.unknown_ids,
        fitness / max(fitness))``
    """
    assert method == 'entropy' or method == 'KL'
    eps = 0.0000001
    if method == 'KL':
        assert hasattr(base_model, 'predict_proba'), "Model with probability prediction needs to be passed to this strategy!"

    # The unlabeled rows are needed by both branches; slice them once.
    X_unknown = X[np.invert(y.known)]

    clfs = BaggingClassifier(base_model, n_estimators=n_bags, random_state=rng)
    clfs.fit(X[y.known], y[y.known])
    # Consensus (committee-averaged) class probabilities.
    pc = clfs.predict_proba(X_unknown)

    # Settles page 17
    if method == 'entropy':
        pc += eps
        # Sum of p*log(p) is the negative entropy, so the smallest values
        # are the most uncertain samples.
        fitness = np.sum(pc * np.log(pc), axis=1)
        ids = np.argsort(fitness)[:batch_size]
    elif method == 'KL':
        p = np.array([clf.predict_proba(X_unknown) for clf in clfs.estimators_])
        # Floor the log arguments at eps: a member probability of exactly 0
        # used to give 0 * log(0) = NaN and poisoned the whole fitness
        # vector.  Terms with p == 0 now contribute 0; terms where both
        # probabilities exceed eps are unchanged.
        log_ratio = np.log(np.maximum(p, eps) / np.maximum(pc, eps))
        # Mean over committee members of KL(member || consensus).
        fitness = np.mean(np.sum(p * log_ratio, axis=2), axis=0)
        ids = np.argsort(fitness)[-batch_size:]

    return y.unknown_ids[ids], fitness/np.max(fitness)
示例8: create_estimators
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
def create_estimators(self, X_train, y_train, X_test):
    """Fit one bagged classifier per (model, parameter set) and store its predictions.

    For every model in ``self.models`` and every parameter dict produced by
    ``self.create_parameter_grid(model)``, the model is reconfigured in place
    with ``set_params``, wrapped in a ``BaggingClassifier`` and fitted.  The
    second column of ``predict_proba(X_test)`` is appended to
    ``self.predictions``.
    """
    for model in self.models:
        for parameters in self.create_parameter_grid(model):
            # set_params mutates ``model`` and returns it.
            configured = model.set_params(**parameters)
            bagged = BaggingClassifier(
                base_estimator=configured,
                n_estimators=self.estimators,
                max_samples=0.95,
                n_jobs=3,
            )
            bagged.fit(X_train, y_train)
            second_class_proba = bagged.predict_proba(X_test)[:, 1]
            self.predictions.append(second_class_proba)
示例9: train_and_test
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
def train_and_test(X_train, X_test, y_train, y_test):
    """Fit a 500-estimator bagging ensemble and return its ROC AUC on the test set.

    Parameters
    ----------
    X_train, y_train : training features and labels.
    X_test, y_test : evaluation features and labels; label ``1`` is treated
        as the positive class.

    Returns
    -------
    The area under the ROC curve computed from the second column of
    ``predict_proba``.  The local is called ``loss`` for historical reasons,
    but it holds an AUC value.
    """
    forest = BaggingClassifier(n_estimators=500, random_state=1234)
    forest = forest.fit(X_train, y_train)

    proba = forest.predict_proba(X_test)
    proba = proba[:, 1]
    y_test = np.array(y_test)

    fpr, tpr, thresholds = metrics.roc_curve(y_test, proba, pos_label=1)
    loss = metrics.auc(fpr, tpr)
    # Single-argument call form prints identically on Python 2 and Python 3.
    print(loss)
    return loss
示例10: BaggingClassifier
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
class BaggingClassifier(BaseEstimator):
    """Thin wrapper that builds an ``SK_BaggingClassifier`` from a config dict."""

    def __init__(self, base_estimator=None, bag_kwargs=None):
        """Create the wrapped bagging classifier.

        Parameters
        ----------
        base_estimator : dict or None
            Mapping with keys ``'class'`` (passed to ``dynamic_load``) and
            ``'params'`` (keyword arguments for the loaded class).  When
            ``None``, no base estimator is built and ``None`` is forwarded,
            leaving the choice to ``SK_BaggingClassifier``.
        bag_kwargs : dict or None
            Extra keyword arguments for ``SK_BaggingClassifier``; ``None``
            means no extra arguments.
        """
        # Both defaults used to crash (``None['class']`` and ``**None``);
        # they are now treated as "nothing specified".
        if base_estimator is None:
            inner = None
        else:
            klass = dynamic_load(base_estimator['class'])
            inner = klass(**base_estimator['params'])
        extra = {} if bag_kwargs is None else bag_kwargs
        self.__clf = SK_BaggingClassifier(base_estimator=inner, **extra)

    def fit(self, X, y):
        """Fit the wrapped classifier and return whatever its ``fit`` returns."""
        return self.__clf.fit(X, y)

    def predict_proba(self, X):
        """Return the wrapped classifier's class probabilities for ``X``."""
        return self.__clf.predict_proba(X)
示例11: predict_with_best_model
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
def predict_with_best_model(estimator, xtrain, ytrain, xtest):
    """Bag ``estimator`` ten times and return class probabilities for ``xtest``.

    Each of the 10 estimators is trained on 90% of the samples and 90% of
    the features, drawn without replacement (``bootstrap=False``).

    Parameters
    ----------
    estimator : base estimator handed to ``BaggingClassifier``.
    xtrain, ytrain : training features and labels.
    xtest : features to predict on.

    Returns
    -------
    The output of ``predict_proba(xtest)``.
    """
    from sklearn.ensemble import BaggingClassifier

    model = BaggingClassifier(base_estimator=estimator, n_estimators=10, max_samples=0.9, max_features=0.9, n_jobs=1,
                              bootstrap=False, bootstrap_features=False, oob_score=False)
    model = model.fit(xtrain, ytrain)
    y = model.predict_proba(xtest)

    # Single-argument call form prints identically on Python 2 and Python 3.
    print("Model used: ")
    print(model.base_estimator_)
    return y
示例12: train_predict
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
def train_predict(train_file, test_file, predict_valid_file, predict_test_file,
                  n_est=100, depth=4, lrate=.1, n_fold=5):
    """Cross-validate and train a bagged XGBoost classifier, saving predictions.

    Out-of-fold predictions for the training set are written to
    ``predict_valid_file``; predictions for the test set, made after
    refitting on all training data, are written to ``predict_test_file``.
    Both files hold the second column of ``predict_proba``.
    """
    logging.info('Loading training and test data...')
    X, y = load_svmlight_file(train_file)
    X_tst, _ = load_svmlight_file(test_file)

    # Convert the loaded matrices to dense form.
    X = X.todense()
    X_tst = X_tst.todense()

    xg = XGBoostClassifier(n_estimators=n_est, eta=lrate, max_depth=depth,
                           n_jobs=8)
    clf = BaggingClassifier(base_estimator=xg, n_estimators=5,
                            max_samples=0.9, max_features=0.9,
                            random_state=42)

    folds = StratifiedKFold(y, n_folds=n_fold, shuffle=True, random_state=2015)

    logging.info('Cross validation...')
    p_val = np.zeros_like(y)
    total_loss = 0.
    for train_idx, valid_idx in folds:
        clf.fit(X[train_idx], y[train_idx])
        p_val[valid_idx] = clf.predict_proba(X[valid_idx])[:, 1]
        total_loss += log_loss(y[valid_idx], p_val[valid_idx])

    # Report the mean log loss over the folds.
    logging.info('Log Loss = {:.4f}'.format(total_loss / n_fold))

    logging.info('Retraining with 100% data...')
    clf.fit(X, y)
    p_tst = clf.predict_proba(X_tst)[:, 1]

    logging.info('Saving predictions...')
    np.savetxt(predict_valid_file, p_val, fmt='%.6f')
    np.savetxt(predict_test_file, p_tst, fmt='%.6f')
示例13: BaggingLearner
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
class BaggingLearner(AbstractLearner):
    """Learner backed by a ``BaggingClassifier`` of ``KNeighborsClassifier``."""

    def __init__(self):
        """Create the unfitted bagging ensemble."""
        neighbours = KNeighborsClassifier()
        self.learner = BaggingClassifier(neighbours)

    def _train(self, x_train, y_train):
        """Fit the ensemble and keep the object returned by ``fit``."""
        fitted = self.learner.fit(x_train, y_train)
        self.learner = fitted

    def _predict(self, x):
        """Return the ensemble's predictions for ``x``."""
        predictions = self.learner.predict(x)
        return predictions

    def _predict_proba(self, x):
        """Return the ensemble's class probabilities for ``x``."""
        probabilities = self.learner.predict_proba(x)
        return probabilities
示例14: phenotype_imputation
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
def phenotype_imputation(data, config):
    """
    Impute the labels on II based on the classifier learned on I.

    For every fold, a bagged logistic regression is fitted on the samples
    selected for learning and its second-class probability is stored as a
    soft label for the samples selected for imputing.  The soft labels and
    the per-fold ROC AUC are saved through ``config.save_variable``.

    Parameters
    ----------
    data : an object of class Dataset that contains: genotypes, covariates,
        labels and information about random folds
    config : an object of class ConfigState. It contains the user-entered
        parameters in a YAML format.
        See the config_file parameter in the main script for more details.
    """
    # Parameters for this task
    num_folds = data.num_folds
    task_name = "phenotype_imputation"
    n_estimators = config.get_entry(task_name, "n_estimators")
    romans_trn = config.get_entry(task_name, "romans_used_for_learning")
    romans_tst = config.get_entry(task_name, "romans_used_for_imputing")

    # Number of samples to impute, taken from the first fold column.
    # NOTE(review): ``romans_tst`` is passed bare here but wrapped in a list
    # inside the loop -- confirm which form the helper expects.
    first_fold_hits = find_vec_entries_that_contain(data.folds[:, 0], romans_tst)
    size_of_two = first_fold_hits.shape[0]
    soft_labels = np.zeros((size_of_two, num_folds))

    # Covariates are standardised on the transposed matrix, then transposed back.
    X_scaled = preprocessing.scale(data.clin_covariate.transpose()).transpose()

    fpr, tpr, thres = dict(), dict(), dict()
    roc_auc = np.zeros(num_folds)

    # Iterate through the folds:
    for i, fold in enumerate(data.folds.transpose()):
        logging.info("Fold=%d" % (i + 1))
        sel_trn = find_vec_entries_that_contain(fold, [romans_trn])
        sel_tst = find_vec_entries_that_contain(fold, [romans_tst])

        # Alternative settings noted by the original author for small set I:
        #   max_samples=0.8 and bootstrap=False
        model = BaggingClassifier(
            base_estimator=linear_model.LogisticRegression(),
            n_estimators=n_estimators,
            max_samples=0.632,
            max_features=5,
            bootstrap=True,
            bootstrap_features=True,
            oob_score=False,
            n_jobs=1,
            random_state=None,
            verbose=0,
        )

        model.fit(X_scaled[:, sel_trn].transpose(),
                  data.labels[:, sel_trn].transpose())
        soft_labels[:, i] = model.predict_proba(
            X_scaled[:, sel_tst].transpose())[:, 1]

        fpr[i], tpr[i], thres[i] = metrics.roc_curve(data.labels[0, sel_tst],
                                                     soft_labels[:, i])
        roc_auc[i] = metrics.auc(fpr[i], tpr[i])

    # Save the output of this task
    config.save_variable(task_name, "%f", soft_labels=soft_labels, roc_auc=roc_auc)
示例15: train_predict
# 需要导入模块: from sklearn.ensemble import BaggingClassifier [as 别名]
# 或者: from sklearn.ensemble.BaggingClassifier import predict_proba [as 别名]
def train_predict(train_file, test_file, predict_valid_file, predict_test_file,
                  n_est=100, depth=4, lrate=.1, n_fold=5):
    """Cross-validate and train a bagged gradient-boosting classifier.

    Parameters
    ----------
    train_file, test_file : paths read with ``load_svmlight_file``.
    predict_valid_file : output path for the out-of-fold predictions on the
        training set.
    predict_test_file : output path for the test-set predictions made after
        refitting on all training data.
    n_est, depth, lrate : ``n_estimators``, ``max_depth`` and
        ``learning_rate`` of the ``GBM`` base estimator.
    n_fold : number of stratified cross-validation folds.

    Both output files hold the second column of ``predict_proba``, one value
    per line formatted as ``%.6f``.
    """
    logging.info('Loading training and test data...')
    X, y = load_svmlight_file(train_file)
    X_tst, _ = load_svmlight_file(test_file)

    # Convert the loaded matrices to dense form.
    X = X.todense()
    X_tst = X_tst.todense()

    logging.info('Validation...')
    gbm = GBM(max_depth=depth, learning_rate=lrate, n_estimators=n_est,
              random_state=2015)

    clf = BG(base_estimator=gbm, n_estimators=5, max_samples=0.8,
             max_features=0.8, bootstrap=True, bootstrap_features=True,
             random_state=42, verbose=0)

    cv = StratifiedKFold(y, n_folds=n_fold, shuffle=True, random_state=2015)

    logging.info('Cross validation...')
    p_val = np.zeros_like(y)
    lloss = 0.
    for i_trn, i_val in cv:
        clf.fit(X[i_trn], y[i_trn])
        p_val[i_val] = clf.predict_proba(X[i_val])[:, 1]
        lloss += log_loss(y[i_val], p_val[i_val])

    # ``lloss`` accumulates one log loss per fold; report the mean rather
    # than the sum so the figure does not scale with ``n_fold``.
    logging.info('Log Loss = {:.4f}'.format(lloss / n_fold))

    logging.info('Retraining with 100% data...')
    clf.fit(X, y)
    p_tst = clf.predict_proba(X_tst)[:, 1]

    logging.info('Saving predictions...')
    np.savetxt(predict_valid_file, p_val, fmt='%.6f')
    np.savetxt(predict_test_file, p_tst, fmt='%.6f')