This page collects representative usage examples of the Python method sklearn.ensemble.VotingClassifier.predict_proba. If you have been wondering what VotingClassifier.predict_proba does, how it is used, and what it looks like in practice, the curated code samples below should help. You can also explore further usage examples of the containing class, sklearn.ensemble.VotingClassifier.
In total, 15 code examples of VotingClassifier.predict_proba are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
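Before turning to the collected examples, here is a minimal, self-contained sketch written for this page (the iris data and the estimator settings are illustrative, not taken from any example below) of what VotingClassifier.predict_proba returns:

from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

X, y = load_iris(return_X_y=True)
clf1 = LogisticRegression(max_iter=1000, random_state=1)
clf2 = RandomForestClassifier(random_state=1)
clf3 = GaussianNB()

# predict_proba is only exposed for voting='soft'; it returns the (optionally
# weighted) average of the base estimators' predicted class probabilities.
eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
                        voting='soft').fit(X, y)
proba = eclf.predict_proba(X[:5])  # shape (5, n_classes); each row sums to 1

With voting='hard' the method is unavailable (accessing it raises AttributeError), which is why every example below that calls predict_proba builds the ensemble with voting='soft'.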
Example 1: test_set_params
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
def test_set_params():
"""set_params should be able to set estimators"""
clf1 = LogisticRegression(random_state=123, C=1.0)
clf2 = RandomForestClassifier(random_state=123, max_depth=None)
clf3 = GaussianNB()
eclf1 = VotingClassifier([('lr', clf1), ('rf', clf2)], voting='soft',
weights=[1, 2])
assert_true('lr' in eclf1.named_estimators)
assert_true(eclf1.named_estimators.lr is eclf1.estimators[0][1])
assert_true(eclf1.named_estimators.lr is eclf1.named_estimators['lr'])
eclf1.fit(X, y)
assert_true('lr' in eclf1.named_estimators_)
assert_true(eclf1.named_estimators_.lr is eclf1.estimators_[0])
assert_true(eclf1.named_estimators_.lr is eclf1.named_estimators_['lr'])
eclf2 = VotingClassifier([('lr', clf1), ('nb', clf3)], voting='soft',
weights=[1, 2])
eclf2.set_params(nb=clf2).fit(X, y)
assert_false(hasattr(eclf2, 'nb'))
assert_array_equal(eclf1.predict(X), eclf2.predict(X))
assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
assert_equal(eclf2.estimators[0][1].get_params(), clf1.get_params())
assert_equal(eclf2.estimators[1][1].get_params(), clf2.get_params())
eclf1.set_params(lr__C=10.0)
eclf2.set_params(nb__max_depth=5)
assert_true(eclf1.estimators[0][1].get_params()['C'] == 10.0)
assert_true(eclf2.estimators[1][1].get_params()['max_depth'] == 5)
assert_equal(eclf1.get_params()["lr__C"],
eclf1.get_params()["lr"].get_params()['C'])
Example 2: test_sample_weight
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
def test_sample_weight():
"""Tests sample_weight parameter of VotingClassifier"""
clf1 = LogisticRegression(random_state=123)
clf2 = RandomForestClassifier(random_state=123)
clf3 = SVC(probability=True, random_state=123)
eclf1 = VotingClassifier(estimators=[
('lr', clf1), ('rf', clf2), ('svc', clf3)],
voting='soft').fit(X, y, sample_weight=np.ones((len(y),)))
eclf2 = VotingClassifier(estimators=[
('lr', clf1), ('rf', clf2), ('svc', clf3)],
voting='soft').fit(X, y)
assert_array_equal(eclf1.predict(X), eclf2.predict(X))
assert_array_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
sample_weight = np.random.RandomState(123).uniform(size=(len(y),))
eclf3 = VotingClassifier(estimators=[('lr', clf1)], voting='soft')
eclf3.fit(X, y, sample_weight)
clf1.fit(X, y, sample_weight)
assert_array_equal(eclf3.predict(X), clf1.predict(X))
assert_array_equal(eclf3.predict_proba(X), clf1.predict_proba(X))
clf4 = KNeighborsClassifier()
eclf3 = VotingClassifier(estimators=[
('lr', clf1), ('svc', clf3), ('knn', clf4)],
voting='soft')
msg = ('Underlying estimator \'knn\' does not support sample weights.')
assert_raise_message(ValueError, msg, eclf3.fit, X, y, sample_weight)
Example 3: test_estimator_weights_format
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
def test_estimator_weights_format():
# Test estimator weights inputs as list and array
clf1 = LogisticRegression(random_state=123)
clf2 = RandomForestClassifier(random_state=123)
eclf1 = VotingClassifier(estimators=[("lr", clf1), ("rf", clf2)], weights=[1, 2], voting="soft")
eclf2 = VotingClassifier(estimators=[("lr", clf1), ("rf", clf2)], weights=np.array((1, 2)), voting="soft")
eclf1.fit(X, y)
eclf2.fit(X, y)
assert_array_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
Example 4: test_set_estimator_none
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
def test_set_estimator_none():
"""VotingClassifier set_params should be able to set estimators as None"""
# Test predict
clf1 = LogisticRegression(random_state=123)
clf2 = RandomForestClassifier(random_state=123)
clf3 = GaussianNB()
eclf1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
('nb', clf3)],
voting='hard', weights=[1, 0, 0.5]).fit(X, y)
eclf2 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2),
('nb', clf3)],
voting='hard', weights=[1, 1, 0.5])
eclf2.set_params(rf=None).fit(X, y)
assert_array_equal(eclf1.predict(X), eclf2.predict(X))
assert_true(dict(eclf2.estimators)["rf"] is None)
assert_true(len(eclf2.estimators_) == 2)
assert_true(all([not isinstance(est, RandomForestClassifier) for est in
eclf2.estimators_]))
assert_true(eclf2.get_params()["rf"] is None)
eclf1.set_params(voting='soft').fit(X, y)
eclf2.set_params(voting='soft').fit(X, y)
assert_array_equal(eclf1.predict(X), eclf2.predict(X))
assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
msg = ('All estimators are None. At least one is required'
' to be a classifier!')
assert_raise_message(
ValueError, msg, eclf2.set_params(lr=None, rf=None, nb=None).fit, X, y)
# Test soft voting transform
X1 = np.array([[1], [2]])
y1 = np.array([1, 2])
eclf1 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
voting='soft', weights=[0, 0.5],
flatten_transform=False).fit(X1, y1)
eclf2 = VotingClassifier(estimators=[('rf', clf2), ('nb', clf3)],
voting='soft', weights=[1, 0.5],
flatten_transform=False)
eclf2.set_params(rf=None).fit(X1, y1)
assert_array_almost_equal(eclf1.transform(X1),
np.array([[[0.7, 0.3], [0.3, 0.7]],
[[1., 0.], [0., 1.]]]))
assert_array_almost_equal(eclf2.transform(X1),
np.array([[[1., 0.],
[0., 1.]]]))
eclf1.set_params(voting='hard')
eclf2.set_params(voting='hard')
assert_array_equal(eclf1.transform(X1), np.array([[0, 0], [1, 1]]))
assert_array_equal(eclf2.transform(X1), np.array([[0], [1]]))
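Note on the transform assertions above: with voting='soft' and flatten_transform=False, transform(X) returns an array of shape (n_classifiers, n_samples, n_classes), one probability block per fitted estimator; an estimator disabled via set_params(rf=None) is dropped from estimators_, which is why eclf2 yields a single block where eclf1 yields two. With voting='hard', transform instead returns the matrix of predicted class labels.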
Example 5: test_parallel_predict
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
def test_parallel_predict():
"""Check parallel backend of VotingClassifier on toy dataset."""
clf1 = LogisticRegression(random_state=123)
clf2 = RandomForestClassifier(random_state=123)
clf3 = GaussianNB()
X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
y = np.array([1, 1, 2, 2])
eclf1 = VotingClassifier(estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="soft", n_jobs=1).fit(X, y)
eclf2 = VotingClassifier(estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="soft", n_jobs=2).fit(X, y)
assert_array_equal(eclf1.predict(X), eclf2.predict(X))
assert_array_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
Example 6: process_cell
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
def process_cell(self, df_cell_train, df_cell_test, window):
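    # NOTE: th is assumed to be a module-level minimum-occurrence threshold for place_id; it is not defined in this snippet.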
place_counts = df_cell_train.place_id.value_counts()
mask = (place_counts[df_cell_train.place_id.values] >= th).values
df_cell_train = df_cell_train.loc[mask]
# Working on df_test
row_ids = df_cell_test.index
# Preparing data
le = LabelEncoder()
y = le.fit_transform(df_cell_train.place_id.values)
X = df_cell_train.drop(['place_id', ], axis=1).values.astype(int)
X_test = df_cell_test.values.astype(int)
# Applying the classifier
clf1 = KNeighborsClassifier(n_neighbors=50, weights='distance',
metric='manhattan')
clf2 = RandomForestClassifier(n_estimators=50, n_jobs=-1)
eclf = VotingClassifier(estimators=[('knn', clf1), ('rf', clf2)], voting='soft')
eclf.fit(X, y)
y_pred = eclf.predict_proba(X_test)
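    # Rank classes by predicted probability (descending) and keep the three most likely place_ids per row.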
pred_labels = le.inverse_transform(np.argsort(y_pred, axis=1)[:, ::-1][:, :3])
return pred_labels, row_ids
Example 7: process_one_cell
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
def process_one_cell(df_train, df_test, x_min, x_max, y_min, y_max):
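    # NOTE: th (minimum place_id count) and fw (x/y feature weights) are assumed module-level globals not shown in this snippet.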
x_border_augment = 0.025
y_border_augment = 0.0125
#Working on df_train
df_cell_train = df_train[(df_train['x'] >= x_min-x_border_augment) & (df_train['x'] < x_max+x_border_augment) &
(df_train['y'] >= y_min-y_border_augment) & (df_train['y'] < y_max+y_border_augment)]
place_counts = df_cell_train.place_id.value_counts()
mask = (place_counts[df_cell_train.place_id.values] >= th).values
df_cell_train = df_cell_train.loc[mask]
#Working on df_test
    # to be deleted: df_cell_test = df_test.loc[df_test.grid_cell == grid_id]
df_cell_test = df_test[(df_test['x'] >= x_min) & (df_test['x'] < x_max) &
(df_test['y'] >= y_min) & (df_test['y'] < y_max)]
row_ids = df_cell_test.index
if(len(df_cell_train) == 0 or len(df_cell_test) == 0):
return None, None
#Feature engineering on x and y
df_cell_train.loc[:,'x'] *= fw[0]
df_cell_train.loc[:,'y'] *= fw[1]
df_cell_test.loc[:,'x'] *= fw[0]
df_cell_test.loc[:,'y'] *= fw[1]
#Preparing data
le = LabelEncoder()
y = le.fit_transform(df_cell_train.place_id.values)
X = df_cell_train.drop(['place_id'], axis=1).values.astype(float)
if 'place_id' in df_cell_test.columns:
cols = df_cell_test.columns
cols = cols.drop('place_id')
X_test = df_cell_test[cols].values.astype(float)
else:
X_test = df_cell_test.values.astype(float)
#Applying the classifier
# clf = KNeighborsClassifier(n_neighbors=26, weights='distance',
# metric='manhattan')
clf1 = BaggingClassifier(KNeighborsClassifier(n_neighbors=26, weights='distance',
metric='manhattan'), n_jobs=-1, n_estimators=50)
clf2 = RandomForestClassifier(n_estimators=100, n_jobs=-1)
    # predict_proba below requires soft voting: a VotingClassifier with voting='hard' has no predict_proba
    eclf = VotingClassifier(estimators=[('knn', clf1), ('rf', clf2)], voting='soft')
eclf.fit(X, y)
y_pred = eclf.predict_proba(X_test)
pred_labels = le.inverse_transform(np.argsort(y_pred, axis=1)[:,::-1][:,:3])
return pred_labels, row_ids
Example 8: test_sample_weight
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
def test_sample_weight():
"""Tests sample_weight parameter of VotingClassifier"""
clf1 = LogisticRegression(random_state=123)
clf2 = RandomForestClassifier(random_state=123)
clf3 = SVC(probability=True, random_state=123)
eclf1 = VotingClassifier(estimators=[
('lr', clf1), ('rf', clf2), ('svc', clf3)],
voting='soft').fit(X, y, sample_weight=np.ones((len(y),)))
eclf2 = VotingClassifier(estimators=[
('lr', clf1), ('rf', clf2), ('svc', clf3)],
voting='soft').fit(X, y)
assert_array_equal(eclf1.predict(X), eclf2.predict(X))
assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
sample_weight = np.random.RandomState(123).uniform(size=(len(y),))
eclf3 = VotingClassifier(estimators=[('lr', clf1)], voting='soft')
eclf3.fit(X, y, sample_weight)
clf1.fit(X, y, sample_weight)
assert_array_equal(eclf3.predict(X), clf1.predict(X))
assert_array_almost_equal(eclf3.predict_proba(X), clf1.predict_proba(X))
# check that an error is raised and indicative if sample_weight is not
# supported.
clf4 = KNeighborsClassifier()
eclf3 = VotingClassifier(estimators=[
('lr', clf1), ('svc', clf3), ('knn', clf4)],
voting='soft')
msg = ('Underlying estimator KNeighborsClassifier does not support '
'sample weights.')
with pytest.raises(ValueError, match=msg):
eclf3.fit(X, y, sample_weight)
# check that _parallel_fit_estimator will raise the right error
# it should raise the original error if this is not linked to sample_weight
class ClassifierErrorFit(BaseEstimator, ClassifierMixin):
def fit(self, X, y, sample_weight):
raise TypeError('Error unrelated to sample_weight.')
    clf = ClassifierErrorFit()
    eclf4 = VotingClassifier(estimators=[('err', clf)])
    with pytest.raises(TypeError, match='Error unrelated to sample_weight'):
        eclf4.fit(X, y, sample_weight=sample_weight)
Example 9: main
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
def main(argv):
    trainX = pd.read_csv('trainingData.txt', sep='\t', header=None)
    trainX.drop(trainX.columns[len(trainX.columns)-1], axis=1, inplace=True)
    trainY = pd.read_csv("trainingTruth.txt", header=None, names=['Y'])
    df = trainX.join(trainY)
    index = df.isnull().sum(axis=1) <= 2
    df = df[index]
    df.fillna(df.median(), inplace=True)
print(len(df))
#df.dropna(axis=0, inplace=True) # drop the row with NA in training.
X = df.iloc[:,0:-1].values
Y = df['Y'].values
Y_binary = np.ones((len(Y),3)) * (-1)
for i in range(3):
index = Y == (i+1)
Y_binary[index,i] = 1
X_scaled = preprocessing.scale(X)
X_PCA = PCA(n_components=30).fit_transform(X_scaled)
clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(random_state=1, n_estimators=20)
clf3 = GaussianNB()
clf4 = DecisionTreeClassifier(max_depth=4)
clf5 = KNeighborsClassifier(n_neighbors=7)
clf6 = SVC(kernel='rbf', probability=True)
clf7 = AdaBoostClassifier(random_state=1)
    testX = pd.read_csv('testData.txt', sep='\t', header=None)
    testX.drop(testX.columns[len(testX.columns)-1], axis=1, inplace=True)
    testX.fillna(testX.median(), inplace=True)  # Handle NA in test data, although not necessary for this assignment.
testX_scaled = preprocessing.scale(testX)
testX_PCA = PCA(n_components=30).fit_transform(testX_scaled)
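    # NOTE: this fits a separate PCA on the test data; its components need not align with the training PCA,
    # so reusing the fitted training PCA (e.g. pca.transform(testX_scaled)) would keep the two feature spaces consistent.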
proba = np.zeros((len(testX),3))
for i in range(3):
eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3),
('dt', clf4), ('kn', clf5), ('svc', clf6)],
voting='soft').fit(X_PCA,Y_binary[:,i])
proba[:,i] = eclf.predict_proba(testX_PCA)[:,1]
# Write to file
results = pd.DataFrame(proba)
results['prediction'] = np.argmax(proba, axis=1) + 1
results.to_csv('testY.txt', sep='\t', header = False, index = False)
print(results.iloc[0:10,:])
Example 10: voting_class
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
def voting_class(X,training_target,Y):
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(random_state=1)
clf3 = GaussianNB()
eclf = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='soft')
eclf.fit(X[:,0:6],training_target)
    proba = eclf.predict_proba(Y[:, 0:6])
    return proba
Example 11: all_classifer
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
def all_classifer(X_train,y_train,X_test,y_test):
    rf = RandomForestClassifier(n_estimators=100, class_weight='balanced')
    score1 = scores(y_test, rf.fit(X_train, y_train).predict(X_test), rf.predict_proba(X_test)[:, 1], 'RT')
    gbc = GradientBoostingClassifier(n_estimators=50, learning_rate=0.05).fit(X_train, y_train)
    score2 = scores(y_test, gbc.fit(X_train, y_train).predict(X_test), gbc.predict_proba(X_test)[:, 1], 'gbc')
    ets = ExtraTreesClassifier(n_estimators=100, max_depth=None, min_samples_split=2, random_state=0)  # min_samples_split must be >= 2 in current scikit-learn
    score3 = scores(y_test, ets.fit(X_train, y_train).predict(X_test), ets.predict_proba(X_test)[:, 1], 'ets')
# lgr = LogisticRegression()
# score4=scores(y_test,lgr.fit(X_train,y_train).predict(X_test),'lgr')
    ab = AdaBoostClassifier(algorithm='SAMME.R', n_estimators=50, learning_rate=0.7)
    score5 = scores(y_test, ab.fit(X_train, y_train).predict(X_test), ab.predict_proba(X_test)[:, 1], 'abboost')
# print roc_auc_score(y_test,clf.predict_proba(X_test)[:,1])
# bagging=BaggingClassifier()
# score8=scores(y_test,bagging.fit(X_train,y_train).predict(X_test),'bagging')
# dt = DecisionTreeClassifier(max_depth=None, min_samples_split=1,random_state=0)
# score6=scores(y_test,dt.fit(X_train,y_train).predict(X_test),'dt')
    eclf = VotingClassifier(estimators=[('rf', rf), ('gd', gbc), ('ETs', ets), ('ab', ab)],
                            voting='soft', weights=[score1[0], score2[0], score3[0], score5[0]])
    score7 = scores(y_test, eclf.fit(X_train, y_train).predict(X_test), eclf.predict_proba(X_test)[:, 1], 'voting')
    print(eclf)
    return [score1, score2, score3, score5, score7]
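The voting weights here reuse the first element of each scores(...) return value, so base models that scored better on the test set get proportionally more influence in the soft vote.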
Example 12: VtClassifier
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
class VtClassifier(Model):
'''
    Voting Classifier
'''
def __init__(self, *args):
Model.__init__(self)
self.modelIndex = ['GNB', 'SVClassifier', 'LRModel', 'ABClassifier', 'GBClassifier']
self.models = []
self.estimators = []
for arg in args:
index = self.modelIndex.index(arg)
if index == 0:
self.models.append(Model())
self.estimators.append((arg, Model().model))
elif index == 1:
self.models.append(SVClassifier())
self.estimators.append((arg, SVClassifier().model))
elif index == 2:
self.models.append(LRModel())
self.estimators.append((arg, LRModel().model))
elif index == 3:
self.models.append(ABClassifier())
self.estimators.append((arg, ABClassifier().model))
elif index == 4:
self.models.append(GBClassifier())
self.estimators.append((arg, GBClassifier().model))
        self.model = VotingClassifier(estimators=self.estimators, voting='soft')  # soft voting, because predict() below relies on predict_proba
def train(self, data, target):
for model in self.models:
model.train(data, target)
self.model.fit(data, target)
def predict(self, test):
return self.model.predict_proba(test)
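A hypothetical usage of this wrapper, assuming the Model, LRModel, and GBClassifier classes referenced above are importable:

vt = VtClassifier('LRModel', 'GBClassifier')
vt.train(X_train, y_train)
proba = vt.predict(X_test)  # class-probability estimates from the soft-voting ensemble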
Example 13: log_loss
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
xgb2 = xgb.XGBClassifier(max_depth=11,
n_estimators=100,
learning_rate=0.03,
subsample=0.96,
colsample_bytree=0.45,
colsample_bylevel=0.45,
objective='binary:logistic',
nthread=4,
seed=1313)
#score = log_loss(y_test, extc.predict_proba(X_test)[:, 1])
X_train, X_test, y_train, y_test = model_selection.train_test_split(train, target, random_state=1301, test_size=0.3)  # sklearn.cross_validation was removed in 0.20; use sklearn.model_selection
clfs = [('etc', etc1), ('rf', rf1), ('xgb', xgb1), ('etc2', etc2)]
# set up the ensemble from the four base classifiers above
clf = VotingClassifier(estimators=clfs, voting='soft', weights=[1, 1, 1, 1])
st = time.time()
scores = model_selection.cross_val_score(clf, X_train, y_train, scoring='neg_log_loss', cv=5, verbose=2)  # the 'log_loss' scorer is now 'neg_log_loss'
print(scores.mean()*-1)
print("time elaspe", time.time() - st)
exit()
clf.fit(train, target)
print('Predict...')
y_pred = clf.predict_proba(test)
# print y_pred
pd.DataFrame({"ID": id_test, "PredictedProb": y_pred[:, 1]}).to_csv('data/extra_trees_1_7.csv', index=False)
Example 14: LogisticRegression
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
####################################
clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(n_estimators=200,max_depth = 15,random_state=1)
clf3 = GaussianNB()
clf4 = xgb.XGBClassifier(missing=np.nan, max_depth=15, n_estimators=200, learning_rate=0.02, nthread=16, subsample=0.95, colsample_bytree=0.85, seed=4242)
clf5 = AdaBoostClassifier(n_estimators=300, learning_rate=0.02,random_state=1)
eclf1 = VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3), ('xgb', clf4),('adb',clf5)], voting='soft')
print("fitting..")
eclf1 = eclf1.fit(X_train, y_train)
print("predicting..")
rfpreds = eclf1.predict_proba(X_test)
print("arrived at verdict..")
###################################
x, y, thresholds = roc_curve(y_test, rfpreds[:, 1], pos_label=1)
plt.figure()
plt.plot(x,y)
plt.show()
print(auc(x, y))
bestMCCR = 0
for threshold in thresholds:
    predicted = rfpreds[:, 1] > threshold
    CCR1, CCR2, mCCR = MCCR(predicted, y_test, 0, 1)
    bestMCCR = max(bestMCCR, mCCR)
Example 15: roc_auc_score
# Required module: from sklearn.ensemble import VotingClassifier [as alias]
# Or: from sklearn.ensemble.VotingClassifier import predict_proba [as alias]
bagged_rf.fit(X_train, y_train)
print "bagged rf test",roc_auc_score(y_test, bagged_rf.predict_proba(X_test)[:,1])
#print "bagged rf train",roc_auc_score(y_train, bagged_rf.predict_proba(X_train)[:,1])
'''print "Calibrating Bagged Decision Trees..."
calibrated_dt.fit(X_train, y_train)
print "calibrated_dt test:", roc_auc_score(y_test, calibrated_dt.predict_proba(X_test)[:,1])
print "Calibrating Bagged Random Forests..."
calibrated_rf.fit(X_train, y_train)
print "calibrated_rf test:", roc_auc_score(y_test, calibrated_rf.predict_proba(X_test)[:,1])
'''
print "Voting with all models...."
voted_model = VotingClassifier(estimators=[('one', ada), ('two', bagged_rf), ('four', bagged_dt)], voting='soft')
voted_model.fit(X_train, y_train)
print "Voted Model test:",roc_auc_score(y_test, voted_model.predict_proba(X_test)[:,1])
#print "Voted Model train",roc_auc_score(y_train, voted_model.predict_proba(X_train)[:,1])
####Loading test file and saving predictions
print "Saving Voted Submission"
X_test = np.genfromtxt ('test_normal_286.csv', delimiter=",")
ncounts = np.zeros((X_test.shape[0], 1))
for i in range(0, X_test.shape[0]):
ncounts[i, 0] = (X_test[i, :] == 0).sum(0)
X_test = np.append(X_test, ncounts, axis = 1)
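# NOTE: clusters is assumed to be a previously fitted clustering model (not shown in this snippet).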
categories_test = clusters.predict(X_test)
cats = np.zeros((len(categories_test), 1))
for i in range(0, cats.shape[0]):
cats[i, 0] = categories_test[i]