本文整理汇总了Python中sklearn.calibration.CalibratedClassifierCV.predict_proba方法的典型用法代码示例。如果您正苦于以下问题:Python CalibratedClassifierCV.predict_proba方法的具体用法?Python CalibratedClassifierCV.predict_proba怎么用?Python CalibratedClassifierCV.predict_proba使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.calibration.CalibratedClassifierCV
的用法示例。
在下文中一共展示了CalibratedClassifierCV.predict_proba方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_sample_weight_warning
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def test_sample_weight_warning():
    """LinearSVC ignores sample_weight (with a warning), but the weights must
    still influence the calibration step and therefore the probabilities."""
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=len(y))
    X_train = X[:n_samples]
    y_train = y[:n_samples]
    sw_train = sample_weight[:n_samples]
    X_test = X[n_samples:]

    for method in ['sigmoid', 'isotonic']:
        base_estimator = LinearSVC(random_state=42)
        calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)

        # LinearSVC does not currently support sample weights but they
        # can still be used for the calibration step (with a warning)
        msg = "LinearSVC does not support sample_weight."
        assert_warns_message(
            UserWarning, msg,
            calibrated_clf.fit, X_train, y_train, sample_weight=sw_train)
        probs_with_sw = calibrated_clf.predict_proba(X_test)

        # Refit without weights: since the weights feed the calibrator, the
        # two probability sets should differ measurably.
        calibrated_clf.fit(X_train, y_train)
        probs_without_sw = calibrated_clf.predict_proba(X_test)

        diff = np.linalg.norm(probs_with_sw - probs_without_sw)
        assert_greater(diff, 0.1)
示例2: calibrate_probs
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def calibrate_probs(y_val, prob_val, prob_test, n_folds=2, method='isotonic', random_state=5968):
    """ Calling from R:
    suppressMessages(library("rPython")) # Load RPython
    python.load("path/to/util_rpython.py")
    data.pred.calib <- python.call('calibrate_probs',
    y_val=y_val, # Actual values from validation
    prob_val=pred_val, # Predicted values from validation
    prob_test=pred_test) # Predicted values from test
    # data.pred.calib will be a list, so to get the calibrated predictions for each value we do:
    calib_pred_val = data.pred.calib$val
    calib_pred_test = data.pred.calib$test
    """
    # Coerce R-supplied vectors to float arrays; probabilities become a
    # single-feature column matrix as sklearn expects.
    y_val = np.asarray(y_val, dtype=float)
    prob_val = np.asarray(prob_val, dtype=float).reshape((-1, 1))
    prob_test = np.asarray(prob_test, dtype=float).reshape((-1, 1))

    calibrated_val = np.zeros(len(y_val))
    calibrated_test = np.zeros(len(prob_test))

    # Outer CV over the validation set: each calibrator is fitted on the
    # other folds, fills in its held-out slice, and contributes an equal
    # share of the test-set predictions.
    outer_cv = KFold(len(y_val), n_folds=n_folds, random_state=random_state)
    for fit_idx, hold_idx in outer_cv:
        inner_cv = KFold(len(fit_idx), n_folds=n_folds, random_state=random_state)
        calibrator = CalibratedClassifierCV(method=method, cv=inner_cv)
        calibrator.fit(prob_val[fit_idx], y_val[fit_idx])
        calibrated_val[hold_idx] = calibrator.predict_proba(prob_val[hold_idx])[:, 1]
        calibrated_test += calibrator.predict_proba(prob_test)[:, 1] / n_folds
    return {'val': list(calibrated_val), 'test': list(calibrated_test)}
示例3: simple_model
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def simple_model(data, test):
    """Fit an isotonic-calibrated RandomForest on an 80/20 split of ``data``,
    print validation log-loss / AUC, and return test-set probabilities."""
    targets = data.target
    X, tX, y, ty = train_test_split(data.drop("target", axis=1),
                                    targets,
                                    test_size=0.2,
                                    random_state=2016)
    predictions = []
    print("\n\nTraining")
    # Sklearn GBM
    forest = RandomForestClassifier(n_estimators=2500,
                                    max_depth=2,
                                    random_state=2015)
    cal = CalibratedClassifierCV(forest, cv=5, method="isotonic")
    cal.fit(X, y)
    pred = cal.predict_proba(tX)[:, 1]
    print("\n\tValidation for Calibrated RFC")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))
    # ens["gbm"] = pred
    predictions.append(cal.predict_proba(test)[:, 1])
    # With a single model the "ensemble mean" is just that model's prediction.
    return sum(predictions) / len(predictions)
示例4: train
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def train(model_id, train_x, train_y, valid_x, valid_y, test_x):
    """Fit a RandomForest on the training split, calibrate it on the
    validation split (sigmoid / Platt scaling, cv='prefit'), and persist the
    calibrated validation/test predictions when validation loss < 0.52.
    """
    train_x, train_y = shuffle(train_x, train_y)
    # Draw a fresh seed so repeated runs explore different forests.
    random_state = random.randint(0, 1000000)
    print('random state: {state}'.format(state=random_state))
    # BUG FIX: the drawn random_state was printed but the classifier used a
    # hard-coded seed (979271), so the logged seed never matched the model.
    # Pass the drawn seed so the run is reproducible from the log.
    clf = RandomForestClassifier(bootstrap=False, class_weight=None,
                                 criterion='entropy', max_depth=29008, max_features=36,
                                 max_leaf_nodes=None, min_samples_leaf=5, min_samples_split=3,
                                 min_weight_fraction_leaf=0.0, n_estimators=4494, n_jobs=8,
                                 oob_score=False, random_state=random_state, verbose=0,
                                 warm_start=False)
    clf.fit(train_x, train_y)
    # Calibrate the already-fitted model on held-out validation data.
    ccv = CalibratedClassifierCV(base_estimator=clf, method="sigmoid", cv="prefit")
    ccv.fit(valid_x, valid_y)
    valid_predictions = ccv.predict_proba(valid_x)
    test_predictions = ccv.predict_proba(test_x)
    loss = test(valid_y, valid_predictions, True)
    if loss < 0.52:
        data.saveData(valid_predictions, "../valid_results/valid_" + str(model_id) + ".csv")
        data.saveData(test_predictions, "../results/results_" + str(model_id) + ".csv")
示例5: main
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def main():
    """Fit a Tfidf→dense→ExtraTrees pipeline, calibrate it on a held-out
    split, and write calibrated test predictions to submission.csv."""
    X, Y, encoder, scale = load_train_data('train.csv')
    estimators = 500
    X_train, X_valid, Y_train, Y_valid = train_test_split(
        X, Y, test_size=0.2, random_state=0)
    X_train_real, X_test_real, Y_train_real, Y_test_real = train_test_split(
        X_train, Y_train, test_size=0.2, random_state=42)
    log.info('Loaded training file')
    X_test, _ = load_csv_file('test.csv', cut_end=False)
    log.info('Loaded test file')

    # Classifier setup
    tree_clf = ExtraTreesClassifier(n_estimators=estimators, n_jobs=-1,
                                    random_state=42, max_depth=55,
                                    min_samples_split=1)
    clf = make_pipeline(TfidfTransformer(), DenseTransformer(), tree_clf)
    log.info('Fitting GradientBoost')
    clf.fit(X_train_real, Y_train_real)
    clf_probs = clf.predict_proba(X_test_real)
    score = log_loss(Y_test_real, clf_probs)
    log.info('Log Loss score un-trained = %f' % score)

    # Calibrate using the ground truth held out in X_valid / Y_valid.
    sig_clf = CalibratedClassifierCV(clf, method="isotonic", cv="prefit")
    log.info('Fitting CalibratedClassifierCV')
    sig_clf.fit(X_valid, Y_valid)
    sig_clf_probs = sig_clf.predict_proba(X_test_real)
    sig_score = log_loss(Y_test_real, sig_clf_probs)
    log.info('Log loss score trained = %f' % sig_score)

    # Predict the real test data with the calibrated classifier.
    sig_submission_probs = sig_clf.predict_proba(X_test)
    write_out_submission(sig_submission_probs, 'submission.csv')
示例6: simple_model
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def simple_model(data, test):
    """Train a calibrated GradientBoosting model and an XGBoost model on an
    80/20 split, print validation metrics for both, and return the mean of
    their test-set probability predictions.
    """
    targets = data.target
    X, tX, y, ty = train_test_split(data.drop("target", axis=1),
                                    targets,
                                    test_size=0.2,
                                    random_state=2016)
    predictions = []
    print("\n\nTraining")
    # Sklearn GBM
    clf = GradientBoostingClassifier(n_estimators=2500,
                                     learning_rate=0.026,
                                     max_depth=2,
                                     random_state=2015)
    cal = CalibratedClassifierCV(clf, cv=5, method="isotonic")
    cal.fit(X, y)
    pred = cal.predict_proba(tX)[:, 1]
    print("\n\tValidation for Calibrated GBM")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))
    # ens["gbm"] = pred
    predictions.append(cal.predict_proba(test)[:, 1])

    # XGBoost
    dtrain = xgb.DMatrix(X.values, label=y.values)
    dtest = xgb.DMatrix(tX.values)
    param = {
        'objective': 'binary:logistic',
        'eta': 0.1,
        'max_depth': 8,
        'eval_metric': 'auc',
        'silent': 1,
        'min_child_weight': 2,
        'subsample': 0.5,
        'colsample_bytree': 0.5,
        'nthread': 4,
    }
    num_round = 50
    bst = xgb.train(param, dtrain, num_round)
    pred = bst.predict(dtest)
    print("\n\tValidation for XGBoost")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))
    # ens["xgb"] = pred
    # BUG FIX: the original appended the calibrated GBM's test predictions a
    # second time here, so XGBoost never contributed to the ensemble even
    # though it was trained and validated above.
    predictions.append(bst.predict(xgb.DMatrix(test.values)))

    predictions = sum(predictions) / len(predictions)
    return predictions
示例7: train_test
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def train_test(self, X, y, X_test):
    """Two-stage stacked model: an XGBoost model and a calibrated
    RandomForest are fitted on one half of the data; their standardised
    predictions are appended as extra features for a second XGBoost model
    fitted on the other half, which produces the final test predictions.
    """
    splitter = StratifiedShuffleSplit(y, 1, test_size=0.5)
    for first_half, second_half in splitter:
        X0, X1 = X[first_half], X[second_half]
        y0, y1 = y[first_half], y[second_half]

        # --- First half: XGBoost with per-class weights from self.w ---
        w0 = np.asarray([self.w[int(label)] for label in y0], dtype=float)
        xg0_train = DMatrix(X0, label=y0, weight=w0)
        xg0_test = DMatrix(X1, label=y1)
        xgt_test = DMatrix(X_test)
        bst0 = my_train_xgboost(self.param, xg0_train, self.num_round)
        y0_pred = bst0.predict(xg0_test).reshape(X1.shape[0], 9)
        yt_pred = bst0.predict(xgt_test).reshape(X_test.shape[0], 9)

        # --- Calibrated RandomForest on the same half ---
        rf = RandomForestClassifier(n_estimators=600, criterion='gini',
                                    class_weight='auto', max_features='auto')
        cal = CalibratedClassifierCV(rf, method='isotonic', cv=3)
        cal.fit(X0, y0)
        y0_cal = cal.predict_proba(X1)
        yt_cal = cal.predict_proba(X_test)

        # --- Second half: stack standardised level-1 outputs as features ---
        ss = StandardScaler()
        y0_pred = ss.fit_transform(y0_pred)
        yt_pred = ss.fit_transform(yt_pred)
        y0_cal = ss.fit_transform(y0_cal)
        yt_cal = ss.fit_transform(yt_cal)
        X1 = np.hstack((X1, y0_pred, y0_cal))
        X_test = np.hstack((X_test, yt_pred, yt_cal))

        w1 = np.asarray([self.w[int(label)] for label in y1], dtype=float)
        # self.param['eta'] = 0.01
        self.num_round = 450
        xg1_train = DMatrix(X1, label=y1, weight=w1)
        xg_test = DMatrix(X_test)
        bst1 = my_train_xgboost(self.param, xg1_train, self.num_round)
        y_pred = bst1.predict(xg_test).reshape(X_test.shape[0], 9)
    return y_pred
示例8: train_validate
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def train_validate(self, X_train, y_train, X_valid, y_valid):
    """Validation counterpart of the two-stage stack: fit XGBoost and a
    calibrated RandomForest on half of the training data, append their
    standardised predictions as features, then fit a second XGBoost model
    on the other half and score it against the validation set.
    """
    splitter = StratifiedShuffleSplit(y_train, 1, test_size=0.5)
    for half_a, half_b in splitter:
        X0_train, X1_train = X_train[half_a], X_train[half_b]
        y0_train, y1_train = y_train[half_a], y_train[half_b]

        # --- First half: weighted XGBoost ---
        w0_train = np.asarray([self.w[int(label)] for label in y0_train],
                              dtype=float)
        xg0_train = DMatrix(X0_train, label=y0_train, weight=w0_train)
        xg0_valid = DMatrix(X1_train, label=y1_train)
        xgv_valid = DMatrix(X_valid, label=y_valid)
        watchlist = [(xg0_train, 'train'), (xg0_valid, 'validation0')]
        # bst0 = train(self.param, xg0_train, self.num_round, watchlist)
        bst0 = my_train_xgboost(self.param, xg0_train, self.num_round, watchlist)
        y0_pred = bst0.predict(xg0_valid).reshape(X1_train.shape[0], 9)
        yv_pred = bst0.predict(xgv_valid).reshape(X_valid.shape[0], 9)

        # --- Calibrated RandomForest on the same half ---
        rf = RandomForestClassifier(n_estimators=600, criterion='gini',
                                    class_weight='auto', max_features='auto')
        cal = CalibratedClassifierCV(rf, method='isotonic', cv=3)
        cal.fit(X0_train, y0_train)
        y0_cal = cal.predict_proba(X1_train)
        yv_cal = cal.predict_proba(X_valid)

        # --- Second half: stack standardised level-1 outputs as features ---
        ss = StandardScaler()
        y0_pred = ss.fit_transform(y0_pred)
        yv_pred = ss.fit_transform(yv_pred)
        y0_cal = ss.fit_transform(y0_cal)
        yv_cal = ss.fit_transform(yv_cal)
        X1_train = np.hstack((X1_train, y0_pred, y0_cal))
        X_valid = np.hstack((X_valid, yv_pred, yv_cal))

        w1_train = np.asarray([self.w[int(label)] for label in y1_train],
                              dtype=float)
        # self.param['eta'] = 0.05
        self.num_round = 450
        xg1_train = DMatrix(X1_train, label=y1_train, weight=w1_train)
        xg_valid = DMatrix(X_valid, label=y_valid)
        watchlist = [(xg1_train, 'train'), (xg_valid, 'validation')]
        # bst1 = train(self.param, xg1_train, self.num_round, watchlist)
        bst1 = my_train_xgboost(self.param, xg1_train, self.num_round, watchlist)
        y_pred = bst1.predict(xg_valid).reshape(X_valid.shape[0], 9)
        # pdb.set_trace()
    return y_pred
示例9: test_calibration_multiclass
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def test_calibration_multiclass():
    """Test calibration for multiclass """
    # Multi-class setting with a classifier that only implements
    # decision_function (no predict_proba).
    clf = LinearSVC()
    X, y_idx = make_blobs(n_samples=100, n_features=2, random_state=42,
                          centers=3, cluster_std=3.0)
    # Categorical (string) labels check that CalibratedClassifierCV
    # supports them correctly.
    target_names = np.array(['a', 'b', 'c'])
    y = target_names[y_idx]
    X_train, y_train = X[::2], y[::2]
    X_test, y_test = X[1::2], y[1::2]
    clf.fit(X_train, y_train)

    def softmax(y_pred):
        e = np.exp(-y_pred)
        return e / e.sum(axis=1).reshape(-1, 1)

    for method in ['isotonic', 'sigmoid']:
        cal_clf = CalibratedClassifierCV(clf, method=method, cv=2)
        cal_clf.fit(X_train, y_train)
        probas = cal_clf.predict_proba(X_test)
        # Probabilities must sum to one for every sample.
        assert_array_almost_equal(np.sum(probas, axis=1), np.ones(len(X_test)))
        # Calibrated log-loss should be no worse than a naive softmax over
        # the OvR decision-function scores.
        uncalibrated_log_loss = \
            log_loss(y_test, softmax(clf.decision_function(X_test)))
        calibrated_log_loss = log_loss(y_test, probas)
        assert_greater_equal(uncalibrated_log_loss, calibrated_log_loss)

    # Calibration of a multiclass RandomForestClassifier should decrease
    # log-loss relative to the raw forest probabilities.
    X, y = make_blobs(n_samples=100, n_features=2, random_state=42,
                      cluster_std=3.0)
    X_train, y_train = X[::2], y[::2]
    X_test, y_test = X[1::2], y[1::2]
    clf = RandomForestClassifier(n_estimators=10, random_state=42)
    clf.fit(X_train, y_train)
    loss = log_loss(y_test, clf.predict_proba(X_test))
    for method in ['isotonic', 'sigmoid']:
        cal_clf = CalibratedClassifierCV(clf, method=method, cv=3)
        cal_clf.fit(X_train, y_train)
        cal_loss = log_loss(y_test, cal_clf.predict_proba(X_test))
        assert_greater(loss, cal_loss)
示例10: trainrf
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def trainrf(model_id, train_x, train_y, valid_x, valid_y, test_x):
    """Fit a randomly-parameterised RandomForest, also try sigmoid
    calibration on the validation split (cv='prefit'), keep whichever
    variant scores better on validation, and persist the predictions when
    the loss beats 0.450.
    """
    train_x, train_y = shuffle(train_x, train_y)
    random_state = random.randint(0, 1000000)
    print('random state: {state}'.format(state=random_state))
    clf = RandomForestClassifier(n_estimators=random.randint(50, 5000),
                                 criterion='gini',
                                 max_depth=random.randint(10, 1000),
                                 min_samples_split=random.randint(2, 50),
                                 min_samples_leaf=random.randint(1, 10),
                                 min_weight_fraction_leaf=random.uniform(0.0, 0.5),
                                 max_features=random.uniform(0.1, 1.0),
                                 # BUG FIX: sklearn requires max_leaf_nodes to be
                                 # None or > 1; randint(1, 10) could draw 1 and
                                 # make fit() raise ValueError at random.
                                 max_leaf_nodes=random.randint(2, 10),
                                 bootstrap=False,
                                 oob_score=False,
                                 n_jobs=30,
                                 random_state=random_state,
                                 verbose=0,
                                 warm_start=True,
                                 class_weight=None
                                 )
    clf.fit(train_x, train_y)
    # Raw (uncalibrated) predictions.
    valid_predictions1 = clf.predict_proba(valid_x)
    test_predictions1 = clf.predict_proba(test_x)
    t1 = test(valid_y, valid_predictions1)
    # Sigmoid calibration on the validation split (model already fitted).
    ccv = CalibratedClassifierCV(base_estimator=clf, method="sigmoid", cv='prefit')
    ccv.fit(valid_x, valid_y)
    valid_predictions2 = ccv.predict_proba(valid_x)
    test_predictions2 = ccv.predict_proba(test_x)
    t2 = test(valid_y, valid_predictions2)
    # Keep whichever variant scored better on validation.
    if t2 < t1:
        valid_predictions = valid_predictions2
        test_predictions = test_predictions2
        t = t2
    else:
        valid_predictions = valid_predictions1
        test_predictions = test_predictions1
        t = t1
    if t < 0.450:
        data.saveData(valid_predictions, "../valid_results/valid_" + str(model_id) + ".csv")
        data.saveData(test_predictions, "../results/results_" + str(model_id) + ".csv")
示例11: internal_processing
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def internal_processing(self, X, y, X_test):
"""
"""
Xs = np.hsplit(X, 5)
Xts = np.hsplit(X_test, 5)
Xts_cal = []
for i in range(len(Xs)):
Xts_cal.append(calibrate(Xs[i], y, Xts[i]))
XX_test = np.hstack(Xts_cal)
ec = EC(n_preds=5)
ec.fit(X, y)
y_ens = ec.predict_proba(XX_test)
# y_pred = ec.predict_proba(X_test)
#validation
yv = ec.predict_proba(X)
print 'Weights: %s' %(ec.w)
print 'Validation log-loss: %s' %(logloss_mc(y, yv))
cc = CalibratedClassifierCV(base_estimator=EC(n_preds=5),
method='isotonic', cv=10)
cc.fit(X, y)
y_cal = cc.predict_proba(XX_test)
y_pred = (y_ens + y_cal)/2.
return y_pred
示例12: setTrainTestDataAndCheckModel
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def setTrainTestDataAndCheckModel(X_train,Y_train,X_test,Y_test):
model = RandomForestClassifier(125)
model.fit(X_train,Y_train)
'''
clf = GridSearchCV(model,{'n_estimators':[100,125,150]},verbose=1)
clf.fit(X_train,Y_train)
print(clf.best_score_)
print(clf.best_params_)
output = model.predict(X_test)
print "-------------------RFC-----------------------"
#print accuracy_score(Y_test,output)
#print "%.2f" % log_loss(Y_test,output, eps=1e-15, normalize=True)
ypreds = model.predict_proba(X_test)
print "%.2f" % log_loss(Y_test,ypreds, eps=1e-15, normalize=True)
clfbag = BaggingClassifier(model, n_estimators=5)
clfbag.fit(X_train, Y_train)
ypreds = clfbag.predict(X_test)
#print accuracy_score(Y_test,ypreds)
ypreds = clfbag.predict_proba(X_test)
print "%.2f" % log_loss(Y_test,ypreds, eps=1e-15, normalize=True)
'''
calibrated_clf = CalibratedClassifierCV(model, method='isotonic', cv=5)
calibrated_clf.fit(X_train, Y_train)
#ypreds = calibrated_clf.predict(X_test)
#print accuracy_score(Y_test,ypreds)
ypreds = calibrated_clf.predict_proba(X_test)
print "%.2f" % log_loss(Y_test, ypreds, eps=1e-15, normalize=True)
示例13: setTrainDataAndMakeModel
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def setTrainDataAndMakeModel(X_train, Y_train, X_test):
    """Fit an isotonic-calibrated MultinomialNB on the training data and
    return class probabilities for X_test."""
    nb = MultinomialNB(alpha=125535, class_prior=None, fit_prior=True)
    calibrated = CalibratedClassifierCV(nb, method='isotonic', cv=5)
    calibrated.fit(X_train, Y_train)
    return calibrated.predict_proba(X_test)
示例14: calibrate_proba_fitted_models
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def calibrate_proba_fitted_models(iDf, iFeatures, iModelsDict):
    """For each pre-fitted model, fit both a sigmoid and an isotonic
    calibrator (cv='prefit') on iDf and keep whichever achieves the lower
    Brier score.

    Returns a dict mapping model name -> ``calibrated_classifiers_`` of the
    winning calibrator.
    """
    iCalibratedModelsDict = {}
    for model_name in iModelsDict.keys():
        # Model names carry an estimator-type suffix; strip it to recover
        # the target column name.
        target = model_name.replace('_gbr', '').replace('_rf', '')
        proba_cal_sig = CalibratedClassifierCV(iModelsDict[model_name], method='sigmoid', cv='prefit')
        proba_cal_iso = CalibratedClassifierCV(iModelsDict[model_name], method='isotonic', cv='prefit')
        proba_cal_sig.fit(iDf.loc[:, iFeatures.values], iDf.loc[:, target].values)
        proba_cal_iso.fit(iDf.loc[:, iFeatures.values], iDf.loc[:, target].values)
        # BUG FIX: the original read ``.value``, which is not a pandas Series
        # attribute and raises AttributeError; it must be ``.values``.
        brier_sig = brier_score_loss(iDf.loc[:, target].values,
                                     proba_cal_sig.predict_proba(iDf.loc[:, iFeatures.values])[:, 1])
        brier_iso = brier_score_loss(iDf.loc[:, target].values,
                                     proba_cal_iso.predict_proba(iDf.loc[:, iFeatures.values])[:, 1])
        if brier_sig <= brier_iso:
            iCalibratedModelsDict[model_name] = proba_cal_sig.calibrated_classifiers_
        else:
            iCalibratedModelsDict[model_name] = proba_cal_iso.calibrated_classifiers_
    return iCalibratedModelsDict
示例15: main
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def main():
    """Train an ExtraTrees→BernoulliRBM→LogisticRegression pipeline,
    calibrate it on a held-out split, and write calibrated test predictions
    to submission.csv."""
    X, Y = load_csv_file('train.csv')
    estimators = 1000
    test_size = 0.05
    X_train, X_valid, Y_train, Y_valid = train_test_split(
        X, Y, test_size=test_size, random_state=0)
    X_train_real, X_test_real, Y_train_real, Y_test_real = train_test_split(
        X_train, Y_train, test_size=test_size, random_state=42)
    log.info('Loaded training file')
    X_test, _ = load_csv_file('test.csv', cut_end=False)
    log.info('Loaded test file')

    # Classifier setup
    logistic = linear_model.LogisticRegression()
    rbm = BernoulliRBM(random_state=0, verbose=True)
    tree_clf = ExtraTreesClassifier(n_estimators=estimators, n_jobs=-1,
                                    random_state=0, max_depth=None)
    rbm.learning_rate = 0.06
    rbm.n_iter = 20
    rbm.n_components = 500
    logistic.C = 6000.0
    pipeline = make_pipeline(tree_clf, rbm, logistic)
    # clf = GridSearchCV(pipeline, param_grid, n_jobs=-1, verbose=1)
    clf = pipeline
    # BUG FIX: the original line was a SyntaxError -- the '%' operator
    # between the format string and str(...) was missing.
    log.info('Fitting Boltzman with %s' % str([name for name, _ in pipeline.steps]))
    clf.fit(X_train_real, Y_train_real)
    clf_probs = clf.predict_proba(X_test_real)
    score = log_loss(Y_test_real, clf_probs)
    log.info('Log Loss score un-trained = %f' % score)

    # Calibrate using the ground truth held out in X_valid / Y_valid.
    sig_clf = CalibratedClassifierCV(clf, method="isotonic", cv="prefit")
    log.info('Fitting CalibratedClassifierCV')
    sig_clf.fit(X_valid, Y_valid)
    sig_clf_probs = sig_clf.predict_proba(X_test_real)
    sig_score = log_loss(Y_test_real, sig_clf_probs)
    log.info('Log loss score trained = %f' % sig_score)

    # Predict the test data with the calibrated classifier and write it out.
    sig_submission_probs = sig_clf.predict_proba(X_test)
    write_out_submission(sig_submission_probs, 'submission.csv')