This article compiles typical usage examples of the CalibratedClassifierCV.fit method from Python's sklearn.calibration module. If you have been wondering how exactly to use CalibratedClassifierCV.fit, the curated code examples below may help. You can also read more about its containing class, sklearn.calibration.CalibratedClassifierCV.
Below are 15 code examples of CalibratedClassifierCV.fit, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Python code examples.
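Before diving into the examples, here is a minimal orientation sketch of the two calibration patterns that recur throughout them: letting CalibratedClassifierCV run its own cross-validation (cv as an integer), and calibrating an already-fitted model on held-out data (cv='prefit'). The dataset and base estimator below are illustrative assumptions, not taken from any example on this page.

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Any binary-classification data works the same way; this is synthetic.
X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

# Pattern 1: internal cross-validation fits and calibrates in one call.
cal_cv = CalibratedClassifierCV(LogisticRegression(), method='sigmoid', cv=5)
cal_cv.fit(X_train, y_train)

# Pattern 2: calibrate a model that was already fitted, using held-out data.
base = LogisticRegression().fit(X_train, y_train)
cal_prefit = CalibratedClassifierCV(base, method='sigmoid', cv='prefit')
cal_prefit.fit(X_val, y_val)

# Either way, calibrated probabilities come from predict_proba.
probs = cal_prefit.predict_proba(X_val)

Note that the first positional argument is named base_estimator in older scikit-learn releases and estimator in newer ones; passing it positionally, as the examples below do, works in both.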
Example 1: setTrainDataAndMakeModel
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def setTrainDataAndMakeModel(X_train, Y_train, X_test):
    clf = MultinomialNB(alpha=125535, class_prior=None, fit_prior=True)
    calibrated_clf = CalibratedClassifierCV(clf, method='isotonic', cv=5)
    calibrated_clf.fit(X_train, Y_train)
    ypreds = calibrated_clf.predict_proba(X_test)
    return ypreds
Example 2: simple_model
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def simple_model(data, test):
    targets = data.target
    X, tX, y, ty = train_test_split(data.drop("target", axis=1),
                                    targets,
                                    test_size=0.2,
                                    random_state=2016)
    predictions = []
    print("\n\nTraining")

    # Sklearn GBM
    clf = GradientBoostingClassifier(n_estimators=2500,
                                     learning_rate=0.026,
                                     max_depth=2,
                                     random_state=2015)
    cal = CalibratedClassifierCV(clf, cv=5, method="isotonic")
    cal.fit(X, y)
    pred = cal.predict_proba(tX)[:, 1]
    print("\n\tValidation for Calibrated GBM")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))
    # ens["gbm"] = pred
    predictions.append(cal.predict_proba(test)[:, 1])

    # XGBoost
    data = X.values
    label = y.values
    dtrain = xgb.DMatrix(data, label=label)
    datat = tX.values
    dtest = xgb.DMatrix(datat)
    param = {}
    param['objective'] = 'binary:logistic'
    param['eta'] = 0.1
    param['max_depth'] = 8
    param['eval_metric'] = 'auc'
    param['silent'] = 1
    param['min_child_weight'] = 2
    param['subsample'] = 0.5
    param['colsample_bytree'] = 0.5
    param['nthread'] = 4
    num_round = 50
    bst = xgb.train(param, dtrain, num_round)
    pred = bst.predict(dtest)
    print("\n\tValidation for XGBoost")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))
    # ens["xgb"] = pred
    # Append the XGBoost predictions on the submission test set
    predictions.append(bst.predict(xgb.DMatrix(test.values)))

    predictions = sum(predictions) / len(predictions)
    return predictions
Example 3: svc_test2
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def svc_test2():
    """
    Submission:
    E_val:
    E_in:
    E_out:
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC
    from sklearn.cross_validation import StratifiedKFold
    from sklearn.calibration import CalibratedClassifierCV

    X, y = dataset.load_train()

    raw_scaler = StandardScaler()
    raw_scaler.fit(X)
    X_scaled = raw_scaler.transform(X)

    svc = SVC(kernel='linear', class_weight='auto', cache_size=10240)
    svc.fit(X_scaled, y)

    isotonic = CalibratedClassifierCV(svc, cv=StratifiedKFold(y, 5),
                                      method='isotonic')
    isotonic.fit(X_scaled, y)

    logger.debug('Got best isotonic CalibratedClassifier.')
    logger.debug('E_in (isotonic): %f', Util.auc_score(isotonic, X_scaled, y))
Example 4: train_model_rfc_calibrated
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def train_model_rfc_calibrated(features, labels):
    # First, set aside some of the training set for calibration.
    # Use a stratified shuffle split so that class ratios are maintained after the split.
    splitter = StratifiedShuffleSplit(labels, n_iter=1, train_size=0.7, random_state=30)
    # Length is 1 in this case since we have a single fold for splitting.
    print(len(splitter))
    for train_idx, calib_idx in splitter:
        features_train, features_calib = features[train_idx], features[calib_idx]
        labels_train, labels_calib = labels[train_idx], labels[calib_idx]

    print("features_train shape: ", features_train.shape)
    print("features_calib shape: ", features_calib.shape)
    print("labels_train shape: ", labels_train.shape)
    print("labels_calib shape: ", labels_calib.shape)

    print("Performing Grid Search ...")
    # params_dict = {'criterion': ['entropy'], 'n_estimators': [30, 35, 40, 45], 'max_depth': [5, 6], 'min_samples_leaf': [1, 2, 5], 'min_samples_split': [2, 5, 10]}
    params_dict = {'criterion': ['entropy'], 'n_estimators': [60, 70, 80, 90], 'max_depth': [5, 6], 'min_samples_leaf': [1, 2, 5], 'min_samples_split': [2, 5, 10], 'max_features': [6, 7, 8]}
    clf = GridSearchCV(rfc(random_state=30, n_jobs=4), params_dict, scoring='roc_auc', cv=5)
    clf.fit(features_train, labels_train)
    print("Best estimator: ", clf.best_estimator_)
    print("Best score: %.4f" % (clf.best_score_))
    # print("Best grid scores: ", clf.grid_scores_)

    # Perform calibration.
    # Use 'sigmoid' because sklearn cautions against using 'isotonic' with fewer
    # than 1000 calibration samples, as it can result in overfitting.
    print("Performing Calibration now ...")
    sigmoid = CalibratedClassifierCV(clf, cv='prefit', method='sigmoid')
    sigmoid.fit(features_calib, labels_calib)
    return sigmoid
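Example 4's choice of 'sigmoid' over 'isotonic' for a small calibration split can be checked empirically. The following is a minimal sketch under assumed synthetic data and an assumed random forest (neither comes from the example): it fits both calibration methods on the same held-out split and compares their log loss on a third, untouched split.

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

# Three splits: fit the model, fit the calibrator, evaluate the result.
X, y = make_classification(n_samples=600, random_state=0)
X_fit, X_rest, y_fit, y_rest = train_test_split(X, y, train_size=0.5, stratify=y, random_state=30)
X_calib, X_eval, y_calib, y_eval = train_test_split(X_rest, y_rest, train_size=0.5, stratify=y_rest, random_state=30)

rf = RandomForestClassifier(n_estimators=100, random_state=30).fit(X_fit, y_fit)
for method in ('sigmoid', 'isotonic'):
    cal = CalibratedClassifierCV(rf, method=method, cv='prefit').fit(X_calib, y_calib)
    # With only ~150 calibration samples, isotonic regression tends to overfit
    # the calibration split, which is what the sklearn documentation warns about.
    print(method, log_loss(y_eval, cal.predict_proba(X_eval)))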
Example 5: main
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def main():
    X, Y, encoder, scale = load_train_data('train.csv')
    estimators = 500
    X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=0.2, random_state=0)
    X_train_real, X_test_real, Y_train_real, Y_test_real = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)
    log.info('Loaded training file')
    X_test, _ = load_csv_file('test.csv', cut_end=False)
    log.info('Loaded test file')

    # Classifier setup
    tree_clf = ExtraTreesClassifier(n_estimators=estimators, n_jobs=-1,
                                    random_state=42, max_depth=55, min_samples_split=1)
    clf = make_pipeline(TfidfTransformer(), DenseTransformer(), tree_clf)

    log.info('Fitting ExtraTrees pipeline')
    clf.fit(X_train_real, Y_train_real)
    clf_probs = clf.predict_proba(X_test_real)
    score = log_loss(Y_test_real, clf_probs)
    log.info('Log Loss score un-trained = %f' % score)

    # Calibrate the classifier using the ground truth in X_valid, Y_valid
    sig_clf = CalibratedClassifierCV(clf, method="isotonic", cv="prefit")
    log.info('Fitting CalibratedClassifierCV')
    sig_clf.fit(X_valid, Y_valid)
    sig_clf_probs = sig_clf.predict_proba(X_test_real)
    sig_score = log_loss(Y_test_real, sig_clf_probs)
    log.info('Log loss score trained = %f' % sig_score)

    # Now predict the test data with the calibrated classifier
    sig_submission_probs = sig_clf.predict_proba(X_test)
    write_out_submission(sig_submission_probs, 'submission.csv')
Example 6: train
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def train(model_id, train_x, train_y, valid_x, valid_y, test_x):
    train_x, train_y = shuffle(train_x, train_y)
    random_state = random.randint(0, 1000000)
    print('random state: {state}'.format(state=random_state))

    clf = RandomForestClassifier(bootstrap=False, class_weight=None,
                                 criterion='entropy', max_depth=29008, max_features=36,
                                 max_leaf_nodes=None, min_samples_leaf=5, min_samples_split=3,
                                 min_weight_fraction_leaf=0.0, n_estimators=4494, n_jobs=8,
                                 oob_score=False, random_state=979271, verbose=0,
                                 warm_start=False)
    clf.fit(train_x, train_y)

    ccv = CalibratedClassifierCV(base_estimator=clf, method="sigmoid", cv="prefit")
    ccv.fit(valid_x, valid_y)

    valid_predictions = ccv.predict_proba(valid_x)
    test_predictions = ccv.predict_proba(test_x)

    loss = test(valid_y, valid_predictions, True)
    if loss < 0.52:
        data.saveData(valid_predictions, "../valid_results/valid_" + str(model_id) + ".csv")
        data.saveData(test_predictions, "../results/results_" + str(model_id) + ".csv")
Example 7: internal_processing
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def internal_processing(self, X, y, X_test):
    """
    """
    Xs = np.hsplit(X, 5)
    Xts = np.hsplit(X_test, 5)
    Xts_cal = []
    for i in range(len(Xs)):
        Xts_cal.append(calibrate(Xs[i], y, Xts[i]))
    XX_test = np.hstack(Xts_cal)

    ec = EC(n_preds=5)
    ec.fit(X, y)
    y_ens = ec.predict_proba(XX_test)
    # y_pred = ec.predict_proba(X_test)

    # validation
    yv = ec.predict_proba(X)
    print('Weights: %s' % (ec.w))
    print('Validation log-loss: %s' % (logloss_mc(y, yv)))

    cc = CalibratedClassifierCV(base_estimator=EC(n_preds=5),
                                method='isotonic', cv=10)
    cc.fit(X, y)
    y_cal = cc.predict_proba(XX_test)

    y_pred = (y_ens + y_cal) / 2.
    return y_pred
Example 8: setTrainTestDataAndCheckModel
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def setTrainTestDataAndCheckModel(X_train, Y_train, X_test, Y_test):
    model = RandomForestClassifier(125)
    model.fit(X_train, Y_train)
    '''
    clf = GridSearchCV(model, {'n_estimators': [100, 125, 150]}, verbose=1)
    clf.fit(X_train, Y_train)
    print(clf.best_score_)
    print(clf.best_params_)

    output = model.predict(X_test)
    print("-------------------RFC-----------------------")
    # print(accuracy_score(Y_test, output))
    # print("%.2f" % log_loss(Y_test, output, eps=1e-15, normalize=True))

    ypreds = model.predict_proba(X_test)
    print("%.2f" % log_loss(Y_test, ypreds, eps=1e-15, normalize=True))

    clfbag = BaggingClassifier(model, n_estimators=5)
    clfbag.fit(X_train, Y_train)
    ypreds = clfbag.predict(X_test)
    # print(accuracy_score(Y_test, ypreds))
    ypreds = clfbag.predict_proba(X_test)
    print("%.2f" % log_loss(Y_test, ypreds, eps=1e-15, normalize=True))
    '''
    calibrated_clf = CalibratedClassifierCV(model, method='isotonic', cv=5)
    calibrated_clf.fit(X_train, Y_train)
    # ypreds = calibrated_clf.predict(X_test)
    # print(accuracy_score(Y_test, ypreds))
    ypreds = calibrated_clf.predict_proba(X_test)
    print("%.2f" % log_loss(Y_test, ypreds, eps=1e-15, normalize=True))
Example 9: simple_model
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def simple_model(data, test):
    targets = data.target
    X, tX, y, ty = train_test_split(data.drop("target", axis=1),
                                    targets,
                                    test_size=0.2,
                                    random_state=2016)
    predictions = []
    print("\n\nTraining")

    # Sklearn random forest
    clf = RandomForestClassifier(n_estimators=2500,
                                 max_depth=2,
                                 random_state=2015)
    cal = CalibratedClassifierCV(clf, cv=5, method="isotonic")
    cal.fit(X, y)
    pred = cal.predict_proba(tX)[:, 1]
    print("\n\tValidation for Calibrated RFC")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))
    # ens["rfc"] = pred
    predictions.append(cal.predict_proba(test)[:, 1])

    predictions = sum(predictions) / len(predictions)
    return predictions
Example 10: calibrate_probs
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def calibrate_probs(y_val, prob_val, prob_test, n_folds=2, method='isotonic', random_state=5968):
    """ Calling from R:
        suppressMessages(library("rPython"))   # Load rPython
        python.load("path/to/util_rpython.py")
        data.pred.calib <- python.call('calibrate_probs',
                                       y_val=y_val,         # Actual values from validation
                                       prob_val=pred_val,   # Predicted values from validation
                                       prob_test=pred_test) # Predicted values from test
        # data.pred.calib will be a list, so to get the calibrated predictions for each value we do:
        calib_pred_val = data.pred.calib$val
        calib_pred_test = data.pred.calib$test
    """
    y_val = np.asarray(y_val, dtype=float)
    prob_val = np.asarray(prob_val, dtype=float).reshape((-1, 1))
    prob_test = np.asarray(prob_test, dtype=float).reshape((-1, 1))

    prob_clb_val = np.zeros(len(y_val))
    prob_clb_test = np.zeros(len(prob_test))

    kf_val_full = KFold(len(y_val), n_folds=n_folds, random_state=random_state)
    for ix_train, ix_test in kf_val_full:
        kf_val_inner = KFold(len(ix_train), n_folds=n_folds, random_state=random_state)
        clf = CalibratedClassifierCV(method=method, cv=kf_val_inner)
        clf.fit(prob_val[ix_train], y_val[ix_train])
        prob_clb_val[ix_test] = clf.predict_proba(prob_val[ix_test])[:, 1]
        prob_clb_test += clf.predict_proba(prob_test)[:, 1] / n_folds
    return {'val': list(prob_clb_val), 'test': list(prob_clb_test)}
Example 11: test_sample_weight_warning
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def test_sample_weight_warning():
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=len(y))
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test = X[n_samples:]

    for method in ['sigmoid', 'isotonic']:
        base_estimator = LinearSVC(random_state=42)
        calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
        # LinearSVC does not currently support sample weights, but they
        # can still be used for the calibration step (with a warning)
        msg = "LinearSVC does not support sample_weight."
        assert_warns_message(
            UserWarning, msg,
            calibrated_clf.fit, X_train, y_train, sample_weight=sw_train)
        probs_with_sw = calibrated_clf.predict_proba(X_test)

        # As the weights are used for the calibration, they should still yield
        # different predictions
        calibrated_clf.fit(X_train, y_train)
        probs_without_sw = calibrated_clf.predict_proba(X_test)

        diff = np.linalg.norm(probs_with_sw - probs_without_sw)
        assert_greater(diff, 0.1)
Example 12: calibrate
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def calibrate(X_val, y_val, estimator):
    clf = CalibratedClassifierCV(base_estimator=estimator,
                                 method='isotonic', cv='prefit')
    clf.fit(X_val, y_val)
    return clf
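A brief usage note on this helper: with cv='prefit', fit here learns only the calibration mapping, so the estimator passed in must already be trained on a separate split. A hedged sketch of how it might be called, assuming a LinearSVC and synthetic data (both are illustrative, not part of the example):

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=1000, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)

svc = LinearSVC(random_state=0).fit(X_train, y_train)  # fitted on the training split only
clf = calibrate(X_val, y_val, svc)                     # calibrated on held-out data
probs = clf.predict_proba(X_val)                       # LinearSVC alone exposes no predict_proba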
Example 13: move_bias
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def move_bias(self, data_matrix, estimator=None, nu=.5, cv=2):
    '''
    Move the bias until a fraction nu of data_matrix falls in the negative class,
    then use scikit-learn's calibration to calibrate self.estimator around the input.
    '''
    # move bias
    # l = [(estimator.decision_function(g)[0], g) for g in data_matrix]
    # l.sort(key=lambda x: x[0])
    # element = int(len(l) * nu)
    # estimator.intercept_ -= l[element][0]
    scores = [estimator.decision_function(sparse_vector)[0]
              for sparse_vector in data_matrix]
    scores_sorted = sorted(scores)
    pivot = scores_sorted[int(len(scores_sorted) * self.nu)]
    estimator.intercept_ -= pivot

    # calibrate
    if self.move_bias_recalibrate:
        # data_matrix_binary = vstack([a[1] for a in l])
        # data_y = numpy.asarray([0] * element + [1] * (len(l) - element))
        data_y = numpy.asarray([1 if score >= pivot else -1 for score in scores])
        self.testimator = SGDClassifier(loss='log')
        self.testimator.fit(data_matrix, data_y)
        # estimator = CalibratedClassifierCV(estimator, cv=cv, method='sigmoid')
        estimator = CalibratedClassifierCV(self.testimator, cv=cv, method='sigmoid')
        estimator.fit(data_matrix, data_y)
    return estimator
Example 14: get_score
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def get_score(self, params):
    params['n_estimators'] = int(params['n_estimators'])
    params['max_depth'] = int(params['max_depth'])
    params['min_samples_split'] = int(params['min_samples_split'])
    params['min_samples_leaf'] = int(params['min_samples_leaf'])
    print('Training with params:')
    print(params)

    # cross-validation here
    scores = []
    for train_ix, test_ix in makeKFold(5, self.y, 1):
        X_train, y_train = self.X[train_ix, :], self.y[train_ix]
        X_test, y_test = self.X[test_ix, :], self.y[test_ix]
        weight = y_train.shape[0] / (2 * np.bincount(y_train))
        sample_weight = np.array([weight[i] for i in y_train])

        clf = RandomForestClassifier(**params)
        cclf = CalibratedClassifierCV(base_estimator=clf,
                                      method='isotonic',
                                      cv=makeKFold(3, y_train, 1))
        cclf.fit(X_train, y_train, sample_weight)
        pred = cclf.predict(X_test)
        scores.append(f1_score(y_true=y_test, y_pred=pred))

    print(scores)
    score = np.mean(scores)
    print(score)
    return {'loss': -score, 'status': STATUS_OK}
Example 15: prepare_model
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or alternatively: from sklearn.calibration.CalibratedClassifierCV import fit [as alias]
def prepare_model(self, obj_fn=None, num_steps=None, model_params=None, batch_size: int = None):
    model = CalibratedClassifierCV(KNeighborsClassifier(**model_params), method="sigmoid")
    model_clf = model.fit(self.ds[self.data_groups["data_train_group"]].to_ndarray(),
                          self.ds[self.data_groups["target_train_group"]].to_ndarray())
    cal_model = CalibratedClassifierCV(model_clf, method="sigmoid", cv="prefit")
    cal_model.fit(self.ds[self.data_groups["data_validation_group"]].to_ndarray(),
                  self.ds[self.data_groups["target_validation_group"]].to_ndarray())
    return self.ml_model(cal_model)