This article collects typical usage examples of the Python method sklearn.calibration.CalibratedClassifierCV.decision_function. If you have been wondering what CalibratedClassifierCV.decision_function does, how to use it, or where to find examples of it, the curated code samples below may help. You can also explore further usage examples of its containing class, sklearn.calibration.CalibratedClassifierCV.
The section below shows 1 code example of CalibratedClassifierCV.decision_function, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
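Before the full example, here is a minimal, self-contained sketch of the pattern the example relies on. Note that current scikit-learn releases expose predict_proba on CalibratedClassifierCV but not decision_function itself, which is why the example guards that call with hasattr. The dataset and the LinearSVC base estimator below are illustrative placeholders, not part of the original example:

# Minimal sketch of the hasattr-guarded scoring pattern (assumptions:
# synthetic data via make_classification; LinearSVC as a base estimator
# that has decision_function but no predict_proba).
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# calibration adds probability estimates on top of LinearSVC's decision scores
calibrated = CalibratedClassifierCV(LinearSVC(), method="sigmoid", cv=5)
calibrated.fit(X_train, y_train)

if hasattr(calibrated, 'predict_proba'):
    scores = calibrated.predict_proba(X_test)[:, 1]  # calibrated probabilities
else:
    scores = calibrated.decision_function(X_test)    # raw margins (fallback)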
Example 1: calibrate_clf
# Required import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import decision_function [as alias]
# Additional imports needed to run this example (not listed on the original page):
import numpy as np
from sklearn.decomposition import PCA
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
from sklearn.model_selection import KFold, train_test_split
from sklearn.pipeline import Pipeline

def calibrate_clf(pcompa=False):
    # load_data() is a project-specific helper defined elsewhere in the source repo
    #X, training_target, Y_test, Y_test_id = load_data()
    X, Y = load_data()
    test_id = Y[['t_id']].values.flatten()
    Y = Y.drop('t_id', axis=1)
    training_target = X[['target']].values.flatten()
    X = X.drop('target', axis=1)
    X_np = X.values
    Y_np = Y.values
    # split training data into training and validation sets
    X_train, X_val, train_target, val_target = train_test_split(
        X_np, training_target, test_size=0.33, random_state=4)
    # feature selection
    select = SelectKBest(chi2, k=5)
    # dimensionality reduction (PCA)
    pca = PCA(n_components=2, whiten=True)
    # a randomized grid search could be added here for hyperparameter tuning
    clfs = [
        LogisticRegression(),
        #xgb.XGBClassifier(objective='binary:logistic', max_depth=3, n_estimators=300, learning_rate=0.05),
        #KNeighborsClassifier(n_neighbors=100),
        RandomForestClassifier(n_estimators=50, max_depth=6, n_jobs=-1, criterion='gini', random_state=1),
        #RandomForestClassifier(n_estimators=500, n_jobs=-1, criterion='entropy', random_state=1),
        RandomForestClassifier(n_estimators=500, max_depth=3, n_jobs=-1, criterion='entropy', random_state=1),
        #AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), algorithm="SAMME", learning_rate=0.01, n_estimators=50, random_state=1),
        #ExtraTreesClassifier(n_estimators=50, max_depth=6, n_jobs=-1, criterion='gini', random_state=1),
        #ExtraTreesClassifier(n_estimators=100, max_depth=3, min_samples_split=5, min_samples_leaf=5, n_jobs=-1, criterion='gini'),
        #ExtraTreesClassifier(n_estimators=50, max_depth=6, n_jobs=-1, criterion='entropy'),
        GradientBoostingClassifier(learning_rate=0.01, subsample=0.8, loss='exponential', max_depth=6, n_estimators=50)]
    for j, clf in enumerate(clfs):
        print()
        print(j, clf.__class__.__name__)
        print()
        # pipeline with feature selection, PCA, and classifier
        if pcompa:
            #pipeline = Pipeline([('select', select), ('pca', pca), ('clf', clf)])
            pipeline = Pipeline([('pca', pca), ('clf', clf)])
        else:
            #pipeline = Pipeline([('clf', clf)])
            pipeline = Pipeline([('select', select), ('clf', clf)])
        # cross validation (materialize the folds so both loops below reuse them)
        #skf = list(StratifiedKFold(n_splits=2, shuffle=True, random_state=1).split(X_np, training_target))
        skf = list(KFold(n_splits=5, shuffle=False).split(X_np))
        #### Uncalibrated ####
        print("UNCALIBRATED:")
        scores = []
        for k, (train, test) in enumerate(skf):
            pipeline.fit(X_np[train], training_target[train])
            if hasattr(pipeline, 'predict_proba'):
                score = log_loss(training_target[test], pipeline.predict_proba(X_np[test])[:, 1])
            else:
                # decision_function scores are margins, not probabilities,
                # so log_loss over them is only a rough fallback
                score = log_loss(training_target[test], pipeline.decision_function(X_np[test]))
            scores.append(score)
            #print('Fold: %s, Class dist: %s, Log loss: %.3f ' % (k+1, np.bincount(training_target[train]), score))
        print('LogLoss : %.9f +/- %.9f ' % (np.mean(scores), np.std(scores)))
        #### Calibrated ####
        print()
        print("CALIBRATED:")
        scores = []
        for k, (train, test) in enumerate(skf):
            # cv="prefit" expects an already-fitted estimator, so fit the
            # pipeline on part of the fold and calibrate on the held-out rest
            # (the original fitted nothing here and silently reused whatever
            # fit the pipeline retained from the uncalibrated loop)
            X_fit, X_cal, y_fit, y_cal = train_test_split(
                X_np[train], training_target[train], test_size=0.25, random_state=1)
            pipeline.fit(X_fit, y_fit)
            sig_clf = CalibratedClassifierCV(pipeline, method="sigmoid", cv="prefit")
            sig_clf.fit(X_cal, y_cal)
            if hasattr(sig_clf, 'predict_proba'):
                score = log_loss(training_target[test], sig_clf.predict_proba(X_np[test])[:, 1])
            else:
                score = log_loss(training_target[test], sig_clf.decision_function(X_np[test]))
            scores.append(score)
            #print('Fold: %s, Class dist: %s, Log loss: %.3f ' % (k+1, np.bincount(training_target[train]), score))
        print('LogLoss : %.9f +/- %.9f ' % (np.mean(scores), np.std(scores)))
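To run the example, supply a load_data() helper returning the DataFrames used above, then call the function; a minimal invocation sketch follows. Note that in recent scikit-learn releases (1.6+, to the best of our knowledge) cv="prefit" is deprecated in favor of wrapping the fitted estimator in sklearn.frozen.FrozenEstimator; verify against your installed version before relying on either spelling.

# Hypothetical invocation; load_data() must be provided by the caller.
calibrate_clf(pcompa=False)

# Assumed modern equivalent of the cv="prefit" step (scikit-learn >= 1.6):
#   from sklearn.frozen import FrozenEstimator
#   sig_clf = CalibratedClassifierCV(FrozenEstimator(pipeline), method="sigmoid")
#   sig_clf.fit(X_cal, y_cal)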