本文整理汇总了Python中sklearn.calibration.CalibratedClassifierCV.predict方法的典型用法代码示例。如果您正苦于以下问题:Python CalibratedClassifierCV.predict方法的具体用法?Python CalibratedClassifierCV.predict怎么用?Python CalibratedClassifierCV.predict使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.calibration.CalibratedClassifierCV
的用法示例。
在下文中一共展示了CalibratedClassifierCV.predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_calibration_nan_imputer
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
def test_calibration_nan_imputer():
    """Check that CalibratedClassifierCV tolerates NaN in X when the
    wrapped pipeline imputes missing values before the classifier."""
    features, labels = make_classification(n_samples=10, n_features=2,
                                           n_informative=2, n_redundant=0,
                                           random_state=42)
    # Inject a missing value; the pipeline's imputer must absorb it.
    features[0, 0] = np.nan
    pipeline = Pipeline(
        [('imputer', SimpleImputer()),
         ('rf', RandomForestClassifier(n_estimators=1))])
    calibrated = CalibratedClassifierCV(pipeline, cv=2, method='isotonic')
    calibrated.fit(features, labels)
    # Must not raise despite the NaN in the input.
    calibrated.predict(features)
示例2: get_score
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
def get_score(self, params):
    """Evaluate one hyper-parameter setting with 5-fold cross validation.

    Trains an isotonic-calibrated RandomForest on each fold with
    inverse-frequency sample weights and scores it with F1.

    Parameters
    ----------
    params : dict
        Hyperopt-sampled hyper-parameters; integer-valued keys arrive as
        floats and are coerced to int below.

    Returns
    -------
    dict
        ``{'loss': -mean_f1, 'status': STATUS_OK}`` in hyperopt format
        (loss is negated because hyperopt minimizes).
    """
    # hyperopt samples these as floats; the estimator requires ints.
    # (The original cast 'n_estimators' twice — once is enough.)
    for key in ('n_estimators', 'max_depth',
                'min_samples_split', 'min_samples_leaf'):
        params[key] = int(params[key])
    print('Training with params:')
    print(params)
    # Manual cross validation so per-sample weights can be supplied to fit.
    scores = []
    for train_ix, test_ix in makeKFold(5, self.y, 1):
        X_train, y_train = self.X[train_ix, :], self.y[train_ix]
        X_test, y_test = self.X[test_ix, :], self.y[test_ix]
        # Inverse-frequency class weights to counter class imbalance.
        weight = y_train.shape[0] / (2 * np.bincount(y_train))
        # Fancy indexing replaces the per-element list comprehension.
        sample_weight = weight[y_train]
        clf = RandomForestClassifier(**params)
        cclf = CalibratedClassifierCV(base_estimator=clf,
                                      method='isotonic',
                                      cv=makeKFold(3, y_train, 1))
        cclf.fit(X_train, y_train, sample_weight)
        pred = cclf.predict(X_test)
        scores.append(f1_score(y_true=y_test, y_pred=pred))
    print(scores)
    score = np.mean(scores)
    print(score)
    return {'loss': -score, 'status': STATUS_OK}
示例3: ProbabilityCalibrationClassifier
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
class ProbabilityCalibrationClassifier(Classifier):
    """Classifier wrapping a base estimator in CalibratedClassifierCV.

    Training is deferred: the model is fit lazily on the first call to
    ``classify`` using the full training matrix from the database.
    """

    def __init__(self, matrixdatabase):
        # Source of the training matrix and per-recipe feature rows.
        self._matrix_database = matrixdatabase
        self._has_fit = False
        # BASEESTIMATOR / METHOD / CV are module-level configuration.
        self._pcc = CalibratedClassifierCV(base_estimator=BASEESTIMATOR,
                                           method=METHOD, cv=CV)

    def learn(self, ingredients, cuisine):
        """No-op: fitting happens lazily in ``classify``."""
        return

    def classify(self, ingredients):
        """Predict the cuisine label for one recipe's ingredients."""
        if not self._has_fit:
            # Lazy one-time fit on the full training matrix.
            matrix, classes = self._matrix_database.make_train_matrix()
            self._pcc = self._pcc.fit(matrix, classes)
            # Fixed: was a Python-2 print statement; the rest of this
            # module uses the Python-3 print function.
            print('Fitting complete...')
            self._has_fit = True
        output = self._pcc.predict(
            self._matrix_database.make_row_from_recipe(ingredients))
        # predict returns an array; the caller wants the single label.
        return output[0]
示例4: test_calibration_prefit
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
def test_calibration_prefit():
    """Test calibration for prefitted classifiers"""
    n_samples = 50
    X, y = make_classification(n_samples=3 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # Three disjoint folds: fit, calibrate, evaluate.
    X_train, y_train, sw_train = (X[:n_samples], y[:n_samples],
                                  sample_weight[:n_samples])
    X_calib = X[n_samples:2 * n_samples]
    y_calib = y[n_samples:2 * n_samples]
    sw_calib = sample_weight[n_samples:2 * n_samples]
    X_test, y_test = X[2 * n_samples:], y[2 * n_samples:]

    # Uncalibrated Naive-Bayes baseline.
    clf = MultinomialNB()
    clf.fit(X_train, y_train, sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # Calibrated Naive-Bayes, exercised on dense and sparse inputs.
    for cur_X_calib, cur_X_test in [(X_calib, X_test),
                                    (sparse.csr_matrix(X_calib),
                                     sparse.csr_matrix(X_test))]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv="prefit")
            for sw in [sw_calib, None]:
                pc_clf.fit(cur_X_calib, y_calib, sample_weight=sw)
                y_prob = pc_clf.predict_proba(cur_X_test)
                y_pred = pc_clf.predict(cur_X_test)
                prob_pos_pc_clf = y_prob[:, 1]
                # predict must agree with the argmax of predict_proba.
                assert_array_equal(
                    y_pred,
                    np.array([0, 1])[np.argmax(y_prob, axis=1)])
                # Calibration should improve the Brier score over the
                # raw classifier.
                assert_greater(brier_score_loss(y_test, prob_pos_clf),
                               brier_score_loss(y_test, prob_pos_pc_clf))
示例5: EN_optA
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
# NOTE(review): script fragment — EN_optA, EN_optB, p_valid, p_test,
# y_valid, y_test and n_classes are defined earlier in the original
# script and are not visible here.
# Stack each model's class-probability block side by side into one
# feature matrix (validation and test).
XV = np.hstack(p_valid)
XT = np.hstack(p_test)
#EN_optA
enA = EN_optA(n_classes)
enA.fit(XV, y_valid)
w_enA = enA.w
y_enA = enA.predict(XT)
y_enA_all = enA.predict_proba(XT)
print('{:20s} {:2s} {:1.7f}'.format('EN_optA:', 'error rate =>', 1-accuracy_score(y_test, y_enA)))
print('{:20s} {:2s} {:1.7f}'.format('EN_optA:', 'log loss =>', log_loss(y_test, y_enA_all)))
# Calibrated version of EN_optA
cc_optA = CalibratedClassifierCV(enA, method='isotonic')
cc_optA.fit(XV, y_valid)
y_ccA = cc_optA.predict(XT)
y_ccA_all = cc_optA.predict_proba(XT)
print('{:20s} {:2s} {:1.7f}'.format('Calibrated_EN_optA:', 'error rate =>', 1-accuracy_score(y_test, y_ccA)))
print('{:20s} {:2s} {:1.7f}'.format('Calibrated_EN_optA:', 'log loss =>', log_loss(y_test, y_ccA_all)))
#EN_optB
enB = EN_optB(n_classes)
enB.fit(XV, y_valid)
w_enB = enB.w
y_enB = enB.predict(XT)
y_enB_all = enB.predict_proba(XT)
print('{:20s} {:2s} {:1.7f}'.format('EN_optB:', 'error rate =>', 1-accuracy_score(y_test, y_enB)))
print('{:20s} {:2s} {:1.7f}'.format('EN_optB:', 'log loss =>', log_loss(y_test, y_enB_all)))
#Calibrated version of EN_optB
# NOTE(review): the excerpt is truncated here — cc_optB is constructed
# but its fit/predict calls are not shown.
cc_optB = CalibratedClassifierCV(enB, method='isotonic')
示例6: test_calibration
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
def test_calibration():
    """Test calibration objects with isotonic and sigmoid"""
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test, y_test = X[n_samples:], y[n_samples:]

    # Naive-Bayes
    clf = MultinomialNB().fit(X_train, y_train, sample_weight=sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # cv larger than the number of samples must be rejected.
    pc_clf = CalibratedClassifierCV(clf, cv=y.size + 1)
    assert_raises(ValueError, pc_clf.fit, X, y)

    # Naive Bayes with calibration, on dense and sparse inputs.
    for this_X_train, this_X_test in [(X_train, X_test),
                                      (sparse.csr_matrix(X_train),
                                       sparse.csr_matrix(X_test))]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv=2)
            # Note that this fit overwrites the fit on the entire training
            # set
            pc_clf.fit(this_X_train, y_train, sample_weight=sw_train)
            prob_pos_pc_clf = pc_clf.predict_proba(this_X_test)[:, 1]

            # Check that brier score has improved after calibration
            assert_greater(brier_score_loss(y_test, prob_pos_clf),
                           brier_score_loss(y_test, prob_pos_pc_clf))

            # Check invariance against relabeling [0, 1] -> [1, 2]
            pc_clf.fit(this_X_train, y_train + 1, sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1]
            assert_array_almost_equal(prob_pos_pc_clf,
                                      prob_pos_pc_clf_relabeled)

            # Check invariance against relabeling [0, 1] -> [-1, 1]
            pc_clf.fit(this_X_train, 2 * y_train - 1, sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1]
            assert_array_almost_equal(prob_pos_pc_clf,
                                      prob_pos_pc_clf_relabeled)

            # Check invariance against relabeling [0, 1] -> [1, 0]
            pc_clf.fit(this_X_train, (y_train + 1) % 2,
                       sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = \
                pc_clf.predict_proba(this_X_test)[:, 1]
            if method == "sigmoid":
                assert_array_almost_equal(prob_pos_pc_clf,
                                          1 - prob_pos_pc_clf_relabeled)
            else:
                # Isotonic calibration is not invariant against relabeling
                # but should improve in both cases
                assert_greater(brier_score_loss(y_test, prob_pos_clf),
                               brier_score_loss((y_test + 1) % 2,
                                                prob_pos_pc_clf_relabeled))

    # check that calibration can also deal with regressors that have
    # a decision_function
    clf_base_regressor = CalibratedClassifierCV(Ridge())
    clf_base_regressor.fit(X_train, y_train)
    clf_base_regressor.predict(X_test)

    # Check failure cases:
    # only "isotonic" and "sigmoid" should be accepted as methods
    clf_invalid_method = CalibratedClassifierCV(clf, method="foo")
    assert_raises(ValueError, clf_invalid_method.fit, X_train, y_train)

    # base-estimators should provide either decision_function or
    # predict_proba (most regressors, for instance, should fail)
    clf_base_regressor = \
        CalibratedClassifierCV(RandomForestRegressor(), method="sigmoid")
    assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train)
示例7: zip
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
# NOTE(review): fragment — the lines down to `return X_dict, y_array`
# are the tail of a read_data(df_filename, vf_filename)-style helper
# whose `def` line lies outside this excerpt; csv_array_to_float,
# merge_two_dicts, target_column_name, the *_filename constants, and
# the feature_extractor/classifier modules come from elsewhere.
df = pd.read_csv(df_filename, index_col=0)
y_array = df[target_column_name].values
X_dict = df.drop(target_column_name, axis=1).to_dict(orient='records')
vf_raw = pd.read_csv(vf_filename, index_col=0, compression='gzip')
vf_dict = vf_raw.applymap(csv_array_to_float).to_dict(orient='records')
# Merge base features with the per-row extra features.
X_dict = [merge_two_dicts(d_inst, v_inst) for d_inst, v_inst in zip(X_dict, vf_dict)]
return X_dict, y_array
if __name__ == '__main__':
print("Reading file ...")
X_dict, y_array = read_data(train_filename, vf_train_filename)
# 50/50 stratified shuffle split, 2 repetitions, as a validation scheme.
skf = StratifiedShuffleSplit(y_array, n_iter=2, test_size=0.5, random_state=57)
print("Training file ...")
for valid_train_is, valid_test_is in skf:
X_valid_train_dict = [X_dict[i] for i in valid_train_is]
y_valid_train = y_array[valid_train_is]
X_valid_test_dict = [X_dict[i] for i in valid_test_is]
y_valid_test = y_array[valid_test_is]
fe = feature_extractor.FeatureExtractor()
fe.fit(X_valid_train_dict, y_valid_train)
X_valid_train_array = fe.transform(X_valid_train_dict)
X_valid_test_array = fe.transform(X_valid_test_dict)
clf = classifier.Classifier()
# 2-fold isotonic calibration around the project classifier.
clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic')
clf_c.fit(X_valid_train_array, y_valid_train)
y_valid_pred = clf_c.predict(X_valid_test_array)
y_valid_proba = clf_c.predict_proba(X_valid_test_array)
#print y_valid_proba
# NOTE(review): Python-2 print statement, unlike the print() calls above.
print 'accuracy = ', accuracy_score(y_valid_pred, y_valid_test)
示例8: print
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
# NOTE(review): fragment — `train`, `test`, `features`, `skf`, `rf`,
# `predskeras` and `fb_preds` are defined earlier in the original
# script and are not visible here.
print ("Train a XGBoost model")
params = {
"objective": "binary:logistic",
"eta": 0.2, # used to be 0.2 or 0.1
"max_depth": 7, # used to be 5 or 6
"min_child_weight": 1,
"silent": 1,
"colsample_bytree": 0.7,
"seed": 1,
}
num_trees = 450 # used to be 300, 375 is better
gbm = xgb.train(params, xgb.DMatrix(train[features], train["signal"]), num_trees)
# NOTE(review): xgb.train returns a Booster, not a scikit-learn
# estimator, and this CalibratedClassifierCV is never fit before
# gbm.predict is called below — presumably this only worked in the
# author's environment; verify before reuse.
gbm = CalibratedClassifierCV(gbm, method="isotonic", cv=skf)
# NOTE(review): Python-2 print statement mixed with print() calls above.
print "saving gbm prediction"
temp = pd.DataFrame({"id": test["id"], "prediction": gbm.predict(xgb.DMatrix(test[features]))})
temp.to_csv("parts/gbm.csv", index=False)
print ("Make predictions on the test set")
# test_probs = (0.35*rf.predict_proba(test[features])[:,1]) + (0.35*gbm.predict(xgb.DMatrix(test[features])))+(0.15*predskeras) + (0.15*fb_preds)
# Weighted blend of random forest, XGBoost, Keras and fb model outputs.
test_probs = (
(0.24 * rf.predict_proba(test[features])[:, 1])
+ (0.3 * gbm.predict(xgb.DMatrix(test[features])))
+ (0.26 * predskeras)
+ (0.20 * fb_preds)
) # is better
# test_probs = (0.25*rf.predict_proba(test[features])[:,1]) + (0.25*gbm.predict(xgb.DMatrix(test[features])))+(0.25*predskeras) + (0.25*fb_preds)
submission = pd.DataFrame({"id": test["id"], "prediction": test_probs})
submission.to_csv("predictions/benchmark_calibrated.csv", index=False)
示例9: main
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
# NOTE(review): this function is truncated by the source site
# ("部分代码省略" at the end); ADASYN, deleteClass, tolAcc and the data
# files are defined elsewhere. Indentation was stripped by the site.
def main(argv):
# Change to parent directory to load data
# os.chdir(os.path.pardir)
X = np.load("data/X51.npy")
Y = np.load("data/y51.npy")
labels = np.load("data/LOO.npy")
print(X.shape)
# fixes errors with Nan data
# X= preprocessing.Imputer().fit_transform(X)
# Recursive oversampling and undersampling
# adsn = ADASYN(imb_threshold=0.5,ratio=0.7)
# X,Y = adsn.fit_transform(X,Y)
# X,Y = adsn.fit_transform(X,Y)
# X,Y = deleteClass(X,Y,100,2)
# Grouping 5 classes to 3
"""for i in range(0,Y.shape[0]):
if Y[i]==0 or Y[i]==1:
Y[i]==0
elif Y[i]==2:
Y[i]=1
else:
Y[i]=2
"""
print(Counter(Y))
# Synthetic data is only to be used during training to
# enhance recall of minority classes. New data are appended
# as first rows of X,y
size_b = X.shape[0]
adsn = ADASYN(imb_threshold=0.5, ratio=0.7)
X, Y = adsn.fit_transform(X, Y)
size_a = X.shape[0]
# Count of ADASYN-generated samples (appended at the front).
generated_samp = size_a - size_b
newX = X[1:generated_samp]
newY = Y[1:generated_samp]
# Shuffling original data to ensure no time dependence
realX, realY = shuffle(X[generated_samp:-1], Y[generated_samp:-1], random_state=0)
realX, realY = shuffle(realX, realY, random_state=15)
print("--------------")
# appending real data after generated so that test set will not contain synthetic data
allX = np.concatenate((newX, realX), axis=0)
allY = np.concatenate((newY, realY), axis=0)
X, Y = deleteClass(allX, allY, 200, 2)
print(X.shape, Y.shape)
# creating training set with synthetic data, test set only real data
train = [i for i in range(0, int(0.7 * X.shape[0]))]
test = [i for i in range(int(0.7 * X.shape[0]), X.shape[0])]
print(Counter(Y))
if sys.argv[1] == "-ensemble":
RF = []
outputRF = []
outRFtest = []
totalacc = 0
totalRF = 0
totalXGB = 0
# Tests with all features / most important
# feats =[0,1,2,3,4,5,6,7,13,16,22,23,24,25,26,27,29,30,31,32,33,35,38,39,40,41,44,46,47,50]
# X = X[:,feats]
print(X.shape, Y.shape)
n_folds = 3
skf = StratifiedKFold(Y, n_folds=n_folds)
kf = KFold(X.shape[0], n_folds=n_folds, shuffle=True)
# Compare RandomForest vs XGBoost fold by fold.
for traini, testi in kf:
print(len(traini), len(testi))
# Although data is oversampled, still a small imbalance is present
rfr = RandomForestClassifier(
n_estimators=300,
class_weight="auto",
n_jobs=-1,
criterion="entropy",
max_features=X.shape[1],
min_samples_split=1,
)
gbm = xgb.XGBClassifier(n_estimators=50, learning_rate=0.5, colsample_bytree=0.3).fit(X[traini], Y[traini])
rfr.fit(X[traini], Y[traini])
pred = rfr.predict(X[testi])
pred1 = gbm.predict(X[testi])
# Print to screen mean error and Tolerance Score
tempacc, trueRF = tolAcc(Y[testi], pred)
print("Random Forest: %s" % tempacc)
tempacc1, trueXGB = tolAcc(Y[testi], pred1)
print("XGBoost: %s" % tempacc1)
totalXGB += trueXGB
totalRF += trueRF
totalacc += tempacc
#.........这里部分代码省略.........
示例10: mean_squared_error
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
# NOTE(review): Python-2 script fragment — `target`, `fit`, `training`,
# `ccv`, `test` and `method` are defined earlier in the original script.
print 'predict on training set'
score = mean_squared_error(target, fit.predict(training))
print score
# Best-effort directory creation (EAFP; ignores "already exists").
try:
os.mkdir('logs')
except:
pass
#save score to log
fName = open(os.path.join('logs', method + '.log'), 'w')
# Python-2 "print >>" chevron syntax redirects the line into the file.
print >> fName, 'mean squared error on the training set is: ' + str(score)
fName.close()
print 'predict on testing'
prediction = ccv.predict(test)
print 'saving prediction to file'
submission = pd.DataFrame(prediction)
submission.columns = ['units']
# Invert the log(1+x) transform — presumably applied to the target
# during training; verify against the upstream code.
submission['units'] = submission['units'].apply(lambda x: math.exp(x) - 1)
# Build the submission id as "store_item_date".
def merge_data(df):
return ''.join([str(df["store_nbr"]), "_", str(df["item_nbr"]), "_", df["date"]])
submission["id"] = test[["store_nbr", "item_nbr", "date"]].apply(merge_data, 1)
try:
os.mkdir('predictions')
except:
pass
示例11: print
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
# NOTE(review): fragment — the first line below is the tail of a call
# whose opening is not shown; X, y, tX, ty and `testing` come from
# earlier in the original script.
random_state=2014)
predictions = []
validations = []
print("\nTraining")
clf = GradientBoostingClassifier(n_estimators=2500,
learning_rate=0.026,
max_depth=2,
random_state=2015)
# 5-fold isotonic calibration on top of the gradient-boosted model.
cal = CalibratedClassifierCV(clf, cv=5, method="isotonic")
cal.fit(X,y)
pred = cal.predict_proba(tX)[:,1]
prednp = cal.predict(tX)
print("\nValidation for Calibrated GBM")
print(log_loss(ty, pred))
print(accuracy_score(ty, prednp))
print(roc_auc_score(ty, pred))
predictions.append(cal.predict_proba(testing)[:,1])
validations.append(prednp)
# Average across collected model outputs (a single model here).
validt = sum(validations)/len(validations)
submit = sum(predictions)/len(predictions)
print("\nMake predictions and submission")
sample = pd.read_csv("sampleSubmission.csv")
示例12: zip
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
# NOTE(review): fragment — the lines down to `return X_dict, y_array`
# are the tail of a read_data(...) helper whose `def` line is outside
# this excerpt; csv_array_to_float, merge_two_dicts and the filename
# constants come from elsewhere. Near-duplicate of an earlier example.
X_dict = df.drop(target_column_name, axis=1).to_dict(orient='records')
vf_raw = pd.read_csv(vf_filename, index_col=0, compression='gzip')
vf_dict = vf_raw.applymap(csv_array_to_float).to_dict(orient='records')
# Merge base features with the per-row extra features.
X_dict = [merge_two_dicts(d_inst, v_inst) for d_inst, v_inst in zip(X_dict, vf_dict)]
return X_dict, y_array
if __name__ == '__main__':
print("Reading file ...")
X_dict, y_array = read_data(train_filename, vf_train_filename)
skf = StratifiedShuffleSplit(y_array, n_iter=2, test_size=0.5, random_state=57)
print("Training file ...")
for valid_train_is, valid_test_is in skf:
X_valid_train_dict = [X_dict[i] for i in valid_train_is]
y_valid_train = y_array[valid_train_is]
X_valid_test_dict = [X_dict[i] for i in valid_test_is]
y_valid_test = y_array[valid_test_is]
fe = feature_extractor.FeatureExtractor()
fe.fit(X_valid_train_dict, y_valid_train)
X_valid_train_array = fe.transform(X_valid_train_dict)
X_valid_test_array = fe.transform(X_valid_test_dict)
print("extracted...")
clf = classifier.Classifier()
clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic')
clf_c.fit(X_valid_train_array, y_valid_train)
y_valid_pred = clf_c.predict(X_valid_test_array)
y_valid_proba = clf_c.predict_proba(X_valid_test_array)
#print y_valid_proba
print('accuracy train=', accuracy_score(clf_c.predict(X_valid_train_array), y_valid_train))
# NOTE(review): Python-2 print statement next to the print() call above.
print 'accuracy test = ', accuracy_score(y_valid_pred, y_valid_test)
示例13: train_test_split
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
# NOTE(review): fragment — X, y, `test` and `shape` are defined earlier
# in the original script and are not visible here.
# Split data into Train and hold out for model blending
X, X_holdout, y, y_holdout = train_test_split(X,y,test_size=0.25, random_state=22)
'''
This section of the code runs different moodels. Currently our tool is using logistic regression, Knnn
,Random Forests, and eXtreme Gradient Boosting.
Important consideration is to calibrate the posterior probabilities.
Certain algorithms give out under-confident predictions
'''
# Logistic regression
clf = LogisticRegression()
calibrated_clf = CalibratedClassifierCV(clf, method='sigmoid', cv=3)
score = cross_val_score(calibrated_clf,X,y,cv=3,scoring="roc_auc").mean()
calibrated_clf.fit(X,y)
# NOTE(review): the AUC below is computed but its value is discarded.
roc_auc_score(y_holdout,calibrated_clf.predict(X_holdout))
ypred_lr = calibrated_clf.predict(test)
# Random Forest
clf = RandomForestClassifier(n_estimators=np.int(np.sqrt(.75*shape)),min_samples_split=20, n_jobs=-1)
calibrated_clf = CalibratedClassifierCV(clf, method='sigmoid', cv=3)
calibrated_clf.fit(X, y)
roc_auc_score(y_holdout,calibrated_clf.predict(X_holdout))
ypred_rf = calibrated_clf.predict(test)
# KNN
clf = KNeighborsClassifier(n_neighbors = np.int(np.sqrt(.75*shape)))
calibrated_clf = CalibratedClassifierCV(clf, method='sigmoid', cv=3)
score = cross_val_score(calibrated_clf,X,y,cv=3,scoring="roc_auc").mean()
calibrated_clf.fit(X,y)
示例14: RF
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
# NOTE(review): interior of a class whose header and earlier methods are
# elided by the source site; `objective`, hp/Trials/fmin/tpe (hyperopt),
# handle_data, logger, and self.data/self.model come from the
# surrounding code. Indentation was stripped by the site.
#.........这里部分代码省略.........
# Hyperopt search space for the random-forest hyper-parameters.
space = (
hp.quniform('n_estimators',100,1000,100),
hp.quniform('max_depth', 4, 10, 1),
hp.quniform('max_features', 0.1,1., 0.2)
)
trials = Trials()
best_sln = fmin(objective, space, algo=tpe.suggest, max_evals=40,trials=trials)
rinfo = trials.results
df = pd.DataFrame(rinfo)
df.to_csv('./tune.csv',index=False)
# NOTE(review): Python-2 print statement.
print best_sln
def do_submit(self, CALIB=False):
# Optionally wrap the current model in sigmoid calibration first.
if CALIB:
tp = {}
tp['base_estimator'] = self.model
tp['method'] = 'sigmoid'
tp['cv'] = 5
self.model = CalibratedClassifierCV(**tp)
X = self.data['train_x']
y = self.data['train_y']
test_x = self.data['test_x']
test_id = self.data['test_id']
self.model.fit(X, y)
pre = self.model.predict_proba(test_x)
logger.info(self.model)
self.write_submission(test_id, pre, 'rf_test_l1.csv')
#retrain
# Out-of-fold predictions for level-1 stacking.
kf = StratifiedKFold(y, 5, random_state=555)
sub_col = handle_data.get_col()
lr_train = pd.DataFrame(index=range(len(X)), columns=sub_col)
index_cv = 0
index_cv = 0
for tr_ind, te_ind in kf:
train_x = X.iloc[tr_ind]
train_y = y.iloc[tr_ind]
test_x = X.iloc[te_ind]
test_y = y.iloc[te_ind]
self.model.fit(train_x, train_y)
pred = self.model.predict_proba(test_x)
lr_train.iloc[te_ind] = pred
score = log_loss(test_y,pred,eps=1e-15, normalize=True)
示例15: OneClassClassifierWrapper
# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict [as 别名]
class OneClassClassifierWrapper(ClassifierWrapper):
    """Classifier."""

    def __init__(self,
                 program=SGDClassifier(average=True,
                                       class_weight='balanced',
                                       shuffle=True),
                 nu=0.5):
        """Construct."""
        # nu: fraction of training samples allowed below the shifted
        # decision boundary (as in one-class SVMs).
        self.nu = nu
        super(OneClassClassifierWrapper, self).__init__(program)

    def fit(self, graphs):
        """fit."""
        try:
            # make matrix
            data_matrix = vectorize(graphs,
                                    vectorizer=self.vectorizer,
                                    **self.params_vectorize)
            # Mirror the data through the origin to synthesize a
            # negative class for the binary estimator.
            data_matrix_neg = data_matrix.multiply(-1)
            data_matrix_both = vstack([data_matrix, data_matrix_neg],
                                      format="csr")
            # make labels
            length = data_matrix.shape[0]
            y = [-1] * length + [1] * length
            y = np.ravel(y)
            # fit:
            estimator = self.program.fit(data_matrix_both, y)
            # moving intercept: shift the boundary so that a `nu`
            # fraction of the positive data falls below it.
            scores = [estimator.decision_function(sparse_vector)[0]
                      for sparse_vector in data_matrix]
            scores_sorted = sorted(scores)
            pivot = scores_sorted[int(len(scores_sorted) * self.nu)]
            estimator.intercept_ -= pivot
            # calibration: map decision values to probabilities.
            data_y = np.asarray([1 if score >= pivot else -1
                                 for score in scores])
            self.program = CalibratedClassifierCV(estimator,
                                                  method='sigmoid')
            self.program.fit(data_matrix, data_y)
            return self
        except Exception as e:
            # Best-effort: log and return None on any failure.
            logger.debug('Failed iteration. Reason: %s' % e)
            logger.debug('Exception', exc_info=True)

    def predict(self, graphs):
        """predict.

        only overwrite is this:
        decision_function -> predict_proba
        graph.graph['score'] will be a (len 2) list
        """
        try:
            # tee: one copy is vectorized, the other is re-yielded
            # with predictions attached.
            graphs, graphs_ = tee(graphs)
            data_matrix = vectorize(graphs_,
                                    vectorizer=self.vectorizer,
                                    **self.params_vectorize)
            predictions = self.program.predict(data_matrix)
            # scores = self.program.decision_function(data_matrix)
            scores = self.program.predict_proba(data_matrix)
            # NOTE(review): izip is Python-2 itertools; under Python 3
            # this would be plain zip.
            for score, prediction, graph in izip(scores, predictions,
                                                 graphs):
                graph.graph['prediction'] = prediction
                graph.graph['score'] = score
                yield graph
        except Exception as e:
            logger.debug('Failed iteration. Reason: %s' % e)
            logger.debug('Exception', exc_info=True)