

Python CalibratedClassifierCV.predict Method Code Examples

This article collects and summarizes typical usage examples of the Python method sklearn.calibration.CalibratedClassifierCV.predict. If you are wondering what CalibratedClassifierCV.predict does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore the wider usage of sklearn.calibration.CalibratedClassifierCV for more context.


The following 15 code examples of CalibratedClassifierCV.predict are presented, sorted by popularity by default.
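Before the collected examples, here is a minimal, self-contained sketch of the typical workflow: wrap a base estimator in CalibratedClassifierCV, fit it, then call predict for class labels (or predict_proba for calibrated probabilities). The toy dataset and names such as base_clf are illustrative assumptions, not taken from the examples below.

from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Toy binary-classification data (illustrative only)
X, y = make_classification(n_samples=200, n_features=6, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

base_clf = GaussianNB()
# Calibrate the base estimator's probabilities with sigmoid (Platt) scaling
# using 3-fold cross-validation, then predict as with any classifier.
calibrated = CalibratedClassifierCV(base_clf, method='sigmoid', cv=3)
calibrated.fit(X_train, y_train)
y_pred = calibrated.predict(X_test)          # hard class labels
y_proba = calibrated.predict_proba(X_test)   # calibrated probabilities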

Example 1: test_calibration_nan_imputer

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
def test_calibration_nan_imputer():
    """Test that calibration can accept nan"""
    X, y = make_classification(n_samples=10, n_features=2,
                               n_informative=2, n_redundant=0,
                               random_state=42)
    X[0, 0] = np.nan
    clf = Pipeline(
        [('imputer', SimpleImputer()),
         ('rf', RandomForestClassifier(n_estimators=1))])
    clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic')
    clf_c.fit(X, y)
    clf_c.predict(X)
Developer: abecadel, Project: scikit-learn, Lines: 14, Source: test_calibration.py

Example 2: get_score

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
    def get_score(self, params):
        params['n_estimators'] = int(params['n_estimators'])
        params['max_depth'] = int(params['max_depth'])
        params['min_samples_split'] = int(params['min_samples_split'])
        params['min_samples_leaf'] = int(params['min_samples_leaf'])

        print('Training with params:')
        print(params)

        # cross validation here
        scores = []
        for train_ix, test_ix in makeKFold(5, self.y, 1):
            X_train, y_train = self.X[train_ix, :], self.y[train_ix]
            X_test, y_test = self.X[test_ix, :], self.y[test_ix]
            weight = y_train.shape[0] / (2 * np.bincount(y_train))
            sample_weight = np.array([weight[i] for i in y_train])

            clf = RandomForestClassifier(**params)
            cclf = CalibratedClassifierCV(base_estimator=clf,
                                          method='isotonic',
                                          cv=makeKFold(3, y_train, 1))
            cclf.fit(X_train, y_train, sample_weight)
            pred = cclf.predict(X_test)
            scores.append(f1_score(y_true=y_test, y_pred=pred))

        print(scores)
        score = np.mean(scores)

        print(score)
        return {'loss': -score, 'status': STATUS_OK}
Developer: jingxiang-li, Project: kaggle-yelp, Lines: 33, Source: level3_model_rf.py

Example 3: ProbabilityCalibrationClassifier

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
class ProbabilityCalibrationClassifier(Classifier):
	
	def __init__(self, matrixdatabase):
		self._matrix_database = matrixdatabase
		self._has_fit = False
		self._pcc = CalibratedClassifierCV(base_estimator=BASEESTIMATOR, method=METHOD, cv=CV)

	def learn(self, ingredients, cuisine):
		return

	def classify(self, ingredients):
		if not self._has_fit:
			matrix, classes = self._matrix_database.make_train_matrix()
			self._pcc = self._pcc.fit(matrix, classes)
			print('Fitting complete...')
			self._has_fit = True
		output = self._pcc.predict(self._matrix_database.make_row_from_recipe(ingredients))
		return output[0]
Developer: ryanwb, Project: ucla-cs145-kaggle, Lines: 20, Source: probabilitycalibrationclassifier.py

Example 4: test_calibration_prefit

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
def test_calibration_prefit():
    """Test calibration for prefitted classifiers"""
    n_samples = 50
    X, y = make_classification(n_samples=3 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_calib, y_calib, sw_calib = \
        X[n_samples:2 * n_samples], y[n_samples:2 * n_samples], \
        sample_weight[n_samples:2 * n_samples]
    X_test, y_test = X[2 * n_samples:], y[2 * n_samples:]

    # Naive-Bayes
    clf = MultinomialNB()
    clf.fit(X_train, y_train, sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    # Naive Bayes with calibration
    for this_X_calib, this_X_test in [(X_calib, X_test),
                                      (sparse.csr_matrix(X_calib),
                                       sparse.csr_matrix(X_test))]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv="prefit")

            for sw in [sw_calib, None]:
                pc_clf.fit(this_X_calib, y_calib, sample_weight=sw)
                y_prob = pc_clf.predict_proba(this_X_test)
                y_pred = pc_clf.predict(this_X_test)
                prob_pos_pc_clf = y_prob[:, 1]
                assert_array_equal(y_pred,
                                   np.array([0, 1])[np.argmax(y_prob, axis=1)])

                assert_greater(brier_score_loss(y_test, prob_pos_clf),
                               brier_score_loss(y_test, prob_pos_pc_clf))
Developer: abecadel, Project: scikit-learn, Lines: 41, Source: test_calibration.py

Example 5: EN_optA

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
XV = np.hstack(p_valid)
XT = np.hstack(p_test)

#EN_optA
enA = EN_optA(n_classes)
enA.fit(XV, y_valid)
w_enA = enA.w
y_enA = enA.predict(XT)
y_enA_all = enA.predict_proba(XT)
print('{:20s} {:2s} {:1.7f}'.format('EN_optA:', 'error rate  =>', 1-accuracy_score(y_test, y_enA)))
print('{:20s} {:2s} {:1.7f}'.format('EN_optA:', 'log loss  =>', log_loss(y_test, y_enA_all)))

# Calibrated version of EN_optA
cc_optA = CalibratedClassifierCV(enA, method='isotonic')
cc_optA.fit(XV, y_valid)
y_ccA = cc_optA.predict(XT)
y_ccA_all = cc_optA.predict_proba(XT)
print('{:20s} {:2s} {:1.7f}'.format('Calibrated_EN_optA:', 'error rate  =>', 1-accuracy_score(y_test, y_ccA)))
print('{:20s} {:2s} {:1.7f}'.format('Calibrated_EN_optA:', 'log loss  =>', log_loss(y_test, y_ccA_all)))

#EN_optB
enB = EN_optB(n_classes)
enB.fit(XV, y_valid)
w_enB = enB.w
y_enB = enB.predict(XT)
y_enB_all = enB.predict_proba(XT)
print('{:20s} {:2s} {:1.7f}'.format('EN_optB:', 'error rate  =>', 1-accuracy_score(y_test, y_enB)))
print('{:20s} {:2s} {:1.7f}'.format('EN_optB:', 'log loss  =>', log_loss(y_test, y_enB_all)))

# Calibrated version of EN_optB
cc_optB = CalibratedClassifierCV(enB, method='isotonic')
Developer: wangzhao1988, Project: DataMiningProject, Lines: 33, Source: ensemble.py

Example 6: test_calibration

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
def test_calibration():
    """Test calibration objects with isotonic and sigmoid"""
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    X -= X.min()  # MultinomialNB only allows positive X

    # split train and test
    X_train, y_train, sw_train = \
        X[:n_samples], y[:n_samples], sample_weight[:n_samples]
    X_test, y_test = X[n_samples:], y[n_samples:]

    # Naive-Bayes
    clf = MultinomialNB().fit(X_train, y_train, sample_weight=sw_train)
    prob_pos_clf = clf.predict_proba(X_test)[:, 1]

    pc_clf = CalibratedClassifierCV(clf, cv=y.size + 1)
    assert_raises(ValueError, pc_clf.fit, X, y)

    # Naive Bayes with calibration
    for this_X_train, this_X_test in [(X_train, X_test),
                                      (sparse.csr_matrix(X_train),
                                       sparse.csr_matrix(X_test))]:
        for method in ['isotonic', 'sigmoid']:
            pc_clf = CalibratedClassifierCV(clf, method=method, cv=2)
            # Note that this fit overwrites the fit on the entire training
            # set
            pc_clf.fit(this_X_train, y_train, sample_weight=sw_train)
            prob_pos_pc_clf = pc_clf.predict_proba(this_X_test)[:, 1]

            # Check that brier score has improved after calibration
            assert_greater(brier_score_loss(y_test, prob_pos_clf),
                           brier_score_loss(y_test, prob_pos_pc_clf))

            # Check invariance against relabeling [0, 1] -> [1, 2]
            pc_clf.fit(this_X_train, y_train + 1, sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1]
            assert_array_almost_equal(prob_pos_pc_clf,
                                      prob_pos_pc_clf_relabeled)

            # Check invariance against relabeling [0, 1] -> [-1, 1]
            pc_clf.fit(this_X_train, 2 * y_train - 1, sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = pc_clf.predict_proba(this_X_test)[:, 1]
            assert_array_almost_equal(prob_pos_pc_clf,
                                      prob_pos_pc_clf_relabeled)

            # Check invariance against relabeling [0, 1] -> [1, 0]
            pc_clf.fit(this_X_train, (y_train + 1) % 2,
                       sample_weight=sw_train)
            prob_pos_pc_clf_relabeled = \
                pc_clf.predict_proba(this_X_test)[:, 1]
            if method == "sigmoid":
                assert_array_almost_equal(prob_pos_pc_clf,
                                          1 - prob_pos_pc_clf_relabeled)
            else:
                # Isotonic calibration is not invariant against relabeling
                # but should improve in both cases
                assert_greater(brier_score_loss(y_test, prob_pos_clf),
                               brier_score_loss((y_test + 1) % 2,
                                                prob_pos_pc_clf_relabeled))

        # check that calibration can also deal with regressors that have
        # a decision_function
        clf_base_regressor = CalibratedClassifierCV(Ridge())
        clf_base_regressor.fit(X_train, y_train)
        clf_base_regressor.predict(X_test)

        # Check failure cases:
        # only "isotonic" and "sigmoid" should be accepted as methods
        clf_invalid_method = CalibratedClassifierCV(clf, method="foo")
        assert_raises(ValueError, clf_invalid_method.fit, X_train, y_train)

        # base-estimators should provide either decision_function or
        # predict_proba (most regressors, for instance, should fail)
        clf_base_regressor = \
            CalibratedClassifierCV(RandomForestRegressor(), method="sigmoid")
        assert_raises(RuntimeError, clf_base_regressor.fit, X_train, y_train)
Developer: 0664j35t3r, Project: scikit-learn, Lines: 81, Source: test_calibration.py

Example 7: zip

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
    df = pd.read_csv(df_filename, index_col=0)
    y_array = df[target_column_name].values
    X_dict = df.drop(target_column_name, axis=1).to_dict(orient='records')
    vf_raw = pd.read_csv(vf_filename, index_col=0, compression='gzip')
    vf_dict = vf_raw.applymap(csv_array_to_float).to_dict(orient='records')
    X_dict = [merge_two_dicts(d_inst, v_inst) for d_inst, v_inst in zip(X_dict, vf_dict)]
    return X_dict, y_array

if __name__ == '__main__':
    print("Reading file ...")
    X_dict, y_array = read_data(train_filename, vf_train_filename)
    skf = StratifiedShuffleSplit(y_array, n_iter=2, test_size=0.5, random_state=57)
    print("Training file ...")
    for valid_train_is, valid_test_is in skf:
        X_valid_train_dict = [X_dict[i] for i in valid_train_is]
        y_valid_train = y_array[valid_train_is]
        X_valid_test_dict = [X_dict[i] for i in valid_test_is]
        y_valid_test = y_array[valid_test_is]
        fe = feature_extractor.FeatureExtractor()
        fe.fit(X_valid_train_dict, y_valid_train)
        X_valid_train_array = fe.transform(X_valid_train_dict)
        X_valid_test_array = fe.transform(X_valid_test_dict)

        clf = classifier.Classifier()
        clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic')
        clf_c.fit(X_valid_train_array, y_valid_train)
        y_valid_pred = clf_c.predict(X_valid_test_array)
        y_valid_proba = clf_c.predict_proba(X_valid_test_array)
        # print(y_valid_proba)
        print('accuracy = ', accuracy_score(y_valid_pred, y_valid_test))
Developer: ChristianArnault, Project: Ramp, Lines: 32, Source: user_test_model.py

Example 8: print

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
print ("Train a XGBoost model")
params = {
    "objective": "binary:logistic",
    "eta": 0.2,  # used to be 0.2 or 0.1
    "max_depth": 7,  # used to be 5 or 6
    "min_child_weight": 1,
    "silent": 1,
    "colsample_bytree": 0.7,
    "seed": 1,
}
num_trees = 450  # used to be 300, 375 is better
gbm = xgb.train(params, xgb.DMatrix(train[features], train["signal"]), num_trees)
gbm = CalibratedClassifierCV(gbm, method="isotonic", cv=skf)

print "saving gbm prediction"
temp = pd.DataFrame({"id": test["id"], "prediction": gbm.predict(xgb.DMatrix(test[features]))})
temp.to_csv("parts/gbm.csv", index=False)


print ("Make predictions on the test set")
# test_probs = (0.35*rf.predict_proba(test[features])[:,1]) + (0.35*gbm.predict(xgb.DMatrix(test[features])))+(0.15*predskeras) + (0.15*fb_preds)
test_probs = (
    (0.24 * rf.predict_proba(test[features])[:, 1])
    + (0.3 * gbm.predict_proba(test[features])[:, 1])
    + (0.26 * predskeras)
    + (0.20 * fb_preds)
)  # is better
# test_probs = (0.25*rf.predict_proba(test[features])[:,1]) + (0.25*gbm.predict(xgb.DMatrix(test[features])))+(0.25*predskeras) + (0.25*fb_preds)
submission = pd.DataFrame({"id": test["id"], "prediction": test_probs})
submission.to_csv("predictions/benchmark_calibrated.csv", index=False)
Developer: ternaus, Project: kaggle_particles, Lines: 32, Source: benchmark_calibrated.py

Example 9: main

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
def main(argv):
    # Change to parent directory to load data
    # os.chdir(os.path.pardir)
    X = np.load("data/X51.npy")
    Y = np.load("data/y51.npy")
    labels = np.load("data/LOO.npy")
    print(X.shape)
    # fixes errors with NaN data
    # X = preprocessing.Imputer().fit_transform(X)

    # Recursive oversampling and undersampling
    # adsn = ADASYN(imb_threshold=0.5,ratio=0.7)
    # X,Y = adsn.fit_transform(X,Y)
    # X,Y = adsn.fit_transform(X,Y)
    # X,Y = deleteClass(X,Y,100,2)

    # Grouping 5 classes to 3
    """for i in range(0,Y.shape[0]):
		if Y[i]==0 or Y[i]==1:
			Y[i]==0
		elif Y[i]==2:
			Y[i]=1
		else:
			Y[i]=2
"""
    print(Counter(Y))

    # Synthetic data is only to be used during training to
    # enhance recall of minority classes. New data are appended
    # as first rows of X,y

    size_b = X.shape[0]
    adsn = ADASYN(imb_threshold=0.5, ratio=0.7)
    X, Y = adsn.fit_transform(X, Y)
    size_a = X.shape[0]
    generated_samp = size_a - size_b

    newX = X[1:generated_samp]
    newY = Y[1:generated_samp]

    # Shuffling original data to ensure no time dependence
    realX, realY = shuffle(X[generated_samp:-1], Y[generated_samp:-1], random_state=0)
    realX, realY = shuffle(realX, realY, random_state=15)

    print("--------------")
    # appending real data after generated so that test set will not contain synthetic data
    allX = np.concatenate((newX, realX), axis=0)
    allY = np.concatenate((newY, realY), axis=0)

    X, Y = deleteClass(allX, allY, 200, 2)
    print(X.shape, Y.shape)

    # creating training set with synthetic data, test set only real data
    train = [i for i in range(0, int(0.7 * X.shape[0]))]
    test = [i for i in range(int(0.7 * X.shape[0]), X.shape[0])]
    print(Counter(Y))

    if sys.argv[1] == "-ensemble":
        RF = []
        outputRF = []
        outRFtest = []
        totalacc = 0
        totalRF = 0
        totalXGB = 0

        # Tests with all features / most important
        # feats =[0,1,2,3,4,5,6,7,13,16,22,23,24,25,26,27,29,30,31,32,33,35,38,39,40,41,44,46,47,50]
        # X = X[:,feats]
        print(X.shape, Y.shape)

        n_folds = 3
        skf = StratifiedKFold(Y, n_folds=n_folds)
        kf = KFold(X.shape[0], n_folds=n_folds, shuffle=True)
        for traini, testi in kf:
            print(len(traini), len(testi))

            # Although data is oversampled, still a small imbalance is present
            rfr = RandomForestClassifier(
                n_estimators=300,
                class_weight="auto",
                n_jobs=-1,
                criterion="entropy",
                max_features=X.shape[1],
                min_samples_split=1,
            )
            gbm = xgb.XGBClassifier(n_estimators=50, learning_rate=0.5, colsample_bytree=0.3).fit(X[traini], Y[traini])

            rfr.fit(X[traini], Y[traini])
            pred = rfr.predict(X[testi])
            pred1 = gbm.predict(X[testi])
            # Print to screen mean error and Tolerance Score
            tempacc, trueRF = tolAcc(Y[testi], pred)
            print("Random Forest: %s" % tempacc)

            tempacc1, trueXGB = tolAcc(Y[testi], pred1)
            print("XGBoost: %s" % tempacc1)
            totalXGB += trueXGB
            totalRF += trueRF
            totalacc += tempacc

#.........part of the code omitted here.........
Developer: Diwahars, Project: StudentLife-DataMining-ModelTraining, Lines: 103, Source: ensembleGROUP.py

Example 10: mean_squared_error

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
print('predict on training set')
score = mean_squared_error(target, fit.predict(training))
print(score)

try:
    os.mkdir('logs')
except:
    pass

# save score to log
fName = open(os.path.join('logs', method + '.log'), 'w')
print('mean squared error on the training set is: ' + str(score), file=fName)
fName.close()

print('predict on testing')
prediction = ccv.predict(test)
print('saving prediction to file')
submission = pd.DataFrame(prediction)
submission.columns = ['units']

submission['units'] = submission['units'].apply(lambda x: math.exp(x) - 1)

def merge_data(df):
    return ''.join([str(df["store_nbr"]), "_", str(df["item_nbr"]), "_", df["date"]])

submission["id"] = test[["store_nbr", "item_nbr", "date"]].apply(merge_data, 1)

try:
    os.mkdir('predictions')
except:
    pass
Developer: ternaus, Project: kaggle_wallmart, Lines: 33, Source: sklearn_callibratedCV.py

Example 11: print

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
                                  random_state=2014)

predictions = []
validations = []

print("\nTraining")
clf = GradientBoostingClassifier(n_estimators=2500, 
                                 learning_rate=0.026, 
                                 max_depth=2,
                                 random_state=2015)
                                 
cal = CalibratedClassifierCV(clf, cv=5, method="isotonic")
cal.fit(X,y)

pred = cal.predict_proba(tX)[:,1]
prednp = cal.predict(tX)

print("\nValidation for Calibrated GBM")
print(log_loss(ty, pred))
print(accuracy_score(ty, prednp))
print(roc_auc_score(ty, pred))

predictions.append(cal.predict_proba(testing)[:,1])
validations.append(prednp)

validt = sum(validations)/len(validations)
submit = sum(predictions)/len(predictions)

print("\nMake predictions and submission")
sample = pd.read_csv("sampleSubmission.csv")
Developer: leonardodaniel, Project: kaggle_mosco, Lines: 32, Source: training_001.py

Example 12: zip

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
    X_dict = df.drop(target_column_name, axis=1).to_dict(orient='records')
    vf_raw = pd.read_csv(vf_filename, index_col=0, compression='gzip')
    vf_dict = vf_raw.applymap(csv_array_to_float).to_dict(orient='records')
    X_dict = [merge_two_dicts(d_inst, v_inst) for d_inst, v_inst in zip(X_dict, vf_dict)]
    return X_dict, y_array

if __name__ == '__main__':
    print("Reading file ...")
    X_dict, y_array = read_data(train_filename, vf_train_filename)
    skf = StratifiedShuffleSplit(y_array, n_iter=2, test_size=0.5, random_state=57)
    print("Training file ...")
    for valid_train_is, valid_test_is in skf:
        X_valid_train_dict = [X_dict[i] for i in valid_train_is]
        y_valid_train = y_array[valid_train_is]
        X_valid_test_dict = [X_dict[i] for i in valid_test_is]
        y_valid_test = y_array[valid_test_is]
        fe = feature_extractor.FeatureExtractor()
        fe.fit(X_valid_train_dict, y_valid_train)
        X_valid_train_array = fe.transform(X_valid_train_dict)
        X_valid_test_array = fe.transform(X_valid_test_dict)
        print("extracted...")

        clf = classifier.Classifier()
        clf_c = CalibratedClassifierCV(clf, cv=2, method='isotonic')
        clf_c.fit(X_valid_train_array, y_valid_train)
        y_valid_pred = clf_c.predict(X_valid_test_array)
        y_valid_proba = clf_c.predict_proba(X_valid_test_array)
        # print(y_valid_proba)
        print('accuracy train=', accuracy_score(clf_c.predict(X_valid_train_array), y_valid_train))
        print('accuracy test = ', accuracy_score(y_valid_pred, y_valid_test))
Developer: mehdidc, Project: astro-theano, Lines: 32, Source: user_test_model.py

Example 13: train_test_split

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
# Split data into Train and hold out for model blending
X, X_holdout, y, y_holdout = train_test_split(X,y,test_size=0.25, random_state=22)

'''
This section of the code runs different models. Currently our tool uses logistic
regression, KNN, random forests, and eXtreme Gradient Boosting (XGBoost).
An important consideration is to calibrate the posterior probabilities,
since certain algorithms give out under-confident predictions.
'''

# Logistic regression
clf = LogisticRegression()
calibrated_clf = CalibratedClassifierCV(clf, method='sigmoid', cv=3)
score = cross_val_score(calibrated_clf,X,y,cv=3,scoring="roc_auc").mean() 
calibrated_clf.fit(X,y)
roc_auc_score(y_holdout,calibrated_clf.predict(X_holdout))
ypred_lr = calibrated_clf.predict(test)

# Random Forest
clf = RandomForestClassifier(n_estimators=np.int(np.sqrt(.75*shape)),min_samples_split=20, n_jobs=-1)
calibrated_clf = CalibratedClassifierCV(clf, method='sigmoid', cv=3)
calibrated_clf.fit(X, y)
roc_auc_score(y_holdout,calibrated_clf.predict(X_holdout))

ypred_rf = calibrated_clf.predict(test)

# KNN
clf = KNeighborsClassifier(n_neighbors = np.int(np.sqrt(.75*shape)))
calibrated_clf = CalibratedClassifierCV(clf, method='sigmoid', cv=3)
score = cross_val_score(calibrated_clf,X,y,cv=3,scoring="roc_auc").mean() 
calibrated_clf.fit(X,y)
Developer: pankaj077, Project: TI_work, Lines: 33, Source: AutoClassification.py

Example 14: RF

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]

#.........part of the code omitted here.........
		space = (
			hp.quniform('n_estimators',100,1000,100),
			hp.quniform('max_depth', 4, 10, 1),
			hp.quniform('max_features', 0.1,1., 0.2)
		)
		trials = Trials()

		best_sln = fmin(objective, space, algo=tpe.suggest, max_evals=40,trials=trials)
		rinfo = trials.results
		df = pd.DataFrame(rinfo)

		df.to_csv('./tune.csv',index=False)
		print(best_sln)




	def do_submit(self, CALIB=False):


		if CALIB:

			tp = {}
			tp['base_estimator'] = self.model
			tp['method'] = 'sigmoid'
			tp['cv'] = 5
			self.model = CalibratedClassifierCV(**tp)

		X = self.data['train_x']
		y = self.data['train_y']
		test_x = self.data['test_x']
		test_id = self.data['test_id']
		self.model.fit(X, y)
		pre = self.model.predict_proba(test_x)
		logger.info(self.model)

		
		self.write_submission(test_id, pre, 'rf_test_l1.csv')



		#retrain

		kf = StratifiedKFold(y, 5, random_state=555)
		sub_col = handle_data.get_col()

		lr_train = pd.DataFrame(index=range(len(X)), columns=sub_col)

		index_cv = 0

		for tr_ind, te_ind in kf:
			train_x  = X.iloc[tr_ind]
			train_y = y.iloc[tr_ind]

			test_x = X.iloc[te_ind]
			test_y = y.iloc[te_ind]
			self.model.fit(train_x, train_y)

			pred = self.model.predict_proba(test_x)

			lr_train.iloc[te_ind] = pred
			score = log_loss(test_y,pred,eps=1e-15, normalize=True)
Developer: hongguangguo, Project: kaggle_framework, Lines: 70, Source: rf.py

Example 15: OneClassClassifierWrapper

# Module to import: from sklearn.calibration import CalibratedClassifierCV [as alias]
# Or: from sklearn.calibration.CalibratedClassifierCV import predict [as alias]
class OneClassClassifierWrapper(ClassifierWrapper):
    """Classifier."""

    def __init__(self,
                 program=SGDClassifier(average=True,
                                       class_weight='balanced',
                                       shuffle=True),
                 nu=0.5):
        """Construct."""
        super(OneClassClassifierWrapper, self).__init__(program)
        self.nu = nu

    def fit(self, graphs):
        """fit."""
        try:

            # make matrix
            data_matrix = vectorize(graphs,
                                    vectorizer=self.vectorizer,
                                    **self.params_vectorize)
            data_matrix_neg = data_matrix.multiply(-1)
            data_matrix_both = vstack([data_matrix, data_matrix_neg], format="csr")
            # make labels
            length = data_matrix.shape[0]
            y = [-1] * length + [1] * length
            y = np.ravel(y)
            # fit:
            estimator = self.program.fit(data_matrix_both, y)
            # moving intercept:

            scores = [estimator.decision_function(sparse_vector)[0]
                      for sparse_vector in data_matrix]
            scores_sorted = sorted(scores)
            pivot = scores_sorted[int(len(scores_sorted) * self.nu)]
            estimator.intercept_ -= pivot

            # calibration:
            data_y = np.asarray([1 if score >= pivot else -1 for score in scores])
            self.program = CalibratedClassifierCV(estimator, method='sigmoid')
            self.program.fit(data_matrix, data_y)
            return self

        except Exception as e:
            logger.debug('Failed iteration. Reason: %s' % e)
            logger.debug('Exception', exc_info=True)

    def predict(self, graphs):
        """predict.
        only overwrite is this:
        decision_function -> predict_proba

        graph.graph['score'] will be a (len 2) list
        """
        try:
            graphs, graphs_ = tee(graphs)
            data_matrix = vectorize(graphs_,
                                    vectorizer=self.vectorizer,
                                    **self.params_vectorize)
            predictions = self.program.predict(data_matrix)
            # scores = self.program.decision_function(data_matrix)
            scores = self.program.predict_proba(data_matrix)
            for score, prediction, graph in zip(scores, predictions, graphs):
                graph.graph['prediction'] = prediction
                graph.graph['score'] = score
                yield graph
        except Exception as e:
            logger.debug('Failed iteration. Reason: %s' % e)
            logger.debug('Exception', exc_info=True)
Developer: smautner, Project: EDeN, Lines: 70, Source: __init__.py


Note: The sklearn.calibration.CalibratedClassifierCV.predict examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by many developers; copyright in the source code belongs to the original authors, and distribution or use should follow each project's license. Do not reproduce without permission.