当前位置: 首页>>代码示例>>Python>>正文


Python CalibratedClassifierCV.predict_proba方法代码示例

本文整理汇总了Python中sklearn.calibration.CalibratedClassifierCV.predict_proba方法的典型用法代码示例。如果您正苦于以下问题:Python CalibratedClassifierCV.predict_proba方法的具体用法?Python CalibratedClassifierCV.predict_proba怎么用?Python CalibratedClassifierCV.predict_proba使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.calibration.CalibratedClassifierCV的用法示例。


在下文中一共展示了CalibratedClassifierCV.predict_proba方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_sample_weight_warning

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def test_sample_weight_warning():
    """Check that fitting with sample_weight warns when the base estimator
    (LinearSVC) ignores weights, while the calibration step still uses them."""
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)

    sample_weight = np.random.RandomState(seed=42).uniform(size=len(y))
    X_train = X[:n_samples]
    y_train = y[:n_samples]
    sw_train = sample_weight[:n_samples]
    X_test = X[n_samples:]

    for method in ('sigmoid', 'isotonic'):
        base_estimator = LinearSVC(random_state=42)
        calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
        # LinearSVC ignores sample weights, but the calibration step can
        # still use them -- fitting must emit a UserWarning saying so.
        msg = "LinearSVC does not support sample_weight."
        assert_warns_message(
            UserWarning, msg,
            calibrated_clf.fit, X_train, y_train, sample_weight=sw_train)
        probs_with_sw = calibrated_clf.predict_proba(X_test)

        # Since the calibration consumed the weights, refitting without
        # them should produce noticeably different probabilities.
        calibrated_clf.fit(X_train, y_train)
        probs_without_sw = calibrated_clf.predict_proba(X_test)

        diff = np.linalg.norm(probs_with_sw - probs_without_sw)
        assert_greater(diff, 0.1)
开发者ID:0664j35t3r,项目名称:scikit-learn,代码行数:30,代码来源:test_calibration.py

示例2: calibrate_probs

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def calibrate_probs(y_val, prob_val, prob_test, n_folds=2, method='isotonic', random_state=5968):
    """Calibrate predicted probabilities using out-of-fold CalibratedClassifierCV.

    Designed to be invoked from R through rPython::

        suppressMessages(library("rPython"))
        python.load("path/to/util_rpython.py")

        data.pred.calib <- python.call('calibrate_probs',
                                       y_val=y_val,        # actual validation labels
                                       prob_val=pred_val,  # predicted validation probs
                                       prob_test=pred_test)  # predicted test probs

        calib_pred_val = data.pred.calib$val
        calib_pred_test = data.pred.calib$test

    Returns a dict with calibrated validation ('val') and test ('test')
    probabilities, each as a plain list for R interop.
    """
    y_val = np.asarray(y_val, dtype=float)
    prob_val = np.asarray(prob_val, dtype=float).reshape((-1, 1))
    prob_test = np.asarray(prob_test, dtype=float).reshape((-1, 1))

    calibrated_val = np.zeros(len(y_val))
    calibrated_test = np.zeros(len(prob_test))

    # Outer folds produce out-of-fold calibrated validation predictions.
    outer_folds = KFold(len(y_val), n_folds=n_folds, random_state=random_state)

    for fit_idx, holdout_idx in outer_folds:
        # Inner folds drive CalibratedClassifierCV's internal splitting.
        inner_folds = KFold(len(fit_idx), n_folds=n_folds, random_state=random_state)
        calibrator = CalibratedClassifierCV(method=method, cv=inner_folds)
        calibrator.fit(prob_val[fit_idx], y_val[fit_idx])
        calibrated_val[holdout_idx] = calibrator.predict_proba(prob_val[holdout_idx])[:, 1]
        # Test predictions are averaged across the outer folds.
        calibrated_test += calibrator.predict_proba(prob_test)[:, 1] / n_folds

    return {'val': list(calibrated_val), 'test': list(calibrated_test)}
开发者ID:ChenglongChen,项目名称:avito_context_click_2015,代码行数:36,代码来源:util_rpython.py

示例3: simple_model

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def simple_model(data, test):
    """Train an isotonic-calibrated random forest, report validation metrics,
    and return averaged positive-class probabilities for ``test``."""
    targets = data.target
    X, tX, y, ty = train_test_split(data.drop("target", axis=1),
                                    targets,
                                    test_size=0.2,
                                    random_state=2016)

    predictions = []

    print("\n\nTraining")
    # Random forest base estimator wrapped in 5-fold isotonic calibration.
    forest = RandomForestClassifier(n_estimators=2500,
                                    max_depth=2,
                                    random_state=2015)
    cal = CalibratedClassifierCV(forest, cv=5, method="isotonic")
    cal.fit(X, y)

    # Held-out validation metrics for the calibrated model.
    pred = cal.predict_proba(tX)[:, 1]
    print("\n\tValidation for Calibrated RFC")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))

    # ens["gbm"] = pred
    predictions.append(cal.predict_proba(test)[:, 1])

    # Average over the (single) collected prediction array.
    return sum(predictions) / len(predictions)
开发者ID:leonardodaniel,项目名称:kaggle_mosco,代码行数:32,代码来源:rfc.py

示例4: train

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def train(model_id, train_x, train_y, valid_x, valid_y, test_x):
    """Fit a fixed-hyperparameter random forest, sigmoid-calibrate it on the
    validation split, and persist predictions when validation loss < 0.52."""
    train_x, train_y = shuffle(train_x, train_y)

    random_state = random.randint(0, 1000000)
    print('random state: {state}'.format(state=random_state))

    # NOTE: the forest uses its own fixed seed (979271); the drawn
    # random_state above is only printed, as in the original.
    forest = RandomForestClassifier(bootstrap=False, class_weight=None,
            criterion='entropy', max_depth=29008, max_features=36,
            max_leaf_nodes=None, min_samples_leaf=5, min_samples_split=3,
            min_weight_fraction_leaf=0.0, n_estimators=4494, n_jobs=8,
            oob_score=False, random_state=979271, verbose=0,
            warm_start=False)
    forest.fit(train_x, train_y)

    # cv='prefit': reuse the already-fitted forest and learn only the
    # sigmoid mapping on the held-out validation data.
    calibrated = CalibratedClassifierCV(base_estimator=forest, method="sigmoid", cv="prefit")
    calibrated.fit(valid_x, valid_y)

    probs_valid = calibrated.predict_proba(valid_x)
    probs_test = calibrated.predict_proba(test_x)

    loss = test(valid_y, probs_valid, True)
    if loss < 0.52:
        data.saveData(probs_valid, "../valid_results/valid_" + str(model_id) + ".csv")
        data.saveData(probs_test, "../results/results_" + str(model_id) + ".csv")
开发者ID:hujiewang,项目名称:otto,代码行数:28,代码来源:rf2.py

示例5: main

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def main():
    """Train an ExtraTrees pipeline, calibrate it on a held-out split, and
    write calibrated test-set predictions to submission.csv."""
    X, Y, encoder, scale = load_train_data('train.csv')
    estimators = 500
    X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=0.2, random_state=0)
    X_train_real, X_test_real, Y_train_real, Y_test_real = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)
    log.info('Loaded training file')
    X_test, _ = load_csv_file('test.csv', cut_end=False)
    log.info('Loaded test file')

    # Classifier setup: tf-idf -> dense -> extra trees.
    tree_clf = ExtraTreesClassifier(n_estimators=estimators, n_jobs=-1,
                                    random_state=42, max_depth=55, min_samples_split=1)
    clf = make_pipeline(TfidfTransformer(), DenseTransformer(), tree_clf)

    log.info('Fitting GradientBoost')
    clf.fit(X_train_real, Y_train_real)
    score = log_loss(Y_test_real, clf.predict_proba(X_test_real))
    log.info('Log Loss score un-trained = %f' % score)

    # Calibrate on the untouched validation split (cv='prefit' keeps the
    # pipeline as-is and only fits the isotonic mapping).
    sig_clf = CalibratedClassifierCV(clf, method="isotonic", cv="prefit")
    log.info('Fitting CalibratedClassifierCV')
    sig_clf.fit(X_valid, Y_valid)
    sig_score = log_loss(Y_test_real, sig_clf.predict_proba(X_test_real))
    log.info('Log loss score trained = %f' % sig_score)

    # Predict the real test data with the calibrated classifier.
    write_out_submission(sig_clf.predict_proba(X_test), 'submission.csv')
开发者ID:Almclean,项目名称:otto-group,代码行数:33,代码来源:main.py

示例6: simple_model

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def simple_model(data, test):
    """Train a calibrated GBM and an XGBoost model, validate both on a
    held-out split, and return their averaged test-set probabilities.

    Parameters
    ----------
    data : DataFrame with a binary "target" column plus feature columns.
    test : DataFrame of test features (same feature columns, no target).

    Returns
    -------
    1-D array of averaged positive-class probabilities for ``test``.
    """
    targets = data.target
    X, tX, y, ty = train_test_split(data.drop("target", axis=1), 
                                              targets, 
                                              test_size=0.2,
                                              random_state=2016)

    predictions = []

    print("\n\nTraining")
    # Sklearn GBM, isotonic-calibrated with 5-fold CV.
    clf = GradientBoostingClassifier(n_estimators=2500, 
                                     learning_rate=0.026, 
                                     max_depth=2,
                                     random_state=2015)

    cal = CalibratedClassifierCV(clf, cv=5, method="isotonic")
    cal.fit(X,y)

    pred = cal.predict_proba(tX)[:,1]
    print("\n\tValidation for Calibrated GBM")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))

    # ens["gbm"] = pred
    predictions.append(cal.predict_proba(test)[:,1])

    # XGBoost
    data = X.values
    label = y.values
    dtrain = xgb.DMatrix(data, label=label)

    datat = tX.values
    dtest = xgb.DMatrix(datat)

    param = {}
    param['objective'] = 'binary:logistic'
    param['eta'] = 0.1
    param['max_depth'] = 8
    param['eval_metric'] = 'auc'
    param['silent'] = 1
    param['min_child_weight'] = 2
    param['subsample'] = 0.5
    param['colsample_bytree'] = 0.5
    param['nthread'] = 4
    num_round = 50

    bst = xgb.train(param, dtrain, num_round)
    pred = bst.predict(dtest)
    print("\n\tValidation for XGBoost")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))

    # BUG FIX: the original appended the calibrated GBM's test probabilities
    # a second time here, so XGBoost never contributed to the ensemble even
    # though it was trained and validated above. Append XGBoost's own
    # test-set predictions instead.
    # ens["xgb"] = pred
    predictions.append(bst.predict(xgb.DMatrix(test.values)))

    predictions = sum(predictions)/len(predictions)

    return predictions
开发者ID:leonardodaniel,项目名称:kaggle_mosco,代码行数:62,代码来源:training_003.py

示例7: train_test

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
    def train_test(self, X, y, X_test):
        """Two-stage stacking: train xgboost and a calibrated RF on one half
        of the data, append their (standardized) predictions as extra features
        for the other half, then train a second xgboost on the augmented
        features.

        Returns an (n_test, 9) array of class probabilities for X_test.
        Reads self.param, self.num_round (mutated to 450) and the
        class-weight table self.w.
        """
        # StratifiedShuffleSplit with n_iter=1: the loop body runs once,
        # leaving X0/y0 as the first-stage half and X1/y1 as the second.
        sss = StratifiedShuffleSplit(y, 1, test_size=0.5)    
        for train_id, valid_id in sss:
            X0, X1 = X[train_id], X[valid_id]
            y0, y1 = y[train_id], y[valid_id]  
            
        #First half
        
        # Per-sample weights looked up from the class-weight table self.w.
        w0 = np.zeros(len(y0))
        for i in range(len(w0)):
            w0[i] = self.w[int(y0[i])]
        xg0_train = DMatrix(X0, label=y0, weight=w0)  
        xg0_test = DMatrix(X1, label=y1)   
        xgt_test = DMatrix(X_test)
        bst0 = my_train_xgboost(self.param, xg0_train, self.num_round)
        # 9-way class probabilities for the held-out half and the test set.
        y0_pred = bst0.predict(xg0_test).reshape(X1.shape[0], 9)
        yt_pred = bst0.predict(xgt_test).reshape(X_test.shape[0], 9)
        
        #Calibrated RF
        rf = RandomForestClassifier(n_estimators=600, criterion='gini', 
                class_weight='auto', max_features='auto')
        cal = CalibratedClassifierCV(rf, method='isotonic', cv=3)
        cal.fit(X0, y0)
        y0_cal = cal.predict_proba(X1)
        yt_cal = cal.predict_proba(X_test)
        
        #Second half
        # NOTE(review): fit_transform is re-fit on each prediction array
        # separately, so test predictions are scaled by their own statistics
        # rather than the training ones -- preserved as-is.
        ss = StandardScaler()
        y0_pred = ss.fit_transform(y0_pred)
        yt_pred = ss.fit_transform(yt_pred)
        y0_cal = ss.fit_transform(y0_cal)
        yt_cal = ss.fit_transform(yt_cal)
        # Augment the second-half features with first-stage predictions.
        X1 = np.hstack((X1, y0_pred, y0_cal))
        X_test = np.hstack((X_test, yt_pred, yt_cal))  
        w1 = np.zeros(len(y1))
        
#        self.param['eta'] = 0.01
        self.num_round = 450

        for i in range(len(w1)):
            w1[i] = self.w[int(y1[i])]
        xg1_train = DMatrix(X1, label=y1, weight=w1)    
        xg_test= DMatrix(X_test)
        bst1 = my_train_xgboost(self.param, xg1_train, self.num_round)
        y_pred = bst1.predict(xg_test).reshape(X_test.shape[0], 9)
        
        return y_pred






                    
        
开发者ID:chrinide,项目名称:kaggle_otto_group,代码行数:51,代码来源:clf_xgboost_split.py

示例8: train_validate

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
    def train_validate(self, X_train, y_train, X_valid, y_valid):
        """Validation twin of train_test: two-stage stacking with xgboost
        watchlists so training progress is reported on each split.

        Trains xgboost and a calibrated RF on half of X_train, stacks their
        (standardized) predictions onto the remaining half and onto X_valid,
        then trains a second xgboost on the augmented features.

        Returns an (n_valid, 9) array of class probabilities for X_valid.
        Reads self.param, self.num_round (mutated to 450) and the
        class-weight table self.w.
        """
        # StratifiedShuffleSplit with n_iter=1: the loop body runs once.
        sss = StratifiedShuffleSplit(y_train, 1, test_size=0.5)    
        for train_id, valid_id in sss:
            X0_train, X1_train = X_train[train_id], X_train[valid_id]
            y0_train, y1_train = y_train[train_id], y_train[valid_id]  
            
        #First half
       
        # Per-sample weights looked up from the class-weight table self.w.
        w0_train = np.zeros(len(y0_train))
        for i in range(len(w0_train)):
            w0_train[i] = self.w[int(y0_train[i])]
        xg0_train = DMatrix(X0_train, label=y0_train, weight=w0_train)  
        xg0_valid = DMatrix(X1_train, label=y1_train)   
        xgv_valid = DMatrix(X_valid, label=y_valid)
        watchlist = [(xg0_train,'train'), (xg0_valid, 'validation0')]
        
#        bst0 = train(self.param, xg0_train, self.num_round, watchlist)
        bst0 = my_train_xgboost(self.param, xg0_train, self.num_round, watchlist)
        # 9-way class probabilities for the held-out half and validation set.
        y0_pred = bst0.predict(xg0_valid).reshape(X1_train.shape[0], 9)
        yv_pred = bst0.predict(xgv_valid).reshape(X_valid.shape[0], 9)
        
        #Calibrated RF
        rf = RandomForestClassifier(n_estimators=600, criterion='gini', 
                                    class_weight='auto', max_features='auto')
        cal = CalibratedClassifierCV(rf, method='isotonic', cv=3)        
        cal.fit(X0_train, y0_train)
        y0_cal = cal.predict_proba(X1_train)
        yv_cal = cal.predict_proba(X_valid)
        
        #Second half
        # NOTE(review): fit_transform is re-fit per array, so validation
        # predictions are scaled by their own statistics -- preserved as-is.
        ss = StandardScaler()
        y0_pred = ss.fit_transform(y0_pred)
        yv_pred = ss.fit_transform(yv_pred)
        y0_cal = ss.fit_transform(y0_cal)
        yv_cal = ss.fit_transform(yv_cal)
        # Augment the second-half features with first-stage predictions.
        X1_train = np.hstack((X1_train, y0_pred, y0_cal))
        X_valid = np.hstack((X_valid, yv_pred, yv_cal))        
        w1_train = np.zeros(len(y1_train))
        
#        self.param['eta'] = 0.05
        self.num_round = 450

        for i in range(len(w1_train)):
            w1_train[i] = self.w[int(y1_train[i])]
        xg1_train = DMatrix(X1_train, label=y1_train, weight=w1_train)    
        xg_valid = DMatrix(X_valid, label=y_valid)
        watchlist = [(xg1_train,'train'), (xg_valid, 'validation')]
        
#        bst1 = train(self.param, xg1_train, self.num_round, watchlist)
        bst1 = my_train_xgboost(self.param, xg1_train, self.num_round, watchlist)
        y_pred = bst1.predict(xg_valid).reshape(X_valid.shape[0], 9)

#        pdb.set_trace()
        return y_pred
开发者ID:chrinide,项目名称:kaggle_otto_group,代码行数:58,代码来源:clf_xgboost_split.py

示例9: test_calibration_multiclass

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def test_calibration_multiclass():
    """Test calibration for multiclass """
    # Multi-class setting with a classifier exposing only decision_function.
    clf = LinearSVC()
    X, y_idx = make_blobs(n_samples=100, n_features=2, random_state=42,
                          centers=3, cluster_std=3.0)

    # String class labels exercise CalibratedClassifierCV's label handling.
    target_names = np.array(['a', 'b', 'c'])
    y = target_names[y_idx]

    X_train, y_train = X[::2], y[::2]
    X_test, y_test = X[1::2], y[1::2]

    clf.fit(X_train, y_train)
    for method in ('isotonic', 'sigmoid'):
        cal_clf = CalibratedClassifierCV(clf, method=method, cv=2)
        cal_clf.fit(X_train, y_train)
        probas = cal_clf.predict_proba(X_test)
        # Rows of predict_proba must sum to one.
        assert_array_almost_equal(np.sum(probas, axis=1), np.ones(len(X_test)))

        # The calibrated log-loss should beat a naive softmax over the OvR
        # decision function.  NOTE(review): the original uses exp(-y_pred)
        # inside the softmax; preserved verbatim.
        def softmax(y_pred):
            e = np.exp(-y_pred)
            return e / e.sum(axis=1).reshape(-1, 1)

        uncalibrated_log_loss = log_loss(
            y_test, softmax(clf.decision_function(X_test)))
        calibrated_log_loss = log_loss(y_test, probas)
        assert_greater_equal(uncalibrated_log_loss, calibrated_log_loss)

    # Calibrating a RandomForestClassifier should also decrease log-loss.
    X, y = make_blobs(n_samples=100, n_features=2, random_state=42,
                      cluster_std=3.0)
    X_train, y_train = X[::2], y[::2]
    X_test, y_test = X[1::2], y[1::2]

    clf = RandomForestClassifier(n_estimators=10, random_state=42)
    clf.fit(X_train, y_train)
    loss = log_loss(y_test, clf.predict_proba(X_test))

    for method in ('isotonic', 'sigmoid'):
        cal_clf = CalibratedClassifierCV(clf, method=method, cv=3)
        cal_clf.fit(X_train, y_train)
        cal_loss = log_loss(y_test, cal_clf.predict_proba(X_test))
        assert_greater(loss, cal_loss)
开发者ID:abecadel,项目名称:scikit-learn,代码行数:55,代码来源:test_calibration.py

示例10: trainrf

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def trainrf(model_id, train_x, train_y, valid_x, valid_y, test_x):
    """Random-search one RF configuration, sigmoid-calibrate it on the
    validation split, keep whichever variant scores better, and save the
    predictions when that score is below 0.450."""
    train_x, train_y = shuffle(train_x, train_y)

    random_state = random.randint(0, 1000000)
    print('random state: {state}'.format(state=random_state))

    # Hyper-parameters drawn at random for this run (draw order preserved so
    # the RNG sequence matches the original).
    # NOTE(review): max_leaf_nodes may be drawn as 1, which sklearn rejects;
    # behavior preserved from the original.
    forest = RandomForestClassifier(n_estimators=random.randint(50, 5000),
                                    criterion='gini',
                                    max_depth=random.randint(10, 1000),
                                    min_samples_split=random.randint(2, 50),
                                    min_samples_leaf=random.randint(1, 10),
                                    min_weight_fraction_leaf=random.uniform(0.0, 0.5),
                                    max_features=random.uniform(0.1, 1.0),
                                    max_leaf_nodes=random.randint(1, 10),
                                    bootstrap=False,
                                    oob_score=False,
                                    n_jobs=30,
                                    random_state=random_state,
                                    verbose=0,
                                    warm_start=True,
                                    class_weight=None)
    forest.fit(train_x, train_y)

    # Uncalibrated predictions and score.
    raw_valid = forest.predict_proba(valid_x)
    raw_test = forest.predict_proba(test_x)
    raw_score = test(valid_y, raw_valid)

    # Sigmoid-calibrate the already-fitted forest on the validation split.
    calibrated = CalibratedClassifierCV(base_estimator=forest, method="sigmoid", cv='prefit')
    calibrated.fit(valid_x, valid_y)
    cal_valid = calibrated.predict_proba(valid_x)
    cal_test = calibrated.predict_proba(test_x)
    cal_score = test(valid_y, cal_valid)

    # Keep whichever variant scored better on validation.
    if cal_score < raw_score:
        valid_predictions, test_predictions, t = cal_valid, cal_test, cal_score
    else:
        valid_predictions, test_predictions, t = raw_valid, raw_test, raw_score

    if t < 0.450:
        data.saveData(valid_predictions, "../valid_results/valid_" + str(model_id) + ".csv")
        data.saveData(test_predictions, "../results/results_" + str(model_id) + ".csv")
开发者ID:hujiewang,项目名称:otto,代码行数:53,代码来源:com.py

示例11: internal_processing

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
    def internal_processing(self, X, y, X_test):
        """Ensemble and calibrate column-blocks of stacked predictions.

        X and X_test are assumed to be 5 horizontally-stacked prediction
        blocks (split with np.hsplit(..., 5)) -- TODO confirm with callers.
        Each test block is calibrated against y via the `calibrate` helper,
        an ensemble classifier EC is fit on the raw X, and its output is
        averaged with a 10-fold isotonic CalibratedClassifierCV fit over X.

        Returns the averaged class-probability predictions for X_test.
        NOTE(review): Python 2 code (print statements).
        """  
        Xs = np.hsplit(X, 5)
        Xts = np.hsplit(X_test, 5)
        Xts_cal = []
        
        # Calibrate each test block using the matching training block.
        for i in range(len(Xs)):           
            Xts_cal.append(calibrate(Xs[i], y, Xts[i]))
         
        XX_test = np.hstack(Xts_cal)   
        
        ec = EC(n_preds=5)
        ec.fit(X, y)
        y_ens = ec.predict_proba(XX_test)
#        y_pred = ec.predict_proba(X_test)
        
        # Validation diagnostics on the training data itself.
        yv = ec.predict_proba(X)
        print 'Weights: %s' %(ec.w)
        print 'Validation log-loss: %s' %(logloss_mc(y, yv))
        
        # A second, independently-calibrated ensemble over the raw features.
        cc = CalibratedClassifierCV(base_estimator=EC(n_preds=5), 
                                    method='isotonic', cv=10)
                                    
        cc.fit(X, y)
        y_cal = cc.predict_proba(XX_test)
        
        # Final prediction: mean of the ensemble and calibrated outputs.
        y_pred = (y_ens + y_cal)/2.
         
        return y_pred       
开发者ID:chrinide,项目名称:kaggle_otto_group,代码行数:33,代码来源:ens_opt_cal.py

示例12: setTrainTestDataAndCheckModel

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def setTrainTestDataAndCheckModel(X_train,Y_train,X_test,Y_test):
    """Fit a 125-tree random forest, isotonic-calibrate it with 5-fold CV,
    and print the log-loss of the calibrated probabilities on the test split.

    NOTE(review): Python 2 code (print statements). The triple-quoted block
    below is commented-out experimentation (grid search, bagging) kept
    verbatim from the original.
    """
    model = RandomForestClassifier(125)
    model.fit(X_train,Y_train)
    '''
    clf = GridSearchCV(model,{'n_estimators':[100,125,150]},verbose=1)
    
    clf.fit(X_train,Y_train)
    print(clf.best_score_)
    print(clf.best_params_)    
    
    output = model.predict(X_test)
    print "-------------------RFC-----------------------"
    #print accuracy_score(Y_test,output)
    #print "%.2f" % log_loss(Y_test,output, eps=1e-15, normalize=True)
    
    ypreds = model.predict_proba(X_test)
    print "%.2f" % log_loss(Y_test,ypreds, eps=1e-15, normalize=True)

    
    clfbag = BaggingClassifier(model, n_estimators=5)
    clfbag.fit(X_train, Y_train)
    ypreds = clfbag.predict(X_test)    
    #print accuracy_score(Y_test,ypreds)    
    
    ypreds = clfbag.predict_proba(X_test)
    print "%.2f" % log_loss(Y_test,ypreds, eps=1e-15, normalize=True)
    '''
    # Calibrate the fitted forest with isotonic regression (5-fold CV).
    calibrated_clf = CalibratedClassifierCV(model, method='isotonic', cv=5)
    calibrated_clf.fit(X_train, Y_train)
    #ypreds = calibrated_clf.predict(X_test)
    #print accuracy_score(Y_test,ypreds)
    
    # Report calibrated log-loss (Python 2 print).
    ypreds = calibrated_clf.predict_proba(X_test)
    print "%.2f" % log_loss(Y_test, ypreds, eps=1e-15, normalize=True)
开发者ID:gokul180288,项目名称:Kaggle-1,代码行数:36,代码来源:randomforestclassifier.py

示例13: setTrainDataAndMakeModel

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def setTrainDataAndMakeModel(X_train, Y_train, X_test):
    """Fit an isotonic-calibrated MultinomialNB (5-fold CV) and return its
    predicted class probabilities for X_test."""
    nb = MultinomialNB(alpha=125535, class_prior=None, fit_prior=True)
    calibrated = CalibratedClassifierCV(nb, method='isotonic', cv=5)
    calibrated.fit(X_train, Y_train)
    return calibrated.predict_proba(X_test)
    
开发者ID:gokul180288,项目名称:Kaggle-1,代码行数:8,代码来源:MultinomialNB.py

示例14: calibrate_proba_fitted_models

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def calibrate_proba_fitted_models(iDf, iFeatures, iModelsDict):
    """Calibrate each prefit model with both sigmoid and isotonic methods and
    keep, per model, whichever calibration achieves the lower Brier score.

    Parameters
    ----------
    iDf : DataFrame holding the feature columns and one binary target column
        per model (model names carry a '_gbr'/'_rf' suffix over the target).
    iFeatures : pandas object whose .values are the feature column labels.
    iModelsDict : dict mapping model name -> already-fitted classifier.

    Returns
    -------
    dict mapping model name -> ``calibrated_classifiers_`` of the winning
    CalibratedClassifierCV.
    """
    iCalibratedModelsDict = {}

    for model_name in iModelsDict.keys():
        # Target column name is the model name minus its estimator suffix.
        target = model_name.replace('_gbr', '').replace('_rf', '')
        proba_cal_sig = CalibratedClassifierCV(iModelsDict[model_name], method='sigmoid', cv='prefit')
        proba_cal_iso = CalibratedClassifierCV(iModelsDict[model_name], method='isotonic', cv='prefit')
        proba_cal_sig.fit(iDf.loc[:, iFeatures.values], iDf.loc[:, target].values)
        proba_cal_iso.fit(iDf.loc[:, iFeatures.values], iDf.loc[:, target].values)
        # BUG FIX: the original read `.value`, which raises AttributeError on
        # a pandas Series; every other access in this function uses `.values`.
        brier_sig = brier_score_loss(iDf.loc[:, target].values,
                                     proba_cal_sig.predict_proba(iDf.loc[:, iFeatures.values])[:, 1])
        brier_iso = brier_score_loss(iDf.loc[:, target].values,
                                     proba_cal_iso.predict_proba(iDf.loc[:, iFeatures.values])[:, 1])

        # Lower Brier score wins; ties go to the sigmoid calibration.
        if brier_sig <= brier_iso:
            iCalibratedModelsDict[model_name] = proba_cal_sig.calibrated_classifiers_
        else:
            iCalibratedModelsDict[model_name] = proba_cal_iso.calibrated_classifiers_
    return iCalibratedModelsDict
开发者ID:Fanchouille,项目名称:Speculoos,代码行数:21,代码来源:StockModels.py

示例15: main

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import predict_proba [as 别名]
def main():
    """Train an ExtraTrees -> BernoulliRBM -> LogisticRegression pipeline,
    calibrate it on a held-out split, and write calibrated test-set
    predictions to submission.csv."""
    X, Y = load_csv_file('train.csv')
    estimators = 1000
    test_size = 0.05
    X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=test_size, random_state=0)
    X_train_real, X_test_real, Y_train_real, Y_test_real = train_test_split(X_train, Y_train, test_size=test_size, random_state=42)
    log.info('Loaded training file')
    X_test, _ = load_csv_file('test.csv', cut_end=False)
    log.info('Loaded test file')

    #Classifier Setup
    logistic = linear_model.LogisticRegression()
    rbm = BernoulliRBM(random_state=0, verbose=True)
    tree_clf = ExtraTreesClassifier(n_estimators=estimators, n_jobs=-1,
                                    random_state=0, max_depth=None)

    rbm.learning_rate = 0.06
    rbm.n_iter = 20
    rbm.n_components = 500
    logistic.C = 6000.0

    pipeline = make_pipeline(tree_clf, rbm, logistic)
    #clf = GridSearchCV(pipeline, param_grid, n_jobs=-1, verbose=1)
    clf = pipeline
    # BUG FIX: the original read `'...%s' str(...)` -- a SyntaxError -- the
    # `%` formatting operator was missing.
    log.info('Fitting Boltzman with %s' % str([name for name, _ in pipeline.steps]))
    clf.fit(X_train_real, Y_train_real)
    clf_probs = clf.predict_proba(X_test_real)
    score = log_loss(Y_test_real, clf_probs)
    log.info('Log Loss score un-trained = %f' % score)

    # Calibrate Classifier using ground truth in X,Y_valid
    sig_clf = CalibratedClassifierCV(clf, method="isotonic", cv="prefit")
    log.info('Fitting CalibratedClassifierCV')
    sig_clf.fit(X_valid, Y_valid)
    sig_clf_probs = sig_clf.predict_proba(X_test_real)
    sig_score = log_loss(Y_test_real, sig_clf_probs)
    log.info('Log loss score trained = %f' % sig_score)

    # Ok lets predict the test data with our funky new classifier
    sig_submission_probs = sig_clf.predict_proba(X_test)

    write_out_submission(sig_submission_probs, 'submission.csv')
开发者ID:Almclean,项目名称:otto-group,代码行数:44,代码来源:boltzman.py


注:本文中的sklearn.calibration.CalibratedClassifierCV.predict_proba方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。