当前位置: 首页>>代码示例>>Python>>正文


Python CalibratedClassifierCV.fit方法代码示例

本文整理汇总了Python中sklearn.calibration.CalibratedClassifierCV.fit方法的典型用法代码示例。如果您正苦于以下问题:Python CalibratedClassifierCV.fit方法的具体用法?Python CalibratedClassifierCV.fit怎么用?Python CalibratedClassifierCV.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.calibration.CalibratedClassifierCV的用法示例。


在下文中一共展示了CalibratedClassifierCV.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: setTrainDataAndMakeModel

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
def setTrainDataAndMakeModel(X_train,Y_train,X_test):
    """Fit an isotonic-calibrated Multinomial Naive Bayes model on the
    training data and return class-probability predictions for X_test.

    Uses 5-fold CV inside CalibratedClassifierCV for the calibration.
    """
    # NOTE(review): alpha=125535 is an unusually large smoothing value —
    # confirm it is intentional and not a typo.
    base_nb = MultinomialNB(alpha=125535, class_prior=None, fit_prior=True)
    calibrated = CalibratedClassifierCV(base_nb, method='isotonic', cv=5)
    calibrated.fit(X_train, Y_train)
    return calibrated.predict_proba(X_test)
    
开发者ID:gokul180288,项目名称:Kaggle-1,代码行数:8,代码来源:MultinomialNB.py

示例2: simple_model

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
def simple_model(data, test):
    """Train a calibrated sklearn GBM and an XGBoost model on `data`, then
    return the element-wise average of their positive-class probabilities
    on `test`.

    Parameters
    ----------
    data : DataFrame containing a binary "target" column plus features.
    test : DataFrame with the same feature columns (no "target").

    Returns
    -------
    1-D array of averaged P(target=1) predictions for `test`.
    """
    targets = data.target
    # Hold out 20% of the rows for validation metrics.
    X, tX, y, ty = train_test_split(data.drop("target", axis=1),
                                    targets,
                                    test_size=0.2,
                                    random_state=2016)

    predictions = []

    print("\n\nTraining")
    # Sklearn GBM, wrapped in isotonic calibration via 5-fold CV.
    clf = GradientBoostingClassifier(n_estimators=2500,
                                     learning_rate=0.026,
                                     max_depth=2,
                                     random_state=2015)

    cal = CalibratedClassifierCV(clf, cv=5, method="isotonic")
    cal.fit(X, y)

    pred = cal.predict_proba(tX)[:, 1]
    print("\n\tValidation for Calibrated GBM")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))

    # ens["gbm"] = pred
    predictions.append(cal.predict_proba(test)[:, 1])

    # XGBoost on the same split.
    dtrain = xgb.DMatrix(X.values, label=y.values)
    dtest = xgb.DMatrix(tX.values)

    param = {
        'objective': 'binary:logistic',
        'eta': 0.1,
        'max_depth': 8,
        'eval_metric': 'auc',
        'silent': 1,
        'min_child_weight': 2,
        'subsample': 0.5,
        'colsample_bytree': 0.5,
        'nthread': 4,
    }
    num_round = 50

    bst = xgb.train(param, dtrain, num_round)
    pred = bst.predict(dtest)
    print("\n\tValidation for XGBoost")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))

    # BUG FIX: the original appended the calibrated GBM's test predictions a
    # second time here, so XGBoost never contributed to the ensemble.  Append
    # the booster's predictions on the real test set instead.
    # ens["xgb"] = pred
    predictions.append(bst.predict(xgb.DMatrix(test.values)))

    # Simple average of the two models' test predictions.
    predictions = sum(predictions) / len(predictions)

    return predictions
开发者ID:leonardodaniel,项目名称:kaggle_mosco,代码行数:62,代码来源:training_003.py

示例3: svc_test2

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
def svc_test2():
    """
    Submission:
    E_val:
    E_in:
    E_out:
    """
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import SVC
    from sklearn.cross_validation import StratifiedKFold
    from sklearn.calibration import CalibratedClassifierCV

    X, y = dataset.load_train()

    # Standardize features before fitting the linear SVM.
    scaler = StandardScaler()
    scaler.fit(X)
    X_std = scaler.transform(X)

    linear_svc = SVC(kernel='linear', class_weight='auto', cache_size=10240)
    linear_svc.fit(X_std, y)

    # Isotonic probability calibration over a stratified 5-fold split.
    isotonic = CalibratedClassifierCV(linear_svc,
                                      cv=StratifiedKFold(y, 5),
                                      method='isotonic')
    isotonic.fit(X_std, y)

    logger.debug('Got best isotonic CalibratedClassifier.')
    logger.debug('E_in (isotonic): %f', Util.auc_score(isotonic, X_std, y))
开发者ID:Divergent914,项目名称:yakddcup2015,代码行数:29,代码来源:modeling.py

示例4: train_model_rfc_calibrated

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
def train_model_rfc_calibrated (features, labels) :
    """Grid-search a random forest, then Platt-calibrate it on a held-out
    30% split and return the calibrated classifier."""
    # Stratified shuffle split keeps the class ratio intact on both sides.
    sss = StratifiedShuffleSplit(labels, n_iter = 1, train_size = 0.7, random_state = 30)

    # A single fold was requested, so this prints 1.
    print (len(sss))

    for fit_idx, cal_idx in sss:
        X_fit, X_cal = features[fit_idx], features[cal_idx]
        y_fit, y_cal = labels[fit_idx], labels[cal_idx]

    print ("features_train shape: ", X_fit.shape)
    print ("features_calib shape: ", X_cal.shape)
    print ("labels_train shape: ", y_fit.shape)
    print ("labels_calib shape: ", y_cal.shape)

    print ("Performing Grid Search ...")
    # params_dict = {'criterion': ['entropy'], 'n_estimators':[30, 35, 40, 45], 'max_depth':[5, 6], 'min_samples_leaf': [1, 2, 5], 'min_samples_split': [2, 5, 10]}
    grid = {'criterion': ['entropy'], 'n_estimators':[60, 70, 80, 90], 'max_depth':[5, 6], 'min_samples_leaf': [1, 2, 5], 'min_samples_split': [2, 5, 10], 'max_features' : [6, 7, 8]}
    searcher = GridSearchCV(rfc(random_state = 30, n_jobs = 4), grid, scoring = 'roc_auc', cv = 5)
    searcher.fit(X_fit, y_fit)

    print ("Best estimator: ", searcher.best_estimator_)
    print ("Best best scores: %.4f" %(searcher.best_score_))

    # 'sigmoid' rather than 'isotonic': sklearn cautions that isotonic can
    # overfit with fewer than ~1000 calibration samples.
    print ("Performing Calibration now ...")
    platt = CalibratedClassifierCV(searcher, cv='prefit', method='sigmoid')
    platt.fit(X_cal, y_cal)
    return platt
开发者ID:sathishrvijay,项目名称:Kaggle-HumanVsRobot,代码行数:35,代码来源:classifier_exp.py

示例5: main

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
def main():
    """End-to-end pipeline: fit a tf-idf + ExtraTrees pipeline, calibrate
    it on a held-out split, and write test-set predictions to disk."""
    X, Y, encoder, scale = load_train_data('train.csv')
    estimators = 500
    # First split: reserve 20% as calibration ground truth.
    X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=0.2, random_state=0)
    # Second split: carve an internal test set out of the training part.
    X_train_real, X_test_real, Y_train_real, Y_test_real = train_test_split(X_train, Y_train, test_size=0.2, random_state=42)
    log.info('Loaded training file')
    X_test, _ = load_csv_file('test.csv', cut_end=False)
    log.info('Loaded test file')

    # Classifier setup: tf-idf -> dense matrix -> extra trees.
    tree_clf = ExtraTreesClassifier(n_estimators=estimators, n_jobs=-1,
                                    random_state=42, max_depth=55, min_samples_split=1)
    clf = make_pipeline(TfidfTransformer(), DenseTransformer(), tree_clf)

    log.info('Fitting GradientBoost')
    clf.fit(X_train_real, Y_train_real)
    raw_probs = clf.predict_proba(X_test_real)
    score = log_loss(Y_test_real, raw_probs)
    log.info('Log Loss score un-trained = %f' % score)

    # Calibrate the pre-fitted pipeline on the untouched validation split.
    sig_clf = CalibratedClassifierCV(clf, method="isotonic", cv="prefit")
    log.info('Fitting CalibratedClassifierCV')
    sig_clf.fit(X_valid, Y_valid)
    sig_score = log_loss(Y_test_real, sig_clf.predict_proba(X_test_real))
    log.info('Log loss score trained = %f' % sig_score)

    # Predict the real test data with the calibrated classifier and persist.
    write_out_submission(sig_clf.predict_proba(X_test), 'submission.csv')
开发者ID:Almclean,项目名称:otto-group,代码行数:33,代码来源:main.py

示例6: train

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
def train(model_id,train_x,train_y,valid_x,valid_y,test_x):
    """Fit a fixed-hyperparameter random forest, sigmoid-calibrate it on
    the validation split, and save predictions when validation loss < 0.52."""
    train_x, train_y = shuffle(train_x, train_y)

    # NOTE: this random state is only printed; the forest uses its own
    # hard-coded random_state below.
    random_state = random.randint(0, 1000000)
    print('random state: {state}'.format(state=random_state))

    forest = RandomForestClassifier(bootstrap=False, class_weight=None,
            criterion='entropy', max_depth=29008, max_features=36,
            max_leaf_nodes=None, min_samples_leaf=5, min_samples_split=3,
            min_weight_fraction_leaf=0.0, n_estimators=4494, n_jobs=8,
            oob_score=False, random_state=979271, verbose=0,
            warm_start=False)
    forest.fit(train_x, train_y)

    # Calibrate the already-fitted forest on held-out validation data.
    calibrated = CalibratedClassifierCV(base_estimator=forest, method="sigmoid", cv="prefit")
    calibrated.fit(valid_x, valid_y)

    valid_predictions = calibrated.predict_proba(valid_x)
    test_predictions = calibrated.predict_proba(test_x)

    # Only persist models whose validation loss beats the 0.52 threshold.
    loss = test(valid_y, valid_predictions, True)
    if loss < 0.52:
        data.saveData(valid_predictions, "../valid_results/valid_"+str(model_id)+".csv")
        data.saveData(test_predictions, "../results/results_"+str(model_id)+".csv")
开发者ID:hujiewang,项目名称:otto,代码行数:28,代码来源:rf2.py

示例7: internal_processing

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
    def internal_processing(self, X, y, X_test):
        """
        Blend calibrated base-model predictions (Python 2 code).

        X and X_test are horizontal stacks of 5 base models' prediction
        blocks (hence np.hsplit(..., 5)); y holds the true labels.
        Returns the average of the raw ensemble prediction and a
        10-fold-CV isotonic calibration of the same ensemble on X_test.
        """
        Xs = np.hsplit(X, 5)
        Xts = np.hsplit(X_test, 5)
        Xts_cal = []
        
        # Calibrate each base model's test block against its training block.
        for i in range(len(Xs)):           
            Xts_cal.append(calibrate(Xs[i], y, Xts[i]))
         
        XX_test = np.hstack(Xts_cal)   
        
        # Ensemble classifier combining the 5 prediction groups.
        ec = EC(n_preds=5)
        ec.fit(X, y)
        y_ens = ec.predict_proba(XX_test)
#        y_pred = ec.predict_proba(X_test)
        
        # Validation: score the ensemble on the training stack itself.
        yv = ec.predict_proba(X)
        print 'Weights: %s' %(ec.w)
        print 'Validation log-loss: %s' %(logloss_mc(y, yv))
        
        # Second opinion: a fresh ensemble wrapped in 10-fold isotonic calibration.
        cc = CalibratedClassifierCV(base_estimator=EC(n_preds=5), 
                                    method='isotonic', cv=10)
                                    
        cc.fit(X, y)
        y_cal = cc.predict_proba(XX_test)
        
        # Final prediction: average of raw-ensemble and calibrated-ensemble probs.
        y_pred = (y_ens + y_cal)/2.
         
        return y_pred       
开发者ID:chrinide,项目名称:kaggle_otto_group,代码行数:33,代码来源:ens_opt_cal.py

示例8: setTrainTestDataAndCheckModel

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
def setTrainTestDataAndCheckModel(X_train,Y_train,X_test,Y_test):
    """Fit a 125-tree random forest, wrap it in 5-fold isotonic calibration,
    and print the test-set log loss (Python 2 print syntax)."""
    model = RandomForestClassifier(125)
    model.fit(X_train,Y_train)
    # The triple-quoted string below is commented-out experimentation
    # (grid search, plain forest scoring, and bagging); kept untouched.
    '''
    clf = GridSearchCV(model,{'n_estimators':[100,125,150]},verbose=1)
    
    clf.fit(X_train,Y_train)
    print(clf.best_score_)
    print(clf.best_params_)    
    
    output = model.predict(X_test)
    print "-------------------RFC-----------------------"
    #print accuracy_score(Y_test,output)
    #print "%.2f" % log_loss(Y_test,output, eps=1e-15, normalize=True)
    
    ypreds = model.predict_proba(X_test)
    print "%.2f" % log_loss(Y_test,ypreds, eps=1e-15, normalize=True)

    
    clfbag = BaggingClassifier(model, n_estimators=5)
    clfbag.fit(X_train, Y_train)
    ypreds = clfbag.predict(X_test)    
    #print accuracy_score(Y_test,ypreds)    
    
    ypreds = clfbag.predict_proba(X_test)
    print "%.2f" % log_loss(Y_test,ypreds, eps=1e-15, normalize=True)
    '''
    # Calibrate the already-fitted forest on the same training data.
    # NOTE(review): cv=5 refits clones internally, so reusing X_train here
    # is valid, but calibrating on the training set may be optimistic.
    calibrated_clf = CalibratedClassifierCV(model, method='isotonic', cv=5)
    calibrated_clf.fit(X_train, Y_train)
    #ypreds = calibrated_clf.predict(X_test)
    #print accuracy_score(Y_test,ypreds)
    
    ypreds = calibrated_clf.predict_proba(X_test)
    print "%.2f" % log_loss(Y_test, ypreds, eps=1e-15, normalize=True)
开发者ID:gokul180288,项目名称:Kaggle-1,代码行数:36,代码来源:randomforestclassifier.py

示例9: simple_model

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
def simple_model(data, test):
    """Train an isotonic-calibrated random forest and return its
    positive-class probability predictions for `test`."""
    targets = data.target
    # Hold out 20% of the rows for validation metrics.
    X, tX, y, ty = train_test_split(data.drop("target", axis=1),
                                    targets,
                                    test_size=0.2,
                                    random_state=2016)

    predictions = []

    print("\n\nTraining")
    # Random forest wrapped in isotonic calibration via 5-fold CV.
    forest = RandomForestClassifier(n_estimators=2500,
                                    max_depth=2,
                                    random_state=2015)
    cal = CalibratedClassifierCV(forest, cv=5, method="isotonic")
    cal.fit(X, y)

    pred = cal.predict_proba(tX)[:, 1]
    print("\n\tValidation for Calibrated RFC")
    print("\t", log_loss(ty, pred))
    print("\t", roc_auc_score(ty, pred))

    # ens["gbm"] = pred
    predictions.append(cal.predict_proba(test)[:, 1])

    # Only one model in the list, so the "average" is its own prediction.
    return sum(predictions) / len(predictions)
开发者ID:leonardodaniel,项目名称:kaggle_mosco,代码行数:32,代码来源:rfc.py

示例10: calibrate_probs

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
def calibrate_probs(y_val, prob_val, prob_test, n_folds=2, method='isotonic', random_state=5968):
    """ Calibrate raw model probabilities with K-fold cross-fitting.

        Calling from R:

        suppressMessages(library("rPython")) # Load RPython
        python.load("path/to/util_rpython.py")

        data.pred.calib <- python.call('calibrate_probs',
                                   y_val=y_val, # Actual values from validation
                                   prob_val=pred_val, # Predicted values from validation
                                   prob_test=pred_test) # Predicted values from test

        # data.pred.calib will be a list, so to get the calibrated predictions for each value we do:
        calib_pred_val = data.pred.calib$val
        calib_pred_test = data.pred.calib$test

    """
    # Coerce R-supplied vectors to float arrays; probabilities become
    # single-column feature matrices for the calibrator.
    y_val = np.asarray(y_val, dtype=float)
    prob_val = np.asarray(prob_val, dtype=float).reshape((-1, 1))
    prob_test = np.asarray(prob_test, dtype=float).reshape((-1, 1))

    calibrated_val = np.zeros(len(y_val))
    calibrated_test = np.zeros(len(prob_test))

    outer_folds = KFold(len(y_val), n_folds=n_folds, random_state=random_state)
    for fit_ix, hold_ix in outer_folds:
        # Inner CV drives the calibrator's own cross-fitting.
        inner_folds = KFold(len(fit_ix), n_folds=n_folds, random_state=random_state)
        calibrator = CalibratedClassifierCV(method=method, cv=inner_folds)
        calibrator.fit(prob_val[fit_ix], y_val[fit_ix])
        # Out-of-fold calibrated validation probs; test probs averaged over folds.
        calibrated_val[hold_ix] = calibrator.predict_proba(prob_val[hold_ix])[:, 1]
        calibrated_test += calibrator.predict_proba(prob_test)[:, 1] / n_folds

    return {'val': list(calibrated_val), 'test': list(calibrated_test)}
开发者ID:ChenglongChen,项目名称:avito_context_click_2015,代码行数:36,代码来源:util_rpython.py

示例11: test_sample_weight_warning

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
def test_sample_weight_warning():
    """Fitting with sample_weight must warn for LinearSVC (which ignores
    weights) yet still change the calibrated probabilities."""
    n_samples = 100
    X, y = make_classification(n_samples=2 * n_samples, n_features=6,
                               random_state=42)

    sample_weight = np.random.RandomState(seed=42).uniform(size=len(y))
    # First half trains, second half is scored.
    X_train = X[:n_samples]
    y_train = y[:n_samples]
    sw_train = sample_weight[:n_samples]
    X_test = X[n_samples:]

    for method in ['sigmoid', 'isotonic']:
        base_estimator = LinearSVC(random_state=42)
        calibrated_clf = CalibratedClassifierCV(base_estimator, method=method)
        # LinearSVC ignores sample weights, but the calibration step can
        # still apply them — a UserWarning must be emitted.
        msg = "LinearSVC does not support sample_weight."
        assert_warns_message(
            UserWarning, msg,
            calibrated_clf.fit, X_train, y_train, sample_weight=sw_train)
        probs_with_sw = calibrated_clf.predict_proba(X_test)

        # Refit without weights; probabilities should differ because the
        # calibrator did use the weights above.
        calibrated_clf.fit(X_train, y_train)
        probs_without_sw = calibrated_clf.predict_proba(X_test)

        diff = np.linalg.norm(probs_with_sw - probs_without_sw)
        assert_greater(diff, 0.1)
开发者ID:0664j35t3r,项目名称:scikit-learn,代码行数:30,代码来源:test_calibration.py

示例12: calibrate

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
def calibrate(X_val, y_val, estimator):
    """Wrap an already-fitted estimator in isotonic calibration, fitting the
    calibrator on the supplied validation data, and return it."""
    calibrator = CalibratedClassifierCV(base_estimator=estimator,
                                        method='isotonic', cv='prefit')
    calibrator.fit(X_val, y_val)
    return calibrator
开发者ID:fnd212,项目名称:ML2016_EDU,代码行数:9,代码来源:model_calibration.py

示例13: move_bias

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
    def move_bias(self, data_matrix, estimator=None, nu=.5, cv=2):
        '''
        Shift the estimator's bias so that a self.nu fraction of data_matrix
        scores negative, then (if enabled) fit and sigmoid-calibrate a
        logistic SGD model around the shifted decision boundary.

        Note: the `nu` and `cv` arguments are kept for interface
        compatibility; the quantile actually used is self.nu.
        '''
        # Decision-function score for every sample.
        scores = [estimator.decision_function(sparse_vector)[0]
                  for sparse_vector in data_matrix]
        # The pivot is the score at the self.nu quantile; subtracting it
        # from the intercept pushes that fraction into the negative class.
        pivot = sorted(scores)[int(len(scores) * self.nu)]
        estimator.intercept_ -= pivot

        if self.move_bias_recalibrate:
            # Re-label each sample by which side of the pivot it falls on.
            side_labels = numpy.asarray([1 if score >= pivot else -1 for score in scores])
            self.testimator = SGDClassifier(loss='log')
            self.testimator.fit(data_matrix, side_labels)
            estimator = CalibratedClassifierCV(self.testimator, cv=cv, method='sigmoid')
            estimator.fit(data_matrix, side_labels)
        return estimator
开发者ID:smautner,项目名称:GraphLearn,代码行数:30,代码来源:estimate.py

示例14: get_score

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
    def get_score(self, params):
        """Hyperopt objective: mean 5-fold CV F1 of an isotonic-calibrated
        random forest on (self.X, self.y); returns the negated score."""
        # Hyperopt samples floats; the forest needs integer hyperparameters.
        for key in ('n_estimators', 'max_depth',
                    'min_samples_split', 'min_samples_leaf'):
            params[key] = int(params[key])

        print('Training with params:')
        print(params)

        # Manual cross-validation over stratified folds.
        scores = []
        for train_ix, test_ix in makeKFold(5, self.y, 1):
            X_train, y_train = self.X[train_ix, :], self.y[train_ix]
            X_test, y_test = self.X[test_ix, :], self.y[test_ix]
            # Inverse-frequency class weights, applied per sample.
            weight = y_train.shape[0] / (2 * np.bincount(y_train))
            sample_weight = np.array([weight[i] for i in y_train])

            forest = RandomForestClassifier(**params)
            calibrated = CalibratedClassifierCV(base_estimator=forest,
                                                method='isotonic',
                                                cv=makeKFold(3, y_train, 1))
            calibrated.fit(X_train, y_train, sample_weight)
            fold_pred = calibrated.predict(X_test)
            scores.append(f1_score(y_true=y_test, y_pred=fold_pred))

        print(scores)
        score = np.mean(scores)
        print(score)
        # Hyperopt minimizes, so negate the score.
        return {'loss': -score, 'status': STATUS_OK}
开发者ID:jingxiang-li,项目名称:kaggle-yelp,代码行数:33,代码来源:level3_model_rf.py

示例15: prepare_model

# 需要导入模块: from sklearn.calibration import CalibratedClassifierCV [as 别名]
# 或者: from sklearn.calibration.CalibratedClassifierCV import fit [as 别名]
 def prepare_model(self, obj_fn=None, num_steps=None, model_params=None, batch_size: int = None):
     """Fit a sigmoid-calibrated KNN on the training split, re-calibrate the
     fitted model (prefit) on the validation split, and wrap the result in
     self.ml_model.

     obj_fn, num_steps and batch_size are accepted for interface
     compatibility but are not used by this implementation.
     """
     # Stage 1: CV sigmoid calibration of a KNN fitted on the training split.
     model = CalibratedClassifierCV(KNeighborsClassifier(**model_params), method="sigmoid")
     model_clf = model.fit(self.ds[self.data_groups["data_train_group"]].to_ndarray(),
                           self.ds[self.data_groups["target_train_group"]].to_ndarray())
     # Stage 2: second sigmoid calibration of the already-fitted model on the
     # validation split.  NOTE(review): calibrating a calibrated model is
     # unusual — confirm the double calibration is intentional.
     cal_model = CalibratedClassifierCV(model_clf, method="sigmoid", cv="prefit")
     cal_model.fit(self.ds[self.data_groups["data_validation_group"]].to_ndarray(),
                   self.ds[self.data_groups["target_validation_group"]].to_ndarray())
     return self.ml_model(cal_model)
开发者ID:elaeon,项目名称:ML,代码行数:10,代码来源:w_sklearn.py


注:本文中的sklearn.calibration.CalibratedClassifierCV.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。