当前位置: 首页>>代码示例>>Python>>正文


Python metrics.brier_score_loss函数代码示例

本文整理汇总了Python中sklearn.metrics.brier_score_loss函数的典型用法代码示例。如果您正苦于以下问题:Python brier_score_loss函数的具体用法?Python brier_score_loss怎么用?Python brier_score_loss使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了brier_score_loss函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: check_calibration

def check_calibration(method):
    """Exercise ``CalibratedClassifierCV`` with the given calibration method.

    Adapted from sklearn/tests/test_calibration.py
    Authors: Alexandre Gramfort
    License: BSD 3 clause

    The check fits a MultinomialNB baseline, calibrates it with ``method``
    and asserts that (a) more folds than samples is rejected, (b) the Brier
    score improves after calibration, and (c) the calibrated probabilities
    behave as expected when the two class labels are re-encoded.
    """
    half = 100
    X, y = make_classification(n_samples=2 * half, n_features=6,
                               random_state=42)

    # MultinomialNB only allows positive X
    X -= X.min()

    # First half is used for training, second half for testing.
    X_train, X_test = X[:half], X[half:]
    y_train, y_test = y[:half], y[half:]

    # Uncalibrated Naive-Bayes baseline
    clf = MultinomialNB().fit(X_train, y_train)
    baseline_proba = clf.predict_proba(X_test)[:, 1]
    baseline_brier = brier_score_loss(y_test, baseline_proba)

    # Asking for more folds than there are samples must fail.
    too_many_folds = CalibratedClassifierCV(clf, cv=y.size + 1)
    assert_raises(ValueError, too_many_folds.fit, X, y)

    pc_clf = CalibratedClassifierCV(clf, method=method, cv=2)
    # Note that this fit overwrites the fit on the entire training set
    pc_clf.fit(X_train, y_train)
    calibrated_proba = pc_clf.predict_proba(X_test)[:, 1]

    # Calibration must lower the Brier score.
    assert_greater(baseline_brier,
                   brier_score_loss(y_test, calibrated_proba))

    # Invariance against the relabelings [0, 1] -> [1, 2] and
    # [0, 1] -> [-1, 1]: the positive-class probabilities must not move.
    for relabeled in (y_train + 1, 2 * y_train - 1):
        pc_clf.fit(X_train, relabeled)
        assert_array_almost_equal(calibrated_proba,
                                  pc_clf.predict_proba(X_test)[:, 1])

    # Relabeling [0, 1] -> [1, 0] swaps the roles of the two classes.
    pc_clf.fit(X_train, (y_train + 1) % 2)
    flipped_proba = pc_clf.predict_proba(X_test)[:, 1]
    if method != "sigmoid":
        # Isotonic calibration is not invariant against relabeling
        # but should improve in both cases
        assert_greater(baseline_brier,
                       brier_score_loss((y_test + 1) % 2, flipped_proba))
    else:
        assert_array_almost_equal(calibrated_proba, 1 - flipped_proba)
开发者ID:joshloyal,项目名称:carl,代码行数:55,代码来源:test_calibration.py

示例2: test_brier_score_loss

def test_brier_score_loss():
    """Verify brier_score_loss values, label invariance and input checks."""
    labels = np.array([0, 1, 1, 0, 1, 1])
    probs = np.array([0.1, 0.8, 0.9, 0.3, 1., 0.95])
    # Reference value: mean squared difference between labels and forecasts.
    expected = linalg.norm(labels - probs) ** 2 / len(labels)

    # A perfect forecast scores zero.
    assert_almost_equal(brier_score_loss(labels, labels), 0.0)

    # The score must not depend on how the two classes are encoded.
    for encoded in (labels, 1. + labels, 2 * labels - 1):
        assert_almost_equal(brier_score_loss(encoded, probs), expected)

    # Length mismatches and out-of-range probabilities are rejected.
    for bad_probs in (probs[1:], probs + 1., probs - 1.):
        assert_raises(ValueError, brier_score_loss, labels, bad_probs)
开发者ID:nateyoder,项目名称:scikit-learn,代码行数:15,代码来源:test_classification.py

示例3: process

    def process(self):
        """Plot the ROC curve of one forecast category and save it to disk.

        Reads the CSV file named by ``self.parameters['ex']``, picks the
        column selected by ``self.parameters['index']`` from the observed
        (``obs_*``) and predicted (``pre_*``) columns, computes the ROC
        curve, its AUC and the Brier score loss, and writes the figure to
        ``self.parameters['name']``.

        Side effects: sets ``self.y_score`` and ``self.y_true``, clears the
        current matplotlib figure, prints progress to stdout and writes the
        image file.
        """
        # Read the data.  ``read_csv(..., index_col=0, parse_dates=True)`` is
        # the documented replacement for ``DataFrame.from_csv``, which was
        # removed in pandas 1.0.
        data = pd.read_csv(self.parameters['ex'], index_col=0,
                           parse_dates=True)
        self.y_score = data[['pre_below', 'pre_normal', 'pre_above']]
        self.y_true = data[['obs_below', 'obs_normal', 'obs_above']]

        key = self.parameters['index']

        def _pick(frame):
            # ``.ix`` was removed in pandas 1.0; emulate its label-first,
            # position-fallback column lookup.
            if key in frame.columns:
                return frame[key]
            return frame.iloc[:, key]

        y_true = _pick(self.y_true)
        y_score = _pick(self.y_score)

        # Plot: start from an empty figure.
        plt.clf()

        fpr, tpr, _ = metrics.roc_curve(y_true, y_score)  # FPR / TPR pairs
        roc_auc = metrics.roc_auc_score(y_true, y_score)  # area under curve
        bs = metrics.brier_score_loss(y_true, y_score)    # Brier score loss

        if self.args.verbose:
            print("====False Positive Ratio(fpr) And True Positive Ratio(tpr) Pair====")
            # Previously the fpr value was printed twice; print the actual
            # (fpr, tpr) pair announced by the header.
            for idx, (fp, tp) in enumerate(zip(fpr, tpr)):
                print(idx, fp, tp)
        plt.plot(fpr, tpr, label='Num:%d,AUC: %0.2f,BS: %0.2f'
                 % (self.y_true.shape[0], roc_auc, bs))
        plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
        plt.xlim([0.0, 1.05])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title(self.args.title[0] if self.args.title else 'Receiver Operating Characteristic(ROC)')
        plt.legend(loc="lower right")
        print('Saving image to {}'.format(self.parameters['name']))
        plt.savefig(self.parameters['name'])
        print('Completely Finshed.')
开发者ID:bazingaedwaqrd,项目名称:MODES,代码行数:33,代码来源:ROC.py

示例4: plot_calibration_curve

def plot_calibration_curve(est, name, fig_index):
    """Draw reliability curves for ``est`` with and without calibration.

    A logistic-regression baseline, the raw estimator and its isotonic,
    sigmoid and ROC-convex-hull calibrated variants are each fitted on the
    module-level ``X_train``/``y_train`` and evaluated on
    ``X_test``/``y_test``.  For every model the Brier score, precision,
    recall, F1 and AUC are printed, its calibration curve is drawn on the
    upper axes and a histogram of its predicted probabilities on the lower
    axes of figure ``fig_index``.
    """
    # Calibrated variants of the estimator (isotonic, sigmoid, ROC convex hull)
    isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')
    sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')
    rocch = CalibratedClassifierCV(est, cv=2, method='rocch')

    # Logistic regression with no calibration as baseline
    lr = LogisticRegression(C=1., solver='lbfgs')

    candidates = [
        (lr, 'Logistic'),
        (est, name),
        (isotonic, name + ' + Isotonic'),
        (sigmoid, name + ' + Sigmoid'),
        (rocch, name + ' + ROCConvexHull'),
    ]

    plt.figure(fig_index, figsize=(10, 10))
    curve_ax = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    hist_ax = plt.subplot2grid((3, 1), (2, 0))

    curve_ax.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    for model, label in candidates:
        model.fit(X_train, y_train)
        hard_pred = model.predict(X_test)
        if not hasattr(model, "predict_proba"):
            # No probabilities available: min-max scale the decision
            # function into [0, 1] instead.
            scores = model.decision_function(X_test)
            prob_pos = (scores - scores.min()) / (scores.max() - scores.min())
        else:
            prob_pos = model.predict_proba(X_test)[:, 1]

        brier = brier_score_loss(y_test, prob_pos, pos_label=y.max())
        print("%s:" % label)
        print("\tBrier: %1.4f" % (brier))
        print("\tPrecision: %1.3f" % precision_score(y_test, hard_pred))
        print("\tRecall: %1.3f" % recall_score(y_test, hard_pred))
        print("\tF1: %1.3f" % f1_score(y_test, hard_pred))
        print("\tAuc: %1.4f\n" % roc_auc_score(y_test, prob_pos))

        frac_pos, mean_pred = calibration_curve(y_test, prob_pos, n_bins=10)

        curve_ax.plot(mean_pred, frac_pos, "s-",
                      label="%s (%1.4f)" % (label, brier))
        hist_ax.hist(prob_pos, range=(0, 1), bins=10, label=label,
                     histtype="step", lw=2)

    curve_ax.set_ylabel("Fraction of positives")
    curve_ax.set_ylim([-0.05, 1.05])
    curve_ax.legend(loc="lower right")
    curve_ax.set_title('Calibration plots  (reliability curve)')

    hist_ax.set_xlabel("Mean predicted value")
    hist_ax.set_ylabel("Count")
    hist_ax.legend(loc="upper center", ncol=2)

    plt.tight_layout()
开发者ID:albahnsen,项目名称:scikit-learn,代码行数:60,代码来源:plot_calibration_curve.py

示例5: calibration_curve_plotter

def calibration_curve_plotter(y_test, prob_pos, n_bins=10):
    """Plot a binned calibration curve and a histogram of the predictions.

    Parameters
    ----------
    y_test : array-like
        True binary outcomes, one per prediction.
    prob_pos : array-like
        Predicted probability of the positive class (``pos_label=1``).
    n_bins : int, default 10
        Number of equal-width bins on [0, 1], used for both the
        calibration curve and the histogram.

    The upper axes show, per bin centre, the point estimate of the observed
    outcome next to the "Perfect calibration" diagonal; the legend entry of
    the model carries its Brier score.  The lower axes show the histogram
    of ``prob_pos``.  Everything is drawn on matplotlib figure 0.
    """
    brier = brier_score_loss(y_test, prob_pos, pos_label=1)

    plt.figure(0, figsize=(10, 10))
    ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    ax2 = plt.subplot2grid((3, 1), (2, 0))

    edges = np.linspace(0.0, 1.0, n_bins + 1)
    centers = edges[:-1] + 0.5 / n_bins
    # np.digitize places a probability of exactly 1.0 past the last edge.
    # Clip so it falls into the final bin instead of a phantom bin centred
    # at 1 + 0.5 / n_bins.
    bin_ids = np.clip(np.digitize(prob_pos, edges) - 1, 0, n_bins - 1)

    df = pd.DataFrame({"true": y_test})
    df["Bin center"] = centers[bin_ids]
    df[""] = "Model calibration: (%1.5f)" % brier

    # Reference series: observed frequency equal to the bin centre.
    df2 = pd.DataFrame({"true": centers, "Bin center": centers})
    df2[""] = "Perfect calibration"

    df = pd.concat([df, df2])

    sns.pointplot(x="Bin center", y="true", data=df, order=centers, hue="",
                  ax=ax1)

    # Use the same binning as the calibration curve (was hard-coded to 10).
    ax2.hist(prob_pos, range=(0, 1), bins=n_bins, label="Model",
             histtype="step", lw=2)

    ax1.set_ylabel("Fraction of positives")
    ax1.set_ylim([-0.05, 1.05])
    ax1.set_title("Calibration plots")

    ax2.set_xlabel("Predicted Probability")
    ax2.set_ylabel("Count")

    plt.tight_layout()
开发者ID:ewulczyn,项目名称:wiki-detox,代码行数:33,代码来源:ngram.py

示例6: plot_probability_calibration_curves

 def plot_probability_calibration_curves(self):
     """Plot the reliability curve and the prediction histogram.

     Uses ``self.y_true`` and ``self.y_pred`` to compute the Brier score and
     a 50-bin calibration curve:

     fraction_of_positives - the true probability in each bin
                             (fraction of positives).
     mean_predicted_value  - the mean predicted probability in each bin.

     The curve goes on the upper axes, a log-scaled histogram of
     ``self.y_pred`` on the lower axes; both are labelled with
     ``self.description`` and the Brier score.  Returns None.
     """
     plt.figure()
     ax1 = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
     # The lower panel occupies the single remaining row of the 3-row grid
     # (rowspan=2 here would run past the end of the grid).
     ax2 = plt.subplot2grid((3, 1), (2, 0))

     clf_score = brier_score_loss(self.y_true, self.y_pred, pos_label=1)
     fraction_of_positives, mean_predicted_value = calibration_curve(
         self.y_true, self.y_pred, n_bins=50)
     label = "%s (%1.3f)" % (self.description, clf_score)

     ax1.plot(mean_predicted_value, fraction_of_positives, "s-",
              color="#660066", alpha=0.6, label=label)
     # Pass the line width once: giving both ``linewidth`` and its alias
     # ``lw`` is rejected by current matplotlib.
     ax2.hist(self.y_pred, range=(0, 1), bins=50, color="#660066",
              alpha=0.6, label=label, histtype="step", lw=2)
     ax2.set_yscale('log')

     ax1.set_ylabel("Fraction of positives")
     ax1.set_ylim([-0.05, 1.05])
     ax1.set_title('Calibration plots  (reliability curve) ' + self.description)

     ax2.set_xlabel("Mean predicted value")
     ax2.set_ylabel("Count")

     # Legends must be created after the artists exist, otherwise they are
     # empty.
     ax1.legend(loc="lower right")
     ax2.legend(loc="upper center", ncol=2)
开发者ID:nancyya,项目名称:Predictors,代码行数:29,代码来源:validation.py

示例7: plot_calibration_curve

def plot_calibration_curve(est, name, fig_index):
	'''
	Draw reliability curves for ``est`` with and without calibration.

	A logistic-regression baseline, the raw estimator and its isotonic and
	sigmoid calibrated variants are fitted on the module-level
	``X_train``/``y_train`` and scored on ``X_test``/``y_test``.  Brier
	score, precision, recall and F1 are printed per model; calibration
	curves go on the upper axes and probability histograms on the lower
	axes of figure ``fig_index``.
	'''
	# Calibrated variants of the estimator
	isotonic = CalibratedClassifierCV(est, cv=2, method='isotonic')
	sigmoid = CalibratedClassifierCV(est, cv=2, method='sigmoid')

	# Logistic regression with no calibration as baseline
	lr = LogisticRegression(C=1.0, solver='lbfgs')

	candidates = [
		(lr, 'Logistic'),
		(est, name),
		(isotonic, name + ' + Isotonic'),
		(sigmoid, name + ' + Sigmoid'),
	]

	plt.figure(fig_index, figsize=(10, 10))
	curve_ax = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
	hist_ax = plt.subplot2grid((3, 1), (2, 0))

	curve_ax.plot([0, 1], [0, 1], 'k:', label='Perfectly calibrated')
	for model, label in candidates:
		model.fit(X_train, y_train)
		hard_pred = model.predict(X_test)
		if not hasattr(model, 'predict_proba'):
			# No probabilities available: min-max scale the decision
			# function into [0, 1] instead.
			scores = model.decision_function(X_test)
			prob_pos = (scores - scores.min()) / (scores.max() - scores.min())
		else:
			prob_pos = model.predict_proba(X_test)[:, 1]

		brier = brier_score_loss(y_test, prob_pos, pos_label=y.max())
		print('%s:' % label)
		print('\tBrier: %1.3f' % (brier))
		print('\tPrecision: %1.3f' % precision_score(y_test, hard_pred))
		print('\tRecall: %1.3f' % recall_score(y_test, hard_pred))
		print('\tF1: %1.3f\n' % f1_score(y_test, hard_pred))

		frac_pos, mean_pred = calibration_curve(y_test, prob_pos, n_bins=10)

		curve_ax.plot(mean_pred, frac_pos, 's-',
			label='%s (%1.3f)' % (label, brier))
		hist_ax.hist(prob_pos, range=(0, 1), bins=10, label=label,
			histtype='step', lw=2)

	curve_ax.set_ylabel('Fraction of positives')
	curve_ax.set_ylim([-0.05, 1.05])
	curve_ax.legend(loc='lower right')
	curve_ax.set_title('Calibration plots (reliability curve)')

	hist_ax.set_xlabel('Mean predicted value')
	hist_ax.set_ylabel('Count')
	hist_ax.legend(loc='upper center', ncol=2)

	plt.tight_layout()
开发者ID:0x0all,项目名称:machineLearning,代码行数:59,代码来源:plot_calibration_curve.py

示例8: brier

def brier(ytrue, yprob, num_classes):
    """Return the one-vs-rest Brier score averaged over all classes.

    Parameters
    ----------
    ytrue : numpy.ndarray
        Class label of every sample; class ``i`` is identified by
        ``ytrue == i`` for ``i`` in ``range(num_classes)``.
    yprob : numpy.ndarray
        Two-dimensional array whose column ``i`` holds the predicted
        probability of class ``i``.
    num_classes : int
        Number of classes to average over.

    Returns
    -------
    float
        Mean of the per-class binary Brier score losses.
    """
    total = 0.
    for cls in range(num_classes):
        # Binary "is this sample of class cls" target.  The original code
        # built this indicator but then scored against the raw multiclass
        # labels instead.
        indicator = (ytrue == cls).astype(int)
        # pos_label is explicit so that a class absent from ytrue (all-zero
        # indicator) is not mistaken for the positive class.
        total += brier_score_loss(indicator, yprob[:, cls], pos_label=1)
    return total / num_classes
开发者ID:dmitro-nazarenko,项目名称:chemfin-open,代码行数:9,代码来源:rand_forest.py

示例9: calibrate_proba_fitted_models

def calibrate_proba_fitted_models(iDf, iFeatures, iModelsDict):
    """Calibrate already-fitted classifiers and keep the better calibration.

    For every entry of ``iModelsDict`` the target column name is derived by
    stripping the ``_gbr`` / ``_rf`` suffix from the model name.  The
    prefit model is calibrated on ``iDf`` with both the sigmoid and the
    isotonic method, and the variant with the lower (or equal, favouring
    sigmoid) Brier score loss is kept.

    Parameters
    ----------
    iDf : pandas.DataFrame
        Data holding the feature columns and the target columns.
    iFeatures : object exposing ``.values``
        Names of the feature columns of ``iDf``.
    iModelsDict : dict
        Maps model name to a fitted classifier.

    Returns
    -------
    dict
        Maps model name to the ``calibrated_classifiers_`` list of the
        selected ``CalibratedClassifierCV``.

    NOTE(review): the Brier score is computed on the same rows that were
    used to fit the calibrators, so the comparison is an in-sample one.
    """
    iCalibratedModelsDict = {}
    X = iDf.loc[:, iFeatures.values]

    for model_name, fitted_model in iModelsDict.items():
        target = model_name.replace('_gbr', '').replace('_rf', '')
        # ``.values`` (not ``.value``, which raised AttributeError) yields
        # the target column as an array.
        y = iDf.loc[:, target].values

        proba_cal_sig = CalibratedClassifierCV(fitted_model, method='sigmoid', cv='prefit')
        proba_cal_iso = CalibratedClassifierCV(fitted_model, method='isotonic', cv='prefit')
        proba_cal_sig.fit(X, y)
        proba_cal_iso.fit(X, y)

        brier_sig = brier_score_loss(y, proba_cal_sig.predict_proba(X)[:, 1])
        brier_iso = brier_score_loss(y, proba_cal_iso.predict_proba(X)[:, 1])

        best = proba_cal_sig if brier_sig <= brier_iso else proba_cal_iso
        iCalibratedModelsDict[model_name] = best.calibrated_classifiers_
    return iCalibratedModelsDict
开发者ID:Fanchouille,项目名称:Speculoos,代码行数:19,代码来源:StockModels.py

示例10: plot_calibration_curve_cv

def plot_calibration_curve_cv(X, y, est, name, bins=10, n_folds=8, n_jobs=1, fig_index=1):
    """Draw cross-validated reliability curves for ``est``.

    The raw estimator and its isotonic and sigmoid calibrated variants are
    each evaluated with stratified ``n_folds``-fold cross-validated
    probability predictions on ``X``/``y``.  Brier score, precision, recall
    and F1 (at a 0.5 threshold) are printed per model; calibration curves
    go on the upper axes and probability histograms on the lower axes of
    figure ``fig_index``, both using ``bins`` bins.
    """
    from sklearn import (metrics, cross_validation)
    from model_selection import cross_val_predict_proba

    # Both calibrators use the same number of internal folds.
    inner_cv = 2
    isotonic = CalibratedClassifierCV(est, cv=inner_cv, method='isotonic')
    sigmoid = CalibratedClassifierCV(est, cv=inner_cv, method='sigmoid')

    candidates = [
        (est, name),
        (isotonic, name + ' + Isotonic'),
        (sigmoid, name + ' + Sigmoid'),
    ]

    plt.figure(fig_index, figsize=(10, 10))
    curve_ax = plt.subplot2grid((3, 1), (0, 0), rowspan=2)
    hist_ax = plt.subplot2grid((3, 1), (2, 0))

    curve_ax.plot([0, 1], [0, 1], "k:", label="Perfectly calibrated")
    for model, label in candidates:
        y_true = y
        folds = cross_validation.StratifiedKFold(y, n_folds)
        y_proba, _fold_scores = cross_val_predict_proba(
            model, X, y, scoring='roc_auc', cv=folds, n_jobs=n_jobs,
            verbose=0, fit_params=None, pre_dispatch='2*n_jobs')
        # Hard predictions at the 0.5 probability threshold
        y_pred = np.array(y_proba > 0.5, dtype=int)

        brier = brier_score_loss(y_true, y_proba, pos_label=y_true.max())
        print("%s:" % label)
        print("\tBrier: %1.3f" % (brier))
        print("\tPrecision: %1.3f" % precision_score(y_true, y_pred))
        print("\tRecall: %1.3f" % recall_score(y_true, y_pred))
        print("\tF1: %1.3f\n" % f1_score(y_true, y_pred))

        frac_pos, mean_pred = calibration_curve(y_true, y_proba, n_bins=bins)

        curve_ax.plot(mean_pred, frac_pos, "s-",
                      label="%s (%1.3f)" % (label, brier))
        hist_ax.hist(y_proba, range=(0, 1), bins=bins, label=label,
                     histtype="step", lw=2)

    curve_ax.set_ylabel("Fraction of positives")
    curve_ax.set_ylim([-0.05, 1.05])
    curve_ax.legend(loc="lower right")
    curve_ax.set_title('Calibration plots  (reliability curve)')

    hist_ax.set_xlabel("Mean predicted value")
    hist_ax.set_ylabel("Count")
    hist_ax.legend(loc="upper center", ncol=2)

    plt.tight_layout()
开发者ID:orazaro,项目名称:kgml,代码行数:55,代码来源:plot_calibration_curve.py

示例11: test_calibration_prefit

def test_calibration_prefit():
    """Check that calibrating a prefitted classifier improves its Brier score.

    Runs over dense and sparse inputs, both calibration methods, and with
    and without sample weights.
    """
    n = 50
    X, y = make_classification(n_samples=3 * n, n_features=6,
                               random_state=42)
    sample_weight = np.random.RandomState(seed=42).uniform(size=y.size)

    # MultinomialNB only allows positive X
    X -= X.min()

    # Three consecutive thirds: train / calibration / test
    train, calib, test = slice(0, n), slice(n, 2 * n), slice(2 * n, None)
    X_train, y_train, sw_train = X[train], y[train], sample_weight[train]
    X_calib, y_calib, sw_calib = X[calib], y[calib], sample_weight[calib]
    X_test, y_test = X[test], y[test]

    # Uncalibrated Naive-Bayes baseline
    clf = MultinomialNB()
    clf.fit(X_train, y_train, sw_train)
    baseline_brier = brier_score_loss(y_test,
                                      clf.predict_proba(X_test)[:, 1])

    dense_inputs = (X_calib, X_test)
    sparse_inputs = (sparse.csr_matrix(X_calib), sparse.csr_matrix(X_test))

    # Naive Bayes with calibration
    for this_X_calib, this_X_test in (dense_inputs, sparse_inputs):
        for method in ('isotonic', 'sigmoid'):
            pc_clf = CalibratedClassifierCV(clf, method=method, cv="prefit")

            for sw in (sw_calib, None):
                pc_clf.fit(this_X_calib, y_calib, sample_weight=sw)
                y_prob = pc_clf.predict_proba(this_X_test)
                y_pred = pc_clf.predict(this_X_test)

                # predict() must agree with the arg-max of predict_proba()
                expected_pred = np.array([0, 1])[np.argmax(y_prob, axis=1)]
                assert_array_equal(y_pred, expected_pred)

                assert_greater(baseline_brier,
                               brier_score_loss(y_test, y_prob[:, 1]))
开发者ID:abecadel,项目名称:scikit-learn,代码行数:39,代码来源:test_calibration.py

示例12: print_stats

 def print_stats():
     """Print the classification report and probability-based scores.

     Reads ``y_true``, ``y_pred``, ``y_proba``, ``target_names`` and
     ``model`` from the enclosing scope.  When the model carries a truthy
     ``threshold`` attribute, sensitivity and specificity at that threshold
     are printed as well.
     """
     report = metrics.classification_report(y_true, y_pred,
                                            target_names=target_names)
     print(report)

     auc = metrics.roc_auc_score(y_true, y_proba)
     logloss = metrics.log_loss(y_true, y_proba)
     brier = metrics.brier_score_loss(y_true, y_proba)
     print("roc_auc_score: {:1.4f} | LogLoss: {:1.3f} | Brier score loss:"
           " {:1.3f}".format(auc, logloss, brier))

     # Nothing more to report without a decision threshold.
     if not (hasattr(model, 'threshold') and model.threshold):
         return

     _precision, sensitivity, specificity = \
         precision_sensitivity_specificity(y_true, y_proba,
                                           threshold=model.threshold)
     print("sensitivity(recall): {:1.2f} and specificity: {:1.2f}"
           " with threshold={:1.2f}".format(
               sensitivity, specificity, model.threshold))
开发者ID:orazaro,项目名称:kgml,代码行数:14,代码来源:classifier.py

示例13: get_error

def get_error(est_track, true_track):
    """Compare an estimated track with the true track.

    Both tracks are accessed through their ``x``, ``y`` and ``angle``
    attributes; ``est_track`` additionally through ``angle_w``.

    Returns
    -------
    error : numpy.recarray
        Same shape as ``est_track`` with fields ``position`` (Euclidean
        distance), ``orientation`` and ``orientation_weighted`` (angle
        differences in degrees as computed by ``anglediff``).
    descr : dict
        Summary statistics: median/mean absolute orientation errors
        (NaNs excluded), agreement measures for the "no angle" (NaN)
        state, and median/mean position error.
    """
    if est_track.ndim > 1:
        # Turn the true track into a column so it lines up with est_track.
        true_track = true_track.reshape((true_track.shape[0], 1))

    error = np.recarray(shape=est_track.shape,
                        dtype=[('position', float),
                               ('orientation', float),
                               ('orientation_weighted', float)])

    # Position error: Euclidean distance between true and estimated points
    dx = true_track.x - est_track.x
    dy = true_track.y - est_track.y
    error.position = np.sqrt(dx**2 + dy**2)

    # Orientation error, plain and weighted estimate
    error.orientation = anglediff(true_track.angle, est_track.angle,
                                  units='deg')
    error.orientation_weighted = anglediff(true_track.angle,
                                           est_track.angle_w, units='deg')

    descr = {}

    # Summaries over the samples where an orientation error is defined
    abs_orient = np.abs(error.orientation[~np.isnan(error.orientation)])
    descr['orientation_median'] = np.median(abs_orient)
    descr['orientation_mean'] = np.mean(abs_orient)

    abs_orient_w = np.abs(
        error.orientation_weighted[~np.isnan(error.orientation_weighted)])
    descr['orientation_weighted_median'] = np.nanmedian(abs_orient_w)
    descr['orientation_weighted_mean'] = np.nanmean(abs_orient_w)

    # "No angle" state: a NaN angle in the respective track
    true_no_angle = np.isnan(true_track.angle)
    est_no_angle = np.isnan(est_track.angle)
    agree = np.logical_and(true_no_angle, est_no_angle)
    disagree = np.logical_xor(true_no_angle, est_no_angle)
    both = np.logical_or(true_no_angle, est_no_angle)

    descr['no_angle_auc'] = roc_auc_score(true_no_angle, est_no_angle)
    descr['no_angle_mcc'] = matthews_corrcoef(true_no_angle, est_no_angle)
    descr['no_angle_brier'] = brier_score_loss(true_no_angle, est_no_angle)
    descr['no_angle_acc'] = agree.sum()/both.sum()
    descr['no_angle_p_per_frame'] = disagree.sum()/disagree.shape[0]
    descr['position_median'] = np.median(error.position)
    descr['position_mean'] = np.mean(error.position)

    return error, descr
开发者ID:kalleknast,项目名称:head-tracker,代码行数:49,代码来源:ht_helper.py

示例14: process

    def process(self):
        """Compute per-grid-cell ROC AUC and Brier score and save them.

        Every ``*.csv`` file in ``self.parameters['csv_dir']`` contributes
        at most one (observation, prediction) pair per grid cell, taken
        from the ``obs_<class>`` / ``pre_<class>`` columns of the first row
        matching the cell's ``latitude`` / ``longitude``; ``<class>`` is
        ``self.nclass[self.parameters['index']]``.

        Results are stored in ``self.auc``, ``self.bs`` and ``self.sum``
        (sample count), each of shape ``(self.lats, self.lons)``, and saved
        as ``<name>_auc``, ``<name>_bs`` and ``<name>_sum`` via ``np.save``.
        Cells without data or with a single observed class keep the value 0.

        Raises
        ------
        SystemExit
            With status -1 when the directory contains no CSV file.
        """
        # Directory check
        files = glob.glob(os.path.join(self.parameters['csv_dir'],'*.csv'))

        if not files:
            print('No .csv file found in {}.'.format(self.parameters['csv_dir']))
            # Same effect as the interactive ``exit(-1)`` helper, without
            # depending on the ``site`` module having installed it.
            raise SystemExit(-1)

        # Parse every file once instead of once per grid cell.
        # ``read_csv(..., index_col=0, parse_dates=True)`` is the documented
        # replacement for the removed ``DataFrame.from_csv``.
        frames = [pd.read_csv(path, index_col=0, parse_dates=True)
                  for path in files]

        category = self.nclass[self.parameters['index']]
        obs_col = 'obs_' + category
        pre_col = 'pre_' + category

        self.auc = np.zeros([self.lats,self.lons])
        self.bs = np.zeros([self.lats,self.lons])
        self.sum = np.zeros([self.lats,self.lons])

        # Collect the samples of each grid cell and score them
        for lat in np.arange(self.lats):
            for lon in np.arange(self.lons):
                if self.args.verbose:
                    print('Now Calculating Grid({},{})......'.format(lat,lon))
                selector = 'latitude=={} and longitude=={}'.format(lat,lon)
                y_true = []
                y_score = []
                for frame in frames:
                    row = frame.query(selector)
                    if row.empty:
                        continue
                    first = row.iloc[0]
                    y_true.append(first[obs_col])
                    y_score.append(first[pre_col])

                # Validate y_true: skip the cell when there is nothing to
                # score or only one class was observed.
                if not y_true:
                    print('Warning: y_true is empty in Grid({},{}).'.format(lat,lon))
                    continue

                # roc_auc_score is undefined for a single class, whether
                # that class is 0 or 1 (previously only all-zero was caught).
                if len(set(y_true)) < 2:
                    print('Warning:Grid({},{}) y_true has only one class(0 or 1)'.format(lat,lon))
                    continue

                # Compute AUC and Brier score
                self.auc[lat,lon] = metrics.roc_auc_score(y_true,y_score)
                self.bs[lat,lon] = metrics.brier_score_loss(y_true,y_score)
                self.sum[lat,lon] = len(y_true)
                print(self.auc[lat,lon],self.bs[lat,lon])

        # Save the results
        np.save(self.parameters['name']+'_auc',self.auc)
        np.save(self.parameters['name']+'_bs',self.bs)
        np.save(self.parameters['name']+'_sum',self.sum)
开发者ID:bazingaedwaqrd,项目名称:MODES,代码行数:48,代码来源:csv2npy.py

示例15: train_model_rfc_calibrated_cv

def train_model_rfc_calibrated_cv(features, labels, hold_out=False, train_sz=0.9):
	"""Fit an isotonic-calibrated random forest and return it.

	When ``hold_out`` is True, a single stratified shuffle split keeps
	``train_sz`` of the data for fitting and uses the remainder to report
	the Brier score loss and the ROC AUC; otherwise all data is used for
	fitting and nothing is evaluated.

	Returns the fitted ``CalibratedClassifierCV`` (5-fold, isotonic).
	"""
	use_holdout = (hold_out == True)

	features_test = []
	labels_test = []
	if use_holdout:
		# Set aside part of the training set for evaluation.  A stratified
		# shuffle split keeps the class ratios intact on both sides.
		splitter = StratifiedShuffleSplit(labels, n_iter=1, train_size=train_sz, random_state=30)

		# Length is 1 in this case since we have a single fold for splitting
		print(len(splitter))

		features_train, labels_train = [], []
		for train_idx, test_idx in splitter:
			features_train = features[train_idx]
			features_test = features[test_idx]
			labels_train = labels[train_idx]
			labels_test = labels[test_idx]
	else:
		features_train, labels_train = features, labels

	print("features_train shape: ", features_train.shape)
	print("labels_train shape: ", labels_train.shape)
	if use_holdout:
		print("features_test shape: ", features_test.shape)
		print("labels_test shape: ", labels_test.shape)

	print("Parameters selected based on prior grid Search ...")
	clf = rfc(
		random_state=30,
		n_jobs=4,
		criterion='entropy',
		class_weight='auto',
		max_depth=5,
		min_samples_leaf=5,
		min_samples_split=2,
		n_estimators=60,
	)

	# Calibration method: sklearn cautions against 'isotonic' for fewer than
	# 1000 calibration samples because it can overfit, but as of 05/22
	# isotonic did better than sigmoid on both Brier score and ROC AUC.
	# Using a 30-40% holdout improved the holdout ROC AUC from 0.88 to 0.925
	# with CV=5.
	print("Performing Calibration now ...")
	calibrated = CalibratedClassifierCV(clf, cv=5, method='isotonic')
	calibrated.fit(features_train, labels_train)

	if use_holdout:
		# Report Brier score loss and ROC AUC on the held-out part
		holdout_probs = calibrated.predict_proba(features_test)[:, 1]
		print("Brier score: ", brier_score_loss(labels_test, holdout_probs))
		estimate_roc_auc(calibrated, features_test, labels_test)

	return calibrated
开发者ID:sathishrvijay,项目名称:Kaggle-HumanVsRobot,代码行数:47,代码来源:classifier_exp.py


注:本文中的sklearn.metrics.brier_score_loss函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。