Python LinearSVC.predict_proba方法代码示例

本文整理汇总了Python中sklearn.svm.LinearSVC.predict_proba方法的典型用法代码示例。如果您正苦于以下问题：Python LinearSVC.predict_proba方法的具体用法？Python LinearSVC.predict_proba怎么用？Python LinearSVC.predict_proba使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.svm.LinearSVC的用法示例。

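在下文中一共展示了LinearSVC.predict_proba方法的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。需要注意的是，scikit-learn的LinearSVC本身并未实现predict_proba方法，直接调用会抛出AttributeError；下列示例中可行的做法是改用decision_function，或先用CalibratedClassifierCV对分类器进行概率校准。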

示例1: doench_on_fold

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
def doench_on_fold(feature_sets, train, test, y, y_all, X, dim, dimsum, learn_options):
    """Fit the two-stage baseline on one outer fold and predict its test rows.

    Stage one fits an L1-penalised ``LinearSVC`` on the binarised target and
    keeps the features whose coefficient is non-zero.  Stage two fits an
    L2-penalised ``LogisticRegression`` on those features.  The SVM's ``C`` is
    picked by an inner 10-fold cross-validation, stratified on the encoded
    "Target gene" values of the training rows, maximising the mean ROC AUC.

    Args:
        feature_sets: Not used by this function.
        train: Indexer selecting the training rows of ``X``, ``y`` and the
            ``y_all`` columns.
        test: Indexer selecting the rows of ``X`` to predict.
        y: Targets the logistic regression is fitted on (indexed by ``train``).
        y_all: Table providing the "Target gene" column and the column named
            by ``learn_options["binary target name"]`` (read via ``.values``).
        X: Feature matrix; rows are indexed by ``train``/``test`` and columns
            by the selected-feature mask.
        dim: Not used by this function.
        dimsum: Not used by this function.
        learn_options: Mapping; only the "binary target name" key is read.

    Returns:
        A tuple ``(y_pred, clf)``.  ``y_pred`` is the second ``predict_proba``
        column for the test rows, kept two-dimensional with a single column;
        ``clf`` is the fitted ``LogisticRegression``.
    """
    auto_class_weight = None  # 'auto'/None
    verbose = False
    n_folds = 10
    penalty = [
        0.005 * pow(1.15, x) for x in range(0, 45)
    ]  # ian's code:  tvals = [0.005*pow(1.15,x) for x in range(0,45)]
    y_bin = y_all[learn_options["binary target name"]].values[:, None]

    label_encoder = sklearn.preprocessing.LabelEncoder()
    label_encoder.fit(y_all["Target gene"].values[train])
    gene_classes = label_encoder.transform(y_all["Target gene"].values[train])
    cv = sklearn.cross_validation.StratifiedKFold(gene_classes, n_folds=n_folds, shuffle=True)

    # One row per inner fold, one column per candidate penalty.
    cv_results = np.zeros((n_folds, len(penalty)))

    # The training subset does not change inside the search, so slice it once
    # instead of re-indexing X/y for each of the folds x penalties fits.
    X_train = X[train]
    y_train = y[train]
    y_bin_train = y_bin[train]

    for j, (train_inner, test_inner) in enumerate(cv):
        X_fit = X_train[train_inner]
        X_val = X_train[test_inner]
        y_bin_fit = y_bin_train[train_inner].flatten()
        y_fit = y_train[train_inner].flatten()
        y_bin_val = y_bin_train[test_inner]

        for i, c in enumerate(penalty):
            # fit an L1-penalized SVM classifier
            svm = LinearSVC(penalty="l1", C=c, dual=False, class_weight=auto_class_weight)
            svm.fit(X_fit, y_bin_fit)

            # pass features with non-zero coeff to Logistic with l2 penalty
            selected = _nonzero_feature_mask(svm.coef_)

            logreg = LogisticRegression(penalty="l2", class_weight=auto_class_weight)
            logreg.fit(X_fit[:, selected], y_fit)
            y_test = logreg.predict_proba(X_val[:, selected])[:, 1]

            fpr, tpr, _ = sklearn.metrics.roc_curve(y_bin_val, y_test)
            # NaN never compares equal to itself, so a membership test cannot
            # find it; np.isnan is required.
            assert not np.isnan(fpr).any(), "found nan fpr"
            assert not np.isnan(tpr).any(), "found nan tpr"
            roc_auc = sklearn.metrics.auc(fpr, tpr)
            if verbose:
                print("%s %s %s" % (j, i, roc_auc))
            cv_results[j][i] = roc_auc

    best_penalty = penalty[np.argmax(np.mean(cv_results, axis=0))]
    # NOTE(review): the penalty is selected on the mean AUC while the message
    # below reports the per-penalty median; kept as in the original.
    print("best AUC for penalty:  %s" % (np.median(cv_results, axis=0),))

    svm = LinearSVC(penalty="l1", C=best_penalty, dual=False, class_weight=auto_class_weight)
    svm.fit(X_train, y_bin_train.flatten())
    selected = _nonzero_feature_mask(svm.coef_)

    clf = LogisticRegression(penalty="l2", class_weight=auto_class_weight)
    clf.fit(X_train[:, selected], y_train.flatten())
    y_pred = clf.predict_proba(X[test][:, selected])[:, 1:2]

    return y_pred, clf


def _nonzero_feature_mask(coef):
    """Return a flat boolean mask of the features with a non-zero coefficient.

    When every coefficient is zero, the first feature is switched on so the
    downstream classifier still receives one column to fit.
    """
    mask = np.ravel(coef != 0.0)
    if not mask.any():
        mask[0] = True
    return mask

开发者ID:MicrosoftResearch，项目名称:Azimuth，代码行数:56，代码来源:baselines.py

示例2: run

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
def run(input_train, input_test, output_name):
    """Train a bag-of-words linear SVM and write test item IDs ranked by score.

    The features are one-hot ``subcategory`` columns stacked with 1-2-gram
    counts of ``description``.  The test rows are scored with the SVM's
    decision function and their ``itemid`` values are written, highest score
    first, one per line under an ``id`` header (Kaggle submission format).

    Args:
        input_train: Full path of the training file; must provide the
            ``is_blocked``, ``subcategory`` and ``description`` columns.
        input_test: Full path of the testing file; must provide the
            ``itemid``, ``subcategory`` and ``description`` columns.
        output_name: Full path of the output file (overwritten).
    """
    data = pd.read_table(input_train)
    test = pd.read_table(input_test)
    testItemIds = test.itemid
    response = data.is_blocked

    traindummies = pd.get_dummies(data.subcategory)
    pretestdummies = pd.get_dummies(test.subcategory)
    # Align the test one-hot columns with the training ones: categories seen
    # only in the test file are dropped and categories missing from it become
    # all-zero columns.  This replaces a hard-coded drop of two category names
    # that only worked for one particular pair of files.
    pretestdummies = pretestdummies.reindex(columns=traindummies.columns, fill_value=0)
    # Cast through a float array so that mixed bool/int dummy columns cannot
    # turn into an object array that scipy is unable to convert.
    dummies = sparse.csc_matrix(traindummies.values.astype(np.float64))
    testdummies = sparse.csc_matrix(pretestdummies.values.astype(np.float64))

    words = np.array(data.description, str)
    testwords = np.array(test.description, str)
    del data, test

    vect = text.CountVectorizer(decode_error=u'ignore', strip_accents='unicode', ngram_range=(1, 2))
    # The vocabulary is built from both files so test-only n-grams get a column.
    corpus = np.concatenate((words, testwords))
    vect.fit(corpus)
    counts = vect.transform(words)
    features = sparse.hstack((dummies, counts))

    clf = LinearSVC()
    clf.fit(features, response)

    testcounts = vect.transform(testwords)
    testFeatures = sparse.hstack((testdummies, testcounts))
    # LinearSVC does not implement predict_proba.  Only the ordering of the
    # scores is used below, so the signed distance to the hyperplane suffices.
    predicted_scores = clf.decision_function(testFeatures)
    if predicted_scores.ndim > 1:
        # More than two classes: keep the column of the second class, which is
        # what the original ``predict_proba(...).T[1]`` selected.
        predicted_scores = predicted_scores[:, 1]

    ranked = sorted(zip(predicted_scores, testItemIds), reverse=True)
    # ``with`` closes the file even if a write fails.
    with open(output_name, 'w') as f:
        f.write("id\n")
        for _, item_id in ranked:
            f.write("%d\n" % (item_id))

开发者ID:eyedvabny，项目名称:CDIPS-WS-2014，代码行数:37，代码来源:svm_wordbag.py

示例3: test_calibration_multiclass

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
def test_calibration_multiclass():
    """Check CalibratedClassifierCV on multiclass data.

    Part one calibrates a LinearSVC (a classifier exposing only a decision
    function) trained on string labels and checks that the calibrated
    probabilities sum to one and do not have a larger log-loss than a naive
    softmax of the decision function.  Part two checks that calibrating a
    RandomForestClassifier lowers its log-loss.
    """
    calibration_methods = ('isotonic', 'sigmoid')

    def softmax(y_pred):
        # Naive conversion of OvR decision values into a probability row.
        exponentials = np.exp(-y_pred)
        return exponentials / exponentials.sum(axis=1).reshape(-1, 1)

    # --- Part one: classifier with a decision function only -----------------
    svm = LinearSVC()
    points, blob_index = make_blobs(n_samples=100, n_features=2, random_state=42,
                                    centers=3, cluster_std=3.0)

    # Categorical (string) labels exercise CalibratedClassifierCV's label
    # handling.
    labels = np.array(['a', 'b', 'c'])[blob_index]

    # Even rows train, odd rows test.
    points_train, points_test = points[::2], points[1::2]
    labels_train, labels_test = labels[::2], labels[1::2]

    svm.fit(points_train, labels_train)
    for method in calibration_methods:
        calibrated = CalibratedClassifierCV(svm, method=method, cv=2)
        calibrated.fit(points_train, labels_train)
        probas = calibrated.predict_proba(points_test)
        row_totals = np.sum(probas, axis=1)
        assert_array_almost_equal(row_totals, np.ones(len(points_test)))

        # The calibrated log-loss must not exceed that of the softmax-ed
        # decision function.
        naive_probas = softmax(svm.decision_function(points_test))
        uncalibrated_log_loss = log_loss(labels_test, naive_probas)
        calibrated_log_loss = log_loss(labels_test, probas)
        assert_greater_equal(uncalibrated_log_loss, calibrated_log_loss)

    # --- Part two: calibration should reduce a random forest's log-loss -----
    points, targets = make_blobs(n_samples=100, n_features=2, random_state=42,
                                 cluster_std=3.0)
    points_train, points_test = points[::2], points[1::2]
    targets_train, targets_test = targets[::2], targets[1::2]

    forest = RandomForestClassifier(n_estimators=10, random_state=42)
    forest.fit(points_train, targets_train)
    loss = log_loss(targets_test, forest.predict_proba(points_test))

    for method in calibration_methods:
        calibrated = CalibratedClassifierCV(forest, method=method, cv=3)
        calibrated.fit(points_train, targets_train)
        cal_loss = log_loss(targets_test, calibrated.predict_proba(points_test))
        assert_greater(loss, cal_loss)

开发者ID:abecadel，项目名称:scikit-learn，代码行数:55，代码来源:test_calibration.py

示例4: ctr_svm

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
def ctr_svm(model='sklearn-clicklog', from_cache=False, train_dataset_length=100000, test_dataset_length=100000):
    """Train an L1 linear SVM on the click-log dataset and print its log-loss.

    ``LinearSVC`` has no ``predict_proba`` (which is why this experiment used
    to fail), so the SVM is wrapped in ``CalibratedClassifierCV``, which fits
    a sigmoid on top of the decision function and provides probabilities for
    ``log_loss``.

    Args:
        model: Passed through to ``create_dataset``.
        from_cache: Passed through to ``create_dataset``.
        train_dataset_length: Passed through to ``create_dataset``.
        test_dataset_length: Passed through to ``create_dataset``.
    """
    # Function-scope import so the fix does not depend on the file's header.
    from sklearn.calibration import CalibratedClassifierCV

    TRAIN_FILE, TEST_FILE = create_dataset(model, from_cache, train_dataset_length, test_dataset_length)

    svm = LinearSVC(
        penalty='l1',
        loss='squared_hinge',
        dual=False,
        tol=0.0001,
        C=1.0,
        multi_class='ovr',
        fit_intercept=True,
        intercept_scaling=1,
        class_weight=None,
        verbose=1,
        random_state=None,
        max_iter=1000,
    )
    # The calibrator clones and fits the SVM on internal CV splits, then
    # averages the calibrated predictions.
    prediction_model = CalibratedClassifierCV(svm, method='sigmoid', cv=3)

    x_train, y_train = clean_data(TRAIN_FILE)
    x_test, y_test = clean_data(TEST_FILE)

    with Timer('fit model'):
        prediction_model.fit(x_train, y_train)

    with Timer('evaluate model'):
        y_prediction_train = prediction_model.predict_proba(x_train)
        y_prediction_test = prediction_model.predict_proba(x_test)

        loss_train = log_loss(y_train, y_prediction_train)
        loss_test = log_loss(y_test, y_prediction_test)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print('loss_train: %s' % loss_train)
    print('loss_test: %s' % loss_test)

开发者ID:kazarinov，项目名称:hccf，代码行数:39，代码来源:sklearn_experiments.py

示例5: GridSearchCV

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
                   'random_state':[0]}]
    # Grid-search LinearSVC over `parameters` with 5-fold CV, scored by F1.
    n_jobs_ = 1
    num_cv_ = 5
    clf_cv = GridSearchCV(LinearSVC(),
                          parameters,
                          scoring = "f1",
                          cv = num_cv_, n_jobs = n_jobs_,
                          verbose = 10)
    clf_cv.fit(X_train, y_train)
    print(clf_cv.best_params_)

    # Refit a fresh classifier with the winning parameters on all training data.
    clf = LinearSVC()
    clf.set_params(**clf_cv.best_params_)
    del clf_cv
    clf.fit(X_train, y_train)

    # The attribute is spelled "predict_proba"; the former "predict_prob"
    # check could never succeed for any estimator.
    if hasattr(clf, "predict_proba"):
        prob_pos = clf.predict_proba(X_test)[:,1]
    else: # use decision function
        prob_pos = clf.decision_function(X_test)
        # Min-max scale into [0, 1]; when all scores are equal the range is
        # zero, so keep the all-zero shifted scores instead of dividing by 0.
        shifted = prob_pos - prob_pos.min()
        span = shifted.max()
        prob_pos = shifted / span if span > 0 else shifted
    print(prob_pos)

    # confusion_matrix rows are true labels and columns predicted labels, in
    # the order given by `labels`; the positive class (the larger label, which
    # the "f1" scorer also treats as positive) is last, so the layout is
    # [[TN, FP], [FN, TP]].  Passing `labels` keeps the matrix 2x2 even if a
    # class is absent from y_test.
    [[TN,FP],[FN,TP]] = metrics.confusion_matrix(y_test, clf.predict(X_test),
                                                 labels=clf.classes_)
    accuracy = float(TP + TN) / float(TP + FP + FN + TN)
    # Report 0.0 rather than raising when a denominator is empty.
    precision = float(TP) / float(TP + FP) if (TP + FP) else 0.0
    recall = float(TP) / float(TP + FN) if (TP + FN) else 0.0
    f = 2.0 * precision * recall / (precision + recall) if (precision + recall) else 0.0
    print("accuracy=%1.5e, precision=%1.5e, recall=%1.5e, f=%1.5e" % (accuracy, precision, recall, f))

开发者ID:Quasi-quant2010，项目名称:gbdt，代码行数:32，代码来源:svm.py

示例6: len

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
	svc.fit(features['train'],labels['train'])

#test classifier
# Predict labels for the test split with the fitted classifier.
prediction = svc.predict(features['test'])
if use_linear:
	# Turn raw decision scores into pseudo-probabilities.
	proba = svc.decision_function(features['test'])
	if len(proba.shape)==1:
		# A 1-D score vector is expanded into two columns: [score, 1 - score].
		# NOTE(review): `1-proba` is applied to raw decision scores, not to
		# probabilities, and the score lands in column 0 -- confirm this is
		# the intended column order and complement.
		proba = np.vstack((proba,1-proba)).transpose()
	for i in range(len(proba)):
		# Logistic sigmoid of each score, then normalise the row to sum to 1.
		proba[i] = 1 / (1 + np.exp(-proba[i]))
		proba[i] = proba[i] / sum(proba[i])
		# Row sorted in descending order; in the visible code it is only
		# referenced by the disabled rejection rule below.
		proba_sorted = sorted(proba[i],reverse=True)
#		if proba_sorted[0] < 0.4 or proba_sorted[0] / proba_sorted[1] < 2:
#			prediction[i] = 0
else:
	# Otherwise the classifier's own predict_proba is used.
	proba = svc.predict_proba(features['test'])
# Report the classifier's score on the test split as a percentage.
print 'Accuracy %.2f%%' % (svc.score(features['test'],labels['test'])*100)

if plot_weights:
	# Plot one line per row of svc.coef_, coloured by the lookup below.
	# Row i uses colors[i+1]; the table has keys 0-6, so more than six rows
	# would raise KeyError.
	colors = {0:'#ffffff',1:'#ff0000',2:'#00ff00',3:'#0000ff',4:'#ffff00',5:'#ff00ff',6:'#00ffff'}
	import matplotlib.pyplot as plt
	for i in range(len(svc.coef_)):
		w = svc.coef_[i]
		plt.plot(np.arange(len(w)),w,lw=2,color=colors[i+1])
	# NOTE(review): none of the plot calls passes a label, so the legend
	# presumably has no entries -- confirm.
	plt.legend(loc='upper left')
	plt.show()


#output data
# NOTE(review): the name `file` shadows the Python 2 builtin of the same name.
file = open(outputFile,'w')
file.write('labels ')

开发者ID:jingdao，项目名称:PointCloudApp，代码行数:33，代码来源:svc.py

示例7: precision_score

# 需要导入模块: from sklearn.svm import LinearSVC [as 别名]
# 或者: from sklearn.svm.LinearSVC import predict_proba [as 别名]
            pl.show()

        #stats by type
        pre = precision_score(test_label, preds, average=None)
        rec = recall_score(test_label, preds, average=None)
        k=0
        while k<6:
            acc_type[k][itr].append(cm[k,k])
            precision_type[k][itr].append(pre[k])
            recall_type[k][itr].append(rec[k])
            k += 1
        '''

        # Example-selection block.
        # NOTE(review): the original comments call this "entropy based", but
        # the active code computes a margin score (gap between the two largest
        # class probabilities); the entropy formula is left commented out.
        # np.sort orders each row ascending, so pr[-1] is the largest
        # probability of a row and pr[-2] the second largest.
        label_pr = np.sort(clf.predict_proba(validate_data)) #sort in ascending order
        preds = clf.predict(validate_data)
        # One [item from `validate`, predicted label, margin] entry per item.
        res = []
        for h,i,pr in zip(validate,preds,label_pr):
            #entropy = np.sum(-p*math.log(p,2) for p in pr if p!=0)
            if len(pr)<2:
                # With fewer than two probabilities there is no runner-up, so
                # the margin is set to 1.
                margin = 1
            else:
                margin = pr[-1]-pr[-2]
            #margin = 1 - margin
            #margin *= p_x[h]
            res.append([h,i,margin])
        #print 'iter', itr, 'wrong #', len(wrong)

        '''
        #Entropy-based, sort and pick the one with largest H

开发者ID:Thunder1989，项目名称:SDB，代码行数:33，代码来源:type_al.py

注：本文中的sklearn.svm.LinearSVC.predict_proba方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。