当前位置: 首页>>代码示例>>Python>>正文


Python LogisticRegression.predict_proba方法代码示例

本文整理汇总了Python中sklearn.linear_model.logistic.LogisticRegression.predict_proba方法的典型用法代码示例。如果您正苦于以下问题:Python LogisticRegression.predict_proba方法的具体用法?Python LogisticRegression.predict_proba怎么用?Python LogisticRegression.predict_proba使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.linear_model.logistic.LogisticRegression的用法示例。


在下文中一共展示了LogisticRegression.predict_proba方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: main

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def main():
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    parser = argparse.ArgumentParser(description="Skeleton for features and classifier for CWI-2016--optimisation of threshhold")
    parser.add_argument('--threshold',type=float,default=0.5)
    parser.add_argument('--annotator',type=str,default="03")
    parser.add_argument('--penalty',type=str,choices=["l1","l2"],default="l1")


    args = parser.parse_args()
    current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_"+args.annotator+".lbl.conll"
    testfile = scriptdir+"/../data/cwi_testing/cwi_testing.txt.lbl.conll"
    X__dict_train, y_train, v_train = feats_and_classify.collect_features(current_single_ann,vectorize=False)
    X_dict_test, y_test, v_test = feats_and_classify.collect_features(testfile,vectorize=False)
    featdicts = list([x for x in X__dict_train + X_dict_test])
    vect = DictVectorizer()
    X = vect.fit_transform(featdicts).toarray()
    X_train=X[:len(y_train)]
    X_test=X[len(y_train):]

    maxent = LogisticRegression(penalty=args.penalty)
    maxent.fit(X_train,y_train)
    y_pred_proba = maxent.predict_proba(X_test)
    ypred_i=["1" if pair[1]>=args.threshold else "0" for pair in y_pred_proba]
    fout = open(args.annotator+".pred",mode="w")
    print("\n".join(ypred_i),file=fout)
    fout.close()
    sys.exit(0)
开发者ID:jbingel,项目名称:cwi2016,代码行数:29,代码来源:feats_and_predict_single_annotator.py

示例2: __init__

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
class LogReg:
    def __init__(self):
        self.load_data()
        self.clf = LogisticRegression(class_weight = 'balanced')
        self.train()
        self.predict()

    def load_data(self):
        train_csv = './data/train.csv'
        test_csv = './data/test.csv'
        df_train = pd.read_csv(train_csv, header=0)
        df_test = pd.read_csv(test_csv, header=0)
        arr_train = df_train.values
        arr_test = df_test.values
        self.train_X = arr_train[0::,1::]
        self.train_Y = arr_train[0::, 0]
        self.test_X = arr_test[0::, 1::]
        self.test_ID = arr_test[0::,0]

    def train(self):
        self.clf.fit(self.train_X, self.train_Y)

    def predict(self):
        self.test_Y = self.clf.predict_proba(self.test_X)

    def get_training_accuracy(self):
        return (self.clf.score(self.train_X, self.train_Y))

    def store_result(self):
        df_out = pd.DataFrame()
        df_out['Id'] = self.test_ID
        df_out['Action'] = self.test_Y[0::,1]
        df_out.to_csv('./data/results/c1_result.csv',index=False)
开发者ID:cagdasyelen,项目名称:AmazonKaggleChallenge,代码行数:35,代码来源:c1_logistic_regression.py

示例3: test_logreg_predict_proba_multinomial

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def test_logreg_predict_proba_multinomial():
    X, y = make_classification(n_samples=10, n_features=20, random_state=0, n_classes=3, n_informative=10)

    # Predicted probabilites using the true-entropy loss should give a
    # smaller loss than those using the ovr method.
    clf_multi = LogisticRegression(multi_class="multinomial", solver="lbfgs")
    clf_multi.fit(X, y)
    clf_multi_loss = log_loss(y, clf_multi.predict_proba(X))
    clf_ovr = LogisticRegression(multi_class="ovr", solver="lbfgs")
    clf_ovr.fit(X, y)
    clf_ovr_loss = log_loss(y, clf_ovr.predict_proba(X))
    assert_greater(clf_ovr_loss, clf_multi_loss)

    # Predicted probabilites using the soft-max function should give a
    # smaller loss than those using the logistic function.
    clf_multi_loss = log_loss(y, clf_multi.predict_proba(X))
    clf_wrong_loss = log_loss(y, clf_multi._predict_proba_lr(X))
    assert_greater(clf_wrong_loss, clf_multi_loss)
开发者ID:txuninho,项目名称:scikit-learn,代码行数:20,代码来源:test_logistic.py

示例4: test_nnet

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def test_nnet(n_samples=200, n_features=7, distance=0.8, complete=False):
    """
    :param complete: if True, all possible combinations will be checked, and quality is printed
    """
    X, y = generate_sample(n_samples=n_samples, n_features=n_features, distance=distance)

    nn_types = [
        nnet.SimpleNeuralNetwork,
        nnet.MLPClassifier,
        nnet.SoftmaxNeuralNetwork,
        nnet.RBFNeuralNetwork,
        nnet.PairwiseNeuralNetwork,
        nnet.PairwiseSoftplusNeuralNetwork,
    ]

    if complete:
        # checking all possible combinations
        for loss in nnet.losses:
            for NNType in nn_types:
                for trainer in nnet.trainers:
                    nn = NNType(layers=[5], loss=loss, trainer=trainer, random_state=42, epochs=100)
                    nn.fit(X, y )
                    print(roc_auc_score(y, nn.predict_proba(X)[:, 1]), nn)

        lr = LogisticRegression().fit(X, y)
        print(lr, roc_auc_score(y, lr.predict_proba(X)[:, 1]))

        assert 0 == 1, "Let's see and compare results"
    else:
        # checking combinations of losses, nn_types, trainers, most of them are used once during tests.
        attempts = max(len(nnet.losses), len(nnet.trainers), len(nn_types))
        losses_shift = numpy.random.randint(10)
        trainers_shift = numpy.random.randint(10)
        for attempt in range(attempts):
            # each combination is tried 3 times. before raising exception
            retry_attempts = 3
            for retry_attempt in range(retry_attempts):
                loss = list(nnet.losses.keys())[(attempt + losses_shift) % len(nnet.losses)]
                trainer = list(nnet.trainers.keys())[(attempt + trainers_shift) % len(nnet.trainers)]

                nn_type = nn_types[attempt % len(nn_types)]

                nn = nn_type(layers=[5], loss=loss, trainer=trainer, random_state=42 + retry_attempt, epochs=200)
                print(nn)
                nn.fit(X, y)
                quality = roc_auc_score(y, nn.predict_proba(X)[:, 1])
                computed_loss = nn.compute_loss(X, y)
                if quality > 0.8:
                    break
                else:
                    print('attempt {} : {}'.format(retry_attempt, quality))
                    if retry_attempt == retry_attempts - 1:
                        raise RuntimeError('quality of model is too low: {} {}'.format(quality, nn))
开发者ID:MelanieEich,项目名称:cmssw,代码行数:55,代码来源:testhep_ml.py

示例5: fit_model_2

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
    def fit_model_2(self, lol = .07, toWrite = False):
        model = LogisticRegression(C = lol, penalty = 'l1', tol = 1e-6)

        for data in self.cv_data:
            X_train, X_test, Y_train, Y_test = data
            X_train,Y_train = self.balance_data(X_train,Y_train)
            model.fit(X_train,Y_train)
            pred = model.predict_proba(X_test)[:,1]
            print("Model 2 Score: %f" % (logloss(Y_test,pred),))

        if toWrite:
            f2 = open('model2/model.pkl','w')
            pickle.dump(model,f2)
            f2.close()
开发者ID:JakeMick,项目名称:kaggle,代码行数:16,代码来源:days_work.py

示例6: test_multinomial_binary_probabilities

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def test_multinomial_binary_probabilities():
    # Test multinomial LR gives expected probabilities based on the
    # decision function, for a binary problem.
    X, y = make_classification()
    clf = LogisticRegression(multi_class='multinomial', solver='saga')
    clf.fit(X, y)

    decision = clf.decision_function(X)
    proba = clf.predict_proba(X)

    expected_proba_class_1 = (np.exp(decision) /
                              (np.exp(decision) + np.exp(-decision)))
    expected_proba = np.c_[1-expected_proba_class_1, expected_proba_class_1]

    assert_almost_equal(proba, expected_proba)
开发者ID:huafengw,项目名称:scikit-learn,代码行数:17,代码来源:test_logistic.py

示例7: test_predict_iris

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def test_predict_iris():
    """Test logistic regression with the iris dataset"""
    n_samples, n_features = iris.data.shape

    target = iris.target_names[iris.target]
    clf = LogisticRegression(C=len(iris.data)).fit(iris.data, target)
    assert_array_equal(np.unique(target), clf.classes_)

    pred = clf.predict(iris.data)
    assert_greater(np.mean(pred == target), .95)

    probabilities = clf.predict_proba(iris.data)
    assert_array_almost_equal(probabilities.sum(axis=1), np.ones(n_samples))

    pred = iris.target_names[probabilities.argmax(axis=1)]
    assert_greater(np.mean(pred == target), .95)
开发者ID:JinguoGao,项目名称:scikit-learn,代码行数:18,代码来源:test_logistic.py

示例8: test_nnet

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def test_nnet(n_samples=200, n_features=5, distance=0.5, complete=False):
    X, y = make_blobs(
        n_samples=n_samples,
        n_features=5,
        centers=[numpy.ones(n_features) * distance, -numpy.ones(n_features) * distance],
    )

    nn_types = [
        nnet.SimpleNeuralNetwork,
        nnet.MultiLayerNetwork,
        nnet.SoftmaxNeuralNetwork,
        nnet.RBFNeuralNetwork,
        nnet.PairwiseNeuralNetwork,
        nnet.PairwiseSoftplusNeuralNetwork,
    ]

    if complete:
        # checking all possible combinations
        for loss in nnet.losses:
            for NNType in nn_types:
                for trainer in nnet.trainers:
                    nn = NNType(layers=[5], loss=loss, trainer=trainer, random_state=42)
                    nn.fit(X, y, epochs=100)
                    print(roc_auc_score(y, nn.predict_proba(X)[:, 1]), nn)

        lr = LogisticRegression().fit(X, y)
        print(lr, roc_auc_score(y, lr.predict_proba(X)[:, 1]))

        assert 0 == 1, "Let's see and compare results"
    else:
        # checking combinations of losses, nn_types, trainers, most of them are used once during tests.
        attempts = max(len(nnet.losses), len(nnet.trainers), len(nn_types))
        attempts = 4
        losses_shift = numpy.random.randint(10)
        trainers_shift = numpy.random.randint(10)
        for attempt in range(attempts):
            loss = nnet.losses.keys()[(attempt + losses_shift) % len(nnet.losses)]
            trainer = nnet.trainers.keys()[(attempt + trainers_shift) % len(nnet.trainers)]

            nn_type = nn_types[attempt % len(nn_types)]

            nn = nn_type(layers=[5], loss=loss, trainer=trainer, random_state=42)
            print(nn)
            nn.fit(X, y, epochs=200)
            assert roc_auc_score(y, nn.predict_proba(X)[:, 1]) > 0.8, "quality of model is too low: {}".format(nn)
开发者ID:chrinide,项目名称:hep_ml,代码行数:47,代码来源:test_nnet.py

示例9: WebsiteMatchConfidencePredictor

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
class WebsiteMatchConfidencePredictor(object):
    def __init__(self):
        self.model = LogisticRegression()


    def fit(self, urls, websites, y):
        """

        :param urls: list of urls
        :param websites: list of corresponding scraped websites
        :param y: list of corresponding booleans - matches or not
        """
        X = [make_features(url, web) for url, web in zip(urls, websites)]
        self.model.fit(X, y)

    def predict(self, url, website):
        X = make_features(url, website)
        return self.model.predict_proba(X)
开发者ID:nadborduedil,项目名称:url_matching,代码行数:20,代码来源:match_confidence.py

示例10: main

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def main():
    parser = argparse.ArgumentParser(description="""Export AMT""")
    parser.add_argument('--input', default="../res/dga_extendedamt_simplemajority.tsv")
    parser.add_argument('--dump_to_predict', default="../res/dga_data_october2016.tsv")
    parser.add_argument('--embeddings', default="/Users/hmartine/data/glove.6B/glove.6B.50d.txt")
    args = parser.parse_args()

    E = load_embeddings(args.embeddings)

    predarrays = {}

    variants = ["bcd","cd"]
    for variant in variants:
    #1 collect features for train
        trainfeatures, labels, vec = collect_features(args.input,embeddings=E,variant=variant,vectorize=False)
        maxent = LogisticRegression(penalty='l2')
        #TODO collect features for new data
        #TODO proper vectorization
        dumpfeatdicts = features_from_dump(args.dump_to_predict,variant=variant,embeddings=E,bowfilter=trainingbow)
        #dumpfeats = vec.fit_transform(dumpfeatdicts)
        vec = DictVectorizer()
        X_train = vec.fit_transform(trainfeatures)

        maxent.fit(X_train,labels)
        X_test = vec.transform(dumpfeatdicts)

        predarrays[variant+"_pred_label"] = ["SAME" if x == 0 else "OMISSION" for x in maxent.predict(X_test)]
        predarrays[variant + "_pred_prob"] = ['{:.2}'.format(y) for x,y in maxent.predict_proba(X_test)]


    #maxent.fit(np.array(allfeatures[:len(labels)]),labels)
    #print(maxent.predict(allfeatures[len(labels):]))
    # predict using {features, features without lenght} --> instance 'variants' properly
        #TODO compare prediction similarity
        #TODO provide an output format with labels and probs for both feature templates
    frame = read_dump(args.dump_to_predict)
    keyindices = sorted(predarrays.keys())

    header = "Index Ref TitleRef URLRef Target TitleTarget URLTarget Source Contains BCD_label BCD_prob CD_label CD_prob".replace(" ","\t")

    print(header)
    for a in zip([str(x) for x in range(len(frame.Ref))],list(frame.Ref),list(frame.Target),list(frame.TitleRef),list(frame.URLRef),list(frame.TitleTarget),list(frame.URLTarget),list(frame.Source),list(frame.Contains),predarrays[keyindices[0]],predarrays[keyindices[1]],predarrays[keyindices[2]],predarrays[keyindices[3]]):
        print("\t".join(a))
开发者ID:hectormartinez,项目名称:verdisandbox,代码行数:45,代码来源:classify_dga_dump.py

示例11: test_nnet

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def test_nnet(n_samples=200, n_features=5, distance=0.5):
    X, y = make_blobs(n_samples=n_samples, n_features=5,
                      centers=[numpy.ones(n_features) * distance, - numpy.ones(n_features) * distance])

    nn_types = [
        nnet.SimpleNeuralNetwork,
        nnet.MultiLayerNetwork,
        nnet.SoftmaxNeuralNetwork,
        nnet.RBFNeuralNetwork,
        nnet.PairwiseNeuralNetwork,
        nnet.PairwiseSoftplusNeuralNetwork,
    ]

    for loss in nnet.losses:
        for NNType in nn_types:
            for trainer in nnet.trainers:
                nn = NNType(layers=[5], loss=loss, trainer=trainer, random_state=42)
                nn.fit(X, y, stages=100, verbose=nnet.SILENT)
                print(roc_auc_score(y, nn.predict_proba(X)[:, 1]), nn)

    lr = LogisticRegression().fit(X, y)
    print(lr, roc_auc_score(y, lr.predict_proba(X)[:, 1]))

    assert 0 == 1
开发者ID:anaderi,项目名称:cms_unsupervised_eval,代码行数:26,代码来源:test_nnet.py

示例12: readData

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
from sklearn.metrics import classification_report  
import time  
#计算运行时间 
start_time = time.time() 


path = "E:/Desktop/Image/SVMData/gender_wechat_scale.txt"
x,y = readData(path)

average = 0
testNum = 10
clf = LogisticRegression()  
print clf
for i in range(0,testNum):
    x_train, x_test, y_train, y_test = train_test_split(x, y)
    clf = LogisticRegression()   
    clf.fit(x_train, y_train)  
    y_pred = clf.predict(x_test)  
    p = np.mean(y_pred == y_test)  
    print(p)  
    average += p  


answer = clf.predict_proba(x_test)[:,1]  
precision, recall, thresholds = precision_recall_curve(y_test, answer)      
report = answer > 0.5  
print(classification_report(y_test, report, target_names = ['neg', 'pos']))  
print("average precision:", average/testNum)  
print("time spent:", time.time() - start_time)  

开发者ID:renhuaigui,项目名称:python-machine,代码行数:31,代码来源:log_Reg_test.py

示例13: train

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def train():
    weather = load_weather()
    training = load_training()
    
    X, y = assemble_X_y(training, weather)
    mean, std = normalize(X)
    #y = assemble_y(training)
    '''    
    input_size = len(X[0])
    
    learning_rate = theano.shared(np.float32(0.1))
    
    net = NeuralNet(
    layers=[  
        ('input', InputLayer),
         ('hidden1', DenseLayer),
        ('dropout1', DropoutLayer),
        ('hidden2', DenseLayer),
        ('dropout2', DropoutLayer),
        ('output', DenseLayer),
        ],
    # layer parameters:
    input_shape=(None, input_size), 
    hidden1_num_units=256, 
    dropout1_p=0.4,
    hidden2_num_units=256, 
    dropout2_p=0.4,
    output_nonlinearity=sigmoid, 
    output_num_units=1, 

    # optimization method:
    update=nesterov_momentum,
    update_learning_rate=learning_rate,
    update_momentum=0.9,
    
    # Decay the learning rate
    on_epoch_finished=[
            AdjustVariable(learning_rate, target=0, half_life=4),
            ],

    # This is silly, but we don't want a stratified K-Fold here
    # To compensate we need to pass in the y_tensor_type and the loss.
    regression=True,
    y_tensor_type = T.imatrix,
    objective_loss_function = binary_crossentropy,
     
    max_epochs=32, 
    eval_size=0.1,
    verbose=1,
    )
    '''
    clf = LogisticRegression(C = 10)
    #clf = svm.SVC()
    X, y = shuffle(X, y, random_state=123)
    
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.33)
    clf.fit(X_train, y_train)

    probas = clf.predict_proba(X_test)[:,1]
    print("ROC score", metrics.roc_auc_score(np.ravel(y_test), probas))
    
    print("fitting...")
    clf.fit(X, y)
    
    #clf.fit(X[:100, :], y[:100])
    #Tracer()()
    #probas = clf.predict(X[:100, :])[:,1]
    #y_pred = (probas > 0.5).astype(int)
    
    #print(np.abs(y_pred-y[:100]).sum()) 

    return clf, mean, std     
开发者ID:amirnasri,项目名称:Kaggle_west_nile_virus_prediction,代码行数:75,代码来源:kaggle_script.py

示例14: train_model

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def train_model(clf_factory, X, Y, name, plot=False):
    """
        Trains and saves model to disk.
    """
    labels = np.unique(Y)
    cv = ShuffleSplit( n=len(X), n_iterations=1, test_fraction=0.3, indices=True, random_state=0)
    #print "cv = ",cv
    train_errors = []
    test_errors = []

    scores = []

    pr_scores, precisions, recalls, thresholds = defaultdict(list), defaultdict(list), defaultdict(list), defaultdict(list)

    roc_scores, tprs, fprs = defaultdict(list), defaultdict(list) ,defaultdict(list)

    clfs = []  # just to later get the median

    cms = []

    for train, test in cv:
        X_train, y_train = X[train], Y[train]
        X_test, y_test = X[test], Y[test]
        global clf
        clf = LogisticRegression()
        clf.fit(X_train, y_train)
        clfs.append(clf)

        train_score = clf.score(X_train, y_train)
        test_score = clf.score(X_test, y_test)
        scores.append(test_score)

        train_errors.append(1 - train_score)
        test_errors.append(1 - test_score)

        y_pred = clf.predict(X_test)
        cm = confusion_matrix(y_test, y_pred)
        cms.append(cm)
        
        for label in labels:
            y_label_test = np.asarray(y_test == label, dtype=int)
            proba = clf.predict_proba(X_test)
            proba_label = proba[:, label]

            precision, recall, pr_thresholds = precision_recall_curve(
                y_label_test, proba_label)
            pr_scores[label].append(auc(recall, precision))
            precisions[label].append(precision)
            recalls[label].append(recall)
            thresholds[label].append(pr_thresholds)

            fpr, tpr, roc_thresholds = roc_curve(y_label_test, proba_label)
            roc_scores[label].append(auc(fpr, tpr))
            tprs[label].append(tpr)
            fprs[label].append(fpr)

    if plot:
        for label in labels:
            #print("Plotting %s"%genre_list[label])
            scores_to_sort = roc_scores[label]
            median = np.argsort(scores_to_sort)[len(scores_to_sort) / 2]
            desc = "%s %s" % (name, genre_list[label])
            #plot_pr(pr_scores[label][median], desc, precisions[label][median],recalls[label][median], label='%s vs rest' % genre_list[label])
            #plot_roc(roc_scores[label][median], desc, tprs[label][median],fprs[label][median], label='%s vs rest' % genre_list[label])

    all_pr_scores = np.asarray(pr_scores.values()).flatten()
    summary = (np.mean(scores), np.std(scores),np.mean(all_pr_scores), np.std(all_pr_scores))
    print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

    #save the trained model to disk
    joblib.dump(clf, 'saved_model_fft/my_model.pkl')
    
    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)
开发者ID:Shreya29,项目名称:music-genre-classifier,代码行数:75,代码来源:classifier_fft.py

示例15: train_model

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict_proba [as 别名]
def train_model(X, Y, name, plot=False):
    """
        train_model(vector, vector, name[, plot=False])
        
        Trains and saves model to disk.
    """
    labels = np.unique(Y)
    print labels

    cv = ShuffleSplit(n=len(X), n_iter=1, test_size=0.3, random_state=0)
    
    
    train_errors = []
    test_errors = []

    scores = []
    pr_scores = defaultdict(list)
    precisions, recalls, thresholds = defaultdict(list), defaultdict(list), defaultdict(list)

    roc_scores = defaultdict(list)
    tprs = defaultdict(list)
    fprs = defaultdict(list)

    clfs = []  # for the median

    cms = []

    for train, test in cv:
        X_train, y_train = X[train], Y[train]
        X_test, y_test = X[test], Y[test]

        clf = LogisticRegression()
        clf.fit(X_train, y_train)
        clfs.append(clf)

        train_score = clf.score(X_train, y_train)
        test_score = clf.score(X_test, y_test)
        scores.append(test_score)

        train_errors.append(1 - train_score)
        test_errors.append(1 - test_score)

        y_pred = clf.predict(X_test)
        cm = confusion_matrix(y_test, y_pred)
        cms.append(cm)

        for label in labels:
            y_label_test = np.asarray(y_test == label, dtype=int)
            proba = clf.predict_proba(X_test)
            proba_label = proba[:, label]

            fpr, tpr, roc_thresholds = roc_curve(y_label_test, proba_label)
            roc_scores[label].append(auc(fpr, tpr))
            tprs[label].append(tpr)
            fprs[label].append(fpr)

    if plot:
        for label in labels:
            scores_to_sort = roc_scores[label]
            median = np.argsort(scores_to_sort)[len(scores_to_sort) / 2]
            desc = "%s %s" % (name, genre_list[label])
            plot_roc_curves(roc_scores[label][median], desc, tprs[label][median],fprs[label][median], label='%s vs rest' % genre_list[label])

    all_pr_scores = np.asarray(pr_scores.values()).flatten()
    summary = (np.mean(scores), np.std(scores), np.mean(all_pr_scores), np.std(all_pr_scores))
    #print("%.3f\t%.3f\t%.3f\t%.3f\t" % summary)

    #save the trained model to disk
    joblib.dump(clf, 'saved_model/model_ceps.pkl')
    
    return np.mean(train_errors), np.mean(test_errors), np.asarray(cms)
开发者ID:vikrambahl1,项目名称:music-genre-detection,代码行数:73,代码来源:untitled8.py


注:本文中的sklearn.linear_model.logistic.LogisticRegression.predict_proba方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。