当前位置: 首页>>代码示例>>Python>>正文


Python LogisticRegression.predict方法代码示例

本文整理汇总了Python中sklearn.linear_model.logistic.LogisticRegression.predict方法的典型用法代码示例。如果您正苦于以下问题:Python LogisticRegression.predict方法的具体用法?Python LogisticRegression.predict怎么用?Python LogisticRegression.predict使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.linear_model.logistic.LogisticRegression的用法示例。


在下文中一共展示了LogisticRegression.predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: mlogistic

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def mlogistic():
	"""Minimal TF-IDF + logistic-regression text-classification demo.

	Fits the vectorizer and classifier on three hard-coded sentences and
	predicts labels for two held-out sentences, printing intermediate
	matrices and the predictions.

	FIX: the original used Python-2 print statements; rewritten with
	print() calls so the example runs on Python 3 as well.
	"""
	X = []

	# First three sentences are the training samples.
	X.append("fuck you")
	X.append("fuck you all")
	X.append("hello everyone")

	# Last two sentences are the test samples.
	X.append("fuck me")
	X.append("hello boy")

	# Gold labels for the three training sentences only.
	y = [1, 1, 0]

	vectorizer = TfidfVectorizer()

	# Fit TF-IDF on the training sentences only ...
	X_train = vectorizer.fit_transform(X[:-2])
	print(X_train)
	# ... and reuse the fitted vocabulary to transform the test sentences.
	X_test = vectorizer.transform(X[-2:])
	print(X_test)

	# Train the logistic-regression model.
	classifier = LogisticRegression()
	classifier.fit(X_train, y)

	# Predict labels for the two test sentences.
	predictions = classifier.predict(X_test)
	print(predictions)
开发者ID:tuling56,项目名称:Python,代码行数:33,代码来源:mlogistic.py

示例2: main

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def main():
    """Cross-validate a CWI logistic-regression classifier, drawing each
    instance's gold label at random from 20 single-annotator files.

    Prints per-metric mean/std over all folds and iterations, then exits.

    FIX: sklearn metrics take ``(y_true, y_pred)``; the original passed the
    prediction first, which silently swaps precision and recall.  Also
    guards the custom F1 against a zero denominator and drops the unused
    ``all_feats`` local.
    """
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    default_pool = scriptdir+"/../data/cwi_training/cwi_training.txt.lbl.conll"
    parser = argparse.ArgumentParser(description="Skeleton for features and classifier for CWI-2016--optimisation of threshhold")
    parser.add_argument('--iterations',type=int,default=5)

    args = parser.parse_args()

    all_labels = defaultdict(list)
    scores = defaultdict(list)

    # Collect, per instance, one label from each of the 20 annotators.
    for idx in "01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18 19 20".split(" "):
        current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_"+idx+".lbl.conll"
        f_current, labels_current, v_current = feats_and_classify.collect_features(current_single_ann,vectorize=False,generateFeatures=False)
        for instance_index,l in enumerate(labels_current):
            all_labels[instance_index].append(l)
    # Feature matrix is built from annotator 01's file only — presumably
    # the features are identical across annotator files (TODO confirm).
    current_single_ann = scriptdir+"/../data/cwi_training/cwi_training_01.lbl.conll"
    feats, labels_current, v_current = feats_and_classify.collect_features(current_single_ann,vectorize=True,generateFeatures=True)

    for it in range(args.iterations):
        for TrainIndices, TestIndices in cross_validation.KFold(n=feats.shape[0], n_folds=10, shuffle=True, random_state=None):
            maxent = LogisticRegression(penalty='l2')

            TrainX_i = feats[TrainIndices]
            # Each instance's label is sampled from a random annotator.
            Trainy_i = [all_labels[x][random.randrange(0,20)] for x in TrainIndices]

            TestX_i = feats[TestIndices]
            Testy_i = [all_labels[x][random.randrange(0,20)] for x in TestIndices]

            maxent.fit(TrainX_i,Trainy_i)
            ypred_i = maxent.predict(TestX_i)

            # FIX: metric signature is (y_true, y_pred); the original had
            # the arguments reversed, swapping precision with recall.
            acc = accuracy_score(Testy_i, ypred_i)
            pre = precision_score(Testy_i, ypred_i)
            rec = recall_score(Testy_i, ypred_i)
            # shared task uses f1 of *accuracy* and recall!
            f1 = 2 * acc * rec / (acc + rec) if (acc + rec) else 0.0

            scores["Accuracy"].append(acc)
            scores["F1"].append(f1)
            scores["Precision"].append(pre)
            scores["Recall"].append(rec)
        print("--")

    for key in sorted(scores.keys()):
        currentmetric = np.array(scores[key])
        print("%s : %0.2f (+/- %0.2f)" % (key,currentmetric.mean(), currentmetric.std()))
    print("--")

    sys.exit(0)
开发者ID:jbingel,项目名称:cwi2016,代码行数:59,代码来源:feats_and_classify_crossv_on_single_annotators.py

示例3: classify_logistic

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def classify_logistic(train_features, train_labels, test_features):
    """Fit a plain logistic-regression model and return its predictions
    for ``test_features``.

    When not in TEST mode and SAVE is enabled, the fitted model is also
    pickled under the name "logistic".
    """
    global SAVE
    model = LogisticRegression()
    model.fit(train_features, train_labels)

    should_persist = not TEST and SAVE
    if should_persist:
        save_pickle("logistic", model)

    return model.predict(test_features)
开发者ID:afshaanmaz,项目名称:FoodClassifier,代码行数:11,代码来源:5_svm.py

示例4: test_liblinear_decision_function_zero

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def test_liblinear_decision_function_zero():
    """All-zero decision_function values must predict the negative class.

    Liblinear itself reports the positive class in that situation;
    scikit-learn deliberately differs.  See issue
    https://github.com/scikit-learn/scikit-learn/issues/3600 and PR
    https://github.com/scikit-learn/scikit-learn/pull/3623.
    """
    X, y = make_classification(n_samples=5, n_features=5, random_state=0)
    model = LogisticRegression(fit_intercept=False)
    model.fit(X, y)

    # With no intercept, an all-zero input makes decision_function zero.
    zeros = np.zeros((5, 5))
    assert_array_equal(model.predict(zeros), np.zeros(5))
开发者ID:huafengw,项目名称:scikit-learn,代码行数:15,代码来源:test_logistic.py

示例5: clazzify

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def clazzify(train_mat, test_mat, true_train_labels):
    """Fit an L1-regularised logistic regression and label the test set.

    FIX: the original docstring was empty.

    Args:
        train_mat: training feature matrix.
        test_mat: test feature matrix to be labelled.
        true_train_labels: gold labels aligned with ``train_mat`` rows.

    Returns:
        (predicted_test_labels, model): predictions for ``test_mat`` and
        the fitted estimator.
    """
    # learn
    logging.info('learning...')
    model = LogisticRegression(random_state=17, penalty='l1')
    model.fit(train_mat, true_train_labels)
    logging.info('finished learning.')

    # test
    logging.info('testing')
    predicted_test_labels = model.predict(test_mat)
    logging.info('finished testing')

    return predicted_test_labels, model
开发者ID:mapleyustat,项目名称:simsets,代码行数:17,代码来源:classify.py

示例6: test_predict_iris

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def test_predict_iris():
    """Logistic regression on the iris dataset with string targets."""
    n_samples, _ = iris.data.shape

    # Use the species names, not the integer codes, as targets.
    target = iris.target_names[iris.target]
    model = LogisticRegression(C=len(iris.data)).fit(iris.data, target)
    assert_array_equal(np.unique(target), model.classes_)

    hard_pred = model.predict(iris.data)
    assert_greater(np.mean(hard_pred == target), .95)

    # Class probabilities must sum to one per sample ...
    proba = model.predict_proba(iris.data)
    assert_array_almost_equal(proba.sum(axis=1), np.ones(n_samples))

    # ... and their argmax must agree with the hard predictions' accuracy.
    soft_pred = iris.target_names[proba.argmax(axis=1)]
    assert_greater(np.mean(soft_pred == target), .95)
开发者ID:JinguoGao,项目名称:scikit-learn,代码行数:18,代码来源:test_logistic.py

示例7: generate_submission

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def generate_submission():
    """Fit logistic regression on the Titanic training frame and write the
    Kaggle submission file ``kaggle.csv``.

    Relies on module-level ``train``, ``test`` and ``predictors``;
    rebinds the module-level ``alg``, ``predictions`` and ``submission``.

    FIX: the completion message said "kaggele.csv" although the file
    actually written is "kaggle.csv".
    """
    global alg, predictions, submission
    # The columns we'll use to predict the target
    # Initialize the algorithm class
    alg = LogisticRegression(random_state=1)
    # Train the algorithm using all the training data
    alg.fit(train[predictors], train["Survived"])
    # Make predictions using the test set.
    predictions = alg.predict(test[predictors])
    # Create a new dataframe with only the columns Kaggle wants from the dataset.
    submission = pandas.DataFrame({
        "PassengerId": test["PassengerId"],
        "Survived": predictions
    })
    submission.to_csv("kaggle.csv", index=False)
    print("kaggle.csv is generated")
开发者ID:maxpavlovdp,项目名称:titanic,代码行数:18,代码来源:logistic_regression.py

示例8: test_multinomial_binary

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def test_multinomial_binary():
    """Multinomial LR must cope with a two-class, string-labelled target."""
    is_not_setosa = (iris.target > 0).astype(np.intp)
    target = np.array(["setosa", "not-setosa"])[is_not_setosa]

    with_intercept = LogisticRegression(solver='lbfgs',
                                        multi_class='multinomial')
    with_intercept.fit(iris.data, target)

    # A binary multinomial model still keeps a single coefficient row.
    assert_equal(with_intercept.coef_.shape, (1, iris.data.shape[1]))
    assert_equal(with_intercept.intercept_.shape, (1,))
    assert_array_equal(with_intercept.predict(iris.data), target)

    no_intercept = LogisticRegression(solver='lbfgs',
                                      multi_class='multinomial',
                                      fit_intercept=False)
    no_intercept.fit(iris.data, target)

    # argmax over log-probabilities must recover most labels.
    best = np.argmax(with_intercept.predict_log_proba(iris.data), axis=1)
    pred = with_intercept.classes_[best]
    assert_greater(np.mean(pred == target), .9)
开发者ID:AngelaGuoguo,项目名称:scikit-learn,代码行数:19,代码来源:test_logistic.py

示例9: LogisticRegressionSMSFilteringExample

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def LogisticRegressionSMSFilteringExample():
    """Train a TF-IDF + logistic-regression SMS spam filter and print the
    predicted class for the first five test messages.

    Reads the SMSSpamCollection corpus from a hard-coded local path
    (tab-separated, label in column 0, text in column 1).

    FIX: ``xrange`` and the print statement are Python-2-only; rewritten
    with ``range()`` and ``print()`` so the example also runs on Python 3.
    """
    import numpy as np
    import pandas as pd
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.linear_model.logistic import LogisticRegression
    from sklearn.cross_validation import train_test_split, cross_val_score
    df = pd.read_csv('C:/Users/Ahmad/Documents/Mastering ML with Scikitlearn/ml/DataSets/smsspamcollection/SMSSpamCollection', delimiter='\t',header=None)
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(df[1],df[0])
    # Fit TF-IDF on the training texts only; reuse it for the test texts.
    vectorizer = TfidfVectorizer()
    X_train = vectorizer.fit_transform(X_train_raw)
    X_test = vectorizer.transform(X_test_raw)

    classifier = LogisticRegression()
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)

    # Show the first five test messages with their predicted labels.
    for i in range(0, 5):
        print(X_test_raw.values.tolist()[i], "\r\n Classification: ", predictions[i])
开发者ID:PsychoGeek13,项目名称:ml,代码行数:20,代码来源:main.py

示例10: __init__

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
class mentoryWEB:
    """Small text classifier: a TF-IDF vectoriser feeding an L2-penalised
    logistic regression, trained once at construction time."""

    def __init__(self, file):
        # Vectoriser configuration: unigrams+bigrams, capped vocabulary,
        # high-frequency terms dropped, L2-normalised tf-idf weights.
        self.vect = TfidfVectorizer(max_df=0.25, stop_words=None, max_features=2500, ngram_range=(1,2), use_idf=True, norm='l2')
        frame = pd.read_csv(file, delimiter='\t', header=None)
        # Column 1 holds the raw text, column 0 the gold label.
        raw_texts, labels = frame[1], frame[0]

        train_matrix = self.vect.fit_transform(raw_texts)

        self.clf = LogisticRegression(penalty='l2', C=10)
        self.clf.fit(train_matrix, labels)

    def test(self, string):
        """Classify one raw text and return its predicted label."""
        features = self.vect.transform([string])
        return self.clf.predict(features)[0]
开发者ID:mikebohdan,项目名称:int20h,代码行数:20,代码来源:classifier.py

示例11: makeClassificationAndMeasureAccuracy

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def makeClassificationAndMeasureAccuracy(genre_wise_train_data, genre_wise_test_data, meta_dict):
    """Train one logistic-regression model per genre and measure its
    accuracy on that genre's test split.

    Args:
        genre_wise_train_data: {genre: (file_names, labels)} training split.
        genre_wise_test_data: {genre: (file_names, labels)} test split.
        meta_dict: {genre: {file_name: {TAGS: {...}}}} feature source.

    Returns:
        {genre: accuracy} — fraction of correctly labelled test instances.

    FIX: the original called ``predict`` once per 1-D sample, which is
    deprecated/invalid in modern scikit-learn and needlessly slow;
    prediction is now a single batched call.  Also guards against an
    empty test split (previously a ZeroDivisionError).
    """
    accuracy_for_genre = dict()
    for genre in genre_wise_train_data:
        meta_dict_for_genre = meta_dict[genre]
        train_data, train_result = genre_wise_train_data[genre]
        test_data, test_result = genre_wise_test_data[genre]
        # Map file names to their TAG-count feature vectors.
        train_data = [list(meta_dict_for_genre[file_name][TAGS].values()) for file_name in train_data]
        test_data = [list(meta_dict_for_genre[file_name][TAGS].values()) for file_name in test_data]
        log_r = LogisticRegression()
        log_r.fit(train_data, train_result)
        if test_data:
            # Predict the whole test matrix in one call.
            predictions = log_r.predict(test_data)
            correct = sum(1 for pred, truth in zip(predictions, test_result)
                          if int(pred) == truth)
            accuracy = correct / float(len(test_data))
        else:
            accuracy = 0.0  # no test instances for this genre
        accuracy_for_genre[genre] = accuracy
    return accuracy_for_genre
开发者ID:SriganeshNk,项目名称:NLP_Project,代码行数:20,代码来源:NaivePOSDictPredictor.py

示例12: test_multinomial_binary

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def test_multinomial_binary():
    """Multinomial LR on a binary problem, across several solvers."""
    target = np.array(["setosa", "not-setosa"])[(iris.target > 0).astype(np.intp)]

    for solver in ['lbfgs', 'newton-cg', 'sag']:
        with_intercept = LogisticRegression(solver=solver,
                                            multi_class='multinomial',
                                            random_state=42, max_iter=2000)
        with_intercept.fit(iris.data, target)

        # A binary multinomial model keeps a single coefficient row.
        assert_equal(with_intercept.coef_.shape, (1, iris.data.shape[1]))
        assert_equal(with_intercept.intercept_.shape, (1,))
        assert_array_equal(with_intercept.predict(iris.data), target)

        no_intercept = LogisticRegression(solver=solver,
                                          multi_class='multinomial',
                                          random_state=42,
                                          fit_intercept=False)
        no_intercept.fit(iris.data, target)

        # argmax over log-probabilities must recover most labels.
        best = np.argmax(with_intercept.predict_log_proba(iris.data), axis=1)
        pred = with_intercept.classes_[best]
        assert_greater(np.mean(pred == target), .9)
开发者ID:0664j35t3r,项目名称:scikit-learn,代码行数:22,代码来源:test_logistic.py

示例13: test_multinomial_logistic_regression_string_inputs

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def test_multinomial_logistic_regression_string_inputs():
    """Multinomial LogisticRegression(CV) must accept string class labels."""
    n_samples, n_features, n_classes = 50, 5, 3
    X_ref, y = make_classification(n_samples=n_samples, n_features=n_features,
                                   n_classes=n_classes, n_informative=3,
                                   random_state=0)
    y_str = LabelEncoder().fit(['bar', 'baz', 'foo']).inverse_transform(y)
    # Numerical reference labels drawn from the set (-1, 0, 1).
    y = np.array(y) - 1

    opts = dict(solver='lbfgs', multi_class='multinomial')
    lr = LogisticRegression(**opts)
    lr_cv = LogisticRegressionCV(**opts)
    lr_str = LogisticRegression(**opts)
    lr_cv_str = LogisticRegressionCV(**opts)

    # Fit the numeric and string-labelled variants on the same data.
    for model, labels in ((lr, y), (lr_cv, y),
                          (lr_str, y_str), (lr_cv_str, y_str)):
        model.fit(X_ref, labels)

    # String labels must not change the learned coefficients.
    assert_array_almost_equal(lr.coef_, lr_str.coef_)
    assert_equal(sorted(lr_str.classes_), ['bar', 'baz', 'foo'])
    assert_array_almost_equal(lr_cv.coef_, lr_cv_str.coef_)
    assert_equal(sorted(lr_str.classes_), ['bar', 'baz', 'foo'])
    assert_equal(sorted(lr_cv_str.classes_), ['bar', 'baz', 'foo'])

    # Predictions come back in the original (string) label space.
    assert_equal(sorted(np.unique(lr_str.predict(X_ref))),
                 ['bar', 'baz', 'foo'])
    assert_equal(sorted(np.unique(lr_cv_str.predict(X_ref))),
                 ['bar', 'baz', 'foo'])

    # Class weights may be keyed by string label too; weighting 'foo'
    # with zero removes it from the predictions.
    lr_cv_str = LogisticRegression(
        solver='lbfgs', class_weight={'bar': 1, 'baz': 2, 'foo': 0},
        multi_class='multinomial').fit(X_ref, y_str)
    assert_equal(sorted(np.unique(lr_cv_str.predict(X_ref))), ['bar', 'baz'])
开发者ID:huafengw,项目名称:scikit-learn,代码行数:39,代码来源:test_logistic.py

示例14: classify

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def classify(data_set_df, user_info_df, feat_set_name, features=None, label='gender',
             classifier=None, reg_param=1.0, selection=False, num_feat=20, sel_method='LR',
             cv=10):
    """K-fold cross-validated classification of ``label`` from feature data.

    Appends one result row per fold (and the fold's confusion matrix) to
    CSV files under ``param.EXPERIMENT_PATH`` and returns
    ``(mean CV score, miss-classification-coverage rate)``.

    NOTE(review): depends on project modules ``pc`` (filtering/imputation),
    ``fimp`` (feature selection), ``param`` and ``info`` — their exact
    contracts are not visible here; the comments below describe what this
    function does with their results, not their internals.
    """
    instance_num = len(data_set_df.columns)
    # Project helper: filter instances and extract the target vector y_v.
    df_filtered, y_v = pc.get_filtered_x_y(data_set_df, user_info_df, label)
    # Optionally restrict to an explicit feature list.
    x = df_filtered if features is None else df_filtered.loc[features]

    # Drop features (rows) and instances (columns) that are entirely NaN.
    x = x.dropna(how='all', axis=0)
    x = x.dropna(how='all', axis=1)
    # Impute only when NaN/inf values remain; otherwise use x as-is.
    if x.isnull().any().any() or (x == np.inf).any().any() or (x == -np.inf).any().any():
        x_imp = pc.fill_nan_features(x)
        # x_imp = dense_df.loc[x.index, x.columns]
    else:
        x_imp = x
    # NOTE(review): indexes y_v with the surviving instance ids; relies on
    # Python-2 ``map`` returning a list — verify under Python 3.
    y_filtered = y_v[(map(int, x.columns.values))]

    # Default classifier: logistic regression with strength reg_param.
    clf = LogisticRegression(C=reg_param) if classifier is None else classifier
    # Cannot have more folds than instances.
    cv_num = min(len(y_filtered), cv)
    score_mean = 0.0
    miss_clf_rate = 1.0
    # Need at least 2 folds and at least 2 distinct classes to evaluate.
    if cv_num > 1 and len(y_filtered.unique()) > 1:
        kf = KFold(y_filtered.shape[0], n_folds=cv_num, shuffle=True)
        # skf = StratifiedKFold(y_filtered, n_folds=cv_num, shuffle=True)
        fold = 0
        result_str = ""
        matrix_str = ""
        for tr_index, te_index in kf:
            fold += 1
            # Instances are columns, so transpose before row-indexing.
            x_train, x_test = x_imp.T.iloc[tr_index], x_imp.T.iloc[te_index]
            y_train, y_test = y_filtered.iloc[tr_index], y_filtered.iloc[te_index]

            # Optional per-fold feature selection (on training data only).
            if selection:
                if sel_method == 'LR' or 'RF' in sel_method:
                    feat_index = fimp.feature_selection(x_train.T, user_info_df, num_feat,
                                                        method=sel_method, label=label)
                else:
                    # Other selectors work on the un-imputed matrix.
                    x_tr_df, x_te_df = x.T.iloc[tr_index].T, x.T.iloc[te_index].T
                    feat_index = fimp.feature_selection(x_tr_df, user_info_df, num_feat,
                                                        method=sel_method, label=label)
                x_train = x_train.loc[:, feat_index].values
                x_test = x_test.loc[:, feat_index].values

            try:
                clf.fit(x_train, y_train)
                score = clf.score(x_test, y_test)
                score_mean += score

                # One CSV row per fold: experiment configuration + score.
                result_str += "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n" \
                              % (label, True if param.FILL_SUFFIX in feat_set_name else False,
                                 True if param.SCALING_SUFFIX in feat_set_name else False, selection, 'LR',
                                 reg_param, cv, fold, x_train.shape[1], score)
                cf_mat = confusion_matrix(y_test, clf.predict(x_test),
                                          labels=range(len(info.LABEL_CATEGORY[label])))
                matrix_str += np.array_str(cf_mat) + "\n"
            except ValueError:
                # Best-effort: a fold that fails to fit is skipped silently.
                pass
                # traceback.print_exc()
                # print i, "why error? skip!"

        print result_str
        # Append per-fold results and confusion matrices to experiment CSVs.
        file_name = "%s/new_%s.csv" % (param.EXPERIMENT_PATH, feat_set_name)
        with open(file_name, mode='a') as f:
            f.write(result_str)

        file_name = "%s/new_%s_mat.csv" % (param.EXPERIMENT_PATH, feat_set_name)
        with open(file_name, mode='a') as f:
            f.write(matrix_str)

        if fold > 0:
            score_mean = score_mean / fold
            # Fraction of original instances filtered out before training.
            miss_clf_rate = (float(instance_num - len(y_filtered)) / instance_num)
    return score_mean, miss_clf_rate
开发者ID:heevery,项目名称:ohp,代码行数:74,代码来源:classifier.py

示例15: crossval

# 需要导入模块: from sklearn.linear_model.logistic import LogisticRegression [as 别名]
# 或者: from sklearn.linear_model.logistic.LogisticRegression import predict [as 别名]
def crossval(features, labels,variant,printcoeffs=False):
    """10-fold cross-validate an L2 logistic regression and print one line
    of results: ``variant`` followed by mean accuracy and mean F1.

    If ``printcoeffs`` is true, refit on all data and print the extremes of
    ``TotalCoeffCounter``.  NOTE(review): ``TotalCoeffCounter`` is never
    populated (the filling code is commented out below), so that branch
    currently prints nothing but the separator.
    """
    maxent = LogisticRegression(penalty='l2')
    dummyclass = DummyClassifier("most_frequent")
    #maxent = SGDClassifier(penalty='l1')
    #maxent = Perceptron(penalty='l1')
    maxent.fit(features,labels) # only needed for feature inspection, crossvalidation calls fit(), too


    scores = defaultdict(list)
    TotalCoeffCounter = Counter()

    # Folds are not shuffled, so splits are deterministic across runs.
    for TrainIndices, TestIndices in cross_validation.KFold(n=features.shape[0], n_folds=10, shuffle=False, random_state=None):
        TrainX_i = features[TrainIndices]
        Trainy_i = labels[TrainIndices]

        TestX_i = features[TestIndices]
        Testy_i =  labels[TestIndices]
        # Majority-class baseline fitted alongside the real model.
        dummyclass.fit(TrainX_i,Trainy_i)
        maxent.fit(TrainX_i,Trainy_i)

        ypred_i = maxent.predict(TestX_i)
        ydummypred_i = dummyclass.predict(TestX_i)
        #coeffs_i = list(maxent.coef_[0])
        #coeffcounter_i = Counter(vec.feature_names_)
        #for value,name in zip(coeffs_i,vec.feature_names_):
        #    coeffcounter_i[name] = value

        # NOTE(review): metrics are called as (y_pred, y_true) here —
        # accuracy is symmetric, but f1 with pos_label is not; confirm
        # the intended argument order against the callers.
        acc = accuracy_score(ypred_i, Testy_i)
        #pre = precision_score(ypred_i, Testy_i,pos_label=1)
        #rec = recall_score(ypred_i, Testy_i,pos_label=1)
        f1 = f1_score(ypred_i, Testy_i,pos_label=1)

        scores["Accuracy"].append(acc)
        scores["F1"].append(f1)
        #scores["Precision"].append(pre)
        #scores["Recall"].append(rec)

        #
        # acc = accuracy_score(ydummypred_i, Testy_i)
        # pre = precision_score(ydummypred_i, Testy_i,pos_label=1)
        # rec = recall_score(ydummypred_i, Testy_i,pos_label=1)
        # f1 = f1_score(ydummypred_i, Testy_i,pos_label=1)
        #
        # scores["dummy-Accuracy"].append(acc)
        # scores["dummy-F1"].append(f1)
        # scores["dummy-Precision"].append(pre)
        # scores["dummy-Recall"].append(rec)

        #posfeats = posfeats.intersection(set([key for (key,value) in coeffcounter.most_common()[:20]]))
        #negfeats = negfeats.intersection(set([key for (key,value) in coeffcounter.most_common()[-20:]]))

    #print("Pervasive positive: ", posfeats)
    #print("Pervasive negative: ",negfeats)

    #scores = cross_validation.cross_val_score(maxent, features, labels, cv=10)
    #print("--")
    #for key in sorted(scores.keys()):
    #    currentmetric = np.array(scores[key])
        #print("%s : %0.2f (+/- %0.2f)" % (key,currentmetric.mean(), currentmetric.std()))
        #print("%s : %0.2f" % (key,currentmetric.mean()))
    # One summary line: variant name, mean accuracy, mean F1.
    print("%s %.2f (%.2f)" % (variant,np.array(scores["Accuracy"]).mean(),np.array(scores["F1"]).mean()))
    if printcoeffs:

        maxent.fit(features,labels) # fit on everything

        # coeffs_total is computed but unused (inspection leftover).
        coeffs_total = list(maxent.coef_[0])
        for (key,value) in TotalCoeffCounter.most_common()[:20]:
            print(key,value)
        print("---")
        for (key,value) in TotalCoeffCounter.most_common()[-20:]:
            print(key,value)


注:本文中的sklearn.linear_model.logistic.LogisticRegression.predict方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。