

Python SGDClassifier.predict_log_proba Method Code Examples

This article collects typical usage examples of the Python method sklearn.linear_model.SGDClassifier.predict_log_proba. If you are wondering what SGDClassifier.predict_log_proba does, how to call it, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of the containing class, sklearn.linear_model.SGDClassifier.


Four code examples of the SGDClassifier.predict_log_proba method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
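Before the examples, here is a minimal standalone sketch of the basic call pattern. It is not taken from the examples below; the toy data and parameter values are illustrative assumptions, and the loss name is spelled "log_loss" in recent scikit-learn releases ("log" in older ones):

import numpy as np
from sklearn.linear_model import SGDClassifier

# toy two-class data (illustrative only)
X = np.array([[-2.0, -1.0], [-1.5, -1.2], [1.0, 1.2], [2.0, 1.5]])
y = np.array([0, 0, 1, 1])

# predict_log_proba is only available for probabilistic losses such as the logistic loss
clf = SGDClassifier(loss="log_loss", alpha=0.01, max_iter=100, random_state=0).fit(X, y)

log_p = clf.predict_log_proba([[1.5, 1.0]])  # shape (1, n_classes): natural-log class probabilities
p = np.exp(log_p)                            # back to ordinary probabilities; each row sums to 1
print(log_p, p)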

Example 1: test_sgd_proba

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict_log_proba [as alias]
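# Context: this excerpt is a test method from scikit-learn's test_sgd.py; X, Y, X2, Y2,
# self.factory, SparseSGDClassifierTestCase, np, and the assert_* helpers are fixtures
# and utilities imported or defined elsewhere in that test module.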
    def test_sgd_proba(self):
        """Check SGD.predict_proba"""

        # Hinge loss does not allow for conditional prob estimate.
        # We cannot use the factory here, because it defines predict_proba
        # anyway.
        clf = SGDClassifier(loss="hinge", alpha=0.01, n_iter=10).fit(X, Y)
        assert_false(hasattr(clf, "predict_proba"))
        assert_false(hasattr(clf, "predict_log_proba"))

        # log and modified_huber losses can output probability estimates
        # binary case
        for loss in ["log", "modified_huber"]:
            clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10)
            clf.fit(X, Y)
            p = clf.predict_proba([3, 2])
            assert_true(p[0, 1] > 0.5)
            p = clf.predict_proba([-1, -1])
            assert_true(p[0, 1] < 0.5)

            p = clf.predict_log_proba([3, 2])
            assert_true(p[0, 1] > p[0, 0])
            p = clf.predict_log_proba([-1, -1])
            assert_true(p[0, 1] < p[0, 0])

        # log loss multiclass probability estimates
        clf = self.factory(loss="log", alpha=0.01, n_iter=10).fit(X2, Y2)

        d = clf.decision_function([[0.1, -0.1], [0.3, 0.2]])
        p = clf.predict_proba([[0.1, -0.1], [0.3, 0.2]])
        assert_array_equal(np.argmax(p, axis=1), np.argmax(d, axis=1))
        assert_almost_equal(p[0].sum(), 1)
        assert_true(np.all(p[0] >= 0))

        p = clf.predict_proba([-1, -1])
        d = clf.decision_function([-1, -1])
        assert_array_equal(np.argsort(p[0]), np.argsort(d[0]))

        l = clf.predict_log_proba([3, 2])
        p = clf.predict_proba([3, 2])
        assert_array_almost_equal(np.log(p), l)

        l = clf.predict_log_proba([-1, -1])
        p = clf.predict_proba([-1, -1])
        assert_array_almost_equal(np.log(p), l)

        # Modified Huber multiclass probability estimates; requires a separate
        # test because the hard zero/one probabilities may destroy the
        # ordering present in decision_function output.
        clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10)
        clf.fit(X2, Y2)
        d = clf.decision_function([3, 2])
        p = clf.predict_proba([3, 2])
        if not isinstance(self, SparseSGDClassifierTestCase):
            assert_equal(np.argmax(d, axis=1), np.argmax(p, axis=1))
        else:  # XXX the sparse test gets a different X2 (?)
            assert_equal(np.argmin(d, axis=1), np.argmin(p, axis=1))

        # the following sample produces decision_function values < -1,
        # which would cause naive normalization to fail (see comment
        # in SGDClassifier.predict_proba)
        x = X.mean(axis=0)
        d = clf.decision_function(x)
        if np.all(d < -1):  # XXX not true in sparse test case (why?)
            p = clf.predict_proba(x)
            assert_array_almost_equal(p[0], [1 / 3.0] * 3)
Developer: richlewis42, Project: scikit-learn, Lines of code: 68, Source: test_sgd.py

Example 2: __init__

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict_log_proba [as alias]
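# Note: this example assumes a project-local `preprocessing` module that provides
# process_raw_data(), plus the following imports used further down (added here so the
# snippet is closer to self-contained):
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.metrics import f1_score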
class SA_SGDClassifier:
    def __init__(self, train_data_percentage=70, count_vector_type='normal',
                 data_base_path=None, total_entries=1000, starting_pos=0):
        self.test_values = None
        self.training_data = None
        self.test_data_files = None
        self.train_data_percentage = train_data_percentage
        self.data_base_path = data_base_path
        self.starting_pos = starting_pos
        self.total_entries = total_entries
        self.counts = None
        self.classifier = None
        if count_vector_type == 'normal':
            self.count_vectorizer = CountVectorizer()
        else:
            # pass anything other than 'normal' as count_vector_type to get unigram+bigram features
            self.count_vectorizer = CountVectorizer(ngram_range=(1, 2))
        self.tf_idf = TfidfTransformer()
        self.initialized = False
        self.trained = False
        self.initialize()

    def initialize(self):
        max_training_datapoints = int(self.total_entries * (self.train_data_percentage/100))
        if self.total_entries - max_training_datapoints < self.starting_pos:
            print('incorrect parameters provided; check starting_pos and train_data_percentage')
            return
        temp = preprocessing.process_raw_data(max_training_datapoints=max_training_datapoints,
                                              starting_pos=self.starting_pos)
        self.training_data = temp[0]
        self.test_data_files = temp[1]
        self.counts = self.count_vectorizer.fit_transform(self.training_data['text'].values)
        self.initialized = True

    def train(self):
        if not self.initialized:
            print('classifier not initialized')
            return
        from sklearn.linear_model import SGDClassifier
        # use a probabilistic loss: predict_proba/predict_log_proba (called below) are not
        # available with loss='hinge'
        self.classifier = SGDClassifier(loss='log', penalty='l2', alpha=1e-3, n_iter=5, random_state=42)
        targets = self.training_data['class'].values
        counts_tf_idf = self.tf_idf.fit_transform(self.counts)
        self.classifier.fit(counts_tf_idf, targets)
        print('classifier was trained from', len(self.training_data['text']), 'entries')
        self.trained = True

    def predict_from_test_data(self):
        if not self.initialized or not self.trained:
            print('classifier not initialized or not trained')
            return
        base_path = './txt_sentoken/'
        test_reviews = []
        true_outputs = []
        for polarity in self.test_data_files:
            for file_name in self.test_data_files[polarity]:
                with open(base_path + polarity + '/' + file_name, 'r') as file:
                    file_content = file.read().splitlines()
                test_reviews.append(" ".join(file_content))
                true_outputs.append(polarity)

        test_reviews_count = self.count_vectorizer.transform(test_reviews)
        print('total reviews analyzed (test data):', len(test_reviews))
        predictions = self.classifier.predict(test_reviews_count)
        print('f1 score for negative polarity:', f1_score(true_outputs, predictions, pos_label='neg'))
        print('f1 score for positive polarity:', f1_score(true_outputs, predictions, pos_label='pos'))
        print('mean accuracy on test data:', self.classifier.score(test_reviews_count, true_outputs))

    def predict_from_user_string(self, string=None, true_output=None):
        if not self.initialized or not self.trained:
            print('classifier not initialized or not trained')
            return
        if string is None:
            string = input('Enter the string:\n')  # built-in input() takes the prompt positionally

        string_list = [string]
        string_count = self.count_vectorizer.transform(string_list)
        predicted_polarity = self.classifier.predict(string_count)
        if true_output is not None:
            if true_output == predicted_polarity:
                print('correct prediction :)')
            else:
                print('incorrect prediction :(')
        print('predicted polarity:', predicted_polarity)
        print('probability estimate', self.classifier.predict_proba(string_count))
        print('log probability estimate', self.classifier.predict_log_proba(string_count))
Developer: codebuff, Project: sentiment-analysis, Lines of code: 88, Source: sa_sgd_classifier.py

Example 3: get_minibatch

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict_log_proba [as alias]
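# Context (assumed, not shown in this excerpt): the earlier part of the script builds
# `vect` (a text vectorizer such as HashingVectorizer), `clf` (an SGDClassifier with a
# probabilistic loss, trained incrementally via partial_fit), the get_minibatch/doc_stream
# streaming helpers, a validation batch (X_val, y_val), and imports numpy as np and a
# `stopwords` corpus (e.g. from nltk.corpus).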
clf = clf.partial_fit(X_val, y_val)

X_test, y_test = get_minibatch(doc_stream, size=5000)
X_test = vect.transform(X_test)
print('Accuracy: %.3f' % clf.score(X_test, y_test))
clf = clf.partial_fit(X_test, y_test)

# Serializing
import pickle, os

# 1. Create a new directory to save our data
dest = os.path.join('movieclassifier', 'pkl_objects')
if not os.path.exists(dest):
    os.makedirs(dest)

# Serialize our classifier as well as our stop words
stop = stopwords.words('english')

pickle.dump(stop,
            open(os.path.join(dest, 'stopwords.pkl'), 'wb'),
            protocol=4)
pickle.dump(clf,
            open(os.path.join(dest, 'classifier.pkl'), 'wb'),
            protocol=4)

# Predicting
x = "This is a stupid movie, I will not recommend it"
label = {0: 'negative', 1: 'positive'}
X = vect.transform([x])  # transform expects an iterable of documents
# exponentiate the largest log-probability to report it as a percentage
print('Prediction: %s\nProbability: %.2f%%' % (label[clf.predict(X)[0]],
      np.exp(np.max(clf.predict_log_proba(X))) * 100))
Developer: louishenrifranc, Project: MachineLearning, Lines of code: 32, Source: OutofCore.py

Example 4: get_positive_c

# Required import: from sklearn.linear_model import SGDClassifier [as alias]
# Or: from sklearn.linear_model.SGDClassifier import predict_log_proba [as alias]
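# Context (assumed, not shown in this excerpt): the earlier part of the script defines a
# PySpark RDD `rdd` of message dicts, a data split `d`, a fitted text pipeline (`vect`
# vectorizer, `ch2` feature selector, `tfidf` transformer, `clf` SGDClassifier), the
# held-out features `test4`, the `_` flatten helper (an underscore-style utility library),
# and imports numpy as np, sklearn's metrics, pprint, and pickle.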
pred = clf.predict(test4) 
np.mean(pred == d['test']['labs'])
metrics.confusion_matrix(d['test']['labs'], pred)

# -- Testing on particular people ---

def get_positive_c(rdd_c):
    return rdd_c.map(lambda x: _.flatten([x['msg']])[0]).collect()

rdd_c = rdd.filter(lambda x: x['user'] == 'TheBrosnanDigest' and x['board_id'] == '18971').cache()
msgs  = get_positive_c(rdd_c)
test2 = vect.transform(msgs)
test3 = ch2.transform(test2)
test4 = tfidf.transform(test3)
pred  = np.array([np.exp(x) for x in clf.predict_log_proba(test4)])

pred_ = sorted(zip(msgs, [p[0] for p in pred]), key=lambda x: x[1], reverse=True)
pprint(pred_[0:10])

# --- Saving Model ----
model = {
   'vect'  : vect,
   'ch2'   : ch2, 
   'tfidf' : tfidf,
   'clf'   : clf
}
pickle.dump(model, open('models/triclass_model_20150826_1200.pickle', 'wb'))


Developer: gophronesis, Project: penny, Lines of code: 29, Source: tri_class_hv_classifier.py


Note: The sklearn.linear_model.SGDClassifier.predict_log_proba method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various programmers, and copyright of the source code belongs to the original authors. Please refer to the corresponding project's license before distributing or using the code; do not reproduce without permission.