当前位置: 首页>>代码示例>>Python>>正文

Python naive_bayes.BernoulliNB方法代码示例

本文整理汇总了Python中sklearn.naive_bayes.BernoulliNB方法的典型用法代码示例。如果您正苦于以下问题:Python naive_bayes.BernoulliNB方法的具体用法?Python naive_bayes.BernoulliNB怎么用?Python naive_bayes.BernoulliNB使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.naive_bayes的用法示例。


示例1: test_feature_log_prob_bnb

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def test_feature_log_prob_bnb():
    """Check BernoulliNB's feature log-probabilities for alpha=1.0 (issue #4268).

    The fitted ``feature_log_prob_`` is compared against
    ``log(feature_count_ + 1) - log(class_count_ + 2)``, the expression given
    in Manning, Raghavan and Schuetze's "Introduction to Information
    Retrieval":
    https://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
    """
    samples = np.array([[0, 0, 0], [1, 1, 0], [0, 1, 0], [1, 0, 1], [0, 1, 0]])
    labels = np.array([0, 0, 1, 2, 2])

    # Bernoulli NB with alpha = 1.0
    model = BernoulliNB(alpha=1.0)
    model.fit(samples, labels)

    # Hand-built (log) numerator and denominator of
    # P(feature presence | class); the denominator is repeated once per
    # feature so it lines up with the (n_classes, n_features) numerator.
    n_features = samples.shape[1]
    log_numerator = np.log(model.feature_count_ + 1.0)
    log_class_totals = np.log(model.class_count_ + 2.0)
    log_denominator = np.tile(log_class_totals, (n_features, 1)).T
    expected = log_numerator - log_denominator

    assert_array_almost_equal(model.feature_log_prob_, expected)

示例2: __init__

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def __init__(self, distributions, weights=None, **kwargs):
        """Create one naive Bayes model per requested distribution.

        Args:
            distributions: iterable of distribution identifiers; each one is
                resolved through ``NaiveBayesianDistribution.from_string``.
            weights: stored unchanged on ``self.weights``.
            **kwargs: forwarded to every model constructor. After the first
                model has been created ``fit_prior`` is forced to ``False``.

        Raises:
            ValueError: if a resolved distribution is not one of GAUSSIAN,
                MULTINOMIAL or BERNOULLI.
        """
        self.models = []
        for dist in distributions:
            dist = NaiveBayesianDistribution.from_string(dist)
            if dist is NaiveBayesianDistribution.GAUSSIAN:
                model = nb.GaussianNB(**kwargs)
            elif dist is NaiveBayesianDistribution.MULTINOMIAL:
                model = nb.MultinomialNB(**kwargs)
            elif dist is NaiveBayesianDistribution.BERNOULLI:
                model = nb.BernoulliNB(**kwargs)
            else:
                # Previously this raise sat inside the BERNOULLI branch, so
                # Bernoulli always failed and unknown values slipped through.
                raise ValueError('Unknown distribution: {}.'.format(dist))
            kwargs['fit_prior'] = False  # Except the first model.
            # Keep the model; without this ``self.models`` stayed empty.
            self.models.append(model)

        self.weights = weights

示例3: test_discretenb_pickle

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def test_discretenb_pickle():
    """Check that naive Bayes classifiers survive a pickle round trip.

    Each estimator is fitted, pickled into an in-memory buffer, restored and
    required to give the same predictions as before. For the estimators
    other than GaussianNB the same is done for a model trained in two
    ``partial_fit`` steps.
    """

    def _roundtrip(estimator):
        # Serialise into memory and load back from a fresh buffer.
        buffer = BytesIO()
        pickle.dump(estimator, buffer)
        return pickle.load(BytesIO(buffer.getvalue()))

    for estimator_cls in (BernoulliNB, MultinomialNB, GaussianNB):
        fitted = estimator_cls().fit(X2, y2)
        reference = fitted.predict(X2)

        restored = _roundtrip(fitted)
        assert_array_equal(reference, restored.predict(X2))

        if estimator_cls is GaussianNB:
            # TODO re-enable me when partial_fit is implemented for GaussianNB
            continue

        # Same check for an estimator trained incrementally.
        incremental = estimator_cls().partial_fit(
            X2[:3], y2[:3], classes=np.unique(y2))
        incremental.partial_fit(X2[3:], y2[3:])
        incremental = _roundtrip(incremental)
        assert_array_equal(reference, incremental.predict(X2))

示例4: test_input_check_partial_fit

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def test_input_check_partial_fit():
    """Check that partial_fit/predict reject inconsistent input.

    For BernoulliNB and MultinomialNB a ``ValueError`` is expected when
    X and y disagree in length, when ``classes`` is omitted on the first
    ``partial_fit`` call, when a later call passes different ``classes``,
    and when the number of features changes for ``partial_fit`` or
    ``predict``.
    """
    for cls in [BernoulliNB, MultinomialNB]:
        # check shape consistency
        # (the closing ``classes=...`` argument was missing, leaving the
        # call unterminated)
        assert_raises(ValueError, cls().partial_fit, X2, y2[:-1],
                      classes=np.unique(y2))

        # classes is required for first call to partial fit
        assert_raises(ValueError, cls().partial_fit, X2, y2)

        # check consistency of consecutive classes values
        clf = cls()
        clf.partial_fit(X2, y2, classes=np.unique(y2))
        # A second call with a different set of classes must be rejected.
        assert_raises(ValueError, clf.partial_fit, X2, y2,
                      classes=np.arange(42))

        # check consistency of input shape for partial_fit
        assert_raises(ValueError, clf.partial_fit, X2[:, :-1], y2)

        # check consistency of input shape for predict
        assert_raises(ValueError, clf.predict, X2[:, :-1])

示例5: test_discretenb_provide_prior_with_partial_fit

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def test_discretenb_provide_prior_with_partial_fit():
    """Check that discrete NB classes use the provided prior with partial_fit.

    For BernoulliNB and MultinomialNB, with and without an explicit
    ``class_prior``, a model fitted on the whole iris data set is compared
    with one trained in two ``partial_fit`` steps on a split of the same
    data: both must end up with the same ``class_log_prior_``.
    """
    iris = load_iris()
    iris_data1, iris_data2, iris_target1, iris_target2 = train_test_split(
        iris.data, iris.target, test_size=0.4, random_state=415)

    for cls in [BernoulliNB, MultinomialNB]:
        for prior in [None, [0.3, 0.3, 0.4]]:
            clf_full = cls(class_prior=prior)
            clf_full.fit(iris.data, iris.target)
            clf_partial = cls(class_prior=prior)
            clf_partial.partial_fit(iris_data1, iris_target1,
                                    classes=[0, 1, 2])
            clf_partial.partial_fit(iris_data2, iris_target2)
            # The comparison itself was missing, so the test could never
            # fail; the two training routes must agree on the class priors.
            assert_array_almost_equal(clf_full.class_log_prior_,
                                      clf_partial.class_log_prior_)

示例6: test_feature_log_prob_bnb

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def test_feature_log_prob_bnb():
    """Regression test for issue #4268.

    With alpha=1.0, BernoulliNB's ``feature_log_prob_`` has to match
    ``log(feature_count_ + 1) - log(class_count_ + 2)``, as given in
    Manning, Raghavan and Schuetze's "Introduction to Information
    Retrieval":
    http://nlp.stanford.edu/IR-book/html/htmledition/the-bernoulli-model-1.html
    """
    data = np.array([[0, 0, 0], [1, 1, 0], [0, 1, 0], [1, 0, 1], [0, 1, 0]])
    target = np.array([0, 0, 1, 2, 2])

    # Fit Bernoulli NB w/ alpha = 1.0
    estimator = BernoulliNB(alpha=1.0)
    estimator.fit(data, target)

    # (log) numerator of P(feature presence | class)
    log_num = np.log(estimator.feature_count_ + 1.0)
    # (log) denominator: one value per class, tiled across the features and
    # transposed so rows correspond to classes.
    tiling = (data.shape[1], 1)
    log_denom = np.tile(np.log(estimator.class_count_ + 2.0), tiling).T

    # Check manual estimate matches
    manual_estimate = log_num - log_denom
    assert_array_almost_equal(estimator.feature_log_prob_, manual_estimate)

示例7: test_discrete_prior

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def test_discrete_prior():
    """Check that fitted class priors equal log([2, 2, 2] / 6) to 8 decimals."""
    expected_log_prior = np.log(np.array([2, 2, 2]) / 6.0)
    for estimator_cls in (BernoulliNB, MultinomialNB):
        fitted = estimator_cls().fit(X2, y2)
        assert_array_almost_equal(
            expected_log_prior, fitted.class_log_prior_, 8)

示例8: test_discretenb_predict_proba

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def test_discretenb_predict_proba():
    """Check the probability scores of the discrete NB classes.

    Covers a two-class problem (labels 0 and 2) and a three-class problem:
    ``predict_proba`` must have one column per class and each row must sum
    to one, and the exponentiated class priors / intercepts must sum to one.
    """

    # The 100s below distinguish Bernoulli from multinomial.
    # FIXME: write a test to show this.
    X_bernoulli = [[1, 100, 0], [0, 1, 0], [0, 100, 1]]
    X_multinomial = [[0, 1], [1, 3], [4, 0]]

    # test binary case (1-d output)
    y = [0, 0, 2]   # 2 is regression test for binary case, 02e673
    for cls, X in zip([BernoulliNB, MultinomialNB],
                      [X_bernoulli, X_multinomial]):
        clf = cls().fit(X, y)
        assert_equal(clf.predict(X[-1:]), 2)
        assert_equal(clf.predict_proba([X[0]]).shape, (1, 2))
        # The opening line of this assertion was missing, leaving an
        # orphaned continuation line; each row of probabilities sums to 1.
        assert_array_almost_equal(clf.predict_proba(X[:2]).sum(axis=1),
                                  np.array([1., 1.]), 6)

    # test multiclass case (2-d output, must sum to one)
    y = [0, 1, 2]
    for cls, X in zip([BernoulliNB, MultinomialNB],
                      [X_bernoulli, X_multinomial]):
        clf = cls().fit(X, y)
        assert_equal(clf.predict_proba(X[0:1]).shape, (1, 3))
        assert_equal(clf.predict_proba(X[:2]).shape, (2, 3))
        assert_almost_equal(np.sum(clf.predict_proba([X[1]])), 1)
        assert_almost_equal(np.sum(clf.predict_proba([X[-1]])), 1)
        assert_almost_equal(np.sum(np.exp(clf.class_log_prior_)), 1)
        assert_almost_equal(np.sum(np.exp(clf.intercept_)), 1)

示例9: test_coef_intercept_shape

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def test_coef_intercept_shape():
    """Non-regression test for issue #2127.

    On a two-class problem with three features, ``coef_`` must have shape
    (1, 3) and ``intercept_`` shape (1,), as in other linear models.
    """
    features = [[1, 0, 0], [1, 1, 1]]
    labels = [1, 2]  # binary classification

    for estimator in (MultinomialNB(), BernoulliNB()):
        estimator.fit(features, labels)
        coef_shape = estimator.coef_.shape
        intercept_shape = estimator.intercept_.shape
        assert_equal(coef_shape, (1, 3))
        assert_equal(intercept_shape, (1,))

示例10: test_check_accuracy_on_digits

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def test_check_accuracy_on_digits():
    """Guard the accuracy of the NB models on the digits data set.

    Non regression test: further refactoring / optimisation of the NB models
    must not harm the performance on a slightly non-linearly separable
    dataset. Every model is scored with 10-fold cross validation, on all
    ten digits and on the 3-versus-8 subset, against a fixed lower bound.
    """

    def _mean_cv_score(estimator, features, labels):
        # Mean accuracy over the 10 cross-validation folds.
        return cross_val_score(estimator, features, labels, cv=10).mean()

    digits = load_digits()
    X, y = digits.data, digits.target
    is_3_or_8 = np.logical_or(digits.target == 3, digits.target == 8)
    X_3v8 = X[is_3_or_8]
    y_3v8 = y[is_3_or_8]

    # Multinomial NB
    assert_greater(_mean_cv_score(MultinomialNB(alpha=10), X, y), 0.86)
    assert_greater(
        _mean_cv_score(MultinomialNB(alpha=10), X_3v8, y_3v8), 0.94)

    # Bernoulli NB (pixel intensities thresholded at 4)
    assert_greater(_mean_cv_score(BernoulliNB(alpha=10), X > 4, y), 0.83)
    assert_greater(
        _mean_cv_score(BernoulliNB(alpha=10), X_3v8 > 4, y_3v8), 0.92)

    # Gaussian NB
    assert_greater(_mean_cv_score(GaussianNB(), X, y), 0.77)
    assert_greater(
        _mean_cv_score(GaussianNB(var_smoothing=0.1), X, y), 0.89)
    assert_greater(_mean_cv_score(GaussianNB(), X_3v8, y_3v8), 0.86)

示例11: __init__

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def __init__(self, info, verbose=True, debug_mode=False):
        """Pick and configure the underlying model from the dataset info.

        Args:
            info: mapping read for the keys 'task', 'metric', 'is_sparse'
                and 'has_categorical'.
            verbose: forwarded as ``verbose`` to the scikit-learn estimators.
            debug_mode: when >= 2, a ``RandomPredictor`` is installed instead
                of a real model and nothing else is configured.

        Sets ``self.name``, ``self.model`` and ``self.predict_method``
        (``predict`` for regression, ``predict_proba`` otherwise).
        """
        self.task = info['task']
        self.metric = info['metric']
        # To calibrate proba
        self.postprocessor = MultiLabelEnsemble(LogisticRegression(), balance=False)
        if debug_mode>=2:
            self.name = "RandomPredictor"
            # NOTE(review): self.target_num is not assigned anywhere in this
            # method; presumably it is provided elsewhere -- confirm.
            self.model = RandomPredictor(self.target_num)
            self.predict_method = self.model.predict_proba
            # Stop here, otherwise the debug model is overwritten below.
            return
        if info['task']=='regression':
            # Explicit ``== True`` kept so non-bool values behave as before.
            if info['is_sparse']==True:
                self.name = "BaggingRidgeRegressor"
                self.model = BaggingRegressor(base_estimator=Ridge(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            else:
                self.name = "GradientBoostingRegressor"
                self.model = GradientBoostingRegressor(n_estimators=1, verbose=verbose, warm_start = True)
            self.predict_method = self.model.predict # Always predict probabilities
        else:
            if info['has_categorical']: # Out of laziness, we do not convert categorical variables...
                self.name = "RandomForestClassifier"
                self.model = RandomForestClassifier(n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            elif info['is_sparse']:
                self.name = "BaggingNBClassifier"
                self.model = BaggingClassifier(base_estimator=BernoulliNB(), n_estimators=1, verbose=verbose) # unfortunately, no warm start...
            else:
                self.name = "GradientBoostingClassifier"
                # Direct constructor call instead of eval() on a built string.
                self.model = GradientBoostingClassifier(n_estimators=1, verbose=verbose, min_samples_split=10, random_state=1, warm_start=True)
            if info['task']=='multilabel.classification':
                self.model = MultiLabelEnsemble(self.model)
            self.predict_method = self.model.predict_proba

示例12: __init__

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def __init__(self, alpha=1.0, binarize=0.0, fit_prior=True, class_prior=None):
        """Record the hyperparameters and build the wrapped ``Op`` from them.

        The four arguments are stored in ``self._hyperparams`` under their
        own names and passed as keyword arguments to ``Op``.
        """
        hyperparams = dict(
            alpha=alpha,
            binarize=binarize,
            fit_prior=fit_prior,
            class_prior=class_prior,
        )
        self._hyperparams = hyperparams
        self._wrapped_model = Op(**hyperparams)

示例13: test_export_random_ind

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def test_export_random_ind():
    """Assert that the TPOTClassifier can generate the same pipeline export with random seed of 39."""
    tpot_obj = TPOTClassifier(random_state=39, config_dict="TPOT light")
    tpot_obj._pbar = tqdm(total=1, disable=True)
    pipeline = tpot_obj._toolbox.individual()
    # Exact source text the export is expected to produce. The closing
    # triple quote was missing, which swallowed the rest of the test into
    # the string literal.
    expected_code = """import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import BernoulliNB

# NOTE: Make sure that the outcome column is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1)
training_features, testing_features, training_target, testing_target = \\
            train_test_split(features, tpot_data['target'], random_state=39)

exported_pipeline = BernoulliNB(alpha=1.0, fit_prior=False)
# Fix random state in exported estimator
if hasattr(exported_pipeline, 'random_state'):
    setattr(exported_pipeline, 'random_state', 39)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
"""
    exported_code = export_pipeline(pipeline, tpot_obj.operators, tpot_obj._pset, random_state=tpot_obj.random_state)
    assert expected_code == exported_code

示例14: __init__

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def __init__(self, options):
        """Build the wrapped BernoulliNB estimator from ``options``.

        Args:
            options: mapping; its optional 'params' entry (default ``{}``)
                is run through ``convert_params`` with 'alpha' and
                'binarize' declared as floats, and the result is passed as
                keyword arguments to ``_BernoulliNB``.
        """
        # The call was left unterminated; the closing parenthesis is restored.
        # NOTE(review): further keyword arguments may have been lost along
        # with it -- confirm against the original source.
        out_params = convert_params(
            options.get('params', {}),
            floats=['alpha', 'binarize'],
        )

        self.estimator = _BernoulliNB(**out_params)

示例15: test_BernoulliNB

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import BernoulliNB [as 别名]
def test_BernoulliNB(self):
