当前位置: 首页>>代码示例>>Python>>正文


Python naive_bayes.MultinomialNB方法代码示例

本文整理汇总了Python中sklearn.naive_bayes.MultinomialNB方法的典型用法代码示例。如果您正苦于以下问题:Python naive_bayes.MultinomialNB方法的具体用法?Python naive_bayes.MultinomialNB怎么用?Python naive_bayes.MultinomialNB使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.naive_bayes的用法示例。


在下文中一共展示了naive_bayes.MultinomialNB方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: create_ngram_model

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def create_ngram_model(params=None):
    """Build a tweet-sentiment pipeline: TF-IDF word features -> MultinomialNB.

    Parameters
    ----------
    params : dict, optional
        Pipeline parameters forwarded to ``Pipeline.set_params``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Unfitted two-step pipeline ('tfidf' -> 'clf').
    """
    def preprocessor(tweet):
        # Lower-case, then expand emoticons and apply regex replacements.
        # emo_repl_order / emo_repl / re_repl are module-level lookup tables.
        tweet = tweet.lower()

        for k in emo_repl_order:
            tweet = tweet.replace(k, emo_repl[k])
        # BUG FIX: dict.iteritems() is Python 2 only and raises AttributeError
        # on Python 3; dict.items() behaves identically here on both versions.
        for r, repl in re_repl.items():
            tweet = re.sub(r, repl, tweet)

        return tweet

    tfidf_ngrams = TfidfVectorizer(preprocessor=preprocessor,
                                   analyzer="word")
    clf = MultinomialNB()
    pipeline = Pipeline([('tfidf', tfidf_ngrams), ('clf', clf)])

    if params:
        pipeline.set_params(**params)

    return pipeline
开发者ID:PacktPublishing,项目名称:Building-Machine-Learning-Systems-With-Python-Second-Edition,代码行数:23,代码来源:03_clean.py

示例2: create_union_model

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def create_union_model(params=None):
    """Build a sentiment pipeline combining linguistic stats and TF-IDF features.

    Parameters
    ----------
    params : dict, optional
        Pipeline parameters forwarded to ``Pipeline.set_params``.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Unfitted pipeline: FeatureUnion('ling' + 'tfidf') -> MultinomialNB.
    """
    def preprocessor(tweet):
        # Lower-case, expand emoticons, apply regex replacements, then split
        # hyphen/underscore compounds into separate tokens.
        tweet = tweet.lower()

        for k in emo_repl_order:
            tweet = tweet.replace(k, emo_repl[k])
        # BUG FIX: dict.iteritems() is Python 2 only and raises AttributeError
        # on Python 3; dict.items() behaves identically here on both versions.
        for r, repl in re_repl.items():
            tweet = re.sub(r, repl, tweet)

        return tweet.replace("-", " ").replace("_", " ")

    tfidf_ngrams = TfidfVectorizer(preprocessor=preprocessor,
                                   analyzer="word")
    ling_stats = LinguisticVectorizer()
    all_features = FeatureUnion(
        [('ling', ling_stats), ('tfidf', tfidf_ngrams)])
    clf = MultinomialNB()
    pipeline = Pipeline([('all', all_features), ('clf', clf)])

    if params:
        pipeline.set_params(**params)

    return pipeline
开发者ID:PacktPublishing,项目名称:Building-Machine-Learning-Systems-With-Python-Second-Edition,代码行数:27,代码来源:04_sent.py

示例3: test_mnb_prior_unobserved_targets

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_mnb_prior_unobserved_targets():
    """Priors for classes declared but not yet seen must be smoothed silently.

    partial_fit with classes=[0, 1, 2] announces three classes while only two
    have training data; no warning may be raised, and predictions shift once
    the third class finally receives an example.
    """
    X_init = np.array([[0, 1], [1, 0]])
    y_init = np.array([0, 1])

    clf = MultinomialNB()

    # Declare all three classes up front, even though class 2 has no samples.
    assert_no_warnings(clf.partial_fit, X_init, y_init, classes=[0, 1, 2])

    for sample, expected in (([[0, 1]], 0), ([[1, 0]], 1), ([[1, 1]], 0)):
        assert clf.predict(sample) == expected

    # Feed the first example of the previously unobserved class.
    assert_no_warnings(clf.partial_fit, [[1, 1]], [2])

    for sample, expected in (([[0, 1]], 0), ([[1, 0]], 1), ([[1, 1]], 2)):
        assert clf.predict(sample) == expected
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:27,代码来源:test_naive_bayes.py

示例4: test_ovr_fit_predict

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_ovr_fit_predict():
    """OneVsRest must wrap both decision_function and predict_proba estimators."""
    # Base estimator exposing decision_function.
    ovr = OneVsRestClassifier(LinearSVC(random_state=0))
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert_equal(len(ovr.estimators_), n_classes)

    # A bare LinearSVC should reach the same accuracy as its OvR wrapping.
    baseline = LinearSVC(random_state=0).fit(iris.data, iris.target)
    baseline_pred = baseline.predict(iris.data)
    assert_equal(np.mean(iris.target == pred),
                 np.mean(iris.target == baseline_pred))

    # Base estimator exposing predict_proba.
    ovr = OneVsRestClassifier(MultinomialNB())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert_greater(np.mean(iris.target == pred), 0.65)


# 0.23. warning about tol not having its correct default value. 
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:19,代码来源:test_multiclass.py

示例5: test_ovr_multiclass

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_ovr_multiclass():
    """OvR handles string labels and label-indicator matrices identically.

    Features correspond one-to-one with labels, so any reasonable base
    estimator must recover the right class.
    """
    X = np.array([[0, 0, 5], [0, 5, 0], [3, 0, 0], [0, 0, 6], [6, 0, 0]])
    y = ["eggs", "spam", "ham", "eggs", "ham"]
    Y = np.array([[0, 0, 1],
                  [0, 1, 0],
                  [1, 0, 0],
                  [0, 0, 1],
                  [1, 0, 0]])

    classes = set("ham eggs spam".split())

    base_estimators = (MultinomialNB(), LinearSVC(random_state=0),
                       LinearRegression(), Ridge(), ElasticNet())
    for base_clf in base_estimators:
        # String targets.
        clf = OneVsRestClassifier(base_clf).fit(X, y)
        assert_equal(set(clf.classes_), classes)
        assert_array_equal(clf.predict(np.array([[0, 0, 4]]))[0], ["eggs"])

        # Same data as a label-indicator matrix.
        clf = OneVsRestClassifier(base_clf).fit(X, Y)
        assert_array_equal(clf.predict([[0, 0, 4]])[0], [0, 0, 1])
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:26,代码来源:test_multiclass.py

示例6: test_ovr_multilabel

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_ovr_multilabel():
    """OvR must detect multilabel targets and predict per-label indicators."""
    # Features correspond directly to labels.
    X = np.array([[0, 4, 5], [0, 5, 0], [3, 3, 3], [4, 0, 6], [6, 0, 0]])
    y = np.array([[0, 1, 1],
                  [0, 1, 0],
                  [1, 1, 1],
                  [1, 0, 1],
                  [1, 0, 0]])

    base_estimators = (MultinomialNB(), LinearSVC(random_state=0),
                       LinearRegression(), Ridge(),
                       ElasticNet(), Lasso(alpha=0.5))
    for base_clf in base_estimators:
        clf = OneVsRestClassifier(base_clf).fit(X, y)
        assert_array_equal(clf.predict([[0, 4, 4]])[0], [0, 1, 1])
        assert clf.multilabel_
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:18,代码来源:test_multiclass.py

示例7: test_ovr_single_label_predict_proba

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_ovr_single_label_predict_proba():
    """predict_proba rows sum to one and argmax agrees with predict."""
    base_clf = MultinomialNB(alpha=1)
    X, Y = iris.data, iris.target
    X_train, Y_train = X[:80], Y[:80]
    X_test = X[80:]
    clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)

    # A base estimator with only decision_function must not grow predict_proba.
    decision_only = OneVsRestClassifier(
        svm.SVR(gamma='scale')).fit(X_train, Y_train)
    assert not hasattr(decision_only, 'predict_proba')

    Y_pred = clf.predict(X_test)
    Y_proba = clf.predict_proba(X_test)

    assert_almost_equal(Y_proba.sum(axis=1), 1.0)
    # In the single-label case the predicted class is the probability argmax.
    pred = np.array([row.argmax() for row in Y_proba])
    assert np.array_equal(pred, Y_pred)
开发者ID:PacktPublishing,项目名称:Mastering-Elasticsearch-7.0,代码行数:22,代码来源:test_multiclass.py

示例8: trainNB

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def trainNB(trainX, trainY, testX, testY, samples, limit):
    """Train a MultinomialNB on a slice of the data and report train/test metrics.

    Parameters
    ----------
    trainX, trainY : array-like
        Training features and labels; only the first ``samples`` rows are used.
    testX, testY : array-like
        Test features and labels; only the first ``limit`` rows are scored.
    samples : int
        Number of training rows to fit and re-score on.
    limit : int
        Number of test rows to score.

    Returns
    -------
    sklearn.naive_bayes.MultinomialNB
        The fitted classifier.
    """
    # PORT: the original used Python 2 print statements (a SyntaxError on
    # Python 3) and time.clock(), which was removed in Python 3.8;
    # time.perf_counter() is its documented replacement for timing.
    start = time.perf_counter()
    clf = MultinomialNB()
    clf.fit(trainX[:samples], trainY[:samples])
    print(time.perf_counter() - start)
    start = time.perf_counter()

    # Evaluate on the same rows the model was trained on.
    predicted = clf.predict(trainX[0:samples])
    print("percent Trained correct: ", percentCorrect(trainY[:samples], predicted))
    print("f-score: ", f1_score(trainY[:samples], predicted))
    metric = precision_recall_fscore_support(trainY[:samples], predicted)
    print("precision: ", metric[0])
    print("recall: ", metric[1])

    # Evaluate on held-out test rows.
    predicted = clf.predict(testX[0:limit])
    print("percent Test correct: ", percentCorrect(testY[:limit], predicted))
    print("f-score: ", f1_score(testY[:limit], predicted))
    metric = precision_recall_fscore_support(testY[:limit], predicted)
    print("precision: ", metric[0])
    print("recall: ", metric[1])

    print(time.perf_counter() - start)
    return clf
开发者ID:jamespayor,项目名称:vector-homomorphic-encryption,代码行数:25,代码来源:ml.py

示例9: fit_naive_bayes

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def fit_naive_bayes(path, saveto=None, cv=12):
    """Cross-validate and persist a normalize -> TF-IDF -> MultinomialNB model.

    Parameters
    ----------
    path : str
        Location of the training corpus, forwarded to ``train_model``.
    saveto : str, optional
        Pickle destination; defaults to a timestamped filename.
    cv : int
        Number of cross-validation folds.
    """
    model = Pipeline([
        ('norm', TextNormalizer()),
        ('tfidf', TfidfVectorizer(tokenizer=identity, lowercase=False)),
        ('clf', MultinomialNB()),
    ])

    # Timestamped default keeps successive runs from clobbering each other.
    if saveto is None:
        saveto = "naive_bayes_{}.pkl".format(time.time())

    scores, delta = train_model(path, model, saveto, cv)
    summary = (
        "naive bayes training took {:0.2f} seconds "
        "with an average score of {:0.3f}"
    ).format(delta, scores.mean())
    logger.info(summary)
开发者ID:foxbook,项目名称:atap,代码行数:18,代码来源:mp_train.py

示例10: __init__

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def __init__(self, df, weight=True, min_ct=0, total_iter=5):
        """Prepare mutation-count features from *df* and configure the classifier.

        Parameters
        ----------
        df : pandas.DataFrame
            Per-gene mutation statistics; must contain a 'total' column and the
            ten mutation-type columns selected below.
        weight : bool
            Whether to use weighted samples (stored on the instance).
        min_ct : int
            Currently unused — row filtering is disabled (see NOTE below).
        total_iter : int
            Iteration count forwarded to the base-class constructor.
        """
        self.logger = logging.getLogger(__name__)
        # Base class tracks iteration bookkeeping.
        super(MultinomialNaiveBayes, self).__init__(total_iterations=total_iter)
        # NOTE(review): low-count row filtering (self.set_min_count(min_ct))
        # is intentionally disabled in the original; min_ct has no effect.
        self.is_weighted_sample = weight

        # Impute missing values with per-column means.
        df = df.fillna(df.mean())
        totals = df['total']
        feature_columns = ['recurrent missense', 'recurrent indel',
                           'frame shift', 'nonsense', 'missense', 'synonymous',
                           'inframe indel', 'no protein', 'lost stop',
                           'splicing mutation']
        # Convert per-type fractions back to integer counts by scaling with
        # each row's total, since MultinomialNB expects count data.
        counts = df[feature_columns].mul(totals, axis=0).astype(int)
        self.x, self.y = features.randomize(counts)

        # alpha=1 -> Laplace smoothing (pseudocounts);
        # fit_prior=True -> learn class priors from the data.
        self.clf = MultinomialNB(alpha=1, fit_prior=True)
开发者ID:KarchinLab,项目名称:2020plus,代码行数:25,代码来源:multinomial_nb_clf.py

示例11: __init__

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def __init__(self, distributions, weights=None, **kwargs):
        """Build one sklearn naive-Bayes model per requested distribution.

        Parameters
        ----------
        distributions : iterable
            Distribution identifiers accepted by
            ``NaiveBayesianDistribution.from_string`` (Gaussian, multinomial,
            or Bernoulli).
        weights : optional
            Per-model weights, stored as-is for later use.
        **kwargs :
            Extra keyword arguments forwarded to each sklearn model
            constructor.

        Raises
        ------
        ValueError
            If a distribution string does not map to a known distribution.
        """
        self.models = []
        for dist in distributions:
            dist = NaiveBayesianDistribution.from_string(dist)
            if dist is NaiveBayesianDistribution.GAUSSIAN:
                model = nb.GaussianNB(**kwargs)
            elif dist is NaiveBayesianDistribution.MULTINOMIAL:
                model = nb.MultinomialNB(**kwargs)
            elif dist is NaiveBayesianDistribution.BERNOULLI:
                model = nb.BernoulliNB(**kwargs)
            else:
                raise ValueError('Unknown distribution: {}.'.format(dist))
            # Mutates kwargs mid-loop: every model AFTER the first is built
            # with fit_prior=False, so only the first model learns class
            # priors from the data.
            # NOTE(review): GaussianNB does not accept fit_prior, so a
            # GAUSSIAN entry after the first would presumably raise
            # TypeError — confirm intended usage with callers.
            kwargs['fit_prior'] = False  # Except the first model.
            self.models.append(model)

        self.weights = weights
开发者ID:vacancy,项目名称:Jacinle,代码行数:18,代码来源:hybrid_nb.py

示例12: test_model_selection_works

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_model_selection_works(self):
        """GridSearchCV over a label-space-partitioning classifier must search
        every declared parameter and produce a score."""
        for X, y in self.get_multilabel_data_for_tests('dense'):
            # Grid spans the meta-classifier, the clusterer, and the inner
            # per-cluster classifier's hyperparameters.
            parameters = {
                'classifier': [LabelPowerset(), BinaryRelevance()],
                'clusterer': [RandomLabelSpaceClusterer(None, None, False)],
                'clusterer__cluster_size': list(range(2, 3)),
                'clusterer__cluster_count': [3],
                'clusterer__allow_overlap': [False],
                'classifier__classifier': [MultinomialNB()],
                'classifier__classifier__alpha': [0.7, 1.0],
            }

            search = GridSearchCV(LabelSpacePartitioningClassifier(),
                                  parameters, scoring='f1_macro')
            search.fit(X, y)

            # Every searched parameter must appear in the winning combination.
            for name in parameters:
                self.assertIn(name, search.best_params_)

            self.assertIsNotNone(search.best_score_)
开发者ID:scikit-multilearn,项目名称:scikit-multilearn,代码行数:21,代码来源:test_base.py

示例13: test_model_calibrated_classifier_cv_float

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_model_calibrated_classifier_cv_float(self):
        """ONNX conversion of CalibratedClassifierCV(MultinomialNB) on float input."""
        data = load_iris()
        X, y = data.data, data.target
        base = MultinomialNB().fit(X, y)
        model = CalibratedClassifierCV(base, cv=2, method="sigmoid").fit(X, y)

        initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
        model_onnx = convert_sklearn(
            model,
            "scikit-learn CalibratedClassifierCVMNB",
            initial_types,
            target_opset=TARGET_OPSET,
        )
        self.assertTrue(model_onnx is not None)

        # Round-trip check: the ONNX graph must score like the sklearn model.
        dump_data_and_model(
            X.astype(np.float32),
            model,
            model_onnx,
            basename="SklearnCalibratedClassifierCVFloat",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        )
开发者ID:onnx,项目名称:sklearn-onnx,代码行数:22,代码来源:test_sklearn_calibrated_classifier_cv_converter.py

示例14: test_model_calibrated_classifier_cv_float_nozipmap

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_model_calibrated_classifier_cv_float_nozipmap(self):
        """Same float conversion as above, but with the ZipMap operator disabled."""
        data = load_iris()
        X, y = data.data, data.target
        base = MultinomialNB().fit(X, y)
        model = CalibratedClassifierCV(base, cv=2, method="sigmoid").fit(X, y)

        initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
        # zipmap=False makes probabilities come back as a raw tensor
        # instead of a list of dictionaries.
        model_onnx = convert_sklearn(
            model, "scikit-learn CalibratedClassifierCVMNB",
            initial_types,
            target_opset=TARGET_OPSET,
            options={id(model): {'zipmap': False}})
        self.assertTrue(model_onnx is not None)

        dump_data_and_model(
            X.astype(np.float32), model, model_onnx,
            basename="SklearnCalibratedClassifierCVFloatNoZipMap",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')")
开发者ID:onnx,项目名称:sklearn-onnx,代码行数:18,代码来源:test_sklearn_calibrated_classifier_cv_converter.py

示例15: test_model_calibrated_classifier_cv_int

# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_model_calibrated_classifier_cv_int(self):
        """ONNX conversion of CalibratedClassifierCV(MultinomialNB) on int64 input."""
        data = load_digits()
        X, y = data.data, data.target
        base = MultinomialNB().fit(X, y)
        model = CalibratedClassifierCV(base, cv=2, method="sigmoid").fit(X, y)

        initial_types = [("input", Int64TensorType([None, X.shape[1]]))]
        model_onnx = convert_sklearn(
            model,
            "scikit-learn CalibratedClassifierCVMNB",
            initial_types,
            target_opset=TARGET_OPSET,
        )
        self.assertTrue(model_onnx is not None)

        # Round-trip check on int64 input (Dec4 -> compare to 4 decimals).
        dump_data_and_model(
            X.astype(np.int64),
            model,
            model_onnx,
            basename="SklearnCalibratedClassifierCVInt-Dec4",
            allow_failure="StrictVersion(onnxruntime.__version__)"
            "<= StrictVersion('0.2.1')",
        )
开发者ID:onnx,项目名称:sklearn-onnx,代码行数:22,代码来源:test_sklearn_calibrated_classifier_cv_converter.py


注:本文中的sklearn.naive_bayes.MultinomialNB方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。