本文整理汇总了 Python 中 sklearn.naive_bayes.MultinomialNB 类的典型用法代码示例。如果您正苦于以下问题：Python naive_bayes.MultinomialNB 的具体用法？Python naive_bayes.MultinomialNB 怎么用？Python naive_bayes.MultinomialNB 使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块 sklearn.naive_bayes 的用法示例。
在下文中一共展示了 naive_bayes.MultinomialNB 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: create_ngram_model
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def create_ngram_model(params=None):
    """Build a TF-IDF + multinomial naive Bayes pipeline for tweets.

    Args:
        params: Optional mapping of pipeline parameters. When truthy it is
            forwarded to ``Pipeline.set_params``.

    Returns:
        The unfitted ``Pipeline`` with the steps ``'tfidf'`` and ``'clf'``.
    """
    def preprocessor(tweet):
        # Lower-case first, then rewrite emoticons in the order given by
        # ``emo_repl_order``, and finally apply the regex substitutions.
        tweet = tweet.lower()
        for emoticon in emo_repl_order:
            tweet = tweet.replace(emoticon, emo_repl[emoticon])
        # ``items()`` exists on both Python 2 and Python 3, whereas
        # ``iteritems()`` raises AttributeError on Python 3.
        for pattern, replacement in re_repl.items():
            tweet = re.sub(pattern, replacement, tweet)
        return tweet

    tfidf_ngrams = TfidfVectorizer(preprocessor=preprocessor,
                                   analyzer="word")
    clf = MultinomialNB()
    pipeline = Pipeline([('tfidf', tfidf_ngrams), ('clf', clf)])

    if params:
        pipeline.set_params(**params)

    return pipeline
开发者ID:PacktPublishing,项目名称:Building-Machine-Learning-Systems-With-Python-Second-Edition,代码行数:23,代码来源:03_clean.py
示例2: create_union_model
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def create_union_model(params=None):
    """Build a pipeline combining linguistic and TF-IDF features with naive Bayes.

    Args:
        params: Optional mapping of pipeline parameters. When truthy it is
            forwarded to ``Pipeline.set_params``.

    Returns:
        The unfitted ``Pipeline`` with the steps ``'all'`` (a ``FeatureUnion``
        of ``'ling'`` and ``'tfidf'``) and ``'clf'``.
    """
    def preprocessor(tweet):
        # Lower-case first, then rewrite emoticons in the order given by
        # ``emo_repl_order``, and finally apply the regex substitutions.
        tweet = tweet.lower()
        for emoticon in emo_repl_order:
            tweet = tweet.replace(emoticon, emo_repl[emoticon])
        # ``items()`` exists on both Python 2 and Python 3, whereas
        # ``iteritems()`` raises AttributeError on Python 3.
        for pattern, replacement in re_repl.items():
            tweet = re.sub(pattern, replacement, tweet)
        # Hyphens and underscores become spaces in the returned text.
        return tweet.replace("-", " ").replace("_", " ")

    tfidf_ngrams = TfidfVectorizer(preprocessor=preprocessor,
                                   analyzer="word")
    ling_stats = LinguisticVectorizer()
    # To evaluate a single feature family on its own, pass only that
    # ('name', transformer) pair to FeatureUnion.
    all_features = FeatureUnion(
        [('ling', ling_stats), ('tfidf', tfidf_ngrams)])
    clf = MultinomialNB()
    pipeline = Pipeline([('all', all_features), ('clf', clf)])

    if params:
        pipeline.set_params(**params)

    return pipeline
开发者ID:PacktPublishing,项目名称:Building-Machine-Learning-Systems-With-Python-Second-Edition,代码行数:27,代码来源:04_sent.py
示例3: test_mnb_prior_unobserved_targets
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_mnb_prior_unobserved_targets():
    """Check prior smoothing for classes declared but not yet observed.

    The classifier is told about three classes up front while only two of
    them appear in the first ``partial_fit`` batch; the third class is
    introduced by a second batch.
    """
    train_samples = np.array([[0, 1], [1, 0]])
    train_targets = np.array([0, 1])
    clf = MultinomialNB()

    def check_predictions(expectations):
        # Each entry pairs one sample with the class it must be assigned.
        for sample, expected_class in expectations:
            assert clf.predict([sample]) == expected_class

    # First batch: class 2 is declared but has no training example yet.
    assert_no_warnings(
        clf.partial_fit, train_samples, train_targets, classes=[0, 1, 2]
    )
    check_predictions([([0, 1], 0), ([1, 0], 1), ([1, 1], 0)])

    # Second batch: a single example of the previously unobserved class.
    assert_no_warnings(clf.partial_fit, [[1, 1]], [2])
    check_predictions([([0, 1], 0), ([1, 0], 1), ([1, 1], 2)])
示例4: test_ovr_fit_predict
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_ovr_fit_predict():
    """Check OneVsRestClassifier fit/predict on the iris data.

    Two kinds of base estimator are exercised: one exposing
    ``decision_function`` (``LinearSVC``) and one exposing
    ``predict_proba`` (``MultinomialNB``).

    Plain ``assert`` statements are used instead of the nose-style
    ``assert_equal`` / ``assert_greater`` helpers, in line with the other
    tests in this file.
    """
    # A classifier which implements decision_function.
    ovr = OneVsRestClassifier(LinearSVC(random_state=0))
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert len(ovr.estimators_) == n_classes

    # The wrapped estimator must reach the same accuracy as the bare one.
    clf = LinearSVC(random_state=0)
    pred2 = clf.fit(iris.data, iris.target).predict(iris.data)
    assert np.mean(iris.target == pred) == np.mean(iris.target == pred2)

    # A classifier which implements predict_proba.
    ovr = OneVsRestClassifier(MultinomialNB())
    pred = ovr.fit(iris.data, iris.target).predict(iris.data)
    assert np.mean(iris.target == pred) > 0.65
# 0.23. warning about tol not having its correct default value.
示例5: test_ovr_multiclass
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_ovr_multiclass():
    """Check OneVsRestClassifier on a toy multiclass problem.

    Every base estimator is fitted twice: once with string labels and once
    with the equivalent label indicator matrix.

    A plain ``assert`` replaces the nose-style ``assert_equal`` helper, in
    line with the other tests in this file.
    """
    # Toy dataset where features correspond directly to labels.
    X = np.array([[0, 0, 5], [0, 5, 0], [3, 0, 0], [0, 0, 6], [6, 0, 0]])
    y = ["eggs", "spam", "ham", "eggs", "ham"]
    Y = np.array([[0, 0, 1],
                  [0, 1, 0],
                  [1, 0, 0],
                  [0, 0, 1],
                  [1, 0, 0]])

    classes = set("ham eggs spam".split())

    for base_clf in (MultinomialNB(), LinearSVC(random_state=0),
                     LinearRegression(), Ridge(),
                     ElasticNet()):
        # String labels.
        clf = OneVsRestClassifier(base_clf).fit(X, y)
        assert set(clf.classes_) == classes
        y_pred = clf.predict(np.array([[0, 0, 4]]))[0]
        assert_array_equal(y_pred, ["eggs"])

        # test input as label indicator matrix
        clf = OneVsRestClassifier(base_clf).fit(X, Y)
        y_pred = clf.predict([[0, 0, 4]])[0]
        assert_array_equal(y_pred, [0, 0, 1])
示例6: test_ovr_multilabel
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_ovr_multilabel():
    """Check OneVsRestClassifier on a toy multilabel problem.

    Each base estimator must predict the indicator row ``[0, 1, 1]`` for the
    probe sample and the fitted wrapper must report ``multilabel_``.
    """
    # Toy dataset where features correspond directly to labels.
    samples = np.array(
        [[0, 4, 5], [0, 5, 0], [3, 3, 3], [4, 0, 6], [6, 0, 0]])
    label_matrix = np.array([[0, 1, 1],
                             [0, 1, 0],
                             [1, 1, 1],
                             [1, 0, 1],
                             [1, 0, 0]])
    probe = [[0, 4, 4]]

    base_estimators = (
        MultinomialNB(),
        LinearSVC(random_state=0),
        LinearRegression(),
        Ridge(),
        ElasticNet(),
        Lasso(alpha=0.5),
    )
    for base_clf in base_estimators:
        fitted = OneVsRestClassifier(base_clf).fit(samples, label_matrix)
        first_row = fitted.predict(probe)[0]
        assert_array_equal(first_row, [0, 1, 1])
        assert fitted.multilabel_
示例7: test_ovr_single_label_predict_proba
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_ovr_single_label_predict_proba():
    """Check predict_proba of OneVsRestClassifier on a single-label task.

    The first 80 iris samples are used for fitting and the remainder for
    prediction.
    """
    data, target = iris.data, iris.target
    X_train = data[:80]
    Y_train = target[:80]
    X_test = data[80:]

    base_clf = MultinomialNB(alpha=1)
    clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)

    # Decision function only estimator.
    decision_only = OneVsRestClassifier(
        svm.SVR(gamma='scale')).fit(X_train, Y_train)
    assert not hasattr(decision_only, 'predict_proba')

    Y_pred = clf.predict(X_test)
    Y_proba = clf.predict_proba(X_test)

    # Each row of probabilities must sum to one.
    assert_almost_equal(Y_proba.sum(axis=1), 1.0)

    # The predicted label must be the index of the highest probability
    # in each row.
    most_likely = Y_proba.argmax(axis=1)
    assert not (most_likely - Y_pred).any()
示例8: trainNB
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def trainNB(trainX, trainY, testX, testY, samples, limit):
    """Fit a multinomial naive Bayes model and print train/test metrics.

    Args:
        trainX: Training features; only the first ``samples`` rows are used.
        trainY: Training labels; only the first ``samples`` entries are used.
        testX: Test features; only the first ``limit`` rows are used.
        testY: Test labels; only the first ``limit`` entries are used.
        samples: Number of leading training rows to fit and score on.
        limit: Number of leading test rows to score on.

    Returns:
        The fitted ``MultinomialNB`` classifier.

    The fit time and the combined scoring time are printed in seconds.
    """
    # time.clock() was removed in Python 3.8. timeit.default_timer exists on
    # both Python 2 and Python 3. NOTE: on Python 3 it reports wall-clock
    # time, whereas time.clock() reported CPU time on Unix.
    from timeit import default_timer

    def emit(label, value):
        # A single-argument print works as a statement (Python 2) and as a
        # function (Python 3). "{} {}" reproduces the separating space the
        # original print statement inserted between its two items.
        print("{} {}".format(label, value))

    def report(correct_label, truth, predicted):
        # Print accuracy, F-score, precision and recall for one data split.
        emit(correct_label, percentCorrect(truth, predicted))
        emit("f-score: ", f1_score(truth, predicted))
        metric = precision_recall_fscore_support(truth, predicted)
        emit("precision: ", metric[0])
        emit("recall: ", metric[1])

    start = default_timer()
    clf = MultinomialNB()
    clf.fit(trainX[:samples], trainY[:samples])
    print(default_timer() - start)

    start = default_timer()
    report("percent Trained correct: ",
           trainY[:samples], clf.predict(trainX[0:samples]))
    report("percent Test correct: ",
           testY[:limit], clf.predict(testX[0:limit]))
    print(default_timer() - start)

    return clf
示例9: fit_naive_bayes
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def fit_naive_bayes(path, saveto=None, cv=12):
    """Train a normalise -> TF-IDF -> naive Bayes pipeline and log the result.

    Args:
        path: Forwarded unchanged to ``train_model``.
        saveto: Forwarded to ``train_model``. When ``None`` a name of the
            form ``naive_bayes_<timestamp>.pkl`` is generated.
        cv: Forwarded unchanged to ``train_model``.

    Nothing is returned; the elapsed time and mean score reported by
    ``train_model`` are written to the module logger.
    """
    steps = [
        ('norm', TextNormalizer()),
        # Lower-casing is disabled and ``identity`` is used as the tokenizer.
        ('tfidf', TfidfVectorizer(tokenizer=identity, lowercase=False)),
        ('clf', MultinomialNB()),
    ]
    model = Pipeline(steps)

    if saveto is None:
        saveto = "naive_bayes_{}.pkl".format(time.time())

    scores, delta = train_model(path, model, saveto, cv)

    template = (
        "naive bayes training took {:0.2f} seconds "
        "with an average score of {:0.3f}"
    )
    logger.info(template.format(delta, scores.mean()))
示例10: __init__
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def __init__(self, df, weight=True, min_ct=0, total_iter=5):
    """Prepare the feature counts and the naive Bayes classifier.

    Args:
        df: Data frame that must contain a ``'total'`` column plus the
            mutation-type columns selected below.
        weight: Stored as ``self.is_weighted_sample``.
        min_ct: Currently unused here; the minimum-count filtering call is
            disabled.
        total_iter: Passed to the base constructor as ``total_iterations``.
    """
    self.logger = logging.getLogger(__name__)
    # Call the base constructor.
    super(MultinomialNaiveBayes, self).__init__(total_iterations=total_iter)
    self.is_weighted_sample = weight

    feature_columns = [
        'recurrent missense', 'recurrent indel', 'frame shift',
        'nonsense', 'missense', 'synonymous', 'inframe indel', 'no protein',
        'lost stop', 'splicing mutation',
    ]

    # Missing values are replaced by the column means.
    filled = df.fillna(df.mean())
    totals = filled['total']
    # Scale every feature by the row total and truncate to integers
    # (presumably turning percentages back into counts -- confirm upstream).
    counts = filled[feature_columns].mul(totals, axis=0).astype(int)
    self.x, self.y = features.randomize(counts)

    # alpha=1 gives Laplace smoothing (pseudocounts); fit_prior=True makes
    # the class priors come from the data.
    self.clf = MultinomialNB(alpha=1, fit_prior=True)
示例11: __init__
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def __init__(self, distributions, weights=None, **kwargs):
    """Create one naive Bayes model per requested distribution.

    Args:
        distributions: Iterable of values understood by
            ``NaiveBayesianDistribution.from_string``.
        weights: Stored as ``self.weights``.
        **kwargs: Keyword arguments passed to every model constructor. After
            the first model is created, ``fit_prior=False`` is forced for
            all following models.

    Raises:
        ValueError: If a distribution is not Gaussian, multinomial or
            Bernoulli.
    """
    self.models = []
    for name in distributions:
        kind = NaiveBayesianDistribution.from_string(name)

        if kind is NaiveBayesianDistribution.GAUSSIAN:
            model_cls = nb.GaussianNB
        elif kind is NaiveBayesianDistribution.MULTINOMIAL:
            model_cls = nb.MultinomialNB
        elif kind is NaiveBayesianDistribution.BERNOULLI:
            model_cls = nb.BernoulliNB
        else:
            raise ValueError('Unknown distribution: {}.'.format(kind))

        # NOTE(review): scikit-learn's GaussianNB does not appear to accept
        # ``fit_prior``, so a Gaussian entry after the first position would
        # likely raise TypeError here -- confirm and handle if needed.
        self.models.append(model_cls(**kwargs))

        # Only the first model keeps the caller's ``fit_prior`` setting.
        kwargs['fit_prior'] = False

    self.weights = weights
示例12: test_model_selection_works
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_model_selection_works(self):
    """Grid-search a label-space partitioning classifier on dense data.

    For every dataset, each searched parameter name must appear in
    ``best_params_`` and ``best_score_`` must be set.
    """
    for features, labels in self.get_multilabel_data_for_tests('dense'):
        param_grid = {
            'classifier': [LabelPowerset(), BinaryRelevance()],
            'clusterer': [RandomLabelSpaceClusterer(None, None, False)],
            'clusterer__cluster_size': list(range(2, 3)),
            'clusterer__cluster_count': [3],
            'clusterer__allow_overlap': [False],
            'classifier__classifier': [MultinomialNB()],
            'classifier__classifier__alpha': [0.7, 1.0],
        }

        search = GridSearchCV(
            LabelSpacePartitioningClassifier(),
            param_grid,
            scoring='f1_macro',
        )
        search.fit(features, labels)

        for param_name in param_grid:
            self.assertIn(param_name, search.best_params_)

        self.assertIsNotNone(search.best_score_)
示例13: test_model_calibrated_classifier_cv_float
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_model_calibrated_classifier_cv_float(self):
    """Convert a sigmoid-calibrated MultinomialNB (iris, float32 input) to ONNX.

    The converted model must exist and is then handed to
    ``dump_data_and_model`` together with the float32 data.
    """
    dataset = load_iris()
    X = dataset.data
    y = dataset.target

    base = MultinomialNB().fit(X, y)
    model = CalibratedClassifierCV(base, cv=2, method="sigmoid").fit(X, y)

    # One float input; the first dimension is left as None.
    initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn CalibratedClassifierCVMNB",
        initial_types,
        target_opset=TARGET_OPSET,
    )
    self.assertIsNotNone(model_onnx)

    failure_condition = (
        "StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        X.astype(np.float32),
        model,
        model_onnx,
        basename="SklearnCalibratedClassifierCVFloat",
        allow_failure=failure_condition,
    )
示例14: test_model_calibrated_classifier_cv_float_nozipmap
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_model_calibrated_classifier_cv_float_nozipmap(self):
    """Convert a sigmoid-calibrated MultinomialNB to ONNX with zipmap disabled.

    Same setup as the float test on iris, but the converter receives the
    option ``{'zipmap': False}`` for this model.
    """
    dataset = load_iris()
    X = dataset.data
    y = dataset.target

    base = MultinomialNB().fit(X, y)
    model = CalibratedClassifierCV(base, cv=2, method="sigmoid").fit(X, y)

    # One float input; the first dimension is left as None.
    initial_types = [("input", FloatTensorType([None, X.shape[1]]))]
    # Converter options are keyed by the identity of the model object.
    converter_options = {id(model): {'zipmap': False}}
    model_onnx = convert_sklearn(
        model,
        "scikit-learn CalibratedClassifierCVMNB",
        initial_types,
        target_opset=TARGET_OPSET,
        options=converter_options,
    )
    self.assertIsNotNone(model_onnx)

    failure_condition = (
        "StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        X.astype(np.float32),
        model,
        model_onnx,
        basename="SklearnCalibratedClassifierCVFloatNoZipMap",
        allow_failure=failure_condition,
    )
示例15: test_model_calibrated_classifier_cv_int
# 需要导入模块: from sklearn import naive_bayes [as 别名]
# 或者: from sklearn.naive_bayes import MultinomialNB [as 别名]
def test_model_calibrated_classifier_cv_int(self):
    """Convert a sigmoid-calibrated MultinomialNB (digits, int64 input) to ONNX.

    The converted model must exist and is then handed to
    ``dump_data_and_model`` together with the int64 data.
    """
    dataset = load_digits()
    X = dataset.data
    y = dataset.target

    base = MultinomialNB().fit(X, y)
    model = CalibratedClassifierCV(base, cv=2, method="sigmoid").fit(X, y)

    # One int64 input; the first dimension is left as None.
    initial_types = [("input", Int64TensorType([None, X.shape[1]]))]
    model_onnx = convert_sklearn(
        model,
        "scikit-learn CalibratedClassifierCVMNB",
        initial_types,
        target_opset=TARGET_OPSET,
    )
    self.assertIsNotNone(model_onnx)

    failure_condition = (
        "StrictVersion(onnxruntime.__version__)"
        "<= StrictVersion('0.2.1')"
    )
    dump_data_and_model(
        X.astype(np.int64),
        model,
        model_onnx,
        basename="SklearnCalibratedClassifierCVInt-Dec4",
        allow_failure=failure_condition,
    )