本文整理汇总了Python中sklearn.naive_bayes.MultinomialNB.predict方法的典型用法代码示例。如果您正苦于以下问题:Python MultinomialNB.predict方法的具体用法?Python MultinomialNB.predict怎么用?Python MultinomialNB.predict使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.naive_bayes.MultinomialNB
的用法示例。
在下文中一共展示了MultinomialNB.predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: multinomialNB
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
def multinomialNB(devMatrix, trainMatrix, devtarget, traintarget):
    """Train a MultinomialNB on the train split and log MSE/score for both
    the train and dev splits to MNNB2.log (appended).

    Parameters are the dev/train feature matrices and their target vectors.
    Returns the dev-set mean accuracy (``clf.score`` on devMatrix/devtarget).
    """
    # 'with' guarantees the log file is closed even if fitting raises.
    with open('MNNB2.log', 'a') as f:
        f.write("Making model!!!!!")
        print('Making model!')
        clf = MultinomialNB(alpha=1, fit_prior=False)
        clf.fit(trainMatrix, traintarget)
        f.write("\n")
        # BUG FIX: the original built a (format, value) tuple and logged its
        # repr; apply the formatting instead.
        value = 'Model: multinomial bayes with parameters %s' % (clf.get_params(False),)
        print(value)
        f.write(value)
        f.write("\n")
        f.write("MSE for train: %.2f" % np.mean((clf.predict(trainMatrix) - traintarget) ** 2))
        score = clf.score(trainMatrix, traintarget)
        f.write("\n")
        # BUG FIX: the original computed the train score but never wrote it.
        f.write('Score for train %.2f' % score)
        f.write("\n")
        f.write("MSE for dev: %.2f" % np.mean((clf.predict(devMatrix) - devtarget) ** 2))
        score = clf.score(devMatrix, devtarget)
        value = 'Score for dev %.2f' % score
        print(value)
        f.write("\n")
        f.write(value)
        f.write("\n")
        f.write('model done')
        f.write("\n")
        f.write("\n")
    return score
示例2: RunNBCScikit
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
def RunNBCScikit(q):
    """Benchmark scikit-learn's MultinomialNB: load CSV datasets, fit, predict.

    Puts the elapsed time on queue *q* and returns it; on any exception it
    puts/returns -1 instead.

    NOTE(review): this is defined as a free function taking only ``q`` but it
    reads ``self.verbose`` and ``self.dataset`` -- it only works when pasted
    back as a method of its original benchmark class. Confirm before reuse.
    """
    totalTimer = Timer()
    Log.Info("Loading dataset", self.verbose)
    # Load train and test dataset (self.dataset[0] = train, [1] = test).
    trainData = np.genfromtxt(self.dataset[0], delimiter=',')
    testData = np.genfromtxt(self.dataset[1], delimiter=',')
    # Labels are the last *column* of the training set.
    labels = trainData[:, (trainData.shape[1] - 1)]
    trainData = trainData[:,:-1]
    try:
        # Only the timed region: construction, fit and predict.
        with totalTimer:
            # Create and train the classifier.
            nbc = MultinomialNB()
            nbc.fit(trainData, labels)
            # Run Naive Bayes Classifier on the test dataset.
            nbc.predict(testData)
    except Exception as e:
        # Signal failure to whoever is consuming the queue.
        q.put(-1)
        return -1
    time = totalTimer.ElapsedTime()
    q.put(time)
    return time
示例3: test_mnb_prior_unobserved_targets
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
def test_mnb_prior_unobserved_targets():
    """Prior smoothing must cope with classes declared but not yet observed."""
    # Toy training data: two samples, two observed classes, class 2 declared
    # up-front but unseen.
    X = np.array([[0, 1], [1, 0]])
    y = np.array([0, 1])
    clf = MultinomialNB()
    assert_no_warnings(
        clf.partial_fit, X, y, classes=[0, 1, 2]
    )
    # Before class 2 has any examples, it must never be predicted.
    for sample, expected in (([[0, 1]], 0), ([[1, 0]], 1), ([[1, 1]], 0)):
        assert clf.predict(sample) == expected
    # Feed one example of the previously unobserved class.
    assert_no_warnings(
        clf.partial_fit, [[1, 1]], [2]
    )
    # Now class 2 wins on its own training point; the others are unchanged.
    for sample, expected in (([[0, 1]], 0), ([[1, 0]], 1), ([[1, 1]], 2)):
        assert clf.predict(sample) == expected
示例4: __init__
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
class NaiveBayes:
def __init__(self):
self.clf = MultinomialNB()
self.pattern ='(?u)\\b[A-Za-z]{3,}'
self.tfidf = TfidfVectorizer(sublinear_tf=False, use_idf=True, smooth_idf=True, stop_words='english', token_pattern=self.pattern, ngram_range=(2,2))
def train(self,fileName):
print "Naive Bayes classifier is being trained"
table = pandas.read_table(fileName, sep="\t", names=["cat", "message"])
X_train = self.tfidf.fit_transform(table.message)
Y_train = []
for item in table.cat:
Y_train.append(int(item))
self.clf.fit(X_train, Y_train)
self.clf.fit(X_train, Y_train)
print "Naive Bayes classifier has been trained"
def classify(self,cFileName, rFileName):
table = pandas.read_table(cFileName, names=["message"])
X_test = self.tfidf.transform(table.message)
print "Data have been classified"
with open(rFileName,'w') as f:
for item in self.clf.predict(X_test).astype(str):
f.write(item+'\n')
def validate(self,fileName):
table = pandas.read_table(fileName, sep="\t", names=["cat", "message"])
X_validate = self.tfidf.transform(table.message)
Y_validated = self.clf.predict(X_validate).astype(str)
totalNum = len(table.cat)
errorCount = 0
for i in range(0,totalNum):
if int(table.cat[i])!=int(Y_validated[i]):
errorCount += 1
print "Data have been validated! Precision={}".format((totalNum-errorCount)/float(totalNum))
示例5: __init__
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
class ScikitNB:
def __init__(self, train_file, tags_file, tag_start, tag_end):
self.sf = ScikitFeature(train_file, tags_file, tag_start, tag_end, max_features=10000)
print "done getting features"
self.classifier = MultinomialNB()
self.classifier.fit(self.sf.training_text, self.sf.training_labels_tuple)
print "done fitting"
def predict(self, text):
text_vector = self.sf.get_text_vector(text)
labels = self.classifier.predict(text_vector)
return self.sf.get_labels(labels)
def test(self, test_file):
test_matrix = self.sf.get_file_text(test_file)
predicted_labels = self.classifier.predict(test_matrix)
print predicted_labels
predicted_label_names = [self.sf.get_labels_from_id(label_ids) for label_ids in predicted_labels]
true_labels = self.sf.get_file_labels(test_file)
N_question = len(predicted_labels)
N_true_tags = 0.0
N_predict_tags = 0.0
N_correct = 0.0
F1 = []
for i in range(N_question):
N_true_tags += len(true_labels[i])
N_predict_tags += len(predicted_labels[i])
this_correct = 0.
for predict_label_id in predicted_labels[i]:
if (predict_label_id in true_labels[i]):
this_correct += 1
N_correct += this_correct
if this_correct == 0:
F1.append(0)
else:
p = this_correct / len(predicted_labels[i])
r = this_correct / len(true_labels[i])
F1.append(2*p*r/(p+r))
print N_correct,N_predict_tags,N_true_tags
p= N_correct / N_predict_tags
r= N_correct / N_true_tags
print "Precision: %f %%" % (p*100)
print "Recall: %f %%" % (r*100)
print "Mean F1: %f" % (np.average(F1))
def get_tags(self, test_file, output_file):
print "Getting tags for "+test_file
new_csv = open(output_file, 'w')
writer = csv.writer(new_csv, delimiter=',', quotechar='"')
test_matrix = self.sf.get_file_text(test_file)
predicted_labels = self.classifier.predict(test_matrix)
predicted_label_names = [self.sf.get_labels_from_id(label_ids) for label_ids in predicted_labels]
ids = self.sf.get_file_ids(test_file)
for i,id in enumerate(ids):
tags = " ".join(predicted_label_names[i])
writer.writerow([id, tags])
new_csv.close()
print "Done."
示例6: nb_class
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
def nb_class():
nb_classifier = MultinomialNB(alpha=0.01)
nb_classifier.fit(X_train, y_train)
y_train_pred = nb_classifier.predict(X_train)
y_test_pred = nb_classifier.predict(X_test)
train_accuracy = np.mean(y_train_pred.ravel() == y_train.ravel()) * 100
y_test_pred = nb_classifier.predict(X_test)
test_accuracy = np.mean(y_test_pred.ravel() == y_test.ravel()) * 100
print 'multinomial naive bayes test accuracy:', test_accuracy
示例7: train_test
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
def train_test(args):
    """Train one (features, classifier) combination and score it on test data.

    *args* unpacks to (train, test, features, classifier) where train/test are
    dicts with 'data' and 'labels', features is one of 'tfidf'|'dict'|'lsa'|
    'rule' and classifier one of 'mnb'|'gnb'|'svm'|'log'|'rule'.
    Returns the mean prediction accuracy on the test labels.
    """
    # unpack arguments and make train/test data/label dicts/lists
    train, test, features, classifier = args
    # Build the training feature representation.
    if features == 'tfidf':
        fe = TfidfVectorizer(tokenizer=tokenize, stop_words='english', max_features=1290)
        trainfe = fe.fit_transform(train['data'])
    elif features == 'dict':
        fe = CountVectorizer(tokenizer=tokenize, stop_words='english', binary=True)
        trainfe = fe.fit_transform(train['data'])
    elif features == 'lsa':
        svd = TruncatedSVD(n_components=100, random_state=42)
        fe = TfidfVectorizer(tokenizer=tokenize, stop_words='english', max_df=0.115, max_features=11500)
        trainfe = svd.fit_transform(fe.fit_transform(train['data']))
    elif features == 'rule':
        # Separate vocabularies for ham and spam; classification below is by
        # which vocabulary matches more terms.
        hamfe = CountVectorizer(tokenizer=tokenize, stop_words='english', max_features=1150)
        spamfe = CountVectorizer(tokenizer=tokenize, stop_words='english', max_features=1150)
        hamfit = hamfe.fit_transform(train['data'].loc[train['labels'] == 0])
        spamfit = spamfe.fit_transform(train['data'].loc[train['labels'] == 1])
    # Train the requested classifier on the training features.
    if classifier == 'mnb':
        from sklearn.naive_bayes import MultinomialNB
        clf = MultinomialNB().fit(trainfe, train['labels'])
    elif classifier == 'gnb':
        from sklearn.naive_bayes import GaussianNB
        clf = GaussianNB().fit(trainfe.toarray(), train['labels'])
    elif classifier == 'svm':
        from sklearn.linear_model import SGDClassifier
        clf = SGDClassifier(loss='squared_hinge', penalty='l2').fit(trainfe, train['labels'])
    elif classifier == 'log':
        from sklearn.linear_model import SGDClassifier
        clf = SGDClassifier(loss='log', penalty='l2').fit(trainfe, train['labels'])
    elif classifier == 'rule':
        hamfeats = hamfe.transform(test['data'])
        spamfeats = spamfe.transform(test['data'])
        hyp = np.array(hamfeats.sum(axis=1) < spamfeats.sum(axis=1)).reshape(-1).T
    if classifier == 'rule':
        # BUG FIX: the original fell through to fe.transform() here, but 'fe'
        # is never defined on the rule path, raising NameError. The rule
        # hypothesis is already computed above, so skip feature extraction.
        pass
    else:
        # Extract features from test data with the fitted transformer(s).
        if features == 'lsa':
            feats = svd.transform(fe.transform(test['data']))
        else:
            feats = fe.transform(test['data'])
        # Use the trained classifier to generate class predictions.
        if classifier == 'gnb':
            hyp = clf.predict(feats.toarray())
        else:
            hyp = clf.predict(feats)
    # compare predictions with test labels
    score = np.mean(hyp == test['labels'])
    return score
示例8: NaiveBayesClassifier
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
class NaiveBayesClassifier(CrossDomainClassifier):
    """
    Naive bayes classifier with tfidf.

    NOTE(review): relies on attributes prepared elsewhere (presumably by
    CrossDomainClassifier): self.reviews/labels, test_reviews/test_labels,
    twitter_items/labels, ebay_items/labels, count_vect, tfidf_transformer
    and get_bag_of_ngrams -- confirm against the base class.
    """
    def train(self, limit_data=None):
        """Fit MultinomialNB on up to limit_data reviews (all by default)."""
        if not hasattr(self, 'reviews'):
            print "No data loaded"
            return
        if limit_data is None:
            limit_data = len(self.reviews)
        X = self.get_bag_of_ngrams(self.reviews[:limit_data])
        self.clf = MultinomialNB().fit(X, self.labels[:limit_data])

    def __test(self, reviews, labels):
        # Returns the error rate (1 - accuracy) on the given set and stores
        # the confusion matrix in self.cm as a side effect.
        X_training_counts = self.count_vect.transform(reviews)
        X_training_tfidf = self.tfidf_transformer.transform(X_training_counts)
        predicted = self.clf.predict(X_training_tfidf)
        self.cm = confusion_matrix(labels, predicted)
        return 1 - np.mean(predicted == labels)

    def get_training_error(self):
        """Error rate on the training reviews themselves."""
        return self.__test(self.reviews, self.labels)

    def get_generalized_error(self):
        """Error rate on the held-out test reviews (same domain)."""
        return self.__test(self.test_reviews, self.test_labels)

    def get_crossdomain_error(self):
        """Error rates on the out-of-domain twitter and ebay sets."""
        return {'twitter': self.__test(self.twitter_items, self.twitter_labels),
                'ebay': self.__test(self.ebay_items, self.ebay_labels)}

    def __get_scores(self, reviews, labels):
        # Macro-averaged precision/recall/F-score/support; also refreshes
        # self.cm like __test does.
        X_training_counts = self.count_vect.transform(reviews)
        X_training_tfidf = self.tfidf_transformer.transform(X_training_counts)
        predicted = self.clf.predict(X_training_tfidf)
        self.cm = confusion_matrix(labels, predicted)
        return precision_recall_fscore_support(labels, predicted, average='macro')

    def get_scores_training(self):
        """Macro P/R/F/support on the training reviews."""
        return self.__get_scores(self.reviews, self.labels)

    def get_scores_test(self):
        """Macro P/R/F/support on the held-out test reviews."""
        return self.__get_scores(self.test_reviews, self.test_labels)

    def get_scores_twitter(self):
        """Macro P/R/F/support on the twitter cross-domain set."""
        return self.__get_scores(self.twitter_items, self.twitter_labels)

    def get_scores_ebay(self):
        """Macro P/R/F/support on the ebay cross-domain set."""
        return self.__get_scores(self.ebay_items, self.ebay_labels)
示例9: train_and_test
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
def train_and_test(X_train, y_train, X_test, y_test):
    """Vectorize raw text, then train and score MNB, linear SVM and RF.

    Prints class counts and per-model metrics (via print_metrics) and
    returns (acc_mnb, f1_mnb, acc_svm, f1_svm, acc_rf, f1_rf).
    """
    print("Vectorizing features")
    vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5)
    X_train = vectorizer.fit_transform(X_train)
    X_test = vectorizer.transform(X_test)
    print("\nn_samples: %d, n_features: %d" % X_train.shape)
    print("\nTrain Set Counts:")
    counts = collections.Counter(y_train)
    print("\nTotal: ", len(y_train))
    print(counts)
    print("\nTest Set Counts:")
    counts = collections.Counter(y_test)
    print("\nTotal: ", len(y_test))
    print(counts)
    print("\nTraining and Testing")
    # BUG FIX: removed three dead `t0 = time()` assignments; the timestamps
    # were never read, presumably left over from removed timing output.
    print("\n=====MULTINOMIAL NAIVE BAYES=====")
    classifier = MultinomialNB(alpha=.01)
    classifier.fit(X_train, y_train)
    predicted = classifier.predict(X_test)
    acc_mnb, f1_mnb = print_metrics(y_test, predicted)
    print("\n========SVM========")
    classifier = svm.LinearSVC(multi_class='ovr')
    classifier.fit(X_train, y_train)
    predicted = classifier.predict(X_test)
    acc_svm, f1_svm = print_metrics(y_test, predicted)
    print("\n=====RANDOM FOREST=====")
    classifier = RandomForestClassifier(n_estimators=400, n_jobs=10)
    classifier.fit(X_train, y_train)
    predicted = classifier.predict(X_test)
    acc_rf, f1_rf = print_metrics(y_test, predicted)
    return (acc_mnb, f1_mnb, acc_svm, f1_svm, acc_rf, f1_rf)
示例10: bayes_bench
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
def bayes_bench():
    """Naive-Bayes baseline: tf-idf + MultinomialNB for the population and
    event-type labels of a pre-pickled dataset; prints valid/test error rates.
    """
    data_file = "./data/dataset.pkl"
    train_set, valid_set, test_set, word2id, pop2id, type2id = dataset.load_data(data_file)
    train_set_x, train_set_y = train_set
    # Each y is a triple (population, event type, location); location unused here.
    train_set_pop_y, train_set_type_y, train_set_loc_y = train_set_y
    valid_set_x, valid_set_y = valid_set
    valid_set_pop_y, valid_set_type_y, valid_set_loc_y = valid_set_y
    test_set_x, test_set_y = test_set
    test_set_pop_y, test_set_type_y, test_set_loc_y = test_set_y
    # Invert the vocabulary so digit sequences can be turned back into words.
    id2word = {v:k for k,v in word2id.items()}
    word_train_set_x = [sen_dig2word(doc, id2word) for doc in train_set_x]
    word_valid_set_x = [sen_dig2word(doc, id2word) for doc in valid_set_x]
    word_test_set_x = [sen_dig2word(doc, id2word) for doc in test_set_x]
    # Construct the word-count matrix (fit on train only, transform the rest).
    count_vect = CountVectorizer()
    x_train_count = count_vect.fit_transform(word_train_set_x)
    x_valid_count = count_vect.transform(word_valid_set_x)
    x_test_count = count_vect.transform(word_test_set_x)
    tfidf_transformer = TfidfTransformer()
    x_train_tfidf = tfidf_transformer.fit_transform(x_train_count)
    x_valid_tfidf = tfidf_transformer.transform(x_valid_count)
    x_test_tfidf = tfidf_transformer.transform(x_test_count)
    # train the pop model
    pop_clf = MultinomialNB().fit(x_train_tfidf, train_set_pop_y)
    pop_pred = pop_clf.predict(x_valid_tfidf)
    pop_pred_test = pop_clf.predict(x_test_tfidf)
    # compute the performance (mean misclassification rate)
    pop_errors = np.mean(np.not_equal(pop_pred, valid_set_pop_y))
    pop_errors_test = np.mean(np.not_equal(pop_pred_test, test_set_pop_y))
    # train the event type model
    type_clf = MultinomialNB().fit(x_train_tfidf, train_set_type_y)
    type_pred = type_clf.predict(x_valid_tfidf)
    type_pred_test = type_clf.predict(x_test_tfidf)
    # compute the performance
    type_errors = np.mean(np.not_equal(type_pred, valid_set_type_y))
    type_errors_test = np.mean(np.not_equal(type_pred_test, test_set_type_y))
    # First line is the validation split, second the test split.
    print "MB--> Type error: %0.2f, Popuation error: %0.2f" % (type_errors, pop_errors)
    print "MB--> Type error: %0.2f, Popuation error: %0.2f" % (type_errors_test, pop_errors_test)
示例11: test_mnnb
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
def test_mnnb(kind):
    # Test Multinomial Naive Bayes classification.
    # This checks that MultinomialNB implements fit and predict and returns
    # correct values for a simple toy dataset.
    # NOTE(review): X2 and y2 are module-level fixtures defined outside this
    # excerpt; *kind* selects the dense or sparse representation.
    if kind == 'dense':
        X = X2
    elif kind == 'sparse':
        X = scipy.sparse.csr_matrix(X2)
    # Check the ability to predict the learning set.
    clf = MultinomialNB()
    # Negative counts are invalid for a multinomial model and must raise.
    assert_raises(ValueError, clf.fit, -X, y2)
    y_pred = clf.fit(X, y2).predict(X)
    assert_array_equal(y_pred, y2)
    # Verify that np.log(clf.predict_proba(X)) gives the same results as
    # clf.predict_log_proba(X)
    y_pred_proba = clf.predict_proba(X)
    y_pred_log_proba = clf.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba), y_pred_log_proba, 8)
    # Check that incremental fitting (three partial_fit chunks) yields the
    # same results as a single fit.
    clf2 = MultinomialNB()
    clf2.partial_fit(X[:2], y2[:2], classes=np.unique(y2))
    clf2.partial_fit(X[2:5], y2[2:5])
    clf2.partial_fit(X[5:], y2[5:])
    y_pred2 = clf2.predict(X)
    assert_array_equal(y_pred2, y2)
    y_pred_proba2 = clf2.predict_proba(X)
    y_pred_log_proba2 = clf2.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba2), y_pred_log_proba2, 8)
    assert_array_almost_equal(y_pred_proba2, y_pred_proba)
    assert_array_almost_equal(y_pred_log_proba2, y_pred_log_proba)
    # Partial fit on the whole data at once should be the same as fit too
    clf3 = MultinomialNB()
    clf3.partial_fit(X, y2, classes=np.unique(y2))
    y_pred3 = clf3.predict(X)
    assert_array_equal(y_pred3, y2)
    y_pred_proba3 = clf3.predict_proba(X)
    y_pred_log_proba3 = clf3.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba3), y_pred_log_proba3, 8)
    assert_array_almost_equal(y_pred_proba3, y_pred_proba)
    assert_array_almost_equal(y_pred_log_proba3, y_pred_log_proba)
示例12: main
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
def main():
    """Bag-of-words + MultinomialNB sentiment pipeline for the IMDB Kaggle data.

    Trains on labeledTrainData.tsv, predicts testData.tsv sentiments, writes
    BOW_NB.csv and pickles both the vectorizer and the model.

    NOTE(review): ``review_list`` and ``test_review_list`` are used below but
    never defined in this excerpt -- presumably the cleaned review texts built
    from the two dataframes by code outside this view; confirm before running.
    """
    # extract reviews from tsv files
    labeled_training_data = pd.read_csv("labeledTrainData.tsv", header=0, delimiter="\t", quoting=3) # 25,000 reviews
    test_data = pd.read_csv("testData.tsv", header=0, delimiter="\t", quoting=3) # 25, 000 reviews
    print "Creating BOW...."" "
    vectorizer = CountVectorizer(analyzer = "word", tokenizer = None, preprocessor = None, stop_words = None, max_features = 5000)
    trained_data_features = vectorizer.fit_transform(review_list)
    trained_data_features = trained_data_features.toarray() # convert to numpy array for faster processing
    print "Supervised Learning - Naive Bayes"
    nb_model = MultinomialNB(alpha = 0.01)
    nb_model = nb_model.fit(trained_data_features, labeled_training_data["sentiment"]) # using BOW as feaures and the given labels as repsonse variables
    print "---------------------------------"
    print " "
    print "Predicting on test data: "
    # BOW for test set
    test_data_features = vectorizer.transform(test_review_list)
    test_data_features = test_data_features.toarray()
    # use the trained forest to make predictions
    predictions = nb_model.predict(test_data_features)
    # prepare output submission file
    prediction_output = pd.DataFrame( data = {"id":test_data["id"], "sentiment":predictions} ) # create pandas dataframe
    prediction_output.to_csv("BOW_NB.csv", index=False, quoting=3)# write to csv file
    # Persist both halves of the pipeline for later reuse.
    joblib.dump(vectorizer, 'bow_model.pkl')
    joblib.dump(nb_model, 'nb_bow_model.pkl')
示例13: classify_reviews
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
def classify_reviews():
import featurizer
import gen_training_data
import numpy as np
from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import SGDClassifier
data = gen_training_data.gen_data();
stemmed_data = featurizer.stem(data);
tfidf= featurizer.tfidf(data);
clf = MultinomialNB().fit(tfidf['train_tfidf'], data['training_labels']);
predicted = clf.predict(tfidf['test_tfidf']);
num_wrong = 0;
tot = 0;
for expected, guessed in zip(data['testing_labels'], predicted):
if(expected-guessed != 0):
num_wrong += 1;
print("num_wrong: %d",num_wrong)
sgd_clf = SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3, n_iter=5, random_state=42);
_ = sgd_clf.fit(tfidf['train_tfidf'], data['training_labels']);
sgd_pred = sgd_clf.predict(tfidf['test_tfidf']);
print np.mean(sgd_pred == data['testing_labels']);
stem_tfidf = featurizer.tfidf(stemmed_data);
_ = sgd_clf.fit(stem_tfidf['train_tfidf'], data['training_labels']);
sgd_stem_prd = sgd_clf.predict(stem_tfidf['test_tfidf']);
print np.mean(sgd_stem_prd==data['testing_labels']);
示例14: main
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
def main(clf):
#print 'getting train'
train = pd.read_csv('dat/trainMN.tsv',sep = '\t')
#print 'getting test'
test = pd.read_csv('dat/devMN.tsv', sep = '\t')
global all_words
all_words = word_to_set(train['Phrase'], trim=20, is_raw=True)
#print 'creating x dict vectors from train'
train_x = train['Phrase']
#print 'extracting...'
train_x = use_feature_dicts(train_x)
# print train_x
#print 'creating train y'
train_y = [int(y) for y in train['Sentiment']]
if clf == 'NB':
classifier = MultinomialNB().fit(train_x, train_y)
elif clf == 'RF':
classifier = RandomForestClassifier().fit(train_x, train_y)
elif clf == 'LG':
classifier = linear_model.LinearRegression()
classifier = classifier.fit(train_x, train_y)
elif clf == 'SGD':
classifier = SGDClassifier().fit(train_x, train_y)
#print 'testing'
test_x = use_feature_dicts(test['Phrase'])
for i in classifier.predict(test_x):
print i
title = clf + '.pickle'
pickle.dump(classifier, open(title, 'w'))
示例15: naive_bayes
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict [as 别名]
def naive_bayes():
    """Fit a multinomial NB model on the shared train split and evaluate it.

    Uses module-level X_train/X_test/y_test and train_data.danger as the
    training target. Returns (precision, recall, score-as-string).
    """
    model = MultinomialNB()
    model.fit(X_train, train_data.danger)
    predictions = model.predict(X_test)
    accuracy = model.score(X_test, y_test)
    precision, recall, _, _ = precision_recall_fscore_support(y_test, predictions)
    return precision, recall, str(accuracy)