This article collects typical usage examples of the MultinomialNB.fit method from sklearn.naive_bayes in Python. If you are wondering what MultinomialNB.fit does, how to use it, or where to find examples of it, the curated code samples below should help. You can also read further about the containing class, sklearn.naive_bayes.MultinomialNB.
Fifteen code examples of MultinomialNB.fit are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code samples.
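For orientation before the examples, here is a minimal, self-contained sketch of the fit/predict cycle on a toy document-term count matrix (all data below is made up for illustration):

import numpy as np
from sklearn.naive_bayes import MultinomialNB

# Toy document-term counts: 4 documents x 3 vocabulary terms.
X = np.array([[2, 1, 0],
              [1, 3, 0],
              [0, 0, 4],
              [0, 1, 3]])
y = np.array(['spam', 'spam', 'ham', 'ham'])

clf = MultinomialNB()  # alpha=1.0 (Laplace smoothing) by default
clf.fit(X, y)          # the method this page documents
print(clf.predict(np.array([[0, 1, 2]])))  # -> ['ham']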
Example 1: classifier
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def classifier():
    nb = MultinomialNB(alpha=0)
    nb.fit(DOC_TRAIN, CLASS_TRAIN)
    db = DB()
    query = 'select cate_id, tf, url, content from site_content_3'
    cursor = db.cursor()
    logger.info(query)
    cursor.execute(query)
    rows = cursor.fetchall()
    for row in rows:
        currentCateId = row['cate_id']
        print 'rowID => ', row['cate_id']
        url = row['url']
        tf = row['tf']
        content = row['content']
        termFrequencyDict = {}
        try:
            termFrequencyDict = json.loads(tf)
        except ValueError:
            print 'error => ', url
            continue
        # build the test vector in the order of the training vocabulary, so its
        # length matches the columns of DOC_TRAIN; words absent from this
        # document contribute a zero count
        testItem = np.array([])
        for word in WORDS:
            tf = termFrequencyDict.get(word, 0)
            testItem = np.append(testItem, [tf])
        print "CURRENT CATE ", currentCateId
        print "NEW ", nb.predict(testItem.reshape(1, -1))  # predict expects shape (1, n_features)
Example 2: MultinomialNBClassify_Proba
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def MultinomialNBClassify_Proba(enrollment_id, trainData, trainLabel, testData):
    nbClf = MultinomialNB()  # default alpha=1.0, Laplace smoothing
    # setting alpha < 1 is called Lidstone smoothing
    nbClf.fit(trainData, ravel(trainLabel))
    testLabel = nbClf.predict_proba(testData)[:, 1]
    saveResult(enrollment_id, testLabel, 'Proba_sklearn_MultinomialNB_alpha=0.1_Result.csv')
    return testLabel
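The comment above distinguishes Laplace smoothing (alpha=1.0) from Lidstone smoothing (alpha < 1). A small sketch on made-up counts shows how the choice of alpha shifts the smoothed feature log-probabilities:

import numpy as np
from sklearn.naive_bayes import MultinomialNB

# Hypothetical document-term counts: 2 classes, 3 features.
X = np.array([[2, 1, 0],
              [3, 0, 0],
              [0, 1, 2]])
y = np.array([0, 0, 1])

for alpha in (1.0, 0.1):  # Laplace vs. Lidstone smoothing
    clf = MultinomialNB(alpha=alpha).fit(X, y)
    # Smaller alpha pushes the log-probability of unseen features further down.
    print(alpha, clf.feature_log_prob_[0])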
Example 3: crossValidate
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def crossValidate(X_dataset, y):
    # cross-validate the model
    num_folds = 5
    kfold = cross_validation.StratifiedKFold(y, n_folds=num_folds, shuffle=True)
    # kfold = KFold(X.shape[0], n_folds=10, shuffle=True)
    avg_accuracy = 0
    avg_precision = 0
    avg_recall = 0
    print "----------- cross_validation k=5"
    for train, test in kfold:
        Xtrain, Xtest, ytrain, ytest = X_dataset[train], X_dataset[test], y[train], y[test]
        # clf = LinearSVC()
        clf = MultinomialNB(alpha=0.1)
        # clf = LDA()
        clf.fit(Xtrain.toarray(), ytrain)
        ypred = clf.predict(Xtest.toarray())
        accuracy = metrics.accuracy_score(ytest, ypred)
        # print "accuracy = ", accuracy
        avg_accuracy += accuracy
        precision = metrics.precision_score(ytest, ypred)
        # print("precision: %0.3f" % precision)
        avg_precision += precision
        recall = metrics.recall_score(ytest, ypred)
        # print("recall: %0.3f" % recall)
        avg_recall += recall
    print "Average accuracy : ", (avg_accuracy / num_folds)
    print "Average precision : ", (avg_precision / num_folds)
    print "Average recall : ", (avg_recall / num_folds)
Example 4: run_naivebayes_evaluation
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def run_naivebayes_evaluation(self, inputdata, outputdata, k):
    """Fit a Naive Bayes classifier on the train set with cross validation.
    Run Naive Bayes classification on the test set. Return the results.
    """
    ###print "** Fitting Naive Bayes classifier.."
    # Cross validation
    cv = cross_validation.KFold(inputdata.shape[0], n_folds=k, indices=True)
    cv_naivebayes = []
    f1_scores = []
    for traincv, testcv in cv:
        clf_cv = MultinomialNB()
        clf_cv.fit(inputdata[traincv], outputdata[traincv])
        y_pred_cv = clf_cv.predict(inputdata[testcv])
        f1 = metrics.f1_score(outputdata[testcv], y_pred_cv, pos_label=0)
        f1_scores.append(f1)
        #TODO: NEEDED? self.classifier = clf_cv
    print "score average: " + str(np.mean(f1_scores))
    average_score = np.mean(f1_scores)
    tuples = (average_score, f1_scores)
    return (tuples, 'N.A.', 'N.A.')
Example 5: train
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def train(good_sources, bad_sources, method, naive_bayes=None, keywords=list()):
    # train the algorithm
    good_samples = find_keywords(' '.join([entry[method] for entry in good_sources]))
    bad_samples = find_keywords(' '.join([entry[method] for entry in bad_sources]))
    # if we have an existing knowledge base to append this new information to, do so
    if naive_bayes:
        new_kws = set(good_samples + bad_samples)
        print('Using old keywords as well')
        print("# old keywords = {}\n # new keywords = {}".format(len(keywords), len(new_kws)))
        new_kws = set(good_samples + bad_samples).difference(keywords)
        print("# fresh keywords = {}\n".format(len(new_kws)))
        # make some call to naive_bayes.partial_fit in here
        X = np.concatenate((naive_bayes.feature_count_, np.zeros((naive_bayes.feature_count_.shape[0], len(new_kws)))), 1)
        all_kw = keywords + list(new_kws)
    else:
        print('Only using keywords from this content set')
        all_kw = list(set(good_samples + bad_samples))
        X = np.zeros((2, len(all_kw)))
    for j, kw in enumerate(all_kw):
        X[0, j] += good_samples.count(kw)
        X[1, j] += bad_samples.count(kw)
    y = ['good', 'bad']
    naive_bayes = MultinomialNB()
    naive_bayes.fit(X, y)
    return naive_bayes, all_kw
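The commented-out note above refers to naive_bayes.partial_fit. scikit-learn's MultinomialNB does support incremental training via partial_fit, but it requires a fixed feature space across calls, unlike the column-growing scheme this function implements by hand. A minimal sketch on hypothetical keyword counts:

import numpy as np
from sklearn.naive_bayes import MultinomialNB

clf = MultinomialNB()
classes = ['good', 'bad']  # must be declared on the first incremental call

# First batch of (hypothetical) keyword counts over 3 fixed features.
X1 = np.array([[3, 0, 1], [0, 2, 2]])
clf.partial_fit(X1, ['good', 'bad'], classes=classes)

# A later batch over the SAME three features: counts accumulate.
X2 = np.array([[1, 1, 0], [0, 0, 4]])
clf.partial_fit(X2, ['good', 'bad'])
print(clf.predict(np.array([[2, 0, 1]])))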
Example 6: run_learning_curves_experiment
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def run_learning_curves_experiment(dataset):
    logger.info("Now starting experiment with learning curves...")
    scores = []
    sklearn_scores = []
    train_sizes = []
    clf = MultinomialBayesEstimator()
    sklearn_clf = MultinomialNB()
    # Constructing confidence intervals using empirical bootstrap
    intervals = []
    for test_size in xrange(1, len(dataset)):
        f_scores = []
        f_scores_sklearn = []
        for train_set, test_set in split_train_test_p_out(dataset, test_size):
            X_train, y_train, X_test, y_test = make_test_train(train_set, test_set)
            clf.fit(X_train, y_train)
            f_scores.append(f1_score(y_test, clf.predict(X_test)))
            sklearn_clf.fit(X_train, y_train.ravel())
            f_scores_sklearn.append(f1_score(y_test, sklearn_clf.predict(X_test)))
        intervals.append(calculate_confidence_interval(f_scores))
        scores.append(np.mean(f_scores))
        sklearn_scores.append(np.mean(f_scores_sklearn))
        train_sizes.append(len(dataset) - test_size)
    plot_learning_curves(train_sizes, sklearn_scores, scores, intervals)
Example 7: predict
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def predict(cur, plyr_id, game_plyrs):
    # creates the training set (called 'X') for the player
    all_plyrs = all_player_ids(cur)        # np.array - all NFL players (and coaches)
    games = games_played_in(cur, plyr_id)  # np.array - the game_ids the player played in
    n_cols = all_plyrs.shape[0]            # int
    m_rows = games.shape[0]                # int
    w = weights(games)
    zeros = np.zeros((m_rows, n_cols))     # 2-d array - used to initialize the DataFrame
    X = pd.DataFrame(zeros, index=games, columns=all_plyrs)
    populate_training_set(cur, X, games, plyr_id)
    #print "X: ", X.values
    ### run coaches_model first, then continue here ###
    # creates the vector of known output values
    Y = training_output_vector(cur, games, plyr_id)
    #print "(len) Y: ", len(Y), Y
    test_zeros = np.zeros((1, n_cols))     # 2-d array - used to initialize the test DataFrame
    test_X = pd.DataFrame(test_zeros, columns=all_plyrs)
    update_training_matrix(cur, game_plyrs, 0, test_X)
    # run the multinomial NB classifier
    nb_clf = MultinomialNB()
    if len(X.values) == 0:
        return 0
    nb_clf.fit(X, Y, sample_weight=w)
    nb_predictions = nb_clf.predict(test_X)
    #print "test_X: ", test_X.values
    nb_norm_prob = normalize_probs(nb_clf.predict_proba(test_X)[0])
    avgs = [3, 8, 12.5, 17, 21, 25]
    #print "probs: ", nb_norm_prob
    #print avgs
    ev = expected_val(nb_norm_prob, avgs)  # can also be computed as a dot product
    return round(ev, 1)
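The closing comment notes that expected_val can also be computed as a dot product; with NumPy that is a one-liner (the probabilities below are hypothetical stand-ins for nb_norm_prob):

import numpy as np

nb_norm_prob = np.array([0.1, 0.2, 0.3, 0.2, 0.1, 0.1])  # hypothetical normalized class probabilities
avgs = np.array([3, 8, 12.5, 17, 21, 25])                 # the bucket averages from the example above
ev = float(np.dot(nb_norm_prob, avgs))                    # expected value = probability-weighted mean
print(round(ev, 1))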
Example 8: train
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def train(self):
    '''
    ## -- How to predict -- ##
    query = "blah blah"
    q = list2Vec(hashit(query))
    clf2 = joblib.load('nb')
    print(clf2.predict(q))  # <--- returns the type id
    '''
    limit = self.comment_limit
    sqls = ["SELECT body FROM comment JOIN entity ON comment.eid = entity.eid WHERE entity.tid=1 ORDER BY time DESC LIMIT " + str(limit),
            "SELECT body FROM comment JOIN entity ON comment.eid = entity.eid WHERE entity.tid=2 ORDER BY time DESC LIMIT " + str(limit),
            "SELECT body FROM comment JOIN entity ON comment.eid = entity.eid WHERE entity.tid=3 ORDER BY time DESC LIMIT " + str(limit)]
    print "training model"
    comments = self.sql2list(sqls)
    x, y = self.featureMatrix(comments)
    X = list2Vec(x)
    Y = list2Vec(y)
    q = "Let's talk about food."
    q_vec = list2Vec(hashit(q))
    # Predicting
    print "Classifying"
    clf = MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
    clf.fit(X, Y)
    joblib.dump(clf, self.path, compress=9)
Example 9: multinomialNB
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def multinomialNB(devMatrix, trainMatrix, devtarget, traintarget):
    f = open('MNNB2.log', 'a')
    f.write("Making model!!!!!")
    print 'Making model!'
    clf = MultinomialNB(alpha=1, fit_prior=False)
    clf.fit(trainMatrix, traintarget)
    f.write("\n")
    value = ('Model: multinomial bayes with parameters ', clf.get_params(False))
    print(str(value))
    f.write(str(value))
    f.write("\n")
    f.write("MSE for train: %.2f" % np.mean((clf.predict(trainMatrix) - traintarget) ** 2))
    score = clf.score(trainMatrix, traintarget)
    f.write("\n")
    value = ('Score for train %.2f', score)
    f.write(str(value))
    f.write("\n")
    f.write("MSE for dev: %.2f" % np.mean((clf.predict(devMatrix) - devtarget) ** 2))
    score = clf.score(devMatrix, devtarget)
    value = ('Score for dev %.2f', score)
    print(str(value))
    f.write("\n")
    s = str(value)
    f.write(s)
    f.write("\n")
    f.write('model done')
    f.write("\n")
    f.write("\n")
    f.close()
    return score
Example 10: train_classifiers
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def train_classifiers(X_data, y_data):
    ############ Linear SVM: 0.908 #############
    clf_LSVM = svm.SVC(kernel='linear')
    clf_LSVM.fit(X_data, y_data)
    ############ MultinomialNB: 0.875 #############
    clf_MNB = MultinomialNB()
    clf_MNB.fit(X_data, y_data)
    ############ Random Forest: 0.910 #############
    clf_RF = RandomForestClassifier(n_estimators=200, criterion='entropy')
    clf_RF.fit(X_data, y_data)
    ############ Extra Trees: 0.915 ##################
    clf_ETC = ExtraTreesClassifier(n_estimators=500, max_depth=None, min_samples_split=1, random_state=0)
    clf_ETC.fit(X_data, y_data)
    ############ AdaBoost: 0.88 ##################
    clf_Ada = AdaBoostClassifier()
    clf_Ada.fit(X_data, y_data)
    ############ RBF SVM: 0.895 #############
    clf_rbf = svm.SVC(C=200, gamma=0.06, kernel='rbf')
    clf_rbf.fit(X_data, y_data)
    ############ GradientBoosting: 0.88 #############
    clf_GBC = GradientBoostingClassifier()
    clf_GBC.fit(X_data, y_data)
    return clf_LSVM, clf_MNB, clf_RF, clf_ETC, clf_Ada, clf_rbf, clf_GBC
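The function returns the seven fitted classifiers separately (the commented accuracy figures are the author's own results). If the goal is to combine them, one hypothetical option is scikit-learn's VotingClassifier; a sketch on random stand-in data, not the original X_data:

import numpy as np
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC

X_data = np.random.randint(0, 5, size=(60, 10))  # hypothetical count features
y_data = np.random.randint(0, 2, size=60)

# Majority vote over three of the classifiers trained above.
ensemble = VotingClassifier(estimators=[
    ('mnb', MultinomialNB()),
    ('rf', RandomForestClassifier(n_estimators=200, criterion='entropy')),
    ('lsvm', SVC(kernel='linear')),
], voting='hard')
ensemble.fit(X_data, y_data)
print(ensemble.predict(X_data[:3]))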
Example 11: naive_bayes
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def naive_bayes(x_value, y_value):
    X = x_value
    y = y_value
    # train/test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=123)
    vect = CountVectorizer()
    vect.fit(X_train)
    X_train_dtm = vect.transform(X_train)
    X_test_dtm = vect.transform(X_test)
    from sklearn.naive_bayes import MultinomialNB
    nb = MultinomialNB()
    nb.fit(X_train_dtm, y_train)
    y_pred_class = nb.predict(X_test_dtm)
    print 'Accuracy: '
    print metrics.accuracy_score(y_test, y_pred_class)
    print 'Null Accuracy: '
    print y_test.value_counts().head(1) / len(y_test)
    print 'Confusion Matrix: '
    print metrics.confusion_matrix(y_test, y_pred_class)
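The vectorize-then-fit steps above can also be chained into a single scikit-learn Pipeline, so the vectorizer and the classifier are always fitted together; a minimal sketch on a made-up corpus:

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Hypothetical toy corpus and labels.
docs = ["free money now", "meeting at noon", "win cash prizes", "lunch tomorrow?"]
labels = [1, 0, 1, 0]

model = make_pipeline(CountVectorizer(), MultinomialNB())
model.fit(docs, labels)  # vectorization and fitting in one step
print(model.predict(["free cash meeting"]))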
Example 12: trainNB
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def trainNB(xTrain, yTrain):
    classifier = MultinomialNB()
    classifier.fit(xTrain, yTrain)
    return classifier
Example 13: nb
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def nb(x_train, x_test, y_train, doc_app_id, id_name_dict):
    clf = MultinomialNB(alpha=0.01)
    clf.fit(x_train, y_train)
    pred = clf.predict(x_test)
    for i in range(len(pred)):
        app_id = doc_app_id[i]
        print id_name_dict[app_id] + " " + str(pred[i])
Example 14: do_lda
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def do_lda(x, y, folds):
    indexes = list(range(len(x)))
    shuffle(indexes)
    x = list(x[i] for i in indexes)
    y = list(y[i] for i in indexes)
    fold_size = len(x) / folds
    corrects = []
    for fold in range(folds):
        test_x = []
        train_x = []
        test_y = []
        train_y = []
        for i in range(len(x)):
            fold_index = i / fold_size
            if fold == fold_index:
                test_x.append(x[i])
                test_y.append(y[i])
            else:
                train_x.append(x[i])
                train_y.append(y[i])
        print 'Partitioned data into fold'
        test_x, train_x = remove_redundant_dimensions(test_x, train_x)
        print 'Removed redundant dimensions'
        nb = MultinomialNB()
        nb.fit(train_x, train_y)
        print 'Fit NB'
        predictions = nb.predict(test_x)
        # lda = LDA()
        # lda.fit(train_x, train_y)
        # print 'Fit lda'
        # predictions = lda.predict(test_x)
        correct = sum(1 for i in range(len(predictions)) if predictions[i] == test_y[i])
        print 'Did fold, correct:', correct
        corrects.append(correct)
    return corrects
Example 15: train
# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import fit [as alias]
def train(self, data):
    launches = map(lambda x: x['application'], data)
    instances = map(lambda i: {'lu1': launches[i-1]}, xrange(1, len(launches)))
    X = self.vectorizer.fit_transform(instances).toarray()
    y = launches[1:]
    # fit a separate estimator per feature set; reusing a single MultinomialNB
    # instance would leave both attributes pointing at the same re-fitted model
    self.lu1_predictor = MultinomialNB().fit(X, y)
    instances = map(lambda i: {'lu2': launches[i-2]}, xrange(2, len(launches)))
    X = self.vectorizer.fit_transform(instances).toarray()
    y = launches[2:]
    self.lu2_predictor = MultinomialNB().fit(X, y)
    # tune mu
    max_hr = 0
    best_mu = 0
    for mu in map(lambda x: x / 10.0, xrange(11)):
        self.mu = mu
        predictions = map(lambda i: self.predict({'lu1': launches[i-1], 'lu2': launches[i-2]}),
                          xrange(2, len(launches)))
        hr, mrr = self.test(launches[2:], predictions)
        if hr > max_hr:
            max_hr = hr
            best_mu = mu
    self.mu = best_mu
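The tuning loop assumes a predict method (not shown here) that blends the two predictors with weight mu. A hypothetical reconstruction of that blend as a convex combination of class probabilities:

import numpy as np

def interpolate_proba(p_lu1, p_lu2, mu):
    """Hypothetical reconstruction of the mixture the tuned mu is used for:
    a convex combination of the two predictors' class probabilities."""
    return mu * p_lu1 + (1.0 - mu) * p_lu2

# Toy probabilities over three candidate applications.
p1 = np.array([0.6, 0.3, 0.1])
p2 = np.array([0.2, 0.5, 0.3])
print(interpolate_proba(p1, p2, mu=0.4))  # -> [0.36 0.42 0.22]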