本文整理汇总了Python中sklearn.naive_bayes.GaussianNB.fit方法的典型用法代码示例。如果您正苦于以下问题:Python GaussianNB.fit方法的具体用法?Python GaussianNB.fit怎么用?Python GaussianNB.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.naive_bayes.GaussianNB
的用法示例。
在下文中一共展示了GaussianNB.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
def main():
    """Train and evaluate a Gaussian Naive Bayes model on the prepared dataset.

    Loads the train/test split via the project's `utils` helpers, builds the
    feature matrices, fits GaussianNB, and prints accuracy and AUC on the
    held-out test set.
    """
    # Data preparation and a quick look at the split.
    train_data, test_data = utils.prepare_data()
    utils.inspect_dataset(train_data, test_data)

    # Feature engineering: build train/test design matrices.
    X_train, X_test = utils.do_feature_engineering(train_data, test_data)
    print('共有{}维特征。'.format(X_train.shape[1]))

    # Extract the target labels.
    y_train = train_data['label'].values
    y_test = test_data['label'].values

    # Modeling and evaluation.
    print('\n===================== 数据建模及验证 =====================')
    model = GaussianNB()
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print('准确率:', accuracy_score(y_test, predictions))
    print('AUC值:', roc_auc_score(y_test, predictions))
示例2: categorize
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
def categorize(train_data, test_data, train_class, n_features):
    """Fit a Gaussian Naive Bayes classifier and predict labels for the test set.

    Fix over the original: nine alternative classifiers (LogisticRegression,
    SVC, DecisionTree, RandomForest, AdaBoost, ...) were constructed but never
    used, and large swaths of commented-out feature-selection code were dead;
    only the GaussianNB path was live, so everything else is removed.

    Args:
        train_data: training feature matrix.
        test_data: test feature matrix.
        train_class: training labels.
        n_features: accepted for interface compatibility but unused
            (it only fed the commented-out SelectKBest/RFE code).

    Returns:
        Predicted labels for `test_data`.
    """
    gnb = GaussianNB()
    gnb.fit(train_data, train_class)
    return gnb.predict(test_data)
示例3: scikitNBClassfier
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
def scikitNBClassfier(self):
    """Fit a Gaussian NB text classifier on a bag-of-words matrix and report
    the number of mislabeled training documents.

    Fix over the original: the model was fitted twice (`gnb.fit(X, y)` called
    back to back) and the prediction for the ad-hoc test document was
    immediately overwritten and lost; we now fit once and keep both
    predictions explicitly. A redundant second `Bayesian()` instance and the
    unused `clabels` list (class labels '军事'/'体育') were also removed.
    """
    dataMat, labels = self.loadProcessedData()
    bayesian = Bayesian()
    myVocabList = bayesian.createVocabList(dataMat)
    # Build the set-of-words (bag-of-words) training matrix.
    trainMat = [bayesian.setOfWords2Vec(myVocabList, postinDoc) for postinDoc in dataMat]
    from sklearn.naive_bayes import GaussianNB
    gnb = GaussianNB()
    X = array(trainMat)
    y = labels
    # Ad-hoc Chinese test document, vectorized with the same vocabulary.
    testText = "美国军队的军舰今天访问了巴西港口城市,并首次展示了核潜艇攻击能力,飞机,监听。他们表演了足球。"
    testEntry = self.testEntryProcess(testText)
    thisDoc = array(bayesian.setOfWords2Vec(myVocabList, testEntry))
    # Fit once, then predict.
    gnb.fit(X, y)
    doc_pred = gnb.predict(thisDoc)  # prediction for the ad-hoc document (was discarded in the original)
    y_pred = gnb.predict(X)
    print("Number of mislabeled points : %d" % (labels != y_pred).sum())
示例4: NBAccuracy
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """Train a Gaussian Naive Bayes classifier and return its test accuracy.

    Prints wall-clock training and prediction times.

    Fixes over the original: removed the unused `import numpy as np`, and
    converted the Python 2 print statements to the `print()` function used by
    the other examples in this file.

    Args:
        features_train, labels_train: training data.
        features_test, labels_test: held-out evaluation data.

    Returns:
        Mean accuracy of the classifier on the test data (float).
    """
    from sklearn.naive_bayes import GaussianNB

    clf = GaussianNB()
    t0 = time()
    clf.fit(features_train, labels_train)
    print("training time:", round(time() - t0, 3), "s")

    # Time the prediction pass; accuracy itself is computed via clf.score below.
    t1 = time()
    pred = clf.predict(features_test)
    print("predicting time:", round(time() - t1, 3), "s")

    accuracy = clf.score(features_test, labels_test)
    return accuracy
示例5: NB_experiment
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
def NB_experiment(data_fold, train, test, dumper):
    """Cross-validate a Gaussian Naive Bayes classifier and dump a ranking.

    Args:
        data_fold: number of CV folds (or a CV iterator) passed to
            cross_val_score.
        train: pair-like container; train[0] is the feature matrix,
            train[1] the labels.
        test: test data handed to the project's `clf_test` helper.
        dumper: open writable file-like object used for logging results.

    Returns:
        None. Side effects: writes to `dumper` and to 'nb.ranking'.

    NOTE(review): uses the deprecated `sklearn.cross_validation` module and
    its `score_func` argument — presumably an old sklearn pin; confirm.
    """
    print "Ready to find the Best Parameters for Naive Bayes"
    print 'Gaussian Naive Bayes'
    nb = GNB()
    print "fitting NaiveBayes Experiment"
    dumper.write('Classifier: Naive Bayes\n')
    scores = cross_validation.cross_val_score(nb, train[0], train[1],
                                              cv = data_fold, score_func=accus)
    # Summary is mean +/- half a standard deviation (std/2, not a confidence interval).
    reports = "Accuracy on Train: %0.2f (+/- %0.2f)"%(scores.mean(), scores.std()/2)
    print reports
    dumper.write(reports+'\n')
    # Also dump the individual per-fold scores.
    reports = " ".join(['%0.2f'%(item) for item in scores])
    dumper.write(reports+'\n')
    # Refit a fresh estimator on the full training set for the ranking output.
    nb = GNB()
    nb.fit(train[0], train[1])
    pred = clf_test(nb, test)
    output_ranking(pred, codecs.open('nb.ranking', 'w', 'utf-8'))
    return None
示例6: __init__
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
class GaussianNBClassifier:
    """Thin wrapper around sklearn's GaussianNB with explicit build/train/eval steps.

    Fix over the original: the Python 2 print statements were converted to the
    `print()` function used by the other examples in this file.
    """

    def __init__(self):
        """Initialize the wrapper; the model is built lazily by buildModel()."""
        self.outputHeader = "#gnb"  # tag used to identify this classifier's output
        self.clf = None             # underlying estimator, None until buildModel()

    def buildModel(self):
        """Instantiate the underlying Gaussian NB estimator."""
        self.clf = GaussianNB()

    def trainGaussianNB(self, X, Y):
        """Fit the Gaussian NB classifier on features X and labels Y."""
        self.clf.fit(X, Y)

    def validateGaussianNB(self, X, Y):
        """Predict on a validation set and print the accuracy score."""
        YPred = self.clf.predict(X)
        print(accuracy_score(Y, YPred))

    def testGaussianNB(self, X, Y):
        """Predict on a test set and print the accuracy score."""
        YPred = self.clf.predict(X)
        print(accuracy_score(Y, YPred))
示例7: performNB
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
def performNB(trainingScores, trainingResults, testScores):
    """Train GaussianNB on per-mark score columns; return positive-class probabilities.

    Args:
        trainingScores: dict mapping mark name -> list of per-sample scores.
            Marks whose name contains "Asym" are excluded from the features.
        trainingResults: training labels (array-like).
        testScores: dict with the same mark keys mapping to test-sample scores.

    Returns:
        1-D array of predict_proba values for class 1 on the test samples.

    NOTE(review): assumes every mark's score list has the same length — confirm.
    """
    print "->Gaussian NB"
    X = []
    # The empty loop deliberately leaks the *last* key of trainingScores into
    # `currMark`; the next loop uses it only to size the row list X.
    for currMark in trainingScores:
        pass
    for idx in range(0, len(trainingScores[currMark])):
        X.append([])
    # Build the row-major training matrix, skipping asymmetric marks.
    for currMark in trainingScores:
        if "Asym" in currMark:
            continue
        print currMark,
        for idx in range(0, len(trainingScores[currMark])):
            X[idx].append(trainingScores[currMark][idx])
    X_test = []
    # NOTE(review): `currMark` here is whatever key the loop above ended on —
    # possibly an "Asym" mark that contributed no features; sizing still works
    # only under the equal-length assumption above.
    for idx in range(0, len(testScores[currMark])):
        X_test.append([])
    for currMark in trainingScores:
        if "Asym" in currMark:
            continue
        for idx in range(0, len(testScores[currMark])):
            X_test[idx].append(testScores[currMark][idx])
    gnb = GaussianNB()
    gnb.fit(X, np.array(trainingResults))
    # Column 1 = probability of the positive class.
    y_pred = gnb.predict_proba(X_test)[:, 1]
    print "->Gaussian NB"
    return y_pred
示例8: __init__
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
class RegularizedGaussianNB:
    """GaussianNB with post-hoc regularization of the per-class variances.

    Three types of regularization are possible:
      - shrink each feature's within-class variance toward the average
        variance of all features in that class (`avg_weight`),
      - shrink each feature's within-class variance toward its pooled
        variance across all classes (`pooled_weight`),
      - add a constant amount of variance to every feature (`extra_variance`).

    In practice the constant addition tends to work best, though the amount
    should be cross-validated.

    Fix over the original: `xrange` is Python 2-only and broke under the
    Python 3 used by other examples in this file; replaced with `range`.
    """

    def __init__(self, avg_weight=0, pooled_weight=0, extra_variance=0.1):
        self.pooled_weight = pooled_weight
        self.avg_weight = avg_weight
        self.extra_variance = extra_variance
        self.model = GaussianNB()

    def fit(self, X, Y):
        """Fit the underlying GaussianNB, then blend its per-class variances."""
        self.model.fit(X, Y)
        p = self.pooled_weight
        a = self.avg_weight
        ev = self.extra_variance
        # Weight left for the raw class variance after the two shrinkage terms.
        original_weight = 1.0 - p - a
        # Pooled per-feature variance, ignoring class labels.
        pooled_variances = np.var(X, 0)
        # NOTE(review): `sigma_` was renamed `var_` in scikit-learn 1.0;
        # adjust the attribute name if running against a modern sklearn.
        for i in range(self.model.sigma_.shape[0]):
            class_variances = self.model.sigma_[i, :]
            new_variances = original_weight * class_variances + \
                            p * pooled_variances + \
                            a * np.mean(class_variances) + \
                            ev
            self.model.sigma_[i, :] = new_variances

    def predict(self, X):
        """Delegate prediction to the regularized underlying model."""
        return self.model.predict(X)
示例9: createNaiveBayesModel
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
def createNaiveBayesModel(feature_vector_data):
    """Build one GaussianNB model per word type from training feature vectors.

    Uses the dimensionally reduced feature vectors of each (instance, sense)
    pair to create a naive bayes model; only rows tagged "training" are used,
    validation rows are ignored.

    Fixes over the original: `iteritems`/`xrange` are Python 2-only, and the
    repeated `instance_sense_dict.keys()[i]` / `.values()[i]` indexing (which
    also fails on Python 3 dict views) rebuilt the key/value lists on every
    iteration; a single pass over `.items()` replaces all of it.

    Args:
        feature_vector_data: dict mapping word_type -> dict whose keys are
            (instance, sense, data_type) tuples and whose values are feature
            vectors.

    Returns:
        dict mapping word_type -> fitted GaussianNB model.
    """
    naive_bayes_model_word_type = {}
    for word_type, instance_sense_dict in feature_vector_data.items():
        vectors = []
        senses = []
        for key, vector in instance_sense_dict.items():
            # key layout: (instance, sense, data_type)
            sense, data_type = key[1], key[2]
            if data_type == "training":
                vectors.append(vector)
                senses.append(sense)
        vectors = np.array(vectors)
        senses = np.array(senses)
        nb = GaussianNB()
        nb.fit(vectors, senses)
        naive_bayes_model_word_type[word_type] = nb
    return naive_bayes_model_word_type
示例10: test_classification
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
def test_classification():
    """Train and evaluate GaussianNB on the iris data, printing several metrics.

    NOTE(review): relies on module-level `data` and `target` arrays and on
    `zeros` — presumably from a star-import of numpy and an iris loader
    earlier in the file; confirm at the file top. Also uses the deprecated
    `sklearn.cross_validation` module, so this targets an old sklearn.
    """
    # Encode the string targets as integers 1..3.
    t = zeros(len(target))
    t[target == 'setosa'] = 1
    t[target == 'versicolor'] = 2
    t[target == 'virginica'] = 3
    from sklearn.naive_bayes import GaussianNB
    classifier = GaussianNB()
    classifier.fit(data,t) # training on the iris dataset
    # Sanity check: predict the first sample and compare with its true label.
    # NOTE(review): passing a 1-D sample to predict() is rejected by modern sklearn.
    print classifier.predict(data[0])
    print t[0]
    from sklearn import cross_validation
    # Hold out 40% of the data for testing.
    train, test, t_train, t_test = cross_validation.train_test_split(data, t, test_size=0.4, random_state=0)
    classifier.fit(train,t_train) # train
    print classifier.score(test,t_test) # test
    from sklearn.metrics import confusion_matrix
    print confusion_matrix(classifier.predict(test),t_test)
    from sklearn.metrics import classification_report
    print classification_report(classifier.predict(test), t_test, target_names=['setosa', 'versicolor', 'virginica'])
    from sklearn.cross_validation import cross_val_score
    # cross validation with 6 iterations
    scores = cross_val_score(classifier, data, t, cv=6)
    print scores
    from numpy import mean
    print mean(scores)
示例11: simple_svm_train
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
def simple_svm_train(emotion, training_set):
    """Train a classifier for one emotion and return post-processed test results.

    Despite the function name, the live implementation uses GaussianNB (the
    SVM variant was commented out).

    Fixes over the original: removed the unused locals (`song_list`,
    `sizes_list`, `other_emotions`) and the dead commented-out SVM code and
    progress prints.

    Args:
        emotion: target emotion label to train against.
        training_set: raw training data handed to the sampling helper.

    Returns:
        Whatever `post_process_results` produces for this emotion's test run.
    """
    # Sample the training set, then extract feature/class vectors plus a held-out split.
    sampled_dict = create_sample_dict(training_set)
    feature_vector, class_vector, test_values, test_class = extract_features(sampled_dict, emotion, training_set)

    clf = GaussianNB()
    clf.fit(feature_vector, class_vector)

    # Test and analyze the results.
    results = test_classifier(clf, emotion, test_values)
    return post_process_results(results, emotion)
示例12: myClassifier
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
def myClassifier(X, Y, model, CV=4, scoreType='pure'):
    """Fit the requested classifier on (X, Y) and score it.

    Args:
        X, Y: features and labels.
        model: one of "SVM", "LR", "NB", "MLP".
        CV: number of folds when scoreType == 'cv'.
        scoreType: 'cv' for cross-validated accuracy, 'pure' for training-set
            accuracy.

    Returns:
        (accuracy, fitted classifier) tuple.

    Fixes over the original: the "SVM" branch never called `fit`, so
    scoreType='pure' raised NotFittedError for SVMs; the unused `score = {}`
    local was removed; the Python 2 print statement was converted to print().
    """
    print("Error Analysis using", scoreType)
    if model == "SVM":
        clf = svm.SVC(probability=True, random_state=0, kernel='rbf')
        clf.fit(X, Y)  # BUG FIX: the original left the SVM unfitted
    elif model == "LR":
        clf = linear_model.LogisticRegression()
        clf.fit(X, Y)
    elif model == "NB":
        clf = GaussianNB()
        clf.fit(X, Y)
    elif model == 'MLP':  # multilayer perceptron
        clf = MLPClassifier(hidden_layer_sizes=[100], algorithm='l-bfgs')
        clf.fit(X, Y)

    if scoreType == 'cv':
        # cross_val_score refits internally on each fold.
        accu = np.mean(cross_validation.cross_val_score(clf, X, Y, scoring='accuracy', cv=CV))
    elif scoreType == 'pure':
        predictions = clf.predict(X)
        # Fraction of training samples predicted correctly.
        accu = sum(int(predictions[q] == Y[q]) for q in range(len(Y))) / len(Y)
    return accu, clf
示例13: MyNaiveBayes
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
def MyNaiveBayes(object):
    """Train two GaussianNB models to predict x/y positions and report mean errors.

    NOTE(review): despite the signature this is a *function*, not a class; the
    `object` parameter shadows the builtin and is never used — it looks like
    this was meant to be `class MyNaiveBayes(object):`. Confirm with callers
    before changing the interface.

    Returns:
        (x, y, d): mean absolute error on the x axis, on the y axis, and mean
        Euclidean distance error between predicted and true positions.
    """
    pre = PreProcess()
    (training_value, test_value, test_pos_x, test_pos_y, training_pos_x, training_pos_y) = pre.split()
    # Model initialization: one classifier per coordinate.
    clf_x = GaussianNB()
    clf_y = GaussianNB()
    # Train both models on the same features.
    clf_x.fit(training_value, training_pos_x)
    clf_y.fit(training_value, training_pos_y)
    # Predict test-set coordinates.
    result_pos_x = clf_x.predict(test_value)
    result_pos_y = clf_y.predict(test_value)
    '''
    print result_pos_x
    print test_pos_x
    print result_pos_y
    print test_pos_y
    '''
    # Accumulate per-sample absolute errors and Euclidean distances.
    x_dis = []
    y_dis = []
    d_dis = []
    for i in range(len(result_pos_x)):
        x_dis.append(abs(result_pos_x[i] - test_pos_x[i]))
        y_dis.append(abs(result_pos_y[i] - test_pos_y[i]))
        d_dis.append(math.sqrt((result_pos_x[i]-test_pos_x[i])**2+(result_pos_y[i]-test_pos_y[i])**2))
    # Mean errors. NOTE(review): under Python 2 these are integer divisions
    # when the sums are ints — confirm whether float means were intended.
    x = (sum(x_dis))/len(result_pos_x)
    y = (sum(y_dis))/len(result_pos_y)
    d = (sum(d_dis))/len(d_dis)
    print x, y, d
    return x, y, d
示例14: boundaries
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
def boundaries():
    """Plot GaussianNB decision regions on the first two iris features.

    Fits the model on sepal length/width, shades the predicted class over a
    dense grid, overlays the per-class feature means, and saves the figure to
    decision_boundary.pdf.
    """
    # Load the iris data, keeping only the first two features.
    iris = datasets.load_iris()
    features = iris.data[:, :2]
    labels = iris.target
    step = .02  # grid resolution

    # Per-class mean of each feature; column i holds the means for class i.
    classes = list(set(labels))
    class_means = np.empty((features.shape[1], len(classes)))
    for idx, lab in enumerate(classes):
        class_means[:, idx] = features[labels == lab].mean(axis=0)

    # Dense evaluation grid padded by 1 unit on every side.
    x_lo, x_hi = features[:, 0].min() - 1, features[:, 0].max() + 1
    y_lo, y_hi = features[:, 1].min() - 1, features[:, 1].max() + 1
    grid_x, grid_y = np.meshgrid(np.arange(x_lo, x_hi, step),
                                 np.arange(y_lo, y_hi, step))

    # Fit and classify every grid point.
    model = GaussianNB()
    model.fit(features, labels)
    Z = model.predict(np.c_[grid_x.ravel(), grid_y.ravel()])
    Z = Z.reshape(grid_x.shape)

    # Shade the decision regions and overlay the class means.
    plt.contourf(grid_x, grid_y, Z, cmap=plt.cm.Paired, alpha=0.8)
    plt.scatter(class_means[0, :], class_means[1, :])
    plt.xlabel('Sepal length')
    plt.ylabel('Sepal width')
    plt.xlim(grid_x.min(), grid_x.max())
    plt.ylim(grid_y.min(), grid_y.max())
    plt.xticks(())
    plt.yticks(())
    plt.savefig("decision_boundary.pdf")
    plt.clf()
示例15: naive_bayes
# 需要导入模块: from sklearn.naive_bayes import GaussianNB [as 别名]
# 或者: from sklearn.naive_bayes.GaussianNB import fit [as 别名]
def naive_bayes(features, labels):
    """Fit GaussianNB and print a table of 10-fold CV precision/recall/F-score.

    The model is first fitted on the full data (cross_val_score refits per
    fold internally); the per-fold metric tuples are averaged and rounded to
    two decimals before printing.
    """
    model = GaussianNB()
    model.fit(features, labels)
    cv_scores = cross_validation.cross_val_score(
        model,
        features,
        labels,
        cv=10,
        score_func=metrics.precision_recall_fscore_support,
    )
    rounded_means = numpy.around(numpy.mean(cv_scores, axis=0), 2)
    print_table("Naive Bayes", rounded_means)