本文整理汇总了Python中sklearn.naive_bayes.GaussianNB类的典型用法代码示例。如果您正苦于以下问题:Python GaussianNB类的具体用法?Python GaussianNB怎么用?Python GaussianNB使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了GaussianNB类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: selectKBest
def selectKBest(previous_result, data):
    """Score GaussianNB on the top-k ranked features for k = 1..10.

    Returns a list of (k, score0, score1, score2) tuples, one per k.

    NOTE(review): relies on module-level `my_dataset`, `featureFormat`,
    `targetFeatureSplit` and `score_func`; the `data` parameter is
    overwritten inside the loop — both preserved from the original.
    """
    from sklearn.cross_validation import StratifiedShuffleSplit
    from sklearn.naive_bayes import GaussianNB

    # remove 'restricted_stock_deferred' and 'director_fees'
    previous_result.pop(4)
    previous_result.pop(4)

    result = []
    for k in range(10):
        # 'poi' first, then the names of the k+1 best-ranked features.
        feature_list = ['poi'] + [pair[0] for pair in previous_result[:k + 1]]
        data = featureFormat(my_dataset, feature_list, sort_keys=True, remove_all_zeroes=False)
        labels, features = targetFeatureSplit(data)
        features = [abs(v) for v in features]

        # 1000 stratified shuffle splits, accumulated into flat train/test lists.
        cv = StratifiedShuffleSplit(labels, 1000, random_state=42)
        features_train, features_test = [], []
        labels_train, labels_test = [], []
        for train_idx, test_idx in cv:
            features_train.extend(features[i] for i in train_idx)
            labels_train.extend(labels[i] for i in train_idx)
            features_test.extend(features[j] for j in test_idx)
            labels_test.extend(labels[j] for j in test_idx)

        clf = GaussianNB()
        clf.fit(features_train, labels_train)
        predictions = clf.predict(features_test)
        score = score_func(labels_test, predictions)
        result.append((k + 1, score[0], score[1], score[2]))
    return result
示例2: test_gnb_sample_weight
def test_gnb_sample_weight():
    """Test whether sample weights are properly used in GNB."""
    # Unit sample weights must be a no-op.
    unit_weights = np.ones(6)
    unweighted = GaussianNB().fit(X, y)
    weighted = GaussianNB().fit(X, y, unit_weights)
    assert_array_almost_equal(unweighted.theta_, weighted.theta_)
    assert_array_almost_equal(unweighted.sigma_, weighted.sigma_)

    # Two partial fits at half weight must equal one fit at full weight.
    sw = rng.rand(y.shape[0])
    full_fit = GaussianNB().fit(X, y, sample_weight=sw)
    half_fit = GaussianNB().partial_fit(X, y, classes=[1, 2], sample_weight=sw / 2)
    half_fit.partial_fit(X, y, sample_weight=sw / 2)
    assert_array_almost_equal(full_fit.theta_, half_fit.theta_)
    assert_array_almost_equal(full_fit.sigma_, half_fit.sigma_)

    # Duplicated rows must match correspondingly increased sample weights.
    ind = rng.randint(0, X.shape[0], 20)
    dup_weights = np.bincount(ind, minlength=X.shape[0])
    on_duplicates = GaussianNB().fit(X[ind], y[ind])
    on_weights = GaussianNB().fit(X, y, dup_weights)
    assert_array_almost_equal(on_duplicates.theta_, on_weights.theta_)
    assert_array_almost_equal(on_duplicates.sigma_, on_weights.sigma_)
示例3: scikitNBClassfier
def scikitNBClassfier(self):
    """Train a GaussianNB on the bag-of-words matrix of the processed
    corpus and print the number of mislabeled training documents.

    NOTE(review): the prediction on the single test document and the
    `clabels` list are computed but unused — preserved from the original.
    """
    dataMat, labels = self.loadProcessedData()
    bayesian = Bayesian()
    myVocabList = bayesian.createVocabList(dataMat)
    # Build the bag-of-words matrix, one row per document.
    trainMat = [bayesian.setOfWords2Vec(myVocabList, doc) for doc in dataMat]

    from sklearn.naive_bayes import GaussianNB
    gnb = GaussianNB()
    X = array(trainMat)
    y = labels

    testText = "美国军队的军舰今天访问了巴西港口城市,并首次展示了核潜艇攻击能力,飞机,监听。他们表演了足球。"
    testEntry = self.testEntryProcess(testText)
    bayesian = Bayesian()
    thisDoc = array(bayesian.setOfWords2Vec(myVocabList, testEntry))

    # Fit and predict: once on the single test document, then on the
    # whole training matrix to count training-set mistakes.
    y_pred = gnb.fit(X, y).predict(thisDoc)
    clabels = ['军事', '体育']
    y_pred = gnb.fit(X, y).predict(X)
    print("Number of mislabeled points : %d" % (labels != y_pred).sum())
示例4: categorize
def categorize(train_data, test_data, train_class, n_features):
    """Fit a Gaussian Naive Bayes model on the training data and return
    its predictions for the test data.

    Parameters
    ----------
    train_data, train_class : training feature matrix and labels.
    test_data : feature matrix to predict.
    n_features : unused; kept for interface compatibility. It belonged to
        the commented-out feature-selection experiments (SelectKBest/RFE)
        that were removed here along with eight other classifiers that
        were constructed but never used.
    """
    gnb = GaussianNB()
    gnb.fit(train_data, train_class)
    return gnb.predict(test_data)
示例5: performNB
def performNB(trainingScores, trainingResults, testScores):
    """Train a GaussianNB on per-mark score columns and return the
    test-set probability of class 1.

    trainingScores / testScores map mark names to parallel score lists;
    marks whose name contains "Asym" are excluded from the feature matrix.
    (Converted from Python 2 print statements to print() calls.)
    """
    print("->Gaussian NB")
    # All score lists are parallel, so any key gives the row count
    # (replaces the original's opaque `for currMark in ...: pass` idiom).
    any_mark = next(iter(trainingScores))
    X = [[] for _ in range(len(trainingScores[any_mark]))]
    for currMark in trainingScores:
        if "Asym" in currMark:
            continue
        print(currMark, end=' ')
        for idx, value in enumerate(trainingScores[currMark]):
            X[idx].append(value)

    # NOTE(review): the test row count uses whichever key the loop above
    # left in `currMark` — preserved from the original.
    X_test = [[] for _ in range(len(testScores[currMark]))]
    for mark in trainingScores:
        if "Asym" in mark:
            continue
        for idx, value in enumerate(testScores[mark]):
            X_test[idx].append(value)

    gnb = GaussianNB()
    gnb.fit(X, np.array(trainingResults))
    y_pred = gnb.predict_proba(X_test)[:, 1]
    print("->Gaussian NB")
    return y_pred
示例6: NBAccuracy
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """Compute the accuracy of a Gaussian Naive Bayes classifier.

    Fits on the training split, predicts on the test split (both steps
    timed and printed), and returns the test accuracy.
    (Converted from Python 2 print statements; removed the unused
    `import numpy as np`.)
    """
    from sklearn.naive_bayes import GaussianNB
    from time import time  # local import keeps the block self-contained

    clf = GaussianNB()
    t0 = time()
    clf.fit(features_train, labels_train)
    print("training time:", round(time() - t0, 3), "s")

    t1 = time()
    pred = clf.predict(features_test)
    print("predicting time:", round(time() - t1, 3), "s")

    # score() computes mean accuracy on the held-out data.
    return clf.score(features_test, labels_test)
示例7: classify
def classify(features_train, labels_train):
    """Fit a Gaussian Naive Bayes classifier on the training features
    and labels, and return the fitted classifier."""
    classifier = GaussianNB()
    classifier.fit(features_train, labels_train)
    return classifier
示例8: naive_bayes
def naive_bayes(features, labels):
    """Fit a GaussianNB, 10-fold cross-validate it, and print the mean
    precision/recall/F-score/support row via print_table."""
    model = GaussianNB()
    model.fit(features, labels)
    fold_scores = cross_validation.cross_val_score(
        model, features, labels, cv=10,
        score_func=metrics.precision_recall_fscore_support,
    )
    mean_scores = numpy.around(numpy.mean(fold_scores, axis=0), 2)
    print_table("Naive Bayes", mean_scores)
示例9: test_gnb_prior
def test_gnb_prior():
    """Class priors must match empirical class frequencies and sum to 1."""
    model = GaussianNB().fit(X, y)
    expected_priors = np.array([3, 3]) / 6.0
    assert_array_almost_equal(expected_priors, model.class_prior_, 8)
    # Refit on the second dataset and check the priors still sum to one.
    model.fit(X1, y1)
    assert_array_almost_equal(model.class_prior_.sum(), 1)
示例10: nb_names
def nb_names():
    """Build a shuffled (string, class) dataset from DB names plus
    generated words and fit a GaussianNB on it.

    (Converted from Python 2: `xrange` -> `range`, `print X,Y` -> print().)

    NOTE(review): GaussianNB expects numeric features; fitting on a raw
    string array will raise at runtime — preserved as in the original,
    flagged for the author.
    """
    # Generate the list of (name, 'name') tuples from the database.
    engine = create_engine('sqlite:///names.db')
    DBSession = sessionmaker(bind=engine)
    session = DBSession()
    db_names = names.Names.getAllNames(session)
    names_list = [(x, 'name') for x in db_names]

    # Sample as many names as there are generated words, keeping DB order.
    words_list = generate_words()
    sample_idx = sorted(random.sample(range(len(names_list)), len(words_list)))
    sample_names = [names_list[i] for i in sample_idx]

    data = sample_names + words_list
    shuffled_data = np.random.permutation(data)

    strings = []
    classification = []
    for item in shuffled_data:
        strings.append([item[0]])
        classification.append(str(item[1]))

    X = np.array(strings)
    Y = np.array(classification)
    print(X, Y)
    clf = GaussianNB()
    clf.fit(X, Y)
示例11: CruiseAlgorithm
class CruiseAlgorithm(object):
    """Classify cruise phase vs non-cruise phase.

    Uses the differential change (row-wise first difference) of the data
    stream as the input matrix for a GaussianNB core classifier.
    """

    def __init__(self, testing=False):
        self.core = GaussianNB()      # underlying classifier
        self.scaler = RobustScaler()  # NOTE(review): created but never used here
        self.X_prev = None            # last raw X seen by predict()
        self.testing = testing        # True: diff whole batches via prepare()

    def fit(self, X, Y):
        """Fit the core classifier on the differences of X; Y is the
        cruise/non-cruise label array (flattened with ravel())."""
        X = self.prepare(X)
        self.core.fit(X, Y.ravel())

    def predict(self, X):
        """Predict on the change since the previous call (streaming mode)
        or on the batch differences (testing mode); returns a matrix."""
        if self.testing:
            X_t = self.prepare(X)
        else:
            # BUG FIX: the original `if self.X_prev:` raises ValueError for
            # numpy arrays (truth value of a multi-element array is
            # ambiguous); test identity against None instead.
            if self.X_prev is not None:
                X_t = X - self.X_prev
            else:
                X_t = X
            self.X_prev = X
        print(repr(X_t))  # py3 print() (was a Python 2 print statement)
        prediction_result = self.core.predict(X_t)
        return np.asmatrix(prediction_result)

    def prepare(self, X):
        """Return the row-wise first difference of X; the first row is
        all zeros (replaces Python 2 `xrange` with `range`)."""
        a = np.zeros((X.shape[0], X.shape[1]))
        for i in range(X.shape[0] - 1):
            a[i + 1, :] = X[i + 1] - X[i]
        return a
示例12: trainNB
def trainNB():
    """Read the training CSV and fit a GaussianNB on the active features.

    Returns the fitted classifier. Relies on the module-level
    `activeFeatureIndex` listing the feature column indices; the last
    CSV column is assumed to be the integer class label.
    """
    featureVector = []
    classVector = []
    # `with` guarantees the file is closed even if parsing raises
    # (the original's explicit close() was skipped on any mid-loop error).
    with open(r'C:\Python34\alchemyapi_python\TrainingDataDummy.csv') as train:
        headerLine = True
        for line in train:
            if headerLine:
                headerLine = False
                continue
            fields = line.split(",")
            featureVector.append([float(fields[i]) for i in activeFeatureIndex])
            classVector.append(int(fields[-1].rstrip("\n")))

    fVector = np.array(featureVector)
    cVector = np.array(classVector)
    print(fVector.shape)
    print(cVector.shape)

    clf = GaussianNB()
    clf.fit(fVector, cVector)
    return clf
示例13: univariateFeatureSelection
def univariateFeatureSelection(f_list, my_dataset):
    """Score each candidate feature with a one-feature GaussianNB.

    For every feature, missing values are zeroed, a 1000-split stratified
    shuffle evaluation is run, and (feature, score0, score1, score2) is
    collected; the list is returned sorted by the last score, descending.

    NOTE(review): relies on module-level `featureFormat`,
    `targetFeatureSplit` and `score_func` — preserved as-is.
    """
    from sklearn.cross_validation import StratifiedShuffleSplit
    from sklearn.naive_bayes import GaussianNB

    scored = []
    for feature in f_list:
        # Normalise missing values: falsy entries and the literal string
        # 'NaN' both become 0 so featureFormat keeps every record.
        for name in my_dataset:
            record = my_dataset[name]
            if not record[feature] or record[feature] == 'NaN':
                record[feature] = 0

        data = featureFormat(my_dataset, ['poi', feature], sort_keys=True, remove_all_zeroes=False)
        labels, features = targetFeatureSplit(data)
        features = [abs(v) for v in features]

        cv = StratifiedShuffleSplit(labels, 1000, random_state=42)
        features_train, features_test = [], []
        labels_train, labels_test = [], []
        for train_idx, test_idx in cv:
            features_train.extend(features[i] for i in train_idx)
            labels_train.extend(labels[i] for i in train_idx)
            features_test.extend(features[j] for j in test_idx)
            labels_test.extend(labels[j] for j in test_idx)

        clf = GaussianNB()
        clf.fit(features_train, labels_train)
        predictions = clf.predict(features_test)
        score = score_func(labels_test, predictions)
        scored.append((feature, score[0], score[1], score[2]))

    return sorted(scored, reverse=True, key=lambda row: row[3])
示例14: NBAccuracy
def NBAccuracy(features_train, labels_train, features_test, labels_test):
    """Fit a Gaussian Naive Bayes classifier and return its test accuracy.

    Fit and predict steps are timed and printed. (Cleaned up: Python 2
    print statements -> print(), stray semicolons removed, the dead
    `GaussianNB();` statement dropped, and accuracy_score arguments put
    in the documented (y_true, y_pred) order — for plain accuracy the
    swapped order gave the same number.)
    """
    from sklearn.naive_bayes import GaussianNB
    from sklearn.metrics import accuracy_score
    from time import time  # local import keeps the block self-contained

    classifier = GaussianNB()

    # Time the fit on the training split.
    t0 = time()
    classifier.fit(features_train, labels_train)
    print("Training Time: ", round(time() - t0, 3), "s")

    # Time the prediction on the test split.
    t0 = time()
    pred = classifier.predict(features_test)
    print("Prediction Time: ", round(time() - t0, 3), "s")

    return accuracy_score(labels_test, pred)
示例15: __init__
class GaussianNBClassifier:
    """Thin wrapper around sklearn's GaussianNB with explicit build,
    train, validate and test steps.

    (Converted the Python 2 `print accuracy_score(...)` statements to
    print() calls; the rest is unchanged.)
    """

    def __init__(self):
        """Initialize the wrapper; the model itself is built lazily."""
        self.outputHeader = "#gnb"
        self.clf = None  # set by buildModel()

    def buildModel(self):
        """Instantiate the underlying Gaussian NB model."""
        self.clf = GaussianNB()

    def trainGaussianNB(self, X, Y):
        """Fit the Gaussian NB classifier on features X and labels Y."""
        self.clf.fit(X, Y)

    def validateGaussianNB(self, X, Y):
        """Predict on the validation set and print the accuracy."""
        YPred = self.clf.predict(X)
        print(accuracy_score(Y, YPred))

    def testGaussianNB(self, X, Y):
        """Predict on the test set and print the accuracy."""
        YPred = self.clf.predict(X)
        print(accuracy_score(Y, YPred))