本文整理汇总了Python中sklearn.naive_bayes.MultinomialNB.predict_proba方法的典型用法代码示例。如果您正苦于以下问题:Python MultinomialNB.predict_proba方法的具体用法?Python MultinomialNB.predict_proba怎么用?Python MultinomialNB.predict_proba使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.naive_bayes.MultinomialNB
的用法示例。
在下文中一共展示了MultinomialNB.predict_proba方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: MultinomialNaiveBayesGridSearch_OLD
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def MultinomialNaiveBayesGridSearch_OLD():
    """Grid-search MultinomialNB's smoothing parameter `alpha` by test-set ROC AUC.

    Relies on module-level ``f_train``, ``y_train``, ``f_test``, ``y_test`` and
    the plotting helper ``st`` — TODO confirm these are defined at module scope.
    Plots one ROC curve per candidate alpha, then re-fits and plots the best
    model.  Returns the classifier fitted with the best alpha.
    """
    # NOTE: the loop variable is called `c`, but the value tuned is
    # MultinomialNB's smoothing strength `alpha`.
    cs = 10.0 ** np.arange(-9, 2, 0.5)
    aucs = []
    for c in cs:
        clf = MultinomialNB(alpha=c).fit(f_train, y_train)
        probs = clf.predict_proba(f_test)
        fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
        roc_auc = auc(fpr, tpr)
        myplt = st.plotROC(fpr, tpr, roc_auc,
                           figure=False,
                           show=False,
                           returnplt=True,
                           showlegend=False,
                           title='Grid Search - Multinomial Naive Bayes ROC Curve')
        aucs.append(roc_auc)
    # Pick the alpha with the highest AUC (replaces the manual index scan).
    best = int(np.argmax(aucs))
    c = cs[best]
    clf = MultinomialNB(alpha=c).fit(f_train, y_train)
    probs = clf.predict_proba(f_test)
    fpr, tpr, _ = roc_curve(y_true=y_test, y_score=probs[:, 1])
    # BUG FIX: the original plotted `roc_auc` left over from the LAST grid
    # iteration, not the AUC of the best model; recompute it here.
    roc_auc = auc(fpr, tpr)
    myplt = st.plotROC(fpr, tpr, roc_auc,
                       legendlabel='Best alpha = %0.2e' % c,
                       figure=False,
                       show=False,
                       returnplt=True,
                       showlegend=True,
                       title='Grid Search - Multinomial Naive Bayes ROC Curve')
    myplt.show()
    return clf
示例2: recommend
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def recommend(twitterword):
    """Return the predicted chance (in percent) that a tweet containing
    ``twitterword`` gets retweeted above the per-user average threshold.

    Trains a MultinomialNB over the word-count features of the tweet corpus
    returned by ``get_words_df()`` and scores the single query word.
    """
    newpd = get_words_df()
    newpd['Tweet'] = newpd['Tweet'].map(lambda x: str(x))
    newpd['was_retweeted'] = newpd['average_retweet_threshold']
    # Hyper-parameters chosen by an earlier grid search.
    best_alpha = 50.0
    best_min_df = 0.01
    vectorizer = CountVectorizer(min_df=best_min_df)
    x, y = make_xy(newpd, vectorizer)
    xtrain, xtest, ytrain, ytest = train_test_split(x, y)
    clf = MultinomialNB(alpha=best_alpha).fit(xtrain, ytrain)
    # (removed dead, side-effect-free predict/predict_proba calls over the
    # full corpus whose results were never used)
    # Column 1 is P(was_retweeted == 1); report it as a percentage.
    retweet_chance = clf.predict_proba(vectorizer.transform([twitterword]))
    answer = retweet_chance[0][1] * 100
    return answer
示例3: test_mnnb
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def test_mnnb(kind):
    # Exercise Multinomial Naive Bayes on a toy dataset (dense or sparse):
    # fit/predict round-trip, consistency of predict_proba with
    # predict_log_proba, and equivalence of partial_fit with batch fit.
    if kind == 'dense':
        X = X2
    elif kind == 'sparse':
        X = scipy.sparse.csr_matrix(X2)

    def check_probas(estimator):
        # predict_log_proba must be exactly log(predict_proba).
        proba = estimator.predict_proba(X)
        log_proba = estimator.predict_log_proba(X)
        assert_array_almost_equal(np.log(proba), log_proba, 8)
        return proba, log_proba

    # Negative features must be rejected; the learning set must be recovered.
    clf = MultinomialNB()
    assert_raises(ValueError, clf.fit, -X, y2)
    assert_array_equal(clf.fit(X, y2).predict(X), y2)
    ref_proba, ref_log_proba = check_probas(clf)

    # Incremental fitting in three chunks must reproduce the batch results.
    clf2 = MultinomialNB()
    clf2.partial_fit(X[:2], y2[:2], classes=np.unique(y2))
    clf2.partial_fit(X[2:5], y2[2:5])
    clf2.partial_fit(X[5:], y2[5:])
    assert_array_equal(clf2.predict(X), y2)
    proba2, log_proba2 = check_probas(clf2)
    assert_array_almost_equal(proba2, ref_proba)
    assert_array_almost_equal(log_proba2, ref_log_proba)

    # A single partial_fit over the whole data must match fit as well.
    clf3 = MultinomialNB()
    clf3.partial_fit(X, y2, classes=np.unique(y2))
    assert_array_equal(clf3.predict(X), y2)
    proba3, log_proba3 = check_probas(clf3)
    assert_array_almost_equal(proba3, ref_proba)
    assert_array_almost_equal(log_proba3, ref_log_proba)
示例4: self_training
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def self_training(attribute,iterate_count,initial_data_count,new_data_count):
from data_constructor import construct
print ''
construct(attribute,initial_data_count)
unlabel_train_x,unlabel_train_y,unlabel_train_uids=get_data(attribute,'train_unlabel')
train_x,train_y,train_uids=get_data(attribute,'train')
test_x,test_y,_=get_data(attribute,'test')
scores=[]
for i in xrange(iterate_count):
print '----------------'
print 'Iterate: %d'%i
print 'Labeled training data size: %d'%(len(train_x))
print 'Unlabeled training data size: %d'%(len(unlabel_train_x))
print 'Testing data size: %d'%(len(test_x))
clf=MultinomialNB()
clf.fit(train_x,train_y)
score=clf.score(test_x,test_y)
print 'Accurate: %0.4f'%score
scores.append(score)
result=clf.predict_proba(unlabel_train_x)
good_x,good_y,bad_x,bad_y=extract_new_data(zip(unlabel_train_x,result),new_data_count)
if len(good_x)==0:
print 'No more new train data!'
break
print 'New training data size: %d'%(len(good_x))
train_x=numpy.concatenate((train_x, good_x), axis=0)
train_y=numpy.concatenate((train_y, good_y), axis=0)
unlabel_train_x,unlabel_train_y=bad_x,bad_y
print '--------'
for s in scores:
print s
print '--------'
示例5: bag_of_words_probabilities
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def bag_of_words_probabilities(train_reviews, test_reviews):
    """ Implements a baseline bag-of-words classifier. Returns a dictionary mapping tuples (review_id, class) to the probability that that review belongs to that class. """
    # Assemble the training corpus and its rating labels.
    train_corpus = []
    Y_train = []
    for review_id in train_reviews:
        record = train_reviews[review_id]
        train_corpus.append(record["text"])
        Y_train.append(record["rating"])
    vectorizer = CountVectorizer(stop_words='english')
    X_train = vectorizer.fit_transform(train_corpus)
    # Test corpus in the same iteration order as test_reviews' keys, so that
    # probability rows line up with review ids below.
    test_corpus = [test_reviews[review_id]["text"] for review_id in test_reviews]
    clf = MultinomialNB().fit(X_train, Y_train)
    X_test = vectorizer.transform(test_corpus)
    Y_probability = clf.predict_proba(X_test)
    # Map each test review id to its positive-class (column 1) probability.
    probability_dict = {}
    for row_index, review_id in enumerate(test_reviews.keys()):
        probability_dict[review_id] = Y_probability[row_index][1]
    return probability_dict
示例6: __init__
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
class RecommenderNB:
min_score = None
stop_words = ["a","a's","able","about","above","according","accordingly","across","actually","after","afterwards","again","against","ain't","all","allow","allows","almost","alone","along","already","also","although","always","am","among","amongst","an","and","another","any","anybody","anyhow","anyone","anything","anyway","anyways","anywhere","apart","appear","appreciate","appropriate","are","aren't","around","as","aside","ask","asking","associated","at","available","away","awfully","b","be","became","because","become","becomes","becoming","been","before","beforehand","behind","being","believe","below","beside","besides","best","better","between","beyond","both","brief","but","by","c","c'mon","c's","came","can","can't","cannot","cant","cause","causes","certain","certainly","changes","clearly","co","com","come","comes","concerning","consequently","consider","considering","contain","containing","contains","corresponding","could","couldn't","course","currently","d","definitely","described","despite","did","didn't","different","do","does","doesn't","doing","don't","done","down","downwards","during","e","each","edu","eg","eight","either","else","elsewhere","enough","entirely","especially","et","etc","even","ever","every","everybody","everyone","everything","everywhere","ex","exactly","example","except","f","far","few","fifth","first","five","followed","following","follows","for","former","formerly","forth","four","from","further","furthermore","g","get","gets","getting","given","gives","go","goes","going","gone","got","gotten","greetings","h","had","hadn't","happens","hardly","has","hasn't","have","haven't","having","he","he's","hello","help","hence","her","here","here's","hereafter","hereby","herein","hereupon","hers","herself","hi","him","himself","his","hither","hopefully","how","howbeit","however","i","i'd","i'll","i'm","i've","ie","if","ignored","immediate","in","inasmuch","inc","indeed","indicate","indicated","indicates","inner","insofar","instead","into","inward","
is","isn't","it","it'd","it'll","it's","its","itself","j","just","k","keep","keeps","kept","know","knows","known","l","last","lately","later","latter","latterly","least","less","lest","let","let's","like","liked","likely","little","look","looking","looks","ltd","m","mainly","many","may","maybe","me","mean","meanwhile","merely","might","more","moreover","most","mostly","much","must","my","myself","n","name","namely","nd","near","nearly","necessary","need","needs","neither","never","nevertheless","new","next","nine","no","nobody","non","none","noone","nor","normally","not","nothing","novel","now","nowhere","o","obviously","of","off","often","oh","ok","okay","old","on","once","one","ones","only","onto","or","other","others","otherwise","ought","our","ours","ourselves","out","outside","over","overall","own","p","particular","particularly","per","perhaps","placed","please","plus","possible","presumably","probably","provides","q","que","quite","qv","r","rather","rd","re","really","reasonably","regarding","regardless","regards","relatively","respectively","right","s","said","same","saw","say","saying","says","second","secondly","see","seeing","seem","seemed","seeming","seems","seen","self","selves","sensible","sent","serious","seriously","seven","several","shall","she","should","shouldn't","since","six","so","some","somebody","somehow","someone","something","sometime","sometimes","somewhat","somewhere","soon","sorry","specified","specify","specifying","still","sub","such","sup","sure","t","t's","take","taken","tell","tends","th","than","thank","thanks","thanx","that","that's","thats","the","their","theirs","them","themselves","then","thence","there","there's","thereafter","thereby","therefore","therein","theres","thereupon","these","they","they'd","they'll","they're","they've","think","third","this","thorough","thoroughly","those","though","three","through","throughout","thru","thus","to","together","too","took","toward","towards","tried","tries","truly","try","trying","tw
ice","two","u","un","under","unfortunately","unless","unlikely","until","unto","up","upon","us","use","used","useful","uses","using","usually","uucp","v","value","various","very","via","viz","vs","w","want","wants","was","wasn't","way","we","we'd","we'll","we're","we've","welcome","well","went","were","weren't","what","what's","whatever","when","whence","whenever","where","where's","whereafter","whereas","whereby","wherein","whereupon","wherever","whether","which","while","whither","who","who's","whoever","whole","whom","whose","why","will","willing","wish","with","within","without","won't","wonder","would","would","wouldn't","x","y","yes","yet","you","you'd","you'll","you're","you've","your","yours","yourself","yourselves"]
def __init__(self, num_hashtags=40):
RecommenderNB.min_score = float(1/(float(num_hashtags)-1.0))
self.tl = TweetLib()
print "Generating classifier ... "
documents = self.tl.get_hashtag_documents(num_hashtags)
corpus = [b for a, b in documents]
self.hashtags = [a for a,b in documents]
all_classes = range(len(documents))
self.vectorizer = TfidfVectorizer(stop_words='english')
self.xtrain = self.vectorizer.fit_transform(corpus)
self.ytrain = all_classes
self.parameters = {'alpha': 0.01}
self.clf = MultinomialNB(**self.parameters).partial_fit(self.xtrain, self.ytrain, self.ytrain)
print "Classifier has been generated..."
def recommend(self, tweet):
tweet = " ".join([w.lower() for w in tweet.split() if not w.lower() in RecommenderNB.stop_words])
xtest = self.vectorizer.transform([tweet])
pred = self.clf.predict_proba(xtest)[0]
sorted_pred = sorted(enumerate(pred), key=lambda x:x[1])
max_score = max([b for a,b in sorted_pred])
if max_score < RecommenderNB.min_score:
return None
else:
return list(reversed([self.hashtags[i[0]] for i in sorted_pred]))
示例7: predict
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def predict(cur, plyr_id, game_plyrs):
    """Predict a player's expected point value for a game.

    Builds a (games x players) training matrix for ``plyr_id``, fits a
    game-weighted MultinomialNB, and returns the probability-weighted
    expected value rounded to one decimal.  Returns 0 when there is no
    training data.
    """
    all_plyrs = all_player_ids(cur)        # np.array - all NFL players (and coaches)
    games = games_played_in(cur, plyr_id)  # np.array - game ids the player played in
    n_cols = all_plyrs.shape[0]
    m_rows = games.shape[0]
    w = weights(games)
    # Training matrix: one row per game played, one column per player id.
    zeros = np.zeros((m_rows, n_cols))
    X = pd.DataFrame(zeros, index=games, columns=all_plyrs)
    populate_training_set(cur, X, games, plyr_id)
    # Known output value (scoring bucket) for each training game.
    Y = training_output_vector(cur, games, plyr_id)
    # BUG FIX: the original built test_X from the (m_rows x n_cols) training
    # `zeros` instead of the single-row `test_zeros` it had just created,
    # yielding a test matrix with one row per training game.
    test_zeros = np.zeros((1, n_cols))
    test_X = pd.DataFrame(test_zeros, columns=all_plyrs)
    update_training_matrix(cur, game_plyrs, 0, test_X)
    nb_clf = MultinomialNB()
    if len(X.values) == 0:
        return 0
    nb_clf.fit(X, Y, sample_weight=w)
    # Normalized bucket probabilities dotted with bucket midpoints gives the
    # expected value.  (Removed an unused nb_clf.predict(test_X) call.)
    nb_norm_prob = normalize_probs(nb_clf.predict_proba(test_X)[0])
    avgs = [3, 8, 12.5, 17, 21, 25]
    ev = expected_val(nb_norm_prob, avgs)  # can also calc dot product
    return round(ev, 1)
示例8: MultinomialNBClassify_Proba
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def MultinomialNBClassify_Proba(enrollment_id, trainData, trainLabel, testData):
    # Fit a multinomial NB with the default alpha=1.0 (Laplace smoothing;
    # alpha < 1 would be Lidstone smoothing) and persist the positive-class
    # probabilities for the test rows before returning them.
    # NOTE(review): the output filename advertises alpha=0.1, but the model
    # is constructed with the default alpha=1.0 — confirm which was intended.
    nbClf = MultinomialNB().fit(trainData, ravel(trainLabel))
    testLabel = nbClf.predict_proba(testData)[:, 1]
    saveResult(enrollment_id, testLabel, 'Proba_sklearn_MultinomialNB_alpha=0.1_Result.csv')
    return testLabel
示例9: NaiveBayesClassifier
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
class NaiveBayesClassifier():
tfidf_transformer = TfidfTransformer(norm = None)
def __init__(self, keywords_path, dataset_path):
print 'Initializing NaiveBayesClassifier..'
self.data_collector = NaiveBayesDataCollector(keywords_path, dataset_path)
self.keywords = self.data_collector.keywords
self.documents = self.data_collector.documents
self.target_classes = self.data_collector.target_classes
self.count_vectorizer = CountVectorizer(min_df=1, tokenizer=tokenize, vocabulary = self.keywords)
def train(self):
print 'Training Naive Bayes..'
print 'Running Count Vectorizer..'
X_train_counts = self.count_vectorizer.fit_transform(self.documents)
# print 'Headers:', self.count_vectorizer.get_feature_names()
# print 'X_train_counts:\n', X_train_counts.toarray()
# print 'count_vect.vocabulary_:', self.count_vectorizer.vocabulary_
print 'Performing tf-idf transform..'
X_train_tfidf = self.tfidf_transformer.fit_transform(X_train_counts)
#print 'X_train_tfidf.shape:', X_train_tfidf.shape
# print 'X_train_tfidf:\n', X_train_tfidf.toarray()
self.clf = MultinomialNB(fit_prior=False).fit(X_train_tfidf, self.target_classes)
def classify(self, param):
if isinstance(param, list):
docs_new = param
else:
docs_new = [param]
X_new_counts = self.count_vectorizer.transform(docs_new)
# print 'X_new_counts:', X_new_counts
X_new_tfidf = self.tfidf_transformer.transform(X_new_counts)
predicted = self.clf.predict(X_new_tfidf)
predicted_prob = self.clf.predict_proba(X_new_tfidf)
"""
print
print 'Prediction:'
for doc, category in zip(docs_new, predicted):
print '%r => %s' % (doc, category)
print
for doc, prob in zip(docs_new, predicted_prob):
print '%r => %s' % (doc, prob)
"""
return_val = []
for row in predicted_prob:
prob_data = {}
for prob, category in zip(row, self.clf.classes_):
prob_data[category] = prob
return_val.append(prob_data)
return return_val
示例10: classifyNaiveBayes
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def classifyNaiveBayes(Xtr, ytr, Xte, yte, reduceDim="none", targetDim=0):
    """ Classified data using Naive Bayes.

    Returns (accuracyRate, timing, probabilities, predicted); on failure the
    defaults (0.0, 0.0, [], []) are returned after logging the error.
    """
    try:
        # BUG FIX: `predicted` is returned after the except clause, but the
        # original only assigned it inside the try — any failure before
        # predict() raised NameError at the return.  Initialize every
        # returned value up front.
        accuracyRate, timing, probabilities, predicted = 0.0, 0.0, [], []
        # Reduce dimensionality if requested
        Xtr = reduceDimensionality(Xtr, ytr, reduceDim, targetDim) if reduceDim != "none" else Xtr
        Xte = reduceDimensionality(Xte, yte, reduceDim, targetDim) if reduceDim != "none" else Xte
        # Make sure values are positive because MultinomialNB doesn't take negative features
        Xtr = flipSign(Xtr, "+")
        Xte = flipSign(Xte, "+")
        # Perform classification
        nbClassifier = MultinomialNB()
        prettyPrint("Training the Naive Bayes algorithm", "debug")
        startTime = time.time()
        nbClassifier.fit(numpy.array(Xtr), numpy.array(ytr))
        # Now test the trained algorithm
        prettyPrint("Submitting the test samples", "debug")
        predicted = nbClassifier.predict(Xte)
        endTime = time.time()
        # Compare the predicted and ground truth
        accuracyRate = round(metrics.accuracy_score(predicted, yte), 2)
        probabilities = nbClassifier.predict_proba(Xte)
        # Finally, calculate the time taken to train and classify
        timing = endTime - startTime
    except Exception as e:
        prettyPrint("Error encountered in \"classifyNaiveBayes\": %s" % e, "error")
    return accuracyRate, timing, probabilities, predicted
示例11: main
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def main():
    """Train MultinomialNB on 3/5 of the labelled rows (strides 0,2,4 mod 5),
    hold out 2/5 (strides 1,3 mod 5) for validation, and write per-class
    probability CSVs for both the held-out rows and the real test set."""
    # PERF FIX: read each CSV once and slice, instead of re-parsing the same
    # file for every stride (the original called genfromtxt five times per
    # file).
    allFeature = genfromtxt('trainFeatureWithCounting.csv', delimiter=',')
    allLabel = genfromtxt('trainLabel.csv', delimiter='\n')
    # 3/5 train
    trainFeature = np.concatenate((allFeature[0::5], allFeature[2::5], allFeature[4::5]))
    trainLabel = np.concatenate((allLabel[0::5], allLabel[2::5], allLabel[4::5]))
    # 2/5 in trainset to test
    trainFeature_test = np.concatenate((allFeature[1::5], allFeature[3::5]))
    trainLabel_test = np.concatenate((allLabel[1::5], allLabel[3::5]))
    # testset
    testFeature = genfromtxt('testFeatureWithCounting.csv', delimiter=',')
    clf = MultinomialNB()
    clf.fit(trainFeature, trainLabel)
    # (removed a no-op `MultinomialNB(alpha=1.0, ...)` expression that built
    # and immediately discarded an unused classifier)
    header = "Id,ARSON,ASSAULT,BAD CHECKS,BRIBERY,BURGLARY,DISORDERLY CONDUCT,DRIVING UNDER THE INFLUENCE,DRUG/NARCOTIC,DRUNKENNESS,EMBEZZLEMENT,EXTORTION,FAMILY OFFENSES,FORGERY/COUNTERFEITING,FRAUD,GAMBLING,KIDNAPPING,LARCENY/THEFT,LIQUOR LAWS,LOITERING,MISSING PERSON,NON-CRIMINAL,OTHER OFFENSES,PORNOGRAPHY/OBSCENE MAT,PROSTITUTION,RECOVERED VEHICLE,ROBBERY,RUNAWAY,SECONDARY CODES,SEX OFFENSES FORCIBLE,SEX OFFENSES NON FORCIBLE,STOLEN PROPERTY,SUICIDE,SUSPICIOUS OCC,TREA,TRESPASS,VANDALISM,VEHICLE THEFT,WARRANTS,WEAPON LAWS"
    # testset
    dec1 = clf.predict_proba(testFeature)
    # trainset to test
    dec2 = clf.predict_proba(trainFeature_test)
    fmt1 = ['%d'] + ['%1.4f'] * dec1.shape[1]
    fmt2 = ['%d'] + ['%1.4f'] * dec2.shape[1]
    # Prepend a 0-based Id column to each probability matrix.
    dec1 = insert(dec1, 0, range(len(dec1)), axis=1)
    savetxt("predict_NaiveBayes_96_testset.csv", dec1, delimiter=",", header=header, fmt=fmt1, comments="")
    dec2 = insert(dec2, 0, range(len(dec2)), axis=1)
    savetxt("predict_NaiveBayes_96_trainset_to_test.csv", dec2, delimiter=",", header=header, fmt=fmt2, comments="")
示例12: WeightedPartialFitPassiveTransferClassifier
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
class WeightedPartialFitPassiveTransferClassifier(object):
    """Naive Bayes transfer classifier: batch-trained on unambiguous source
    annotations, then incrementally updated on weighted target annotations."""

    def __init__(self, target_weight):
        self.classifier = MultinomialNB()
        self.target_weight = target_weight
        self.vectorizer = FullContextBagOfWordsLeftRightCutoff(9)

    def train_source(self, annotations):
        # Train on unambiguous annotations which have a group number.
        X = self.vectorizer.fit_transform(annotations)
        y = numpy.array([a.get_group_number() for a in annotations])
        self.classifier.fit(X, y)

    def train_target_online(self, annotations, labels):
        # Train on ambiguous annotations with the given group labels, each
        # sample weighted by target_weight.
        X = self.vectorizer.transform(annotations)
        y = numpy.array([Annotation.GROUP_MAPPING[label] for label in labels])
        sample_weights = [self.target_weight] * len(annotations)
        self.classifier.partial_fit(X, y, Annotation.GROUP_MAPPING.values(), sample_weights)

    def get_group_number_prob_pair(self, annotation, prob_vector):
        # Among the annotation's candidate groups, return the (index, prob)
        # pair with the highest probability.
        candidates = annotation.get_group_number()
        scored = [(index, prob_vector[index]) for index in candidates]
        return max(scored, key=lambda pair: pair[1])

    def get_group_number(self, annotation, prob_vector):
        return self.get_group_number_prob_pair(annotation, prob_vector)[0]

    # tested, results for the classifier trained on source are not random
    def predict(self, annotations):
        X = self.vectorizer.transform(annotations)
        probs = self.classifier.predict_proba(X)  # [n_samples, n_classes]
        return numpy.array([self.get_group_number(annotation, row)
                            for row, annotation in itertools.izip(probs, annotations)])

    # tested, results for the classifier trained on source are not random
    def get_max_probability(self, annotation, prob_vector):
        return self.get_group_number_prob_pair(annotation, prob_vector)[1]

    def get_prob_estimates(self, annotations):
        X = self.vectorizer.transform(annotations)
        probs = self.classifier.predict_proba(X)
        return numpy.array([self.get_max_probability(annotation, row)
                            for row, annotation in itertools.izip(probs, annotations)])
示例13: __init__
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
class Classifier:
    """Used to allow the adding and removing of speeches to the classifer.
    This could be made faster by actually modifying or extending the MultinomialNB
    in scikit-learn rather than creating a new MultinomialNB object each time."""

    def __init__(self, vocab=None):
        # min_df=2 drops terms appearing in fewer than two documents.
        self.vectorizer = TfidfVectorizer(min_df=2, vocabulary=vocab)
        self.classifier = MultinomialNB(alpha=0.1, fit_prior=True)

    def train_classifier(self, data, target):
        sparse_data = self.vectorizer.fit_transform(data)
        app.logger.debug("training classifier")
        self.classifier.fit(sparse_data, target)

    def classify_document(self, document):
        # Per-class probability vector for a single document.
        app.logger.debug("classifying document")
        tfidf_frames_vector = self.vectorizer.transform([document])
        return self.classifier.predict_proba(tfidf_frames_vector)[0]

    def cross_validation(self, documents, targets):
        """
        Instantiate a new classifier and run this function.
        Do not run train_classifier
        """
        # BUG FIX: the original passed self.vectorizer.fit(documents) — which
        # returns the fitted *vectorizer*, not a feature matrix — as X to
        # cross_val_score.  fit_transform yields the document-term matrix.
        X = self.vectorizer.fit_transform(documents)
        y = targets
        return cross_val_score(self.classifier, X, y, cv=5)

    @staticmethod
    def bunch_with_targets(speeches, target_function):
        '''This function is an alternative form of the loads in sklearn which loads
        from a partiular file structure. This function allows me to load from the database
        '''
        app.logger.debug('Building bunch containing data and target vector.')
        target = []  # 0 and 1 for subgroup a and b respectively
        target_names = ['a', 'b']  # target_names
        data = []  # data
        for speech in speeches:
            target.append(target_function(speech))
            # Concatenate the speech's sentences into a single document.
            speech_string = ''.join(sentence for sentence in speech.speaking)
            data.append(speech_string)
        DESCR = "Trained subgroup_a vs subgroup_b classifier"
        # Bunch - https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/datasets/base.py
        return Bunch(
            target=target,
            target_names=target_names,
            data=data,
            DESCR=DESCR
        )
示例14: NaiveBayesModel
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
class NaiveBayesModel(BaseModel):
    """BaseModel backed by a lightly-smoothed multinomial Naive Bayes."""

    def __init__(self, cached_feature):
        BaseModel.__init__(self, cached_feature)
        self.model = MultinomialNB(alpha=0.01, fit_prior=True)

    def _predict_internal(self, X_test):
        # Positive-class probability for each test row.
        probas = self.model.predict_proba(X_test)
        return probas[:, 1]
示例15: MultinomialNB_pred
# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import predict_proba [as 别名]
def MultinomialNB_pred(X_train, X_test, y_train):
    """Fit MultinomialNB(alpha=0.1) and return click probabilities.

    Returns:
        (predictions_click, predictions_train_click): lists of P(click) —
        column 1 of predict_proba, i.e. the positive class — for X_test
        and X_train respectively.
    """
    clf = MultinomialNB(alpha=0.1, fit_prior=True).fit(X_train, y_train)
    # predict_proba yields [P(non-click), P(click)] per row; keep column 1.
    # (Comprehensions replace the original manual append loops.)
    predictions_click = [pred[1] for pred in clf.predict_proba(X_test)]
    predictions_train_click = [pred[1] for pred in clf.predict_proba(X_train)]
    return predictions_click, predictions_train_click