当前位置: 首页>>代码示例>>Python>>正文


Python MultinomialNB.fit方法代码示例

本文整理汇总了Python中sklearn.naive_bayes.MultinomialNB.fit方法的典型用法代码示例。如果您正苦于以下问题:Python MultinomialNB.fit方法的具体用法?Python MultinomialNB.fit怎么用?Python MultinomialNB.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.naive_bayes.MultinomialNB的用法示例。


在下文中一共展示了MultinomialNB.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: classifier

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
def classifier():
    nb = MultinomialNB(alpha=0)
    nb.fit(DOC_TRAIN, CLASS_TRAIN)
    db = DB()
    query = 'select cate_id, tf, url, content from site_content_3'

    cursor = db.cursor()
    logger.info(query)
    cursor.execute(query)
    rows = cursor.fetchall()
    for row in rows:
        currentCateId = row['cate_id']
        print 'rowID => ', row['cate_id'];
        url = row['url']
        tf = row['tf']
        content = row['content']
        termFrequencyDict = {}
        # continue

        try:
            termFrequencyDict = json.loads(tf)
        except:
            print 'error => ', url
            continue

        testItem = np.array([])
        for word in termFrequencyDict:
            tf = termFrequencyDict[word]
            if WORDS.has_key(word):
                testItem = np.append([tf])
            else:
                testItem = np.append([0])

        print "CURRENT CATE ", currentCateId
        print "NEW ", nb.predict(testItem)
开发者ID:meotomit,项目名称:crawler01,代码行数:37,代码来源:MNB.py

示例2: MultinomialNBClassify_Proba

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
def MultinomialNBClassify_Proba(enrollment_id, trainData, trainLabel, testData):
    nbClf = MultinomialNB() # default alpha=1.0, Laplace smoothing
    # settinf alpha < 1 is called Lidstone smoothing
    nbClf.fit(trainData, ravel(trainLabel))
    testLabel = nbClf.predict_proba(testData)[:,1]
    saveResult(enrollment_id, testLabel, 'Proba_sklearn_MultinomialNB_alpha=0.1_Result.csv')
    return testLabel
开发者ID:ElvisKwok,项目名称:code,代码行数:9,代码来源:test.py

示例3: crossValidate

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
def crossValidate(X_dataset,y):
#cross validate model
    num_folds = 5
    kfold = cross_validation.StratifiedKFold(y, n_folds=num_folds, shuffle=True)

   # kfold=KFold(X.shape[0],n_folds=10, shuffle=True)
    avg_accuracy=0
    avg_precision=0
    avg_recall=0
    print "----------- cross_validation k=5"
    for train,test in kfold:
        Xtrain,Xtest,ytrain,ytest=X_dataset[train],X_dataset[test],y[train],y[test]
        
#        clf=LinearSVC()
        clf=MultinomialNB(alpha=0.1)
#        clf=LDA()
        clf.fit(Xtrain.toarray(),ytrain)
        ypred=clf.predict(Xtest.toarray())
        accuracy=metrics.accuracy_score(ytest,ypred)              
#        print "accuracy = ", accuracy
        avg_accuracy+=accuracy
        precision = metrics.precision_score(ytest,ypred)
#        print("precision:   %0.3f" % precision)
        avg_precision+=precision
        recall = metrics.recall_score(ytest,ypred)
#        print("recall:   %0.3f" % recall)
        avg_recall+=recall
        
    print "Average accuracy : " , (avg_accuracy/num_folds)
    print "Average precision : " , (avg_precision/num_folds)
    print "Average recall : " , (avg_recall/num_folds)        
开发者ID:ananya11,项目名称:CS5614_projects,代码行数:33,代码来源:CrossValidation.py

示例4: run_naivebayes_evaluation

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
	def run_naivebayes_evaluation(self, inputdata, outputdata, k):
		""" Fit Naive Bayes Classification on train set with cross validation. 
		Run Naive Bayes Classificaiton on test set. Return results
		"""

		###print "** Fitting Naive Bayes classifier.."

		# Cross validation
		cv = cross_validation.KFold(inputdata.shape[0], n_folds=k, indices=True)
		cv_naivebayes = []
		f1_scores = []
		for traincv, testcv in cv:

			clf_cv = MultinomialNB()
			clf_cv.fit(inputdata[traincv], outputdata[traincv])

			y_pred_cv = clf_cv.predict(inputdata[testcv])

			f1 = metrics.f1_score(outputdata[testcv], y_pred_cv, pos_label=0)
			f1_scores.append(f1)

		
		#TODO: NEEDED? self.classifier = clf_cv
		print "score average: %s" + str(np.mean(f1_scores))

		average_score =np.mean(f1_scores)
		tuples = (average_score, f1_scores)

		return (tuples, 'N.A.', 'N.A.')
开发者ID:sagieske,项目名称:scriptie,代码行数:31,代码来源:start_nb.py

示例5: train

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
def train(good_sources, bad_sources,method,naive_bayes=None,keywords=list()):
    #train the algorithm
    good_samples = find_keywords(' '.join([entry[method] for entry in good_sources]))
    bad_samples = find_keywords(' '.join([entry[method] for entry in bad_sources]))


    #if we have an exists knowledge base to append this new information to, do so
    if naive_bayes:
        new_kws = set(good_samples+bad_samples)
        print('Using old keywords as well')
        print("# old keywords = {}\n # new keywords = {}".format(len(keywords),len(new_kws)))
        new_kws = set(good_samples+bad_samples).difference(keywords)
        print("# fresh keywords = {}\n".format(len(new_kws)))

        #make some call to naive_bayes.partial_fssit in here
        X = np.concatenate((naive_bayes.feature_count_, np.zeros((naive_bayes.feature_count_.shape[0],len(new_kws)))),1)
        all_kw = keywords + list(new_kws)

    else:
        print('Only using keywords from this content set')
        all_kw = list(set(good_samples+bad_samples))
        X = np.zeros((2,len(all_kw)))

    for j,kw in enumerate(all_kw):
        X[0,j] += good_samples.count(kw)
        X[1,j] += bad_samples.count(kw)

    y = ['good','bad']

    naive_bayes = MultinomialNB()
    naive_bayes.fit(X,y)

    return naive_bayes, all_kw
开发者ID:pfdamasceno,项目名称:shakespeare,代码行数:35,代码来源:shakespeare.py

示例6: run_learning_curves_experiment

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
def run_learning_curves_experiment(dataset):
    logger.info("Now starting experiment with learning curves...")
    scores = []
    sklearn_scores = []
    train_sizes = []

    clf = MultinomialBayesEstimator()
    sklearn_clf = MultinomialNB()
    # Constructing confidence intervals using empiric bootstrap
    intervals = []
    for test_size in xrange(1, len(dataset)):
        f_scores = []
        f_scores_sklearn = []
        for train_set, test_set in split_train_test_p_out(dataset, test_size):
            train_set, test_set = split_train_test(dataset, test_size)
            X_train, y_train, X_test, y_test = make_test_train(train_set, test_set)
            clf.fit(X_train, y_train)
            f_scores.append(f1_score(y_test, clf.predict(X_test)))
            sklearn_clf.fit(X_train, y_train.ravel())
            f_scores_sklearn.append(f1_score(y_test, sklearn_clf.predict(X_test)))
        intervals.append(calculate_confidence_interval(f_scores))
        scores.append(np.mean(f_scores))
        sklearn_scores.append(np.mean(f_scores_sklearn))
        train_sizes.append(len(dataset) - test_size)

    plot_learning_curves(train_sizes, sklearn_scores, scores, intervals)
开发者ID:asaitgalin,项目名称:atp-task,代码行数:28,代码来源:spam_detector.py

示例7: predict

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
def predict(cur, plyr_id, game_plyrs): 
  #creates training set (called 'X') for plyr
  all_plyrs = all_player_ids(cur) #np.array - all NFL players (and coaches)
  games = games_played_in(cur, plyr_id) #np.array - the games_ids the player played in
  n_cols = all_plyrs.shape[0] #int 
  m_rows = games.shape[0] #int
  w = weights(games)
  zeros = np.zeros((m_rows, n_cols)) #2darr - used to initialize DF
  X = pd.DataFrame(zeros, index=games, columns=all_plyrs) #dataframe
  populate_training_set(cur, X, games, plyr_id)
  #print "X: ", X.values
  
  ###run coaches_model and then im here### 
  #creates vector of known output values
  Y = training_output_vector(cur, games, plyr_id) #good
  #print "(len) Y: ", len(Y), Y
  test_zeros = np.zeros((1, n_cols)) #2darr - used to initialize DF
  test_X = pd.DataFrame(zeros, columns=all_plyrs) #dataframe
  update_training_matrix(cur, game_plyrs, 0, test_X)
  
  #run Bernoulli NB Classifier
  nb_clf = MultinomialNB()
  
  if len(X.values) == 0:
    return 0
  nb_clf.fit(X, Y, sample_weight=w)
  nb_predictions = nb_clf.predict(test_X)
  #print "test_X: ", test_X.values
  nb_norm_prob = normalize_probs(nb_clf.predict_proba(test_X)[0])
  avgs = [3,8,12.5,17,21,25]
  #print "probs: ", nb_norm_prob
  #print avgs
  ev = expected_val(nb_norm_prob, avgs) #can also calc dot product
  return round(ev,1)
开发者ID:kwheeler27,项目名称:insight_datasci,代码行数:36,代码来源:mn_predictions.py

示例8: train

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
    def train(self):
        '''
        ## -- How to predict -- ##
            query = "blah blah"
            q = list2vec(hashit(q)) 
            clf2 = joblib.load('nb')
            print(clf2.predict(q)) # <--- returns type id
        '''

        limit = self.comment_limit
        sqls = ["SELECT body FROM comment JOIN entity ON comment.eid = entity.eid WHERE entity.tid=1 ORDER BY time DESC LIMIT " + str(limit),
            "SELECT body FROM comment JOIN entity ON comment.eid = entity.eid WHERE entity.tid=2 ORDER BY time DESC LIMIT " + str(limit),
            "SELECT body FROM comment JOIN entity ON comment.eid = entity.eid WHERE entity.tid=3 ORDER BY time DESC LIMIT " + str(limit)]

        print "training model"
        comments = self.sql2list(sqls)
        x, y = self.featureMatrix(comments)
        X = list2Vec(x)
        Y = list2Vec(y)

        q = "Let's talk about food."
        q_vec = list2Vec(hashit(q))

        ## Precicting
        print "Classifying"
        clf = MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)
        clf.fit(X, Y)
        joblib.dump(clf, self.path, compress=9)
开发者ID:WangWenjun559,项目名称:Weiss,代码行数:30,代码来源:typeTrain.py

示例9: multinomialNB

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
def multinomialNB(devMatrix, trainMatrix, devtarget, traintarget):
	f = open('MNNB2.log', 'a')
	f.write("Making model!!!!!")
	print 'Making model!'
	clf = MultinomialNB(alpha=1, fit_prior=False)
	clf.fit(trainMatrix, traintarget)
	f.write("\n")
	value = ('Model: multinomial bayes with parameters ',clf.get_params(False))
	print (str(value))
	f.write(str(value))
	f.write("\n")
	f.write("MSE for train: %.2f" % np.mean((clf.predict(trainMatrix) - traintarget) ** 2))
	score = clf.score(trainMatrix, traintarget)
	f.write("\n")
	value = ('Score for train %.2f', score)
	f.write("\n")
	f.write("MSE for dev: %.2f" % np.mean((clf.predict(devMatrix) - devtarget) ** 2))
	score = clf.score(devMatrix, devtarget)
	value = ('Score for dev %.2f', score)
	print(str(value))
	f.write("\n")
	s = str(value)
	f.write(s)
	f.write("\n")
	f.write('model done')
	f.write("\n")
	f.write("\n")
	f.close()
	return score
开发者ID:katymccl3,项目名称:MachineLearning,代码行数:31,代码来源:dataParser.py

示例10: train_classifiers

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
def train_classifiers(X_data, y_data):
    ############ Linear SVM: 0.908 #############
    clf_LSVM = svm.SVC(kernel = 'linear')
    clf_LSVM.fit(X_data, y_data)
    
    ############ MultinomialNB: 0.875 #############
    clf_MNB = MultinomialNB()
    clf_MNB.fit(X_data, y_data)
    
    ############ Random Forest: 0.910 #############
    clf_RF = RandomForestClassifier(n_estimators=200, criterion='entropy')
    clf_RF.fit(X_data, y_data)
    
    ############ Extra Tree: 0.915 ##################
    clf_ETC = ExtraTreesClassifier(n_estimators=500, max_depth=None, min_samples_split=1, random_state=0)
    clf_ETC.fit(X_data, y_data)
    
    ############ AdaBoost: 0.88 ##################
    clf_Ada = AdaBoostClassifier()
    clf_Ada.fit(X_data, y_data)
    
    ############ rbf SVM: 0.895 #############
    clf_rbf = svm.SVC(C=200, gamma=0.06, kernel='rbf')
    clf_rbf.fit(X_data, y_data)
    
    ############ GradientBoosting: 0.88 #############
    clf_GBC = GradientBoostingClassifier()
    clf_GBC.fit(X_data, y_data)
    
    return clf_LSVM, clf_MNB, clf_RF, clf_ETC, clf_Ada, clf_rbf, clf_GBC    
开发者ID:yiwuxie15,项目名称:yiwuxie15-urlBootstrap,代码行数:32,代码来源:urlAnalysis.py

示例11: naive_bayes

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
def naive_bayes(x_value, y_value):
    X = x_value
    y = y_value

    #train/test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 123)

    vect = CountVectorizer()
    vect.fit(X_train)
    X_train_dtm = vect.transform(X_train)

    X_test_dtm = vect.transform(X_test)

    from sklearn.naive_bayes import MultinomialNB
    nb = MultinomialNB()
    nb.fit(X_train_dtm, y_train)
    y_pred_class = nb.predict(X_test_dtm)
    
    print 'Accuracy: '
    print metrics.accuracy_score(y_test, y_pred_class)
    
    print 'Null Accuracy: '
    print y_test.value_counts().head(1) / len(y_test)
    
    print 'Confusion Matrix: '
    print metrics.confusion_matrix(y_test, y_pred_class)
开发者ID:sds2ga,项目名称:yelp_report,代码行数:28,代码来源:model.py

示例12: trainNB

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
def trainNB(xTrain, yTrain):

    classifier = MultinomialNB()

    classifier.fit(xTrain, yTrain)

    return classifier
开发者ID:Gliganu,项目名称:IP_FaceRecognition,代码行数:9,代码来源:ClassifierTrainer.py

示例13: nb

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
def nb(x_train,x_test,y_train,doc_app_id,id_name_dict):
	clf = MultinomialNB(alpha=0.01)
	clf.fit(x_train,y_train)
	pred = clf.predict(x_test)
	for i in range(len(pred)):
		app_id = doc_app_id[i]
		print id_name_dict[app_id]+" "+str(pred[i])
开发者ID:theseusyang,项目名称:360_tag_recommendation_system,代码行数:9,代码来源:naive_bayesian_classifier.py

示例14: do_lda

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
def do_lda(x, y, folds):
    indexes = list(range(len(x)))
    shuffle(indexes)
    x = list(x[i] for i in indexes)
    y = list(y[i] for i in indexes)
    fold_size = len(x) / folds
    corrects = []
    for fold in range(folds):
        test_x = []
        train_x = []
        test_y = []
        train_y = []
        for i in range(len(x)):
            fold_index = i / fold_size
            if fold == fold_index:
                test_x.append(x[i])
                test_y.append(y[i])
            else:
                train_x.append(x[i])
                train_y.append(y[i])
        print 'Partitioned data into fold'
        test_x, train_x = remove_redundant_dimensions(test_x, train_x)
        print 'Removed redundant dimensions'
	nb =  MultinomialNB()
	nb.fit(train_x, train_y)
	print 'Fit NB'
	predictions = nb.predict(test_x)
#        lda = LDA()
#        lda.fit(train_x, train_y)
#        print 'Fit lda'
#        predictions = lda.predict(test_x)
        correct = sum(1 for i in range(len(predictions)) if predictions[i] == test_y[i])
        print 'Did fold, correct:', correct
        corrects.append(correct)
    return corrects
开发者ID:mtinkerhess,项目名称:eecs545_twitter,代码行数:37,代码来源:do_naive.py

示例15: train

# 需要导入模块: from sklearn.naive_bayes import MultinomialNB [as 别名]
# 或者: from sklearn.naive_bayes.MultinomialNB import fit [as 别名]
    def train(self, data):
        nb = MultinomialNB()

        launches = map(lambda x: x['application'], data)
        instances = map(lambda i: {'lu1': launches[i-1]}, xrange(1, len(launches)))
        X = self.vectorizer.fit_transform(instances).toarray()
        y = launches[1:]
        self.lu1_predictor = nb.fit(X, y)

        instances = map(lambda i: {'lu2': launches[i-2]}, xrange(2, len(launches)))
        X = self.vectorizer.fit_transform(instances).toarray()
        y = launches[2:]
        self.lu2_predictor = nb.fit(X, y)

        # tune mu
        max_hr = 0
        best_mu = 0
        for mu in map(lambda x: x/10.0, xrange(11)):
            self.mu = mu
            predictions = map(lambda i: self.predict({'lu1': launches[i-1], 'lu2': launches[i-2]}), \
                xrange(2, len(launches)))
            hr, mrr = self.test(launches[2:], predictions)
            if hr > max_hr:
                max_hr = hr
                best_mu = mu
        self.mu = best_mu
开发者ID:nodestory,项目名称:ApeicServer,代码行数:28,代码来源:lu_predictor.py


注:本文中的sklearn.naive_bayes.MultinomialNB.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。