

Python MultinomialNB.predict Method Code Examples

This article collects typical usage examples of the Python method sklearn.naive_bayes.MultinomialNB.predict. If you have been wondering how exactly to call MultinomialNB.predict, how to use it in practice, or simply want to see real-world examples, the curated code samples below should help. You can also explore further usage examples of the containing class, sklearn.naive_bayes.MultinomialNB.


Fifteen code examples of MultinomialNB.predict are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the site recommend better Python code examples.
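
For orientation, here is a minimal, self-contained sketch of the fit/predict workflow that all of the examples below build on. The toy count matrix and variable names are purely illustrative and do not come from any of the cited projects:

import numpy as np
from sklearn.naive_bayes import MultinomialNB

# toy bag-of-words counts: 4 documents x 3 vocabulary terms (made-up data)
X_train = np.array([[2, 1, 0],
                    [3, 0, 0],
                    [0, 1, 4],
                    [0, 2, 3]])
y_train = np.array([0, 0, 1, 1])   # class label of each document

clf = MultinomialNB(alpha=1.0)     # alpha is the additive (Laplace) smoothing parameter
clf.fit(X_train, y_train)

X_new = np.array([[1, 0, 3]])      # a new document to classify
print(clf.predict(X_new))          # hard class label, e.g. [1]
print(clf.predict_proba(X_new))    # per-class probabilities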

Example 1: multinomialNB

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
def multinomialNB(devMatrix, trainMatrix, devtarget, traintarget):
	f = open('MNNB2.log', 'a')
	f.write("Making model!!!!!")
	print 'Making model!'
	clf = MultinomialNB(alpha=1, fit_prior=False)
	clf.fit(trainMatrix, traintarget)
	f.write("\n")
	value = 'Model: multinomial naive Bayes with parameters %s' % clf.get_params(False)
	print(value)
	f.write(value)
	f.write("\n")
	f.write("MSE for train: %.2f" % np.mean((clf.predict(trainMatrix) - traintarget) ** 2))
	score = clf.score(trainMatrix, traintarget)
	f.write("\n")
	f.write('Score for train %.2f' % score)
	f.write("\n")
	f.write("MSE for dev: %.2f" % np.mean((clf.predict(devMatrix) - devtarget) ** 2))
	score = clf.score(devMatrix, devtarget)
	value = 'Score for dev %.2f' % score
	print(value)
	f.write("\n")
	f.write(value)
	f.write("\n")
	f.write('model done')
	f.write("\n")
	f.write("\n")
	f.close()
	return score
Author: katymccl3, Project: MachineLearning, Lines: 31, Source: dataParser.py

Example 2: RunNBCScikit

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
    def RunNBCScikit(q):
      totalTimer = Timer()
      
      Log.Info("Loading dataset", self.verbose)
      # Load train and test dataset.
      trainData = np.genfromtxt(self.dataset[0], delimiter=',')
      testData = np.genfromtxt(self.dataset[1], delimiter=',')

      # Labels are the last column of the training set.
      labels = trainData[:, (trainData.shape[1] - 1)]
      trainData = trainData[:,:-1]

      try:
        with totalTimer:      
          # Create and train the classifier.
          nbc = MultinomialNB()
          nbc.fit(trainData, labels)
          # Run Naive Bayes Classifier on the test dataset.
          nbc.predict(testData)
      except Exception as e:
        q.put(-1)
        return -1

      time = totalTimer.ElapsedTime()
      q.put(time)
      return time
Author: rancho93, Project: benchmarks, Lines: 28, Source: nbc.py

Example 3: test_mnb_prior_unobserved_targets

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
def test_mnb_prior_unobserved_targets():
    # test smoothing of prior for yet unobserved targets

    # Create toy training data
    X = np.array([[0, 1], [1, 0]])
    y = np.array([0, 1])

    clf = MultinomialNB()

    assert_no_warnings(
        clf.partial_fit, X, y, classes=[0, 1, 2]
    )

    assert clf.predict([[0, 1]]) == 0
    assert clf.predict([[1, 0]]) == 1
    assert clf.predict([[1, 1]]) == 0

    # add a training example with previously unobserved class
    assert_no_warnings(
        clf.partial_fit, [[1, 1]], [2]
    )

    assert clf.predict([[0, 1]]) == 0
    assert clf.predict([[1, 0]]) == 1
    assert clf.predict([[1, 1]]) == 2
Author: allefpablo, Project: scikit-learn, Lines: 27, Source: test_naive_bayes.py

Example 4: __init__

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
class NaiveBayes:
	def __init__(self):
		self.clf = MultinomialNB()
		self.pattern ='(?u)\\b[A-Za-z]{3,}'
		self.tfidf = TfidfVectorizer(sublinear_tf=False, use_idf=True, smooth_idf=True, stop_words='english', token_pattern=self.pattern, ngram_range=(2,2))

	def train(self,fileName):
		print "Naive Bayes classifier is being trained"
		table = pandas.read_table(fileName, sep="\t", names=["cat", "message"])
		X_train = self.tfidf.fit_transform(table.message)
		Y_train = []
		for item in table.cat:
			Y_train.append(int(item)) 
		self.clf.fit(X_train, Y_train)
		print "Naive Bayes classifier has been trained"

	def classify(self,cFileName, rFileName):
		table = pandas.read_table(cFileName, names=["message"])
		X_test = self.tfidf.transform(table.message)
		print "Data have been classified"
		with open(rFileName,'w') as f:
			for item in self.clf.predict(X_test).astype(str):
				f.write(item+'\n')

	def validate(self,fileName):
		table = pandas.read_table(fileName, sep="\t", names=["cat", "message"])
		X_validate = self.tfidf.transform(table.message)
		Y_validated = self.clf.predict(X_validate).astype(str)
		totalNum = len(table.cat)
		errorCount = 0
		for i in range(0,totalNum):
			if int(table.cat[i])!=int(Y_validated[i]):
				errorCount += 1
		print "Data have been validated! Precision={}".format((totalNum-errorCount)/float(totalNum))
Author: richelite, Project: classify, Lines: 37, Source: lib.py

Example 5: __init__

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
class ScikitNB:
    def __init__(self, train_file, tags_file, tag_start, tag_end):
        self.sf = ScikitFeature(train_file, tags_file, tag_start, tag_end, max_features=10000)
        print "done getting features"
        self.classifier = MultinomialNB()
        self.classifier.fit(self.sf.training_text, self.sf.training_labels_tuple)
        print "done fitting"

    def predict(self, text):
        text_vector = self.sf.get_text_vector(text)
        labels = self.classifier.predict(text_vector)
        return self.sf.get_labels(labels)

    def test(self, test_file):
        test_matrix = self.sf.get_file_text(test_file)
        predicted_labels = self.classifier.predict(test_matrix)
        print predicted_labels
        predicted_label_names = [self.sf.get_labels_from_id(label_ids) for label_ids in predicted_labels]
        true_labels = self.sf.get_file_labels(test_file)
        N_question = len(predicted_labels)
        N_true_tags = 0.0
        N_predict_tags = 0.0
        N_correct = 0.0
        F1 = []
        for i in range(N_question):
            N_true_tags += len(true_labels[i])
            N_predict_tags += len(predicted_labels[i])
            this_correct = 0.
            for predict_label_id in predicted_labels[i]:
                if (predict_label_id in true_labels[i]):
                    this_correct += 1
            N_correct += this_correct
            if this_correct == 0:
                F1.append(0)
            else:
                p = this_correct / len(predicted_labels[i])
                r = this_correct / len(true_labels[i])
                F1.append(2*p*r/(p+r))
        print N_correct,N_predict_tags,N_true_tags
        p= N_correct / N_predict_tags
        r= N_correct / N_true_tags
        print "Precision: %f %%" % (p*100)
        print "Recall: %f %%" % (r*100)
        print "Mean F1: %f" % (np.average(F1))

    def get_tags(self, test_file, output_file):
        print "Getting tags for "+test_file
        new_csv = open(output_file, 'w')
        writer = csv.writer(new_csv, delimiter=',', quotechar='"')
        test_matrix = self.sf.get_file_text(test_file)
        predicted_labels = self.classifier.predict(test_matrix)
        predicted_label_names = [self.sf.get_labels_from_id(label_ids) for label_ids in predicted_labels]
        ids = self.sf.get_file_ids(test_file)
        for i,id in enumerate(ids):
            tags = " ".join(predicted_label_names[i])
            writer.writerow([id, tags])
        new_csv.close()
        print "Done."
Author: chtran, Project: kaggle_fb, Lines: 60, Source: scikit_naive.py

Example 6: nb_class

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
def nb_class():
    nb_classifier = MultinomialNB(alpha=0.01)
    nb_classifier.fit(X_train, y_train)
    y_train_pred = nb_classifier.predict(X_train)
    y_test_pred = nb_classifier.predict(X_test)
    train_accuracy = np.mean(y_train_pred.ravel() == y_train.ravel()) * 100
    test_accuracy = np.mean(y_test_pred.ravel() == y_test.ravel()) * 100
    print 'multinomial naive bayes train accuracy:', train_accuracy
    print 'multinomial naive bayes test accuracy:', test_accuracy
Author: apiatski, Project: Tiebreaker, Lines: 11, Source: emm.py

Example 7: train_test

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
def train_test(args):
    
    # unpack arguments and make train/test data/label dicts/lists
    train, test, features, classifier = args

    # create tf-idf sparse matrix from training data
    if features == 'tfidf':
        fe = TfidfVectorizer(tokenizer=tokenize, stop_words='english', max_features=1290)
        trainfe = fe.fit_transform(train['data'])
    elif features == 'dict':
        fe = CountVectorizer(tokenizer=tokenize, stop_words='english', binary=True)
        trainfe = fe.fit_transform(train['data'])
    elif features == 'lsa':
        svd = TruncatedSVD(n_components=100, random_state=42)
        fe = TfidfVectorizer(tokenizer=tokenize, stop_words='english', max_df=0.115, max_features=11500)
        trainfe = svd.fit_transform(fe.fit_transform(train['data']))
    elif features == 'rule':
        hamfe = CountVectorizer(tokenizer=tokenize, stop_words='english', max_features=1150)
        spamfe = CountVectorizer(tokenizer=tokenize, stop_words='english', max_features=1150)
        hamfit = hamfe.fit_transform(train['data'].loc[train['labels'] == 0])
        spamfit = spamfe.fit_transform(train['data'].loc[train['labels'] == 1])

    # train multinomial nb classifier on training data
    if classifier == 'mnb':
        from sklearn.naive_bayes import MultinomialNB
        clf = MultinomialNB().fit(trainfe, train['labels'])
    elif classifier == 'gnb':
        from sklearn.naive_bayes import GaussianNB
        clf = GaussianNB().fit(trainfe.toarray(), train['labels'])
    elif classifier == 'svm':
        from sklearn.linear_model import SGDClassifier
        clf = SGDClassifier(loss='squared_hinge', penalty='l2').fit(trainfe, train['labels'])
    elif classifier == 'log':
        from sklearn.linear_model import SGDClassifier
        clf = SGDClassifier(loss='log', penalty='l2').fit(trainfe, train['labels'])
    elif classifier == 'rule':
        hamfeats = hamfe.transform(test['data'])
        spamfeats = spamfe.transform(test['data'])
        hyp = np.array(hamfeats.sum(axis=1) < spamfeats.sum(axis=1)).reshape(-1).T
        
    # extract features from test data
    if features == 'lsa':
        feats = svd.transform(fe.transform(test['data']))
    else:
        feats = fe.transform(test['data'])
    # use trained classifier to generate class predictions from test features
    if classifier == 'gnb':
        hyp = clf.predict(feats.toarray())
    elif classifier == 'rule':
        pass
    else:
        hyp = clf.predict(feats)

    # compare predictions with test labels
    score = np.mean(hyp == test['labels'])

    return score
Author: cilsat, Project: spam-nlp, Lines: 59, Source: preprocess.py

Example 8: NaiveBayesClassifier

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
class NaiveBayesClassifier(CrossDomainClassifier):
    """
    Naive bayes classifier with tfidf
    """

    def train(self, limit_data=None):
        if not hasattr(self, 'reviews'):
            print "No data loaded"
            return

        if limit_data is None:
            limit_data = len(self.reviews)

        X = self.get_bag_of_ngrams(self.reviews[:limit_data])
        self.clf = MultinomialNB().fit(X, self.labels[:limit_data])

    def __test(self, reviews, labels):
        X_training_counts = self.count_vect.transform(reviews)
        X_training_tfidf = self.tfidf_transformer.transform(X_training_counts)

        predicted = self.clf.predict(X_training_tfidf)
        self.cm = confusion_matrix(labels, predicted)

        return 1 - np.mean(predicted == labels)

    def get_training_error(self):
        return self.__test(self.reviews, self.labels)

    def get_generalized_error(self):
        return self.__test(self.test_reviews, self.test_labels)

    def get_crossdomain_error(self):
        return {'twitter': self.__test(self.twitter_items, self.twitter_labels),
                'ebay': self.__test(self.ebay_items, self.ebay_labels)}

    def __get_scores(self, reviews, labels):
        X_training_counts = self.count_vect.transform(reviews)
        X_training_tfidf = self.tfidf_transformer.transform(X_training_counts)

        predicted = self.clf.predict(X_training_tfidf)
        self.cm = confusion_matrix(labels, predicted)

        return precision_recall_fscore_support(labels, predicted, average='macro')

    def get_scores_training(self):
        return self.__get_scores(self.reviews, self.labels)

    def get_scores_test(self):
        return self.__get_scores(self.test_reviews, self.test_labels)

    def get_scores_twitter(self):
        return self.__get_scores(self.twitter_items, self.twitter_labels)

    def get_scores_ebay(self):
        return self.__get_scores(self.ebay_items, self.ebay_labels)
Author: lukedeo, Project: cross-domain, Lines: 57, Source: classifier.py

Example 9: train_and_test

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
def train_and_test(X_train, y_train, X_test, y_test):
    
    print("Vectorizing features")
    vectorizer = TfidfVectorizer(sublinear_tf=True, max_df=0.5)
    X_train = vectorizer.fit_transform(X_train)
    X_test = vectorizer.transform(X_test)
    
    print("\nn_samples: %d, n_features: %d" % X_train.shape)
    
    
    print("\nTrain Set Counts:")
    counts = collections.Counter(y_train)
    print("\nTotal: ", len(y_train))
    print(counts)
    
    print("\nTest Set Counts:")
    counts = collections.Counter(y_test)
    print("\nTotal: ", len(y_test))
    print(counts)
    
    print("\nTraining and Testing")
	
    print("\n=====MULTINOMIAL NAIVE BAYES=====")
    t0 = time()
    classifier = MultinomialNB(alpha=.01)
    classifier.fit(X_train, y_train)
    predicted = classifier.predict(X_test)
		
    acc_mnb, f1_mnb = print_metrics(y_test, predicted)
    
    
    print("\n========SVM========")
    t0 = time()
    classifier = svm.LinearSVC(multi_class='ovr')
    classifier.fit(X_train, y_train)
    predicted = classifier.predict(X_test)
	
    acc_svm, f1_svm = print_metrics(y_test, predicted)


   
    print("\n=====RANDOM FOREST=====")
    t0 = time()
    classifier = RandomForestClassifier(n_estimators=400, n_jobs=10)
    classifier.fit(X_train, y_train)
    predicted = classifier.predict(X_test)
    
    acc_rf, f1_rf = print_metrics(y_test, predicted)
    

    return (acc_mnb, f1_mnb, acc_svm, f1_svm, acc_rf, f1_rf)
Author: laineyzoo, Project: protein_annotation, Lines: 53, Source: cluster_dataset.py

Example 10: bayes_bench

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
def bayes_bench():
    data_file = "./data/dataset.pkl"
    train_set, valid_set, test_set, word2id, pop2id, type2id = dataset.load_data(data_file)

    train_set_x, train_set_y = train_set
    train_set_pop_y, train_set_type_y, train_set_loc_y = train_set_y

    valid_set_x, valid_set_y = valid_set
    valid_set_pop_y, valid_set_type_y, valid_set_loc_y = valid_set_y
    
    test_set_x, test_set_y = test_set
    test_set_pop_y, test_set_type_y, test_set_loc_y = test_set_y
    
    id2word = {v:k for k,v in word2id.items()}
    word_train_set_x = [sen_dig2word(doc, id2word) for doc in train_set_x]
    word_valid_set_x = [sen_dig2word(doc, id2word) for doc in valid_set_x]
    word_test_set_x = [sen_dig2word(doc, id2word) for doc in test_set_x]
    
    # construct the word count matrix
    count_vect = CountVectorizer()
    x_train_count = count_vect.fit_transform(word_train_set_x)
    x_valid_count = count_vect.transform(word_valid_set_x)
    x_test_count = count_vect.transform(word_test_set_x)

    tfidf_transformer = TfidfTransformer()
    x_train_tfidf = tfidf_transformer.fit_transform(x_train_count)
    x_valid_tfidf = tfidf_transformer.transform(x_valid_count)
    x_test_tfidf = tfidf_transformer.transform(x_test_count)

    # train the pop model
    pop_clf = MultinomialNB().fit(x_train_tfidf, train_set_pop_y)
    pop_pred = pop_clf.predict(x_valid_tfidf)
    pop_pred_test = pop_clf.predict(x_test_tfidf)

    # compute the performance
    pop_errors = np.mean(np.not_equal(pop_pred, valid_set_pop_y))
    pop_errors_test = np.mean(np.not_equal(pop_pred_test, test_set_pop_y))

    # train the event type model
    type_clf = MultinomialNB().fit(x_train_tfidf, train_set_type_y)
    type_pred = type_clf.predict(x_valid_tfidf)
    type_pred_test = type_clf.predict(x_test_tfidf)

    # compute the performance
    type_errors = np.mean(np.not_equal(type_pred, valid_set_type_y))
    type_errors_test = np.mean(np.not_equal(type_pred_test, test_set_type_y))

    print "MB--> Type error: %0.2f, Popuation error: %0.2f" % (type_errors, pop_errors)
    print "MB--> Type error: %0.2f, Popuation error: %0.2f" % (type_errors_test, pop_errors_test)
Author: Tskatom, Project: Protest_Event_Encoder, Lines: 53, Source: benchmark.py

Example 11: test_mnnb

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
def test_mnnb(kind):
    # Test Multinomial Naive Bayes classification.
    # This checks that MultinomialNB implements fit and predict and returns
    # correct values for a simple toy dataset.

    if kind == 'dense':
        X = X2
    elif kind == 'sparse':
        X = scipy.sparse.csr_matrix(X2)

    # Check the ability to predict the learning set.
    clf = MultinomialNB()
    assert_raises(ValueError, clf.fit, -X, y2)
    y_pred = clf.fit(X, y2).predict(X)

    assert_array_equal(y_pred, y2)

    # Verify that np.log(clf.predict_proba(X)) gives the same results as
    # clf.predict_log_proba(X)
    y_pred_proba = clf.predict_proba(X)
    y_pred_log_proba = clf.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba), y_pred_log_proba, 8)

    # Check that incremental fitting yields the same results
    clf2 = MultinomialNB()
    clf2.partial_fit(X[:2], y2[:2], classes=np.unique(y2))
    clf2.partial_fit(X[2:5], y2[2:5])
    clf2.partial_fit(X[5:], y2[5:])

    y_pred2 = clf2.predict(X)
    assert_array_equal(y_pred2, y2)

    y_pred_proba2 = clf2.predict_proba(X)
    y_pred_log_proba2 = clf2.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba2), y_pred_log_proba2, 8)
    assert_array_almost_equal(y_pred_proba2, y_pred_proba)
    assert_array_almost_equal(y_pred_log_proba2, y_pred_log_proba)

    # Partial fit on the whole data at once should be the same as fit too
    clf3 = MultinomialNB()
    clf3.partial_fit(X, y2, classes=np.unique(y2))

    y_pred3 = clf3.predict(X)
    assert_array_equal(y_pred3, y2)
    y_pred_proba3 = clf3.predict_proba(X)
    y_pred_log_proba3 = clf3.predict_log_proba(X)
    assert_array_almost_equal(np.log(y_pred_proba3), y_pred_log_proba3, 8)
    assert_array_almost_equal(y_pred_proba3, y_pred_proba)
    assert_array_almost_equal(y_pred_log_proba3, y_pred_log_proba)
Author: AlexisMignon, Project: scikit-learn, Lines: 51, Source: test_naive_bayes.py

Example 12: main

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
def main():
	# extract reviews from tsv files
	labeled_training_data = pd.read_csv("labeledTrainData.tsv", header=0, delimiter="\t", quoting=3) # 25,000 reviews
	test_data = pd.read_csv("testData.tsv", header=0, delimiter="\t", quoting=3) # 25, 000 reviews


	print "Creating BOW...."" "		
	vectorizer = CountVectorizer(analyzer = "word", tokenizer = None, preprocessor = None, stop_words = None, max_features = 5000) 
	trained_data_features  = vectorizer.fit_transform(review_list)
	trained_data_features = trained_data_features.toarray() # convert to numpy array for faster processing

	print "Supervised Learning - Naive Bayes"
	nb_model = MultinomialNB(alpha = 0.01)
	nb_model = nb_model.fit(trained_data_features, labeled_training_data["sentiment"]) # using BOW as feaures and the given labels as repsonse variables

	print "---------------------------------"
	print " "
	print "Predicting on test data: "

	# BOW for test set
	test_data_features = vectorizer.transform(test_review_list)
	test_data_features = test_data_features.toarray()

	# use the trained Naive Bayes model to make predictions
	predictions = nb_model.predict(test_data_features)

	# prepare output submission file
	prediction_output = pd.DataFrame( data = {"id":test_data["id"], "sentiment":predictions} ) # create pandas dataframe
	prediction_output.to_csv("BOW_NB.csv", index=False, quoting=3)# write to csv file
	joblib.dump(vectorizer, 'bow_model.pkl')
	joblib.dump(nb_model, 'nb_bow_model.pkl')  
Author: amit-rakesh, Project: Sentiment-Analysis-Movie-Reviews, Lines: 33, Source: main_bow_nb.py

Example 13: classify_reviews

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
def classify_reviews():
	import featurizer
	import gen_training_data
	import numpy as np
	from sklearn.naive_bayes import MultinomialNB
	from sklearn.linear_model import SGDClassifier

	data = gen_training_data.gen_data();
	stemmed_data = featurizer.stem(data);
	tfidf= featurizer.tfidf(data);
	clf = MultinomialNB().fit(tfidf['train_tfidf'], data['training_labels']);
	predicted = clf.predict(tfidf['test_tfidf']);
	num_wrong = 0;
	tot = 0;
	for expected, guessed in zip(data['testing_labels'], predicted):
		if(expected-guessed != 0):	
			num_wrong += 1;

	print("num_wrong: %d",num_wrong)

	sgd_clf = SGDClassifier(loss='hinge', penalty='l2', alpha=1e-3, n_iter=5, random_state=42);
	_ = sgd_clf.fit(tfidf['train_tfidf'], data['training_labels']);
	sgd_pred = sgd_clf.predict(tfidf['test_tfidf']);
	print np.mean(sgd_pred == data['testing_labels']);

	stem_tfidf = featurizer.tfidf(stemmed_data);
	_ = sgd_clf.fit(stem_tfidf['train_tfidf'], data['training_labels']);
	sgd_stem_prd = sgd_clf.predict(stem_tfidf['test_tfidf']);
	print np.mean(sgd_stem_prd==data['testing_labels']);
Author: JT17, Project: 445Project, Lines: 31, Source: classifier.py

Example 14: main

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
def main(clf):
    #print 'getting train'
    train = pd.read_csv('dat/trainMN.tsv',sep = '\t')
    #print 'getting test'
    test = pd.read_csv('dat/devMN.tsv', sep = '\t')

    global all_words
    all_words = word_to_set(train['Phrase'], trim=20, is_raw=True)

    #print 'creating x dict vectors from train'
    train_x = train['Phrase']
    #print 'extracting...'
    train_x = use_feature_dicts(train_x)
    # print train_x

    #print 'creating train y'
    train_y = [int(y) for y in train['Sentiment']]
    if clf == 'NB':
        classifier = MultinomialNB().fit(train_x, train_y)
    elif clf == 'RF':
        classifier = RandomForestClassifier().fit(train_x, train_y)
    elif clf == 'LG':
        classifier = linear_model.LinearRegression()
        classifier = classifier.fit(train_x, train_y)
    elif clf == 'SGD':
        classifier = SGDClassifier().fit(train_x, train_y)
    #print 'testing'
    test_x = use_feature_dicts(test['Phrase'])
    
    for i in classifier.predict(test_x):
        print i
    title = clf + '.pickle'
    pickle.dump(classifier, open(title, 'w'))
Author: meera1hahn, Project: cs571, Lines: 35, Source: sentimentNB.py

Example 15: naive_bayes

# Required import: from sklearn.naive_bayes import MultinomialNB [as alias]
# Or: from sklearn.naive_bayes.MultinomialNB import predict [as alias]
def naive_bayes():
    nb = MultinomialNB()
    nb.fit(X_train, train_data.danger)
    nb_pred = nb.predict(X_test)
    nb_score = nb.score(X_test, y_test)
    precision, recall, _, _ = precision_recall_fscore_support(y_test, nb_pred)
    return precision, recall, str(nb_score)
Author: ilyaaltshteyn, Project: danger_tweets, Lines: 9, Source: classify4.py


Note: The sklearn.naive_bayes.MultinomialNB.predict examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright remains with the original authors. Please follow the license of the corresponding project when using or redistributing the code; do not reproduce without permission.