Python classifiers.NaiveBayesClassifier類代碼示例

本文整理匯總了Python中text.classifiers.NaiveBayesClassifier類的典型用法代碼示例。如果您正苦於以下問題：Python NaiveBayesClassifier類的具體用法？Python NaiveBayesClassifier怎麼用？Python NaiveBayesClassifier使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。

在下文中一共展示了NaiveBayesClassifier類的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: nb

def nb(data):
  """Train a NaiveBayesClassifier on the first 3/4 of ``data`` and print its accuracy.

  ``data`` is sliced, so it must be a sequence. The leading slice is passed
  to ``NaiveBayesClassifier`` and the remainder to ``classifier.accuracy``.
  Nothing is returned; progress and the accuracy are printed.
  """
  # Floor division keeps the split index an int even under true division
  # (Python 3 or ``from __future__ import division``).
  split = (len(data) // 4) * 3
  train = data[:split]
  test = data[split:]

  print("Training ...")
  classifier = NaiveBayesClassifier(train)
  print("Testing ...")
  # Two spaces after the colon reproduce the output of the former
  # two-argument print statement.
  print("Accuracy:  %s" % classifier.accuracy(test))

開發者ID:csrgxtu，項目名稱:maxent，代碼行數:13，代碼來源:NBTextBlob.py

示例2: test_Textblog

def test_Textblog():
    """Train a NaiveBayesClassifier on tokenized sentences and classify one sample.

    Every training sentence is split with ``nltk.word_tokenize`` so the
    classifier is trained on lists of tokens instead of raw strings. The
    tokenized training set and the prediction for a sample sentence are
    printed; nothing is returned.
    """
    import nltk

    train = [
        ('I love this sandwich.', 'pos'),
        ('This is an amazing place!', 'pos'),
        ('I feel very good about these beers.', 'pos'),
        ('This is my best work.', 'pos'),
        ("What an awesome view", 'pos'),
        ('I do not like this restaurant', 'neg'),
        ('I am tired of this stuff.', 'neg'),
        ("I can't deal with this", 'neg'),
        ('He is my sworn enemy!', 'neg'),
        ('My boss is horrible.', 'neg')
    ]
    # Held-out examples; this demo does not evaluate against them.
    test = [
        ('The beer was good.', 'pos'),
        ('I do not enjoy my job', 'neg'),
        ("I ain't feeling dandy today.", 'neg'),
        ("I feel amazing!", 'pos'),
        ('Gary is a friend of mine.', 'pos'),
        ("I can't believe I'm doing this.", 'neg')
    ]

    # Tokenize each sentence; labels pass through unchanged.
    new_train = [(nltk.word_tokenize(sentence), label)
                 for sentence, label in train]

    print(new_train)
    cl = NaiveBayesClassifier(new_train)
    new_test = nltk.word_tokenize("I don't like their pizza.")
    print("%s %s" % (new_test, cl.classify(new_test)))

開發者ID:LiuyinC，項目名稱:MDLab，代碼行數:39，代碼來源:test.py

示例3: setUp

 def setUp(self):
     """Build the fixtures: a labeled training set, a classifier built from it, and a labeled test set."""
     positive_docs = [
         'I love this car',
         'This view is amazing',
         'I feel great this morning',
         'I am so excited about the concert',
         'He is my best friend',
     ]
     negative_docs = [
         'I do not like this car',
         'This view is horrible',
         'I feel tired this morning',
         'I am not looking forward to the concert',
         'He is my enemy',
     ]
     # Positives first, then negatives, each paired with its label.
     self.train_set = (
         [(doc, 'positive') for doc in positive_docs]
         + [(doc, 'negative') for doc in negative_docs]
     )
     self.classifier = NaiveBayesClassifier(self.train_set)
     self.test_set = [
         ('I feel happy this morning', 'positive'),
         ('Larry is my friend.', 'positive'),
         ('I do not like that man.', 'negative'),
         ('My house is not great.', 'negative'),
         ('Your song is annoying.', 'negative'),
     ]

開發者ID:robertlayton，項目名稱:TextBlob，代碼行數:19，代碼來源:test_classifiers.py

示例4: NaiveBayesClassifier

('I do not like this restaurant', 'neg'),
('I am tired of this stuff.', 'neg'),
("I can't deal with this", 'neg'),
('He is my sworn enemy!', 'neg'),
('My boss is horrible.', 'neg')
]
# Labeled examples; not referenced again within this excerpt.
test = [
('The beer was good.', 'pos'),
('I do not enjoy my job', 'neg'),
("I ain't feeling dandy today.", 'neg'),
("I feel amazing!", 'pos'),
('Gary is a friend of mine.', 'pos'),
("I can't believe I'm doing this.", 'neg')
]
# Build the initial classifier from `train` (its definition starts above this excerpt).
print 'initial training going on....'
cl = NaiveBayesClassifier(train)
print 'initial training done.'
# Collect (word list, category) pairs for every file id in every category of
# `movie_reviews` (presumably the NLTK corpus; its import is outside this excerpt).
print 'now gathering reviews...'
reviews = [(list(movie_reviews.words(fileid)), category)
for category in movie_reviews.categories()
for fileid in movie_reviews.fileids(category)]
# Shuffle in place, then keep the first 200 reviews as extra training data.
random.shuffle(reviews)
new_train = reviews[0:200]
print 'reviews gathered.'
# Update the existing classifier with the new training data
print 'now training using the new data...'
cl.update(new_train)
print 'trained and ready!'
# Classify a raw sentence with the updated classifier and print the label.
print cl.classify("I hated the movie and hated the food")
# Compute accuracy

開發者ID:anishmashankar，項目名稱:experiments，代碼行數:31，代碼來源:sentana.py

示例5: test_init_with_json_file

 def test_init_with_json_file(self):
     """A classifier built from JSON_FILE with format="json" predicts 'pos' and stores unicode text."""
     classifier = NaiveBayesClassifier(JSON_FILE, format="json")
     predicted = classifier.classify("I feel happy this morning")
     assert_equal(predicted, 'pos')
     # First element of the first training entry is the document text.
     first_entry = classifier.train_set[0]
     assert_true(isinstance(first_entry[0], unicode))

開發者ID:allenwade3，項目名稱:TextBlob，代碼行數:5，代碼來源:test_classifiers.py

示例6: range

# dev - years
# `with` closes each file even if reading raises.
with codecs.open("years-dev.txt", 'r', 'utf-8') as inputfile:
    dev_train = inputfile.readlines()

# dev - content
with codecs.open("contents-dev.txt", 'r', 'utf-8') as inputfile:
    contents_dev = inputfile.readlines()

# Training set: every second entry of contents_train (indices 0, 2, ..., 3998)
# paired with years_train[i // 2]. Both lists are loaded above this excerpt.
# Floor division keeps the index an int under true division as well.
train_set = [(contents_train[i], years_train[i // 2])
             for i in range(0, 4000, 2)]

print("tu się robi")
cl = NaiveBayesClassifier(train_set)
print("a tu się zrobiło")

with open("classified.txt", "w") as outputfile:
    # Classify every second entry of the dev contents.
    for i in range(0, len(contents_dev), 2):
        result = cl.classify(contents_dev[i])
        print(i)
        # NOTE(review): labels are written back-to-back with no separator;
        # confirm whether one label per line was intended.
        outputfile.write(str(result))
    print("zmieliło")

開發者ID:hllk，項目名稱:ISIZgadujemyDaty，代碼行數:28，代碼來源:naive.py

示例7: TextBlob

			msg = TextBlob(tabsep[1])
			try:
				words=msg.words
			except:
				continue
			for word in words:
				if word not in stopwords.words() and not word.isdigit():
					list_tuples.append((word.lower(),tabsep[0]))
			c+=1
			if c==500:
				break
	return list_tuples
# Load the labeled data, train on one half, and report accuracy on the other.
# `get_list_tuples` is defined above this excerpt.
print('importing data...')
a = time.time()
entire_data = get_list_tuples("/home/anish/Documents/DataSci/DataSets/sms/SMSSpamCollection")
print("It took "+str(time.time()-a)+" seconds to import data")
print('data imported')
# Fixed seed so the shuffle, and therefore the split, is reproducible.
random.seed(1)
random.shuffle(entire_data)
train = entire_data[:250]
# Start at 250, not 251: the training slice ends at index 249, so starting
# at 251 silently dropped element 250 from both sets.
test = entire_data[250:500]
print('training data')
a = time.time()
cl = NaiveBayesClassifier(train)
print("It took "+str(time.time()-a)+" seconds to train data")
print('data trained, now checking accuracy:')
accuracy = cl.accuracy(test)
print("accuracy: "+str(accuracy))
print(cl.classify("Hey bud, what's up")) #ham
print(cl.classify("Get a brand new mobile phone by being an agent of The Mob! Plus loads more goodies! For more info just text MAT to 87021")) #spam

開發者ID:anishmashankar，項目名稱:experiments，代碼行數:30，代碼來源:spamvsham.py

示例8: test_init_with_csv_file_without_format_specifier

 def test_init_with_csv_file_without_format_specifier(self):
     """A classifier built from CSV_FILE with no format argument predicts 'pos' and stores unicode text."""
     classifier = NaiveBayesClassifier(CSV_FILE)
     predicted = classifier.classify("I feel happy this morning")
     assert_equal(predicted, 'pos')
     # First element of the first training entry is the document text.
     first_entry = classifier.train_set[0]
     assert_true(isinstance(first_entry[0], unicode))

開發者ID:allenwade3，項目名稱:TextBlob，代碼行數:5，代碼來源:test_classifiers.py

示例9: test_custom_feature_extractor

 def test_custom_feature_extractor(self):
     """With custom_extractor supplied, the first train_features entry is labeled 'positive' after one classify call."""
     classifier = NaiveBayesClassifier(self.train_set, custom_extractor)
     # The return value is not checked; only the call itself matters here.
     classifier.classify("Yay! I'm so happy it works.")
     first_label = classifier.train_features[0][1]
     assert_equal(first_label, 'positive')

開發者ID:robertlayton，項目名稱:TextBlob，代碼行數:4，代碼來源:test_classifiers.py

示例10: open_workbook

# Build (text, label) training pairs from the first sheet of the training workbook.
train = []

book = open_workbook('C:/Documents and Settings/rojin.varghese/Desktop/LargeTest/One_Category_Train.xls')
sheet1 = book.sheet_by_index(0)
print("Training.............\n")
for j in range(sheet1.nrows):
    # Column 1 is used as the text and column 2 as its label.
    line1 = sheet1.cell_value(j, 1)
    # Strip '-', '*', '>' and newlines; the raw string avoids the invalid
    # '\-' escape and matches the pattern used in the classification loop.
    line1 = re.sub(r'[-*>]', '', line1)
    line1 = re.sub('[\n]', '', line1)
    line2 = sheet1.cell_value(j, 2)
    # Append in place instead of rebuilding the whole list on every row.
    train.append((line1, line2))

print("Training algo....\n")
cl = NaiveBayesClassifier(train)

book = open_workbook('C:/Documents and Settings/rojin.varghese/Desktop/LargeTest/One_Category_Test.xls')
sheet = book.sheet_by_index(0)

# Output workbook; nothing is written to it within this excerpt.
book1 = xlwt.Workbook()
sh = book1.add_sheet("sheet")

print("Classifying...........")

for j in range(sheet.nrows):
    # NOTE(review): `id` shadows the builtin; the name is kept because code
    # past the end of this excerpt may rely on it.
    id = sheet.cell_value(j,0)
    line = sheet.cell_value(j,1)
    line = re.sub('[-*>]', '', line)
    line = re.sub('[\n]', '', line)
    a = cl.classify(line)

開發者ID:rojinva，項目名稱:Email-classifier，代碼行數:30，代碼來源:Text+classification+using+text+blob.py

示例11: test_train_from_lists_of_words

 def test_train_from_lists_of_words(self):
     """Training on whitespace-split word lists gives the same accuracy as training on the raw strings."""
     tokenized_train = []
     for doc, label in train_set:
         tokenized_train.append((doc.split(), label))
     classifier = NaiveBayesClassifier(tokenized_train)
     list_trained_accuracy = classifier.accuracy(test_set)
     string_trained_accuracy = self.classifier.accuracy(test_set)
     assert_equal(list_trained_accuracy, string_trained_accuracy)

開發者ID:shidao-fm，項目名稱:TextBlob，代碼行數:6，代碼來源:test_classifiers.py

示例12: open

infile = "data/yelp_academic_dataset_review.json"

# Read at most the first 1000 reviews; each line is parsed as one JSON object.
# The limit is checked before appending, so exactly 1000 are kept.
max_reviews = 1000
data = []
with open(infile, 'r') as fin:
    for i, line in enumerate(fin):
        if i >= max_reviews:
            break
        review = json.loads(line)
        data.append((review['text'], float(review['stars'])))

# First k reviews train the classifier; the remainder are held out for testing.
k = 500
training_set, test_set = data[:k], data[k:]
print("building classifier")
cl = NaiveBayesClassifier(training_set)
print("built classifier")

# Compute accuracy
print("computing accuracy")
print("Accuracy: {0}".format(cl.accuracy(test_set)))
print("computed accuracy")

# Show 5 most informative features
print("showing features")
cl.show_informative_features(5)
print("done :)")

開發者ID:GayathriSrinivas，項目名稱:cmpe239_project，代碼行數:29，代碼來源:classify.py

示例13: TestNaiveBayesClassifier

class TestNaiveBayesClassifier(unittest.TestCase):
    """Tests for NaiveBayesClassifier, built from the module-level train_set."""

    def setUp(self):
        """Create the classifier under test from train_set."""
        self.classifier = NaiveBayesClassifier(train_set)

    def test_basic_extractor(self):
        """basic_extractor marks words in the text as present and 'amazing' as absent."""
        sentence = "I feel happy this morning."
        features = basic_extractor(sentence, train_set)
        assert_true(features["contains(feel)"])
        assert_true(features['contains(morning)'])
        assert_false(features["contains(amazing)"])

    def test_default_extractor(self):
        """The classifier's extract_features matches basic_extractor on the same text."""
        sentence = "I feel happy this morning."
        actual = self.classifier.extract_features(sentence)
        expected = basic_extractor(sentence, train_set)
        assert_equal(actual, expected)

    def test_classify(self):
        """A happy sentence is labeled 'positive' and train_set keeps its length."""
        label = self.classifier.classify("I feel happy this morning")
        assert_equal(label, 'positive')
        stored_count = len(self.classifier.train_set)
        assert_equal(stored_count, len(train_set))

    def test_classify_a_list_of_words(self):
        """classify also accepts a list of words."""
        words = ["I", "feel", "happy", "this", "morning"]
        label = self.classifier.classify(words)
        assert_equal(label, "positive")

    def test_train_from_lists_of_words(self):
        """Training on whitespace-split word lists gives the same accuracy as training on strings."""
        tokenized_train = []
        for doc, label in train_set:
            tokenized_train.append((doc.split(), label))
        classifier = NaiveBayesClassifier(tokenized_train)
        list_trained_accuracy = classifier.accuracy(test_set)
        string_trained_accuracy = self.classifier.accuracy(test_set)
        assert_equal(list_trained_accuracy, string_trained_accuracy)

    def test_prob_classify(self):
        """prob_classify ranks 'positive' above 'negative' for a happy sentence."""
        dist = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(dist.max(), "positive")
        assert_true(dist.prob("positive") > dist.prob("negative"))

    def test_accuracy(self):
        """accuracy on test_set returns a float."""
        score = self.classifier.accuracy(test_set)
        assert_true(isinstance(score, float))

    def test_update(self):
        """update adds one training entry and raises the 'positive' probability for the added text."""
        before = self.classifier.prob_classify("lorem ipsum")
        length_before = len(self.classifier.train_set)
        self.classifier.update([("lorem ipsum", "positive")])
        length_after = len(self.classifier.train_set)
        after = self.classifier.prob_classify("lorem ipsum")
        assert_true(after.prob("positive") > before.prob("positive"))
        assert_equal(length_before + 1, length_after)

    def test_labels(self):
        """labels() contains both 'positive' and 'negative'."""
        known_labels = self.classifier.labels()
        assert_true("positive" in known_labels)
        assert_true("negative" in known_labels)

    def test_show_informative_features(self):
        """Smoke test: the call is made and its result is not checked."""
        self.classifier.show_informative_features()

    def test_informative_features(self):
        """informative_features(3) returns a list whose first item is a tuple."""
        top_features = self.classifier.informative_features(3)
        assert_true(isinstance(top_features, list))
        assert_true(isinstance(top_features[0], tuple))

    def test_custom_feature_extractor(self):
        """With custom_extractor supplied, the first train_features entry is labeled 'positive'."""
        classifier = NaiveBayesClassifier(train_set, custom_extractor)
        # The return value is not checked; only the call itself matters here.
        classifier.classify("Yay! I'm so happy it works.")
        first_label = classifier.train_features[0][1]
        assert_equal(first_label, 'positive')

    def test_init_with_csv_file(self):
        """A classifier built from CSV_FILE with format="csv" predicts 'pos' and stores unicode text."""
        classifier = NaiveBayesClassifier(CSV_FILE, format="csv")
        predicted = classifier.classify("I feel happy this morning")
        assert_equal(predicted, 'pos')
        first_entry = classifier.train_set[0]
        assert_true(isinstance(first_entry[0], unicode))

    def test_init_with_csv_file_without_format_specifier(self):
        """A classifier built from CSV_FILE with no format argument behaves the same way."""
        classifier = NaiveBayesClassifier(CSV_FILE)
        predicted = classifier.classify("I feel happy this morning")
        assert_equal(predicted, 'pos')
        first_entry = classifier.train_set[0]
        assert_true(isinstance(first_entry[0], unicode))

    def test_init_with_json_file(self):
        """A classifier built from JSON_FILE with format="json" predicts 'pos' and stores unicode text."""
        classifier = NaiveBayesClassifier(JSON_FILE, format="json")
        predicted = classifier.classify("I feel happy this morning")
        assert_equal(predicted, 'pos')
        first_entry = classifier.train_set[0]
        assert_true(isinstance(first_entry[0], unicode))

    def test_init_with_json_file_without_format_specifier(self):
        """A classifier built from JSON_FILE with no format argument behaves the same way."""
        classifier = NaiveBayesClassifier(JSON_FILE)
        predicted = classifier.classify("I feel happy this morning")
        assert_equal(predicted, 'pos')
        first_entry = classifier.train_set[0]
        assert_true(isinstance(first_entry[0], unicode))

    def test_accuracy_on_a_csv_file(self):
        """accuracy accepts CSV_FILE directly and returns a float."""
        score = self.classifier.accuracy(CSV_FILE)
        assert_true(isinstance(score, float))

    def test_accuracy_on_json_file(self):
        """accuracy accepts JSON_FILE directly and returns a float."""
        score = self.classifier.accuracy(JSON_FILE)
        assert_true(isinstance(score, float))

#.........這裏部分代碼省略.........

開發者ID:shidao-fm，項目名稱:TextBlob，代碼行數:101，代碼來源:test_classifiers.py

示例14: setUp

 def setUp(self):
     """Attach a NaiveBayesClassifier built from train_set to the test instance."""
     classifier = NaiveBayesClassifier(train_set)
     self.classifier = classifier

開發者ID:shidao-fm，項目名稱:TextBlob，代碼行數:2，代碼來源:test_classifiers.py

示例15: open

        train.append((val, "english"))

# Add every usable line of spanish.txt to `train` with the "spanish" label.
# `train` is created above this excerpt.
with open("spanish.txt", "r") as span:
    for val in span:
        try:
            # Only the encode call is guarded; lines that raise here are skipped.
            val = val.encode("ascii", "ignore")
        except UnicodeDecodeError:
            continue
        # Remove tabs and line terminators.
        val = val.replace("\t", "").replace("\n", "").replace("\r", "")

        train.append((val, "spanish"))


cl = NaiveBayesClassifier(train)

# NOTE(review): these handles are not closed within this excerpt; confirm
# they are closed once the classification loop finishes.
english_links = open("english_links.txt", "w")
spanish_links = open("spanish_links.txt", "w")

for link in classes:
    r = requests.get(link)
    html = lxml.html.fromstring(r.text)
    obj = html.xpath('//div[@class="postingBody"]')
    post_body = [elem.text_content() for elem in obj]
    if post_body != []:
        text = post_body[0]
    try:
        text = text.encode("ascii", "ignore")
        text = text.replace("\t", "")
        text = text.replace("\n", "")

開發者ID:EricSchles，項目名稱:grab_analyze，代碼行數:31，代碼來源:grab_classify.py

注：本文中的text.classifiers.NaiveBayesClassifier類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。