本文整理汇总了Python中text.classifiers.NaiveBayesClassifier.classify方法的典型用法代码示例。如果您正苦于以下问题：Python NaiveBayesClassifier.classify方法的具体用法？Python NaiveBayesClassifier.classify怎么用？Python NaiveBayesClassifier.classify使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类text.classifiers.NaiveBayesClassifier的用法示例。
在下文中一共展示了NaiveBayesClassifier.classify方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_Textblog
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
def test_Textblog():
    """Train a NaiveBayesClassifier on tokenized sentences and classify one.

    Every training sentence is split into tokens with ``nltk.word_tokenize``
    and paired with its original label.  The tokenized training set is
    printed, a classifier is built from it, and a tokenized probe sentence is
    classified; the probe tokens and the predicted label are printed on one
    line.
    """
    import nltk

    train = [
        ('I love this sandwich.', 'pos'),
        ('This is an amazing place!', 'pos'),
        ('I feel very good about these beers.', 'pos'),
        ('This is my best work.', 'pos'),
        ("What an awesome view", 'pos'),
        ('I do not like this restaurant', 'neg'),
        ('I am tired of this stuff.', 'neg'),
        ("I can't deal with this", 'neg'),
        ('He is my sworn enemy!', 'neg'),
        ('My boss is horrible.', 'neg'),
    ]

    # Replace each sentence by its token list; the label is carried over as-is.
    new_train = [(nltk.word_tokenize(sentence), label)
                 for sentence, label in train]
    print(new_train)

    cl = NaiveBayesClassifier(new_train)
    new_test = nltk.word_tokenize("I don't like their pizza.")
    # Format explicitly so the output line is the same whether ``print`` is
    # the Python 2 statement or the Python 3 function.
    print("%s %s" % (new_test, cl.classify(new_test)))
示例2: NaiveBayesClassifier
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
('He is my sworn enemy!', 'neg'),
('My boss is horrible.', 'neg')
]
# Labelled sentences used only for the accuracy figure printed at the end.
test = [
    ('The beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ("I feel amazing!", 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg'),
]

# `train` is defined earlier in the script (not fully visible in this excerpt).
cl = NaiveBayesClassifier(train)

# Classify two raw strings; the original author expected "pos" then "neg".
for sample in ("Their burgers are amazing.", "I don't like their pizza."):
    print(cl.classify(sample))

# A TextBlob constructed with `classifier=cl` can be classified as a whole
# and sentence by sentence.
blob = TextBlob(
    "The beer was amazing. But the hangover was horrible. "
    "My boss was not pleased.",
    classifier=cl,
)
print(blob)
print(blob.classify())
for sentence in blob.sentences:
    print(sentence)
    print(sentence.classify())

# Report accuracy on the labelled sentences above.
print("Accuracy: {0}".format(cl.accuracy(test)))
示例3: NaiveBayesClassifier
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
('He is my sworn enemy!', 'neg'),
('My boss is horrible.', 'neg')
]
# Labelled sentences used for the accuracy figure printed at the end.
test = [
    ('The beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ("I feel amazing!", 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg'),
]

# First pass: train on `train`, defined earlier in the script (not fully
# visible in this excerpt).
print('initial training going on....')
cl = NaiveBayesClassifier(train)
print('initial training done.')

# Collect one (word list, category) pair per file id of every category.
# NOTE(review): `movie_reviews` is imported outside this excerpt - presumably
# the NLTK movie review corpus; confirm against the file's imports.
print('now gathering reviews...')
reviews = []
for category in movie_reviews.categories():
    for fileid in movie_reviews.fileids(category):
        reviews.append((list(movie_reviews.words(fileid)), category))
random.shuffle(reviews)
new_train = reviews[:200]  # only the first 200 shuffled reviews are used
print('reviews gathered.')

# Second pass: extend the existing classifier with the review data.
print('now training using the new data...')
cl.update(new_train)
print('trained and ready!')

print(cl.classify("I hated the movie and hated the food"))

# Accuracy is measured on the small sentence set above, not on the reviews.
accuracy = cl.accuracy(test)
print("Accuracy: {0}".format(accuracy))
示例4: range
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
# dev - years
# Context managers close the files even if reading raises.
with codecs.open("years-dev.txt", 'r', 'utf-8') as inputfile:
    dev_train = inputfile.readlines()

# dev - content
with codecs.open("contents-dev.txt", 'r', 'utf-8') as inputfile:
    contents_dev = inputfile.readlines()

# Training set: every second line of `contents_train` (indices 0, 2, ...,
# 3998) is paired with entry i // 2 of `years_train`.  Both lists are loaded
# earlier in the script (not visible in this excerpt).
train_set = []
g = range(0, 4000, 2)
for i in g:
    # Floor division keeps the index an integer on Python 2 and Python 3.
    train_set.append((contents_train[i], years_train[i // 2]))
print("tu się robi")
cl = NaiveBayesClassifier(train_set)
print("a tu się zrobiło")

# Classify every second dev line and write the predicted labels back to
# back; no separator is added between them.
with open("classified.txt", "w") as outputfile:
    g = range(0, len(contents_dev), 2)
    for i in g:
        result = cl.classify(contents_dev[i])
        print(i)
        outputfile.write(str(result))
    print("zmieliło")
示例5: test_init_with_json_file
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
def test_init_with_json_file(self):
    """Build a classifier from JSON_FILE with an explicit ``format="json"``.

    Checks that a sample sentence is labelled 'pos' and that the text of the
    first training example is a ``unicode`` object.
    """
    classifier = NaiveBayesClassifier(JSON_FILE, format="json")
    predicted = classifier.classify("I feel happy this morning")
    assert_equal(predicted, 'pos')
    first_example = classifier.train_set[0]
    assert_true(isinstance(first_example[0], unicode))
示例6: test_init_with_csv_file_without_format_specifier
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
def test_init_with_csv_file_without_format_specifier(self):
    """Build a classifier from CSV_FILE without passing ``format``.

    Checks that a sample sentence is labelled 'pos' and that the text of the
    first training example is a ``unicode`` object.
    """
    classifier = NaiveBayesClassifier(CSV_FILE)
    predicted = classifier.classify("I feel happy this morning")
    assert_equal(predicted, 'pos')
    first_example = classifier.train_set[0]
    assert_true(isinstance(first_example[0], unicode))
示例7: TextBlob
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
msg = TextBlob(tabsep[1])
try:
words=msg.words
except:
continue
for word in words:
if word not in stopwords.words() and not word.isdigit():
list_tuples.append((word.lower(),tabsep[0]))
c+=1
if c==500:
break
return list_tuples
# Load the labelled data and time how long the import takes.
print('importing data...')
a = time.time()
entire_data = get_list_tuples("/home/anish/Documents/DataSci/DataSets/sms/SMSSpamCollection")
print("It took "+str(time.time()-a)+" seconds to import data")
print('data imported')

# Fixed seed so the shuffle, and therefore the split, is reproducible.
random.seed(1)
random.shuffle(entire_data)
train = entire_data[:250]
# Start at 250 so the split is contiguous; the previous slice began at 251
# and silently dropped element 250 from both sets.
test = entire_data[250:500]

print('training data')
a = time.time()
cl = NaiveBayesClassifier(train)
print("It took "+str(time.time()-a)+" seconds to train data")

print('data trained, now checking accuracy:')
accuracy = cl.accuracy(test)
print("accuracy: "+str(accuracy))

# Spot checks; the original author expected "ham" for the first message and
# "spam" for the second.
print(cl.classify("Hey bud, what's up"))
print(cl.classify("Get a brand new mobile phone by being an agent of The Mob! Plus loads more goodies! For more info just text MAT to 87021"))
示例8: test_init_with_tsv_file
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
def test_init_with_tsv_file(self):
    """Build a classifier from TSV_FILE without passing ``format``.

    Checks that a sample sentence is labelled "pos" and that the text of the
    first training example is a ``unicode`` object.
    """
    classifier = NaiveBayesClassifier(TSV_FILE)
    predicted = classifier.classify("I feel happy this morning")
    assert_equal(predicted, "pos")
    first_example = classifier.train_set[0]
    assert_true(isinstance(first_example[0], unicode))
示例9: NaiveBayesClassifier
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
from text.classifiers import NaiveBayesClassifier
# Labelled sentences the classifier is trained on.
train = [
    ('I love this sandwich.', 'pos'),
    ('This is an amazing place!', 'pos'),
    ('I feel very good about these beers.', 'pos'),
    ('This is my best work.', 'pos'),
    ("What an awesome view", 'pos'),
    ('I do not like this restaurant', 'neg'),
    ('I am tired of this stuff.', 'neg'),
    ("I can't deal with this", 'neg'),
    ('He is my sworn enemy!', 'neg'),
    ('My boss is horrible.', 'neg'),
]

# Labelled sentences kept alongside the training data; nothing in this
# snippet reads them.
test = [
    ('The beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ("I feel amazing!", 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg'),
]

# Each step echoes the statement it is about to run, prefixed with "> ",
# followed by the result where there is one.
print('> cl = NaiveBayesClassifier(train)')
cl = NaiveBayesClassifier(train)

print('> cl.classify("Their burgers are amazing")')
print(cl.classify("Their burgers are amazing"))

print('> cl.classify("I don\'t like their pizza.")')
print(cl.classify("I don't like their pizza."))
示例10: TestNaiveBayesClassifier
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
class TestNaiveBayesClassifier(unittest.TestCase):
    """Tests for NaiveBayesClassifier.

    The fixtures ``train_set``, ``test_set``, ``CSV_FILE``, ``JSON_FILE`` and
    ``custom_extractor`` are defined elsewhere in the module.
    """

    def setUp(self):
        """Train a fresh classifier on ``train_set`` before every test."""
        self.classifier = NaiveBayesClassifier(train_set)

    def _assert_file_trained_classifier(self, cl):
        """Shared checks for a classifier that was built from a data file.

        The sample sentence must be labelled 'pos' and the text of the first
        training example must be a ``unicode`` object.
        """
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_basic_extractor(self):
        """basic_extractor flags words of the text that occur in the training set."""
        text = "I feel happy this morning."
        feats = basic_extractor(text, train_set)
        assert_true(feats["contains(feel)"])
        assert_true(feats['contains(morning)'])
        assert_false(feats["contains(amazing)"])

    def test_default_extractor(self):
        """extract_features gives the same result as basic_extractor."""
        text = "I feel happy this morning."
        assert_equal(self.classifier.extract_features(text), basic_extractor(text, train_set))

    def test_classify(self):
        """A sample sentence is 'positive'; the stored training set has the fixture's length."""
        res = self.classifier.classify("I feel happy this morning")
        assert_equal(res, 'positive')
        assert_equal(len(self.classifier.train_set), len(train_set))

    def test_classify_a_list_of_words(self):
        """classify accepts a list of words as well as a string."""
        res = self.classifier.classify(["I", "feel", "happy", "this", "morning"])
        assert_equal(res, "positive")

    def test_train_from_lists_of_words(self):
        """Training on whitespace-split word lists yields the same accuracy as training on strings."""
        # classifier can be trained on lists of words instead of strings
        train = [(doc.split(), label) for doc, label in train_set]
        classifier = NaiveBayesClassifier(train)
        assert_equal(classifier.accuracy(test_set),
                     self.classifier.accuracy(test_set))

    def test_prob_classify(self):
        """prob_classify ranks 'positive' above 'negative' for the sample sentence."""
        res = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(res.max(), "positive")
        assert_true(res.prob("positive") > res.prob("negative"))

    def test_accuracy(self):
        """accuracy on ``test_set`` returns a float."""
        acc = self.classifier.accuracy(test_set)
        assert_true(isinstance(acc, float))

    def test_update(self):
        """update adds one training example and raises P('positive') for that text."""
        res1 = self.classifier.prob_classify("lorem ipsum")
        original_length = len(self.classifier.train_set)
        self.classifier.update([("lorem ipsum", "positive")])
        new_length = len(self.classifier.train_set)
        res2 = self.classifier.prob_classify("lorem ipsum")
        assert_true(res2.prob("positive") > res1.prob("positive"))
        assert_equal(original_length + 1, new_length)

    def test_labels(self):
        """labels() contains both 'positive' and 'negative'."""
        labels = self.classifier.labels()
        assert_true("positive" in labels)
        assert_true("negative" in labels)

    def test_show_informative_features(self):
        """Smoke test: show_informative_features can be called; its result is not checked."""
        self.classifier.show_informative_features()

    def test_informative_features(self):
        """informative_features(3) returns a list whose first item is a tuple."""
        feats = self.classifier.informative_features(3)
        assert_true(isinstance(feats, list))
        assert_true(isinstance(feats[0], tuple))

    def test_custom_feature_extractor(self):
        """A custom extractor can be passed as the second positional argument."""
        cl = NaiveBayesClassifier(train_set, custom_extractor)
        cl.classify("Yay! I'm so happy it works.")
        assert_equal(cl.train_features[0][1], 'positive')

    def test_init_with_csv_file(self):
        """Build from CSV_FILE with an explicit ``format="csv"``."""
        self._assert_file_trained_classifier(
            NaiveBayesClassifier(CSV_FILE, format="csv"))

    def test_init_with_csv_file_without_format_specifier(self):
        """Build from CSV_FILE without passing ``format``."""
        self._assert_file_trained_classifier(NaiveBayesClassifier(CSV_FILE))

    def test_init_with_json_file(self):
        """Build from JSON_FILE with an explicit ``format="json"``."""
        self._assert_file_trained_classifier(
            NaiveBayesClassifier(JSON_FILE, format="json"))

    def test_init_with_json_file_without_format_specifier(self):
        """Build from JSON_FILE without passing ``format``."""
        self._assert_file_trained_classifier(NaiveBayesClassifier(JSON_FILE))

    def test_accuracy_on_a_csv_file(self):
        """accuracy accepts CSV_FILE as the test set and returns a float."""
        a = self.classifier.accuracy(CSV_FILE)
        assert_true(isinstance(a, float))

    def test_accuracy_on_json_file(self):
        """accuracy accepts JSON_FILE as the test set and returns a float."""
        a = self.classifier.accuracy(JSON_FILE)
        assert_true(isinstance(a, float))
#.........这里部分代码省略.........
示例11: open
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
# Fetch every link in `classes`, extract the posting body, and record the
# link plus its cleaned text in the output file matching the language that
# `cl` predicts.  `classes` and `cl` are defined earlier in the script (not
# visible in this excerpt).  The context manager closes both files even if a
# request or the classifier raises part-way through.
with open("english_links.txt", "w") as english_links, \
        open("spanish_links.txt", "w") as spanish_links:
    for link in classes:
        r = requests.get(link)
        html = lxml.html.fromstring(r.text)
        obj = html.xpath('//div[@class="postingBody"]')
        post_body = [elem.text_content() for elem in obj]
        if not post_body:
            # No posting body on this page: nothing to classify.
            continue

        text = post_body[0]
        try:
            # Drop every character that cannot be represented in ASCII.
            text = text.encode("ascii", "ignore")
        except UnicodeDecodeError:
            continue
        # Collapse the text onto one line.
        text = text.replace("\t", "").replace("\n", "").replace("\r", "")

        # Classify once and reuse the label for both comparisons.
        label = cl.classify(text)
        if label == "english":
            out = english_links
        elif label == "spanish":
            out = spanish_links
        else:
            # Any other label is not recorded.
            continue
        out.write("link= " + link + "\n\n")
        out.write("description= " + text + "\n\n")
示例12: range
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
# Characters removed from every text cell before training and classifying:
# '-', '*', '>' and newline.  Compiled once and shared by both loops.
noise_chars = re.compile(r'[-*>\n]')

# Extend the training data with one (cleaned text, label) pair per row of
# `sheet1`: column 1 holds the text, column 2 the label.  `sheet1` and
# `train` are defined earlier in the script (not visible in this excerpt).
# A single concatenation avoids re-copying the whole list for every row.
train = train + [
    (noise_chars.sub('', sheet1.cell_value(j, 1)), sheet1.cell_value(j, 2))
    for j in range(sheet1.nrows)
]

print("Training algo....\n")
cl = NaiveBayesClassifier(train)

book = open_workbook('C:/Documents and Settings/rojin.varghese/Desktop/LargeTest/One_Category_Test.xls')
sheet = book.sheet_by_index(0)
book1 = xlwt.Workbook()
sh = book1.add_sheet("sheet")

# For every input row, write its id (column 0) and the predicted category
# into the same row of the result sheet.
print("Classifying...........")
for j in range(sheet.nrows):
    row_id = sheet.cell_value(j, 0)
    line = noise_chars.sub('', sheet.cell_value(j, 1))
    category = cl.classify(line)
    sh.write(j, 0, row_id)
    sh.write(j, 1, category)
book1.save("C:/Documents and Settings/rojin.varghese/Desktop/LargeTest/One_Category_result_new.xls")
示例13: TestNaiveBayesClassifier
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
class TestNaiveBayesClassifier(unittest.TestCase):
    """Tests for NaiveBayesClassifier using small in-test sentence fixtures."""

    def setUp(self):
        """Create the training set, a classifier trained on it, and the test set."""
        self.train_set = [
            ('I love this car', 'positive'),
            ('This view is amazing', 'positive'),
            ('I feel great this morning', 'positive'),
            ('I am so excited about the concert', 'positive'),
            ('He is my best friend', 'positive'),
            ('I do not like this car', 'negative'),
            ('This view is horrible', 'negative'),
            ('I feel tired this morning', 'negative'),
            ('I am not looking forward to the concert', 'negative'),
            ('He is my enemy', 'negative'),
        ]
        self.classifier = NaiveBayesClassifier(self.train_set)
        self.test_set = [
            ('I feel happy this morning', 'positive'),
            ('Larry is my friend.', 'positive'),
            ('I do not like that man.', 'negative'),
            ('My house is not great.', 'negative'),
            ('Your song is annoying.', 'negative'),
        ]

    def test_basic_extractor(self):
        """basic_extractor flags words of the text that occur in the training set."""
        sentence = "I feel happy this morning."
        features = basic_extractor(sentence, self.train_set)
        assert_true(features["contains(feel)"])
        assert_true(features['contains(morning)'])
        assert_false(features["contains(amazing)"])

    def test_default_extractor(self):
        """extract_features gives the same result as basic_extractor."""
        sentence = "I feel happy this morning."
        actual = self.classifier.extract_features(sentence)
        expected = basic_extractor(sentence, self.train_set)
        assert_equal(actual, expected)

    def test_classify(self):
        """A sample sentence is 'positive'; the stored training set has the fixture's length."""
        label = self.classifier.classify("I feel happy this morning")
        assert_equal(label, 'positive')
        assert_equal(len(self.classifier.train_set), len(self.train_set))

    def test_prob_classify(self):
        """prob_classify ranks 'positive' above 'negative' for the sample sentence."""
        dist = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(dist.max(), "positive")
        assert_true(dist.prob("positive") > dist.prob("negative"))

    def test_accuracy(self):
        """accuracy on the test set returns a float."""
        score = self.classifier.accuracy(self.test_set)
        assert_true(isinstance(score, float))

    def test_update(self):
        """update adds one training example and raises P('positive') for that text."""
        before = self.classifier.prob_classify("lorem ipsum")
        size_before = len(self.classifier.train_set)
        self.classifier.update([("lorem ipsum", "positive")])
        size_after = len(self.classifier.train_set)
        after = self.classifier.prob_classify("lorem ipsum")
        assert_true(after.prob("positive") > before.prob("positive"))
        assert_equal(size_before + 1, size_after)

    def test_show_informative_features(self):
        """Smoke test: show_informative_features can be called; its result is not checked."""
        self.classifier.show_informative_features()

    def test_informative_features(self):
        """informative_features(3) returns a list whose first item is a tuple."""
        top = self.classifier.informative_features(3)
        assert_true(isinstance(top, list))
        assert_true(isinstance(top[0], tuple))

    def test_custom_feature_extractor(self):
        """A custom extractor can be passed as the second positional argument."""
        custom = NaiveBayesClassifier(self.train_set, custom_extractor)
        custom.classify("Yay! I'm so happy it works.")
        first_label = custom.train_features[0][1]
        assert_equal(first_label, 'positive')
示例14: NaiveBayesClassifier
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
("microbe mammal origins evolution life forms. Explore biology genetics evolution", 'Science'),
('art news exhibitions events artists galleries museums editions books mapping the art.', 'Art'),
('art daily art Museums Exhibits Artists Milestones Digital Art Architecture', 'Art'),
("exhibitions interesting random weirdness photography painting prints design sculpture.", 'Art'),
('artists galleries museums and auction houses movies documentary.', 'Art'),
('Medicine, Health, Drugs, drugs fitness nutrition health care mental health drugs diet pregnancy babies cancer AIDS allergies & asthma.', 'Health'),
('Drugs supplements living healthy family pregnancy, energizing moves recipes losing weight feeling great.', 'Health'),
('Weight Loss & Diet Plans Food & Recipes Fitness & Exercise Beauty Balance & Love Sex & Relationships Oral Care yoga Aging Well.', 'Health'),
('Conceive Parenting Newborn & Baby Children Vaccines Raising Fit Kids Pets.', 'Health')
]
## CREATING THE CLASSIFIER ##
# `train` is the labelled list defined above (only its tail is visible in
# this excerpt).
cl = NaiveBayesClassifier(train)

# Store a predicted 'Category' on every document that has a 'content' field.
for articles in db_collection_tweets.find({'content': {'$exists': True}}):
    category = cl.classify(articles['content'])
    db_collection_tweets.update({'_id': articles['_id']}, {'$set': {'Category': category}})

## DISTRIBUTION OF THE CATEGORIES IN THE SAMPLE ##
# Listing all the categories
list_cat = [
    articles['Category']
    for articles in db_collection_tweets.find({'Category': {'$exists': True}})
]

# Counting the number of occurrences of each category, kept as a plain dict
# mapping category -> count.
cat_dict = dict(Counter(list_cat))
'''
示例15: test_custom_feature_extractor
# 需要导入模块: from text.classifiers import NaiveBayesClassifier [as 别名]
# 或者: from text.classifiers.NaiveBayesClassifier import classify [as 别名]
def test_custom_feature_extractor(self):
    """A custom extractor can be passed as the second positional argument.

    Classification must run with it, and the label of the first entry in
    ``train_features`` must be 'positive'.
    """
    custom = NaiveBayesClassifier(self.train_set, custom_extractor)
    custom.classify("Yay! I'm so happy it works.")
    first_label = custom.train_features[0][1]
    assert_equal(first_label, 'positive')