This article collects typical usage examples of the Python method textblob.classifiers.NaiveBayesClassifier.show_informative_features. If you are wondering what NaiveBayesClassifier.show_informative_features does, how to call it, or what real usage looks like, the curated code examples below should help. You can also read further about the class this method belongs to, textblob.classifiers.NaiveBayesClassifier.
The 15 code examples of NaiveBayesClassifier.show_informative_features shown below are sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
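Before the examples, here is a minimal, self-contained sketch of the method's basic usage. The tiny training set below is invented purely for illustration; show_informative_features(n) prints the n most informative features to standard output and returns None.

from textblob.classifiers import NaiveBayesClassifier

# Toy training data, invented for illustration only
train = [
    ("I love this sandwich.", "pos"),
    ("This is an amazing place!", "pos"),
    ("I feel very good about these beers.", "pos"),
    ("This is my worst day ever.", "neg"),
    ("I can't deal with this horrible service.", "neg"),
    ("He is my sworn enemy.", "neg"),
]

cl = NaiveBayesClassifier(train)
print(cl.classify("Their service was terrible."))  # expected: 'neg'
cl.show_informative_features(5)  # prints the 5 most informative features, returns None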
Example 1: generateIntentionalityClassifier
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
def generateIntentionalityClassifier():
    db = dbClient()
    training = db.training
    cursor = training.find()

    # Reduce the number of records
    crs = list(cursor)
    random.shuffle(crs)

    # split into 90% training and 10% test sets
    p = int(len(crs) * .01)
    cr_test = crs[0:p]
    print "Test", len(cr_test)

    data = []
    t = ""
    for td in cr_test:
        tgram = td["triGram"]
        label = td["label"]
        #print tgram
        for tg in tgram:
            d = '-'.join(tg)
            t = t + " " + d
        #print t
        data.append((t, label))
        t = ""
    #print data

    cl = NaiveBayesClassifier(data)
    cl.show_informative_features(30)

    path = "/media/University/UniversityDisc/2-Master/MasterThesis/EjecucionTesis/Desarrollo/PythonProjects/QueryAnalyzer/Models/"
    saveTrainedClassifier(path, cl, "my_classifier_v6.pickle")
    return cl
Example 2: generateClassifier
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
def generateClassifier():
    train = getIntentDataset()
    cl = NaiveBayesClassifier(train)
    cl.show_informative_features(5)
    path = "/media/University/UniversityDisc/2-Master/MasterThesis/EjecucionTesis/Desarrollo/PythonProjects/QueryAnalyzer/Models/"
    saveTrainedClassifier(path, cl, "intent_classifier_2.pickle")
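saveTrainedClassifier in Examples 1 and 2 is a project-specific helper whose implementation is not shown. As a rough, hypothetical sketch only, it presumably serializes the trained classifier to path + filename, for instance with pickle:

import os
import pickle

def saveTrainedClassifier(path, classifier, filename):
    # Hypothetical reconstruction: write the trained classifier to <path>/<filename>
    with open(os.path.join(path, filename), "wb") as f:
        pickle.dump(classifier, f)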
Example 3: create_sentiment
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
def create_sentiment():
    """
    Train sentiment model and save.

    Input type: None
    Output: Model as pickle
    """
    random.seed(1)

    test = [
        ("The dude presenting Unravel seems like one of the most genuine game developers Ive ever seen I really hope this game works out for him",'pos'),
        ("His hands are shaking Dude looks so stoked and scared at the same time",'pos'),
        ("Right I just felt like I was watching his dream come true It was nice The game looks very well done as well Good for him",'pos'),
        ("Seriously Unravel looks really good actually and honestly seeing him so happy about what hes made is contagious I want to see more of Unravel ",'pos'),
        ("He was so nervous shaking all over his voice quivering",'neg'),
        ("The game looked nice too very cute art style ",'pos'),
        ("You could tell he genuinely wanted to be there it looked like he was even shaking from the excitement I hope it works out for them aswell",'pos'),
        ("However following that up with the weird PvZ thing was odd To say the least",'neg'),
        ("Haha The game did look nice though Im definitely going to keep an eye on it I enjoy supporting such hopeful developers",'pos'),
        ("Very personable This looks like a buy for me As a dev in a other sector I appreciate this passion",'pos'),
        ("I want to give him a cookie",'pos'),
        ("Im getting a copy Im gonna support my indie devs",'pos'),
        ("The twitch leak was accurate It was like a play by play you start speaking French then switch to English",'neg'),
        ("yep exactly what i was thinking lol its important to note that the twitch leak never had them saying it was Dishonored 2 but that they were honored to be here very different",'neg'),
        ("Honored Im 100 sure that was intentional",'neg'),
        ("oh yea for sure but wasnt solid enough evidence imo to be like dishonored 2 confirmed just based off that",'neg'),
        ("The confirmation was who was talking not what they were talking about ",'neg'),
        ("How awkward is it for a pop singer to perform at a video game conference",'neg'),
        ("Oh god did they warn him that he will get zero reaction",'neg'),
        ("I really hope so",'pos'),
        ("Almost as bad as Aisha fucking up her dialogue constantly Shes doing alright though E3 is really becoming a mainstream media event Hollywood has nothing like this ComicCon is the only comparison and they dont dazzle it up like E3",'neg')
    ]

    # Grab review data
    reviews = [
        (list(movie_reviews.words(fileid)), category)
        for category in movie_reviews.categories()
        for fileid in movie_reviews.fileids(category)
    ]
    random.shuffle(reviews)

    # Divide into 10% train/test splits
    new_train, new_test = reviews[:1900], reviews[1900:]

    # Train the NB classifier on the train split
    cl = NaiveBayesClassifier(new_train)

    # Compute accuracy
    accuracy = cl.accuracy(test + new_test)
    print("Accuracy: {0}".format(accuracy))

    # Show 5 most informative features
    cl.show_informative_features(5)

    # Save model for use in creating social model sentiment
    with open('sentiment_clf_full.pkl', 'wb') as pk:
        pickle.dump(cl, pk)
    print 'done saving model'
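To reuse the saved model later, the pickle can be loaded back in a separate process; a minimal sketch, assuming the classifier pickles cleanly with the standard pickle module (Example 7 below uses dill for the same purpose):

import pickle

with open('sentiment_clf_full.pkl', 'rb') as pk:
    cl = pickle.load(pk)

print(cl.classify("The game looks genuinely great"))  # 'pos' or 'neg'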
Example 4: __init__
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
class NaiveBayesAnalyzer:
    cl = None

    def __init__(self):
        with open("training_data.json", "r") as f:
            self.cl = NaiveBayesClassifier(f, format="json")
        self.cl.show_informative_features(20)

    def analyze(self, text):
        return self.cl.classify(text)
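The training_data.json file read above is assumed to follow TextBlob's JSON training format: a list of objects with "text" and "label" keys. A small illustrative file (contents invented) could be produced like this:

import json

# Illustrative contents for training_data.json; the sentences and labels are made up.
examples = [
    {"text": "I love this sandwich.", "label": "pos"},
    {"text": "This is an amazing place!", "label": "pos"},
    {"text": "I do not like this restaurant.", "label": "neg"},
    {"text": "This is my worst day ever.", "label": "neg"},
]

with open("training_data.json", "w") as f:
    json.dump(examples, f)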
Example 5: LanguageDetector
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
class LanguageDetector(object):
    def __init__(self, train=SAMPLE_TRAIN, feature_extractor=FeatureExtractors.last_word_extractor()):
        self.train = train
        self.classifier = NaiveBayesClassifier(self.train, feature_extractor)

    def accuracy(self, test_set=SAMPLE_TEST):
        return self.classifier.accuracy(test_set)

    def show_features(self):
        return self.classifier.show_informative_features(5)
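FeatureExtractors.last_word_extractor() is defined outside this snippet. NaiveBayesClassifier accepts any callable that maps a document to a dict of features (compare the single-argument extractor in Example 11 below), so a plausible, purely hypothetical stand-in might look like:

class FeatureExtractors(object):
    # Hypothetical reconstruction of a "last word" feature-extractor factory.
    @staticmethod
    def last_word_extractor():
        def extractor(document):
            last_word = document.split()[-1]
            return {"last({0})".format(last_word): True}
        return extractor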
Example 6: main
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
def main(argv=0):
    nBObj = naiveBayes()
    businessId = nBObj.deriveBusinessId('yelp_academic_dataset_business.json')
    print len(businessId)
    businessId = businessId[:10]
    train = nBObj.getTrainData('yelp_academic_dataset_review.json', businessId)
    print train

    cl = NaiveBayesClassifier(train)
    print cl.show_informative_features(20)

    print "Opening the file..."
    target = open("naiveBayesResult.txt", 'w')
    for (sentence, rating) in nBObj.testSentences:
        clOutput = nBObj.testSentence(sentence, cl)
        strToWrite = str(rating) + "\t" + clOutput
        target.write(strToWrite)
        target.write("\n")
    target.close()
    nBObj.calcAccuracy()
Example 7: create_sentiment_model
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
def create_sentiment_model():
    random.seed(1)

    # Grab some movie review data
    reviews = [(list(movie_reviews.words(fileid)), category)
               for category in movie_reviews.categories()
               for fileid in movie_reviews.fileids(category)]
    random.shuffle(reviews)
    new_train, new_test = reviews[:1900], reviews[1900:]

    cl = NaiveBayesClassifier(new_train)

    # Compute accuracy
    accuracy = cl.accuracy(new_test)
    print("Accuracy: {0}".format(accuracy))

    # Show 5 most informative features
    print cl.show_informative_features(5)

    with open('sentiment_clf_full.pkl', 'wb') as pk:
        dill.dump(cl, pk)
    print 'done saving model'
Example 8: NaiveBayesClassifier
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
    'what are you working on',
    'what you making')

experience_utterances = [(x, 'experience') for x in experience_utterances]
environment_utterances = [(x, 'enivornment') for x in environment_utterances]
working_on_utterances = [(x, 'working') for x in working_on_utterances]

# FIXME: find better way to flatten lists together
training_set = []
training_set.extend(experience_utterances)
training_set.extend(environment_utterances)
training_set.extend(working_on_utterances)

classifier = NaiveBayesClassifier(training_set)
print(classifier.show_informative_features(), classifier.labels())

bogus_utterances = (
    'if you going to use nltk u may want to check this out spacy .io',
    'sup people? I see the weather\'s getting better over there, Ben.',
    'i had the same problem your having so thats my i made my own.',
    'try http, instead of https'
)

# TODO: Figure out how to make this stronger
dual_utterance = ('how long have you been coding and what IDE do you use',)

test_utterances = ('what are you making',
                   'hey that nyancat is cool, how do you get that?')

for t in test_utterances:
Example 9: activeLearning
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
def activeLearning(NAME, datapath, infile, iterations=3, portion=10):
    logger = logging.getLogger('signature.activeLearning')
    logger.info('Active learning model building')

    # load data
    review_file = open(infile, "r")

    # convert to appropriate format
    review_corpus = list()
    for i, line in enumerate(review_file):
        try:
            # filter out non-ascii symbols
            review = json.loads(line)
            review_corpus.append([re.sub(r'[^\x00-\x7f]', r' ', review['text']), review['textFeatures']])
        except:
            logger.error(review['text'])
            continue
    review_file.close()
    logger.info('Data converted - %d reviews' % len(review_corpus))

    # Shuffle dataset
    #random.seed(1)
    random.shuffle(review_corpus)

    try:
        current_train = json.loads(open(datapath + '%s_current_train.json' % NAME, 'r').read())
    except:
        current_train = list()
    for t in current_train:
        try:
            review_corpus.remove(t[0])
        except:
            pass
    logger.info("Len(current_train) = %d" % len(current_train))

    '''
    Prepare first portion
    '''
    if len(current_train) > 10:
        # train model
        cl = NaiveBayesClassifier(current_train, feature_extractor=feature_extractor)
        # prepare next portion
        ratio = float(sum([int(x[1] == 'g') for x in current_train]))/len(current_train)
        #ratio = 0.5
        logger.info('ratio = %.3f\nclassifying train set ...' % ratio)
        train_classify = [[0.1*random.random() + abs(int(cl.classify(t)=='s')-ratio), t] for t in review_corpus[:1000]]
        train_classify.sort()
        reviews_portion = train_classify[:portion]
    else:
        reviews_portion = [y for y in enumerate(review_corpus[:portion])]

    '''
    main iterations of active learning
    '''
    for iteration in range(iterations):
        # ask for labels
        for p in range(len(reviews_portion)):
            var = input('''\n\n%s \n(%f)\nPlease give the label to the review
            (g - generic / s - specific): ''' % (reviews_portion[p][1][0], reviews_portion[p][0]))
            if var.lower().startswith('g'):
                label = 'g'
            elif var.lower().startswith('s'):
                label = 's'
            elif var.lower().startswith('x'):
                logger.info('Finish')
                break
            else:
                logger.info('Bad label')
                continue

            # prepare train set
            current_train.append((reviews_portion[p][1], label))
            review_corpus.remove(reviews_portion[p][1])

        # train model
        cl = NaiveBayesClassifier(current_train, feature_extractor=feature_extractor)

        # prepare next portion
        ratio = float(sum([int(x[1] == 'g') for x in current_train]))/len(current_train)
        #ratio = 0.5
        logger.info('ratio = %.3f\nclassifying train set ...' % ratio)
        train_classify = [[0.1*random.random() + abs(int(cl.classify(t)=='s')-ratio), t] for t in review_corpus[:1000]]
        train_classify.sort()
        reviews_portion = train_classify[:portion]

        logger.info('Iteration: %d (%d items), Accuracy on train = %.2f' % (iteration, len(current_train), 100*cl.accuracy(current_train)))

        current_train_out = open(datapath + '%s_current_train.json' % NAME, 'w')
        current_train_out.write(json.dumps(current_train))
        current_train_out.close()

    cl.show_informative_features(10)
# ......... the rest of the code is omitted here .........
Example 10: NaiveBayesClassifier
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
#tx_cl = "I feel amazing!"
#tx_prob = "This one's a doozy."
tx_cl = "El subte esta demorado"
tx_prob = "El subte funciona bien"
cl = NaiveBayesClassifier(train)
print cl.classify(tx_cl)
print cl.classify("El subte funciona bien")
prob_dist = cl.prob_classify(tx_prob)
print prob_dist.max()
print round(prob_dist.prob("pos"), 2)
print round(prob_dist.prob("neg"), 2)
print cl.accuracy(data_sets.en_test)
print cl.show_informative_features(5)
#Using TextBlob
blob = TextBlob("No funca por que hay obras para mejorar la cosa", classifier=cl)
print blob.sentiment
print blob.classify()
blob = TextBlob("El subte funciona normal", classifier=cl)
print blob.sentiment
print blob.classify()
blob = TextBlob("Se realizan obras en el subte A", classifier=cl)
print blob.sentiment
print blob.classify()
blob = TextBlob("No funciona, anda averiguar por que. Quizas hay un accidente", classifier=cl)
Example 11: extractor
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
def extractor(word):
    feats = {}
    last_letter = word[-1]
    feats["last_letter({0})".format(last_letter)] = True
    return feats

if __name__ == "__main__":
    # customDicts = {'./texts/wordsEn.txt':'english','./texts/wordsEs.txt':'spanish','./texts/wordsEs2.txt':'spanish'}
    """ customDicts = {'./texts/wordsEn.txt':'english','./texts/wordsEs2.txt':'spanish'}
    for customDictFilename, customDictLang in customDicts.items():
        currentDict = open(customDictFilename,'r')
        for line in currentDict:
            wordTrain = (line.replace('\r','').replace('\n',''),customDictLang)
            train.append(wordTrain)
        currentDict.close() """
    # print train

    lang_detector = NaiveBayesClassifier(train, feature_extractor=extractor)
    # lang_detector = NaiveBayesClassifier(train)
    print lang_detector.accuracy(test)
    lang_detector.show_informative_features(5)

    while 1:
        try:
            line = sys.stdin.readline()
            # print line
            print lang_detector.classify(line)
        except KeyboardInterrupt:
            break
        if not line:
            break
Example 12: encode_tweet
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
def encode_tweet(tweet):  # remove links, usernames and symbols from the tweet
    tweet_words = []
    words = tweet[0].split()
    for x in words:
        x = unicode(x, errors='ignore')
        tweet_words.append(x)
    mod_tweet = " ".join(tweet_words)
    tweet[0] = mod_tweet
    train_tweets.append(tweet)
    if counter > 100:  # split into training and testing datasets
        test_tweets.append(tweet)
    else:
        train_tweets.append(tweet)

with open("tweets1.csv", 'rb') as data_file:
    data = csv.reader(data_file, delimiter=',')
    for tweet in data:
        encode_tweet(tweet)
        counter += 1

classifier = NaiveBayesClassifier(train_tweets)
print("Accuracy of the classifier: {0}".format(classifier.accuracy(test_tweets)))
classifier.show_informative_features(10)
print "Training complete"

test = raw_input("Enter the string:")
if classifier.classify(test) == 0:
    print "Sentiment: negative"
else:
    print "Sentiment: positive"
Example 13: Minggu
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
"Korban diajak tersangka ke musala di dekat pondok. Saat kondisi sepi dan hanya berdua dengan korban, tersangka mencabuli korban," kata Wahyu kepada wartawan, Minggu (20/3/2016).
Lantaran menganggap Nurul sebagai Gus, korban pun tak berani menolak permintaan tersangka. Terlebih lagi, tersangka membujuk korban bahwa perbuatan cabul itu untuk memasukkan ilmu kebatinan ke tubuh korban.
"Tersangka berdalih untuk mengajari korban ilmu tasawuf. Nyatanya itu hanya untuk memuluskan niat tersangka agar bisa mencabuli korban," ungkapnya.
Menurut Wahyu, perbuatan cabul itu dilakukan tersangka kepada korban berulang kali selama 2 tahun terakhir. Bahkan korban diminta membayar uang kepada tersangka setiap kali usai melakukan pencabulan. Nilainya antara Rp 200.000 hingga jutaan rupiah.
"Tersangka juga meminta uang dari korban berulang kali. Total kerugian korban Rp 40 juta," sebutnya.
Tak tahan dengan perbuatan Nurul, lanjut Wahyu, korban pun memutuskan buka mulut ke teman sesama santri. Mendapat dukungan dari teman-temannya, korban memberanikan diri melapor ke Polres Jombang, Kamis (17/3).
Pada hari yang sama, polisi memutuskan menjebak tersangka. "Saat korban menyerahkan uang yang terakhir kepada tersangka, saat itu tersangka langsung kami tangkap," jelasnya.
Akibat perbuatannya, kini Nurul harus mendekam di Rutan Polres Jombang. Tersangka dijerat dengan Pasal 80 ayat (1) juncto Pasal 82 ayat (1) UU RI No 35 Tahun 2014 tentang Perlindungan Anak dengan ancaman pidana maksimal 15 tahun penjara.
"Kalau ada yang merasa menjadi korban perbuatan tersangka ini, jangan malu melapor, akan kami jaga identitasnya. Karena itu bisa memberatkan tersangka," pungkasnya. """
tic = timeit.default_timer()
renum = ''.join([i for i in text if not i.isdigit()])
text = stem_words(renum)
print("text diatas setelah diklasifikasi yaitu %s\n" % cl.classify(text))
toc = timeit.default_timer()
print ("waktu klasifikasi : ")
print(toc-tic)
print(cl.show_informative_features(20))
# classifier = TextBlob(stemstop_output, classifier=cl)
# print(classifier.classify())
Example 14: TestNaiveBayesClassifier
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
class TestNaiveBayesClassifier(unittest.TestCase):

    def setUp(self):
        self.classifier = NaiveBayesClassifier(train_set)

    def test_default_extractor(self):
        text = "I feel happy this morning."
        assert_equal(self.classifier.extract_features(text), basic_extractor(text, train_set))

    def test_classify(self):
        res = self.classifier.classify("I feel happy this morning")
        assert_equal(res, 'positive')
        assert_equal(len(self.classifier.train_set), len(train_set))

    def test_classify_a_list_of_words(self):
        res = self.classifier.classify(["I", "feel", "happy", "this", "morning"])
        assert_equal(res, "positive")

    def test_train_from_lists_of_words(self):
        # classifier can be trained on lists of words instead of strings
        train = [(doc.split(), label) for doc, label in train_set]
        classifier = NaiveBayesClassifier(train)
        assert_equal(classifier.accuracy(test_set),
                     self.classifier.accuracy(test_set))

    def test_prob_classify(self):
        res = self.classifier.prob_classify("I feel happy this morning")
        assert_equal(res.max(), "positive")
        assert_true(res.prob("positive") > res.prob("negative"))

    def test_accuracy(self):
        acc = self.classifier.accuracy(test_set)
        assert_true(isinstance(acc, float))

    def test_update(self):
        res1 = self.classifier.prob_classify("lorem ipsum")
        original_length = len(self.classifier.train_set)
        self.classifier.update([("lorem ipsum", "positive")])
        new_length = len(self.classifier.train_set)
        res2 = self.classifier.prob_classify("lorem ipsum")
        assert_true(res2.prob("positive") > res1.prob("positive"))
        assert_equal(original_length + 1, new_length)

    def test_labels(self):
        labels = self.classifier.labels()
        assert_true("positive" in labels)
        assert_true("negative" in labels)

    def test_show_informative_features(self):
        feats = self.classifier.show_informative_features()

    def test_informative_features(self):
        feats = self.classifier.informative_features(3)
        assert_true(isinstance(feats, list))
        assert_true(isinstance(feats[0], tuple))

    def test_custom_feature_extractor(self):
        cl = NaiveBayesClassifier(train_set, custom_extractor)
        cl.classify("Yay! I'm so happy it works.")
        assert_equal(cl.train_features[0][1], 'positive')

    def test_init_with_csv_file(self):
        cl = NaiveBayesClassifier(CSV_FILE, format="csv")
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_csv_file_without_format_specifier(self):
        cl = NaiveBayesClassifier(CSV_FILE)
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_json_file(self):
        cl = NaiveBayesClassifier(JSON_FILE, format="json")
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_json_file_without_format_specifier(self):
        cl = NaiveBayesClassifier(JSON_FILE)
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_accuracy_on_a_csv_file(self):
        a = self.classifier.accuracy(CSV_FILE)
        assert_true(isinstance(a, float))

    def test_accuracy_on_json_file(self):
        a = self.classifier.accuracy(JSON_FILE)
        assert_true(isinstance(a, float))

    def test_init_with_tsv_file(self):
        cl = NaiveBayesClassifier(TSV_FILE)
        assert_equal(cl.classify("I feel happy this morning"), 'pos')
        training_sentence = cl.train_set[0][0]
        assert_true(isinstance(training_sentence, unicode))

    def test_init_with_bad_format_specifier(self):
# ......... the rest of the code is omitted here .........
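The CSV_FILE, TSV_FILE, and JSON_FILE fixtures are defined outside this excerpt. TextBlob's CSV training format is one text,label pair per row, so an illustrative fixture (contents invented) could be generated like this:

import csv

# Illustrative CSV training fixture: one "text,label" row per example.
rows = [
    ("I feel happy this morning", "pos"),
    ("I love my job.", "pos"),
    ("This is terrible.", "neg"),
]

with open("train.csv", "w", newline="") as f:
    csv.writer(f).writerows(rows)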
Example 15: NaiveBayesClassifier
# Required import: from textblob.classifiers import NaiveBayesClassifier [as alias]
# Or: from textblob.classifiers.NaiveBayesClassifier import show_informative_features [as alias]
test_data = [
    ("Fluggastdatenspeicherung: EU-Parlament votiert für PNR-Datenbank"),
    ("Chipmaschinen-Hersteller: ASML liefert sechs EUV-Belichtungsmaschinen an Intel aus"),
    ("Apple: iCloud löscht unter Umständen Daten unwiederbringlich"),
    ("Spionagesoftware: Hacking Team nutzt UEFI-Rootkit"),
    ("Mobilfunk: 5G soll für Nutzer wie ein unbegrenztes System sein"),
    ("Mobilfunknetzbetreiber: Kostenloses WLAN für Regionalzüge kommt"),
    ("Kickstarter: Kerze lädt Smartphone"),
    ("Hacking Team: Carabinieri kapern mal kurz das Internet"),
    ("Nach Hackerangriff: OPM-Chefin Katherine Archuleta tritt zurück"),
    ("Smartphone-Hersteller: Geeksphone hört auf"),
    ("Systemverschlüsselung: Yubikeys Zwei-Faktor-Authentifizierung unter Linux nutzen"),
    ("Kritik an Dieter Nuhr: Wir alle sind der Shitstorm"),
    ("Navigationsgerät: Autofahrer verursacht wegen Navi schweren Unfall"),
    ("Until Dawn angespielt: Das Horrorhaus der tödlichen Entscheidungen"),
    ("Satoru Iwata: Nintendo-Chef im Alter von 55 Jahren gestorben"),
    ("Call of Duty: Zombies à la Film noir")
]

nbc = NaiveBayesClassifier(train_data, lang='de_DE')

for data in test_data:
    print(nbc.classify(data))

print(nbc.accuracy(train_data))
print(nbc.show_informative_features(5))