This article collects typical usage examples of the Python method nltk.sentiment.SentimentAnalyzer.classify. If you have been asking yourself how SentimentAnalyzer.classify works, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also explore further usage examples of its containing class, nltk.sentiment.SentimentAnalyzer.
Below are 4 code examples of SentimentAnalyzer.classify, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
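Before diving into the examples, here is a minimal, self-contained sketch of the train-then-classify workflow that all four examples share. The toy documents and labels are invented for illustration; only NLTK is required.

from nltk.classify import NaiveBayesClassifier
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import extract_unigram_feats

# A tiny labeled corpus: each document is a (token_list, label) pair.
train_docs = [
    (['i', 'love', 'this', 'movie'], 'positive'),
    (['what', 'a', 'great', 'day'], 'positive'),
    (['i', 'hate', 'this', 'movie'], 'negative'),
    (['what', 'a', 'terrible', 'day'], 'negative'),
]

analyzer = SentimentAnalyzer()
# Collect every training token and keep those occurring at least min_freq times.
unigrams = analyzer.unigram_word_feats(analyzer.all_words(train_docs), min_freq=1)
analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigrams)

training_set = analyzer.apply_features(train_docs)
analyzer.train(NaiveBayesClassifier.train, training_set)

# classify() expects a tokenized document, not a raw string.
print(analyzer.classify(['i', 'love', 'great', 'day']))  # -> 'positive'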
Example 1: train
# Required import: from nltk.sentiment import SentimentAnalyzer [as alias]
# Or: from nltk.sentiment.SentimentAnalyzer import classify [as alias]
from nltk.classify import NaiveBayesClassifier
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import mark_negation, extract_unigram_feats

def train():
    # read_tweets is a helper defined elsewhere in the original source.
    positive_tweets = read_tweets('/root/295/new/positive.txt', 'positive')
    negative_tweets = read_tweets('/root/295/new/negative.txt', 'negative')
    print(len(positive_tweets))
    print(len(negative_tweets))
    # pos_train = positive_tweets[:2000]
    # neg_train = negative_tweets[:2000]
    # pos_test = positive_tweets[2001:3000]
    # neg_test = negative_tweets[2001:3000]
    # 80/20 train/test split. Use // so the cut-offs stay ints, and slice each
    # list by its own length (the original sliced negative_tweets by the length
    # of positive_tweets, and its +1 silently dropped one test tweet).
    pos_cutoff = len(positive_tweets) * 80 // 100
    neg_cutoff = len(negative_tweets) * 80 // 100
    pos_train = positive_tweets[:pos_cutoff]
    neg_train = negative_tweets[:neg_cutoff]
    pos_test = positive_tweets[pos_cutoff:]
    neg_test = negative_tweets[neg_cutoff:]
    training_data = pos_train + neg_train
    test_data = pos_test + neg_test
    sentim_analyzer = SentimentAnalyzer()
    # mark_negation appends _NEG to every token that follows a negation word.
    all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_data])
    # print(all_words_neg)
    unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
    # print(unigram_feats)
    print(len(unigram_feats))
    sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
    training_set = sentim_analyzer.apply_features(training_data)
    test_set = sentim_analyzer.apply_features(test_data)
    print(test_set)
    trainer = NaiveBayesClassifier.train
    classifier = sentim_analyzer.train(trainer, training_set)
    for key, value in sorted(sentim_analyzer.evaluate(test_set).items()):
        print('{0}: {1}'.format(key, value))
    # tokenize_sentance is a helper defined elsewhere in the original source.
    print(sentim_analyzer.classify(tokenize_sentance('I hate driving car at night')))
    return sentim_analyzer
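Example 1 relies on mark_negation for the _NEG suffixing before feature extraction; a quick illustration of what it produces:

from nltk.sentiment.util import mark_negation

# Tokens following a negation word get an _NEG suffix, letting the unigram
# features distinguish a plain "enjoy" from a negated "enjoy_NEG".
print(mark_negation("I did not enjoy the movie".split()))
# ['I', 'did', 'not', 'enjoy_NEG', 'the_NEG', 'movie_NEG']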
Example 2: len
# Required import: from nltk.sentiment import SentimentAnalyzer [as alias]
# Or: from nltk.sentiment.SentimentAnalyzer import classify [as alias]
from nltk.classify import NaiveBayesClassifier
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import extract_unigram_feats, save_file

# `train` and `test` are lists of (tokens, label) documents prepared earlier
# (elided in the original snippet).
print(len(test), len(train))
sentiment_analyzer = SentimentAnalyzer()
all_words = sentiment_analyzer.all_words([doc[0] for doc in train])
# Get the list of terms with frequency >= 4; unigram_word_feats replaces the
# manual count below:
# words_freqs = {}
# for tweet in train:
#     for token in tweet[0]:
#         if token in words_freqs:
#             words_freqs[token] += 1
#         else:
#             words_freqs[token] = 1
# unigrams = [token for token in words_freqs if words_freqs[token] >= 4]
unigrams = sentiment_analyzer.unigram_word_feats(all_words, min_freq=4)
# bigrams = sentiment_analyzer.bigram_collocation_feats([doc[0] for doc in train], top_n=1000)
# print(unigrams)
sentiment_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigrams)
# sentiment_analyzer.add_feat_extractor(extract_bigram_feats, bigrams=bigrams)
training_set = sentiment_analyzer.apply_features(train)
test_set = sentiment_analyzer.apply_features(test)
# print(training_set[0])
trainer = NaiveBayesClassifier.train
classifier = sentiment_analyzer.train(trainer, training_set)
# save_file (from nltk.sentiment.util) pickles the trained analyzer to disk.
save_file(sentiment_analyzer, "sentiment_classifier.pkl")
for key, value in sorted(sentiment_analyzer.evaluate(test_set).items()):
    print("{0}: {1}".format(key, value))
# test[0] is a (tokens, label) pair; classify() takes just the token list.
print(test[0], sentiment_analyzer.classify(test[0][0]))
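Since Example 2 persists the analyzer with save_file, which pickles it, a later process can reload and reuse it without retraining. A minimal sketch, assuming the .pkl file produced above:

import pickle

with open("sentiment_classifier.pkl", "rb") as f:
    analyzer = pickle.load(f)
# The reloaded analyzer keeps its feature extractors and trained classifier.
print(analyzer.classify(['great', 'fun', 'ride']))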
Example 3: word_tokenize
# Required import: from nltk.sentiment import SentimentAnalyzer [as alias]
# Or: from nltk.sentiment.SentimentAnalyzer import classify [as alias]
import sys

from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import mark_negation, extract_unigram_feats
from nltk.tokenize import word_tokenize

# The original snippet opens mid-loop; `training_lines` stands in for the
# elided iterable of labeled "<sentiment>,<text>" records, and the
# SentimentAnalyzer construction is reconstructed here.
sa = SentimentAnalyzer()
trainingset = []
for line in training_lines:
    senti = line.split(",")[0]
    content = line[len(senti) + 1:]
    tokens = word_tokenize(content.rstrip())
    trainingset.append((tokens, senti))
all_words_neg = sa.all_words([mark_negation(doc) for doc in trainingset])
unigram_feats = sa.unigram_word_feats(all_words_neg, min_freq=4)
sa.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
training_set = sa.apply_features(trainingset)
# The training call is not shown in the original; before sa.classify() can
# work, something like sa.train(NaiveBayesClassifier.train, training_set)
# must have run.

# Hadoop-streaming-style mapper: read ';'-separated tweet records from stdin
# and emit "<sentiment>\t<weight>", weighting each tweet by 1 + its like count.
for line in sys.stdin:
    if "username" in line:  # skip the header row
        continue
    tweetWords = []
    tweet = line.split(";")[4]
    likes = int(line.split(";")[3])
    num = 1 + likes  # same as the original if/else: likes == 0 gives 1
    for i in tweet.split():
        i = i.lower()
        i = i.strip('@#\'"?,.!')
        tweetWords.append(i)
    sentiment = sa.classify(tweetWords)
    print('%s\t%s' % (sentiment, num))
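The mapper above emits one "<sentiment>\t<weight>" record per tweet. In a Hadoop streaming job it would be paired with a reducer; a hypothetical matching reducer (not part of the original) that sums the like-weighted counts per label could look like this:

import sys

# Accumulate the weight emitted by the mapper for each sentiment label.
counts = {}
for line in sys.stdin:
    sentiment, num = line.rstrip('\n').split('\t')
    counts[sentiment] = counts.get(sentiment, 0) + int(num)
for sentiment, total in counts.items():
    print('%s\t%d' % (sentiment, total))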
Example 4: SuicideClassifier
# Required import: from nltk.sentiment import SentimentAnalyzer [as alias]
# Or: from nltk.sentiment.SentimentAnalyzer import classify [as alias]
import datetime

from nltk.classify import NaiveBayesClassifier
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import extract_unigram_feats

class SuicideClassifier(object):

    def __init__(self, sentiment_only, num_phrases_to_track=20):
        # neg_phrases = filter_negative_phrases(load_csv_sentences('thoughtsandfeelings.csv'))
        # pos_phrases = filter_positive_phrases(load_csv_sentences('spiritualforums.csv'))
        # file_pos = open("pos_phrases.txt", 'w')
        # file_neg = open("neg_phrases.txt", 'w')
        # for item in pos_phrases:
        #     print >> file_pos, item
        # for item in neg_phrases:
        #     print >> file_neg, item
        self.recent_sentiment_scores = []
        neg_file = open("ALL_neg_phrases_filtered.txt", "r")
        pos_file = open("webtext_phrases_with_lots_of_words.txt", "r")
        neg_phrases = neg_file.readlines()
        pos_phrases = pos_file.readlines()
        neg_docs = []
        pos_docs = []
        for phrase in neg_phrases:
            neg_docs.append((phrase.split(), 'suicidal'))
        # Truncate the positive phrases so the two classes are balanced.
        for phrase in pos_phrases[:len(neg_phrases)]:
            pos_docs.append((phrase.split(), 'alright'))
        print(len(neg_docs))
        print(len(pos_docs))
        # negcutoff = len(neg_docs) * 3 // 4
        # poscutoff = len(pos_docs) * 3 // 4
        # Hold out the last 200 documents of each class for testing.
        negcutoff = -200
        poscutoff = -200
        train_pos_docs = pos_docs[:poscutoff]
        test_pos_docs = pos_docs[poscutoff:]
        train_neg_docs = neg_docs[:negcutoff]
        test_neg_docs = neg_docs[negcutoff:]
        training_docs = train_pos_docs + train_neg_docs
        testing_docs = test_pos_docs + test_neg_docs
        self.sentim_analyzer = SentimentAnalyzer()
        if not sentiment_only:
            all_words = self.sentim_analyzer.all_words(training_docs)
            unigram_feats = self.sentim_analyzer.unigram_word_feats(all_words, min_freq=1)
            self.sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
        # vader_sentiment_feat is a custom extractor defined elsewhere in the
        # original source; it is always added, so sentiment_only=True trains
        # on the VADER feature alone.
        self.sentim_analyzer.add_feat_extractor(vader_sentiment_feat)
        # bigram_feats = self.sentim_analyzer.bigram_collocation_feats(all_words, min_freq=1)
        # self.sentim_analyzer.add_feat_extractor(extract_bigram_feats, bigrams=bigram_feats)
        training_set = self.sentim_analyzer.apply_features(training_docs)
        test_set = self.sentim_analyzer.apply_features(testing_docs)
        trainer = NaiveBayesClassifier.train
        self.classifier = self.sentim_analyzer.train(trainer, training_set)
        for key, value in sorted(self.sentim_analyzer.evaluate(test_set).items()):
            print('{0}: {1}'.format(key, value))
        self.classifier.show_most_informative_features(20)

    def test(self, phrase):
        return self.sentim_analyzer.classify(phrase.split())

    def update_sentiments(self, value):
        # Record the score, keep only scores from the last 60 seconds, and
        # return the running average over that window.
        now = datetime.datetime.now()
        self.recent_sentiment_scores.append([now, value])
        self.recent_sentiment_scores = [
            x for x in self.recent_sentiment_scores
            if x[0] > now - datetime.timedelta(seconds=60)
        ]
        avg = sum(x[1] for x in self.recent_sentiment_scores) / len(self.recent_sentiment_scores)
        print(avg)
        return avg
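A hypothetical driver for Example 4 (not in the original; it assumes the two phrase files exist and vader_sentiment_feat is defined):

# Train at construction time, classify a phrase, then feed the result into
# the 60-second rolling average.
clf = SuicideClassifier(sentiment_only=False)
label = clf.test("nothing feels worth doing anymore")
avg = clf.update_sentiments(1 if label == 'suicidal' else 0)
print(label, avg)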