This article collects typical usage examples of the Python method nltk.sentiment.SentimentAnalyzer.classify. If you have been asking yourself how SentimentAnalyzer.classify works, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also explore further usage examples of its containing class, nltk.sentiment.SentimentAnalyzer.
Below are 4 code examples of SentimentAnalyzer.classify, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
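Before diving into the examples, here is a minimal, self-contained sketch of the train-then-classify workflow that all four examples share. The toy documents and labels are invented for illustration; only NLTK is required.

from nltk.classify import NaiveBayesClassifier
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import extract_unigram_feats

# A tiny labeled corpus: each document is a (token_list, label) pair.
train_docs = [
    (['i', 'love', 'this', 'movie'], 'positive'),
    (['what', 'a', 'great', 'day'], 'positive'),
    (['i', 'hate', 'this', 'movie'], 'negative'),
    (['what', 'a', 'terrible', 'day'], 'negative'),
]

analyzer = SentimentAnalyzer()
# Collect every training token and keep those occurring at least min_freq times.
unigrams = analyzer.unigram_word_feats(analyzer.all_words(train_docs), min_freq=1)
analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigrams)

training_set = analyzer.apply_features(train_docs)
analyzer.train(NaiveBayesClassifier.train, training_set)

# classify() expects a tokenized document, not a raw string.
print(analyzer.classify(['i', 'love', 'great', 'day']))  # -> 'positive'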
Example 1: train
# Required import: from nltk.sentiment import SentimentAnalyzer [as alias]
# Or: from nltk.sentiment.SentimentAnalyzer import classify [as alias]
from nltk.classify import NaiveBayesClassifier
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import mark_negation, extract_unigram_feats

def train():
    # read_tweets is a helper defined elsewhere in the original source.
    positive_tweets = read_tweets('/root/295/new/positive.txt', 'positive')
    negative_tweets = read_tweets('/root/295/new/negative.txt', 'negative')
    print(len(positive_tweets))
    print(len(negative_tweets))
    # pos_train = positive_tweets[:2000]
    # neg_train = negative_tweets[:2000]
    # pos_test = positive_tweets[2001:3000]
    # neg_test = negative_tweets[2001:3000]
    # 80/20 train/test split. Use // so the cut-offs stay ints, and slice each
    # list by its own length (the original sliced negative_tweets by the length
    # of positive_tweets, and its +1 silently dropped one test tweet).
    pos_cutoff = len(positive_tweets) * 80 // 100
    neg_cutoff = len(negative_tweets) * 80 // 100
    pos_train = positive_tweets[:pos_cutoff]
    neg_train = negative_tweets[:neg_cutoff]
    pos_test = positive_tweets[pos_cutoff:]
    neg_test = negative_tweets[neg_cutoff:]
    training_data = pos_train + neg_train
    test_data = pos_test + neg_test
    sentim_analyzer = SentimentAnalyzer()
    # mark_negation appends _NEG to every token that follows a negation word.
    all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_data])
    # print(all_words_neg)
    unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
    # print(unigram_feats)
    print(len(unigram_feats))
    sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
    training_set = sentim_analyzer.apply_features(training_data)
    test_set = sentim_analyzer.apply_features(test_data)
    print(test_set)
    trainer = NaiveBayesClassifier.train
    classifier = sentim_analyzer.train(trainer, training_set)
    for key, value in sorted(sentim_analyzer.evaluate(test_set).items()):
        print('{0}: {1}'.format(key, value))
    # tokenize_sentance is a helper defined elsewhere in the original source.
    print(sentim_analyzer.classify(tokenize_sentance('I hate driving car at night')))
    return sentim_analyzer
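Example 1 relies on mark_negation for the _NEG suffixing before feature extraction; a quick illustration of what it produces:

from nltk.sentiment.util import mark_negation

# Tokens following a negation word get an _NEG suffix, letting the unigram
# features distinguish a plain "enjoy" from a negated "enjoy_NEG".
print(mark_negation("I did not enjoy the movie".split()))
# ['I', 'did', 'not', 'enjoy_NEG', 'the_NEG', 'movie_NEG']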
Example 2: len
# Required import: from nltk.sentiment import SentimentAnalyzer [as alias]
# Or: from nltk.sentiment.SentimentAnalyzer import classify [as alias]
from nltk.classify import NaiveBayesClassifier
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import extract_unigram_feats, save_file

# `train` and `test` are lists of (tokens, label) documents prepared earlier
# (elided in the original snippet).
print(len(test), len(train))
sentiment_analyzer = SentimentAnalyzer()
all_words = sentiment_analyzer.all_words([doc[0] for doc in train])
# Get the list of terms with frequency >= 4; unigram_word_feats replaces the
# manual count below:
# words_freqs = {}
# for tweet in train:
#     for token in tweet[0]:
#         if token in words_freqs:
#             words_freqs[token] += 1
#         else:
#             words_freqs[token] = 1
# unigrams = [token for token in words_freqs if words_freqs[token] >= 4]
unigrams = sentiment_analyzer.unigram_word_feats(all_words, min_freq=4)
# bigrams = sentiment_analyzer.bigram_collocation_feats([doc[0] for doc in train], top_n=1000)
# print(unigrams)
sentiment_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigrams)
# sentiment_analyzer.add_feat_extractor(extract_bigram_feats, bigrams=bigrams)
training_set = sentiment_analyzer.apply_features(train)
test_set = sentiment_analyzer.apply_features(test)
# print(training_set[0])
trainer = NaiveBayesClassifier.train
classifier = sentiment_analyzer.train(trainer, training_set)
# save_file (from nltk.sentiment.util) pickles the trained analyzer to disk.
save_file(sentiment_analyzer, "sentiment_classifier.pkl")
for key, value in sorted(sentiment_analyzer.evaluate(test_set).items()):
    print("{0}: {1}".format(key, value))
# test[0] is a (tokens, label) pair; classify() takes just the token list.
print(test[0], sentiment_analyzer.classify(test[0][0]))
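Since Example 2 persists the analyzer with save_file, which pickles it, a later process can reload and reuse it without retraining. A minimal sketch, assuming the .pkl file produced above:

import pickle

with open("sentiment_classifier.pkl", "rb") as f:
    analyzer = pickle.load(f)
# The reloaded analyzer keeps its feature extractors and trained classifier.
print(analyzer.classify(['great', 'fun', 'ride']))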
Example 3: word_tokenize
# Required import: from nltk.sentiment import SentimentAnalyzer [as alias]
# Or: from nltk.sentiment.SentimentAnalyzer import classify [as alias]
import sys

from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import mark_negation, extract_unigram_feats
from nltk.tokenize import word_tokenize

# The original snippet opens mid-loop; `training_lines` stands in for the
# elided iterable of labeled "<sentiment>,<text>" records, and the
# SentimentAnalyzer construction is reconstructed here.
sa = SentimentAnalyzer()
trainingset = []
for line in training_lines:
    senti = line.split(",")[0]
    content = line[len(senti) + 1:]
    tokens = word_tokenize(content.rstrip())
    trainingset.append((tokens, senti))
all_words_neg = sa.all_words([mark_negation(doc) for doc in trainingset])
unigram_feats = sa.unigram_word_feats(all_words_neg, min_freq=4)
sa.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
training_set = sa.apply_features(trainingset)
# The training call is not shown in the original; before sa.classify() can
# work, something like sa.train(NaiveBayesClassifier.train, training_set)
# must have run.

# Hadoop-streaming-style mapper: read ';'-separated tweet records from stdin
# and emit "<sentiment>\t<weight>", weighting each tweet by 1 + its like count.
for line in sys.stdin:
    if "username" in line:  # skip the header row
        continue
    tweetWords = []
    tweet = line.split(";")[4]
    likes = int(line.split(";")[3])
    num = 1 + likes  # same as the original if/else: likes == 0 gives 1
    for i in tweet.split():
        i = i.lower()
        i = i.strip('@#\'"?,.!')
        tweetWords.append(i)
    sentiment = sa.classify(tweetWords)
    print('%s\t%s' % (sentiment, num))
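The mapper above emits one "<sentiment>\t<weight>" record per tweet. In a Hadoop streaming job it would be paired with a reducer; a hypothetical matching reducer (not part of the original) that sums the like-weighted counts per label could look like this:

import sys

# Accumulate the weight emitted by the mapper for each sentiment label.
counts = {}
for line in sys.stdin:
    sentiment, num = line.rstrip('\n').split('\t')
    counts[sentiment] = counts.get(sentiment, 0) + int(num)
for sentiment, total in counts.items():
    print('%s\t%d' % (sentiment, total))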
Example 4: SuicideClassifier
# Required import: from nltk.sentiment import SentimentAnalyzer [as alias]
# Or: from nltk.sentiment.SentimentAnalyzer import classify [as alias]
import datetime

from nltk.classify import NaiveBayesClassifier
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import extract_unigram_feats

class SuicideClassifier(object):

    def __init__(self, sentiment_only, num_phrases_to_track=20):
        # neg_phrases = filter_negative_phrases(load_csv_sentences('thoughtsandfeelings.csv'))
        # pos_phrases = filter_positive_phrases(load_csv_sentences('spiritualforums.csv'))
        # file_pos = open("pos_phrases.txt", 'w')
        # file_neg = open("neg_phrases.txt", 'w')
        # for item in pos_phrases:
        #     print >> file_pos, item
        # for item in neg_phrases:
        #     print >> file_neg, item
        self.recent_sentiment_scores = []
        neg_file = open("ALL_neg_phrases_filtered.txt", "r")
        pos_file = open("webtext_phrases_with_lots_of_words.txt", "r")
        neg_phrases = neg_file.readlines()
        pos_phrases = pos_file.readlines()
        neg_docs = []
        pos_docs = []
        for phrase in neg_phrases:
            neg_docs.append((phrase.split(), 'suicidal'))
        # Truncate the positive phrases so the two classes are balanced.
        for phrase in pos_phrases[:len(neg_phrases)]:
            pos_docs.append((phrase.split(), 'alright'))
        print(len(neg_docs))
        print(len(pos_docs))
        # negcutoff = len(neg_docs) * 3 // 4
        # poscutoff = len(pos_docs) * 3 // 4
        # Hold out the last 200 documents of each class for testing.
        negcutoff = -200
        poscutoff = -200
        train_pos_docs = pos_docs[:poscutoff]
        test_pos_docs = pos_docs[poscutoff:]
        train_neg_docs = neg_docs[:negcutoff]
        test_neg_docs = neg_docs[negcutoff:]
        training_docs = train_pos_docs + train_neg_docs
        testing_docs = test_pos_docs + test_neg_docs
        self.sentim_analyzer = SentimentAnalyzer()
        if not sentiment_only:
            all_words = self.sentim_analyzer.all_words(training_docs)
            unigram_feats = self.sentim_analyzer.unigram_word_feats(all_words, min_freq=1)
            self.sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)
        # vader_sentiment_feat is a custom extractor defined elsewhere in the
        # original source; it is always added, so sentiment_only=True trains
        # on the VADER feature alone.
        self.sentim_analyzer.add_feat_extractor(vader_sentiment_feat)
        # bigram_feats = self.sentim_analyzer.bigram_collocation_feats(all_words, min_freq=1)
        # self.sentim_analyzer.add_feat_extractor(extract_bigram_feats, bigrams=bigram_feats)
        training_set = self.sentim_analyzer.apply_features(training_docs)
        test_set = self.sentim_analyzer.apply_features(testing_docs)
        trainer = NaiveBayesClassifier.train
        self.classifier = self.sentim_analyzer.train(trainer, training_set)
        for key, value in sorted(self.sentim_analyzer.evaluate(test_set).items()):
            print('{0}: {1}'.format(key, value))
        self.classifier.show_most_informative_features(20)

    def test(self, phrase):
        return self.sentim_analyzer.classify(phrase.split())

    def update_sentiments(self, value):
        # Record the score, keep only scores from the last 60 seconds, and
        # return the running average over that window.
        now = datetime.datetime.now()
        self.recent_sentiment_scores.append([now, value])
        self.recent_sentiment_scores = [
            x for x in self.recent_sentiment_scores
            if x[0] > now - datetime.timedelta(seconds=60)
        ]
        avg = sum(x[1] for x in self.recent_sentiment_scores) / len(self.recent_sentiment_scores)
        print(avg)
        return avg
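A hypothetical driver for Example 4 (not in the original; it assumes the two phrase files exist and vader_sentiment_feat is defined):

# Train at construction time, classify a phrase, then feed the result into
# the 60-second rolling average.
clf = SuicideClassifier(sentiment_only=False)
label = clf.test("nothing feels worth doing anymore")
avg = clf.update_sentiments(1 if label == 'suicidal' else 0)
print(label, avg)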