This article collects typical usage examples of the Python method nltk.classify.scikitlearn.SklearnClassifier.labels. If you have been wondering what SklearnClassifier.labels does, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also look further into usage examples for the containing class, nltk.classify.scikitlearn.SklearnClassifier.
The following shows 4 code examples of the SklearnClassifier.labels method, sorted by popularity by default.
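Before the examples, here is a minimal, self-contained sketch of the pattern they all share (the tiny featuresets below are invented for illustration and are not taken from the examples on this page): wrap a scikit-learn estimator in SklearnClassifier, train it on (featureset, label) pairs, then call labels() to see which classes the classifier learned.
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import MultinomialNB

# Toy training data: each item is (feature dict, label).
train_set = [({'good': 1, 'fun': 1}, 'pos'),
             ({'bad': 1, 'dull': 1}, 'neg')]
classif = SklearnClassifier(MultinomialNB())
classif.train(train_set)
print(classif.labels())               # label set seen during training, e.g. ['neg', 'pos']
print(classif.classify({'good': 1}))  # classify a new feature dict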
Example 1: print
# Required import: from nltk.classify.scikitlearn import SklearnClassifier [as alias]
# Or: from nltk.classify.scikitlearn.SklearnClassifier import labels [as alias]
testing_set = nltk.classify.apply_features(extract_features, test_tweets)
for (tweet, sentiment) in test_tweets:
    print(classifier.classify(extract_features(tweet)))
print(nltk.classify.accuracy(classifier, testing_set))
classifier.show_most_informative_features(5)
"""
pipeline = Pipeline([('tfidf', TfidfTransformer()),
('chi2', SelectKBest(chi2, k='all')),
('nb', MultinomialNB())])
"""
pipeline = Pipeline([('tfidf', TfidfTransformer()),
('chi2', SelectKBest(chi2, k='all')),
('nb', MultinomialNB())])
classif = SklearnClassifier(pipeline)
classif.train(training_set)
print(classif.labels())
for (tweet, sentiment) in test_tweets:
    print(classif.classify(extract_features(tweet)))
print(nltk.classify.accuracy(classif, testing_set))
Example 2: labelize
# Required import: from nltk.classify.scikitlearn import SklearnClassifier [as alias]
# Or: from nltk.classify.scikitlearn.SklearnClassifier import labels [as alias]
train_corpus = corpus[150:]
test_corpus = corpus[:150]
train_set = [(features(words), labelize(category in categories)) for (words, categories) in train_corpus]
test_set = [(features(words), labelize(category in categories)) for (words, categories) in test_corpus]
# train classifier
# print "Training classifier for '%s'" % category
# classifier = MaxentClassifier.train(train_set, max_iter= 3)
# classifier = NaiveBayesClassifier.train(train_set)
model = MultinomialNB()
classifier = SklearnClassifier(model)
# set priors
classifier._encoder.fit([category, "no"])
# [category, "no"] unless this is true then ["no", category]
flip = classifier.labels()[0] == "no"
categorized_proportion = len([words for (words, categories) in corpus if category in categories]) * 1.0 / len(corpus)
if flip:
    model.class_prior = [1-categorized_proportion, categorized_proportion]
else:
    model.class_prior = [categorized_proportion, 1-categorized_proportion]
classifier.train(train_set)
# test classifier
test_results = classifier.classify_many([feat for (feat, label) in test_set])
pos_test_set = set(i for i, result in enumerate(test_results) if result == category)
reference_values = [label for (feat, label) in test_set]
pos_ref_set = set(i for i, (feat, label) in enumerate(test_set) if label == category)
accuracy = scores.accuracy(reference_values, test_results)
accuracies.append(accuracy)
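A note on why Example 2 inspects labels()[0]: SklearnClassifier keeps its labels in a scikit-learn LabelEncoder, which stores classes in sorted order, so whether "no" comes first depends on how the category name sorts against it, and class_prior must be given in that same order. A small sketch of just this behaviour ("sports" is an invented category name, not one from the example's corpus):
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import MultinomialNB

clf = SklearnClassifier(MultinomialNB())
clf._encoder.fit(["sports", "no"])   # same pre-fit trick as in Example 2
print(clf.labels())                  # ['no', 'sports'] -- sorted, so "no" happens to come first here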
Example 3: label_feats_from_tweets
# Required import: from nltk.classify.scikitlearn import SklearnClassifier [as alias]
# Or: from nltk.classify.scikitlearn.SklearnClassifier import labels [as alias]
print "creating feature sets..."
tweetlist = tweetTest.loadTwitterCSV('trainingandtestdata/testdata.csv')
labeld_features = label_feats_from_tweets(tweetlist)
#labeld_features = label_feats_from_corpus(movie_reviews)
training_set, test_set = split_label_feats(labeld_features)
# tweetlist = tweetTest.loadTwitterCSV('trainingandtestdata/training.1600000.processed.noemoticon.csv')
# training_set = label_feats_from_tweets(tweetlist)
# training_set, garbage = split_label_feats(training_set, 1.0)
# test_set, garbage = split_label_feats(labeld_features, 1.0)
print "training set length: %i test set length: %i" % (len(training_set), len(test_set))
print prettifyFeatureSet(test_set)
print "training classifier..."
#classifier = NaiveBayesClassifier.train(training_set)
#classifier = MaxentClassifier.train(training_set, algorithm='iis', max_iter=99, min_lldelta=0.01)
#classifier = MaxentClassifier.train(training_set)
classifier = SklearnClassifier(LogisticRegression()).train(training_set)
print "calculating accuracy..."
print 'accuracy:', nltk.classify.util.accuracy(classifier, test_set)
#classifier.show_most_informative_features(30)
negfeat = bag_of_words(['the', 'plot', 'was', 'ludicrous'])
print classifier.classify(negfeat)
probdist = classifier.prob_classify(negfeat)
print "pos: ", probdist.prob('pos'), " neg: ", probdist.prob('neg')
print classifier.labels()
classify_tweet(classifier, "I love this movie!", True)
classify_tweet(classifier, "!!!", True)
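Example 3 hard-codes 'pos' and 'neg' when printing probabilities; labels() pairs naturally with prob_classify when the label set is not known in advance. A self-contained sketch with invented toy featuresets:
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.linear_model import LogisticRegression

clf = SklearnClassifier(LogisticRegression()).train(
    [({'love': 1, 'movie': 1}, 'pos'), ({'ludicrous': 1, 'plot': 1}, 'neg')])
probdist = clf.prob_classify({'love': 1, 'plot': 1})
for label in clf.labels():
    print("%s: %.3f" % (label, probdist.prob(label)))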
Example 4: len
# Required import: from nltk.classify.scikitlearn import SklearnClassifier [as alias]
# Or: from nltk.classify.scikitlearn.SklearnClassifier import labels [as alias]
negcutoff = len(negfeats)*3/4   # 3/4 of each class used for training
poscutoff = len(posfeats)*3/4
cls_set = ['pos', 'neg']
for i in range(0,2):
    print negfeats[i]
print '\n------------------------------------------------\n'
trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
print 'train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats))
classif = SklearnClassifier(LinearSVC())
classif.train(trainfeats)
print classif.labels()
test_skl = []
t_test_skl = []
for d in testfeats:
    test_skl.append(d[0])
    t_test_skl.append(d[1])
print(set(t_test_skl))
result = []
for item in test_skl:
    p = classif.classify(item)
    result.append(p)
print len(result)
print len(t_test_skl)
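As a follow-up to Example 4, the per-item classify() loop can be replaced by a single classify_many() call, which SklearnClassifier also provides (Example 2 uses it). A self-contained sketch with invented toy featuresets:
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.svm import LinearSVC

clf = SklearnClassifier(LinearSVC()).train(
    [({'great': 1}, 'pos'), ({'awful': 1}, 'neg')])
feats = [{'great': 1, 'film': 1}, {'awful': 1, 'plot': 1}]
print(clf.classify_many(feats))   # predictions in the same order as the input list, e.g. ['pos', 'neg']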