本文整理汇总了Python中nltk.classify.scikitlearn.SklearnClassifier方法的典型用法代码示例。如果您正苦于以下问题:Python scikitlearn.SklearnClassifier方法的具体用法?Python scikitlearn.SklearnClassifier怎么用?Python scikitlearn.SklearnClassifier使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.classify.scikitlearn
的用法示例。
在下文中一共展示了scikitlearn.SklearnClassifier方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: classifier_score
# 需要导入模块: from nltk.classify import scikitlearn [as 别名]
# 或者: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
def classifier_score(tp, classifier, train_list, test, test_tag):
    '''
    Train the given scikit-learn classifier (wrapped for NLTK), persist it,
    and evaluate it on the test set.

    Args:
        tp: name tag used to build the pickle file path.
        classifier: an (untrained) scikit-learn estimator.
        train_list: NLTK-style training data (list of (featureset, label) pairs).
        test: list of feature sets to classify.
        test_tag: gold labels ('pos'/'neg') aligned with `test`.

    Returns:
        (elapsed_ms, pos_precision, pos_recall, accuracy)
    '''
    starttime = datetime.datetime.now()
    classifier = SklearnClassifier(classifier)  # adapt sklearn estimator to the NLTK API
    classifier.train(train_list)
    iohelper.save_objects2pickle(classifier, './Reviews/' + tp + '.pkl')
    pred = classifier.classify_many(test)  # predicted labels, one per test sample
    # Binarize 'pos'/'neg' labels so the sklearn metrics can score them.
    y_true = [1 if tag == 'pos' else 0 for tag in test_tag]
    y_pred = [1 if tag == 'pos' else 0 for tag in pred]
    pos_precision = precision_score(y_true, y_pred)
    pos_recall = recall_score(y_true, y_pred)
    endtime = datetime.datetime.now()
    # BUG FIX: timedelta.microseconds holds only the sub-second component
    # (0-999999), not the elapsed time; use total_seconds() for real
    # wall-clock duration, reported in milliseconds.
    interval = (endtime - starttime).total_seconds() * 1000
    return interval, pos_precision, pos_recall, accuracy_score(test_tag, pred)
#------------------------------------------------------------------------------
示例2: train_maxent
# 需要导入模块: from nltk.classify import scikitlearn [as 别名]
# 或者: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
def train_maxent(training_data):
    """Fit a maximum-entropy (logistic regression) model on encoded features.

    Returns a (features_set, trained_classifier) tuple.
    """
    print("training...")
    feature_names, encoded_rows = encode_features(training_data, filter_threshold)
    # Balanced class weights compensate for label imbalance in the training data.
    maxent = SklearnClassifier(LogisticRegression(C=1.0, class_weight='balanced'))
    maxent.train(encoded_rows)
    return (feature_names, maxent)
示例3: build_classifier_score
# 需要导入模块: from nltk.classify import scikitlearn [as 别名]
# 或者: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
def build_classifier_score(train_set, test_set, classifier):
    """Train `classifier` on `train_set` and return its accuracy on `test_set`."""
    # Separate the evaluation pairs into feature sets and gold labels.
    samples, gold = zip(*test_set)
    wrapped = SklearnClassifier(classifier)  # adapt sklearn estimator to the NLTK API
    wrapped.train(train_set)
    predictions = wrapped.classify_many(samples)
    return accuracy_score(gold, predictions)
示例4: buildClassifier_score
# 需要导入模块: from nltk.classify import scikitlearn [as 别名]
# 或者: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
def buildClassifier_score(trainSet, devtestSet, classifier):
    """Train `classifier` on `trainSet` and return its accuracy on `devtestSet`.

    Args:
        trainSet: NLTK-style (featureset, label) pairs for training.
        devtestSet: held-out (featureset, label) pairs for evaluation.
        classifier: an (untrained) scikit-learn estimator.

    Returns:
        Accuracy of the trained classifier on the dev/test set.
    """
    # Split the dev/test pairs into feature sets and gold labels.
    dev, tag_dev = zip(*devtestSet)
    # Wrap the sklearn estimator so it accepts NLTK feature-dict input.
    # (Removed an unused `from nltk import compat` local import and dead
    # commented-out code from the original.)
    classifier = SklearnClassifier(classifier)
    classifier.train(trainSet)
    pred = classifier.classify_many(dev)  # predicted label per dev sample
    return accuracy_score(tag_dev, pred)
示例5: train
# 需要导入模块: from nltk.classify import scikitlearn [as 别名]
# 或者: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
def train(self):
    """Build a sentiment dataset from the positive/negative review files,
    then train, evaluate, and pickle several classifiers.

    Side effects: reads data/positive.txt and data/negative.txt, writes
    pickle/*.pickle files, and prints the accuracy of each trained model.
    """
    # Use context managers so file handles are always closed (the original
    # leaked every handle it opened).
    with open("data/positive.txt", "r") as f:
        self.pos = f.read()
    with open("data/negative.txt", "r") as f:
        self.neg = f.read()
    self.words = []
    self.doc = []
    # Collect adjectives (POS tags starting with 'J') as candidate features,
    # labelling each review line by its source file.
    for label, text in (("pos", self.pos), ("neg", self.neg)):
        for line in text.split('\n'):
            self.doc.append((line, label))
            for word, tag in nltk.pos_tag(word_tokenize(line)):
                if tag[0] in ["J"]:
                    self.words.append(word.lower())
    with open("pickle/doc.pickle", "wb") as f:
        pickle.dump(self.doc, f)
    self.words = nltk.FreqDist(self.words)
    # BUG FIX: the original comprehension referenced undefined names
    # (`self.i`, `selfi`); keep the 5000 most frequent words as features.
    self.wordFeat = [w for (w, _count) in self.words.most_common(5000)]
    with open("pickle/wordFeat.pickle", "wb") as f:
        pickle.dump(self.wordFeat, f)
    # Hoist the feature finder out of the comprehension instead of
    # constructing a fresh trainClassifier per document.
    finder = trainClassifier()
    self.featSet = [(finder.featureFind(rev, self.wordFeat), category)
                    for (rev, category) in self.doc]
    random.shuffle(self.featSet)
    self.testSet = self.featSet[10000:]
    # NOTE(review): attribute keeps the original's typo ("triainSet") in
    # case external code reads it; consider renaming to trainSet.
    self.triainSet = self.featSet[:10000]
    with open("pickle/featSet.pickle", "wb") as f:
        pickle.dump(self.featSet, f)

    def _report_and_dump(label, model, path):
        # Evaluate on the held-out set, print accuracy, persist the model.
        # BUG FIX: the original called nltk.clify.accuracy (no such module);
        # the correct function is nltk.classify.accuracy.
        acc = round(nltk.classify.accuracy(model, self.testSet) * 100, 2)
        print(label, acc, "%")
        with open(path, "wb") as f:
            pickle.dump(model, f)

    ONB = nltk.NaiveBayesClassifier.train(self.triainSet)
    _report_and_dump("Original Naive Bayes Algo accuracy:", ONB, "pickle/ONB.pickle")

    # Train each scikit-learn estimator via the NLTK wrapper; the six
    # copy-pasted stanzas of the original collapse into one loop.
    # (Print prefixes preserved byte-for-byte from the original.)
    for label, estimator, path in (
        ("MultinomialNB accuracy:", MultinomialNB(), "pickle/MNB.pickle"),
        ("BernoulliNB accuracy percent:", BernoulliNB(), "pickle/BNB.pickle"),
        ("LogisticRegression accuracy:", LogisticRegression(), "pickle/LR.pickle"),
        ("LinearSVC accuracy:", LinearSVC(), "pickle/LSVC.pickle"),
        ("SGDClassifier accuracy:", SGDClassifier(), "pickle/SGDC.pickle"),
    ):
        model = SklearnClassifier(estimator)
        model.train(self.triainSet)
        _report_and_dump(label, model, path)