本文整理汇总了Python中nltk.classify.scikitlearn.SklearnClassifier.classify_many方法的典型用法代码示例。如果您正苦于以下问题:Python SklearnClassifier.classify_many方法的具体用法?Python SklearnClassifier.classify_many怎么用?Python SklearnClassifier.classify_many使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类nltk.classify.scikitlearn.SklearnClassifier
的用法示例。
在下文中一共展示了SklearnClassifier.classify_many方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: chatBot
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
class chatBot(object):
    """Chat bot over the NLTK NPS-chat corpus.

    Classifies user input into one of 15 dialogue-act categories with a
    LinearSVC (via nltk's SklearnClassifier) and replies with a random post
    drawn from a mapped response category.
    """

    def __init__(self):
        self.posts = nltk.corpus.nps_chat.xml_posts()
        self.categories = ['Emotion', 'ynQuestion', 'yAnswer', 'Continuer',
                           'whQuestion', 'System', 'Accept', 'Clarify', 'Emphasis',
                           'nAnswer', 'Greet', 'Statement', 'Reject', 'Bye', 'Other']
        # mapper[i] = index of the category whose posts are used as replies
        # when the input is classified as category i.
        self.mapper = [0, 2, 6, 3, 11, 5, 8, 1, 8, 3, 10, 11, 13, 13, 13]
        self.responses = {}
        self.featuresets = []
        self.train = []
        self.test = []
        self.testSet = []
        self.testSetClass = []
        self.classif = SklearnClassifier(LinearSVC())
        for i in range(0, 15):
            self.responses[i] = []
        for post in self.posts:
            label = self.categories.index(post.get('class'))
            self.featuresets.append((self.tokenize(post.text), label))
            self.responses[label].append(post.text)

    def tokenize(self, sentence):
        """Extract a bag-of-words feature dict from a message."""
        features = {}
        tokens = nltk.word_tokenize(sentence)
        for t in tokens:
            features['contains(%s)' % t.lower()] = True
        return features

    def talk(self):
        """Interactive loop: read a line, classify it, print a canned reply."""
        while 1:
            inp = raw_input("YOU: ")
            features = self.tokenize(inp)
            # BUG FIX: classify_many expects a list of featuresets; passing
            # the dict itself would iterate over its keys.
            pp = int(self.classif.classify_many([features])[0])
            m = self.mapper[pp]
            r = self.responses[m]
            # BUG FIX: randint is inclusive at both ends, so randint(0, len(r))
            # could produce an out-of-range index.
            val = randint(0, len(r) - 1)
            print("BOT: "+r[val])

    def trainSet(self):
        """Shuffle the featuresets, train on 90%, classify the 10% test split."""
        shuffle(self.featuresets)
        size = int(len(self.featuresets) * .1)  # 10% is used for the test set
        self.train = self.featuresets[size:]
        self.test = self.featuresets[:size]
        self.classif.train(self.train)
        self.testSet = []
        self.testSetClass = []
        for feats, label in self.test:
            self.testSet.append(feats)
            self.testSetClass.append(label)
        self.batch = self.classif.classify_many(self.testSet)

    def statistics(self):
        """Print per-category precision/recall/f-score for the last test split."""
        # BUG FIX: target_names must have one entry per item in `labels`;
        # passing all 15 category names fails when the test split does not
        # contain every class.
        labels = sorted(set(self.testSetClass))
        print(classification_report(self.testSetClass, self.batch,
                                    labels=labels,
                                    target_names=[self.categories[i] for i in labels]))
示例2: __init__
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
class SKClassifier:
    """Thin wrapper around nltk's SklearnClassifier.

    Selects an underlying scikit-learn estimator by name ('SVC',
    'LogisticRegression' or 'BernoulliNB'), defaulting to SVC for any
    unknown name.
    """

    classifier = None

    def __init__(self, cls='SVC'):
        # BUG FIX: the original indexed the dict directly, so an unknown
        # name raised KeyError and the SVC fallback below was dead code.
        # dict.get with a default makes the intended fallback reachable.
        self.classifier = SklearnClassifier({
            'SVC': SVC(),
            'LogisticRegression': LogisticRegression(),
            'BernoulliNB': BernoulliNB()
        }.get(cls, SVC()))

    def train(self, trainset):
        """Train the wrapped classifier on (featureset, label) pairs."""
        self.classifier.train(trainset)

    def test(self, tagged, featuresets):
        """Classify featuresets, print the predictions, return accuracy vs tagged."""
        predict = self.classifier.classify_many(featuresets)
        print(predict)
        return accuracy_score(tagged, predict)

    def classify(self, featureset):
        """Classify a single featureset."""
        return self.classifier.classify(featureset)

    def classify_many(self, featuresets):
        """Classify a list of featuresets."""
        return self.classifier.classify_many(featuresets)
示例3: performCrossValidation
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def performCrossValidation(featureset, labels, foldsCount, sklearnclassifier, uniqLabels):
    """Run stratified k-fold cross-validation of an sklearn estimator.

    Trains an nltk SklearnClassifier on each fold and prints the accuracy,
    precision, recall and f-score averaged over all folds.

    featureset -- list of (features, label) pairs, indexable by fold indices
    labels -- label vector used only for stratification
    foldsCount -- number of folds
    sklearnclassifier -- an (unfitted) scikit-learn estimator
    uniqLabels -- label order used for per-label metrics
    """
    accuracySum = 0.0
    precisionSums = defaultdict(float)
    recallSums = defaultdict(float)
    fscoreSums = defaultdict(float)
    crossValidationIterations = cross_validation.StratifiedKFold(labels, n_folds=foldsCount)
    for train, test in crossValidationIterations:
        trainset = [featureset[i] for i in train]
        testset = [featureset[i] for i in test]
        print("before train")
        classifier = SklearnClassifier(sklearnclassifier).train(trainset)
        true = [label for features, label in testset]
        predicted = classifier.classify_many([features for features, label in testset])
        precisions, recalls, fscores, support = precision_recall_fscore_support(true, predicted, pos_label=None, labels=uniqLabels)
        accuracySum += accuracy_score(true, predicted)
        for label, value in zip(uniqLabels, precisions):
            precisionSums[label] += value
        for label, value in zip(uniqLabels, recalls):
            recallSums[label] += value
        for label, value in zip(uniqLabels, fscores):
            fscoreSums[label] += value
    # BUG FIX: "accurancy" typo in the report message.
    print("Average accuracy: {0:.3f}".format(accuracySum / foldsCount))
    # BUG FIX: the comprehension variable no longer shadows the builtin `sum`.
    measures = {label: (precSum / foldsCount,
                        recallSums.get(label) / foldsCount,
                        fscoreSums.get(label) / foldsCount)
                for label, precSum in precisionSums.items()}
    for label, (prec, recall, fscore) in measures.items():
        print("Average precision for {0}: {1:.3f}".format(label, prec))
        print("Average recall for {0}: {1:.3f}".format(label, recall))
        print("Average f score for {0}: {1:.3f}".format(label, fscore))
示例4: main3
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def main3():
    """Train a LinearSVC on the global trainData, report accuracy and a
    classification report on testData, and display the confusion matrix.
    """
    from nltk.classify.scikitlearn import SklearnClassifier
    from sklearn.svm import LinearSVC
    from sklearn.metrics import confusion_matrix
    from matplotlib import pyplot
    svm = SklearnClassifier(LinearSVC(loss="hinge"))
    svm.train(trainData)
    print("SVM: ", nltk.classify.accuracy(svm, testData))
    results = svm.classify_many(item[0] for item in testData)
    print(results)
    from sklearn.metrics import classification_report
    # getting a full report
    # NOTE(review): target_names normally holds one display name per entry
    # in `labels`; passing the raw t_test_skl vector looks suspicious —
    # confirm against the caller before relying on this report.
    print(classification_report(t_test_skl, results, labels=list(set(t_test_skl)), target_names=t_test_skl))
    # Compute confusion matrix
    import numpy as np
    cmm = confusion_matrix([x[1] for x in testData], results)
    print(cmm)
    # BUG FIX: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the documented replacement.
    cmm = np.array(cmm, dtype=float)
    print(cmm.shape)
    # Show confusion matrix
    print(pyplot.imshow(cmm, interpolation='nearest'))
示例5: learn_model
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def learn_model(data, target):
    """Train a MultinomialNB sentiment classifier on jieba-segmented text.

    Selects the informative vocabulary with best_of_words, splits the data,
    featurizes both splits as bag-of-best-words dicts, trains, evaluates via
    evaluate_model, and returns (classifier, bestwords).
    """
    bestwords = best_of_words(data, target)
    # BUG FIX (comment only): test_size=0.1 is a 90%/10% split, not the
    # 80%/20% the original comment claimed.
    data_train, data_test, target_train, target_test = cross_validation.train_test_split(
        data, target, test_size=0.1, random_state=43)
    train_feature = []
    test_feature = []
    for d, l in zip(data_train, target_train):
        words = jieba.cut(d, cut_all=False)
        # [features, label] pair for SklearnClassifier.train
        train_feature.append([dict((word, True) for word in words if word in bestwords), l])
    for d in data_test:
        words = jieba.cut(d, cut_all=False)
        test_feature.append(dict((word, True) for word in words if word in bestwords))
    classifier = SklearnClassifier(MultinomialNB())
    classifier.train(train_feature)
    predicted = classifier.classify_many(test_feature)
    evaluate_model(target_test, predicted)
    return classifier, bestwords
示例6: score
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def score(classifier):
    """Wrap a scikit-learn estimator, train it on the global trainset, and
    return its accuracy on the global test/tag_test split."""
    wrapped = SklearnClassifier(classifier)
    wrapped.train(trainset)
    predictions = wrapped.classify_many(test)
    return accuracy_score(tag_test, predictions)
示例7: SVM
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def SVM(training_set, test_set):
    """Train a LinearSVC on training_set and evaluate it on test_set.

    Returns (runTrained, accuracy, predictedLabels, trueLabels), where
    runTrained is a closure that classifies further test sets with the
    trained model.
    """
    classifier = SklearnClassifier(LinearSVC())
    print("Training a new SVM classifier")
    classifier.train(training_set)
    print("Accuracy of SVM in training:", nltk.classify.accuracy(classifier, test_set))
    accuracy = nltk.classify.accuracy(classifier, test_set)
    trueLabels = [label for _, label in test_set]
    predictedLabels = classifier.classify_many([feats for feats, _ in test_set])

    def runTrained(test_set, hasTags=False):
        """Classify test_set with the trained model.

        With hasTags=True, test_set is (features, tag) pairs and the result
        is ([(features, prediction), ...], accuracy); otherwise test_set is
        bare featuresets and the result is [(features, prediction), ...].
        """
        if not hasTags:
            predictions = classifier.classify_many(test_set)
            return [pair for pair in zip(test_set, predictions)]
        untagged = [feats for feats, _ in test_set]
        acc = nltk.classify.accuracy(classifier, test_set)
        print("Accuracy:", acc)
        predictions = classifier.classify_many(untagged)
        return ([pair for pair in zip(untagged, predictions)], acc)

    return (runTrained, accuracy, predictedLabels, trueLabels)
示例8: clf_score
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def clf_score(classifier):
    """Train the given scikit-learn estimator (via nltk's wrapper) on the
    global train_set and return accuracy on the global test/tag_test data."""
    model = SklearnClassifier(classifier)
    model.train(train_set)
    predictions = model.classify_many(test)
    return accuracy_score(tag_test, predictions)
示例9: score
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def score(trainset, testset, classifier):
    """Train a wrapped scikit-learn estimator on trainset and return its
    accuracy on testset (a sequence of (features, tag) pairs)."""
    model = SklearnClassifier(classifier)
    # keep the internal DictVectorizer from sorting feature names
    # (relies on a private attribute of SklearnClassifier)
    model._vectorizer.sort = False
    model.train(trainset)
    features, gold = zip(*testset)
    predictions = model.classify_many(features)
    return accuracy_score(gold, predictions)
示例10: coem
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def coem(L1, L2, U1, U2):
    """Co-training (co-EM style) over two feature views.

    Trains classifier 1 on labelled view L1, labels the unlabelled view U1,
    then alternates for 25 iterations: classifier 2 trains on L2 plus U2
    pseudo-labelled by classifier 1, classifier 1 trains on L1 plus U1
    pseudo-labelled by classifier 2. Returns the final labels for U1.
    """
    pipeline = Pipeline([('tfidf', TfidfTransformer()),
                         ('chi2', SelectKBest(chi2, k=100)),
                         ('nb', MultinomialNB())])
    classifier1 = SklearnClassifier(pipeline)
    classifier1.train(L1)
    # Predict on U using 1st classifier
    U1_labels = classifier1.classify_many(U1)
    iterations = 0
    while iterations < 25:
        classifier2 = SklearnClassifier(pipeline)
        # BUG FIX: copy L2 instead of aliasing it. The original did
        # `L2_train = L2` and then appended, so every iteration grew the
        # caller's list with duplicate pseudo-labelled examples.
        L2_train = list(L2)
        for i, sub_bow in enumerate(U2):
            L2_train.append((sub_bow, U1_labels[i]))
        classifier2.train(L2_train)
        # Label U with classifier 2, then retrain classifier 1 on L1 plus
        # U1 pseudo-labelled by classifier 2.
        U2_labels = classifier2.classify_many(U2)
        # BUG FIX: same aliasing fix for L1.
        L1_train = list(L1)
        for i, mail_bow in enumerate(U1):
            L1_train.append((mail_bow, U2_labels[i]))
        classifier1 = SklearnClassifier(pipeline)
        classifier1.train(L1_train)
        U1_labels = classifier1.classify_many(U1)
        print(labels_find_intersection(U1_labels, U2_labels))
        iterations += 1
    return U1_labels
示例11: buildClassifier_score
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def buildClassifier_score(trainSet, devtestSet, classifier):
    """Train a scikit-learn estimator through nltk's SklearnClassifier
    interface and return its accuracy on the dev-test set.

    trainSet -- (features, label) pairs for training
    devtestSet -- (features, label) pairs for evaluation
    classifier -- an (unfitted) scikit-learn estimator
    """
    # split the featurized dev-test set into features and gold labels
    dev, tag_dev = zip(*devtestSet)
    # BUG FIX: removed the unused `from nltk import compat` import.
    classifier = SklearnClassifier(classifier)  # nltk wrapper for sklearn estimators
    classifier.train(trainSet)
    # predict labels for the dev-test features
    pred = classifier.classify_many(dev)
    # compare predictions with the gold labels
    return accuracy_score(tag_dev, pred)
示例12: performTestValidation
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def performTestValidation(trainset, testset, sklearnclassifier, uniqLabels):
    """Train a wrapped scikit-learn estimator on trainset, evaluate on
    testset, and print accuracy plus per-label precision/recall/f-score
    (label order given by uniqLabels)."""
    model = SklearnClassifier(sklearnclassifier).train(trainset)
    gold = [label for _, label in testset]
    predicted = model.classify_many([features for features, _ in testset])
    precisions, recalls, fscores, _ = precision_recall_fscore_support(
        gold, predicted, pos_label=None, labels=uniqLabels)
    print("Test accuracy: {0:.3f}".format(accuracy_score(gold, predicted)))
    for label, prec, recall, fscore in zip(uniqLabels, precisions, recalls, fscores):
        print("Precision for {0}: {1:.3f}".format(label, prec))
        print("Recall for {0}: {1:.3f}".format(label, recall))
        print("F score for {0}: {1:.3f}".format(label, fscore))
示例13: train_model
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def train_model(classifier, name, printout=False):
    """Train a wrapped scikit-learn estimator on the global trainData,
    evaluate it on testSam/testTag, pickle the trained model to
    data_path + name + ".pkl", and return the accuracy.

    classifier -- an (unfitted) scikit-learn estimator
    name -- model name used in the report and the pickle filename
    printout -- when True, print accuracy, classification report and
                confusion matrix
    """
    classifier = SklearnClassifier(classifier)
    classifier.train(trainData)
    predict = classifier.classify_many(testSam)
    accuracy = accuracy_score(testTag, predict)
    if printout:
        print('*******模型: %s的测试结果*********' % name)
        print('\n')
        print('%s`s accuracy is %f' % (name, accuracy))
        print('%s`s score report is \n' % name)
        print(classification_report(testTag, predict))
        print('%s`s confusion is \n' % name)
        print(confusion_matrix(testTag, predict))
        print('\n')
    model_file = data_path + name + ".pkl"
    # BUG FIX: pickle data is binary — text mode 'w' corrupts it on Windows
    # and fails on Python 3; the file was also never closed.
    with open(model_file, 'wb') as f:
        pickle.dump(classifier, f)
    return accuracy
示例14: classifier_score
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def classifier_score(tp, classifier, train_list, test, test_tag):
    """Train the given scikit-learn classifier, pickle it, and evaluate it.

    Returns (interval, pos_precision, pos_recall, accuracy), where interval
    is the elapsed training+evaluation time in units of 100 microseconds.
    Labels are binarized with 'pos' -> 1, anything else -> 0.
    """
    starttime = datetime.datetime.now()
    classifier = SklearnClassifier(classifier)
    classifier.train(train_list)
    # persist the trained model for later reuse
    iohelper.save_objects2pickle(classifier, './Reviews/' + tp + '.pkl')
    pred = classifier.classify_many(test)  # list of predicted labels
    y_true = [1 if tag == 'pos' else 0 for tag in test_tag]
    y_pred = [1 if tag == 'pos' else 0 for tag in pred]
    pos_precision = precision_score(y_true, y_pred)
    pos_recall = recall_score(y_true, y_pred)
    endtime = datetime.datetime.now()
    # BUG FIX: timedelta.microseconds is only the sub-second component and
    # silently drops whole seconds; total_seconds() gives the full elapsed
    # time. The original unit (microseconds / 100) is preserved.
    interval = (endtime - starttime).total_seconds() * 1000000 / 100
    return interval, pos_precision, pos_recall, accuracy_score(test_tag, pred)
示例15: learn_model
# 需要导入模块: from nltk.classify.scikitlearn import SklearnClassifier [as 别名]
# 或者: from nltk.classify.scikitlearn.SklearnClassifier import classify_many [as 别名]
def learn_model(data,target):
    """Train and evaluate a BernoulliNB sentiment classifier on a split of
    (data, target) and write the predictions to "BernoulliNB.xml".

    Python 2 code (print statements, raw_input). Relies on module-level
    helpers (build_hinfo_dict, best_word_feats, evaluate_model,
    save_predict) and globals not visible here.
    """
    # NOTE(review): the original comment claimed a 60/40 split, but
    # test_size=0.20 below gives 80% training / 20% test.
    state=43#randrange(1,23432)+123
    print "statue 6857"
    print state
    data_train,data_test,target_train,target_test = cross_validation.train_test_split(data,target,test_size=0.20,random_state=state)
    #classifier = BernoulliNB().fit(data_train,target_train)
    # stop-word / sentiment dictionaries are disabled (empty) here
    stop_word_dict={}#build_stop_word_dict()
    sentiment_dict={}#build_sentiment_dict()
    global hinfo_dict
    hinfo_dict=build_hinfo_dict(data,target)
    #print stop_word_dict.keys()
    # pause so the operator can confirm before the (slow) featurization
    raw_input("begin train")
    train_feature=[]
    test_feature=[]
    # featurize the training split as [features, label] pairs
    for i in range(len(data_train)):
        print i
        d=data_train[i]
        #d=jieba.cut(d, cut_all=False)
        l=target_train[i]
        tmp=[best_word_feats(d,stop_word_dict,sentiment_dict,hinfo_dict),l]
        train_feature.append(tmp)
    # featurize the test split: features only (gold labels stay in target_test)
    for i in range(len(data_test)):
        print i
        d=data_test[i]
        #d=jieba.cut(d, cut_all=False)
        l=target_test[i]
        tmp=best_word_feats(d,stop_word_dict,sentiment_dict,hinfo_dict)
        test_feature.append(tmp)
    #BernoulliNB MultinomialNB LogisticRegression SVC LinearSVC
    # NOTE(review): max_len/min_len/sum/cnt are module-level globals,
    # presumably updated by best_word_feats — confirm; `sum` also shadows
    # the builtin at module level.
    print "max_len %d"%(max_len)
    print "min_len %d"%(min_len)
    print "avg_len %d"%(sum/cnt)
    print "BernoulliNB"
    classifier = SklearnClassifier(BernoulliNB())
    classifier.train(train_feature)
    print "--------------"
    # dump the learned vocabulary (relies on the private _vectorizer attr)
    print len(classifier._vectorizer.get_feature_names())
    for f in classifier._vectorizer.get_feature_names():
        print f.encode("utf-8")
    predicted = classifier.classify_many(test_feature)
    evaluate_model(target_test,predicted)
    ids=range(len(data_test))
    result=[]
    # map predicted labels to '1' / '-1' strings for the XML output
    for p in predicted:
        if p =='positive':
            result.append('1')
        else:
            result.append('-1')
    save_predict(data_test, ids, result, "BernoulliNB.xml")
"""