This article collects typical usage examples of the Python class nltk.classify.scikitlearn.SklearnClassifier. If you have been wondering what SklearnClassifier is for, or how to use it, the selected class examples below should help.
The following shows 15 code examples of the SklearnClassifier class, sorted by popularity by default.
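Before the individual examples, here is a minimal, self-contained sketch of the pattern they all share: wrap a scikit-learn estimator in SklearnClassifier, train it on NLTK-style (feature-dict, label) pairs, and classify new feature dicts. The tiny feature sets and labels below are invented purely for illustration.

from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.svm import LinearSVC

# Toy NLTK-style training data: (feature dict, label) pairs, made up for illustration.
train_set = [
    ({'contains(good)': True, 'contains(great)': True}, 'pos'),
    ({'contains(bad)': True, 'contains(awful)': True}, 'neg'),
    ({'contains(nice)': True}, 'pos'),
    ({'contains(terrible)': True}, 'neg'),
]

# Wrap any scikit-learn estimator and train it through the NLTK interface.
classifier = SklearnClassifier(LinearSVC())
classifier.train(train_set)

# classify() takes a single feature dict; classify_many() takes a list of them.
print(classifier.classify({'contains(good)': True}))
print(classifier.classify_many([{'contains(bad)': True}, {'contains(nice)': True}]))

Every example below is a variation on this pattern, differing mainly in how the feature dicts are built and which scikit-learn estimator is wrapped.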
Example 1: classifier_for_lemma
def classifier_for_lemma(lemma, filenames):
    # XXX: always doing non-null and Random Forest for initial version
    classifier = SklearnClassifier(RandomForestClassifier(), sparse=False)
    print("loading training data for", lemma)
    load_training_for_word(lemma, filenames.bitextfn, filenames.alignfn,
                           filenames.annotatedfn)
    training = trainingdata.trainingdata_for(lemma, nonnull=True)
    print("got {0} instances for {1}".format(len(training), lemma))
    # delete the sentences themselves; we have the instances
    trainingdata.set_examples([], [])
    trainingdata.set_sl_annotated([])
    gc.collect()
    if len(training) > (20 * 1000):
        print("capping to 20k instances to fit in memory")
        training = training[: 20 * 1000]
    labels = set(label for (feat, label) in training)
    print("loaded training data for", lemma)
    if (not training) or len(labels) < 2:
        return None
    classifier.train(training)
    return classifier
Example 2: trainClassifiers
def trainClassifiers(tweets):
    # Generate the training set
    training_set = nltk.classify.util.apply_features(extract_features, tweets)
    print("Training set created!")
    # Train and save the Naive Bayes classifier to a file
    NBClassifier = nltk.NaiveBayesClassifier.train(training_set)
    f = open('data/trained_classifiers/NBClassifier.pickle', 'wb')
    pickle.dump(NBClassifier, f, 1)
    f.close()
    print("NBClassifier Classifier Trained")
    # Train linear SVC
    linear_SVC_classifier = SklearnClassifier(LinearSVC())
    linear_SVC_classifier.train(training_set)
    # Train Max Entropy Classifier
    # MaxEntClassifier = nltk.classify.maxent.MaxentClassifier.train(training_set, 'IIS', trace=2, \
    #     encoding=None, labels=None, sparse=True, gaussian_prior_sigma=0, max_iter=5)
    # f = open('data/trained_classifiers/MaxEntClassifier.pickle', 'wb')
    # pickle.dump(MaxEntClassifier, f, 1)
    # f.close()
    # print("MaxEntClassifier Classifier Trained")
    # return (training_set, NBClassifier, MaxEntClassifier)
    return (training_set, NBClassifier, linear_SVC_classifier)
Example 3: score
def score(trainset, testset, classifier):
    classifier = SklearnClassifier(classifier)
    classifier._vectorizer.sort = False
    classifier.train(trainset)
    (test, tag_test) = zip(*testset)
    pred = classifier.classify_many(test)
    return accuracy_score(tag_test, pred)
Example 4: __init__
class SKClassifier:
    classifier = None

    def __init__(self, cls='SVC'):
        self.classifier = SklearnClassifier({
            'SVC': SVC(),
            'LogisticRegression': LogisticRegression(),
            'BernoulliNB': BernoulliNB()
        }[cls])
        if not self.classifier:
            self.classifier = SklearnClassifier(SVC())

    def train(self, trainset):
        self.classifier.train(trainset)

    def test(self, tagged, featuresets):
        predict = self.classifier.classify_many(featuresets)
        print predict
        return accuracy_score(tagged, predict)

    def classify(self, featureset):
        return self.classifier.classify(featureset)

    def classify_many(self, featuresets):
        return self.classifier.classify_many(featuresets)
Example 5: learn_model
def learn_model(data, target):
    bestwords = best_of_words(data, target)
    # preparing data for split validation: 90% training, 10% test
    data_train, data_test, target_train, target_test = cross_validation.train_test_split(data, target, test_size=0.1, random_state=43)
    # classifier = BernoulliNB().fit(data_train, target_train)
    train_feature = []
    test_feature = []
    for i in range(len(data_train)):
        d = data_train[i]
        d = jieba.cut(d, cut_all=False)
        l = target_train[i]
        # tmp = [bigram(d), l]
        tmp = [dict([(word, True) for word in d if word in bestwords]), l]
        train_feature.append(tmp)
    for i in range(len(data_test)):
        d = data_test[i]
        d = jieba.cut(d, cut_all=False)
        l = target_test[i]
        # tmp = bigram(d)
        tmp = dict([(word, True) for word in d if word in bestwords])
        test_feature.append(tmp)
    classifier = SklearnClassifier(MultinomialNB())
    classifier.train(train_feature)
    predicted = classifier.classify_many(test_feature)
    evaluate_model(target_test, predicted)
    return classifier, bestwords
Example 6: performCrossValidation
def performCrossValidation(featureset, labels, foldsCount, sklearnclassifier, uniqLabels):
    accuracySum = 0.0
    precisionSums = defaultdict(float)
    recallSums = defaultdict(float)
    fscoreSums = defaultdict(float)
    crossValidationIterations = cross_validation.StratifiedKFold(labels, n_folds=foldsCount)
    for train, test in crossValidationIterations:
        trainset = [featureset[i] for i in train]
        testset = [featureset[i] for i in test]
        print("before train")
        classifier = SklearnClassifier(sklearnclassifier).train(trainset)
        true = [label for features, label in testset]
        predicted = classifier.classify_many([features for features, label in testset])
        precisions, recalls, fscores, support = precision_recall_fscore_support(true, predicted, pos_label=None, labels=uniqLabels)
        accuracy = accuracy_score(true, predicted)
        accuracySum += accuracy
        for label, value in zip(uniqLabels, precisions):
            precisionSums[label] += value
        for label, value in zip(uniqLabels, recalls):
            recallSums[label] += value
        for label, value in zip(uniqLabels, fscores):
            fscoreSums[label] += value
    print("Average accuracy: {0:.3f}".format(accuracySum/foldsCount))
    measures = {label: (sum/foldsCount, recallSums.get(label)/foldsCount, fscoreSums.get(label)/foldsCount) for label, sum in precisionSums.items()}
    for label, (prec, recall, fscore) in measures.items():
        print("Average precision for {0}: {1:.3f}".format(label, prec))
        print("Average recall for {0}: {1:.3f}".format(label, recall))
        print("Average f score for {0}: {1:.3f}".format(label, fscore))
Example 7: svm
def svm(train_data, preprocessing=True):
    training_data = []
    for data in train_data:
        training_data.append(preprocess(data[0], label=data[1]))
    cl = SklearnClassifier(LinearSVC())
    cl.train(training_data)
    return cl
Example 8: sentiment_classifier
def sentiment_classifier(debug):
    # trainingfp = open('training.csv', 'rb')
    train = pd.read_csv('training.csv', delimiter=',', quotechar='"', escapechar='\\', header=0)
    num_tweets = train['TweetText'].size
    cleantweets = []
    for i in xrange(0, num_tweets):
        if debug and ((i+1) % 1000 == 0):
            print "Tweet %d of %d\n" % (i+1, num_tweets)
        cleantweets.append((tweet_to_words(train['TweetText'][i]), train['Sentiment'][i]))
    # vectorizer = CountVectorizer(analyzer = "word", \
    #                              tokenizer = None, \
    #                              preprocessor = None, \
    #                              stop_words = None, \
    #                              max_features = 5000)
    # train_data_features = vectorizer.fit_transform([t for (t, _) in cleantweets])
    # feature_labels = [(m, l) for ((f, l), m) in zip(cleantweets, train_data_features)]
    # forest = RandomForestClassifier(n_estimators = sensitivity)
    # forest = forest.fit(train_data_features, train['Sentiment'])
    classif = SklearnClassifier(LinearSVC())
    classif.train(cleantweets)
    return (classif)
Example 9: evaluate
def evaluate(classifier_alo):
    classifier = SklearnClassifier(classifier_alo)  # use the scikit-learn wrapper provided by NLTK
    classifier.train(trainFeatures)  # train the classifier
    referenceSets = collections.defaultdict(set)
    testSets = collections.defaultdict(set)
    i = 0
    for item in testFeatures:
        referenceSets[item[1]].add(i)
        predicted = classifier.classify(item[0])
        testSets[predicted].add(i)
        i += 1
    pos_pre = nltk.metrics.precision(referenceSets['pos'], testSets['pos'])
    pos_recall = nltk.metrics.recall(referenceSets['pos'], testSets['pos'])
    neg_pre = nltk.metrics.precision(referenceSets['neg'], testSets['neg'])
    neg_recall = nltk.metrics.recall(referenceSets['neg'], testSets['neg'])
    print(str('{0:.3f}'.format(float(pos_pre))) + " "
          + str('{0:.3f}'.format(float(pos_recall))) + " "
          + str('{0:.3f}'.format(float(neg_pre))) + " "
          + str('{0:.3f}'.format(float(neg_recall))) + " "
          + str('{0:.3f}'.format(2*(float(pos_pre)*float(pos_recall)) / (float(pos_recall)+float(pos_pre)))) + " "
          + str('{0:.3f}'.format(2*(float(neg_pre)*float(neg_recall)) / (float(neg_recall)+float(neg_pre)))))
Example 10: chatBot
class chatBot(object):

    def __init__(self):
        self.posts = nltk.corpus.nps_chat.xml_posts()
        self.categories = ['Emotion', 'ynQuestion', 'yAnswer', 'Continuer',
                           'whQuestion', 'System', 'Accept', 'Clarify', 'Emphasis',
                           'nAnswer', 'Greet', 'Statement', 'Reject', 'Bye', 'Other']
        self.mapper = [0, 2, 6, 3, 11, 5, 8, 1, 8, 3, 10, 11, 13, 13, 13]
        self.responses = {}
        self.featuresets = []
        self.train = []
        self.test = []
        self.testSet = []
        self.testSetClass = []
        self.classif = SklearnClassifier(LinearSVC())
        for i in range(0, 15):
            self.responses[i] = []
        for post in self.posts:
            self.featuresets.append((self.tokenize(post.text), self.categories.index(post.get('class'))))
            self.temp = self.responses[self.categories.index(post.get('class'))]
            self.temp.append(post.text)

    def tokenize(self, sentence):
        """
        Extracts a set of features from a message.
        """
        features = {}
        tokens = nltk.word_tokenize(sentence)
        for t in tokens:
            features['contains(%s)' % t.lower()] = True
        return features

    def talk(self):
        while 1:
            inp = raw_input("YOU: ")
            features = self.tokenize(inp)
            pp = self.classif.classify_many(features)
            pp = pp[0]
            pp = int(pp)
            m = self.mapper[pp]
            r = self.responses[m]
            val = randint(0, len(r) - 1)  # randint is inclusive at both ends
            print("BOT: " + r[val])

    def trainSet(self):
        shuffle(self.featuresets)
        size = int(len(self.featuresets) * .1)  # 10% is used for the test set
        self.train = self.featuresets[size:]
        self.test = self.featuresets[:size]
        self.classif.train(self.train)
        self.testSet = []
        self.testSetClass = []
        for i in self.test:
            self.testSet.append(i[0])
            self.testSetClass.append(i[1])
        self.batch = self.classif.classify_many(self.testSet)

    def statistics(self):
        print(classification_report(self.testSetClass, self.batch, labels=list(set(self.testSetClass)), target_names=self.categories))
Example 11: train
def train(cleanedDataCollection, tagPool):
    posSamples = []
    negSamples = []
    featuresets = [(extractFeatures(d, tagPool), c) for (d, c) in cleanedDataCollection]
    for sample in featuresets:
        if sample[1] == "trash":
            negSamples.append(sample)
        else:
            posSamples.append(sample)
    train_set = negSamples[10:] + posSamples[10:]
    test_set = negSamples[:10] + posSamples[:10]
    # classifier = nltk.NaiveBayesClassifier.train(train_set)
    # print(nltk.classify.accuracy(classifier, test_set))
    # classifier.show_most_informative_features(5)
    # return classifier
    sk_classifier = SklearnClassifier(MultinomialNB())
    sk_classifier.train(train_set)
    print "accuracy is: %s" % (accuracy(sk_classifier, test_set))
    precision, recall, fMeasure = precision_recall_fmeasure(sk_classifier, test_set, "useful")
    print "precision is: %s" % (precision)
    print "recall is: %s" % (recall)
    print "F-measure is: %s" % (fMeasure)
    return sk_classifier
Example 12: main3
def main3():
    from nltk.classify.scikitlearn import SklearnClassifier
    from sklearn.svm import LinearSVC
    from sklearn.metrics import confusion_matrix
    from matplotlib import pyplot
    svm = SklearnClassifier(LinearSVC(loss="hinge"))
    svm.train(trainData)
    print("SVM: ", nltk.classify.accuracy(svm, testData))
    results = svm.classify_many(item[0] for item in testData)
    print(results)
    from sklearn.metrics import classification_report
    # getting a full report
    print(classification_report(t_test_skl, results, labels=list(set(t_test_skl)), target_names=t_test_skl))
    # Compute confusion matrix
    import numpy as np
    cmm = confusion_matrix([x[1] for x in testData], results)
    print(cmm)
    cmm = np.array(cmm, dtype=np.float)
    print(cmm.shape)
    # f = figure()
    # ax = f.add_subplot(111)
    # show()
    # %pylab inline
    # Show confusion matrix in a separate window
    print(pyplot.imshow(cmm, interpolation='nearest'))
Example 13: SVM
def SVM(training_set, test_set):
    classifier = SklearnClassifier(LinearSVC())
    print("Training a new SVM classifier")
    classifier.train(training_set)
    print("Accuracy of SVM in training:", nltk.classify.accuracy(classifier, test_set))
    # classifier.show_most_informative_features(5)
    # print("Running new Decision Tree classifier")
    accuracy = nltk.classify.accuracy(classifier, test_set)
    trueLabels = [l for d, l in test_set]
    predictedLabels = classifier.classify_many([d for d, t in test_set])
    # print("Accuracy:", accuracy)
    # classifier.show_most_informative_features(MIF)

    def runTrained(test_set, hasTags=False):
        # print("Running pre-trained Decision Tree classifier")
        if hasTags:
            tagglessTest_set = [data for data, tag in test_set]
            acc = nltk.classify.accuracy(classifier, test_set)
            print("Accuracy:", acc)
            predictions = classifier.classify_many(tagglessTest_set)
            return ([e for e in zip(tagglessTest_set, predictions)], acc)
        else:
            tagglessTest_set = test_set
            predictions = classifier.classify_many(tagglessTest_set)
            # print("Predicted Labels:", predictions)
            return [e for e in zip(tagglessTest_set, predictions)]

    return (runTrained, accuracy, predictedLabels, trueLabels)
Example 14: clf_score
def clf_score(classifier):
    classifier = SklearnClassifier(classifier)
    classifier.train(train_set)
    # nltk.classify.scikitlearn(BernoulliNB())
    predict = classifier.classify_many(test)
    # classifier.prob_classify_many()
    return accuracy_score(tag_test, predict)
Example 15: get_performance
def get_performance(clf_sel, train_features, test_features):
    ref_set = collections.defaultdict(set)
    test_set = collections.defaultdict(set)
    classification_error = False
    clf = SklearnClassifier(clf_sel)
    try:
        classifier = clf.train(train_features)
    except:
        classification_error = True
        # print(str(clf_sel.__class__), 'NA')
    if str(clf_sel.__class__) == "<class 'sklearn.naive_bayes.MultinomialNB'>":
        pickle_cls(classifier, 'MultinomialNB')
    # print(str(clf_sel), 'accuracy:', (nltk.classify.accuracy(classifier, test_features)) * 100)
    if not classification_error:
        clf_acc = nltk.classify.accuracy(classifier, test_features)
        for i, (features, label) in enumerate(test_features):
            ref_set[label].add(i)
            predicted = classifier.classify(features)
            test_set[predicted].add(i)
        pos_precision = precision(ref_set['pos'], test_set['pos'])
        pos_recall = recall(ref_set['pos'], test_set['pos'])
        neg_precision = precision(ref_set['neg'], test_set['neg'])
        neg_recall = recall(ref_set['neg'], test_set['neg'])
        print(
            "{0},{1},{2},{3},{4},{5}".format(clf_sel.__class__, clf_acc, pos_precision, pos_recall, neg_precision,
                                             neg_recall))