This article collects typical usage examples of the accuracy function from Python's nltk.classify.util module. If you are wondering what accuracy does, how to call it, and what real code that uses it looks like, the curated examples below should help.
Fifteen code examples of the accuracy function are shown, ordered roughly by popularity. They come from open-source projects, so some helpers they reference are defined elsewhere in their original codebases.
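Before the examples, a minimal, self-contained sketch of what accuracy expects may help: a trained classifier plus a list of (featureset, label) pairs; it returns the fraction of pairs the classifier labels correctly. The toy feature sets and labels below are invented purely for illustration.

from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy

# Toy (featureset, label) pairs; accuracy() takes a trained classifier
# plus a gold-labelled test list and returns the fraction it gets right.
train = [({'contains(good)': True}, 'pos'),
         ({'contains(bad)': True}, 'neg')]
test = [({'contains(good)': True}, 'pos'),
        ({'contains(bad)': True}, 'neg')]

classifier = NaiveBayesClassifier.train(train)
print(accuracy(classifier, test))  # 1.0 on this toy data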
Example 1: classify
def classify(inputdir):
    filenames = os.listdir(inputdir)
    feat_set = []
    for name in filenames:
        lineno = 0
        path = os.path.join(inputdir, name)
        # The sense label is the file name without its extension.
        sense = name.split('\\')[-1].split('.')[0]
        print('training', sense)
        infile = codecs.open(path, 'r', 'utf-8')
        allwords = []
        for line in infile:
            if len(line.split()) > 2:
                lineno += 1
                line = line.strip()
                words = []
                tags = []
                for item in line.split():
                    # Tokens look like word\tag.
                    if len(item.split('\\')) == 2:
                        word, tag = item.split('\\')
                        words.append(word)
                        tags.append(tag)
                        allwords.append(word)
                feat_set.append((bag_of_words(line), sense))  # bag_of_words defined elsewhere
            else:
                words = []
                tags = []
        infile.close()
    random.shuffle(feat_set)
    train_data = train_feats(feat_set)  # train/test split helpers defined elsewhere
    test_data = test_feats(feat_set)
    nb_classifier = NaiveBayesClassifier.train(train_data)
    dt_classifier = DecisionTreeClassifier.train(train_data, entropy_cutoff=0.8,
                                                 depth_cutoff=5, support_cutoff=30)
    entropy_classifier = MaxentClassifier.train(train_data, algorithm='iis', trace=0,
                                                max_iter=1, min_lldelta=0.5)
    print('nb accuracy', accuracy(nb_classifier, test_data) * 100)
    print('dt accuracy', accuracy(dt_classifier, test_data) * 100)
    print('entropy accuracy', accuracy(entropy_classifier, test_data) * 100)
    mv_classifier = MaxVoteClassifier(nb_classifier, dt_classifier, entropy_classifier)
    print('max vote accuracy', accuracy(mv_classifier, test_data) * 100)
Example 2: train
def train(self):
    print('Classifier training in progress....')
    poscutoff = len(self.positiveFeatures)
    negcutoff = len(self.negativeFeatures)
    print('Train pos cutoff:', poscutoff, 'Train neg cutoff:', negcutoff)
    trainfeats = self.positiveFeatures[:poscutoff] + self.negativeFeatures[:negcutoff]
    testfeats = self.test()
    print('Train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))
    self.classifier = NaiveBayesClassifier.train(trainfeats)
    print('accuracy:', accuracy(self.classifier, testfeats))
    # Collect reference and predicted index sets per label for precision/recall.
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = self.classifier.classify(feats)
        testsets[observed].add(i)
    print('pos precision:', nltk.metrics.precision(refsets['pos'], testsets['pos']))
    print('pos recall:', nltk.metrics.recall(refsets['pos'], testsets['pos']))
    print('pos F-measure:', nltk.metrics.f_measure(refsets['pos'], testsets['pos']))
    print('neg precision:', nltk.metrics.precision(refsets['neg'], testsets['neg']))
    print('neg recall:', nltk.metrics.recall(refsets['neg'], testsets['neg']))
    print('neg F-measure:', nltk.metrics.f_measure(refsets['neg'], testsets['neg']))
Example 3: model_test
def model_test(classifier, test_features):
    print('Model Accuracy: {0}'.format(accuracy(classifier, test_features)))
    precisions, recalls, f_measure, conf_matrix = \
        get_precision_recall_fmeasure_conf_matrix(classifier, test_features)
    print('Precisions: {0}'.format(precisions))
    print('Recalls: {0}'.format(recalls))
    print('F-Measure: {0}'.format(f_measure))
    print('Confusion Matrix: {0}'.format(conf_matrix))
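get_precision_recall_fmeasure_conf_matrix is not part of NLTK; it is a project helper. A plausible reconstruction, assuming it returns per-label dicts plus an NLTK ConfusionMatrix, could be:

import collections
from nltk.metrics import precision, recall, f_measure, ConfusionMatrix

def get_precision_recall_fmeasure_conf_matrix(classifier, test_features):
    # Hypothetical reconstruction of the helper used in Example 3.
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    refs, tests = [], []
    for i, (feats, label) in enumerate(test_features):
        observed = classifier.classify(feats)
        refsets[label].add(i)
        testsets[observed].add(i)
        refs.append(label)
        tests.append(observed)
    precisions = {l: precision(refsets[l], testsets[l]) for l in refsets}
    recalls = {l: recall(refsets[l], testsets[l]) for l in refsets}
    f_measures = {l: f_measure(refsets[l], testsets[l]) for l in refsets}
    return precisions, recalls, f_measures, ConfusionMatrix(refs, tests)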
Example 4: main
def main():
    '''
    Entry point of the 'chitragoopt' executable script (defined in setup.py).
    Use doctests; they are very helpful.

    >>> main()
    Hello
    >>> 2 + 2
    4
    '''
    lfeats = label_feats_from_corpus(movie_reviews)
    train_feats, test_feats = split_label_feats(lfeats, split=0.75)
    print(sys.argv[1].split())
    negfeat = bag_of_words(sys.argv[1].split())
    with open('my_classifier.pickle', 'rb') as f:  # 'rb' is required to unpickle in Python 3
        nb_classifier = pickle.load(f)
    print(accuracy(nb_classifier, test_feats))
    print(nb_classifier.classify(negfeat))
    for x in range(0, 50):
        print(nb_classifier.classify(negfeat))
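Example 4 assumes a my_classifier.pickle file already exists on disk. A plausible one-off step to produce it, assuming the same train_feats split shown above, is simply training and dumping the classifier:

import pickle
from nltk.classify import NaiveBayesClassifier

# Hypothetical one-off step that creates the pickle Example 4 loads.
nb_classifier = NaiveBayesClassifier.train(train_feats)
with open('my_classifier.pickle', 'wb') as f:
    pickle.dump(nb_classifier, f)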
Example 5: train_with_movie_db
def train_with_movie_db(self):
    """
    Training is possible with the movie reviews corpus
    - this does not yield particularly good results
    """
    self.use_movie_reviews = True
    negids = movie_reviews.fileids('neg')
    posids = movie_reviews.fileids('pos')
    negfeats = [(self.feature_extraction_movie_reviews(movie_reviews.words(fileids=[f])),
                 "negative") for f in negids]
    posfeats = [(self.feature_extraction_movie_reviews(movie_reviews.words(fileids=[f])),
                 "positive") for f in posids]
    # Use integer division so the cutoffs stay valid slice indices in Python 3.
    negcutoff = len(negfeats) * 3 // 4
    poscutoff = len(posfeats) * 3 // 4
    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
    DLOG("train on %d instances, test on %d instances" % (len(trainfeats), len(testfeats)))
    self.classifier = NaiveBayesClassifier.train(trainfeats)
    DLOG("accuracy: " + str(util.accuracy(self.classifier, testfeats)))
    DLOG(self.classifier.show_most_informative_features())
Example 6: train
def train(cleanedDataCollection, tagPool):
    posSamples = []
    negSamples = []
    featuresets = [(extractFeatures(d, tagPool), c) for (d, c) in cleanedDataCollection]
    for sample in featuresets:
        if sample[1] == "trash":
            negSamples.append(sample)
        else:
            posSamples.append(sample)
    # Hold out the first ten samples of each class for testing.
    train_set = negSamples[10:] + posSamples[10:]
    test_set = negSamples[:10] + posSamples[:10]
    sk_classifier = SklearnClassifier(MultinomialNB())
    sk_classifier.train(train_set)
    print("accuracy is: %s" % accuracy(sk_classifier, test_set))
    precision, recall, fMeasure = precision_recall_fmeasure(sk_classifier, test_set, "useful")
    print("precision is: %s" % precision)
    print("recall is: %s" % recall)
    print("F-measure is: %s" % fMeasure)
    return sk_classifier
Example 7: rte_classifier
def rte_classifier(algorithm):
    from nltk.corpus import rte as rte_corpus

    train_set = rte_corpus.pairs(['rte1_dev.xml', 'rte2_dev.xml', 'rte3_dev.xml'])
    test_set = rte_corpus.pairs(['rte1_test.xml', 'rte2_test.xml', 'rte3_test.xml'])
    featurized_train_set = rte_featurize(train_set)
    featurized_test_set = rte_featurize(test_set)
    # Train the classifier
    print('Training classifier...')
    if algorithm in ['megam', 'BFGS']:  # MEGAM based algorithms.
        # Ensure that MEGAM is configured first.
        check_megam_config()
        # Train directly; the original wrapped this in a lambda,
        # which would crash accuracy() below.
        clf = MaxentClassifier.train(featurized_train_set, algorithm)
    elif algorithm in ['GIS', 'IIS']:  # Use the default GIS/IIS MaxEnt algorithm
        clf = MaxentClassifier.train(featurized_train_set, algorithm)
    else:
        err_msg = ("RTEClassifier only supports these algorithms:\n "
                   "'megam', 'BFGS', 'GIS', 'IIS'.\n")
        raise Exception(err_msg)
    print('Testing classifier...')
    acc = accuracy(clf, featurized_test_set)
    print('Accuracy: %6.4f' % acc)
    return clf
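rte_featurize and check_megam_config come from NLTK itself (nltk.classify.rte_classify and nltk.classify.util). If you adapt this example standalone, the featurizer is roughly the following; this mirrors the NLTK source, but verify it against your NLTK version.

from nltk.classify.rte_classify import rte_features

def rte_featurize(rte_pairs):
    # Each RTE pair carries a gold 'value' label; rte_features() extracts
    # word- and named-entity-overlap features between text and hypothesis.
    return [(rte_features(pair), pair.value) for pair in rte_pairs]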
Example 8: test
def test(self):
    if not hasattr(self, 'test_sets'):
        self._split_features()
    if not hasattr(self, 'classifiers'):
        self.train()
    result = {}
    for tag, classifier in self.classifiers.items():  # iteritems() in Python 2
        result[tag] = accuracy(classifier, self.test_sets[tag])
    return result
Example 9: results
def results(train, query_data, query_no_label, query_labels):
    print('\nCalculating final results...')
    megam_classifier = MaxentClassifier.train(train, 'megam')   # build and train the maxent classifier
    accu = accuracy(megam_classifier, query_data)               # calculate the classification accuracy
    predicted = megam_classifier.classify_many(query_no_label)  # get a list of predicted labels
    cm = confusion_matrix(query_labels, predicted)              # build the confusion matrix
                                                                # (imported elsewhere, e.g. from sklearn.metrics)
    return accu, cm
Example 10: implementMethods
def implementMethods(self, sents, labelsData, clsent):
    cl = self.featureList(sents, labelsData)
    tr, te = self.setSplit(cl)
    nb_classifier = NaiveBayesClassifier.train(tr)
    print('Accuracy = ' + str(accuracy(nb_classifier, te) * 100) + '%')
    return nb_classifier
Example 11: RunBayesNetwork
def RunBayesNetwork(self, type_of_Feature_extractor):
    # Naive Bayes classifier; returns accuracy
    if type_of_Feature_extractor == 1:
        # Format the positive and negative data separately
        formatted_pos_training = BNFormat.format_data(self.pos_training_data, "pos", BNFormat.Feature_extractor1)
        formatted_neg_training = BNFormat.format_data(self.neg_training_data, "neg", BNFormat.Feature_extractor1)
        # Same again, but for the testing data
        formatted_pos_testing = BNFormat.format_data(self.pos_testing_data, "pos", BNFormat.Feature_extractor1)
        formatted_neg_testing = BNFormat.format_data(self.neg_testing_data, "neg", BNFormat.Feature_extractor1)
    elif type_of_Feature_extractor == 2:
        formatted_pos_training = BNFormat.format_data(self.pos_training_data, "pos", BNFormat.Feature_extractor2)
        formatted_neg_training = BNFormat.format_data(self.neg_training_data, "neg", BNFormat.Feature_extractor2)
        formatted_pos_testing = BNFormat.format_data(self.pos_testing_data, "pos", BNFormat.Feature_extractor2)
        formatted_neg_testing = BNFormat.format_data(self.neg_testing_data, "neg", BNFormat.Feature_extractor2)
    elif type_of_Feature_extractor == 3:
        formatted_pos_training = BNFormat.format_data(self.pos_training_data, "pos", BNFormat.Feature_extractor3)
        formatted_neg_training = BNFormat.format_data(self.neg_training_data, "neg", BNFormat.Feature_extractor3)
        formatted_pos_testing = BNFormat.format_data(self.pos_testing_data, "pos", BNFormat.Feature_extractor3)
        formatted_neg_testing = BNFormat.format_data(self.neg_testing_data, "neg", BNFormat.Feature_extractor3)
    elif type_of_Feature_extractor == 4:
        formatted_pos_training = BNFormat.format_data(self.pos_training_data, "pos", BNFormat.Feature_extractor4)
        formatted_neg_training = BNFormat.format_data(self.neg_training_data, "neg", BNFormat.Feature_extractor4)
        formatted_pos_testing = BNFormat.format_data(self.pos_testing_data, "pos", BNFormat.Feature_extractor4)
        formatted_neg_testing = BNFormat.format_data(self.neg_testing_data, "neg", BNFormat.Feature_extractor4)
    else:
        # No extractor selected: fall back to the default formatting
        formatted_pos_training = BNFormat.format_data(self.pos_training_data, "pos")
        formatted_neg_training = BNFormat.format_data(self.neg_training_data, "neg")
        formatted_pos_testing = BNFormat.format_data(self.pos_testing_data, "pos")
        formatted_neg_testing = BNFormat.format_data(self.neg_testing_data, "neg")
    # Combine the positive and negative halves
    formatted_training_data = formatted_pos_training + formatted_neg_training
    formatted_testing_data = formatted_pos_testing + formatted_neg_testing
    # Train on one list of reviews
    nb_classifier = NaiveBayesClassifier.train(formatted_training_data)
    # Print the features the classifier found most informative
    nb_classifier.show_most_informative_features()
    # Test on the other list of reviews
    accuracy_ = accuracy(nb_classifier, formatted_testing_data)
    return accuracy_
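The four near-identical branches in Example 11 differ only in which BNFormat extractor they pass, so they can be collapsed with a lookup table. A sketch, assuming BNFormat.format_data accepts the extractor as an optional third argument exactly as the branches above suggest:

def RunBayesNetwork(self, type_of_Feature_extractor):
    # Table-driven variant of the branches above (sketch; same BNFormat names).
    extractors = {1: BNFormat.Feature_extractor1, 2: BNFormat.Feature_extractor2,
                  3: BNFormat.Feature_extractor3, 4: BNFormat.Feature_extractor4}
    extractor = extractors.get(type_of_Feature_extractor)
    # Pass the extractor only when one was selected; otherwise use the default.
    args = (extractor,) if extractor is not None else ()
    training = (BNFormat.format_data(self.pos_training_data, "pos", *args) +
                BNFormat.format_data(self.neg_training_data, "neg", *args))
    testing = (BNFormat.format_data(self.pos_testing_data, "pos", *args) +
               BNFormat.format_data(self.neg_testing_data, "neg", *args))
    nb_classifier = NaiveBayesClassifier.train(training)
    nb_classifier.show_most_informative_features()
    return accuracy(nb_classifier, testing)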
Example 12: evaluate_features
def evaluate_features(feature_extractor, N, only_acc=False):
    from collections import defaultdict
    from nltk.corpus import movie_reviews
    from nltk.classify import NaiveBayesClassifier as naive
    from nltk.classify.util import accuracy
    from nltk.metrics import precision, recall, f_measure
    from sys import stdout

    negative = movie_reviews.fileids('neg')
    positive = movie_reviews.fileids('pos')
    negfeats = [(feature_extractor(movie_reviews.sents(fileids=[f])), 'neg')
                for f in negative]
    posfeats = [(feature_extractor(movie_reviews.sents(fileids=[f])), 'pos')
                for f in positive]
    # stratifiedSamples (defined elsewhere) splits each class into train/test.
    negtrain, negtest = stratifiedSamples(negfeats, N)
    postrain, postest = stratifiedSamples(posfeats, N)
    trainfeats = negtrain + postrain
    testfeats = negtest + postest
    classifier = naive.train(trainfeats)
    if only_acc:
        return accuracy(classifier, testfeats)
    print('accuracy: {}'.format(accuracy(classifier, testfeats)))
    # Precision, recall, F-measure
    refsets = defaultdict(set)
    testsets = defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        observed = classifier.classify(feats)
        testsets[observed].add(i)
    print('pos precision:', precision(refsets['pos'], testsets['pos']))
    print('pos recall:', recall(refsets['pos'], testsets['pos']))
    print('pos F-measure:', f_measure(refsets['pos'], testsets['pos']))
    print('neg precision:', precision(refsets['neg'], testsets['neg']))
    print('neg recall:', recall(refsets['neg'], testsets['neg']))
    print('neg F-measure:', f_measure(refsets['neg'], testsets['neg']))
    stdout.flush()
    classifier.show_most_informative_features()
    return classifier
Example 13: dt_classify
def dt_classify(filename):
    raw_sample_stream = get_samples_stream(filename)
    all_samples = list(binary_bow_feature(raw_sample_stream))
    # Two categories contain too few examples, so the word frequencies there
    # cannot reflect the true probabilities; optionally filter them out:
    # all_samples = [(features, aspect) for features, aspect in all_samples
    #                if aspect != common.AspectNothing and aspect != common.AspectBusiness]
    test_sample_ratio = 0.25
    train_samples, test_samples = split_samples(all_samples, test_sample_ratio)
    print("training set has {} samples, test set has {} samples".format(
        len(train_samples), len(test_samples)))
    classifier = DecisionTreeClassifier.train(train_samples, binary=True,
                                              depth_cutoff=15, verbose=True)
    print("training completes")
    print("training accuracy: {}".format(accuracy(classifier, train_samples)))
    print("test accuracy: {}".format(accuracy(classifier, test_samples)))
    return classifier
Example 14: cross_validate
def cross_validate():
    training_set = load_training_set()
    random.shuffle(training_set)
    average = 0
    # Pre-0.18 scikit-learn API; modern versions use
    # sklearn.model_selection.KFold(n_splits=10) instead.
    cv = KFold(len(training_set), n_folds=10, indices=True, shuffle=False, random_state=None)
    for traincv, evalcv in cv:
        # +1 so each slice includes the fold's last index
        classifier = NaiveBayesClassifier.train(training_set[traincv[0]:traincv[-1] + 1])
        acc = accuracy(classifier, training_set[evalcv[0]:evalcv[-1] + 1])
        print('Range:', evalcv[0], 'to', evalcv[-1])
        print('Accuracy: %4.2f' % acc)
        average += acc
    print('Average accuracy: %4.2f' % (average / 10))
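Example 14 targets the old scikit-learn cross-validation API (n_folds, indices). A minimal sketch of the same loop on scikit-learn 0.18+, indexing with the fold's index arrays instead of contiguous slices:

import random
from nltk.classify import NaiveBayesClassifier
from nltk.classify.util import accuracy
from sklearn.model_selection import KFold

def cross_validate_modern(training_set, n_splits=10):
    random.shuffle(training_set)
    total = 0.0
    for train_idx, eval_idx in KFold(n_splits=n_splits).split(training_set):
        # Index arrays work even when the folds are not contiguous ranges.
        classifier = NaiveBayesClassifier.train([training_set[i] for i in train_idx])
        total += accuracy(classifier, [training_set[i] for i in eval_idx])
    print('Average accuracy: %4.2f' % (total / n_splits))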
Example 15: run_classifier_tests
def run_classifier_tests(classifier):
    testfiles = [{'traffic': 'traffic-training.txt'},
                 {'useless': 'useless-training.txt'}]
    testfeats = []
    for testfile in testfiles:
        for sense, loc in testfile.items():  # iteritems() in Python 2
            for line in open(loc, 'r'):
                testfeats = testfeats + create_training_dict(line, sense)
    acc = accuracy(classifier, testfeats) * 100
    print('accuracy: %.2f%%' % acc)
    sys.exit()