当前位置: 首页>>代码示例>>Python>>正文


Python classify.accuracy函数代码示例

本文整理汇总了 Python 中 nltk.classify.accuracy 函数的典型用法代码示例。如果您正苦于以下问题:Python accuracy 函数的具体用法?Python accuracy 怎么用?Python accuracy 使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了 accuracy 函数的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: category_by_movie

def category_by_movie():
    """Train a Naive Bayes classifier on the movie_reviews corpus and print its accuracy.

    Each review becomes a dict of boolean ``contains(word)`` features over the
    2000 most frequent lower-cased corpus words.  After shuffling, the first
    100 reviews are held out, the classifier is trained on the remainder, and
    the accuracy on the held-out reviews is printed.
    """
    import random

    from nltk import FreqDist
    from nltk import NaiveBayesClassifier
    from nltk import classify
    from nltk.corpus import movie_reviews as mr

    documents = [(list(mr.words(fileid)), category)
                 for category in mr.categories()
                 for fileid in mr.fileids(category)]
    random.shuffle(documents)

    all_words = FreqDist(w.lower() for w in mr.words())
    # Rank explicitly by descending count instead of slicing keys(): keys() is
    # only frequency-ordered and sliceable on old NLTK 2 / Python 2.
    # NOTE(review): ties are broken alphabetically, which is assumed to match
    # the NLTK 2 ordering -- confirm if exact feature parity matters.
    word_features = sorted(all_words, key=lambda w: (-all_words[w], w))[:2000]

    def document_features(document):
        """Map a token sequence to {'contains(word)': bool} per feature word."""
        document_words = set(document)  # set gives O(1) membership checks
        features = {}
        for word in word_features:
            features['contains(%s)' % word] = (word in document_words)
        return features

    features = [(document_features(d), c) for (d, c) in documents]
    train_set, test_set = features[100:], features[:100]
    classifier = NaiveBayesClassifier.train(train_set)
    # Score on the held-out split; scoring on train_set overstates accuracy
    # and left test_set unused.
    print(classify.accuracy(classifier, test_set))
开发者ID:brenden17,项目名称:infinity,代码行数:30,代码来源:category_nltk.py

示例2: cross_validate

def cross_validate(classifier, training_set, test_set):
    """Run 10-fold cross-validation and evaluate the best fold's model on test_set.

    For every fold a fresh model is trained via ``classifier.train`` and its
    accuracy on the fold's training and validation parts is printed.  The
    model with the highest validation accuracy is kept and finally scored on
    ``test_set``.

    Args:
        classifier: object exposing ``train(featuresets)``.
        training_set: indexable collection of training examples, split into folds.
        test_set: held-out examples used only for the final evaluation.

    Returns:
        ``[test_accuracy, best_train_accuracy, best_classifier]``.
    """
    chosen_classif = classifier
    best_accuracy = 0.0
    best_train_accuracy = None
    best_classifier = None
    # NOTE(review): `cross_validation` is presumably the legacy scikit-learn
    # module (KFold(n, n_folds=...)) -- confirm against the file's imports.
    k_fold = cross_validation.KFold(len(training_set), n_folds=10)
    for train_indices, test_indices in k_fold:
        train = itemgetter(*train_indices)(training_set)
        test = itemgetter(*test_indices)(training_set)
        if len(test_indices) == 1:
            # itemgetter with a single index returns the bare item, not a tuple.
            test = (test,)
        classifier = chosen_classif.train(train)
        print('--------------------------------')
        train_accuracy = classify.accuracy(classifier, train)
        print('Training set accuracy:' + str(train_accuracy))
        accuracy = classify.accuracy(classifier, test)
        # Always accept the first fold so a run where every fold scores 0.0
        # still yields a classifier instead of crashing on None below.
        if best_classifier is None or accuracy > best_accuracy:
            best_classifier = classifier
            best_accuracy = accuracy
            best_train_accuracy = train_accuracy
        print('Cross validation set accuracy: ' + str(accuracy))
        get_fscore(classifier, test)
    print('Best classifier CV accuracy: ' + str(best_accuracy))
    test_accuracy = classify.accuracy(best_classifier, test_set)
    print('Best classifier accuracy: ' + str(test_accuracy))
    print('Best classifier precision recall fscore: ')
    print(get_fscore(best_classifier, test_set))
    return [test_accuracy, best_train_accuracy, best_classifier]
开发者ID:jedijulia,项目名称:nlp-tourism,代码行数:28,代码来源:sentiment_analyzer.py

示例3: evaluate

def evaluate(train_set, test_set, classifier, name):
    """Score a trained classifier on the training and test sets.

    Returns a 4-tuple ``(trainacc, testacc, spam_false, ham_false)``: both
    accuracies scaled to percentages, followed by ``100 - recall * 100`` for
    the 'spam' and 'ham' labels as measured on ``test_set``.  ``name`` is
    accepted but not used in this function.
    """
    gold_by_label = collections.defaultdict(set)
    predicted_by_label = collections.defaultdict(set)
    for index, example in enumerate(test_set):
        feats, gold = example
        gold_by_label[gold].add(index)
        predicted_by_label[classifier.classify(feats)].add(index)

    def missed_percent(tag):
        # Percentage of reference items for ``tag`` that were not predicted as ``tag``.
        return 100 - nltk.recall(gold_by_label[tag], predicted_by_label[tag]) * 100

    trainacc = 100 * classify.accuracy(classifier, train_set)
    testacc = 100 * classify.accuracy(classifier, test_set)
    spam_false = missed_percent('spam')
    ham_false = missed_percent('ham')
    return trainacc, testacc, spam_false, ham_false
开发者ID:Vermeij,项目名称:Spamfilter,代码行数:13,代码来源:evaluate.py

示例4: main

def main():
    """Train a Naive Bayes spam/ham classifier, pickle it to stdout, report accuracy.

    The spam and ham datasets are loaded from the paths in ``sys.argv[1]`` and
    ``sys.argv[2]``.  The first 11500 items of each are used for training and
    everything after them for testing; per-class accuracy goes to stderr so it
    does not mix with the pickled model on stdout.
    """
    train_size = 11500

    spam = load_dataset('spam', sys.argv[1], True)
    ham = load_dataset('ham', sys.argv[2], True)
    # Split at the same index for train and test: the previous test slices
    # started at 1000 and therefore re-used most of the training examples.
    training_spam, test_spam = spam[:train_size], spam[train_size:]
    training_ham, test_ham = ham[:train_size], ham[train_size:]

    nbc = NaiveBayesClassifier.train(training_ham + training_spam)
    cPickle.dump(nbc, sys.stdout)

    sys.stderr.writelines(['Spam accuracy: %f\n' % accuracy(nbc, test_spam),
                           'Ham accuracy: %f\n' % accuracy(nbc, test_ham)])
开发者ID:cscenter,项目名称:BuzzScore,代码行数:13,代码来源:classifier_trainer.py

示例5: classifier

def classifier(lambda_):
    """Train one classifier for ``lambda_`` and measure it on DEV and TRAIN.

    Returns ``(lambda_, dev_acc, train_acc, cf_text, clf)`` where ``cf_text``
    is the pretty-printed confusion matrix of the DEV predictions and ``clf``
    is the trained classifier.
    """
    model = get_classifier('%f' % lambda_, __train_fs, lambda_)
    logging.debug("Finished training the classifier lambda=%f ..." % lambda_)

    dev_score = accuracy(model, __dev_fs)
    logging.debug("classifier lambda=%f accuracy on DEV is: %3.5f",
                  lambda_, dev_score)

    train_score = accuracy(model, __train_fs)
    logging.debug("classifier lambda=%f accuracy on TRAIN is: %3.5f",
                  lambda_, train_score)

    # Collect predictions and reference labels for the confusion matrix.
    predictions = []
    references = []
    for feats, tag in __dev_fs:
        predictions.append(model.classify(feats))
        references.append(tag)

    matrix = nltk.ConfusionMatrix(references, predictions)
    report = matrix.pp(sort_by_count=True, show_percents=True, truncate=20)
    return (lambda_, dev_score, train_score, report, model)
开发者ID:aboSamoor,项目名称:NLP,代码行数:15,代码来源:rami_learning.py

示例6: main_function

def main_function():
    """Train a megam-backed MaxEnt tweet classifier and print its test accuracy.

    Connects to the database described by ``DATABASES['date_cutoff']``, builds
    feature sets from the training and test tweets, trains the classifier,
    shows its 10 most informative features and prints the accuracy on the
    test feature set.  The connection is closed even if a step fails.
    """
    settings = DATABASES['date_cutoff']
    conn = MySQLdb.connect(host=settings['HOST'],
                           user=settings['USER'],
                           passwd=settings['PASSWORD'],
                           db=settings['NAME'])
    try:
        # Use the connection opened above; the earlier code passed
        # `conn_analysis`, which is never defined in this function.
        # NOTE(review): if a module-level `conn_analysis` exists and points at
        # a different database, confirm which one the loaders should use.
        training_tweets = classify.get_training_tweets(conn)
        training_feature_set = process_tweets(training_tweets)

        config_megam('/opt/packages')
        classifier = MaxentClassifier.train(training_feature_set, algorithm="megam", trace=0)

        test_tweets = classify.get_test_tweets(conn)
        test_feature_set = process_tweets(test_tweets)

        classifier.show_most_informative_features(10)
        classifier_accuracy = accuracy(classifier, test_feature_set)
        print("classifier accuracy: " + repr(classifier_accuracy))
    finally:
        conn.close()
开发者ID:7andrew7,项目名称:vaccine-sentiment,代码行数:28,代码来源:max-ent-bigrams.py

示例7: main_function

def main_function():
    """Train a decision-tree tweet classifier and print its accuracy.

    Opens a connection to the local ``twitter_analysis`` database, builds
    feature sets from two tweet collections, trains a DecisionTreeClassifier
    on one and prints the accuracy measured on the other.  The connection is
    closed even if a step fails.
    """
    # NOTE(review): credentials are hard-coded here; they should come from
    # configuration rather than source control.
    conn = MySQLdb.connect(host="localhost", user="root", passwd="tanzania", db="twitter_analysis")
    try:
        # NOTE(review): the classifier is trained on get_test_tweets() and
        # evaluated on get_training_tweets(); this looks swapped but is kept
        # as found -- confirm whether it is intentional.
        training_tweets = get_test_tweets(conn)
        training_feature_set = process_tweets(training_tweets)

        classifier = DecisionTreeClassifier.train(training_feature_set)

        test_tweets = get_training_tweets(conn)
        test_feature_set = process_tweets(test_tweets)

        classifier_accuracy = accuracy(classifier, test_feature_set)
        print("classifier accuracy: " + repr(classifier_accuracy))
    finally:
        conn.close()
开发者ID:7andrew7,项目名称:vaccine-sentiment,代码行数:34,代码来源:decision-tree.py

示例8: main_function

def main_function():
    """Train a Naive Bayes tweet classifier and print its test accuracy.

    Connects to the database described by ``DATABASES["date_cutoff"]``, builds
    feature sets from the training and test tweets, trains the classifier and
    prints the (never updated) per-label count table followed by the accuracy
    on the test feature set.  The connection is closed even if a step fails.
    """
    settings = DATABASES["date_cutoff"]
    conn = MySQLdb.connect(
        host=settings["HOST"],
        user=settings["USER"],
        passwd=settings["PASSWORD"],
        db=settings["NAME"],
    )
    try:
        training_tweets = get_training_tweets(conn)
        training_feature_set = classify.process_tweets(training_tweets)

        classifier = NaiveBayesClassifier.train(training_feature_set)

        # Nothing in this function increments these counters; the table is
        # printed as-is to keep the existing output.
        count_table = {"+": 0, "-": 0, "I": 0, "O": 0}

        test_tweets = get_test_tweets(conn)
        test_feature_set = classify.process_tweets(test_tweets)

        classifier_accuracy = accuracy(classifier, test_feature_set)

        print(count_table)
        print("classifier accuracy: " + repr(classifier_accuracy))
    finally:
        conn.close()
开发者ID:khandelwal,项目名称:vaccine-sentiment,代码行数:33,代码来源:bayes-refactored.py

示例9: demo

def demo():
    def gender_features(word):
        return {"last_letter": word[-1], "penultimate_letter": word[-2]}

    from nltk.classify import accuracy
    from nltk.corpus import names

    import random

    names = [(name, "male") for name in names.words("male.txt")] + [
        (name, "female") for name in names.words("female.txt")
    ]
    import random

    random.seed(60221023)
    random.shuffle(names)

    featuresets = [(gender_features(n), g) for (n, g) in names]
    train_set, test_set = featuresets[500:], featuresets[:500]

    print "--- nltk.classify.svm demo ---"
    print "Number of training examples:", len(train_set)
    classifier = SvmClassifier.train(train_set)
    print "Total SVM dimensions:", len(classifier._svmfeatureindex)
    print "Label mapping:", classifier._labelmapping
    print "--- Processing an example instance ---"
    print "Reference instance:", names[0]
    print "NLTK-format features:\n    " + str(test_set[0])
    print "SVMlight-format features:\n    " + str(
        map_instance_to_svm(test_set[0], classifier._labelmapping, classifier._svmfeatureindex)
    )
    distr = classifier.prob_classify(test_set[0][0])
    print "Instance classification and confidence:", distr.max(), distr.prob(distr.max())
    print "--- Measuring classifier performance ---"
    print "Overall accuracy:", accuracy(classifier, test_set)
开发者ID:trunghlt,项目名称:nltk,代码行数:35,代码来源:svm.py

示例10: update_category_by_pos

def update_category_by_pos():
    """Train a decision-tree POS tagger on Brown 'news' and print its accuracy.

    Each word is described by its last one, two and three characters plus the
    preceding word ('<start>' at sentence start).  The first 10% of the
    feature sets are held out for testing; the rest are used for training.
    """
    from nltk import DecisionTreeClassifier
    from nltk import classify
    from nltk.corpus import brown
    from nltk.tag import untag

    def pos_features(sentence, i):
        """Return suffix and previous-word features for ``sentence[i]``."""
        features = {'suffix(1)': sentence[i][-1:],
                    'suffix(2)': sentence[i][-2:],
                    'suffix(3)': sentence[i][-3:]}
        features['prev-word'] = '<start>' if i == 0 else sentence[i - 1]
        return features

    # Sample output of the feature extractor on one corpus word.
    print(pos_features(brown.sents()[0], 8))

    tagged_sents = brown.tagged_sents(categories='news')
    featuresets = []
    for tagged_sent in tagged_sents:
        untagged_sent = untag(tagged_sent)
        for i, (word, tag) in enumerate(tagged_sent):
            featuresets.append((pos_features(untagged_sent, i), tag))

    size = int(len(featuresets) * 0.1)
    train_set, test_set = featuresets[size:], featuresets[:size]
    classifier = DecisionTreeClassifier.train(train_set)
    # Label the score after the model actually trained; it was previously
    # reported as 'NaiveBay' although a decision tree is used.
    print('Decision Tree %f' % classify.accuracy(classifier, test_set))
开发者ID:brenden17,项目名称:infinity,代码行数:30,代码来源:category_nltk.py

示例11: weka

def weka(train_set, test_set, algorithm="svm"):
    """Train a WekaClassifier with ``algorithm`` and print its accuracy on test_set.

    The model file is written to a fresh temporary directory, which is removed
    again once the accuracy has been printed (or if training fails).

    Args:
        train_set: feature sets passed to ``WekaClassifier.train``.
        test_set: feature sets the trained classifier is scored on.
        algorithm: classifier name forwarded to ``WekaClassifier.train``.
    """
    import os
    import shutil

    print("--- nltk.classify.weka %s ---" % algorithm)
    temp_dir = tempfile.mkdtemp()
    try:
        model_path = os.path.join(temp_dir, "cred.model")
        classifier = nltk.classify.WekaClassifier.train(model_path, train_set, algorithm)
        print("Overall accuracy: %s" % accuracy(classifier, test_set))
    finally:
        # mkdtemp never cleans up after itself; the classifier is not returned,
        # so the model file is no longer needed past this point.
        shutil.rmtree(temp_dir, ignore_errors=True)
开发者ID:stanvp,项目名称:webcredibility,代码行数:7,代码来源:classifier.py

示例12: category_by_pos

def category_by_pos():
    """Train a Naive Bayes POS tagger on suffix features and print its accuracy.

    The 100 most frequent 1-3 character word endings in the Brown corpus are
    used as boolean ``endswith(suffix)`` features.  Tagged words from the
    'news' category are split 10% test / 90% train.
    """
    from nltk import FreqDist
    from nltk import NaiveBayesClassifier
    from nltk import classify
    from nltk.corpus import brown

    # Build the distribution through the constructor rather than
    # FreqDist.inc(), which is not available on NLTK 3.
    suffix_fdist = FreqDist(
        suffix
        for word in (w.lower() for w in brown.words())
        for suffix in (word[-1:], word[-2:], word[-3:])
    )

    # Rank explicitly: keys() is only frequency-ordered and sliceable on
    # NLTK 2.  NOTE(review): ties are broken alphabetically, assumed to match
    # the NLTK 2 ordering -- confirm if exact feature parity matters.
    common_suffixes = sorted(suffix_fdist, key=lambda s: (-suffix_fdist[s], s))[:100]

    def pos_features(word):
        """Return {'endswith(suffix)': bool} for every common suffix."""
        lowered = word.lower()  # hoisted: identical for every suffix
        features = {}
        for suffix in common_suffixes:
            features['endswith(%s)' % suffix] = lowered.endswith(suffix)
        return features

    tagged_words = brown.tagged_words(categories='news')
    featuresets = [(pos_features(n), g) for (n, g) in tagged_words]
    size = int(len(featuresets) * 0.1)
    train_set, test_set = featuresets[size:], featuresets[:size]

    classifier = NaiveBayesClassifier.train(train_set)
    print('NaiveBay %f' % classify.accuracy(classifier, test_set))
开发者ID:brenden17,项目名称:infinity,代码行数:32,代码来源:category_nltk.py

示例13: demo

def demo():

    def gender_features(word):
        return {'last_letter': word[-1], 'penultimate_letter': word[-2]}

    from nltk.classify import accuracy
    from nltk.corpus import names
    
    
    import random
    names = ([(name, 'male') for name in names.words('male.txt')] +
             [(name, 'female') for name in names.words('female.txt')])
    import random
    random.seed(60221023)
    random.shuffle(names)

    featuresets = [(gender_features(n), g) for (n,g) in names]
    train_set, test_set = featuresets[500:], featuresets[:500]

    print '--- nltk.classify.svm demo ---'
    print 'Number of training examples:', len(train_set)
    classifier = SvmClassifier.train(train_set)
    print 'Total SVM dimensions:', len(classifier._svmfeatureindex)
    print 'Label mapping:', classifier._labelmapping
    print '--- Processing an example instance ---'
    print 'Reference instance:', names[0]
    print 'NLTK-format features:\n    ' + str(test_set[0])
    print 'SVMlight-format features:\n    ' + str(map_instance_to_svm(test_set[0], classifier._labelmapping, classifier._svmfeatureindex))
    distr = classifier.prob_classify(test_set[0][0])
    print 'Instance classification and confidence:', distr.max(), distr.prob(distr.max())
    print '--- Measuring classifier performance ---'
    print 'Overall accuracy:', accuracy(classifier, test_set)
开发者ID:approximatelylinear,项目名称:nltk,代码行数:32,代码来源:svm.py

示例14: benchmarking

 def benchmarking(self, classifier, _test_set, all_f_measure=None,
                  all_precision=None, all_recall=None):
     """Print accuracy, precision, recall and F-measure of ``classifier`` on ``_test_set``.

     Precision, recall and F-measure are computed for the label 'class' only.
     When the optional lists are supplied, the three scores are appended to
     them so a caller can collect results across several runs.

     Args:
         classifier: trained classifier exposing ``classify`` and
             ``show_most_informative_features``.
         _test_set: iterable of ``(features, label)`` pairs.
         all_f_measure: optional list receiving the F-measure.
         all_precision: optional list receiving the precision.
         all_recall: optional list receiving the recall.
     """
     import collections

     from nltk import classify
     from nltk.metrics import f_measure
     from nltk.metrics import precision
     from nltk.metrics import recall

     # Fresh lists per call: list defaults in the signature would be shared
     # between every call that omits the argument.
     if all_f_measure is None:
         all_f_measure = []
     if all_precision is None:
         all_precision = []
     if all_recall is None:
         all_recall = []

     accuracy = classify.accuracy(classifier, _test_set)
     print("accuracy:", accuracy)

     # Index sets per label: reference labels vs. labels the classifier assigned.
     refsets = collections.defaultdict(set)
     testsets = collections.defaultdict(set)
     for i, (feats, label) in enumerate(_test_set):
         refsets[label].add(i)
         observed = classifier.classify(feats)
         testsets[observed].add(i)

     prec = precision(refsets['class'], testsets['class'])
     rec = recall(refsets['class'], testsets['class'])
     f1 = f_measure(refsets['class'], testsets['class'])
     print('precision:', prec)
     print('recall:', rec)
     print('F-measure:', f1)

     all_f_measure.append(f1)
     all_precision.append(prec)
     all_recall.append(rec)
     print('========Show top 10 most informative features========')
     classifier.show_most_informative_features(10)
开发者ID:jerrygaoLondon,项目名称:oke-extractor,代码行数:30,代码来源:okeConceptRecogniser.py

示例15: test_raw_mail

def test_raw_mail(org_email):
    """Return a bag-of-words feature dict for one raw e-mail text.

    The text is tokenized, each token is lower-cased and lemmatized, and every
    resulting token that is not in ``stpwords`` becomes a key mapped to True.
    """
    wordtokens_test = [word_limit.lemmatize(key.lower())
                       for key in word_tokenize(org_email)]
    return {key: True for key in wordtokens_test if key not in stpwords}


def _train_and_classify_interactively():
    """Train a Naive Bayes spam/ham classifier and classify user-entered text.

    This code used to sit after the ``return`` of ``test_raw_mail`` and was
    therefore unreachable; it is kept here as a separate helper that nothing
    calls automatically.  Runs until interrupted.
    """
    # Extract the features (tokenized, lemmatized, non-stopword) from all e-mails.
    feature_sets = [(raw_mail(n), g) for (n, g) in mail_shuffle]

    # Hold out the first 10% of the feature sets for testing.
    size_feature = int(len(feature_sets) * 0.10)
    train_set, test_set = feature_sets[size_feature:], feature_sets[:size_feature]
    classifier = NaiveBayesClassifier.train(train_set)

    # Accuracy as a percentage.
    print('accuracy of the machine: ', (classify.accuracy(classifier, test_set)) * 100)

    # Top 50 features.
    classifier.show_most_informative_features(50)

    # Labels known to the classifier.
    print('labels:', classifier.labels())

    # Classify user-entered e-mail text until the process is interrupted.
    while True:
        featset = raw_mail(input("Enter text to classify: "))
        print(classifier.classify(featset))
开发者ID:Pooshan,项目名称:Project__spam-and-ham-detection-using-natural-language-processing,代码行数:32,代码来源:NLP-spam-ham.py


注:本文中的nltk.classify.accuracy函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。