当前位置: 首页>>代码示例>>Python>>正文


Python NaiveBayesClassifier.train方法代码示例

本文整理汇总了Python中nltk.classify.NaiveBayesClassifier.train方法的典型用法代码示例。如果您正苦于以下问题:Python NaiveBayesClassifier.train方法的具体用法?Python NaiveBayesClassifier.train怎么用?Python NaiveBayesClassifier.train使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.classify.NaiveBayesClassifier的用法示例。


在下文中一共展示了NaiveBayesClassifier.train方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: train

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
def train(test=False):
    """Train a Naive Bayes sentiment classifier on ``movie_reviews``.

    Every file id in the 'neg' and 'pos' categories is turned into a
    ``(featureset, label)`` pair with ``word_feats``.

    Args:
        test: If false (the default), train on all pairs and return the
            classifier. If true, train on the first 3/4 of each class,
            print the split sizes, the accuracy on the remaining 1/4 and
            the most informative features; nothing is returned.

    Returns:
        The trained classifier when ``test`` is false, otherwise ``None``.
    """
    negids = movie_reviews.fileids('neg')
    posids = movie_reviews.fileids('pos')

    negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
    posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]

    if not test:
        return NaiveBayesClassifier.train(negfeats + posfeats)

    # Floor division: the cutoffs are used as slice bounds and must stay
    # integers whichever division semantics the module runs under.
    negcutoff = len(negfeats) * 3 // 4
    poscutoff = len(posfeats) * 3 // 4

    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

    # Single-argument print(...) produces the same text on Python 2 and 3.
    print('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))

    classifier = NaiveBayesClassifier.train(trainfeats)
    print('accuracy: %s' % (nltk.classify.util.accuracy(classifier, testfeats),))

    classifier.show_most_informative_features()
开发者ID:jnu,项目名称:texecutions,代码行数:28,代码来源:sentiment.py

示例2: train_and_show_results

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
def _split_three_quarters(items):
    """Return ``(first 3/4, remaining 1/4)`` of ``items`` using an integer cutoff."""
    cutoff = len(items) * 3 // 4
    return items[:cutoff], items[cutoff:]


def _report_results(heading, classifier, test_set):
    """Print ``heading``, accuracy, informative features and precision/recall."""
    print(heading)
    print('Accuracy: %s' % (nltk.classify.util.accuracy(classifier, test_set),))
    classifier.show_most_informative_features()
    print_precision_recall(classifier, test_set)


def train_and_show_results(pos, neg, pos_bigrams, neg_bigrams, pos_control, neg_control, pos_control_bigrams, neg_control_bigrams):
    """Train Naive Bayes and Maximum Entropy classifiers and print their scores.

    Two models of each kind are trained: one on the bag-of-words featuresets
    (``pos``/``neg``) and one on the bigram featuresets
    (``pos_bigrams``/``neg_bigrams``). The four trained models are passed to
    ``save_dataset`` under fixed ``*.dat`` names.

    Args:
        pos, neg: Labelled bag-of-words featuresets.
        pos_bigrams, neg_bigrams: Labelled bigram featuresets.
        pos_control, neg_control, pos_control_bigrams, neg_control_bigrams:
            Optional held-out test sets. If any of the four is ``None``, the
            last quarter of each training list is used for testing instead
            and only the first three quarters are trained on.
    """
    controls = (pos_control, neg_control, pos_control_bigrams, neg_control_bigrams)
    if any(control is None for control in controls):
        neg_train, neg_test = _split_three_quarters(neg)
        pos_train, pos_test = _split_three_quarters(pos)
        neg_bigrams_train, neg_bigrams_test = _split_three_quarters(neg_bigrams)
        pos_bigrams_train, pos_bigrams_test = _split_three_quarters(pos_bigrams)
        test_bag_of_words = neg_test + pos_test
        test_bigrams = neg_bigrams_test + pos_bigrams_test
        train_corpora_bag_of_words = neg_train + pos_train
        train_corpora_bigrams = neg_bigrams_train + pos_bigrams_train
    else:
        test_bag_of_words = neg_control + pos_control
        test_bigrams = neg_control_bigrams + pos_control_bigrams
        train_corpora_bag_of_words = neg + pos
        train_corpora_bigrams = neg_bigrams + pos_bigrams

    # The doubled space reproduces the text the original print statements
    # emitted (label ending in a space, plus the item separator).
    print("negative corpus:  %d" % len(neg))
    print("positive corpus:  %d" % len(pos))

    # Both sets must be present: checking only neg_control would call
    # len(None) when pos_control was omitted.
    if neg_control is not None and pos_control is not None:
        print("negative test corpus:  %d" % len(neg_control))
        print("positive test corpus:  %d" % len(pos_control))

    print('bag of words and bigrams - Naive Bayes')
    naive_bayes = NaiveBayesClassifier.train(train_corpora_bag_of_words)
    naive_bayes_bigrams = NaiveBayesClassifier.train(train_corpora_bigrams)

    save_dataset('naive_bayes.dat', naive_bayes)
    save_dataset('naive_bayes_bigrams.dat', naive_bayes_bigrams)

    print('bag of words and bigrams - Maximum Entropy')
    maximum_entropy = nltk.MaxentClassifier.train(train_corpora_bag_of_words, max_iter=2)
    maximum_entropy_bigrams = nltk.MaxentClassifier.train(train_corpora_bigrams, max_iter=2)

    save_dataset('maximum_entropy.dat', maximum_entropy)
    save_dataset('maximum_entropy_bigrams.dat', maximum_entropy_bigrams)

    print('Naive Bayesian results')
    _report_results('bag of words', naive_bayes, test_bag_of_words)
    _report_results('\nbigrams', naive_bayes_bigrams, test_bigrams)

    print('Maximum Entropy results')
    _report_results('bag of words', maximum_entropy, test_bag_of_words)
    _report_results('\nbigrams', maximum_entropy_bigrams, test_bigrams)
开发者ID:gleicon,项目名称:sentiment_analysis,代码行数:62,代码来源:train_classifier.py

示例3: evaluate_features

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
def _read_labeled_features(path, label, feature_select):
    """Tokenise each line of ``path`` and pair its selected features with ``label``."""
    labeled = []
    with open(path, 'r') as sentences:
        for sentence in sentences:
            # Words (apostrophes allowed) and a few punctuation marks become tokens; see
            # http://stackoverflow.com/questions/367155/splitting-a-string-into-words-and-punctuation
            tokens = re.findall(r"[\w']+|[.,!?;]", sentence.rstrip())
            labeled.append((feature_select(tokens), label))
    return labeled


def evaluate_features(feature_select):
    """Train on the polarity files and score every line of the negative input file.

    The classifier is trained on the lines of ``RT_POLARITY_POS_FILE``
    (label 'pos') and ``RT_POLARITY_NEG_FILE`` (label 'neg'). Only
    ``RT_INPUT_NEG_FILE`` is scored; reading of the positive input file is
    disabled in this function.

    Args:
        feature_select: Callable mapping a token list to a featureset.

    Returns:
        A dict with three parallel lists: 'key' (line index in the input
        file), 'pos' and 'neg' (the predicted probability of each label).
    """
    posFeatures = _read_labeled_features(RT_POLARITY_POS_FILE, 'pos', feature_select)
    negFeatures = _read_labeled_features(RT_POLARITY_NEG_FILE, 'neg', feature_select)
    innegFeatures = _read_labeled_features(RT_INPUT_NEG_FILE, 'neg', feature_select)

    # All polarity data is used for training; the input file is the test set.
    trainFeatures = posFeatures + negFeatures
    testFeatures = innegFeatures

    classifier = NaiveBayesClassifier.train(trainFeatures)

    fileOutput = {'key': [], 'pos': [], 'neg': []}
    # The reference/test label sets kept previously were never read (and the
    # test sets were keyed by the distribution object, not a label), so only
    # the per-line probabilities are collected.
    for i, (features, _label) in enumerate(testFeatures):
        predicted = classifier.prob_classify(features)
        # Retained so the console output stays the same as before.
        print("\n")
        fileOutput['key'].append(i)
        fileOutput['pos'].append(predicted.prob("pos"))
        fileOutput['neg'].append(predicted.prob("neg"))
    return fileOutput
开发者ID:hmanikfan,项目名称:ABCDEFG,代码行数:62,代码来源:ML_NaiveBayes_ProbabilisticClassifier.py

示例4: classify_and_evaluate

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
def classify_and_evaluate(reviews, feature_extractor=word_feats):
    """Train and evaluate a Naive Bayes classifier on labelled reviews.

    Reviews whose 'class' is 'POSITIVE' or 'NEGATIVE' are converted to
    featuresets labelled 'pos' / 'neg'; the first 3/4 of each class is used
    for training and the rest for testing. The split sizes, the accuracy and
    the most informative features are printed.

    Args:
        reviews: List of dicts with 'class' and 'text' keys. NOTE: the list
            is shuffled in place.
        feature_extractor: Callable mapping a list of non-empty,
            space-separated tokens to a featureset.
    """
    random.shuffle(reviews)

    def labelled_features(class_name, label):
        # Built as a real list so len() and slicing work even where
        # filter() returns a lazy iterator.
        features = []
        for review in reviews:
            if review['class'] == class_name:
                tokens = [tok for tok in review['text'].split(' ') if tok]
                features.append((feature_extractor(tokens), label))
        return features

    pos_features = labelled_features('POSITIVE', 'pos')
    neg_features = labelled_features('NEGATIVE', 'neg')

    # divide groups (one featureset per review, so these equal the
    # review-count based offsets)
    pos_offset = len(pos_features) * 3 // 4
    neg_offset = len(neg_features) * 3 // 4

    training = pos_features[:pos_offset] + neg_features[:neg_offset]
    testing = pos_features[pos_offset:] + neg_features[neg_offset:]

    # train classifier
    classifier = NaiveBayesClassifier.train(training)

    print('treinada em %d reviews, testada em %d reviews' % (len(training), len(testing)))
    print('accuracy: %s' % (nltk.classify.util.accuracy(classifier, testing),))
    classifier.show_most_informative_features()
开发者ID:teago19,项目名称:sentimentAnalysis,代码行数:34,代码来源:classify.py

示例5: evaluate_classifier

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
def evaluate_classifier(featx):
    """Train on 3/4 of ``movie_reviews`` and print evaluation metrics.

    Prints accuracy, then precision and recall for the 'pos' and 'neg'
    labels on the held-out quarter, followed by the most informative
    features.

    Args:
        featx: Callable mapping a document's word sequence to a featureset.
    """
    negids = movie_reviews.fileids('neg')
    posids = movie_reviews.fileids('pos')

    negfeats = [(featx(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
    posfeats = [(featx(movie_reviews.words(fileids=[f])), 'pos') for f in posids]

    # Floor division keeps the slice bounds integral under any division
    # semantics.
    negcutoff = len(negfeats) * 3 // 4
    poscutoff = len(posfeats) * 3 // 4

    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

    classifier = NaiveBayesClassifier.train(trainfeats)

    # Indices of test items grouped by true label and by predicted label.
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        testsets[classifier.classify(feats)].add(i)

    print('accuracy: %s' % (nltk.classify.util.accuracy(classifier, testfeats),))
    for label in ('pos', 'neg'):
        print('%s precision: %s' % (label, nltk.metrics.precision(refsets[label], testsets[label])))
        print('%s recall: %s' % (label, nltk.metrics.recall(refsets[label], testsets[label])))
    classifier.show_most_informative_features()
开发者ID:zhougr1993,项目名称:Bayes_kick_momo_spam,代码行数:30,代码来源:test_sentiment.py

示例6: main

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
def main():
    """Train and evaluate a Naive Bayes classifier on the conversations file.

    Feature sets come from ``get_training_features``. The first 3/4 (in the
    order returned) are used for training and the remainder for testing.
    The split sizes and the accuracy are printed, followed by the most
    informative features.
    """
    # Inputs left disabled by the original author: the supreme.votes.txt and
    # supreme.outcome.txt files, and a scratch some_text.txt.
    corpus_path = '/Users/nasrallah/Desktop/Insight/courtcast/data/supreme_court_dialogs_corpus_v1.01/supreme.conversations.txt'

    all_feature_sets = get_training_features(corpus_path)

    # No shuffling is applied (the random.shuffle call is disabled), so the
    # split follows whatever order get_training_features produced.
    split_at = int(len(all_feature_sets) * 3 / 4)
    training_part = all_feature_sets[:split_at]
    held_out_part = all_feature_sets[split_at:]
    print('train on %d instances, test on %d instances' % (len(training_part), len(held_out_part)))

    model = NaiveBayesClassifier.train(training_part)
    held_out_accuracy = nltk.classify.util.accuracy(model, held_out_part)
    print('accuracy:', held_out_accuracy)
    model.show_most_informative_features()
开发者ID:FrankYoshida,项目名称:CourtCast,代码行数:30,代码来源:training.py

示例7: evaluate_classifier

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
def evaluate_classifier(featx):
    """Train a product-review classifier and print evaluation metrics.

    Each text in ``traincons`` (label 'neg') and ``trainpros`` (label 'pos')
    is tokenised with ``nltk.word_tokenize``, filtered against ``stpwrds``
    and passed to ``featx``.

    Args:
        featx: Callable mapping a token list to a featureset.

    Returns:
        The trained classifier.
    """
    def featurize(texts, label):
        # One (featureset, label) pair per text, stop words removed.
        return [
            (featx([wrd for wrd in nltk.word_tokenize(text) if wrd not in stpwrds]), label)
            for text in texts
        ]

    # Product-review data; the movie_reviews variant was disabled upstream.
    negfeats = featurize(traincons, 'neg')
    posfeats = featurize(trainpros, 'pos')

    # Floor division keeps the slice bounds integral under any division
    # semantics.
    negcutoff = len(negfeats) * 3 // 4
    poscutoff = len(posfeats) * 3 // 4

    # NOTE(review): training uses ALL featuresets while the test set is the
    # last quarter of the same data, so the reported scores are measured on
    # items seen during training. The 3/4-only training split was commented
    # out in the original - confirm this is intended.
    trainfeats = negfeats + posfeats
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

    classifier = NaiveBayesClassifier.train(trainfeats)

    # Indices of test items grouped by true label and by predicted label.
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        testsets[classifier.classify(feats)].add(i)

    print('accuracy: %s' % (nltk.classify.util.accuracy(classifier, testfeats),))
    for label in ('pos', 'neg'):
        print('%s precision: %s' % (label, nltk.metrics.precision(refsets[label], testsets[label])))
        print('%s recall: %s' % (label, nltk.metrics.recall(refsets[label], testsets[label])))
    classifier.show_most_informative_features()
    return classifier
开发者ID:pdk2015,项目名称:productReview,代码行数:37,代码来源:productReview.py

示例8: __init__

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
    def __init__(self):
        """Train the 'suicidal' / 'alright' phrase classifier.

        Each line of ``neg_phrases.txt`` is labelled 'suicidal' and each line
        of ``pos_phrases.txt`` is labelled 'alright'. The first 80% of each
        list trains ``self.classifier``; the remainder is used to print the
        accuracy, followed by the most informative features.
        """
        # (An earlier CSV-based loading path was disabled upstream.)
        # The with-statement closes both files; previously they stayed open.
        with open("neg_phrases.txt", "r") as neg_file, \
                open("pos_phrases.txt", "r") as pos_file:
            neg_phrases = neg_file.readlines()
            pos_phrases = pos_file.readlines()

        neg_phrases_tagged = [(word_feats(phrase.split()), 'suicidal') for phrase in neg_phrases]
        pos_phrases_tagged = [(word_feats(phrase.split()), 'alright') for phrase in pos_phrases]

        negcutoff = int(len(neg_phrases_tagged) * .8)
        poscutoff = int(len(pos_phrases_tagged) * .8)

        trainfeats = neg_phrases_tagged[:negcutoff] + pos_phrases_tagged[:poscutoff]
        testfeats = neg_phrases_tagged[negcutoff:] + pos_phrases_tagged[poscutoff:]
        print('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))

        self.classifier = NaiveBayesClassifier.train(trainfeats)
        print('accuracy: %s' % (nltk.classify.util.accuracy(self.classifier, testfeats),))
        self.classifier.show_most_informative_features()
开发者ID:amcnary,项目名称:cs294SuicideDetector,代码行数:27,代码来源:nltk_classify_bag_of_words.py

示例9: naiveBayes

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
def naiveBayes(features_train, features_test):
	print 'train on %d instances, test on %d instances' % (len(features_train), len(features_test))
	classifier = NaiveBayesClassifier.train(features_train)
	print 'accuracy:', nltk.classify.util.accuracy(classifier, features_test)
	classifier.show_most_informative_features()	
	precisions, recalls = precision_recall(classifier, features_test)
	print "accuracy: ", precisions, "fitness: ", recalls
开发者ID:andylikescodes,项目名称:SentimentalAnalysis,代码行数:9,代码来源:Classifiers.py

示例10: classify

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
    def classify(self):
        """Train and evaluate a pos/neg classifier on this entity's articles.

        Articles for ``self.entity`` with a score below zero are labelled
        "neg" and those above zero "pos" (a score of exactly zero matches
        neither filter). The first 3/4 of each class is used for training;
        the split sizes, accuracy on the rest and the most informative
        features are printed.
        """
        def word_feats(body):
            # Bag of words: every space-separated token maps to True.
            return dict.fromkeys(body.split(" "), True)

        articles = Article.objects.filter(entity=self.entity)

        negids = articles.filter(score__lt=0)
        posids = articles.filter(score__gt=0)

        negfeats = [(word_feats(a.body), "neg") for a in negids]
        posfeats = [(word_feats(a.body), "pos") for a in posids]

        # Floor division keeps the slice bounds integral under any division
        # semantics.
        negcutoff = len(negfeats) * 3 // 4
        poscutoff = len(posfeats) * 3 // 4

        trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
        testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
        print("train on %d instances, test on %d instances" % (len(trainfeats), len(testfeats)))

        classifier = NaiveBayesClassifier.train(trainfeats)
        print("accuracy: %s" % (nltk.classify.util.accuracy(classifier, testfeats),))
        classifier.show_most_informative_features()
开发者ID:funkotron,项目名称:sentience,代码行数:27,代码来源:spider.py

示例11: __init_naive_bayes

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
	def __init_naive_bayes(self):
		"""
		__init_naive_bayes(self):
		Gets the data from the positive, negative and neutral text files.
		Creates and trains the Naive Bayes classifier, using the data, so
		that it can learn what constitutes a positive, negative or neutral tweet.

		Each word accepted by __check_word becomes its own single-feature
		training example labelled 'pos', 'neu' or 'neg'. The trained
		classifier is stored in self.classifier.

		Raises:
			Exception: if loading the files or training fails; the message
			includes the repr of the underlying error.
		"""

		def load_words(file_name):
			# Lower-cased, whitespace-separated words of one word file.
			path = pjoin(sys.path[0], "sentiment_word_files", file_name)
			# The with-statement closes the file; the previous bare
			# `f.close` (no parentheses) never did.
			with codecs.open(path, mode="rU", encoding='utf-8') as f:
				lines = [line.lower().replace("\n" , " ") for line in f]
			return "".join(lines).split()

		try:
			positive = load_words("tweets_positive.txt")
			neutral = load_words("tweets_neutral.txt")
			negative = load_words("tweets_negative.txt")

			posfeats = [({word.lower() : True}, 'pos') for word in positive if self.__check_word(word)]
			neufeats = [({word.lower() : True}, 'neu') for word in neutral if self.__check_word(word)]
			negfeats = [({word.lower() : True}, 'neg') for word in negative if self.__check_word(word)]

			self.classifier = NaiveBayesClassifier.train( posfeats + neufeats + negfeats )

		# `except Exception` lets KeyboardInterrupt/SystemExit propagate,
		# unlike the previous bare `except:`.
		except Exception as err:
			raise Exception ("Unknown error in SentimentAnalyzer::__init_naive_bayes: %r" % (err,))
开发者ID:StrongBrain,项目名称:test_twitter,代码行数:37,代码来源:sentiment_analyzer.py

示例12: generate_sentiment_classifier

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
def generate_sentiment_classifier(corpus, word_feats):
    """Train a pos/neg Naive Bayes classifier on ``corpus`` and report on it.

    The first 3/4 of each category's files is used for training. The split
    sizes, accuracy, per-label precision/recall on the remaining quarter and
    the most informative features are printed.

    Args:
        corpus: Object providing ``fileids(category)`` and
            ``words(fileids=[...])`` for the 'neg' and 'pos' categories.
        word_feats: Callable mapping a word sequence to a featureset.

    Returns:
        The trained classifier.
    """
    negids = corpus.fileids('neg')
    posids = corpus.fileids('pos')
    negfeats = [(word_feats(corpus.words(fileids=[f])), 'neg') for f in negids]
    posfeats = [(word_feats(corpus.words(fileids=[f])), 'pos') for f in posids]

    # Floor division keeps the slice bounds integral under any division
    # semantics.
    negcutoff = len(negfeats) * 3 // 4
    poscutoff = len(posfeats) * 3 // 4

    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
    print('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))

    classifier = NaiveBayesClassifier.train(trainfeats)

    # Indices of test items grouped by true label and by predicted label.
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
    for i, (feats, label) in enumerate(testfeats):
        refsets[label].add(i)
        testsets[classifier.classify(feats)].add(i)

    print('accuracy: %s' % (nltk.classify.util.accuracy(classifier, testfeats),))
    for label in ('pos', 'neg'):
        print('%s precision: %s' % (label, nltk.metrics.precision(refsets[label], testsets[label])))
        print('%s recall: %s' % (label, nltk.metrics.recall(refsets[label], testsets[label])))
    classifier.show_most_informative_features()

    return classifier
开发者ID:rohankshir,项目名称:football,代码行数:36,代码来源:util.py

示例13: main

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
def main():
    """Flag users whose names look like organisation names.

    A Naive Bayes classifier is trained on ``word_features`` of org names
    (label 'org') and of user names that do not exactly match an org name
    (label 'user'). Each user not yet marked ``likely_org`` is then scored;
    when 'org' is the most probable label and the log2 probability ratio
    reaches ``MIN_RATIO`` (and the user was not admin-classified as 'user'),
    the user is saved with ``likely_org`` set and the ratio stored in
    ``org_probability``.
    """
    org_names = Org.objects.values_list('name', flat=True)
    users = User.objects.filter(likely_org=False)

    # Users sharing an exact name with a known org are left out of the 'user'
    # examples. This mostly matters on the first run and for new users with
    # org names.
    non_org_user_names = set([user.get_name for user in users]) - set(org_names)

    org_examples = [(word_features(name), 'org') for name in org_names]
    user_examples = [(word_features(name), 'user') for name in non_org_user_names]
    classifier = NaiveBayesClassifier.train(user_examples + org_examples)

    flagged = 0
    for user in users:
        prediction = classifier.prob_classify(word_features(user.get_name))
        if prediction.max() != 'org':
            continue

        # log2 of the smoothed probability ratio, e.g. a ratio of 8.0 -> 3.0.
        p_org = float(prediction.prob('org')) + NORMALIZING_CONST
        p_user = float(prediction.prob('user')) + NORMALIZING_CONST
        ratio = math.log(p_org / p_user, 2)

        qualifies = (
            ratio >= MIN_RATIO
            and user.likely_org == False
            and user.admin_classification != 'user'
        )
        if not qualifies:
            continue

        log.info('User ID %d with name "%s" is probably an org. Saving.' % (user.id, user.get_name))
        user.likely_org = True
        user.org_probability = ratio
        user.save()
        flagged += 1

    log.info("Processed %d users with org-like names" % flagged)
开发者ID:Bartelo,项目名称:openjumo,代码行数:32,代码来源:user_is_org.py

示例14: create_train_classifier

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
def create_train_classifier():
    """Retrain the Naive Bayes classifier and rewrite the pickle files.

    Reads the 'neg' and 'pos' categories of the corpus found at
    ``TRAIN_DATASET_LOC``, trains on all of their featuresets and writes the
    pickled positive featuresets, negative featuresets and classifier under
    the ``pickles`` directory via ``__write_file``.
    """
    print("Recreating training classifier")
    reader = nltk.corpus.CategorizedPlaintextCorpusReader(
        nltk.data.find(TRAIN_DATASET_LOC),
        fileids='.*\.txt',
        cat_pattern="(pos|neg)"
    )

    neg_fileids = reader.fileids('neg')
    pos_fileids = reader.fileids('pos')

    # Only the local training corpus is used; the movie_reviews extension
    # was disabled upstream.
    neg_featuresets = [
        (__word_feats_neg(reader.words(fileids=[fileid])), 'neg')
        for fileid in neg_fileids
    ]
    pos_featuresets = [
        (__word_feats_pos(reader.words(fileids=[fileid])), 'pos')
        for fileid in pos_fileids
    ]

    model = NaiveBayesClassifier.train(neg_featuresets + pos_featuresets)

    pickle_jobs = (
        ('positive_train.pickle', pos_featuresets),
        ('negative_train.pickle', neg_featuresets),
        ('nbClassifier.pickle', model),
    )
    for file_name, payload in pickle_jobs:
        __write_file(os.path.join('pickles', file_name), cPickle.dumps(payload))
    print("Done!")
开发者ID:Eman-Naguib,项目名称:TwitterSentiment,代码行数:32,代码来源:recreate_pickles.py

示例15: naivebayes

# 需要导入模块: from nltk.classify import NaiveBayesClassifier [as 别名]
# 或者: from nltk.classify.NaiveBayesClassifier import train [as 别名]
def naivebayes(trainfeats, testfeats):
    """Train a Naive Bayes classifier and print a short evaluation.

    Prints a header, the train/test sizes, the accuracy on ``testfeats`` and
    the most informative features.

    Args:
        trainfeats: Labelled featuresets used for training.
        testfeats: Labelled featuresets used to compute the accuracy.
    """
    classifier = NaiveBayesClassifier.train(trainfeats)
    print("NaiveBayes output")
    print('train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats)))

    print('accuracy: %s' % (nltk.classify.util.accuracy(classifier, testfeats),))
    # show_most_informative_features() prints its table itself and returns
    # None; wrapping it in print emitted a stray "None" line.
    classifier.show_most_informative_features()
开发者ID:shachi04,项目名称:PSL_sentiment,代码行数:9,代码来源:baseline.py


注:本文中的nltk.classify.NaiveBayesClassifier.train方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。