

Python movie_reviews.fileids Function Code Examples

This article collects typical usage examples of the nltk.corpus.movie_reviews.fileids function in Python. If you are wondering how to call fileids, what arguments it takes, or what it looks like in real code, the curated examples below should help.


The following presents 15 code examples of the fileids function, sorted by popularity by default.
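Before diving into the examples, here is a minimal sketch of what fileids itself does (assuming the corpus has already been downloaded, e.g. via nltk.download('movie_reviews')): called with no arguments it lists every review file in the corpus, and called with a category name ('pos' or 'neg', positionally or via the categories keyword) it lists only that category's files.

from nltk.corpus import movie_reviews

# All file ids in the corpus, e.g. 'neg/cv000_29416.txt'
all_ids = movie_reviews.fileids()

# File ids restricted to a single category; both call styles are equivalent
neg_ids = movie_reviews.fileids('neg')
pos_ids = movie_reviews.fileids(categories=['pos'])

print(len(all_ids), len(neg_ids), len(pos_ids))  # 2000 1000 1000 in the standard corpus
print(neg_ids[0])

Most of the examples below iterate over these id lists and pass each id to movie_reviews.words(fileids=[f]) or movie_reviews.raw(fileid) to build labelled feature sets.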

Example 1: load_data

def load_data():
   global posfeats,negfeats
   negids = movie_reviews.fileids('neg')
   posids = movie_reviews.fileids('pos')
   negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
   posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
   return
Developer: sjayakum, Project: sentiment-analysis, Lines: 7, Source: NaiveBayesArticle.py

Example 2: prep_reviews_data

    def prep_reviews_data(self): # messy code to test classifier with movie reviews
        if not self.movie_review_data:
            print 'Preparing movie reviews...\n'
            from nltk.corpus import movie_reviews
            docs = [movie_reviews.raw(fileid) 
                    for category in movie_reviews.categories() 
                    for fileid in movie_reviews.fileids(category)]

            process = lambda x: 1 if x == 'pos' else -1
            labels = [process(category)
                    for category in movie_reviews.categories() 
                    for fileid in movie_reviews.fileids(category)]

            docs, labels = double_shuffle(docs, labels)
            training, testing = divide_list_by_ratio(docs)
            self.train_labs, self.test_labs = divide_list_by_ratio(labels)

            train_vecs = self.vectorizer.fit_transform(training)
            test_vecs = self.vectorizer.transform(testing)

            if isinstance(self.model, naive_bayes.GaussianNB):
                train_vecs = train_vecs.toarray()
                test_vecs = test_vecs.toarray()

            self.train_vecs = train_vecs
            self.test_vecs = test_vecs

            self.movie_review_data = True
            self.news_market_data = False
Developer: willpots, Project: stockrockanddropit, Lines: 29, Source: ham.py

Example 3: category_by_movie

def category_by_movie():
    from nltk.corpus import movie_reviews as mr
    from nltk import FreqDist
    from nltk import NaiveBayesClassifier
    from nltk import classify
    from nltk.corpus import names
    from nltk.classify import apply_features
    import random

    documents = [(list(mr.words(f)), c) for c in mr.categories() for f in mr.fileids(c)]
    random.shuffle(documents)

    all_words = FreqDist(w.lower() for w in mr.words())
    word_features = all_words.keys()[:2000]

    def document_features(document):
        document_words = set(document)
        features = {}
        for word in word_features:
            features['contains(%s)' % word] = (word in document_words)
        return features

    #print document_features(mr.words('pos/cv957_8737.txt'))
    #print documents[0]

    features = [(document_features(d), c) for (d, c) in documents]
    train_set, test_set = features[100:], features[:100]
    classifier = NaiveBayesClassifier.train(train_set)
    print classify.accuracy(classifier, train_set)
Developer: brenden17, Project: infinity, Lines: 30, Source: category_nltk.py

Example 4: main

def main():
    negids = movie_reviews.fileids('neg')
    posids = movie_reviews.fileids('pos')
    negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
    posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
    negcutoff = int(len(negfeats) * 3 / 4)
    poscutoff = int(len(posfeats) * 3 / 4)
    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
    classifier = NaiveBayesClassifier.train(trainfeats)

    with open("output.json") as fin:
        sid = SentimentIntensityAnalyzer()
        data = json.load(fin)
    for key in data:
        reviews = data[key]["reviews"]
        for i in range(len(reviews)):
            text = reviews[i]["review"]
            sentiment_dict = {'positive_probability':0, 'label':'', 'negative_probability':0}
            prob = classifier.prob_classify(word_feats(text.split(" ")))
            classification = classifier.classify(word_feats(text.split(" ")))
            sentiment_dict['positive_probability'] = prob.prob('pos')
            sentiment_dict['negative_probability'] = prob.prob('neg')
            sentiment_dict['label'] = classification
            reviews[i]["sentiment"] = sentiment_dict
        data[key]["reviews"] = reviews
    with open('out_with_sentiment.json', 'w') as outfile:
        json.dump(data, outfile)
Developer: bifft2, Project: cs410FinalProject, Lines: 28, Source: sentiment.py

Example 5: train_with_movie_db

    def train_with_movie_db(self):
        """
        Training possible with movie reviews
        - this does not yield particularly good results
        """
        self.use_movie_reviews = True

        negids = movie_reviews.fileids('neg')
        posids = movie_reviews.fileids('pos')

        negfeats = [(self.feature_extraction_movie_reviews(movie_reviews.words(fileids=[f])),
                     "negative") for f in negids]
        posfeats = [(self.feature_extraction_movie_reviews(movie_reviews.words(fileids=[f])),
                     "positive") for f in posids]

        negcutoff = len(negfeats) * 3 // 4
        poscutoff = len(posfeats) * 3 // 4

        trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
        testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

        DLOG("train on %d instances, test on %d instances" % (len(trainfeats), len(testfeats)))

        self.classifier = NaiveBayesClassifier.train(trainfeats)

        DLOG("accuracy: " + str(util.accuracy(self.classifier, testfeats)))
        DLOG(self.classifier.show_most_informative_features())
Developer: maagaard, Project: dmup, Lines: 27, Source: sentimentanalyzer.py

Example 6: evaluate_classifier

def evaluate_classifier(featx):
    negids = movie_reviews.fileids('neg')
    posids = movie_reviews.fileids('pos')
 
    negfeats = [(featx(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
    posfeats = [(featx(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
 
    negcutoff = len(negfeats)*3/4
    poscutoff = len(posfeats)*3/4
 
    trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
    testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]
 
    classifier = NaiveBayesClassifier.train(trainfeats)
    refsets = collections.defaultdict(set)
    testsets = collections.defaultdict(set)
 
    for i, (feats, label) in enumerate(testfeats):
            refsets[label].add(i)
            observed = classifier.classify(feats)
            testsets[observed].add(i)
 
    print 'accuracy:', nltk.classify.util.accuracy(classifier, testfeats)
    print 'pos precision:', nltk.metrics.precision(refsets['pos'], testsets['pos'])
    print 'pos recall:', nltk.metrics.recall(refsets['pos'], testsets['pos'])
    print 'neg precision:', nltk.metrics.precision(refsets['neg'], testsets['neg'])
    print 'neg recall:', nltk.metrics.recall(refsets['neg'], testsets['neg'])
    classifier.show_most_informative_features()
Developer: zhougr1993, Project: Bayes_kick_momo_spam, Lines: 28, Source: test_sentiment.py

Example 7: train

def train(test=False):

    negids = movie_reviews.fileids('neg')
    posids = movie_reviews.fileids('pos')


    negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
    posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]


    if(test):
        negcutoff = len(negfeats)*3/4
        poscutoff = len(posfeats)*3/4

        trainfeats = negfeats[:negcutoff] + posfeats[:poscutoff]
        testfeats = negfeats[negcutoff:] + posfeats[poscutoff:]

        print 'train on %d instances, test on %d instances' % (len(trainfeats), len(testfeats))

        classifier = NaiveBayesClassifier.train(trainfeats)
        print 'accuracy:', nltk.classify.util.accuracy(classifier, testfeats)

        classifier.show_most_informative_features()

    else:
        return NaiveBayesClassifier.train(negfeats+posfeats)
Developer: jnu, Project: texecutions, Lines: 26, Source: sentiment.py

Example 8: median_approach

def median_approach(llimit,ulimit,isphrase,pathname):

    posmedlist=[]
    negmedlist=[]
    medians=[]

    lpcount=0
    totalcount=ulimit-llimit
    cnt_var=0
    print '\nNo of +ve reviews trained : '
    for fid in movie_reviews.fileids(categories=['pos'])[llimit:ulimit]:
        testmed=proximity_tagger.medianlist(movie_reviews.abspath(fid),isphrase,cnt_var,0,pathname)
        posmedlist.append(testmed)
        lpcount=lpcount+1
        cnt_var += 1
        print 'Training +ve review ',lpcount,'.'*10,(float(lpcount)*100/float(totalcount)),'%'

    lpcount=0
    cnt_var=0
    print '\nNo of -ve reviews trained : '
    for fid in movie_reviews.fileids(categories=['neg'])[llimit:ulimit]:
        testmed=proximity_tagger.medianlist(movie_reviews.abspath(fid),isphrase,cnt_var,1,pathname)
        negmedlist.append(testmed)
        lpcount=lpcount+1
        cnt_var += 1
        print 'Training -ve review ',lpcount,'.'*10,(float(lpcount)*100/float(totalcount)),'%'

    medians.append([numpy.median(x) for x in itertools.izip(*posmedlist)])
    medians.append([numpy.median(x) for x in itertools.izip(*negmedlist)])

    f = open('train_result\proximity_median_train_result_'+str(isphrase),'w')
    json.dump(medians,f)
    f.close()
Developer: nidhinbalakrishnan, Project: academic-project, Lines: 33, Source: review_train.py

Example 9: maketrainset

 def maketrainset(movie_reviews, tokenizer, stemmer):
     negids = movie_reviews.fileids('neg')
     posids = movie_reviews.fileids('pos')
     negfeats = [(tokenizer(movie_reviews.words(fileids=[f]), stemmer), 'neg') for f in negids]
     posfeats = [(tokenizer(movie_reviews.words(fileids=[f]), stemmer), 'pos') for f in posids]
     trainfeats = negfeats + posfeats
     return trainfeats
Developer: askerry, Project: FGE_MISC, Lines: 7, Source: stimanalysisfuncs.py

Example 10: evaluate_features

 def evaluate_features(self,feature_extractor, N):
     self.negative = movie_reviews.fileids('neg') #list of all names of the documents under neg folder
     self.positive = movie_reviews.fileids('pos') #list of all names of the documents under pos folder
     self.maintrain, self.maintest = self.stratifiedSplit(self.negative, self.positive, N)
     lst = []
     trainvocabulary = []
     for doc,lbl in self.maintrain:
         x = (feature_extractor(movie_reviews.words(fileids=[doc])),lbl)
         lst.append(x)
         trainvocabulary = trainvocabulary + x[0].keys()
     trainvocabulary = set(trainvocabulary)
     if q2_1.W == 0:
         q2_1.W = len(trainvocabulary)
     print "no. of features in train:", self.W
     nb = classifier.train(lst)
     self.testClassify = self.classifyTest(self.maintest, nb, feature_extractor)
     print "accuracy = ", accuracy(self.maintest, self.testClassify)
     print "Negative:"
     print "    precision = ", self.calcPrec('neg', self.maintest, self.testClassify)
     print "    recall = ", self.calcRecall('neg', self.maintest, self.testClassify)
     print "    f measure = ", self.calcFMeasur('neg', self.maintest, self.testClassify)
     print "Positive:"
     print "    precision = ", self.calcPrec('pos', self.maintest, self.testClassify)
     print "    recall = ", self.calcRecall('pos', self.maintest, self.testClassify)
     print "    f measure = ", self.calcFMeasur('pos', self.maintest, self.testClassify)
     nb.show_most_informative_features()
     return nb
Developer: atiassa, Project: recommend-2011, Lines: 27, Source: q2_1.py

Example 11: main

def main(argv):
    negids = movie_reviews.fileids('neg')
    posids = movie_reviews.fileids('pos')

    #print negids
 
    negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'negative') for f in negids]
    posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'positive') for f in posids]

    trainfeats =  posfeats+negfeats
    #print trainfeats
    #    break
    classifier = NaiveBayesClassifier.train(trainfeats)

    #classifier = pickle.load(open("classifier.p", "rb"))
    topicList = ["media", "sports", "news", "fashion", "finance", "politics"]
    for line in sys.stdin:
        try:
            tolk_posset = word_tokenize(line.rstrip())
            d = word_feats(tolk_posset)
            for topic in topicList:
                subjectFull = subj(line, topic)
                if not subjectFull == "No match":
                    #print d
                    print "LongValueSum:" + "" + str(line.split(":")[0])+","+subjectFull + "," + classifier.classify(d) + "\t" + "1"                    
        except:
                #print "Error"
                continue
Developer: BhavdeepSethi, Project: cloudBigData, Lines: 28, Source: sentiment.py

Example 12: main

def main():
	negids = movie_reviews.fileids('neg')
	posids = movie_reviews.fileids('pos')

	to_review1 = "A man with a magnanimous spirit helps a mute girl from Pakistan return home."
	to_review2 = "Forced out of his own company by former Darren Cross, Dr. Hank Pym (Michael Douglas) recruits the talents of Scott Lang (Paul Rudd), a master thief just released from prison. Lang becomes Ant-Man, trained by Pym and armed with a suit that allows him to shrink in size, possess superhuman strength and control an army of ants. The miniature hero must use his new skills to prevent Cross, also known as Yellowjacket, from perfecting the same technology and using it as a weapon for evil."
	to_review3 = '''Parents need to know that kids may clamor to see this fast-paced, action-packed comic book-based adventure. But it's definitely more age-appropriate for teens than younger children. Although much of the violence is clearly meant to be based in the realm of sci-fi and fantasy -- and/or is shown at a distance -- there's plenty of it, from massive explosions to children held at gunpoint to super-powered fistfights. Some of the violence is war themed, and some characters get hurt and/or die. While much is made of lead character Tony Stark's devil-may-care lifestyle of fun and frolic, viewers also see him turn away from the more irresponsible aspects of playboyhood. Language is minimal, and sexual content is more suggested than shown overall -- though there are a few eyebrow-raising moments.'''
	reviews = []
	reviews.append(to_review1)
	reviews.append(to_review2)
	reviews.append(to_review3)

	for to_review in reviews:
		to_review_words = to_review.split(" ")
		print "Reviewing",to_review,"\n\n\n"


		print ''' Normal classification ''',"\n\n"
		negfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
		posfeats = [(word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
		calculateScore(classification(negfeats, posfeats, 1, 1), to_review_words)
		calculateScore(classification(negfeats, posfeats, 1, 0.95), to_review_words)
		calculateScore(classification(negfeats, posfeats, 0.95, 1), to_review_words)
		calculateScore(classification(negfeats, posfeats, 0.9, 1), to_review_words)
		calculateScore(classification(negfeats, posfeats, 1, 0.9), to_review_words)

		print ''' Without Punctuations ''',"\n\n"
		negfeats_stopwords = [(word_feats_punctuations(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
		posfeats_stopwords = [(word_feats_punctuations(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
		calculateScore_punctuations(classification(negfeats, posfeats, 1, 1), to_review_words)
		calculateScore_punctuations(classification(negfeats, posfeats, 1, 0.95), to_review_words)
		calculateScore_punctuations(classification(negfeats, posfeats, 0.95, 1), to_review_words)
		calculateScore_punctuations(classification(negfeats, posfeats, 0.9, 1), to_review_words)
		calculateScore_punctuations(classification(negfeats, posfeats, 1, 0.9), to_review_words)



		print ''' Without Stop Words ''',"\n\n"
		negfeats_stopwords = [(word_feats_stopwords(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
		posfeats_stopwords = [(word_feats_stopwords(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
		wordstoreview = []
		for each in to_review_words:
			if each not in stopwords.words('english'):
				wordstoreview.append(each)
		calculateScore_stopwords(classification(negfeats, posfeats, 1, 1), wordstoreview)
		calculateScore_stopwords(classification(negfeats, posfeats, 1, 0.95), to_review_words)
		calculateScore_stopwords(classification(negfeats, posfeats, 0.95, 1), to_review_words)
		calculateScore_stopwords(classification(negfeats, posfeats, 0.9, 1), to_review_words)
		calculateScore_stopwords(classification(negfeats, posfeats, 1, 0.9), to_review_words)


		print ''' With Lemmatizer ''',"\n\n"
		negfeats_stopwords = [(word_feats_lemmatize(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
		posfeats_stopwords = [(word_feats_lemmatize(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
		calculateScore_lemmatizer(classification(negfeats, posfeats, 1, 1), to_review_words)
		calculateScore_lemmatizer(classification(negfeats, posfeats, 1, 0.95), to_review_words)
		calculateScore_lemmatizer(classification(negfeats, posfeats, 0.95, 1), to_review_words)
		calculateScore_lemmatizer(classification(negfeats, posfeats, 0.9, 1), to_review_words)
		calculateScore_lemmatizer(classification(negfeats, posfeats, 1, 0.9), to_review_words)
Developer: saransh2405, Project: sentiment-Analysis-using-Maximum-Entropy-Classification, Lines: 59, Source: maxent.py

Example 13: __init__

 def __init__(self, train1=True, train2=True, train3=True, train4=True):
     self.trainfeats = []        
     
     if train1:
         negids = movie_reviews.fileids('neg')
         posids = movie_reviews.fileids('pos')
          
         neg_movies = [(self.word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
         pos_movies = [(self.word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
         
         self.trainfeats = neg_movies + pos_movies
     
     if train2:
         f = open("out.txt", "r")
         
         negfeats = []
         posfeats = []
         for line in f:
             status = line[0]
             texto = line[2:]
 
             if status == '0':
                 negfeats.append((self.word_feats(texto.split(" ")), 'neg'))
             elif status == '1':
                 posfeats.append((self.word_feats(texto.split(" ")), 'pos'))               
     
         self.trainfeats += negfeats + posfeats
     
     if train3:    
         f = open("E:\\Workspace\\WS_TG\\analisador1\\AFINN\\AFINN-111.txt", 'r')
         for l in f:
             data = l.strip().split('\t')
             self.trainfeats.append( (self.word_feats(data[0]), 'neg' if int(data[1]) < 0 else 'pos'))
             
     if train4:
         f = open("E:\\Workspace\\WS_TG\\api\\trainning set.txt", 'r')
         pos = []
         neutral = []
         neg = []
         for line in f:
             if line.startswith("pos"):
                 pos.append(line)
             elif line.startswith("neutral"):
                 neutral.append(line)
             elif line.startswith("neg"):
                 neg.append(line)
                 
         print len(pos), len(neutral), len(neg)
         
         total = pos + neutral[:200] + neg
         
         for line in total:
             data = line.split(' .:. ')
             self.trainfeats.append( (self.word_feats(data[1].split()), data[0]) )
                    
     self.classifier = NaiveBayesClassifier.train(self.trainfeats)
     
     print self.classifier.show_most_informative_features(20)
Developer: phslfo, Project: TGSAT, Lines: 58, Source: analisador.py

Example 14: setup_demo

def setup_demo(lower):
    print 'running movie reviews demo. data dir: ', nltk_movie_reviews_data_root
    negative_reviews = map (lambda x: nltk_movie_reviews_data_root + x, movie_reviews.fileids('neg'))
    positive_reviews = map (lambda x: nltk_movie_reviews_data_root + x, movie_reviews.fileids('pos'))
    pos = create_corpus_from_file_list(positive_reviews, "positive", None, None, lower)
    neg = create_corpus_from_file_list(negative_reviews, "negative", None, None, lower)
    pos_bigrams = create_corpus_from_file_list(positive_reviews, "positive", None, None, lower, wordlist_to_bigrams_dict)
    neg_bigrams = create_corpus_from_file_list(negative_reviews, "negative", None, None, lower, wordlist_to_bigrams_dict)
    return (pos, neg, pos_bigrams, neg_bigrams)
Developer: gleicon, Project: sentiment_analysis, Lines: 9, Source: demo_movie_reviews.py

Example 15: __init__

 def __init__(self, load = False, loadFile = ""):
     if(load):
         self.loadClassifier(loadFile)
     else:
         negids = movie_reviews.fileids('neg')
         posids = movie_reviews.fileids('pos')
         negfeats = [(self.word_feats(movie_reviews.words(fileids=[f])), 'neg') for f in negids]
         posfeats = [(self.word_feats(movie_reviews.words(fileids=[f])), 'pos') for f in posids]
         trainfeats = negfeats + posfeats
         self.classifier = NaiveBayesClassifier.train(trainfeats)
Developer: rzsun, Project: Enquire, Lines: 10, Source: sentclassifier.py


Note: The nltk.corpus.movie_reviews.fileids examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors. For distribution and use, please refer to the license of the corresponding project; do not republish without permission.