

Python FreqDist.items Method Code Examples

This article collects typical usage examples of the Python method nltk.probability.FreqDist.items. If you are wondering how FreqDist.items is used in practice, the curated code examples below may help. You can also explore further usage examples of the containing class, nltk.probability.FreqDist.


The following presents 15 code examples of the FreqDist.items method, ordered by popularity.
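Before the examples, a minimal sketch of what FreqDist.items returns may be useful. Several examples below slice the result directly (for instance word_frequencies.items()[:100]); this relies on the NLTK 2.x behaviour, where FreqDist.items() returned a list of (sample, count) pairs sorted by decreasing frequency. In NLTK 3, FreqDist subclasses collections.Counter, items() returns an unsorted dict view, and most_common(n) plays the role of the old sliced call. The snippet below is a sketch assuming NLTK 3 on Python 3, with a made-up word list:

# Minimal sketch (assumes NLTK 3 / Python 3; the word list is invented for illustration)
from nltk.probability import FreqDist

words = ['the', 'cat', 'sat', 'on', 'the', 'mat', 'the', 'cat']
fd = FreqDist(words)

print(list(fd.items()))   # dict-style view, e.g. [('the', 3), ('cat', 2), ('sat', 1), ...]
print(fd.most_common(2))  # frequency-sorted: [('the', 3), ('cat', 2)]
print(fd['the'], fd.N())  # count of one sample (3) and total number of samples (8)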

Example 1: summarize

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
def summarize(self, input, num_sentences):
    punt_list = ['.', ',', '!', '?']
    summ_sentences = []
    sentences = input
    #sentences = sent_tokenize(input)
    lowercase_sentences = [sentence.lower()
            for sentence in sentences]
    #print lowercase_sentences
    saito = ' '.join(sentences)
    s = input
    ts = ''.join([o for o in s if o not in punt_list]).split()
    lowercase_words = [word.lower() for word in ts]
    words = [word for word in lowercase_words if word not in stopwords.words()]
    word_frequencies = FreqDist(words)

    most_frequent_words = [pair[0] for pair in
            word_frequencies.items()[:100]]

    # add sentences with the most frequent words
    if len(s) < num_sentences:
        num_sentences = len(s)
    for word in most_frequent_words:
        for i in range(len(lowercase_sentences)):
            if len(summ_sentences) < num_sentences:
                if (lowercase_sentences[i] not in summ_sentences
                        and word in lowercase_sentences[i]):
                    summ_sentences.append(lowercase_sentences[i])
            else:
                break
        if len(summ_sentences) >= num_sentences:
            break

    # reorder the selected sentences
    summ_sentences.sort(lambda s1, s2: saito.find(s1) - saito.find(s2))
    return summ_sentences
Author: benjbigot | Project: BNN_WIN | Lines: 37 | Source: naivesumm.py

Example 2: create_word_bigram_scores

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
def create_word_bigram_scores(posWords, negWords, n = 5000):
    # (posWords,negWords) = readwordarr()
    posWords = list(itertools.chain(*posWords))
    negWords = list(itertools.chain(*negWords))
    bigramfinder = BigramCollocationFinder.from_words(posWords)
    posbigrams = bigramfinder.nbest(BigramAssocMeasures.chi_sq, n)
    bigramfinder = BigramCollocationFinder.from_words(negWords)
    negbigrams = bigramfinder.nbest(BigramAssocMeasures.chi_sq, n)
    posWords = posWords + posbigrams
    negWords = negWords + negbigrams
    wordscores = {}
    wordfd = FreqDist()
    conditionwordfd = ConditionalFreqDist()
    for word in posWords:
        wordfd[word]+=1
        conditionwordfd['pos'][word]+=1
        
    for word in negWords:
        wordfd[word]+=1
        conditionwordfd['neg'][word]+=1
    
    pos_word_count = conditionwordfd['pos'].N()
    neg_word_count = conditionwordfd['neg'].N()
    totalcount = pos_word_count + neg_word_count
    for word,freq in wordfd.items():
        pos_score = BigramAssocMeasures.chi_sq(conditionwordfd['pos'][word], (freq, pos_word_count), totalcount)
        neg_score = BigramAssocMeasures.chi_sq(conditionwordfd['neg'][word], (freq, neg_word_count), totalcount)
        wordscores[word] = pos_score + neg_score
    return wordscores
Author: eleanordong | Project: datamining | Lines: 31 | Source: sentimentexample.py
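As a usage note (not part of the original project), the word_scores dictionary returned by create_word_bigram_scores is normally reduced to the n highest-scoring words and bigrams, which then serve as classifier features; Examples 6 and 9 below do exactly this with sorted(). A hedged sketch of that follow-up step, with function names of my own choosing:

# Hypothetical follow-up: keep the n highest-scoring words/bigrams as features.
# find_best_words and best_word_features are assumed names, not code from the original repository.
from operator import itemgetter

def find_best_words(word_scores, n=1000):
    best = sorted(word_scores.items(), key=itemgetter(1), reverse=True)[:n]
    return set(word for word, score in best)

def best_word_features(document_words, best_words):
    # feature dict for a single document: which informative features it contains
    return {word: True for word in document_words if word in best_words}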

Example 3: get_summarized

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
	def get_summarized(self, inputContent, num_sentences ):
	
		base_words = [word.lower()
			for word in nltk.word_tokenize(inputContent)]
		words = [word for word in base_words if word not in stopwords.words()]
		word_frequencies = FreqDist(words)
		
		most_frequent_words = [pair[0] for pair in
			word_frequencies.items()]
		
		
		sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
		actual_sentences = sent_detector.tokenize(inputContent)
		working_sentences = [sentence.lower()
			for sentence in actual_sentences]

		
		output_sentences = []

		for word in most_frequent_words:
			for i in range(0, len(working_sentences)):
				if (word in working_sentences[i]
				  and actual_sentences[i] not in output_sentences):
					output_sentences.append(actual_sentences[i])
					break
				if len(output_sentences) >= num_sentences: break
			if len(output_sentences) >= num_sentences: break
			
		
		return output_sentences
Author: Ameya-SK | Project: Comment-Summarizer | Lines: 32 | Source: summarizer.py

Example 4: get_negative_grams

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
def get_negative_grams(filePath,n):
	l = list()
	# Open the review file and read its content
	with codecs.open(filePath,'r') as myfile:
		sentence=myfile.read()
		sentence=sentence.replace('points forts', ' ')
		sentence=sentence.replace('points faibles', ' ')
		sentence=sentence.replace('commentaires', ' ')

	n_grams = ngrams(sentence.split(), n)
	s=''
	for grams in n_grams:
		if('est pas' in grams or 'ai pas' in grams or 'pas' in grams or 'cher' in grams):
			s+=str(grams)+'\n'
			l.append(grams)

	'''fe = open('negative-'+str(n)+'-gram.txt', 'w')
	fe.write(s)
	fe.close()'''

	Dict = FreqDist(l)
	Dict = sorted(Dict.items(), key=operator.itemgetter(1), reverse=True)

	t=''
	for x in Dict:
		t+= '(\''+str(x[0])+'\' , ' +str(x[1])+')\n'

	fe = open('stats/Freq_negative-'+str(n)+'-gram.txt', 'w')
	fe.write(t)
	fe.close()
Author: BelkhousNabil | Project: Projets-Informatiques | Lines: 32 | Source: opinion_crawling.py

Example 5: summarize

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
    def summarize(self, input, num_sentences):

        punt_list = [".", ",", "!", "?"]
        summ_sentences = []

        sentences = sent_tokenize(input)
        lowercase_sentences = [sentence.lower() for sentence in sentences]
        # print lowercase_sentences

        s = list(input)
        ts = "".join([o for o in s if not o in punt_list]).split()
        lowercase_words = [word.lower() for word in ts]
        words = [word for word in lowercase_words if word not in stopwords.words()]
        word_frequencies = FreqDist(words)

        most_frequent_words = [pair[0] for pair in word_frequencies.items()[:100]]

        # add sentences with the most frequent words
        for word in most_frequent_words:
            for i in range(0, len(lowercase_sentences)):
                if len(summ_sentences) < num_sentences:
                    if lowercase_sentences[i] not in summ_sentences and word in lowercase_sentences[i]:
                        summ_sentences.append(sentences[i])
                        break

            # reorder the selected sentences
        summ_sentences.sort(lambda s1, s2: input.find(s1) - input.find(s2))
        return " ".join(summ_sentences)
Author: RobBlackwell | Project: literature-survey-tools | Lines: 30 | Source: summarise.py

Example 6: getBestWords

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
def getBestWords(posWords, negWords):
    word_fd = FreqDist()
    label_word_fd = ConditionalFreqDist()

    for word in posWords:
        word_fd[word.lower()] += 1
        label_word_fd["pos"][word.lower()] += 1

    for word in negWords:
        word_fd[word.lower()] += 1
        label_word_fd["neg"][word.lower()] += 1

    pos_word_count = label_word_fd["pos"].N()
    neg_word_count = label_word_fd["neg"].N()
    total_word_count = pos_word_count + neg_word_count

    word_scores = {}

    for word, freq in word_fd.items():
        pos_score = BigramAssocMeasures.chi_sq(label_word_fd["pos"][word], (freq, pos_word_count), total_word_count)
        neg_score = BigramAssocMeasures.chi_sq(label_word_fd["neg"][word], (freq, neg_word_count), total_word_count)
        word_scores[word] = pos_score + neg_score

    # best = sorted(word_scores.iteritems(), key=lambda (w,s): s, reverse=True)[:10000]
    sorted_x = sorted(word_scores.items(), key=operator.itemgetter(1), reverse=True)
    bestwords = set([w for w, s in sorted_x])

    return bestwords
Author: dakshvar22 | Project: DishingOut | Lines: 30 | Source: sentimentTrainer.py

Example 7: train_MLT

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
    def train_MLT(self, tagged_train_data, untagged_training_data):
        """
        Builds a most likely tag tagger from the given tagged training data as WORDS
        :param train_data:
        :return: model
        """
        # find the set of words
        words = set()
        for sent in untagged_training_data:
            for word in sent:
                words.add(word)
        # Define mlt_dict of format {word1:{(word1,tag1):count1, (word1, tag2):count2 ........},..........}
        mlt_dict = dict()
        # Initialize keys and values to it
        for word in words:
            mlt_dict[word] = dict()
        # Compute the freq dist of tagged words
        tagged_words_fdist = FreqDist(tagged_train_data)

        for tagged_word, count in tagged_words_fdist.items():
            (mlt_dict[tagged_word[0]])[tagged_word] = count

        # Update the dict to contain the most likely tag for each word
        #for word, inside_dict in mlt_dict.items():
        #   max_val = max(inside_dict.values())
        #    inside_dict =
        print("Training is done!")
        return mlt_dict
Author: GaddipatiAsish | Project: Natural-Language-Processing | Lines: 30 | Source: Q6_Part1.py
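The commented-out block in train_MLT stops short of collapsing each word's counts to a single most likely tag, so the function returns the raw count dictionary. A hedged sketch of that missing final step, assuming the same {word: {(word, tag): count}} layout (the function name and default tag are my own, not from the original project):

# Hypothetical completion of the commented-out step: for each word, keep the tag
# that occurs most often in the training data.
def most_likely_tags(mlt_dict, default_tag='NN'):
    model = {}
    for word, inside_dict in mlt_dict.items():
        if inside_dict:
            (w, tag), count = max(inside_dict.items(), key=lambda kv: kv[1])
            model[word] = tag
        else:
            model[word] = default_tag  # word never seen with a tag; fall back to a default
    return model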

Example 8: make_summary

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
def make_summary( text):
	stemmed = []
	tokens = word_tokenize(text)
	sent = sent_tokenize(text)
	# build a new list instead of removing items while iterating over the same list
	tokens = [token for token in tokens if token not in stopwords.words('english')]
	stemmer = PorterStemmer()

	for token in tokens:
		stemmed.append(stemmer.stem(token))
	# freq(stemmed)
	stemmed = [word.lower() for word in stemmed]
	word_freq = FreqDist(stemmed)

	most_freq_words = [pair[0] for pair in word_freq.items()[:60]]

	working_sent = [sentence.lower() for sentence in sent]

	out_sent = []

	for word in most_freq_words:
		for i in range(0,len(working_sent)):
			if (word in working_sent[i] and sent[i] not in out_sent):
				out_sent.append(sent[i])
				break
			if len(out_sent) >= 5:
			 	break
		
		if len(out_sent) >= 5:
			break

	return reorder(out_sent,text)
Author: aigeano | Project: Summaly | Lines: 37 | Source: summaly.py

Example 9: high_words

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
def high_words(posids, negids, cutoff, score_fn=BigramAssocMeasures.chi_sq, min_score=5):

    word_fd = FreqDist()
    label_word_fd = ConditionalFreqDist()
    pos = 0
    neg = 0
    for review in posids:
        pos += 1
        if (pos != cutoff):
            for word in review['text'].split(' '):
                word_fd.update(token_helpers.tokenize_simple(word))
                label_word_fd['pos'].update(token_helpers.tokenize_simple(word))
 
    for review in negids:
        neg += 1
        if (neg != cutoff):
            for word in review['text'].split(' '):
                word_fd.update(token_helpers.tokenize_simple(word))
                label_word_fd['neg'].update(token_helpers.tokenize_simple(word))
    
    pos_word_count = label_word_fd['pos'].N()
    neg_word_count = label_word_fd['neg'].N()
    total_word_count = pos_word_count + neg_word_count

    word_scores = {}
    for word, freq in word_fd.items():
        pos_score = BigramAssocMeasures.chi_sq(label_word_fd['pos'][word], (freq, pos_word_count), total_word_count)
        neg_score = BigramAssocMeasures.chi_sq(label_word_fd['neg'][word], (freq, neg_word_count), total_word_count)
        word_scores[word] = pos_score + neg_score
    best = sorted(word_scores.items(), key=itemgetter(1), reverse=True)[:10000]
    bestwords = set([w for w, s in best])
    return bestwords
    
    """
Author: efrenaguilar95 | Project: Yelp_Analyzer | Lines: 36 | Source: classifiers.py

Example 10: create_word_scores

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
def create_word_scores():
    posWords = pickle.load(open('pos_review.pkl', 'rb'))
    negWords = pickle.load(open('neg_review.pkl', 'rb'))

    posWords = list(itertools.chain(*posWords))  # flatten the nested list into one flat list of words
    negWords = list(itertools.chain(*negWords))  # likewise for the negative words

    word_fd = FreqDist()  # frequency of every word across both classes
    cond_word_fd = ConditionalFreqDist()  # word frequencies within positive and negative texts separately
    for word in posWords:
        word_fd[word] += 1
        cond_word_fd['pos'][word] += 1
    for word in negWords:
        word_fd[word] += 1
        cond_word_fd['neg'][word] += 1

    pos_word_count = cond_word_fd['pos'].N()  # total number of word tokens in positive texts
    neg_word_count = cond_word_fd['neg'].N()  # total number of word tokens in negative texts
    total_word_count = pos_word_count + neg_word_count

    word_scores = {}
    for word, freq in word_fd.items():
        pos_score = BigramAssocMeasures.chi_sq(cond_word_fd['pos'][word], (freq, pos_word_count),
                                               total_word_count)  # chi-square score of the word for the positive class; mutual information or other measures could be used instead
        neg_score = BigramAssocMeasures.chi_sq(cond_word_fd['neg'][word], (freq, neg_word_count),
                                               total_word_count)  # likewise for the negative class
        word_scores[word] = pos_score + neg_score  # a word's informativeness is the sum of its positive and negative chi-square scores

    return word_scores  # maps every word to its informativeness score
Author: WhiteDevilBan | Project: CommentCrawler | Lines: 31 | Source: NltkUtil.py

Example 11: create_word_bigram_scores

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
def create_word_bigram_scores():
    posdata = pickle.load(open('pos_review.pkl', 'rb'))
    negdata = pickle.load(open('neg_review.pkl', 'rb'))

    posWords = list(itertools.chain(*posdata))
    negWords = list(itertools.chain(*negdata))

    bigram_finder_pos = BigramCollocationFinder.from_words(posWords)
    bigram_finder_neg = BigramCollocationFinder.from_words(negWords)
    posBigrams = bigram_finder_pos.nbest(BigramAssocMeasures.chi_sq, 5000)
    negBigrams = bigram_finder_neg.nbest(BigramAssocMeasures.chi_sq, 5000)

    pos = posWords + posBigrams  # single words plus bigram collocations
    neg = negWords + negBigrams

    word_fd = FreqDist()
    cond_word_fd = ConditionalFreqDist()
    for word in pos:
        word_fd[word] += 1
        cond_word_fd['pos'][word] += 1
    for word in neg:
        word_fd[word] += 1
        cond_word_fd['neg'][word] += 1

    pos_word_count = cond_word_fd['pos'].N()
    neg_word_count = cond_word_fd['neg'].N()
    total_word_count = pos_word_count + neg_word_count

    word_scores = {}
    for word, freq in word_fd.items():
        pos_score = BigramAssocMeasures.chi_sq(cond_word_fd['pos'][word], (freq, pos_word_count), total_word_count)
        neg_score = BigramAssocMeasures.chi_sq(cond_word_fd['neg'][word], (freq, neg_word_count), total_word_count)
        word_scores[word] = pos_score + neg_score

    return word_scores
Author: WhiteDevilBan | Project: CommentCrawler | Lines: 37 | Source: NltkUtil.py

Example 12: get_words_frequency

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
def get_words_frequency(string, top_values):
    """
    Gets the words frequency in a corpus
    :param string: corpus
    :param top_values: maximum of sorted values to return
    :return: list of (synset name, frequency) pairs
    """

    # import stop words from nltk corpus
    stop_words_en_nltk = list(stopwords.words('english'))

    # create additional stop words for punctuation and other noise tokens
    stop_words_en_custom = ['.', ',', '\'', '!', '(', ')', ':', ';', '?', '--', '*', '[', ']', '``', str("''"),
                            '&', '\'ll', '\'ve', '\'s', '\'re', 'a', 'b', 'c',
                            'i', '\'i', 'this', 'n\'t', 'a', 'could', 'should', 'would', 'can', 'will', 'shall',
                            'there', 'it', 'also', 'in', 'the', 'many', 'by', 'an',
                            '1990s', 'the', '+', '-', '...', '=', '%', '#', '[hide]', '[edit]', '.jpg', '/',
                            'be.v.01', 'have.v.01', 'use.v.01', 'besides.r.02', 'analysis.n.01', 'categorization.n.03',
                            'vitamin_e.n.01', 'vitamin_c.n.01', 'include.v.01', 'such.s.01', 'many.a.01', 'order.n.01',
                            'episode.n.01', 'show.n.01', 'not.r.01', 'standard.n.01', 'survey.n.01', 'factor.n.01',
                            'first.a.01']
    until_number = 300
    stop_words_en_custom_numbers = []
    for value in range(until_number + 1):
        stop_words_en_custom_numbers.append(str(value))

    # add them together
    stop_words_en = stop_words_en_nltk + stop_words_en_custom + stop_words_en_custom_numbers

    words_list_tmp = word_tokenize(string.lower())
    words_list = []

    lemmatizer = WordNetLemmatizer()
    for word in nltk.pos_tag(words_list_tmp):
        tag = get_word_tag(word[1])
        if tag != '':
            try:
                synset_word = wordnet.synsets(lemmatizer.lemmatize(word[0], pos=tag), pos=tag)[0]
                words_list.append(synset_word.name())
            except:
                pass

    processed_word_list = [word for word in words_list if word not in stop_words_en]

    text_obj = nltk.Text(processed_word_list)

    fd = FreqDist(text_obj)

    result = list(fd.items())

    if top_values != 0:
        result.sort(key=lambda x: x[1], reverse=True)
        result = result[:top_values]
        return result

    else:
        return result
Author: Rocla | Project: Emergency-Calls-Dispatcher | Lines: 59 | Source: part_of_speech.py

Example 13: probDist

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
def probDist():

    ### file pointers for reading the input documents
    f1 = open(os.path.join('allfiles', 'document01-finance.txt'), "r")
    f2 = open(os.path.join('allfiles', 'document02-finance.txt'), "r")
    f3 = open(os.path.join('allfiles', 'document03-finance.txt'), "r")
    f4 = open(os.path.join('allfiles', 'document04-ee.txt'), "r")
    f5 = open(os.path.join('allfiles', 'document05-ee.txt'), "r")
     
    ### read the file content
    line1 = f1.read()
    line2 = f2.read()
    line3 = f3.read()
    line4 = f4.read()
    line5 = f5.read()
    
    
    ### document01-finance.txt is the writer document and the other files
    ### are reader documents, so we take the word list from the writer document
    words = line1.split()
    X_words = []
    
    ### create a dictionary to store the relative frequency of each term
    dict_x1 = {}
    
    ### use nltk to calculate the frequency of each word
    unigramWordList = FreqDist(words)
    datalen = len(unigramWordList) ### number of distinct word types in the document
    
    for k,v in unigramWordList.items():
        #print k,v
        X_words.append(k)
        dict_x1[k] = (v/float(datalen))
        pd_x1.append(v/float(datalen))
    #print X_words
    #print dict_x1
    #print pd_x1
    
    ### create probability distribution of all files
    for word in X_words:
        pd_x2.append( line2.count(word)/float(datalen) )
        pd_x3.append( line3.count(word)/float(datalen) )
        pd_x4.append( line4.count(word)/float(datalen) )
        pd_x5.append( line5.count(word)/float(datalen) )
        
    #print pd_x2
    #print pd_x3
        
    ### calculate the combined probability distribution across all five files
    line_S = line1+line2+line3+line4+line5
    #print line_S
    
    for word in X_words:
        s.append( line_S.count(word)/float(datalen) )
    
    print s
Author: UW-INFX575 | Project: Kirtika_dhathathri | Lines: 58 | Source: jargonDist.py

Example 14: opinion_tokens_Fr

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
def opinion_tokens_Fr(liste):
	# Create the results folder in case it doesn't exist
	result = "stats"
	if not os.path.exists(result):
		os.mkdir(result,0777)

	i=0
	comments = ''
	while(i<len(liste)):
		comments+=liste[i]+'\n'
		i+=1

	comments=comments.lower()

	#Open the file and write on it the result
	f = open('opinions.txt', 'w')
	f.write(comments)
	f.close()

	w=['"','→','–','’','»','«',',','.','[',']','|','{','}',':',';','!','?','(',')','_','-','=','/',
	' qui ',' cette ',' mais ',' ou ',' où ',' et ',' donc ',' or ',' ni ',' car ',' la ',' là ',' le ',
	' les ',' de ',' des ',' du ',' tout ',' tous ',' toutes ',' que ',' comme ',' si ',' quand ',' je ',
	' tu ',' il ',' elle ',' nous ',' vous ',' ils ',' elles ',' un ',' une ',' au ',' aux ',' dans ',' ce '
	,' se ',' ces ',' ses ',' on ',' en ',' leur ',' leurs ',' a ',' à ',' pour ',' par ',' sous ',' sur ']

	# Reopen the file and read the comments back for cleaning
	with codecs.open('opinions.txt','r') as myfile:
    	
		content=myfile.read()
		content=content.replace('points forts', ' ')
		content=content.replace('points faibles', ' ')
		content=content.replace('commentaires', ' ')
		
		# remove numeric forms
		content = ''.join([i for i in content if not i.isdigit()])
		while w:
			# remove conjunctions, connectors, ...
			content=content.replace(w.pop(0), ' ')

	content = content.split()

	tokenDict = FreqDist(content)
	tokenDict = sorted(tokenDict.items(), key=operator.itemgetter(1), reverse=True)

	s=''
	for x in tokenDict:
		s+= '(\''+x[0].decode('utf-8', 'ignore').encode('utf-8')+'\' , ' +str(x[1])+')\n'
	fe = open('stats/freq_tokens.txt', 'w')
	fe.write(s)
	fe.close()

	return tokenDict
Author: BelkhousNabil | Project: Projets-Informatiques | Lines: 54 | Source: opinion_crawling.py

Example 15: main

# Required import: from nltk.probability import FreqDist
# Method demonstrated: FreqDist.items
def main():
	# Number of words to display
	count = 40

	# Open files as strings
	obama = open("obama.txt", "r").read()
	bush = open("bush.txt", "r").read()

	#Tokenize texts into words, then count frequencies for all words
	top_obama = FreqDist(word.lower() for word in word_tokenize(obama))
	top_bush = FreqDist(word.lower() for word in word_tokenize(bush))
	
	#Return top {count} most occurring words
	print "No stoplist".upper()
	print "Obama/2009\t".upper(), " ".join(item[0] for item in top_obama.items()[:count])
	print "Bush/2001\t".upper(), " ".join(item[0] for item in top_bush.items()[:count])

	#Return most occurring words that are not in the NLTK English stoplist
	print
	print "Stoplisted".upper()
	print "Obama/2009\t".upper(), " ".join([item[0] for item in top_obama.items() if not item[0] in stopwords.words('english')][:count])
	print "Bush/2001\t".upper(), " ".join([item[0] for item in top_bush.items() if not item[0] in stopwords.words('english')][:count])
Author: organisciak | Project: field-exam | Lines: 24 | Source: __init__.py
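Example 15, like Examples 1, 5 and 8, relies on the NLTK 2.x behaviour where FreqDist.items() is already sorted by frequency and can be sliced. A hedged Python 3 / NLTK 3 equivalent of the same report would use most_common() instead of items()[:count]; the helper below is a sketch, not code from the original project:

# Hypothetical Python 3 / NLTK 3 rewrite of the frequency report above.
from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.probability import FreqDist

def top_words(path, count=40, stoplist=None):
    text = open(path, "r", encoding="utf-8").read()
    fdist = FreqDist(word.lower() for word in word_tokenize(text))
    pairs = fdist.most_common()  # (word, count) pairs sorted by decreasing frequency
    if stoplist:
        pairs = [(w, f) for w, f in pairs if w not in stoplist]
    return " ".join(w for w, f in pairs[:count])

# print("OBAMA/2009\t", top_words("obama.txt"))
# print("STOPLISTED\t", top_words("obama.txt", stoplist=set(stopwords.words("english"))))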


Note: The nltk.probability.FreqDist.items examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors. Please consult the corresponding project's license before distributing or reusing the code; do not reproduce this article without permission.