

Python FreqDist.plot Method Code Examples

This article collects typical usage examples of the Python method nltk.probability.FreqDist.plot. If you have been wondering what FreqDist.plot does or how to use it, the hand-picked code examples below may help. You can also explore further usage examples of the containing class, nltk.probability.FreqDist.


Below are 14 code examples of the FreqDist.plot method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
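
Before diving into the community examples, here is a minimal, self-contained sketch of the basic call (a sketch only, assuming NLTK and matplotlib are installed; the token list is made up for illustration):

from nltk.probability import FreqDist

# Build a frequency distribution from a small token list and plot
# the five most common samples (made-up data, for illustration only).
tokens = ["the", "cat", "sat", "on", "the", "mat", "the", "end"]
fdist = FreqDist(tokens)
fdist.plot(5, cumulative=False)  # opens a matplotlib window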

Example 1: fun10

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]
def fun10():
    """Plot a cumulative frequency distribution of text1 (from nltk.book)."""
    fdist1 = FreqDist(text1)
    # print(fdist1)
    vocabulary1 = list(fdist1.keys())
    # print(vocabulary1[:50])
    fdist1.plot(50, cumulative=True)
Author: gree2, Project: hobby, Lines: 9, Source: ch01.py

Example 2: main

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]
def main():
    index = get_index("index.data")

    results = bfs('Obama', 'GAB', index)
    
    print_results(results)
    fdistAB = FreqDist([rel.A() for rel in results] + [rel.B() for rel in results])
    fdistAB.plot(10)
Author: mattiskan, Project: spraktproj, Lines: 10, Source: bfs.py

Example 3: main

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]
def main():
    argparser = argparse.ArgumentParser(description='text file')
    argparser.add_argument('file', type=str, help='file to produce frequency distribution for')
    args = argparser.parse_args()

    #toker = WhitespaceTokenizer()

    with open(args.file) as f:
        text = f.read()
    print(text)
    fdist = FreqDist(text)  # note: passing a raw string counts characters, not words
    print(fdist.freq('28') * 100)
    fdist.plot()
Author: Argonaught, Project: playground, Lines: 15, Source: freq.py

Example 4: testFunc

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]
def testFunc():
    fw = open("./MZI/data.doc", "r", encoding="utf8")
    text = fw.read()
    tokens = getWordList(text)
    print(len(set(tokens)))
    from nltk.probability import FreqDist
    from nltk.util import bigrams
    fdist = FreqDist(w for w in tokens if len(w) > 1)
    fdist.tabulate(50)
    big = list(bigrams(w for w in tokens if len(w) > 1))
    print(big[:100])
    fdist = FreqDist(str(w) for w in big)
    fdist.tabulate(10)
    fdist.plot(50)
Author: olee12, Project: Stylogenetics, Lines: 16, Source: MakeNormalData.py

Example 5: create_enhanced_dale_chall_list

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]
    def create_enhanced_dale_chall_list(self):
        # List of sites used to build the list of most frequent words
        alexa_list = ['Google', 'Facebook', 'YouTube', 'Yahoo!', 'Wikipedia', 'Microsoft', 'Amazon', 'Twitter', 'LinkedIn', 'Wordpress', 'Ebay', 'Apple', 'Paypal', 'Imdb', 'Tumblr', 'Disney', 'BBC', 'Livejasmin', 'Craigslist', 'Ask']

        # Gather all privacy policy texts into one list
        corpus = []
        data = get_all_policies()
        for site in data:
            if site in alexa_list:
                corpus.append(data[site]["text"])

        # Split the combined text into a list of words
        t = textanalyzer("eng")
        words = t.getWords("".join(corpus))

        # Open the Dale-Chall word list
        dale_chall_list = open('../nltk_contrib/dale_chall_wordlist.txt').read().split(';')

        # Build a corpus from the words of the 20 privacy policies, dropping
        # every word that appears on the Dale-Chall list of easy words
        new_corpus = []
        for word in words:
            if word.lower() not in dale_chall_list and word not in alexa_list:
                new_corpus.append(word.lower())

        # Create a frequency distribution of the remaining words
        fdist = FreqDist(new_corpus)
        # Plot it
        fdist.plot(80, cumulative=True)

        # Collect the words that make up 33% (cumulative) of the words
        # that are not on the Dale-Chall list
        most_frequ = []
        cum_percentage = 0.0
        for sample in fdist:
            cum_percentage += fdist.freq(sample)
            most_frequ.append(sample)
            if cum_percentage > 0.33:
                break

        # Write those words to a file
        privacy_file = open("privacy_wordlist.txt", "w")
        privacy_file.write(";".join(most_frequ))
        privacy_file.close()
Author: Saragon87, Project: raTest, Lines: 44, Source: readabilityanalyzer.py

Example 6: content_fraction

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]
# def content_fraction(text):
#	stopwords = nltk.corpus.stopwords.words('spanish')
#	content = [w for w in text if w.lower() not in stopwords]
#	return len(content) / len(text)
# content_fraction(text)

# Step 4: stem words


# SOME INITIAL EXPLORATIONS OF THE TEXT

sorted(set(text))                # displays sorted unique words
fdist = FreqDist(text)           # creates a frequency distribution of the words
vocabulary = list(fdist.keys())  # materializes the vocabulary (a keys view is not sliceable in Python 3)
vocabulary[:50]                  # displays 50 words from the text
fdist.plot(50, cumulative=True)  # cumulative plot of the 50 most frequent words
text.collocations()              # common word collocations



# APPROACH 1: POINTWISE MUTUAL INFORMATION (PMI)

bigram_measures   = nltk.collocations.BigramAssocMeasures()
trigram_measures  = nltk.collocations.TrigramAssocMeasures()
#quadgram_measures = nltk.collocations.QuadgramAssocMeasures()


finder_bi   = BigramCollocationFinder.from_words(text)
finder_tri  = TrigramCollocationFinder.from_words(text)
finder_quad = QuadgramCollocationFinder.from_words(text)
Author: marco-morales, Project: -YaMeCanse_PorEsoPropongo_analysis, Lines: 32, Source: extraction.py

Example 7: open

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]
content = open(file_name, 'rb').read()

cutedText = " ".join(jieba.cut(content))
#nltkText = nltk.corpus.gutenberg.raw(cutedText)
fd = FreqDist(cutedText)  # note: a raw string yields character frequencies, not word frequencies
items = fd.items()
print(list(items)[:30])
#fd.plot()
#print(cutedText)
print(dir(cutedText))
#print(dir(nltkText))
print(cutedText.count(u'ÃÏ¿Ì'))  # this keyword literal is mojibake in the source

tags = jieba.analyse.extract_tags(content, topK=30)
fd = FreqDist(tags)
for keyword in tags:
    print("result of", keyword)
    count = cutedText.count(keyword)
    print(count)
    fd[keyword] = count
    #cutedText.split().concordance(keyword)

print(fd)

from pylab import *
mpl.rcParams['font.sans-serif'] = ['SimHei']  # use a CJK-capable font so Chinese labels render
plt.xlabel(u'')
plt.ylabel(u'¥Œ ˝')  # this label is mojibake in the source
plt.title(u'')
fd.plot()
Author: tonglanli, Project: jiebademo, Lines: 32, Source: extractkeycount.py

Example 8: len

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]
# Compute the percentage of hapax legomena occurrences and find the longest of them
hapax_legomenas = fdist.hapaxes() # Get the list of words that appear just once in the corpus
hapax_legomena_counts = len(hapax_legomenas) # Get their count
percentage_of_hapax_legomena = (hapax_legomena_counts/no_of_tokens)*100 # Compute percentage
print("Percentage of Hapax Legomena Occurrences", percentage_of_hapax_legomena)
max_len_hapax_legomena = max([len(word) for word in hapax_legomenas])
print("Longest Hapax Legomena are", [word for word in hapax_legomenas if len(word) == max_len_hapax_legomena])

# Compute the percentage of dis legomena occurrences and find the longest of them
dis_legomenas = [key for key, value in fdist.items() if value == 2] # Get the words that occur exactly twice
dis_legomena_counts = len(dis_legomenas) * 2 # Get their total number of occurrences
percentage_of_dis_legomena = (dis_legomena_counts/no_of_tokens)*100 # Compute percentage
print("Percentage of Dis Legomena Occurrences", percentage_of_dis_legomena)
max_len_dis_legomena = max([len(word) for word in dis_legomenas])
print("Longest Dis Legomena are", [word for word in dis_legomenas if len(word) == max_len_dis_legomena])

# Plot the r vs Nr graph
fdist.plot(50)

# Compute the log-scaled version of r vs Nr
log_rvsNr = {log(key): log(value) for key, value in fdist.r_Nr().items() if value != 0}

# Plot the graph of log(r) vs log(Nr)
plot.plot(list(log_rvsNr.keys()), list(log_rvsNr.values()), 'r.')
plot.axis([-1, 11, -1, 11])
plot.xlabel('log(r)')
plot.ylabel('log(Nr)')
plot.title('log(r) vs log(Nr) Brown Corpus')
plot.show()

Author: GaddipatiAsish, Project: Natural-Language-Processing, Lines: 31, Source: Ex3_part1.py

Example 9: print

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]

#*******************************************************************************
# Question: Are there differences between word-length frequencies of converted
#      vs. unconverted requests?
# Answer: No
# Correlation Coefficient: 
#*******************************************************************************
print('Begin calculating word length frequencies...')
      
cnvtText = ' '.join([item['request_text'] for item in data
                     if len(item['request_text'])>0
                     and item['requester_received_pizza']==1])
wl1 = [len(word) for word in nltk.word_tokenize(cnvtText) if word.isalpha()]
wl1fd = FreqDist(wl1)
if graphs == 'yes': wl1fd.plot()
## 4, 3, 2, 5, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 18
print('...Word length frequencies for successful requests have been plotted.')

uncnvtText = ' '.join([item['request_text'] for item in data
                     if len(item['request_text'])>0
                     and item['requester_received_pizza']==0])
wl2 = [len(word) for word in nltk.word_tokenize(uncnvtText) if word.isalpha()]
wl2fd = FreqDist(wl2)
if graphs == 'yes': wl2fd.plot()
## 4, 3, 2, 5, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 17, 35, 20
print('...Word length frequencies for unsuccessful requests have been plotted.')



#*******************************************************************************
Author: Sandy4321, Project: Exploratory-Data-Analysis, Lines: 32, Source: RandomActsOfPizzaEDA.py

Example 10: PlaintextCorpusReader

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]
# -*- coding: utf-8 -*-

from nltk.probability import FreqDist
from nltk.corpus import PlaintextCorpusReader
from nltk.tokenize import LineTokenizer

FIRST = 0
END = 150

corpus_root = './data'
fileids = 'data_title_sample'

wordlists = PlaintextCorpusReader(corpus_root,
    fileids,
    sent_tokenizer=LineTokenizer(),
    encoding='utf-8')

tokens = []
for word in wordlists.words():
    try:
        tokens += [word.lower()]
    except Exception:
        pass

fdist = FreqDist(tokens)

# NLTK 2 sliced the samples as [FIRST:END]; NLTK 3's plot() expects a single sample count
fdist.plot(FIRST, END)

for k, v in fdist.items():
    print("{} {}".format(k, v))
Author: rueshyna, Project: Taipei.py_20130425, Lines: 32, Source: freq.py

Example 11: plot_html_results

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]
    def plot_html_results(self, lemmatized_list_by_verb_noun_adj_adv, number_of_cat):
        fdist = FreqDist(w for w in lemmatized_list_by_verb_noun_adj_adv)
        fdist.plot(number_of_cat)
Author: EduardoCarvalho, Project: nltkAnalyzer, Lines: 5, Source: nltkAnalyzer.py

Example 12: set

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]
# Creating the main text object based on the Wall Street Journal corpus
# Setting all words to lowercase and removing non-alphabetical entries
myText = [word.lower() for word in text7 if word.isalpha()]

# Creating text object based on myText, without repetitions
myTextSet = set(myText)

# Creating a frequency distribution with myText
fdMyText = FreqDist(myText)

# Creating histogram, and copying to file, in order of appearance
histogram = ["%s - %s" % (word, fdMyText[word]) for word in myTextSet]

fileObj = open("histogram.txt", "w")
for wordInfo in histogram:
    fileObj.write("%s\n" % (wordInfo))
fileObj.close()

# Creating a list of the words sorted from most to least frequent and copying
# it to file (FreqDist.keys() is not frequency-sorted in NLTK 3)
sortedList = [word for word, _ in fdMyText.most_common()]

fileObj = open("sortedHistogram.txt", "w")
for word in sortedList:
    fileObj.write("%s - %d\n" % (word, fdMyText[word]))
fileObj.close()

# Only showing the 50 most frequent words in the plot because of limited monitor space
fdMyText.plot(50)
Author: htaunay, Project: TextComparison, Lines: 31, Source: Exercise01.py

Example 13: open

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]
from nltk.probability import FreqDist

with open('christ-and-satan.txt') as f:
    cs_text = f.read()

word_list = cs_text.split()
first_letter = [word[0] for word in word_list if word[0].isalpha()]
letter_dist = FreqDist(first_letter)
letter_dist.plot(4, cumulative=True)
Author: teddyroland, Project: NLTK-Workshop, Lines: 11, Source: solution.py

Example 14: FreqDist

# Required import: from nltk.probability import FreqDist [as alias]
# Or: from nltk.probability.FreqDist import plot [as alias]
print(state_union_text.count("war"))
state_union_text.concordance("economy")
state_union_text.similar("economy")
state_union_text.common_contexts(["economy", "jobs"])

from nltk.probability import FreqDist

fdist = FreqDist(state_union_text)
result = fdist.most_common(15)
result


from nltk.corpus import stopwords
stopwords.words("english")


filtered = [w for w in state_union.words() if w not in stopwords.words("english")]
len(filtered)


fdist_filtered = FreqDist(filtered)
fdist_filtered.most_common(20)


fdist_filtered.freq("good")/fdist_filtered.freq("bad")
fdist_filtered.freq("bad")/fdist_filtered.freq("evil")


fdist_filtered.plot(30)

Author: MikeXL, Project: Machine-Learning, Lines: 31, Source: nltk.py


Note: The nltk.probability.FreqDist.plot examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various programmers; copyright in the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license; do not reproduce without permission.