This article collects and summarizes typical usage examples of the Statistics.Statistics.importantPara method in Python. If you are struggling with questions like: How exactly does Python's Statistics.importantPara work? How do I use Statistics.importantPara? What do examples of Statistics.importantPara look like? Then congratulations, the curated method code examples here may be of help to you. You can also explore further usage examples of the containing class Statistics.Statistics.
Below you will find 1 code example of the Statistics.importantPara method. Examples are sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
Example 1: processParagraphs
# Required import: from Statistics import Statistics [as alias]
# Or alternatively: from Statistics.Statistics import importantPara [as alias]
#.........some code omitted here.........
# allNouns may contain duplicates at the end of this loop;
# duplicates are removed afterwards via self.removeDuplicates()
for paragraph in self.paragraphs:
    sentences = nltk.sent_tokenize(paragraph)
    tags = []
    for sentence in sentences:
        tokens = nltk.word_tokenize(sentence)
        tags.extend(nltk.pos_tag(tokens))
    nouns = self.getNouns(tags)
    filteredNouns = self.removeStopWords(nouns)
    allNouns.extend(filteredNouns)
    nounCountDict = self.getNounsCounts(filteredNouns)
    nounCountDictList.append(nounCountDict)
allNouns = self.removeDuplicates(allNouns)
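# Note: removeDuplicates() is not shown in this excerpt. A plausible
# order-preserving implementation (an assumption, not the original source):
#     def removeDuplicates(self, items):
#         return list(dict.fromkeys(items))  # dict keys keep insertion order (Python 3.7+)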
# Build the occurrence vectors; see createOccurenceVector() for more details
occurenceVectorDict = self.createOccurenceVector(allNouns, nounCountDictList)
weightVectorDict = self.createNounWeightDict(occurenceVectorDict)
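# Shape note (inferred from the usage below, since both methods are omitted):
# occurenceVectorDict maps each noun to a list with one count per paragraph
# (occurenceVectorDict[noun][i] > 0 means the noun occurs in paragraph i), and
# weightVectorDict maps each noun to a per-paragraph weight list.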
numberOfParagraphs = len(self.paragraphs)
pointList = []
for key in weightVectorDict.keys():
    totalOccurrences = sum(weightVectorDict[key])
    averageCount = totalOccurrences / numberOfParagraphs
    variance = numpy.var(weightVectorDict[key])
    # TODO: replace the following line with a better formula that balances mean and variance
    point = averageCount - variance
    pointList.append((key, point))
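# Illustration of the mean-minus-variance score (example values, not from the
# source): weights [3, 3, 3] give 3 - 0 = 3, while [9, 0, 0] give 3 - 18 = -15,
# so nouns spread evenly across paragraphs rank higher than bursty ones.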
# Sort keywords by weight, highest first
pointList.sort(key=itemgetter(1), reverse=True)
print(pointList)
# Take the ten most important words (fewer if the list is shorter)
keyWords = [pair[0] for pair in pointList[:10]]
print("Important words")
print(keyWords)
s = Statistics()
s.keywords = keyWords
s.title = keyWords[0]
'''
The following code implements the paragraph scoring algorithm based on
eigenvectors of the similarity matrix.
'''
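# Semantics of the matrix built below: similarityMatrix[i][j] is the fraction
# of paragraph i's nouns that also occur in paragraph j.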
# Build the similarity matrix used to rate paragraphs
similarityMatrix = []
for i in range(0, numberOfParagraphs):
    currentRow = []
    for j in range(0, numberOfParagraphs):
        freq = 0
        nounsInThisPara = 0
        for k in allNouns:
            currentOccurenceVector = occurenceVectorDict[k]
            if currentOccurenceVector[i] > 0:
                nounsInThisPara = nounsInThisPara + 1
            if currentOccurenceVector[i] > 0 and currentOccurenceVector[j] > 0:
                freq = freq + 1
        if nounsInThisPara == 0:
            similarity = 0
        else:
            similarity = float(freq) / float(nounsInThisPara)
        currentRow.append(similarity)
    similarityMatrix.append(currentRow)
print("Similarity Matrix")
self.printMatrix(similarityMatrix)
similarityArray=numpy.array(similarityMatrix)
#Calculating eigan values of similarity matrix
eigenvalues, eigenvectors = numpy.linalg.eig(similarityArray)
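# Note: numpy.linalg.eig returns the eigenvectors as the columns of the
# returned array (eigenvectors[:, i]); for a non-symmetric matrix such as
# this one, the results may be complex-valued.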
# Print each paragraph together with its row of the eigenvector matrix,
# for inspection only
print("Eigenvectors")
for i in range(0, numberOfParagraphs):
    print(self.paragraphs[i])
    print(eigenvectors[i])
paragraphRatings = []
k = 0
for row in eigenvectors:
    count = 0
    for component in row:
        if component > 0.001:  # count the clearly positive components
            count = count + 1
    paragraphRatings.append((count, k))
    k = k + 1
# The following lines locate the most important paragraph:
# sort paragraphs by rating, highest first
paragraphRatings.sort(key=itemgetter(0), reverse=True)
s.importantPara = self.paragraphs[paragraphRatings[0][1]]
s.importantParaRating = paragraphRatings[0][0]
return s
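For context, here is a minimal usage sketch of the method above. The class name ParagraphProcessor and its constructor are hypothetical stand-ins for the omitted enclosing class; only the Statistics result object and the attributes set in the example (keywords, title, importantPara, importantParaRating) come from the code shown.

# Hypothetical driver; ParagraphProcessor is an assumed name for the class
# that defines processParagraphs() above.
processor = ParagraphProcessor(["First paragraph ...", "Second paragraph ..."])
stats = processor.processParagraphs()
print(stats.title)                # highest-weighted keyword
print(stats.keywords)             # up to ten top keywords
print(stats.importantPara)        # highest-rated paragraph
print(stats.importantParaRating)  # its rating (count of positive eigenvector components)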