本文整理汇总了Python中nltk.PorterStemmer.decode方法的典型用法代码示例。如果您正苦于以下问题:Python PorterStemmer.decode方法的具体用法?Python PorterStemmer.decode怎么用?Python PorterStemmer.decode使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。需要说明的是,下面示例中的decode是在字符串token上调用的(tok.decode('utf-8')),而PorterStemmer对象上调用的是stem方法。您也可以进一步了解该方法所在类nltk.PorterStemmer的用法示例。
在下文中一共展示了PorterStemmer.decode方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: preprocess
# 需要导入模块: from nltk import PorterStemmer [as 别名]
# 注意: decode 是字符串的方法, 而不是 nltk.PorterStemmer 的成员, 因此不能写成 from nltk.PorterStemmer import decode
def preprocess(result):
    """Tokenize a search result's title and snippet and store cleaned tokens.

    The title and snippet are each passed through ``removePunct``, joined
    with a single space and split with ``nltk.word_tokenize``.  Tokens found
    in ``STOPS`` are dropped; every remaining token is Porter-stemmed and
    lower-cased.

    Args:
        result: object exposing ``title`` and ``snippet`` attributes.

    Returns:
        The same ``result`` object, with its ``tokens`` attribute set to the
        list of cleaned tokens.
    """
    words = removePunct(result.title)
    words += " "
    words += removePunct(result.snippet)

    stemmer = PorterStemmer()  # one stemmer per call instead of one per token
    cleaned = []
    for tok in nltk.word_tokenize(words):
        if tok in STOPS:
            continue
        # Only byte strings need decoding; calling decode() on text that is
        # already unicode triggers an implicit ASCII encode first and fails
        # on non-ASCII characters.
        if isinstance(tok, bytes):
            tok = tok.decode('utf-8')
        tok = stemmer.stem(tok).lower()
        # Hand tokens back as the interpreter's native str type (UTF-8
        # encoded when the stem is not already a str).
        if not isinstance(tok, str):
            tok = tok.encode('utf-8')
        cleaned.append(tok)

    # The cleaned tokens must be stored explicitly: rebinding the loop
    # variable alone never changes the list being iterated.
    result.tokens = cleaned
    return result
示例2: searchRank
# 需要导入模块: from nltk import PorterStemmer [as 别名]
# 注意: decode 是字符串的方法, 而不是 nltk.PorterStemmer 的成员, 因此不能写成 from nltk.PorterStemmer import decode
def _show_result(res):
    """Print one result's rank, title, url and snippet, then a blank line."""
    print(res.rank)
    print(res.title)
    print(res.url)
    print(res.snippet)
    print("")


def _ask_relevant_ranks(limit=5):
    """Prompt for up to ``limit`` distinct result numbers.

    Entering a negative number stops the prompt early.  Duplicate and
    non-numeric entries are reported and do not count toward the limit.

    Returns:
        List of the chosen result numbers, in the order they were entered.
    """
    chosen = []
    prompt = "Enter a result number: "
    while len(chosen) < limit:
        # raw_input + int(): the typed text is parsed as a number only and is
        # never evaluated as an expression.
        reply = raw_input(prompt)
        prompt = "Enter a result number ( negative to quit ): "
        try:
            number = int(reply)
        except ValueError:
            print("Error: Please enter a whole number")
            continue
        if number < 0:
            break
        if number in chosen:
            print("Error: You already entered that result")
        else:
            chosen.append(number)
    return chosen


def _write_joined(path, mode, pieces):
    """Write every item of ``pieces`` to ``path``, each followed by a space."""
    with open(path, mode) as handle:
        for piece in pieces:
            handle.write(piece + ' ')


def searchRank(query):
    """Run a search, collect relevance feedback and re-rank the results.

    Steps:
      1. Fill the result list through two ``googleSearch`` calls, run
         ``preprocess`` on every result and print it.
      2. Ask the user for up to 5 relevant result numbers.
      3. Write the titles and snippets of the chosen results to
         ``<query>.txt`` and their preprocessed tokens to
         ``<query>-clean.txt``.
      4. Score every result against the chosen set with ``calc_cos`` and
         ``jaccard``, sort by the measure the user picks and print the list.

    Args:
        query: search string; also used as the base name of the two files
            written in the current working directory.

    Returns:
        None.  Results are printed and the two files are written.
    """
    resList = []  # list of search result objects
    # Start arguments 1 and 11 -- presumably the first two pages of results
    # appended to resList; confirm against googleSearch.
    googleSearch(query, resList, 1)
    googleSearch(query, resList, 11)
    for r in resList:
        r = preprocess(r)  # initialize tokens attribute with pre-processed words
        r.vector = Counter(r.tokens)
        _show_result(r)

    # ask user which results are relevant
    print("Choose up to 5 results that were relevant to your search.")
    print("Enter a negative number to quit.")
    relList = _ask_relevant_ranks(5)  # "indexes" (ranks) of relevant results

    # Chosen results, in the order the user entered their numbers.
    relevant = [res for rank in relList for res in resList if res.rank == rank]

    # write relevant data to file
    rawPieces = []
    for res in relevant:
        rawPieces.append(res.title)
        rawPieces.append(res.snippet)
    _write_joined(query + '.txt', 'wb', rawPieces)

    # Relevance tokens are taken from the chosen results' own tokens, so the
    # relevance set always goes through exactly the same preprocessing as the
    # results it is compared with.
    relTokens = [tok for res in relevant for tok in res.tokens]
    _write_joined(query + '-clean.txt', 'w', relTokens)

    # NOTE(review): when nothing was chosen relTokens is empty; confirm that
    # calc_cos and jaccard accept an empty vector / set.
    relevanceVector = Counter(relTokens)  # vector for relevance data to calc similarity
    print("Calculating relevancy of your search results......")
    # calculate similarity
    for r in resList:
        r.cosine = calc_cos(r.vector, relevanceVector)
        r.jaccard = jaccard(set(r.tokens), set(relTokens))

    print("Select sorting preference:")
    print("[1] Jaccard Coefficient")
    print("[2] Cosine Similarity")
    print("")
    sortChoice = raw_input("Enter choice here: ").strip().lower()
    if sortChoice in ('1', 'j', 'jaccard', 'jaccard coefficient'):
        resList.sort(key=lambda x: x.jaccard, reverse=True)
        print("Showing results based on jaccard coeffecient: ")
    elif sortChoice in ('2', 'c', 'cosine', 'cosine similarity'):
        resList.sort(key=lambda x: x.cosine, reverse=True)
        print("Showing results based on cosine similarity: ")
    else:
        # Say so explicitly instead of silently listing unsorted results.
        print("Unrecognized choice; showing results in their original order: ")

    for r in resList:
        print("")
        _show_result(r)