当前位置: 首页>>代码示例>>Python>>正文


Python PorterStemmer.decode方法代码示例

本文整理汇总了Python中nltk.PorterStemmer.decode方法的典型用法代码示例。如果您正苦于以下问题:Python PorterStemmer.decode方法的具体用法?Python PorterStemmer.decode怎么用?Python PorterStemmer.decode使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在nltk.PorterStemmer的用法示例。


在下文中一共展示了PorterStemmer.decode方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: preprocess

# 需要导入模块: from nltk import PorterStemmer [as 别名]
# 注意: decode 并不是 PorterStemmer 的方法, 而是 Python 2 字符串(str)的方法, 示例中以 tok.decode('utf-8') 的形式调用, 无需单独导入
def preprocess(result):
    """Tokenize a search result and attach its normalized tokens.

    The title and snippet are each passed through ``removePunct``, joined
    with a single space and split with ``nltk.word_tokenize``.  Tokens found
    in ``STOPS`` are dropped; every remaining token is Porter-stemmed,
    lower-cased and encoded back to UTF-8 bytes.

    Args:
        result: object exposing ``title`` and ``snippet`` attributes.  It is
            modified in place (its ``tokens`` attribute is set).

    Returns:
        The same ``result`` object, with ``result.tokens`` holding the list
        of normalized tokens.
    """
    words = removePunct(result.title) + " " + removePunct(result.snippet)

    # One stemmer instance is enough; it does not need rebuilding per token.
    stemmer = PorterStemmer()

    # Build a new list. Rebinding the loop variable inside a ``for`` loop
    # never changes the list being iterated, so the normalized form has to
    # be collected explicitly to end up in ``result.tokens``.
    # NOTE(review): the stop-word test runs on the raw token, before
    # lower-casing; if STOPS holds lower-case words only, capitalized stop
    # words are kept -- confirm against the STOPS definition.
    result.tokens = [
        stemmer.stem(tok.decode('utf-8')).lower().encode('utf-8')
        for tok in nltk.word_tokenize(words)
        if tok not in STOPS
    ]
    return result
开发者ID:jaredhancock31,项目名称:GoogleRelevanceRanker,代码行数:12,代码来源:ranker.py

示例2: searchRank

# 需要导入模块: from nltk import PorterStemmer [as 别名]
# 注意: decode 并不是 PorterStemmer 的方法, 而是 Python 2 字符串(str)的方法, 示例中以 tok.decode('utf-8') 的形式调用, 无需单独导入
def searchRank( query ):
    resList = []    # list of search result objects
    relList = []    # list of "indexes" of relevant results

    googleSearch(query, resList, 1)
    googleSearch(query, resList, 11)

    for r in resList:
        r = preprocess(r)       # initialize tokens attribute with pre-processed words
        r.vector = Counter(r.tokens)
        print r.rank
        print r.title
        print r.url
        print r.snippet
        print

    # ask user which results are relevant
    print "Choose up to 5 results that were relevant to your search."
    print "Enter a negative number to quit."
    relNum = int(input("Enter a result number: "))
    i = 0
    while relNum >= 0 and i < 5:
        if relNum not in relList:
            relList.append(relNum)
        else:
            print "Error: You already entered that result"
        i += 1
        relNum = int(input("Enter a result number ( negative to quit ): "))

    # write relevant data to file
    infile = open(query+'.txt', 'wb')
    for i in relList:
        for r in resList:
            if i == r.rank:
                infile.write(r.title + ' ')
                infile.write(r.snippet + ' ')
    infile.close()

    '''--------------------pre-process our relevance test set-------------------------'''
    readfile = open(query+'.txt', 'rb')
    relWords = readfile.read()
    relWords = removePunct(relWords)
    relTokens = nltk.word_tokenize(relWords)

    infile = open(query+'-clean.txt', 'w')

    for tok in relTokens:
        if tok not in STOPS:
            tok = PorterStemmer().stem(tok.decode('utf-8'))
            tok = tok.lower().encode('utf-8')
            infile.write(tok + ' ')

    infile.close()

    '''--------------------calculate, sort, and display----------------------------------'''
    relevanceVector = Counter(relTokens)    # get vector for relevance data to calc similarity

    print "Calculating relevancy of your search results......"
    # calculate similarity
    for r in resList:
        r.cosine = calc_cos(r.vector, relevanceVector)
        r.jaccard = jaccard(set(r.tokens), set(relTokens))
        # print "cosine:", r.cosine
        # print "jaccard:", r.jaccard

    print "Select sorting preference:"
    print "[1] Jaccard Coefficient"
    print "[2] Cosine Similarity"
    print
    sortChoice = raw_input("Enter choice here: ")

    if sortChoice.lower() in ['1', 'j', 'jaccard', 'jaccard coefficient']:
        resList.sort(key = lambda x: x.jaccard, reverse=True)
        print "Showing results based on jaccard coeffecient: "
    elif sortChoice.lower() in ['2', 'c', 'cosine','cosine similarity']:
        resList.sort(key = lambda x: x.cosine, reverse=True)
        print "Showing results based on cosine similarity: "

    for r in resList:
        print
        print r.rank
        print r.title
        print r.url
        print r.snippet
        print
开发者ID:jaredhancock31,项目名称:GoogleRelevanceRanker,代码行数:87,代码来源:ranker.py


注:本文中的nltk.PorterStemmer.decode方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。