當前位置: 首頁>>代碼示例>>Python>>正文


Python Filter.unstem方法代碼示例

本文整理匯總了Python中Filter.unstem方法的典型用法代碼示例。如果您正苦於以下問題:Python Filter.unstem方法的具體用法?Python Filter.unstem怎麼用?Python Filter.unstem使用的例子?那麼, 這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在Filter的用法示例。


在下文中一共展示了Filter.unstem方法的2個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: getGroupWide

# 需要導入模塊: import Filter [as 別名]
# 或者: from Filter import unstem [as 別名]
def getGroupWide(folder='../test/patents/US_out/full/'):
    """Return the set of terms shared across a folder of documents.

    Every file in *folder* whose name ends in ``.txt`` is parsed with
    ``NPParser`` using the document filters from ``Settings`` (with
    ``'stops'`` removed).  A term is kept when it occurs in more than 20%
    of those files.  When the ``'stem'`` filter is active, each kept term
    is passed through ``Filter.unstem`` and everything that call yields is
    added to the result; otherwise the term itself is added.

    Args:
        folder: Directory holding the ``.txt`` files.  If it contains a
            ``filter.save`` file, that file is loaded through
            ``Filter._get_stemdict`` before parsing.

    Returns:
        A set of terms.
    """
    # Local import so the block does not depend on a file-level import.
    from collections import Counter

    parser = NPParser.NPParser()
    filters = Settings.getDocumentFilters()
    if 'stops' in filters:
        filters.remove('stops')
    filenames = [f for f in os.listdir(folder) if f.endswith('.txt')]
    filtfname = os.path.join(folder, 'filter.save')
    if os.path.exists(filtfname):
        Filter._get_stemdict(filtfname)
    # NOTE: the stem dictionary is only loaded here; the matching
    # Filter._save_stemdict(filtfname) call is disabled.

    # Document frequency: number of files in which each term occurs.
    # Counting over set(nps) makes a term count once per file even if the
    # parser reports it several times, and avoids rescanning every file's
    # terms for every distinct term.
    doc_freq = Counter()
    for f in filenames:
        nps = parser.getTerms(os.path.join(folder, f), filters)
        doc_freq.update(set(nps))

    threshold = len(filenames) * 0.2
    use_stem = 'stem' in filters
    retlist = set()
    for term, count in doc_freq.items():
        if count > threshold:
            if use_stem:
                retlist.update(Filter.unstem(term))
            else:
                retlist.add(term)
    return retlist
開發者ID:AdamMeyers,項目名稱:The_Termolator,代碼行數:33,代碼來源:patentterms.py

示例2: run

# 需要導入模塊: import Filter [as 別名]
# 或者: from Filter import unstem [as 別名]
def run(args):
    """Collect terms from a directory according to command-line arguments.

    Args:
        args: Argument list in ``sys.argv`` form: ``args[0]`` is the
            program name, followed by one existing directory path and one
            mode flag (``-terms``, ``-chem``, ``-dna`` or ``-group``), in
            either order.

    Returns:
        ``None`` (after printing the usage line) when the argument count is
        wrong or when no directory or no mode flag was given.  For
        ``-group``, whatever ``getGroupWide(path)`` returns.  Otherwise a
        set built from the results of calling the selected function on
        every ``.txt`` file in the directory; for ``-terms`` each result is
        first expanded through ``Filter.unstem``.
    """
    usage = 'Usage: ' + args[0] + ' path [-terms|-chem|-dna|-group]'
    logging.basicConfig(level=LEVEL)
    if len(args) < 2 or len(args) > 3:
        # Parenthesised form prints the same text on Python 2 and 3.
        print(usage)
        return None
    path = None
    getFunc = None
    for arg in args[1:]:
        # The directory test comes first, so an existing directory that
        # happens to be named like a flag is treated as the path.
        if os.path.isdir(arg):
            path = arg
        elif arg == '-terms':
            getFunc = getPatentTerms
        elif arg == '-chem':
            getFunc = getChemicals
        elif arg == '-dna':
            getFunc = getDNA
        elif arg == '-group':
            getFunc = getGroupWide
    if not path or not getFunc:
        print(usage)
        return None
    path = os.path.abspath(path)
    # __name__ exists on functions in both Python 2 and 3, unlike the
    # Python 2-only func_name attribute.
    func_name = getFunc.__name__
    logging.info('RDG path: '+path)
    logging.info('Get Function: '+func_name)
    if func_name == 'getGroupWide':
        # The group-wide extractor takes the whole directory at once.
        return getFunc(path)

    is_patent_terms = func_name == 'getPatentTerms'
    logging.debug('Collecting File ids...')
    filenames = [f for f in os.listdir(path) if f.endswith('.txt')]
    terms = []
    logging.debug('Finding terms...')
    filtfname = os.path.join(path, 'filter.save')
    if is_patent_terms and os.path.exists(filtfname):
        Filter._get_stemdict(filtfname)
    for f in filenames:
        logging.debug('...'+f+'...')
        terms.extend(getFunc(os.path.join(path, f)))
    # NOTE: the stem dictionary is only loaded above; the matching
    # Filter._save_stemdict(filtfname) call is disabled.
    logging.debug('Clean up...')
    if not is_patent_terms:
        return set(terms)
    unstemmed = set()
    for t in terms:
        unstemmed.update(Filter.unstem(t))
    return unstemmed
開發者ID:AdamMeyers,項目名稱:The_Termolator,代碼行數:50,代碼來源:patentterms.py


注:本文中的Filter.unstem方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。