当前位置: 首页>>代码示例>>Python>>正文


Python Filter._get_stemdict方法代码示例

本文整理汇总了Python中Filter._get_stemdict方法的典型用法代码示例。如果您正苦于以下问题：Python Filter._get_stemdict方法的具体用法？Python Filter._get_stemdict怎么用？Python Filter._get_stemdict使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块Filter的用法示例。


在下文中一共展示了Filter._get_stemdict方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: getGroupWide

# 需要导入模块: import Filter [as 别名]
# 或者: from Filter import _get_stemdict [as 别名]
def getGroupWide(folder='../test/patents/US_out/full/'):
    """Return the set of terms that occur in more than 20% of a folder's files.

    Every ``.txt`` file directly inside ``folder`` is parsed with
    ``NPParser.getTerms`` using the document filters from ``Settings``
    (with the ``'stops'`` filter removed when present).  A term is kept
    when the number of files containing it exceeds
    ``0.2 * number_of_files``.

    When ``'stem'`` is among the active filters, each kept term is expanded
    through ``Filter.unstem`` and every item it yields is added to the
    result; otherwise the term itself is added.
    """
    parser = NPParser.NPParser()
    filters = Settings.getDocumentFilters()
    if 'stops' in filters:
        filters.remove('stops')

    filenames = [name for name in os.listdir(folder) if name.endswith('.txt')]

    # Load a previously saved stem dictionary, if the folder has one.
    # NOTE: the dictionary is only loaded here; this function never writes
    # it back.
    stemdict_path = os.path.join(folder, 'filter.save')
    if os.path.exists(stemdict_path):
        Filter._get_stemdict(stemdict_path)

    # One term collection per file, in directory-listing order.
    per_file_terms = [
        parser.getTerms(os.path.join(folder, name), filters)
        for name in filenames
    ]

    all_terms = set()
    for terms in per_file_terms:
        all_terms.update(terms)

    threshold = len(filenames) * 0.2
    stemmed = 'stem' in filters
    retlist = set()
    for term in all_terms:
        # Document frequency: in how many files does this term appear?
        hits = sum(1 for terms in per_file_terms if term in terms)
        if hits <= threshold:
            continue
        if stemmed:
            retlist.update(Filter.unstem(term))
        else:
            retlist.add(term)
    return retlist
开发者ID:AdamMeyers,项目名称:The_Termolator,代码行数:33,代码来源:patentterms.py

示例2: getTerms

# 需要导入模块: import Filter [as 别名]
# 或者: from Filter import _get_stemdict [as 别名]
 def getTerms(self, filename, filters=[], relaxed=False):
     """Return a FreqDist of the filtered candidate terms found in a file.

     Results are cached beside the input as ``filename + ".nps"``, holding
     a pickled ``(filters, FreqDist)`` pair.  The cache is used only when
     both it and a ``filter.save`` file in the same directory exist and the
     cached filter list equals ``filters``; in that case the stem
     dictionary is loaded from ``filter.save`` if ``Filter.unstemdict`` is
     still empty.

     Otherwise the terms are recomputed, the cache is rewritten, and any
     existing ``filter.save`` is deleted.

     Args:
         filename: path of the document to process.
         filters: names of filters, each looked up in ``Filter.criteria``
             and applied in order to every candidate term.  The default
             list is never mutated here.
         relaxed: passed through to ``extractPossibleTerms``.

     Returns:
         A FreqDist counting each term that is still truthy after all
         filters have been applied.
     """
     filterfname = os.path.join(os.path.dirname(filename), "filter.save")
     cachefname = filename + ".nps"
     if os.path.exists(cachefname) and os.path.exists(filterfname):
         # `with` guarantees the handle is closed even if unpickling fails.
         # NOTE(review): pickle.load trusts the cache file's contents; only
         # safe while the .nps files are produced by this program.
         with open(cachefname) as cache:
             old_filters, fd = pickle.load(cache)
         if old_filters == filters:
             if not Filter.unstemdict:
                 Filter._get_stemdict(filterfname)
             return fd
     NPs = self.getNPs(filename)
     fd = FreqDist()
     for NP in NPs:
         # get the possible terms for each NP
         terms = self.extractPossibleTerms(NP, relaxed)
         # filter each term by the given criteria; this requires keeping
         # case information until this point
         for t in terms:
             for criterion in filters:
                 t = Filter.criteria[criterion](t)
             if t:
                 fd[t] += 1
     # NOTE(review): text mode is kept so existing caches stay readable;
     # presumably relies on Python 2's default ASCII pickle protocol, and
     # would need "rb"/"wb" under Python 3 - confirm before porting.
     with open(cachefname, "w") as cache:
         pickle.dump((filters, fd), cache)
     # Drop the saved stem dictionary after a recomputation - presumably
     # because it may no longer match the fresh terms (TODO confirm).
     if os.path.exists(filterfname):
         os.remove(filterfname)
     return fd
开发者ID:AdamMeyers,项目名称:The_Termolator,代码行数:32,代码来源:NPParser.py

示例3: run

# 需要导入模块: import Filter [as 别名]
# 或者: from Filter import _get_stemdict [as 别名]
def run(args):
    """Command-line driver: collect terms from the ``.txt`` files of a directory.

    Args:
        args: argv-style list.  ``args[0]`` is the program name; the
            remaining one or two items are an existing directory path and
            one of ``-terms``, ``-chem``, ``-dna`` or ``-group``, in any
            order.  Unrecognised items are ignored, and if a flag is
            repeated the last one wins.

    Returns:
        The set of extracted terms, or ``None`` (after printing the usage
        line) when the argument count is wrong or the path or flag is
        missing.
    """
    usage = 'Usage: ' + args[0] + ' path [-terms|-chem|-dna|-group]'
    logging.basicConfig(level=LEVEL)
    if len(args) < 2 or len(args) > 3:
        # Parenthesised single-argument form is valid on Python 2 and 3.
        print(usage)
        return None
    path = None
    getFunc = None
    for arg in args[1:]:
        if os.path.isdir(arg):
            path = arg
        elif arg == '-terms':
            getFunc = getPatentTerms
        elif arg == '-chem':
            getFunc = getChemicals
        elif arg == '-dna':
            getFunc = getDNA
        elif arg == '-group':
            getFunc = getGroupWide
    if not path or not getFunc:
        print(usage)
        return None
    path = os.path.abspath(path)
    # `__name__` exists on functions in both Python 2 and 3, unlike the
    # Python-2-only `func_name`.
    func_name = getFunc.__name__
    logging.info('RDG path: %s', path)
    logging.info('Get Function: %s', func_name)
    if func_name == 'getGroupWide':
        # The group-wide extractor walks the directory itself.
        return getFunc(path)

    logging.debug('Collecting File ids...')
    filenames = [f for f in os.listdir(path) if f.endswith('.txt')]
    logging.debug('Finding terms...')
    wants_unstem = func_name == 'getPatentTerms'
    filtfname = os.path.join(path, 'filter.save')
    # NOTE: the stem dictionary is only loaded here; it is never saved by
    # this function.
    if wants_unstem and os.path.exists(filtfname):
        Filter._get_stemdict(filtfname)
    terms = []
    for f in filenames:
        logging.debug('...%s...', f)
        terms.extend(getFunc(os.path.join(path, f)))
    logging.debug('Clean up...')
    if wants_unstem:
        # Expand each term through Filter.unstem, collecting every item it
        # yields.
        unstemmed = set()
        for t in terms:
            unstemmed.update(Filter.unstem(t))
        return unstemmed
    return set(terms)
开发者ID:AdamMeyers,项目名称:The_Termolator,代码行数:50,代码来源:patentterms.py


注:本文中的Filter._get_stemdict方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。