本文整理汇总了Python中Filter.unstem方法的典型用法代码示例。如果您正苦于以下问题：Python Filter.unstem方法的具体用法？Python Filter.unstem怎么用？Python Filter.unstem使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Filter的用法示例。
在下文中一共展示了Filter.unstem方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: getGroupWide
# 需要导入模块: import Filter [as 别名]
# 或者: from Filter import unstem [as 别名]
def getGroupWide(folder='../test/patents/US_out/full/'):
    """Return the terms that occur in more than 20% of the files in a folder.

    Every ``.txt`` file directly inside *folder* is parsed with
    ``NPParser.NPParser().getTerms`` using the filters returned by
    ``Settings.getDocumentFilters()`` with the ``'stops'`` entry removed.
    A term is kept when the number of files containing it is strictly
    greater than ``0.2 * number_of_txt_files``.

    If ``filter.save`` exists in *folder*, it is handed to
    ``Filter._get_stemdict`` before any file is parsed.  When the ``'stem'``
    filter is active, each kept term is replaced by whatever
    ``Filter.unstem(term)`` yields; otherwise the term itself is returned.

    Args:
        folder: Directory containing the ``.txt`` files to analyse.

    Returns:
        A ``set`` of the group-wide terms (empty when the folder holds no
        ``.txt`` files).
    """
    parser = NPParser.NPParser()
    filters = Settings.getDocumentFilters()
    # NOTE(review): this mutates the object returned by getDocumentFilters();
    # confirm that it hands out a fresh collection on every call.
    if 'stops' in filters:
        filters.remove('stops')

    filenames = [f for f in os.listdir(folder) if f.endswith('.txt')]

    filtfname = os.path.join(folder, 'filter.save')
    if os.path.exists(filtfname):
        Filter._get_stemdict(filtfname)

    # Document frequency: for each term, the number of files it appears in.
    # De-duplicating per file first means a term repeated inside one file
    # still counts only once for that file.
    doc_counts = {}
    for fname in filenames:
        nps = parser.getTerms(os.path.join(folder, fname), filters)
        for term in set(nps):
            doc_counts[term] = doc_counts.get(term, 0) + 1
    # NOTE: writing the stem dictionary back to filter.save is deliberately
    # not done here (the save step is disabled).

    threshold = len(filenames) * 0.2
    expand_stems = 'stem' in filters
    retlist = set()
    for term, count in doc_counts.items():
        if count > threshold:
            if expand_stems:
                # unstem() returns an iterable of surface forms for the stem.
                retlist.update(Filter.unstem(term))
            else:
                retlist.add(term)
    return retlist
示例2: run
# 需要导入模块: import Filter [as 别名]
# 或者: from Filter import unstem [as 别名]
def run(args):
    """Run one term-extraction function over a directory of ``.txt`` files.

    Args:
        args: ``sys.argv``-style list: the program name followed by a
            directory path and one of ``-terms``, ``-chem``, ``-dna`` or
            ``-group`` (in either order).  An argument that names an existing
            directory is always taken as the path; unrecognised arguments are
            ignored.  If an option is given more than once, the last wins.

    Returns:
        A ``set`` of the collected terms, or ``None`` (after printing the
        usage line) when the arguments do not supply both a directory and an
        extraction option.
    """
    # Guard against an empty list so that malformed input prints the usage
    # line instead of raising IndexError.
    prog = args[0] if args else ''
    usage = 'Usage: ' + prog + ' path [-terms|-chem|-dna|-group]'
    logging.basicConfig(level=LEVEL)
    if len(args) < 2 or len(args) > 3:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(usage)
        return None

    path = None
    getFunc = None
    for arg in args[1:]:
        if os.path.isdir(arg):
            path = arg
        elif arg == '-terms':
            getFunc = getPatentTerms
        elif arg == '-chem':
            getFunc = getChemicals
        elif arg == '-dna':
            getFunc = getDNA
        elif arg == '-group':
            getFunc = getGroupWide
    if not path or not getFunc:
        print(usage)
        return None

    # __name__ is available on both Python 2 and 3 (func_name is 2-only).
    func_name = getFunc.__name__
    path = os.path.abspath(path)
    logging.info('RDG path: %s', path)
    logging.info('Get Function: %s', func_name)

    if func_name == 'getGroupWide':
        # The group-wide extractor walks the directory itself.
        terms = getFunc(path)
    else:
        logging.debug('Collecting File ids...')
        filenames = [f for f in os.listdir(path) if f.endswith('.txt')]
        terms = []
        logging.debug('Finding terms...')
        filtfname = os.path.join(path, 'filter.save')
        if func_name == 'getPatentTerms' and os.path.exists(filtfname):
            Filter._get_stemdict(filtfname)
        for f in filenames:
            logging.debug('...%s...', f)
            terms.extend(getFunc(os.path.join(path, f)))
        # NOTE: writing the stem dictionary back to filter.save is
        # deliberately not done here (the save step is disabled).

    logging.debug('Clean up...')
    if func_name == 'getPatentTerms':
        # Expand each collected term through Filter.unstem().
        unstemmed = set()
        for t in terms:
            unstemmed.update(Filter.unstem(t))
        terms = unstemmed
    return set(terms)