本文整理汇总了 Python 中 Filter._get_stemdict 方法的典型用法代码示例。如果您正苦于以下问题:Python Filter._get_stemdict 方法的具体用法?Python Filter._get_stemdict 怎么用?Python Filter._get_stemdict 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 Filter 的用法示例。
在下文中一共展示了Filter._get_stemdict方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: getGroupWide
# 需要导入模块: import Filter [as 别名]
# 或者: from Filter import _get_stemdict [as 别名]
def getGroupWide(folder='../test/patents/US_out/full/'):
    """Return the terms that occur in more than 20% of the files in a folder.

    Every ``*.txt`` file directly inside ``folder`` is passed to
    ``NPParser.getTerms`` together with the document filters reported by
    ``Settings.getDocumentFilters()``, with ``'stops'`` removed when present.
    A term is kept when the number of files containing it is strictly
    greater than 20% of the number of ``.txt`` files.

    Args:
        folder: Directory to scan for ``.txt`` files; the optional
            ``filter.save`` file is looked up in the same directory.

    Returns:
        A set of terms. When the ``'stem'`` filter is active, each kept term
        contributes whatever ``Filter.unstem`` yields for it instead of the
        term itself.
    """
    from collections import Counter

    parser = NPParser.NPParser()
    filters = Settings.getDocumentFilters()
    # NOTE(review): this removes 'stops' from the object returned by Settings
    # in place; if that object is shared, the change is visible to later
    # callers -- confirm this is intended.
    if 'stops' in filters:
        filters.remove('stops')

    filenames = [f for f in os.listdir(folder) if f.endswith('.txt')]

    filtfname = os.path.join(folder, 'filter.save')
    if os.path.exists(filtfname):
        # Presumably restores a previously saved stem dictionary -- see Filter.
        Filter._get_stemdict(filtfname)

    # Document frequency: the number of files each term appears in. Wrapping
    # each result in set() makes every file contribute at most 1 per term,
    # regardless of how often the term occurs inside that file.
    doc_freq = Counter()
    for fname in filenames:
        nps = parser.getTerms(os.path.join(folder, fname), filters)
        doc_freq.update(set(nps))
    # NOTE: writing the stem dictionary back to filter.save
    # (Filter._save_stemdict) is intentionally left disabled here.

    threshold = len(filenames) * 0.2
    use_unstem = 'stem' in filters
    retlist = set()
    for term, count in doc_freq.items():
        if count <= threshold:
            continue
        if use_unstem:
            retlist.update(Filter.unstem(term))
        else:
            retlist.add(term)
    return retlist
示例2: getTerms
# 需要导入模块: import Filter [as 别名]
# 或者: from Filter import _get_stemdict [as 别名]
def getTerms(self, filename, filters=[], relaxed=False):
    """Return a FreqDist of the filtered terms found in ``filename``.

    Results are cached next to the input as ``<filename>.nps``, a pickle of
    ``(filters, FreqDist)``. The cache is used only when it was produced with
    an equal ``filters`` value and a ``filter.save`` file exists in the same
    directory; otherwise the terms are recomputed, the cache is rewritten and
    any existing ``filter.save`` in that directory is deleted.

    Args:
        filename: Path of the document to process.
        filters: Sequence of keys into ``Filter.criteria``; the matching
            callables are applied to each candidate term in order. The
            default list is shared between calls but is never mutated here.
        relaxed: Passed through to ``self.extractPossibleTerms``.

    Returns:
        A ``FreqDist`` counting each term that is still truthy after all
        filters have been applied.
    """
    cache_fname = filename + ".nps"
    filterfname = os.path.join(os.path.dirname(filename), "filter.save")

    if os.path.exists(cache_fname) and os.path.exists(filterfname):
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # safe while the .nps cache files come from a trusted source.
        # File modes are kept as originally written so that existing caches
        # stay readable.
        with open(cache_fname) as cache_file:
            old_filters, fd = pickle.load(cache_file)
        if old_filters == filters:
            if not Filter.unstemdict:
                Filter._get_stemdict(filterfname)
            return fd

    NPs = self.getNPs(filename)
    fd = FreqDist()
    for NP in NPs:
        # get the possible terms for each NP
        terms = self.extractPossibleTerms(NP, relaxed)
        # filter each term by some given criteria;
        # this requires keeping case information until this point
        for t in terms:
            for filter_name in filters:
                t = Filter.criteria[filter_name](t)
            # A falsy result after filtering means the term is not counted.
            if t:
                fd[t] += 1

    with open(cache_fname, "w") as cache_file:
        pickle.dump((filters, fd), cache_file)
    # The cache was just rebuilt, so drop the saved filter file in this
    # directory (presumably because its stem dictionary is now stale -- TODO
    # confirm against Filter).
    if os.path.exists(filterfname):
        os.remove(filterfname)
    return fd
示例3: run
# 需要导入模块: import Filter [as 别名]
# 或者: from Filter import _get_stemdict [as 别名]
def run(args):
    """Collect terms from a directory according to command-line style args.

    Args:
        args: Sequence shaped like ``sys.argv``: the program name followed by
            one or two further items. An item that is an existing directory
            becomes the input path; ``-terms``, ``-chem``, ``-dna`` and
            ``-group`` select the extraction function. Any other item is
            ignored.

    Returns:
        A set of terms, or ``None`` (after printing the usage string) when
        the argument count is wrong or no directory / mode flag was given.
    """
    usage = 'Usage: ' + args[0] + ' path [-terms|-chem|-dna|-group]'
    logging.basicConfig(level=LEVEL)
    if len(args) < 2 or len(args) > 3:
        # print(...) with a single argument behaves the same on Python 2 and 3.
        print(usage)
        return None

    path = None
    getFunc = None
    for arg in args[1:]:
        # The directory test comes first, so an existing directory whose name
        # equals a flag is treated as the path.
        if os.path.isdir(arg):
            path = arg
        elif arg == '-terms':
            getFunc = getPatentTerms
        elif arg == '-chem':
            getFunc = getChemicals
        elif arg == '-dna':
            getFunc = getDNA
        elif arg == '-group':
            getFunc = getGroupWide
    if not path or not getFunc:
        print(usage)
        return None

    path = os.path.abspath(path)
    # __name__ is available on both Python 2 and 3 (func_name is 2-only).
    func_name = getFunc.__name__
    logging.info('RDG path: %s', path)
    logging.info('Get Function: %s', func_name)

    if func_name == 'getGroupWide':
        # The group-wide extractor takes the whole directory at once.
        terms = getFunc(path)
    else:
        logging.debug('Collecting File ids...')
        filenames = [f for f in os.listdir(path) if f.endswith('.txt')]
        terms = []
        logging.debug('Finding terms...')
        filtfname = os.path.join(path, 'filter.save')
        if func_name == 'getPatentTerms' and os.path.exists(filtfname):
            Filter._get_stemdict(filtfname)
        for f in filenames:
            logging.debug('...%s...', f)
            terms.extend(getFunc(os.path.join(path, f)))
        # NOTE: writing the stem dictionary back to filter.save
        # (Filter._save_stemdict) is intentionally left disabled here.
        logging.debug('Clean up...')
        if func_name == 'getPatentTerms':
            # Replace each collected term with whatever Filter.unstem yields
            # for it.
            temp = set()
            for t in terms:
                temp.update(Filter.unstem(t))
            terms = temp
    # Deduplicate; a no-op copy when the result is already a set.
    return set(terms)