本文整理汇总了 Python 中 lucene.IndexSearcher.docFreq 方法的典型用法代码示例。如果您正苦于以下问题:Python IndexSearcher.docFreq 方法的具体用法?Python IndexSearcher.docFreq 怎么用?Python IndexSearcher.docFreq 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 lucene.IndexSearcher 的用法示例。
在下文中一共展示了 IndexSearcher.docFreq 方法的 2 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: delete_old
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 注意: docFreq 是 IndexSearcher 的实例方法,不能单独导入,需通过实例调用: searcher.docFreq(term)
def delete_old(self, index):
    """Remove from the search index every book that is no longer in the database.

    Collects the ids of all ``Book`` rows, looks up every indexed document
    whose ``is_book`` field is ``'true'``, and calls ``index.remove_book``
    for each indexed ``book_id`` that has no matching row.

    Args:
        index: Index wrapper; this method reads ``index.index`` to open a
            reader and calls ``index.remove_book(book_id)`` for stale ids.

    Returns:
        None.
    """
    existing_ids = set(book.id for book in Book.objects.all())
    reader = IndexReader.open(index.index, False)
    # Nested try/finally blocks: the reader is released even when creating
    # or closing the searcher fails.
    try:
        searcher = IndexSearcher(reader)
        try:
            # docFreq gives an upper bound on the number of hits, so it is
            # used as the result-window size for the search below.
            num = searcher.docFreq(Term('is_book', 'true'))
            if num <= 0:
                # Nothing is indexed as a book.  Returning here also avoids
                # asking Lucene for zero hits, which (as far as the Lucene
                # 3.x API goes) is rejected with IllegalArgumentException.
                return
            docs = searcher.search(
                Search.make_term_query(['true'], 'is_book'), num)
            for result in docs.scoreDocs:
                stored = searcher.doc(result.doc)
                book_id = int(stored.get('book_id'))
                if book_id not in existing_ids:
                    # Single-argument call form: same output as the print
                    # statement on Python 2, and valid syntax on Python 3.
                    print("book id %d doesn't exist." % book_id)
                    index.remove_book(book_id)
        finally:
            searcher.close()
    finally:
        reader.close()
示例2: x
# 需要导入模块: from lucene import IndexSearcher [as 别名]
# 注意: docFreq 是 IndexSearcher 的实例方法,不能单独导入,需通过实例调用: searcher.docFreq(term)
def x(obj, eng):
    """Add index terms that pass the length/frequency filters to a token->id map.

    The map is fetched from ``eng`` under ``eng_output`` (an empty dict is
    the fallback) and the indexer under ``eng_input``.  For every document
    number below ``reader.numDocs()`` the term-frequency vector of
    ``fieldname`` is read; each term that is long enough, not too frequent
    inside the document, and whose index-wide document frequency lies in
    ``(min_f_perindex, max_f_perindex]`` is given the next sequential id
    unless it already has one.

    ``eng_output``, ``eng_input``, ``fieldname`` and the threshold names are
    free variables resolved outside this function.

    Args:
        obj: Not used by this function.
        eng: Object providing ``getVar``; source of the map and the indexer.

    NOTE(review): nothing here writes the map back to ``eng``; if ``getVar``
    hands out the fallback dict, the additions may be lost - confirm against
    the engine's ``getVar`` contract.
    """
    token_ids = eng.getVar(eng_output, {})  # token -> id mapping
    source_indexer = eng.getVar(eng_input)
    reader = source_indexer.getReader()
    searcher = IndexSearcher(reader)
    doc_count = reader.numDocs()
    for doc_no in xrange(doc_count):
        term_vector = reader.getTermFreqVector(doc_no, fieldname)
        if not term_vector:
            continue
        doc_terms = term_vector.getTerms()
        doc_freqs = term_vector.getTermFrequencies()
        # The third zip argument only limits how many terms of this document
        # are examined (at most max_tokens_perdoc); its value is not used.
        limited = zip(doc_terms, doc_freqs, xrange(max_tokens_perdoc))
        for token, in_doc_freq, _ in limited:
            if len(token) < min_token_len or in_doc_freq > max_f_perdoc:
                continue
            # Number of documents in the index containing this term.
            index_freq = searcher.docFreq(Term(fieldname, token))
            if min_f_perindex < index_freq <= max_f_perindex:
                token_ids.setdefault(token, len(token_ids) + 1)