本文整理匯總了Python中reverend.thomas.Bayes.poolData方法的典型用法代碼示例。如果您正苦於以下問題:Python Bayes.poolData方法的具體用法?Python Bayes.poolData怎麼用?Python Bayes.poolData使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類reverend.thomas.Bayes的用法示例。
在下文中一共展示了Bayes.poolData方法的1個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: local_search
# 需要導入模塊: from reverend.thomas import Bayes [as 別名]
# 或者: from reverend.thomas.Bayes import poolData [as 別名]
def local_search(self, cid, term_unstemmed, recent):
    """Rank posts for a search term with a Bayes classifier built from exemplars.

    The term is stemmed and its exemplar post ids are fetched with
    ``get_term_exemplars``. With fewer than four exemplars the search is
    delegated to ``self.fulltext``. Otherwise a ``Bayes`` guesser is trained
    on the exemplar posts' tokens (pool ``"relevant"``) and on an equal
    number of randomly chosen posts (pool ``"random"``). Tokens whose
    add-one-smoothed relevant/random count ratio exceeds 2 are kept; the
    twelve strongest form an OR query. If fewer than three tokens qualify,
    the query is just the stemmed term. The first 25 matches are re-scored
    as ``relevant probability * post.broad_support`` (additionally
    multiplied by ``_post_age_score(post)`` when ``recent`` is truthy).

    Args:
        cid: Passed through to ``get_term_exemplars`` and ``fulltext``
            (presumably identifies the collection being searched --
            confirm against callers).
        term_unstemmed: The raw search term; it is stemmed before use.
        recent: When truthy, scores are weighted by ``_post_age_score``.

    Returns:
        On the classifier path, a list of at most ten ``SearchResult``
        objects sorted by descending ``score``. With fewer than four
        exemplars, whatever ``self.fulltext`` returns.
    """
    term = self.stem(term_unstemmed)
    exemplar_pids = self.get_term_exemplars(cid, term)
    if len(exemplar_pids) < 4:
        # Too few exemplars to train on; fall back to plain full-text search.
        return self.fulltext(cid, term, recent)
    log_tmp("SEARCH: %s exemplars" % len(exemplar_pids))

    guesser = Bayes()
    for ex_pid in exemplar_pids:
        # Tokenize once and reuse the result for both logging and training.
        ex_tokens = state.the.get_post(ex_pid, content=True).tokens()
        log_tmp("SEARCH: exemplar tokens: [%s]" % ex_tokens)
        guesser.train("relevant", ex_tokens)
        # TODO Toss in other factors, if possible.
    # Negative examples: as many random posts as there are exemplars
    # (probably cacheable, if we use a bigger pool).
    for neg_ex_pid in state.the.get_random_pids(len(exemplar_pids)):
        guesser.train(
            "random", state.the.get_post(neg_ex_pid, content=True).tokens()
        )
    log_tmp("SEARCH: trained")

    # Add-one-smoothed ratio of each token's count in the relevant pool to
    # its count in the random pool; the 1.0 forces float division.
    proportions = [
        (tok, (count + 1) / (1.0 * guesser.pools["random"].get(tok, 0) + 1))
        for (tok, count) in guesser.poolData("relevant")
    ]
    # Knock out the weak and irrelevant ones before sorting.
    proportions = [(tok, prop) for (tok, prop) in proportions if prop > 2]

    if len(proportions) < 3:
        # Not enough discriminating tokens: query on the stemmed term alone.
        query = xapian.Query(xapian.Query.OP_AND, [term])
    else:
        proportions.sort(key=operator.itemgetter(1), reverse=True)
        log_tmp("SEARCH: proportions: " + str(proportions))
        # Search for the twelve best words.
        query = xapian.Query(
            xapian.Query.OP_OR, [tok for (tok, prop) in proportions[:12]]
        )
    log_tmp("SEARCH: query: " + str(query))

    enq = xapian.Enquire(self.mainabase)
    # TODO: AND the query with something scoring for broad support and,
    # if appropriate, for recency.
    enq.set_query(query)
    mset = enq.get_mset(0, 25)

    results = []
    for m in mset:
        doc = m.get_document()
        post = state.the.get_post(int(doc.get_data()), True)
        # Default to 0.0 when the guesser reports no "relevant" pool, so a
        # post never inherits the previous post's probability and the first
        # post cannot hit an unbound name.
        rel_prob = dict(guesser.guess(post.tokens())).get("relevant", 0.0)
        score = rel_prob * post.broad_support
        if recent:
            score *= _post_age_score(post)
        results.append(SearchResult(post, term, score))

    # key= works on both Python 2 and 3; the old cmp-style comparator is
    # Python 2 only.
    results.sort(key=operator.attrgetter("score"), reverse=True)
    return results[:10]