本文整理汇总了Python中index.Index.make_snippet方法的典型用法代码示例。如果您正苦于以下问题:Python Index.make_snippet方法的具体用法?Python Index.make_snippet怎么用?Python Index.make_snippet使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类index.Index的用法示例。
在下文中一共展示了Index.make_snippet方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Searcher
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import make_snippet [as 别名]
class Searcher(object):
"""
Allows to search within an inverted index Index object from the index
module.
"""
def __init__(self, index_filename='index.json'):
    """
    Build a Searcher object, load the inverted index from
    'index_filename' file.

    index_filename -- path of the index file handed to Index().

    If the file does not exist, an error is logged and the process is
    terminated through sys.exit() with a non-zero status.
    """
    # Get a logger assuming that the logging facility has been set up by the
    # banana module.
    self._logger = logging.getLogger(__name__)
    # A missing index is a failure: log it as an error and report it with a
    # non-zero exit status so that calling scripts can detect it. SystemExit
    # is still what is raised, only the status code differs from success.
    if not os.path.exists(index_filename):
        self._logger.error('Unable to build the Searcher, '
                           'no index file %s in the current directory.',
                           index_filename)
        sys.exit(1)
    # Set up other members.
    self._index = Index(index_filename)
def query(self, query):
    """
    Search the index for 'query' and return the ranked answers.

    The query is tokenized with blobprocessor.make_tokens(). Every url whose
    title or full text matches at least one token is scored once per field
    with compute_bm25_relevance() and the two scores are added up.

    Returns a list of Answer objects sorted by decreasing score (ties are
    broken by decreasing url). Each Answer is built from the url, its score,
    the title, the title highlights, a snippet and the snippet highlights.

    Raises ValueError (a subclass of Exception) when 'query' is empty.
    """
    # Make sure this is not an empty query. The value is wrapped in a tuple
    # so that formatting cannot fail if an empty tuple is passed in.
    if not query:
        raise ValueError('Invalid query \"%s\" in Searcher.query().'
                         % (query,))
    #TODO add weights for the title and full text scores.
    tokenized_query = blobprocessor.make_tokens(query)
    urls_and_score = {}

    # Compute title relevance score.
    # Get urls with title matching query.
    title_index = self._index.get_title_index()
    matching_urls = set()
    for token in tokenized_query:
        for url in title_index.get_matching_urls(token):
            self._logger.debug(self._index.get_title(url))
            matching_urls.add(url)
    for url in matching_urls:
        score = self.compute_bm25_relevance(url, tokenized_query,
                                            title_index)
        self._logger.debug('Title score for url %s: %f', url, score)
        urls_and_score[url] = score

    # Compute full text relevance score.
    # Get urls with full text matching query.
    full_text_index = self._index.get_full_text_index()
    matching_urls = set()
    for token in tokenized_query:
        matching_urls.update(full_text_index.get_matching_urls(token))
    for url in matching_urls:
        score = self.compute_bm25_relevance(url, tokenized_query,
                                            full_text_index)
        self._logger.debug('Full text score for url %s: %f', url, score)
        # Add to the title score when the url already has one.
        urls_and_score[url] = urls_and_score.get(url, 0) + score

    # Sort the ranked urls by (score, url), best first. The key indexes the
    # (url, score) pair instead of unpacking it in the lambda signature,
    # which is a syntax error on Python 3.
    score_sorted_urls = sorted(urls_and_score.items(),
                               key=lambda item: (item[1], item[0]),
                               reverse=True)
    self._logger.debug(score_sorted_urls)

    # Build the Answer objects that will be returned.
    answers = []
    context_before = 5
    context_after = 8
    max_match_count = 5
    for url, score in score_sorted_urls:
        title = self._index.get_title(url)
        title_highlights = self._find_highlights(tokenized_query, title)
        snippet = self._index.make_snippet(url, tokenized_query,
                                           context_before, context_after,
                                           max_match_count)
        snippet_highlights = self._find_highlights(tokenized_query, snippet)
        # Add an Answer object to the answers collection.
        answers.append(Answer(url, score, title, title_highlights, snippet,
                              snippet_highlights))
    return answers
def _find_highlights(self, tokens, snippet):
"""
Build the highlights positions by parsing the given snippet.

The snippet is split on whitespace and, for every token, the index of
each word that contains the token is recorded. The comparison is case
insensitive. These positions are useful for instance to highlight the
matching words in the snippet.

tokens  -- iterable of strings to look for.
snippet -- string that is split into words with str.split().
"""
highlights = []
# Tokens are handled one after the other, so positions are grouped by token
# and a word matched by several tokens is recorded once per token.
for token in tokens:
for position, word in enumerate(snippet.split()):
# Substring containment, not equality: a token also matches longer words
# that merely contain it.
if token.lower() in word.lower():
highlights.append(position)
# NOTE(review): no return statement is visible in this excerpt; presumably
# 'highlights' is returned in the part of the method that is not shown.
#.........这里部分代码省略.........