本文整理汇总了Python中index.Index.build_index方法的典型用法代码示例。如果您正苦于以下问题:Python Index.build_index方法的具体用法?Python Index.build_index怎么用?Python Index.build_index使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类index.Index的用法示例。
在下文中一共展示了Index.build_index方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: SearchEngine
# 需要导入模块: from index import Index [as 别名]
# 或者: from index.Index import build_index [as 别名]
class SearchEngine(object):
"""
Create a search engine.
- Build an index on the given knowledge base
- Tokenize query
- Get intersection of postings
Return search results accordingly
"""
def __init__(self):
    """Set up the engine with a fresh ``Index`` instance stored in ``self.index``."""
    self.index = Index()
def build_index(self, path_to_knwoledge_base, path_to_index_dir):
    """
    Populate ``self.index``, reusing an index saved on disk when one exists.

    If ``path_to_index_dir`` already exists, the index is loaded from it via
    ``load_index`` and the knowledge base is not read at all. Otherwise the
    knowledge base is read, ``Index.build_index`` and
    ``Index.calculate_tfidf`` are run on it, and the result is saved to
    ``path_to_index_dir``.

    Args:
        path_to_knwoledge_base (str): location of the file with articles
        path_to_index_dir (str): location the index is loaded from / saved to
    """
    # NOTE(review): ``io`` is presumably a project-local helper module (the
    # standard-library ``io`` has no ``exists``/``read``) -- confirm.
    index_on_disk = io.exists(path_to_index_dir)

    if index_on_disk:
        # an index was written earlier: load it instead of rebuilding
        _logger.info('Index located at %s already exists', path_to_index_dir)
        self.load_index(path_to_index_dir)
    else:
        # nothing saved yet: build from the raw knowledge base and persist
        _logger.info('Creating index from knowledge base %s', path_to_knwoledge_base)
        knowledge_base = io.read(path_to_knwoledge_base)

        _logger.debug('Creating postings')
        self.index.build_index(knowledge_base)

        _logger.debug('Calculating tfidf')
        self.index.calculate_tfidf()

        _logger.debug('Writing index')
        self.index.save(path_to_index_dir)
        _logger.debug('Done writing index')
def load_index(self, path_to_index_dir):
    """
    Load a previously saved index from ``path_to_index_dir``.

    The value returned by ``self.index.load`` is stored in ``self._index``
    as before. In addition, when that value is an ``Index`` it replaces
    ``self.index``, because ``search`` and the other methods of this class
    read ``self.index``; without this the loaded index would never be used
    whenever ``load`` returns a new object.

    Args:
        path_to_index_dir (str): location the index is loaded from
    """
    _logger.debug('Loading index from %s', path_to_index_dir)
    loaded = self.index.load(path_to_index_dir)

    # kept for backward compatibility with any code that reads ``_index``
    self._index = loaded

    # NOTE(review): whether ``Index.load`` fills the instance in place or
    # returns a new one is not visible here; the isinstance guard makes the
    # assignment a no-op risk-wise if it returns None or a status value.
    if isinstance(loaded, Index):
        self.index = loaded

    _logger.debug('Done loading index')
def search(self, query, num_of_results):
    """
    Run the search engine for a given query.

    The query is tokenized, the articles common to all query tokens are
    collected with ``_postings_intersections``, scored with ``_rank``, and
    paired with their titles.

    Args:
        query (str)
        num_of_results (int): number of results to be returned

    Returns:
        list[tuple]: ``(article_id, title)`` pairs, in the order in which
        ``_rank``'s result iterates its keys, cut to ``num_of_results``
        entries; an empty list when no article matches.

    Raises:
        IndexNotLoadedException: if ``self.index`` is None
    """
    if self.index is None:
        raise IndexNotLoadedException('You need to create or load index first')

    tokens = preprocessing.tokenize(query)
    frequencies = preprocessing.count_frequency(tokens)

    articles = self._postings_intersections(frequencies.keys())
    if not articles:
        return []

    ranked_scores = self._rank(frequencies, articles)
    article_ids = ranked_scores.keys()

    # Build a real list before slicing: on Python 3 ``zip`` returns an
    # iterator, and slicing it raises TypeError.
    results = [(article_id, self.index.articles[article_id].title)
               for article_id in article_ids]
    return results[:num_of_results]
def _postings_intersections(self, tokens):
"""
Return intersection of postings for given tokens.
Args:
tokens (list[str])
Returns:
dict{str, str}: article ids and their titles
"""
# get article intersection for all tokens
set_list = [self.index.postings[token] for token in tokens]
intersection = set.intersection(*set_list)
# get their titles
result = {}
for article_id in intersection:
result[article_id] = self.index.articles[article_id].title
#.........这里部分代码省略.........