当前位置: 首页>>代码示例>>Python>>正文


Python index.create_in方法代码示例

本文整理汇总了Python中whoosh.index.create_in方法的典型用法代码示例。如果您正苦于以下问题:Python index.create_in方法的具体用法?Python index.create_in怎么用?Python index.create_in使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在whoosh.index的用法示例。


在下文中一共展示了index.create_in方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _init_index

# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def _init_index(self):
        """Open the corpus' Whoosh index, creating an empty one when none exists.

        Makes sure the corpus directory is present, copies the corpus analyzer
        onto ``self.analyzer`` and then stores the index together with a
        searcher and a reader on ``self.index``, ``self.searcher`` and
        ``self.reader``.
        """
        corpus = self.corpus
        index_dir = corpus.path
        if not os.path.exists(index_dir):
            os.mkdir(index_dir)

        text_analyzer = corpus.analyzer
        self.analyzer = text_analyzer

        if not exists_in(index_dir):
            # No index yet: build one with a fixed title/content/path schema.
            # (Original author's open question: is a hard-coded schema right
            # when the corpus schema is not known up front?)
            schema = Schema(
                title=TEXT(stored=True, analyzer=text_analyzer),
                content=TEXT(analyzer=text_analyzer),
                path=ID(stored=True),
            )
            ix = create_in(index_dir, schema)
            # Commit an empty writer straight away, as the original code did.
            ix.writer().commit()
        else:
            ix = open_dir(index_dir)

        self.index = ix
        self.searcher = ix.searcher()
        self.reader = ix.reader()
开发者ID:ChristopherLucas,项目名称:txtorg,代码行数:25,代码来源:engine.py

示例2: __init__

# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def __init__(self, root, storeDir, analyzer, args_dir = None):
        """Build a Whoosh index in ``storeDir`` from the documents under ``root``.

        Args:
            root: Document directory; it is printed and handed to
                ``self.indexDocs`` together with the index writer.
            storeDir: Directory that holds the index. It is created with
                ``os.mkdir`` when it does not exist yet.
            analyzer: Callable invoked with no arguments; its result is used
                as the analyzer of the ``contents`` field.
            args_dir: Stored unchanged on ``self.args_dir``.

        After construction ``self.index``, ``self.writer`` and ``self.reader``
        are set.

        NOTE(review): ``create_in`` is called unconditionally, so an index
        already present in ``storeDir`` is presumably replaced -- confirm
        against the Whoosh documentation.
        """
        self.args_dir = args_dir
        if not os.path.exists(storeDir):
            os.mkdir(storeDir)

        # name/path/txtorg_id are stored; contents is indexed with term
        # vectors but its text is not stored.
        schema = Schema(name=TEXT(stored=True),
                    path=ID(stored=True),
                    txtorg_id=ID(stored=True),
                    contents=TEXT(stored=False,vector=True,analyzer=analyzer()))
        ix = create_in(storeDir, schema)
        writer = ix.writer()
        
        print 'document dir is', root
        # The actual document walking/adding is delegated to indexDocs.
        self.indexDocs(root, writer)

        # Python 2 print with a trailing comma: no newline, so 'done' below
        # ends up on the same output line.
        print 'optimizing index',
        writer.commit(optimize=True)
        print 'done'
        self.index = ix
        # NOTE(review): this writer has already been committed at this point;
        # verify whether callers can still use it.
        self.writer = writer
        self.reader = ix.reader() 
开发者ID:ChristopherLucas,项目名称:txtorg,代码行数:23,代码来源:indexfiles.py

示例3: rewrite_index

# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def rewrite_index(self, cards: List[Card]) -> None:
        """Create a new index in the configured index directory and fill it with ``cards``.

        The directory is taken from ``WhooshConstants.index_dir`` and ensured
        to exist before the index is created with ``self.schema``; populating
        the index is delegated to ``update_index``.
        """
        target_dir = WhooshConstants.index_dir
        print('Rewriting index in {d}'.format(d=target_dir))
        ensure_dir_exists(target_dir)
        fresh_index = create_in(target_dir, self.schema)
        update_index(fresh_index, cards)

    # pylint: disable=no-self-use 
开发者ID:PennyDreadfulMTG,项目名称:Penny-Dreadful-Tools,代码行数:9,代码来源:whoosh_write.py

示例4: open_index

# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def open_index(self, index_folder, create_new=False):
        """Open the Whoosh index in ``index_folder``, creating it when absent.

        Args:
            index_folder: Directory of the index; remembered on
                ``self.index_folder``.
            create_new: When true, an existing ``index_folder`` is deleted
                recursively first, so a new index is built from scratch.

        The resulting index object is stored on ``self.ix``.
        """
        self.index_folder = index_folder
        if create_new:
            # Wipe the whole folder so the exists_in check below comes out false.
            if os.path.exists(index_folder):
                shutil.rmtree(index_folder)
                print "deleted index folder: " + index_folder

        if not os.path.exists(index_folder):
            os.mkdir(index_folder)

        exists = index.exists_in(index_folder)
        stemming_analyzer = StemmingAnalyzer()

        # The schema is built on every call but only used when a new index is
        # created. Field boosts decrease from filename (100) down to
        # emphasiswords (20); content uses the stemming analyzer and has no
        # explicit boost.
        schema = Schema(
            path=ID(stored=True, unique=True)
            , filename=TEXT(stored=True, field_boost=100.0)
            , tags=KEYWORD(stored=True, scorable=True, field_boost=80.0)
            , headlines=KEYWORD(stored=True, scorable=True, field_boost=60.0)
            , doubleemphasiswords=KEYWORD(stored=True, scorable=True, field_boost=40.0)
            , emphasiswords=KEYWORD(stored=True, scorable=True, field_boost=20.0)
            , content=TEXT(stored=True, analyzer=stemming_analyzer)
            , time=STORED
        )
        if not exists:
            self.ix = index.create_in(index_folder, schema)
        else:
            self.ix = index.open_dir(index_folder) 
开发者ID:BernhardWenzel,项目名称:markdown-search,代码行数:29,代码来源:search.py

示例5: init

# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def init(self):
        """Return the Whoosh index stored under ``self.path``/``self.name``.

        An existing index is opened; otherwise the directory is created if
        needed and a new index is created there with ``self.schema``.
        """
        index_dir = os.path.join(self.path, self.name)
        if not whoosh_index.exists_in(index_dir):
            if not os.path.exists(index_dir):
                os.makedirs(index_dir)
            return whoosh_index.create_in(index_dir, self.schema)
        return whoosh_index.open_dir(index_dir)
开发者ID:honmaple,项目名称:flask-msearch,代码行数:9,代码来源:whoosh_backend.py

示例6: __init__

# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def __init__(self, index_dir, schema=DEFAULT_SCHEMA, force_create=False):
        """Attach to the index in ``index_dir``.

        An index that is already present is opened with ``schema`` unless
        ``force_create`` is set; in every other case a new index is created
        in ``index_dir`` with ``schema``.
        """
        self.schema = schema
        must_create = not exists_in(index_dir) or force_create
        if must_create:
            self.index = create_in(index_dir, schema=schema)
        else:
            self.index = open_dir(index_dir, schema=schema)
开发者ID:rmax,项目名称:databrewer,代码行数:8,代码来源:search.py

示例7: __init__

# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def __init__(self, db_path):
        """Open the index located at ``db_path`` or create it with ``self.schema``.

        ``db_path`` is passed through ``ensuredir`` first. A query parser for
        the ``text`` field is stored on ``self.qparser``.
        """
        ensuredir(db_path)
        if not index.exists_in(db_path):
            self.index = index.create_in(db_path, schema=self.schema)
        else:
            self.index = index.open_dir(db_path)
        self.qparser = QueryParser('text', self.schema)
开发者ID:luckystarufo,项目名称:pySINDy,代码行数:9,代码来源:whooshsearch.py

示例8: __init__

# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def __init__(self, index_path, language):
        """Set up the Whoosh schema, index and query parser for ``language``.

        Args:
            index_path: Directory of the index. It is created when missing;
                the process exits via ``sys.exit`` if that fails.
            language: Language name passed to the Whoosh stemmer and
                stop-word lookups.

        A ``LanguageAnalyzer`` is used only when Whoosh has both a stemmer
        and stop words for ``language``; otherwise a ``SimpleAnalyzer`` is
        used. An existing index is opened (exiting via ``sys.exit`` on a
        Whoosh ``IndexError``), otherwise a new one is created.
        """
        import os
        from whoosh import index as whoosh_index
        from whoosh import qparser
        from whoosh.analysis import SimpleAnalyzer, LanguageAnalyzer
        from whoosh.fields import Schema, TEXT, ID
        from whoosh.highlight import UppercaseFormatter
        from whoosh.lang import has_stemmer, has_stopwords

        if has_stemmer(language) and has_stopwords(language):
            analyzer = LanguageAnalyzer(language)
        else:
            # TODO Display a warning?
            analyzer = SimpleAnalyzer()

        self.schema = Schema(path=ID(unique=True, stored=True), body=TEXT(analyzer=analyzer))
        self.formatter = UppercaseFormatter()

        self.index_path = index_path

        if not os.path.exists(index_path):
            try:
                os.mkdir(index_path)
            except OSError as e:
                sys.exit("Error creating Whoosh index: %s" % e)

        if not whoosh_index.exists_in(index_path):
            self.search_index = whoosh_index.create_in(index_path, self.schema)
        else:
            try:
                self.search_index = whoosh_index.open_dir(index_path)
            except whoosh_index.IndexError as e:
                sys.exit("Error opening whoosh index: {0}".format(e))

        # Queries search both fields; the fuzzy-term plugin is added on top.
        self.query_parser = parser = qparser.MultifieldParser(["body", "path"], schema=self.schema)
        parser.add_plugin(qparser.FuzzyTermPlugin())
开发者ID:scragg0x,项目名称:realms-wiki,代码行数:38,代码来源:models.py

示例9: delete_index

# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def delete_index(self, index):
        """Close the current search index and replace it with a newly created one.

        The new index is created in ``self.index_path`` with ``self.schema``.
        The ``index`` argument is not used by this method.
        """
        from whoosh import index as whoosh_index

        self.search_index.close()
        replacement = whoosh_index.create_in(self.index_path, schema=self.schema)
        self.search_index = replacement
开发者ID:scragg0x,项目名称:realms-wiki,代码行数:6,代码来源:models.py

示例10: create_index

# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def create_index():
    """Return the Whoosh index stored in ``index_full``, creating it if needed.

    The index has a single stored ``sentence`` field tokenized by a regex
    that keeps hyphenated words and ``<TAG>...</TAG>`` spans together as
    single tokens.

    Returns:
        The opened or newly created index object.
    """
    # Local import: only create_in/open_dir are known to be in module scope.
    from whoosh.index import exists_in

    index_dir = "index_full"
    # Raw string so that \w reaches the regex engine without relying on
    # Python leaving unknown escape sequences untouched.
    regex_tokenize = re.compile(r'\w+(?:-\w+)+|<[A-Z]+>[^<]+</[A-Z]+>|\w+', re.U)
    tokenizer = RegexTokenizer(regex_tokenize)
    schema = Schema(sentence=TEXT(stored=True, analyzer=tokenizer))

    if not os.path.exists(index_dir):
        os.mkdir(index_dir)

    # Decide on the presence of an index, not of the directory: a directory
    # left behind without an index would otherwise make open_dir fail.
    if exists_in(index_dir):
        return open_dir(index_dir)
    return create_in(index_dir, schema)
开发者ID:davidsbatista,项目名称:BREDS,代码行数:12,代码来源:index_whoosh.py

示例11: cal_sim

# 需要导入模块: from whoosh import index [as 别名]
# 或者: from whoosh.index import create_in [as 别名]
def cal_sim(train_data_path, test_data_path, dst_result_path=None, save_n_best_search=1):
    """Index the training pairs and write the best TF-IDF matches for every test line.

    Each training line is expected to hold ``post<TAB>response``. The pairs
    are written to a new Whoosh index; then, for every line of the test
    file, the terms of its first tab-separated column are OR-ed into a query
    against the ``post`` field and the top hits are written out as
    ``<test line>\\t<matched post>\\t<matched response>``.

    Args:
        train_data_path: Training file. The first digit occurring in this
            path selects the sub-directory under ``../tmp/ix_index/``;
            a path without any digit raises ``IndexError``.
        test_data_path: File with one query per line.
        dst_result_path: Output file, opened for writing. It has to be
            given: the ``None`` default fails when the file is opened.
        save_n_best_search: Maximum number of hits written per test line.

    NOTE(review): the ``.decode('utf-8')`` calls assume lines are read as
    byte strings (Python 2 style file iteration) -- confirm the target
    interpreter.
    """
    schema = Schema(context=TEXT(stored=True), response=STORED, post=TEXT(stored=True))
    index_i = re.findall(r'\d', train_data_path)[0]

    index_path = "../tmp/ix_index/" + index_i
    if not os.path.exists(index_path):
        os.makedirs(index_path)

    ix = create_in(index_path, schema)
    writer = ix.writer()

    def get_cpr(line):
        # Split a lower-cased training line into (context, response, post).
        # The context is always empty here.
        lines = line.lower().strip().split('\t')
        context = ''
        post = lines[0]
        response = lines[1]
        return context.strip().decode('utf-8'), response.decode('utf-8'), post.decode('utf-8')

    def load_train_data(file_name, writer):
        # Add every training pair to the index, then commit once at the end.
        with open(file_name) as f:
            for line in f:
                context, response, post = get_cpr(line)
                if context != '':
                    writer.add_document(context=context, response=response, post=post)
                else:
                    writer.add_document(response=response, post=post)
        writer.commit()

    def get_query(line, ix):
        # Parse the post column and OR all of its terms together.
        lines = line.strip().split('\t')
        post = lines[0].decode('utf-8')
        q2 = QueryParser("post", ix.schema).parse(post)
        terms = list(q2.all_terms())
        query = Or([Term(*x) for x in terms])
        return query

    load_train_data(train_data_path, writer)

    # Context managers close both files even when the search loop fails.
    with open(test_data_path, 'r') as f, open(dst_result_path, 'w') as fw_search:
        with ix.searcher(weighting=scoring.TF_IDF()) as searcher:
            c = searcher.collector(limit=10)
            tlc = TimeLimitCollector(c, timelimit=10.0)
            for line in f:
                try:
                    query = get_query(line, ix)
                    searcher.search_with_collector(query, tlc)
                    results = tlc.results()
                    for i in range(min(len(results), save_n_best_search)):
                        fw_search.write(
                            line.strip() + '\t' + str(results[i]["post"]) + '\t' + str(results[i]["response"]) + '\n')
                except Exception:
                    # Deliberate best effort: any failure on a line (not only
                    # a search timeout) is reported and the line is skipped.
                    print('TimeLimit, ignore it!')
                    print(line)
开发者ID:luofuli,项目名称:DualRL,代码行数:57,代码来源:get_template_based_result.py


注:本文中的whoosh.index.create_in方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。