当前位置: 首页>>代码示例>>Python>>正文


Python IndexWriterConfig.setOpenMode方法代码示例

本文整理汇总了Python中org.apache.lucene.index.IndexWriterConfig.setOpenMode方法的典型用法代码示例。如果您正苦于以下问题:Python IndexWriterConfig.setOpenMode方法的具体用法?Python IndexWriterConfig.setOpenMode怎么用?Python IndexWriterConfig.setOpenMode使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.index.IndexWriterConfig的用法示例。


在下文中一共展示了IndexWriterConfig.setOpenMode方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: build_index

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def build_index():

    lucene.initVM()

    # post_dir = current_app.config['LOCAL_REPO_PATH'] + '/_posts/'
    post_dir = '/Users/w3/data/github/codeif_backup'
    index_store_dir = current_app.config['INDEX_STORE_DIR']
    print post_dir
    print index_store_dir

    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)

    store = SimpleFSDirectory(File(index_store_dir))
    analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
    config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(store, config)

    indexDocs(post_dir, writer)
    ticker = Ticker()
    print 'commit index',
    threading.Thread(target=ticker.run).start()
    writer.commit()
    writer.close()
    ticker.tick = False
    print 'done'
开发者ID:wasw100,项目名称:jekyll-search,代码行数:28,代码来源:index.py

示例2: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
	def __init__(self,root,storeDir,analyzer):
		# Create the index dir if it does not exist 
		if not os.path.exists(storeDir):
			os.mkdir(storeDir)
		# the SimpleFSDirectory which the index will be written in
		store = SimpleFSDirectory(File(storeDir))
		analyzer = LimitTokenCountAnalyzer(analyzer,1048576)
		config = IndexWriterConfig(Version.LUCENE_CURRENT,analyzer)
		config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
		# create a index writer 
		# atach the index dir and config info to it
		writer = IndexWriter(store,config)

		# call the indexing procedure
		# indexing all the files in the directory specified by root
		# write the index with writer
		self.indexDocs(root,writer)
		# start a ticker
		ticker = Ticker()
		print 'commit index'
		threading.Thread(target=ticker.run).start()
		writer.commit()
		writer.close()
		# stop the ticker when the indexing procedure completes
		ticker.tick = False
		print 'Done'
开发者ID:zz-mars,项目名称:simple-search,代码行数:28,代码来源:indexer.py

示例3: _get_writer

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
 def _get_writer(self, analyzer=None, create=False):
     """Build an ``IndexWriter`` over ``self._store``.

     :param analyzer: analyzer to index with; when ``None`` the instance's
         ``self._analyzer`` is used
     :param create: when true, open the index in ``CREATE`` mode;
         otherwise the config's open mode is left untouched
     :return: the newly constructed ``IndexWriter``
     """
     # Honour an explicitly supplied analyzer instead of silently ignoring it.
     if analyzer is None:
         analyzer = self._analyzer
     config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
     if create:
         config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
     if self._similarity is not None:
         config.setSimilarity(self._similarity)
     return IndexWriter(self._store, config)
开发者ID:XihuanZeng,项目名称:kaggle,代码行数:10,代码来源:LuceneCorpus.py

示例4: open_writer

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
 def open_writer(self):
     """Create this instance's IndexWriter in CREATE mode.

     Raises ``Exception`` when a writer is already held in ``self.writer``.
     """
     if self.writer is not None:
         raise Exception("IndexWriter is already open")

     writer_config = IndexWriterConfig(self.get_version(), self.get_analyzer())
     writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
     self.writer = IndexWriter(self.dir, writer_config)
开发者ID:renespeck,项目名称:TAGME_Reproducibility,代码行数:10,代码来源:lucene_tools.py

示例5: deleteRec

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
 def deleteRec(self, pid):
     """Delete every document whose ``uid`` term equals *pid*.

     Opens a writer on ``self.indexDir`` in ``APPEND`` mode, deletes and
     commits, then replaces ``self.searcher`` with a searcher on a freshly
     opened reader.

     :param pid: value of the ``uid`` term identifying the record
     """
     config = IndexWriterConfig(self.analyzer)
     config.setOpenMode(IndexWriterConfig.OpenMode.APPEND)
     writer = IndexWriter(self.indexDir, config)
     try:
         writer.deleteDocuments(Term('uid', pid))
         writer.commit()
     finally:
         # Always release the writer, even when the delete or commit fails.
         writer.close()
     self.searcher = IndexSearcher(DirectoryReader.open(self.indexDir))
开发者ID:andersardo,项目名称:gedMerge,代码行数:11,代码来源:luceneDB.py

示例6: getWriter

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
    def getWriter(self, store, analyzer=None, create=False):
        """Return a new IndexWriter on *store*.

        The analyzer (a ``WhitespaceAnalyzer`` when none is given) is wrapped
        in a ``LimitTokenCountAnalyzer`` with a limit of 10000.  ``CREATE``
        open mode is requested only when *create* is true.
        """
        base_analyzer = WhitespaceAnalyzer() if analyzer is None else analyzer
        writer_config = IndexWriterConfig(
            LimitTokenCountAnalyzer(base_analyzer, 10000))
        if create:
            writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        return IndexWriter(store, writer_config)
开发者ID:svn2github,项目名称:pylucene,代码行数:13,代码来源:test_PyLucene.py

示例7: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
    def __init__(self, store_dir, context, analyzer):
        """Create a fresh index in *store_dir* and index *context* into it.

        The directory is created when missing, the writer is opened in
        ``CREATE`` mode and kept on ``self.writer``; ``self.index`` and
        ``self.complete_index`` are then called in that order.
        """
        directory_missing = not os.path.exists(store_dir)
        if directory_missing:
            os.mkdir(store_dir)

        directory = SimpleFSDirectory(File(store_dir))
        writer_config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        self.writer = IndexWriter(directory, writer_config)

        self.index(context)
        self.complete_index()
开发者ID:jennbing,项目名称:info-retrieval,代码行数:13,代码来源:app_index.py

示例8: Indexer

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
class Indexer(object):
    """Wrapper around a Lucene ``IndexWriter`` opened in ``CREATE`` mode."""

    def __init__(self, **kwargs):
        """ Initialize a new instance of the Indexer

        :param root: The output directory of the underlying index
            (defaults to ``"index"``); created if it does not exist
        :param analyzer: The overloaded analyzer to work with (defaults to
            a ``StandardAnalyzer``); it is wrapped in a
            ``LimitTokenCountAnalyzer`` with a limit of 1048576
        """
        self.output = kwargs.get("root", "index")
        if not os.path.exists(self.output):
            os.mkdir(self.output)

        # Build the default lazily, and also fall back to it when the
        # caller passes analyzer=None explicitly.
        analyzer = kwargs.get("analyzer")
        if analyzer is None:
            analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        self.analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
        self.config = IndexWriterConfig(Version.LUCENE_CURRENT, self.analyzer)
        self.config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        self.store = SimpleFSDirectory(File(self.output))
        self.writer = IndexWriter(self.store, self.config)
        self.create_field_types()

    def index(self, document):
        """ Given a new document, add it to the index.

        Failures are logged and swallowed (best effort); nothing is raised.

        :param document: The document to add to the indexer
        """
        try:
            self.writer.addDocument(document)
        except Exception:
            logger.exception("Failed to index the supplied document")

    def shutdown(self):
        """ Shutdown the currently processing indexer.

        Closes the writer; failures are logged and swallowed.
        """
        try:
            # self.writer.optimize()
            self.writer.close()
        except Exception:
            logger.exception("Failed to shutdown the indexer correctly")

    def create_field_types(self):
        """ Create the field types that will be used to specify
        what actions lucene should take on the various fields
        supplied to index.

        Sets ``self.field_clean`` (indexed, stored, not tokenized) and
        ``self.field_dirty`` (indexed, not stored, tokenized).
        """
        self.field_clean = FieldType()
        self.field_clean.setIndexed(True)
        self.field_clean.setStored(True)
        self.field_clean.setTokenized(False)
        self.field_clean.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS)

        self.field_dirty = FieldType()
        self.field_dirty.setIndexed(True)
        self.field_dirty.setStored(False)
        self.field_dirty.setTokenized(True)
        self.field_dirty.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
开发者ID:bashwork,项目名称:common,代码行数:56,代码来源:filesearch.py

示例9: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
    def __init__(self, indexDir):
        """Open ``self.writer`` on *indexDir* in CREATE_OR_APPEND mode.

        The directory is created first when it does not exist; documents
        are analyzed with a ``StandardAnalyzer``.
        """
        if not os.path.exists(indexDir):
            os.mkdir(indexDir)

        directory = SimpleFSDirectory(File(indexDir))
        writer_config = IndexWriterConfig(
            Version.LUCENE_CURRENT, StandardAnalyzer(Version.LUCENE_CURRENT))
        writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)

        self.writer = IndexWriter(directory, writer_config)
开发者ID:thoughts1053,项目名称:search,代码行数:14,代码来源:indexer.py

示例10: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
    def __init__(self, root, storeDir, analyzer):
        """Index everything under *root* into a fresh index at *storeDir*.

        :param root: location handed to ``self.indexDocs``
        :param storeDir: index directory; created if it does not exist
        :param analyzer: analyzer used by the writer

        The writer is opened in ``CREATE`` mode and is always closed, even
        when indexing or committing raises.
        """
        if not os.path.exists(storeDir):
            os.mkdir(storeDir)

        store = SimpleFSDirectory(File(storeDir))
        config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        writer = IndexWriter(store, config)

        try:
            self.indexDocs(root, writer)
            writer.commit()
        finally:
            # Release the writer even when indexing fails part-way.
            writer.close()
开发者ID:dvalcarce,项目名称:filmyou-web,代码行数:14,代码来源:build_index.py

示例11: get_writer

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def get_writer(index='index'):
    """Return an IndexWriter on directory *index* in CREATE_OR_APPEND mode.

    Documents are analyzed by a ``StandardAnalyzer`` wrapped in a
    ``LimitTokenCountAnalyzer`` with a limit of 1048576.
    """
    directory = SimpleFSDirectory(File(index))

    limited_analyzer = LimitTokenCountAnalyzer(
        StandardAnalyzer(Version.LUCENE_CURRENT), 1048576)

    writer_config = IndexWriterConfig(Version.LUCENE_CURRENT, limited_analyzer)
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)

    return IndexWriter(directory, writer_config)
开发者ID:andrely,项目名称:vg-pipeline,代码行数:14,代码来源:indexing.py

示例12: WikiPageIndex

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
class WikiPageIndex():
    """Lucene index of wiki pages stored as ``Title`` / ``Text`` fields."""

    def __init__(self, index_dir):
        """Prepare the writer configuration and open a searcher.

        :param index_dir: path of the index directory

        NOTE(review): the searcher is opened here, before ``createIndex`` is
        ever called, so this presumably requires an index to exist already
        at *index_dir* -- confirm against callers.
        """
        #lucene.initVM(vmargs=['-Djava.awt.headless=true', '-Xmx4g'])

        self.index_dir = index_dir
        self.directory = SimpleFSDirectory(File(self.index_dir))
        self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        self.config = IndexWriterConfig(Version.LUCENE_CURRENT, self.analyzer)
        self.config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)

        self.searcher = IndexSearcher(DirectoryReader.open(self.directory))

    def createIndex(self):
        """Open ``self.writer`` on the index directory (CREATE mode)."""
        # Make sure the directory exists before the writer is opened on it.
        if not os.path.exists(self.index_dir):
            os.mkdir(self.index_dir)

        self.writer = IndexWriter(self.directory, self.config)

    def addDocumentToIndex(self, title, text):
        """Add one document with stored, analyzed ``Title`` and ``Text``."""
        doc = Document()

        doc.add(Field("Title", title, Field.Store.YES, Field.Index.ANALYZED))
        doc.add(Field("Text", text, Field.Store.YES, Field.Index.ANALYZED))

        self.writer.addDocument(doc)

    def closeIndex(self):
        """Commit pending documents and close ``self.writer``."""
        self.writer.commit()
        self.writer.close()


    def searchIndex(self, queryString, field="Text", max_results=100):
        """Parse *queryString* against *field* and return the matching docs.

        :param queryString: query in Lucene query-parser syntax
        :param field: default field for the query parser
        :param max_results: maximum number of hits requested
        :return: list of the matching documents, in hit order
        """
        query = QueryParser(Version.LUCENE_CURRENT, field, self.analyzer).parse(queryString)
        scoreDocs = self.searcher.search(query, max_results).scoreDocs
        log.debug("Found {0} documents for query [{1}]".format(len(scoreDocs), queryString))

        docs = []
        for scoreDoc in scoreDocs:
            doc = self.searcher.doc(scoreDoc.doc)
            log.debug(WikiPageIndex.cleanWikiText(doc.get("Text")))

            #print("title: {0}\ncontents: {1}".format(doc.get("Title"), doc.get("Text")[:70]))
            docs.append(doc)

        return docs

    @staticmethod
    def cleanWikiText(text):
        """Strip wiki markup and punctuation from *text*.

        Non-ASCII characters are dropped; single-line ``[[...]]``,
        ``{{...}}`` and ``{|...|}`` spans are removed; every run of
        characters other than letters, digits, ``_``, ``-`` and newline
        becomes one space; blank or whitespace-padded line breaks collapse
        to a single newline.  The result is stripped at both ends.
        """
        # Drop non-ASCII characters, then decode again so the regexes below
        # always operate on text rather than on a bytes object.
        text = text.encode('ascii', 'ignore').decode('ascii')
        text = re.sub(r'(\[\[.*?\]\]|\{\{.*?\}\}|\{\|.*?\|\})', '', text)
        text = re.sub(r'[^\na-zA-Z0-9_-]+', ' ', text)
        text = re.sub(r'([ \t]*[\n]+[ \t]*)+', '\n', text)
        return text.strip()
开发者ID:abell25,项目名称:TestTaker,代码行数:55,代码来源:WikiPageIndex.py

示例13: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
 def __init__(self, startJVM=False):
     """Open a CREATE-mode IndexWriter on the ``index_dir`` directory.

     When *startJVM* is true the Lucene VM is initialised first (headless).
     Sets ``self.STORE_DIR``, ``self.store``, ``self.analyzer`` (a
     ``StandardAnalyzer`` wrapped in a ``LimitTokenCountAnalyzer`` with a
     limit of 10000) and ``self.writer``.
     """
     if startJVM:
         lucene.initVM(vmargs=['-Djava.awt.headless=true'])

     self.STORE_DIR = "index_dir"
     self.store = SimpleFSDirectory(File(self.STORE_DIR))

     self.analyzer = LimitTokenCountAnalyzer(
         StandardAnalyzer(Version.LUCENE_CURRENT), 10000)

     writer_config = IndexWriterConfig(Version.LUCENE_CURRENT, self.analyzer)
     writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
     self.writer = IndexWriter(self.store, writer_config)
开发者ID:inteligencia-coletiva-lsd,项目名称:app-tabletranscriber,代码行数:15,代码来源:pylucene.py

示例14: dummyIndex

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
 def dummyIndex(self):
     """
     Create a dummy index - to avoid problems updating it

     Opens ``self.indexDir`` in ``CREATE`` mode and writes one document
     whose ``uid`` field is ``'dummy'``.  The writer is always closed,
     even when adding or committing raises.
     """
     config = IndexWriterConfig(self.analyzer)
     config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
     writer = IndexWriter(self.indexDir, config)
     try:
         doc = Document()
         doc.add(Field('uid', 'dummy', StringField.TYPE_STORED))
         writer.addDocument(doc)
         writer.commit()
     finally:
         # Release the writer even when the add or commit fails.
         writer.close()
开发者ID:andersardo,项目名称:gedMerge,代码行数:15,代码来源:luceneDB.py

示例15: buildIndex

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
 def buildIndex(self, inputFile):
     analyzer = self.getAnalyzer()
     iwconf = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
     
     iwconf.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
     writer = IndexWriter( SimpleFSDirectory( File(self.luceneDir) ), iwconf)
     
     # read through input file and write out to lucene
     counter = 0
     linesReadCounter = 0
     
     with open(inputFile, 'r') as lines:
         linesRead = 0
         
         for line in lines:
             try:
                 linesRead+=1
                 
                 if linesRead % 1000 == 0:
                     print "%d lines read" % linesRead
                     
                 cui, concept = line.replace("\",\"", "\t").replace("\"", "").split("\t")
                 concept = concept.strip()
                 cui = cui.strip()
                 
                 strNorm = self.normalizeCasePunct(concept)
                 strSorted = self.sortWords(strNorm)
                 strStemmed = self.stemWords(strNorm)
                 strStemmedSorted = self.stemWords(strSorted)
       
                 fdoc = Document()
                 
                 counter +=1
                 fid = counter
                 
                 fdoc.add( Field("id", unicode(fid), Field.Store.YES, Field.Index.NOT_ANALYZED))
                 fdoc.add( Field("cui", cui, Field.Store.YES, Field.Index.NOT_ANALYZED))
                 fdoc.add( Field("str", concept, Field.Store.YES, Field.Index.NOT_ANALYZED))
                 fdoc.add( Field("str_norm", strNorm, Field.Store.YES, Field.Index.NOT_ANALYZED))
                 fdoc.add( Field("str_sorted", strSorted, Field.Store.YES, Field.Index.NOT_ANALYZED))
                 fdoc.add( Field("str_stemmed", strStemmed, Field.Store.YES, Field.Index.NOT_ANALYZED))
                 fdoc.add( Field("str_stemmedSorted", strStemmedSorted, Field.Store.YES, Field.Index.NOT_ANALYZED))
                 writer.addDocument(fdoc)
                 if fid % 1000 == 0:
                     writer.commit()
             except:
                 "Skipping line: %s" % line
                 
     writer.commit()
     writer.close()
开发者ID:fzeeshan,项目名称:SMPP,代码行数:52,代码来源:MedicalQueryParser.py


注:本文中的org.apache.lucene.index.IndexWriterConfig.setOpenMode方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。