当前位置: 首页>>代码示例>>Python>>正文


Python IndexWriter.commit方法代码示例

本文整理汇总了Python中org.apache.lucene.index.IndexWriter.commit方法的典型用法代码示例。如果您正苦于以下问题:Python IndexWriter.commit方法的具体用法?Python IndexWriter.commit怎么用?Python IndexWriter.commit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.index.IndexWriter的用法示例。


在下文中一共展示了IndexWriter.commit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: build_index

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def build_index():

    lucene.initVM()

    # post_dir = current_app.config['LOCAL_REPO_PATH'] + '/_posts/'
    post_dir = '/Users/w3/data/github/codeif_backup'
    index_store_dir = current_app.config['INDEX_STORE_DIR']
    print post_dir
    print index_store_dir

    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)

    store = SimpleFSDirectory(File(index_store_dir))
    analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
    config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(store, config)

    indexDocs(post_dir, writer)
    ticker = Ticker()
    print 'commit index',
    threading.Thread(target=ticker.run).start()
    writer.commit()
    writer.close()
    ticker.tick = False
    print 'done'
开发者ID:wasw100,项目名称:jekyll-search,代码行数:28,代码来源:index.py

示例2: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
	def __init__(self,root,storeDir,analyzer):
		# Create the index dir if it does not exist 
		if not os.path.exists(storeDir):
			os.mkdir(storeDir)
		# the SimpleFSDirectory which the index will be written in
		store = SimpleFSDirectory(File(storeDir))
		analyzer = LimitTokenCountAnalyzer(analyzer,1048576)
		config = IndexWriterConfig(Version.LUCENE_CURRENT,analyzer)
		config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
		# create a index writer 
		# atach the index dir and config info to it
		writer = IndexWriter(store,config)

		# call the indexing procedure
		# indexing all the files in the directory specified by root
		# write the index with writer
		self.indexDocs(root,writer)
		# start a ticker
		ticker = Ticker()
		print 'commit index'
		threading.Thread(target=ticker.run).start()
		writer.commit()
		writer.close()
		# stop the ticker when the indexing procedure completes
		ticker.tick = False
		print 'Done'
开发者ID:zz-mars,项目名称:simple-search,代码行数:28,代码来源:indexer.py

示例3: deleteRec

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
 def deleteRec(self, pid):
     config = IndexWriterConfig(self.analyzer)
     config.setOpenMode(IndexWriterConfig.OpenMode.APPEND)
     writer = IndexWriter(self.indexDir, config)
     writer.deleteDocuments(Term('uid', pid))
     writer.commit()
     writer.close()
     self.searcher = IndexSearcher(DirectoryReader.open(self.indexDir))
     return
开发者ID:andersardo,项目名称:gedMerge,代码行数:11,代码来源:luceneDB.py

示例4: index

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
    def index(self, data):
        writer = IndexWriter(
            self.d, self.conf)

        doc = self.buildDocument(data['fields'], data['record'])
        writer.addDocument(doc)

        writer.commit()
        writer.close()
开发者ID:bradleyjones,项目名称:apiary,代码行数:11,代码来源:lucenedriver.py

示例5: rebuildIndex

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
    def rebuildIndex(self, data):
        writer = IndexWriter(
            self.d, self.conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE))

        for record in data['records']:
            doc = self.buildDocument(data['fields'], record)
            writer.addDocument(doc)

        writer.commit()
        writer.close()
开发者ID:bradleyjones,项目名称:apiary,代码行数:12,代码来源:lucenedriver.py

示例6: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
    def __init__(self, root, storeDir, analyzer):
        if not os.path.exists(storeDir):
            os.mkdir(storeDir)

        store = SimpleFSDirectory(File(storeDir))
        config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        writer = IndexWriter(store, config)

        self.indexDocs(root, writer)
        writer.commit()
        writer.close()
开发者ID:dvalcarce,项目名称:filmyou-web,代码行数:14,代码来源:build_index.py

示例7: WikiPageIndex

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
class WikiPageIndex():
    def __init__(self, index_dir):
        #lucene.initVM(vmargs=['-Djava.awt.headless=true', '-Xmx4g'])

        self.index_dir = index_dir
        self.directory = SimpleFSDirectory(File(self.index_dir))
        self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        self.config = IndexWriterConfig(Version.LUCENE_CURRENT, self.analyzer)
        self.config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)

        self.searcher = IndexSearcher(DirectoryReader.open(self.directory))

    def createIndex(self):
        self.writer = IndexWriter(self.directory, self.config)

        if not os.path.exists(self.index_dir):
            os.mkdir(self.index_dir)

    def addDocumentToIndex(self, title, text):
        doc = Document()

        doc.add(Field("Title", title, Field.Store.YES, Field.Index.ANALYZED))
        doc.add(Field("Text", text, Field.Store.YES, Field.Index.ANALYZED))

        self.writer.addDocument(doc)

    def closeIndex(self):
        self.writer.commit()
        self.writer.close()


    def searchIndex(self, queryString, field="Text", max_results=100):
        query = QueryParser(Version.LUCENE_CURRENT, field, self.analyzer).parse(queryString)
        scoreDocs = self.searcher.search(query, max_results).scoreDocs
        log.debug("Found {0} documents for query [{1}]".format(len(scoreDocs), queryString))

        docs = []
        for scoreDoc in scoreDocs:
            doc = self.searcher.doc(scoreDoc.doc)
            log.debug(WikiPageIndex.cleanWikiText(doc.get("Text")))

            #print("title: {0}\ncontents: {1}".format(doc.get("Title"), doc.get("Text")[:70]))
            docs.append(doc)

        return docs

    @staticmethod
    def cleanWikiText(text):
        text = text.encode('ascii', 'ignore')
        text = re.sub('(\[\[.*?\]\]|\{\{.*?\}\}|\{\|.*?\|\})', '', text)
        text = re.sub('[^\na-zA-Z0-9\n_-]+', ' ', text)
        text = re.sub('([ \t]*[\n]+[ \t]*)+', '\n', text)
        return text.strip()
开发者ID:abell25,项目名称:TestTaker,代码行数:55,代码来源:WikiPageIndex.py

示例8: dummyIndex

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
 def dummyIndex(self):
     """
     Create a dummy index - to avoid problems updating it
     """
     config = IndexWriterConfig(self.analyzer)
     config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
     writer = IndexWriter(self.indexDir, config)
     doc = Document()
     doc.add(Field('uid', 'dummy', StringField.TYPE_STORED))
     writer.addDocument(doc)
     writer.commit()
     writer.close()
     return
开发者ID:andersardo,项目名称:gedMerge,代码行数:15,代码来源:luceneDB.py

示例9: buildIndex

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
 def buildIndex(self, inputFile):
     analyzer = self.getAnalyzer()
     iwconf = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
     
     iwconf.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
     writer = IndexWriter( SimpleFSDirectory( File(self.luceneDir) ), iwconf)
     
     # read through input file and write out to lucene
     counter = 0
     linesReadCounter = 0
     
     with open(inputFile, 'r') as lines:
         linesRead = 0
         
         for line in lines:
             try:
                 linesRead+=1
                 
                 if linesRead % 1000 == 0:
                     print "%d lines read" % linesRead
                     
                 cui, concept = line.replace("\",\"", "\t").replace("\"", "").split("\t")
                 concept = concept.strip()
                 cui = cui.strip()
                 
                 strNorm = self.normalizeCasePunct(concept)
                 strSorted = self.sortWords(strNorm)
                 strStemmed = self.stemWords(strNorm)
                 strStemmedSorted = self.stemWords(strSorted)
       
                 fdoc = Document()
                 
                 counter +=1
                 fid = counter
                 
                 fdoc.add( Field("id", unicode(fid), Field.Store.YES, Field.Index.NOT_ANALYZED))
                 fdoc.add( Field("cui", cui, Field.Store.YES, Field.Index.NOT_ANALYZED))
                 fdoc.add( Field("str", concept, Field.Store.YES, Field.Index.NOT_ANALYZED))
                 fdoc.add( Field("str_norm", strNorm, Field.Store.YES, Field.Index.NOT_ANALYZED))
                 fdoc.add( Field("str_sorted", strSorted, Field.Store.YES, Field.Index.NOT_ANALYZED))
                 fdoc.add( Field("str_stemmed", strStemmed, Field.Store.YES, Field.Index.NOT_ANALYZED))
                 fdoc.add( Field("str_stemmedSorted", strStemmedSorted, Field.Store.YES, Field.Index.NOT_ANALYZED))
                 writer.addDocument(fdoc)
                 if fid % 1000 == 0:
                     writer.commit()
             except:
                 "Skipping line: %s" % line
                 
     writer.commit()
     writer.close()
开发者ID:fzeeshan,项目名称:SMPP,代码行数:52,代码来源:MedicalQueryParser.py

示例10: commit

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def commit(collection_name):
	if collection_name!="DEFAULT":
		INDEX_DIR=collection_name
	else:
		INDEX_DIR=INDEX_DIR_DEFAULT

	direc=SimpleFSDirectory(File(INDEX_DIR))
	analyzer=StandardAnalyzer(Version.LUCENE_CURRENT)

	#setting writer configurations
	config=IndexWriterConfig(Version.LUCENE_CURRENT,analyzer)
	config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)
	writer=IndexWriter(direc,config)

	writer.commit()
	writer.close()
开发者ID:karthiksenthil,项目名称:Mini-NoSQL-Database,代码行数:18,代码来源:HandlerLatest.py

示例11: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
 def __init__(self, root, storeDir, analyzer): 
     if not os.path.exists(storeDir): 
         os.mkdir(storeDir) 
     store = SimpleFSDirectory(File(storeDir)) 
     analyzer = LimitTokenCountAnalyzer(analyzer, 1000)#1048576 
     config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer) 
     config.setOpenMode(IndexWriterConfig.OpenMode.CREATE) 
     writer = IndexWriter(store, config) 
     self.indexDocs(root, writer) 
     ticker = Ticker() 
     print 'commit index', 
     threading.Thread(target=ticker.run).start() 
     writer.commit() 
     writer.close() 
     ticker.tick = False 
     print 'done'
开发者ID:ouceduxzk,项目名称:AI2-Kaggle,代码行数:18,代码来源:indexer.py

示例12: delete

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def delete(primary_keys_map,collection_name,todelete,commit=False):
	INDEX_DIR_DEFAULT="IndexFiles.index"
	if collection_name!="DEFAULT":
		INDEX_DIR=collection_name
	else:
		INDEX_DIR=INDEX_DIR_DEFAULT

	try:
		tofind_keyvalue_pairs=json.loads(todelete)
	except:
		return 100	
	

	direc=SimpleFSDirectory(File(INDEX_DIR))
	analyzer=StandardAnalyzer(Version.LUCENE_CURRENT)

	#setting writer configurations
	try:
		config=IndexWriterConfig(Version.LUCENE_CURRENT,analyzer)
		config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)
		writer=IndexWriter(direc,config)
		ireader=IndexReader.open(direc)
	except:
		return 105

	###as of now deletion of documents support is only based on indexed keys.###################3 
	tofind_primary_keyvalue_pairs={}
	tofind_nonprimary_keyvalue_pairs={}

	#separating out primary and non_primary keys
	for key in tofind_keyvalue_pairs.keys():
		if key in primary_keys_map:
			tofind_primary_keyvalue_pairs[key]=tofind_keyvalue_pairs[key]
		else:
			tofind_nonprimary_keyvalue_pairs[key]=tofind_keyvalue_pairs[key]

	#filtering documents according to primary keys		
	query=BooleanQuery()
	for key in tofind_primary_keyvalue_pairs.keys():
		temp=QueryParser(Version.LUCENE_CURRENT,key,analyzer).parse(tofind_primary_keyvalue_pairs[key])
		query.add(BooleanClause(temp,BooleanClause.Occur.MUST))

	a=writer.deleteDocuments(query)
	if commit==True:
		writer.commit()
	writer.close()
	return 000;
开发者ID:sudeep-sureshan,项目名称:Mini-NoSQL-Database,代码行数:49,代码来源:Handler_callable.py

示例13: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
    def __init__(self, fileRoot, storeDir, analyzer):

        if not os.path.exists(storeDir):
            os.mkdir(storeDir)

        store    = SimpleFSDirectory(File(storeDir))
        analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
        config   = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        config.setSimilarity(similarities.BM25Similarity())
    #Available similarity: BM25Similarity, MultiSimilarity, PerFieldSimilarityWrapper, SimilarityBase, TFIDFSimilarity
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        writer   = IndexWriter(store, config)

        self.indexDocs(fileRoot, writer)
        print 'commit index',
        writer.commit()
        writer.close()
        print 'done'
开发者ID:huqiang,项目名称:CS3246-Assignment1,代码行数:20,代码来源:IndexFiles.py

示例14: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
    def __init__(self, destination_directory, analyzer):

        if not os.path.exists(destination_directory):
            os.mkdir(destination_directory)

        store = SimpleFSDirectory(File(destination_directory))
        analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
        config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        writer = IndexWriter(store, config)

        self.tweetIndexer(writer)
        ticker = Ticker()
        threading.Thread(target=ticker.run).start()
        writer.commit()
        writer.close()
        ticker.tick = False
        print 'done'
开发者ID:christian91cruz,项目名称:Twitter-Search-Engine-and-Data-Retrieval,代码行数:20,代码来源:indexTweets.py

示例15: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
    def __init__(self, root, storeDir, analyzer):
        if not os.path.exists(storeDir):
            os.mkdir(storeDir)

        store = SimpleFSDirectory(File(storeDir)) # Store index files in the file syste. try NIOFSDirectory
        analyzer = LimitTokenCountAnalyzer(analyzer, 1048576) # maxTokenCount=1048576, this analyzer limit the number of tokens per field, not necessary for indexing MEDLINE
        config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        writer = IndexWriter(store, config)

        self.indexDocs(root, writer)
        ticker = Ticker()
        print 'commit index',
        threading.Thread(target=ticker.run).start()
        writer.commit()
        writer.close()
        ticker.tick = False
        print 'done'
开发者ID:w2wei,项目名称:XPRC,代码行数:20,代码来源:IndexFiles.py


注:本文中的org.apache.lucene.index.IndexWriter.commit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。