本文整理汇总了Python中org.apache.lucene.index.IndexWriter.commit方法的典型用法代码示例。如果您正苦于以下问题:Python IndexWriter.commit方法的具体用法?Python IndexWriter.commit怎么用?Python IndexWriter.commit使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.index.IndexWriter的用法示例。
在下文中一共展示了IndexWriter.commit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: build_index
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def build_index():
    """Rebuild the Lucene index for the posts directory from scratch.

    The index location is read from
    ``current_app.config['INDEX_STORE_DIR']``; the source directory is
    currently hard-coded. The writer is opened with ``OpenMode.CREATE`` and
    a ``Ticker`` thread is started before the index is committed and closed.

    If indexing, committing or closing fails, the writer is rolled back and
    the exception is re-raised. The ticker is told to stop on every exit
    path.
    """
    lucene.initVM()
    # post_dir = current_app.config['LOCAL_REPO_PATH'] + '/_posts/'
    post_dir = '/Users/w3/data/github/codeif_backup'
    index_store_dir = current_app.config['INDEX_STORE_DIR']
    print(post_dir)
    print(index_store_dir)

    store = SimpleFSDirectory(File(index_store_dir))
    analyzer = LimitTokenCountAnalyzer(
        StandardAnalyzer(Version.LUCENE_CURRENT), 1048576)
    config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(store, config)

    ticker = Ticker()
    try:
        indexDocs(post_dir, writer)
        # Python 2 print statement; the trailing comma suppresses the newline.
        print('commit index'),
        threading.Thread(target=ticker.run).start()
        writer.commit()
        writer.close()
    except Exception:
        # Discard uncommitted changes and close the writer so it is not left
        # open, then let the caller see the original error.
        writer.rollback()
        raise
    finally:
        # Previously skipped on errors, which left the ticker thread running.
        # NOTE(review): assumes Ticker.run loops until tick is False - confirm.
        ticker.tick = False
    print('done')
示例2: __init__
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def __init__(self,root,storeDir,analyzer):
    """Index everything under ``root`` into a new index at ``storeDir``.

    Args:
        root: passed unchanged to ``self.indexDocs`` as the location to index.
        storeDir: directory holding the index; created if it does not exist.
        analyzer: analyzer to use, wrapped in a ``LimitTokenCountAnalyzer``
            with a limit of 1048576.

    If indexing, committing or closing fails, the writer is rolled back and
    the exception is re-raised. The ticker is told to stop on every exit
    path.
    """
    # Create the index dir if it does not exist
    if not os.path.exists(storeDir):
        os.mkdir(storeDir)

    # The SimpleFSDirectory the index will be written to
    store = SimpleFSDirectory(File(storeDir))
    analyzer = LimitTokenCountAnalyzer(analyzer,1048576)
    config = IndexWriterConfig(Version.LUCENE_CURRENT,analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    # Create an index writer bound to the index dir and the config
    writer = IndexWriter(store,config)

    ticker = Ticker()
    try:
        # Index all the files in the directory specified by root
        self.indexDocs(root,writer)
        print('commit index')
        # Start the ticker for the duration of commit and close
        threading.Thread(target=ticker.run).start()
        writer.commit()
        writer.close()
    except Exception:
        # Discard uncommitted changes and close the writer so it is not left
        # open, then let the caller see the original error.
        writer.rollback()
        raise
    finally:
        # Stop the ticker when the procedure completes or fails.
        # NOTE(review): assumes Ticker.run loops until tick is False - confirm.
        ticker.tick = False
    print('Done')
示例3: deleteRec
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def deleteRec(self, pid):
    """Delete the documents whose ``uid`` term equals ``pid``.

    Opens a writer on ``self.indexDir`` in ``OpenMode.APPEND``, deletes,
    commits and closes it, then replaces ``self.searcher`` with a searcher
    on a newly opened reader.
    """
    writer_config = IndexWriterConfig(self.analyzer)
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.APPEND)
    index_writer = IndexWriter(self.indexDir, writer_config)

    uid_term = Term('uid', pid)
    index_writer.deleteDocuments(uid_term)
    index_writer.commit()
    index_writer.close()

    # Re-open the reader after the writer has been closed.
    fresh_reader = DirectoryReader.open(self.indexDir)
    self.searcher = IndexSearcher(fresh_reader)
示例4: index
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def index(self, data):
    """Add a single document to the index and commit it.

    Args:
        data: mapping read via ``data['fields']`` and ``data['record']``;
            both values are handed to ``self.buildDocument``.
    """
    index_writer = IndexWriter(self.d, self.conf)
    fields, record = data['fields'], data['record']
    index_writer.addDocument(self.buildDocument(fields, record))
    index_writer.commit()
    index_writer.close()
示例5: rebuildIndex
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def rebuildIndex(self, data):
    """Recreate the index from ``data['records']``.

    Args:
        data: mapping read via ``data['fields']`` and ``data['records']``;
            one document is built per record with ``self.buildDocument``.

    The writer is opened in ``OpenMode.CREATE``. The open mode that
    ``self.conf`` had before the call is restored afterwards, so the
    ``CREATE`` setting does not carry over to later writers built from the
    same config object. On failure the writer is rolled back and the
    exception is re-raised.
    """
    conf = self.conf
    # NOTE(review): assumes self.conf is a shared IndexWriterConfig that
    # other methods reuse - confirm against the rest of the class.
    previous_mode = conf.getOpenMode()
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    try:
        writer = IndexWriter(self.d, conf)
        try:
            for record in data['records']:
                writer.addDocument(self.buildDocument(data['fields'], record))
            writer.commit()
            writer.close()
        except Exception:
            # Discard uncommitted changes and close the writer.
            writer.rollback()
            raise
    finally:
        conf.setOpenMode(previous_mode)
示例6: __init__
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def __init__(self, root, storeDir, analyzer):
    """Index ``root`` into a new index stored under ``storeDir``.

    Args:
        root: passed unchanged to ``self.indexDocs``.
        storeDir: index directory; created when it does not exist yet.
        analyzer: analyzer handed to the ``IndexWriterConfig``.
    """
    if not os.path.exists(storeDir):
        os.mkdir(storeDir)

    index_dir = SimpleFSDirectory(File(storeDir))
    writer_config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)

    index_writer = IndexWriter(index_dir, writer_config)
    self.indexDocs(root, index_writer)
    index_writer.commit()
    index_writer.close()
示例7: WikiPageIndex
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
class WikiPageIndex(object):
    """Lucene index of wiki pages stored with "Title" and "Text" fields.

    Write cycle: ``createIndex()``, any number of ``addDocumentToIndex()``
    calls, then ``closeIndex()``. ``searchIndex()`` queries whatever index
    was present when the searcher was last opened (at construction and after
    each ``closeIndex()``).
    """

    def __init__(self, index_dir):
        """Set up directory, analyzer and writer config for ``index_dir``.

        A searcher is opened only if an index already exists there;
        otherwise ``self.searcher`` stays ``None`` until ``closeIndex()``.
        """
        #lucene.initVM(vmargs=['-Djava.awt.headless=true', '-Xmx4g'])
        self.index_dir = index_dir
        self.directory = SimpleFSDirectory(File(self.index_dir))
        self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        self.config = IndexWriterConfig(Version.LUCENE_CURRENT, self.analyzer)
        self.config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        self.searcher = None
        self._refreshSearcher()

    def _refreshSearcher(self):
        """Re-open ``self.searcher`` on the current index, if one exists."""
        if not DirectoryReader.indexExists(self.directory):
            return
        previous = self.searcher
        self.searcher = IndexSearcher(DirectoryReader.open(self.directory))
        if previous is not None:
            # Release the reader that backed the replaced searcher.
            previous.getIndexReader().close()

    def createIndex(self):
        """Open ``self.writer``, creating the index directory first if needed."""
        if not os.path.exists(self.index_dir):
            os.mkdir(self.index_dir)
        self.writer = IndexWriter(self.directory, self.config)

    def addDocumentToIndex(self, title, text):
        """Add one page with stored, analyzed "Title" and "Text" fields."""
        doc = Document()
        doc.add(Field("Title", title, Field.Store.YES, Field.Index.ANALYZED))
        doc.add(Field("Text", text, Field.Store.YES, Field.Index.ANALYZED))
        self.writer.addDocument(doc)

    def closeIndex(self):
        """Commit and close the writer, then refresh the searcher.

        On failure the writer is rolled back and the exception re-raised.
        """
        try:
            self.writer.commit()
            self.writer.close()
        except Exception:
            self.writer.rollback()
            raise
        # Make the documents just committed visible to searchIndex().
        self._refreshSearcher()

    def searchIndex(self, queryString, field="Text", max_results=100):
        """Return up to ``max_results`` documents matching ``queryString``.

        Args:
            queryString: query text parsed with ``QueryParser``.
            field: default field for the parser.
            max_results: maximum number of hits requested from the searcher.

        Returns:
            A list of the matching documents; empty when no searcher is
            available because no index existed yet.
        """
        if self.searcher is None:
            return []
        query = QueryParser(Version.LUCENE_CURRENT, field, self.analyzer).parse(queryString)
        scoreDocs = self.searcher.search(query, max_results).scoreDocs
        log.debug("Found {0} documents for query [{1}]".format(len(scoreDocs), queryString))
        docs = []
        for scoreDoc in scoreDocs:
            doc = self.searcher.doc(scoreDoc.doc)
            log.debug(WikiPageIndex.cleanWikiText(doc.get("Text")))
            #print("title: {0}\ncontents: {1}".format(doc.get("Title"), doc.get("Text")[:70]))
            docs.append(doc)
        return docs

    @staticmethod
    def cleanWikiText(text):
        """Strip wiki markup and non-ASCII characters from ``text``.

        Removes ``[[...]]``, ``{{...}}`` and ``{|...|}`` spans that sit on a
        single line, replaces every run of characters other than ASCII
        letters, digits, ``_``, ``-`` and newline with one space, collapses
        newline runs (with surrounding spaces/tabs) to a single newline and
        trims the result.
        """
        # Decode back to text so the str patterns below also work where
        # encode() returns bytes.
        text = text.encode('ascii', 'ignore').decode('ascii')
        text = re.sub(r'(\[\[.*?\]\]|\{\{.*?\}\}|\{\|.*?\|\})', '', text)
        text = re.sub(r'[^\na-zA-Z0-9\n_-]+', ' ', text)
        text = re.sub(r'([ \t]*[\n]+[ \t]*)+', '\n', text)
        return text.strip()
示例8: dummyIndex
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def dummyIndex(self):
    """
    Create a dummy index - to avoid problems updating it

    Opens a writer on ``self.indexDir`` in ``OpenMode.CREATE`` and commits a
    single document whose ``uid`` field is ``'dummy'``.
    """
    writer_config = IndexWriterConfig(self.analyzer)
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    index_writer = IndexWriter(self.indexDir, writer_config)

    placeholder = Document()
    placeholder.add(Field('uid', 'dummy', StringField.TYPE_STORED))
    index_writer.addDocument(placeholder)

    index_writer.commit()
    index_writer.close()
示例9: buildIndex
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def buildIndex(self, inputFile):
    """Build a new index at ``self.luceneDir`` from the lines of ``inputFile``.

    Each line must split into exactly two tab-separated parts (CUI and
    concept) after ``","`` has been turned into a tab and remaining double
    quotes removed. For every such line one document is added with the
    fields ``id``, ``cui``, ``str``, ``str_norm``, ``str_sorted``,
    ``str_stemmed`` and ``str_stemmedSorted``, all stored and not analyzed.
    The writer commits after every 1000th document and once at the end.

    Lines that fail to parse or index are reported on stdout and skipped.
    If the build as a whole fails, the writer is rolled back and the
    exception is re-raised.

    Args:
        inputFile: path of the text file to read.
    """
    analyzer = self.getAnalyzer()
    iwconf = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    iwconf.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(SimpleFSDirectory(File(self.luceneDir)), iwconf)

    try:
        # read through input file and write out to lucene
        counter = 0
        with open(inputFile, 'r') as lines:
            for linesRead, line in enumerate(lines, 1):
                if linesRead % 1000 == 0:
                    print("%d lines read" % linesRead)
                try:
                    cui, concept = line.replace("\",\"", "\t").replace("\"", "").split("\t")
                    concept = concept.strip()
                    cui = cui.strip()
                    strNorm = self.normalizeCasePunct(concept)
                    strSorted = self.sortWords(strNorm)
                    strStemmed = self.stemWords(strNorm)
                    strStemmedSorted = self.stemWords(strSorted)

                    # The id is taken before the fields are added, so a
                    # document that fails below still consumes its id.
                    counter += 1
                    fid = counter
                    fdoc = Document()
                    for name, value in (
                            ("id", unicode(fid)),
                            ("cui", cui),
                            ("str", concept),
                            ("str_norm", strNorm),
                            ("str_sorted", strSorted),
                            ("str_stemmed", strStemmed),
                            ("str_stemmedSorted", strStemmedSorted),
                    ):
                        fdoc.add(Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED))
                    writer.addDocument(fdoc)
                    if fid % 1000 == 0:
                        writer.commit()
                except Exception:
                    # Best effort per line: report and carry on. The original
                    # built this message but never printed it.
                    print("Skipping line: %s" % line.rstrip('\r\n'))
        writer.commit()
        writer.close()
    except Exception:
        # Discard uncommitted changes and close the writer.
        writer.rollback()
        raise
示例10: commit
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def commit(collection_name):
    """Open a writer on a collection's index directory, commit and close it.

    Args:
        collection_name: index directory path, or ``"DEFAULT"`` to use the
            module-level ``INDEX_DIR_DEFAULT``.
    """
    index_path = collection_name if collection_name != "DEFAULT" else INDEX_DIR_DEFAULT
    directory = SimpleFSDirectory(File(index_path))

    # setting writer configurations
    writer_config = IndexWriterConfig(
        Version.LUCENE_CURRENT, StandardAnalyzer(Version.LUCENE_CURRENT))
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)

    index_writer = IndexWriter(directory, writer_config)
    index_writer.commit()
    index_writer.close()
示例11: __init__
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def __init__(self, root, storeDir, analyzer):
    """Index ``root`` into a new index stored under ``storeDir``.

    Args:
        root: passed unchanged to ``self.indexDocs``.
        storeDir: index directory; created when it does not exist yet.
        analyzer: analyzer to use, wrapped in a ``LimitTokenCountAnalyzer``
            with a limit of 1000.

    If indexing, committing or closing fails, the writer is rolled back and
    the exception is re-raised. The ticker is told to stop on every exit
    path.
    """
    if not os.path.exists(storeDir):
        os.mkdir(storeDir)

    store = SimpleFSDirectory(File(storeDir))
    # The original carried the alternative limit 1048576 as a trailing note.
    analyzer = LimitTokenCountAnalyzer(analyzer, 1000)
    config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(store, config)

    ticker = Ticker()
    try:
        self.indexDocs(root, writer)
        # Python 2 print statement; the trailing comma suppresses the newline.
        print('commit index'),
        threading.Thread(target=ticker.run).start()
        writer.commit()
        writer.close()
    except Exception:
        # Discard uncommitted changes and close the writer so it is not left
        # open, then let the caller see the original error.
        writer.rollback()
        raise
    finally:
        # Previously skipped on errors, which left the ticker thread running.
        # NOTE(review): assumes Ticker.run loops until tick is False - confirm.
        ticker.tick = False
    print('done')
示例12: delete
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def delete(primary_keys_map,collection_name,todelete,commit=False):
    """Delete the documents that match the primary-key values in ``todelete``.

    Only keys that are members of ``primary_keys_map`` are used; each value
    is parsed with ``QueryParser`` against its field and all resulting
    clauses must match. Other keys in ``todelete`` are ignored.

    Args:
        primary_keys_map: container of primary-key field names (only
            membership tests are performed on it).
        collection_name: index directory path, or ``"DEFAULT"`` for
            ``"IndexFiles.index"``.
        todelete: JSON text of an object mapping field names to query strings.
        commit: when equal to ``True``, commit explicitly before closing.

    Returns:
        0 on success, 100 if ``todelete`` is not valid JSON for an object,
        105 if the writer or a reader cannot be opened on the index.
    """
    INDEX_DIR_DEFAULT="IndexFiles.index"
    INDEX_DIR=collection_name if collection_name!="DEFAULT" else INDEX_DIR_DEFAULT

    try:
        tofind_keyvalue_pairs=json.loads(todelete)
    except (TypeError, ValueError):
        return 100
    if not isinstance(tofind_keyvalue_pairs, dict):
        # Reject non-object JSON here instead of failing after the writer
        # has been opened.
        return 100

    direc=SimpleFSDirectory(File(INDEX_DIR))
    analyzer=StandardAnalyzer(Version.LUCENE_CURRENT)

    # setting writer configurations
    writer=None
    try:
        config=IndexWriterConfig(Version.LUCENE_CURRENT,analyzer)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)
        writer=IndexWriter(direc,config)
        # The reader is never used for the deletion itself; opening it
        # appears to serve only as a check that the index can be read, so
        # it is closed again straight away.
        IndexReader.open(direc).close()
    except Exception:
        if writer is not None:
            # Do not leave the writer open on the error path.
            writer.rollback()
        return 105

    # As of now, deletion of documents is only supported on indexed
    # (primary) keys.
    try:
        query=BooleanQuery()
        for key, value in tofind_keyvalue_pairs.items():
            if key in primary_keys_map:
                clause=QueryParser(Version.LUCENE_CURRENT,key,analyzer).parse(value)
                query.add(BooleanClause(clause,BooleanClause.Occur.MUST))
        writer.deleteDocuments(query)
        # Kept as an equality test so non-boolean arguments behave as before.
        if commit==True:
            writer.commit()
        # NOTE(review): the flattened source does not show whether close()
        # was inside the "if commit" branch; it is treated as unconditional
        # so the writer is never left open - confirm.
        writer.close()
    except Exception:
        writer.rollback()
        raise
    return 0
示例13: __init__
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def __init__(self, fileRoot, storeDir, analyzer):
    """Index ``fileRoot`` into a new BM25-scored index under ``storeDir``.

    Args:
        fileRoot: passed unchanged to ``self.indexDocs``.
        storeDir: index directory; created when it does not exist yet.
        analyzer: analyzer to use, wrapped in a ``LimitTokenCountAnalyzer``
            with a limit of 1048576.
    """
    if not os.path.exists(storeDir):
        os.mkdir(storeDir)

    index_dir = SimpleFSDirectory(File(storeDir))
    capped_analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
    writer_config = IndexWriterConfig(Version.LUCENE_CURRENT, capped_analyzer)
    # Available similarity: BM25Similarity, MultiSimilarity,
    # PerFieldSimilarityWrapper, SimilarityBase, TFIDFSimilarity
    writer_config.setSimilarity(similarities.BM25Similarity())
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)

    index_writer = IndexWriter(index_dir, writer_config)
    self.indexDocs(fileRoot, index_writer)

    # Python 2 print statement; the trailing comma suppresses the newline.
    print('commit index'),
    index_writer.commit()
    index_writer.close()
    print('done')
示例14: __init__
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def __init__(self, destination_directory, analyzer):
    """Build a new index under ``destination_directory`` via ``self.tweetIndexer``.

    Args:
        destination_directory: index directory; created when it does not
            exist yet.
        analyzer: analyzer to use, wrapped in a ``LimitTokenCountAnalyzer``
            with a limit of 1048576.

    If indexing, committing or closing fails, the writer is rolled back and
    the exception is re-raised. The ticker is told to stop on every exit
    path.
    """
    if not os.path.exists(destination_directory):
        os.mkdir(destination_directory)

    store = SimpleFSDirectory(File(destination_directory))
    analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
    config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(store, config)

    ticker = Ticker()
    try:
        self.tweetIndexer(writer)
        threading.Thread(target=ticker.run).start()
        writer.commit()
        writer.close()
    except Exception:
        # Discard uncommitted changes and close the writer so it is not left
        # open, then let the caller see the original error.
        writer.rollback()
        raise
    finally:
        # Previously skipped on errors, which left the ticker thread running.
        # NOTE(review): assumes Ticker.run loops until tick is False - confirm.
        ticker.tick = False
    print('done')
示例15: __init__
# 需要导入模块: from org.apache.lucene.index import IndexWriter [as 别名]
# 或者: from org.apache.lucene.index.IndexWriter import commit [as 别名]
def __init__(self, root, storeDir, analyzer):
    """Index ``root`` into a new index stored under ``storeDir``.

    Args:
        root: passed unchanged to ``self.indexDocs``.
        storeDir: index directory; created when it does not exist yet.
        analyzer: analyzer to use, wrapped in a ``LimitTokenCountAnalyzer``.

    If indexing, committing or closing fails, the writer is rolled back and
    the exception is re-raised. The ticker is told to stop on every exit
    path.
    """
    if not os.path.exists(storeDir):
        os.mkdir(storeDir)

    # Store index files in the file system. (Original note: try NIOFSDirectory.)
    store = SimpleFSDirectory(File(storeDir))
    # maxTokenCount=1048576: this analyzer limits the number of tokens per
    # field; per the original note it is not necessary for indexing MEDLINE.
    analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
    config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(store, config)

    ticker = Ticker()
    try:
        self.indexDocs(root, writer)
        # Python 2 print statement; the trailing comma suppresses the newline.
        print('commit index'),
        threading.Thread(target=ticker.run).start()
        writer.commit()
        writer.close()
    except Exception:
        # Discard uncommitted changes and close the writer so it is not left
        # open, then let the caller see the original error.
        writer.rollback()
        raise
    finally:
        # Previously skipped on errors, which left the ticker thread running.
        # NOTE(review): assumes Ticker.run loops until tick is False - confirm.
        ticker.tick = False
    print('done')