本文整理汇总了Python中org.apache.lucene.index.IndexWriterConfig.setOpenMode方法的典型用法代码示例。如果您正苦于以下问题：Python IndexWriterConfig.setOpenMode方法的具体用法？Python IndexWriterConfig.setOpenMode怎么用？Python IndexWriterConfig.setOpenMode使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.index.IndexWriterConfig的用法示例。
在下文中一共展示了IndexWriterConfig.setOpenMode方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: build_index
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def build_index():
    """Rebuild the Lucene index of the posts directory from scratch.

    Starts the JVM, opens an ``IndexWriter`` in ``CREATE`` mode on the
    directory named by ``current_app.config['INDEX_STORE_DIR']``, hands the
    posts directory to ``indexDocs`` and then commits and closes the writer.
    A ``Ticker`` runs on a background thread while the commit is in progress.
    """
    lucene.initVM()
    # post_dir = current_app.config['LOCAL_REPO_PATH'] + '/_posts/'
    # NOTE(review): hard-coded developer path; the configured path above
    # looks like the intended value -- confirm before deploying.
    post_dir = '/Users/w3/data/github/codeif_backup'
    index_store_dir = current_app.config['INDEX_STORE_DIR']
    print(post_dir)
    print(index_store_dir)

    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    store = SimpleFSDirectory(File(index_store_dir))
    # Wrap the analyzer so at most 1048576 tokens are indexed per field.
    analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
    config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(store, config)

    indexDocs(post_dir, writer)

    ticker = Ticker()
    # Python 2 print statement: the trailing comma suppresses the newline.
    print('commit index'),
    threading.Thread(target=ticker.run).start()
    try:
        writer.commit()
        writer.close()
    finally:
        # Always signal the ticker, even when commit/close raises; otherwise
        # its thread (presumably looping while ``tick`` is true) never stops.
        ticker.tick = False
    print('done')
示例2: __init__
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def __init__(self, root, storeDir, analyzer):
    """Index everything under ``root`` into a fresh index at ``storeDir``.

    :param root: directory passed to ``self.indexDocs`` as the source of documents
    :param storeDir: directory that holds the index; created with ``os.mkdir`` when missing
    :param analyzer: analyzer to use; wrapped so at most 1048576 tokens are indexed per field
    """
    # Create the index dir if it does not exist
    if not os.path.exists(storeDir):
        os.mkdir(storeDir)

    # The SimpleFSDirectory the index will be written to
    store = SimpleFSDirectory(File(storeDir))
    analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
    config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)

    # Create an index writer and attach the index dir and config to it
    writer = IndexWriter(store, config)

    # Index all the files found under root using the writer
    self.indexDocs(root, writer)

    # Start a ticker while the commit is in progress
    ticker = Ticker()
    print('commit index')
    threading.Thread(target=ticker.run).start()
    try:
        writer.commit()
        writer.close()
    finally:
        # Stop the ticker whether or not commit/close succeeded; otherwise
        # its thread (presumably looping while ``tick`` is true) never ends.
        ticker.tick = False
    print('Done')
示例3: _get_writer
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def _get_writer(self, analyzer=None, create=False):
    """Build an ``IndexWriter`` on ``self._store``.

    :param analyzer: analyzer for the writer; ``self._analyzer`` is used
        when this is ``None``. (Previously the argument was accepted but
        silently ignored.)
    :param create: when true the writer is opened in ``CREATE`` mode;
        otherwise the open mode of the config is left untouched
    :return: the newly created ``IndexWriter``
    """
    if analyzer is None:
        analyzer = self._analyzer
    config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    if create:
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    if self._similarity is not None:
        config.setSimilarity(self._similarity)
    return IndexWriter(self._store, config)
示例4: open_writer
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def open_writer(self):
    """Create the ``IndexWriter`` in ``CREATE`` mode and store it on ``self.writer``.

    :raises Exception: if ``self.writer`` is already set
    """
    if self.writer is not None:
        raise Exception("IndexWriter is already open")

    writer_config = IndexWriterConfig(self.get_version(), self.get_analyzer())
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    self.writer = IndexWriter(self.dir, writer_config)
示例5: deleteRec
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def deleteRec(self, pid):
    """Delete every document whose ``uid`` term equals ``pid``.

    Opens a writer in ``APPEND`` mode on ``self.indexDir``, deletes and
    commits, then replaces ``self.searcher`` with a searcher on a freshly
    opened reader.

    :param pid: value of the ``uid`` term identifying the documents to delete
    """
    config = IndexWriterConfig(self.analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.APPEND)
    writer = IndexWriter(self.indexDir, config)
    try:
        writer.deleteDocuments(Term('uid', pid))
        writer.commit()
    finally:
        # Close even on failure so the index write lock is not left held,
        # which would make every later IndexWriter on this index fail.
        writer.close()
    self.searcher = IndexSearcher(DirectoryReader.open(self.indexDir))
示例6: getWriter
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def getWriter(self, store, analyzer=None, create=False):
    """Return an ``IndexWriter`` on ``store``.

    :param store: directory object the writer operates on
    :param analyzer: analyzer to wrap; a ``WhitespaceAnalyzer`` when ``None``
    :param create: when true the writer is opened in ``CREATE`` mode
    :return: the new ``IndexWriter``
    """
    base_analyzer = WhitespaceAnalyzer() if analyzer is None else analyzer
    # Whichever analyzer is used, it is capped at 10000 tokens.
    capped_analyzer = LimitTokenCountAnalyzer(base_analyzer, 10000)
    writer_config = IndexWriterConfig(capped_analyzer)
    if create:
        writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    return IndexWriter(store, writer_config)
示例7: __init__
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def __init__(self, store_dir, context, analyzer):
    """Open a ``CREATE``-mode writer on ``store_dir`` and index ``context``.

    :param store_dir: index directory; created with ``os.mkdir`` when missing
    :param context: passed unchanged to ``self.index``
    :param analyzer: analyzer handed to the writer configuration
    """
    directory_missing = not os.path.exists(store_dir)
    if directory_missing:
        os.mkdir(store_dir)

    directory = SimpleFSDirectory(File(store_dir))
    writer_config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    self.writer = IndexWriter(directory, writer_config)

    self.index(context)
    self.complete_index()
示例8: Indexer
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
class Indexer(object):
    """Wrapper around a Lucene ``IndexWriter`` opened in ``CREATE`` mode."""

    def __init__(self, **kwargs):
        """ Set up the index directory, analyzer, writer and field types.

        :param root: The directory of the underlying index (default ``"index"``),
            created with ``os.mkdir`` when it does not exist
        :param analyzer: The analyzer to work with (default: a ``StandardAnalyzer``)
        """
        self.output = kwargs.get("root", "index")
        if not os.path.exists(self.output):
            os.mkdir(self.output)

        # The fallback analyzer is constructed even when one is supplied.
        chosen_analyzer = kwargs.get("analyzer", StandardAnalyzer(Version.LUCENE_CURRENT))
        self.analyzer = LimitTokenCountAnalyzer(chosen_analyzer, 1048576)

        self.config = IndexWriterConfig(Version.LUCENE_CURRENT, self.analyzer)
        self.config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)

        self.store = SimpleFSDirectory(File(self.output))
        self.writer = IndexWriter(self.store, self.config)
        self.create_field_types()

    def index(self, document):
        """ Add a document to the index; failures are logged, not raised.

        :param document: The document to add to the indexer
        """
        try:
            self.writer.addDocument(document)
        except Exception:
            logger.exception("Failed to index the supplied document")

    def shutdown(self):
        """ Close the writer; failures are logged, not raised.
        """
        try:
            # self.writer.optimize()
            self.writer.close()
        except Exception:
            logger.exception("Failed to shutdown the indexer correctly")

    @staticmethod
    def _build_field_type(stored, tokenized, index_options):
        """ Return an indexed ``FieldType`` with the given flags and index options.
        """
        field_type = FieldType()
        field_type.setIndexed(True)
        field_type.setStored(stored)
        field_type.setTokenized(tokenized)
        field_type.setIndexOptions(index_options)
        return field_type

    def create_field_types(self):
        """ Create the two field types kept on the instance.

        ``field_clean`` is stored and not tokenized; ``field_dirty`` is
        tokenized, not stored, and additionally records positions.
        """
        self.field_clean = self._build_field_type(
            True, False, FieldInfo.IndexOptions.DOCS_AND_FREQS)
        self.field_dirty = self._build_field_type(
            False, True, FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)
示例9: __init__
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def __init__(self, indexDir):
    """Open ``self.writer`` on ``indexDir`` in ``CREATE_OR_APPEND`` mode.

    :param indexDir: index directory; created with ``os.mkdir`` when missing
    """
    if not os.path.exists(indexDir):
        os.mkdir(indexDir)

    directory = SimpleFSDirectory(File(indexDir))
    writer_config = IndexWriterConfig(
        Version.LUCENE_CURRENT, StandardAnalyzer(Version.LUCENE_CURRENT))
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)
    self.writer = IndexWriter(directory, writer_config)
示例10: __init__
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def __init__(self, root, storeDir, analyzer):
    """Index ``root`` into a fresh index at ``storeDir``, then commit and close.

    :param root: passed to ``self.indexDocs`` together with the writer
    :param storeDir: index directory; created with ``os.mkdir`` when missing
    :param analyzer: analyzer handed to the writer configuration
    """
    store_exists = os.path.exists(storeDir)
    if not store_exists:
        os.mkdir(storeDir)

    directory = SimpleFSDirectory(File(storeDir))
    writer_config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    index_writer = IndexWriter(directory, writer_config)

    self.indexDocs(root, index_writer)

    index_writer.commit()
    index_writer.close()
示例11: get_writer
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def get_writer(index='index'):
    """Return an ``IndexWriter`` on ``index`` opened in ``CREATE_OR_APPEND`` mode.

    :param index: path of the index directory (default ``'index'``)
    :return: a writer using a ``StandardAnalyzer`` capped at 1048576 tokens
    """
    directory = SimpleFSDirectory(File(index))
    capped_analyzer = LimitTokenCountAnalyzer(
        StandardAnalyzer(Version.LUCENE_CURRENT), 1048576)
    writer_config = IndexWriterConfig(Version.LUCENE_CURRENT, capped_analyzer)
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)
    return IndexWriter(directory, writer_config)
示例12: WikiPageIndex
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
class WikiPageIndex():
    """Create and query a Lucene index of wiki pages with ``Title`` and ``Text`` fields."""

    def __init__(self, index_dir):
        """Prepare directory, analyzer and writer configuration for ``index_dir``.

        :param index_dir: path of the index directory
        """
        #lucene.initVM(vmargs=['-Djava.awt.headless=true', '-Xmx4g'])
        self.index_dir = index_dir
        self.directory = SimpleFSDirectory(File(self.index_dir))
        self.analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
        self.config = IndexWriterConfig(Version.LUCENE_CURRENT, self.analyzer)
        self.config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        # Opening a reader on a directory that holds no index raises, which
        # made it impossible to construct this object in order to *create*
        # the index. Open the searcher now only when an index already
        # exists; otherwise searchIndex() opens it on first use.
        self.searcher = None
        if DirectoryReader.indexExists(self.directory):
            self.searcher = IndexSearcher(DirectoryReader.open(self.directory))

    def createIndex(self):
        """Create the index directory if needed and open ``self.writer``."""
        # Ensure the directory exists *before* the writer touches it (the
        # original performed this check only after creating the writer).
        if not os.path.exists(self.index_dir):
            os.makedirs(self.index_dir)
        self.writer = IndexWriter(self.directory, self.config)

    def addDocumentToIndex(self, title, text):
        """Add one page to the index.

        :param title: stored and analyzed as the ``Title`` field
        :param text: stored and analyzed as the ``Text`` field
        """
        doc = Document()
        doc.add(Field("Title", title, Field.Store.YES, Field.Index.ANALYZED))
        doc.add(Field("Text", text, Field.Store.YES, Field.Index.ANALYZED))
        self.writer.addDocument(doc)

    def closeIndex(self):
        """Commit pending documents and close ``self.writer``."""
        self.writer.commit()
        self.writer.close()
        # NOTE(review): a searcher opened earlier is not refreshed here, so it
        # presumably keeps serving its older snapshot -- confirm with callers.

    def searchIndex(self, queryString, field="Text", max_results=100):
        """Run a query and return the matching documents.

        :param queryString: query text handed to ``QueryParser``
        :param field: default field for the query (default ``"Text"``)
        :param max_results: maximum number of hits to fetch (default 100)
        :return: list of the matching documents, in hit order
        """
        if self.searcher is None:
            # No index existed at construction time; open the reader lazily.
            self.searcher = IndexSearcher(DirectoryReader.open(self.directory))
        query = QueryParser(Version.LUCENE_CURRENT, field, self.analyzer).parse(queryString)
        scoreDocs = self.searcher.search(query, max_results).scoreDocs
        log.debug("Found {0} documents for query [{1}]".format(len(scoreDocs), queryString))
        docs = []
        for scoreDoc in scoreDocs:
            doc = self.searcher.doc(scoreDoc.doc)
            log.debug(WikiPageIndex.cleanWikiText(doc.get("Text")))
            #print("title: {0}\ncontents: {1}".format(doc.get("Title"), doc.get("Text")[:70]))
            docs.append(doc)
        return docs

    @staticmethod
    def cleanWikiText(text):
        """Strip wiki markup and non-ASCII characters from ``text``.

        Drops non-ASCII characters, removes single-line ``[[...]]``,
        ``{{...}}`` and ``{|...|}`` spans, replaces every run of characters
        other than letters, digits, ``_``, ``-`` and newline with one space,
        and collapses newline runs (with surrounding blanks) to one newline.

        :param text: text to clean
        :return: the cleaned text with leading/trailing whitespace removed
        """
        # Decode back to text after dropping non-ASCII characters: on
        # Python 3 the encoded value is ``bytes`` and the str patterns below
        # would raise TypeError. On Python 2 the result is an ASCII-only
        # unicode string equal to the former byte string.
        text = text.encode('ascii', 'ignore').decode('ascii')
        text = re.sub(r'(\[\[.*?\]\]|\{\{.*?\}\}|\{\|.*?\|\})', '', text)
        text = re.sub(r'[^\na-zA-Z0-9_-]+', ' ', text)
        text = re.sub(r'([ \t]*[\n]+[ \t]*)+', '\n', text)
        return text.strip()
示例13: __init__
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def __init__(self, startJVM=False):
    """Open a ``CREATE``-mode writer on the ``index_dir`` directory.

    :param startJVM: when true, ``lucene.initVM`` is called first (headless mode)
    """
    if startJVM:
        lucene.initVM(vmargs=['-Djava.awt.headless=true'])

    self.STORE_DIR = "index_dir"
    self.store = SimpleFSDirectory(File(self.STORE_DIR))

    # Standard analyzer capped at 10000 tokens.
    self.analyzer = LimitTokenCountAnalyzer(
        StandardAnalyzer(Version.LUCENE_CURRENT), 10000)

    writer_config = IndexWriterConfig(Version.LUCENE_CURRENT, self.analyzer)
    writer_config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    self.writer = IndexWriter(self.store, writer_config)
示例14: dummyIndex
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def dummyIndex(self):
    """
    Create a dummy index - to avoid problems updating it

    Opens a writer in ``CREATE`` mode on ``self.indexDir`` and commits a
    single document whose ``uid`` field is ``'dummy'``.
    """
    config = IndexWriterConfig(self.analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(self.indexDir, config)
    try:
        doc = Document()
        doc.add(Field('uid', 'dummy', StringField.TYPE_STORED))
        writer.addDocument(doc)
        writer.commit()
    finally:
        # Close even on failure so the index write lock is not left held.
        writer.close()
示例15: buildIndex
# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 或者: from org.apache.lucene.index.IndexWriterConfig import setOpenMode [as 别名]
def buildIndex(self, inputFile):
    """Build a fresh index at ``self.luceneDir`` from a two-column input file.

    Each line is split into a ``cui`` and a concept string; the concept is
    additionally stored in normalized, word-sorted and stemmed variants.
    Lines that cannot be processed are reported and skipped. The writer
    commits after every 1000 added documents and once more at the end.

    :param inputFile: path of the input file, one ``"cui","concept"`` pair
        (or tab-separated pair) per line
    """
    analyzer = self.getAnalyzer()
    iwconf = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    iwconf.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
    writer = IndexWriter(SimpleFSDirectory(File(self.luceneDir)), iwconf)

    # read through input file and write out to lucene
    counter = 0  # number of documents built so far; doubles as document id
    with open(inputFile, 'r') as lines:
        for linesRead, line in enumerate(lines, 1):
            try:
                if linesRead % 1000 == 0:
                    print("%d lines read" % linesRead)
                # Turn `"cui","concept"` into `cui<TAB>concept`; a line that
                # does not yield exactly two columns raises and is skipped.
                cui, concept = line.replace("\",\"", "\t").replace("\"", "").split("\t")
                concept = concept.strip()
                cui = cui.strip()
                strNorm = self.normalizeCasePunct(concept)
                strSorted = self.sortWords(strNorm)
                strStemmed = self.stemWords(strNorm)
                strStemmedSorted = self.stemWords(strSorted)

                fdoc = Document()
                counter += 1
                field_values = (
                    ("id", unicode(counter)),
                    ("cui", cui),
                    ("str", concept),
                    ("str_norm", strNorm),
                    ("str_sorted", strSorted),
                    ("str_stemmed", strStemmed),
                    ("str_stemmedSorted", strStemmedSorted),
                )
                for field_name, field_value in field_values:
                    fdoc.add(Field(field_name, field_value,
                                   Field.Store.YES, Field.Index.NOT_ANALYZED))
                writer.addDocument(fdoc)
                if counter % 1000 == 0:
                    writer.commit()
            except Exception:
                # The original built this message but never printed it, and
                # its bare ``except:`` also swallowed KeyboardInterrupt.
                print("Skipping line: %s" % line.rstrip())
    writer.commit()
    writer.close()