本文整理汇总了 Python 中 lucene.IndexWriter.setMaxFieldLength 方法的典型用法代码示例。如果您正苦于以下问题：Python IndexWriter.setMaxFieldLength 方法的具体用法是什么？Python IndexWriter.setMaxFieldLength 怎么用？有哪些 Python IndexWriter.setMaxFieldLength 的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 lucene.IndexWriter 的用法示例。
在下文中一共展示了 IndexWriter.setMaxFieldLength 方法的 4 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: __init__
# 需要导入模块: from lucene import IndexWriter [as 别名]
# 或者: from lucene.IndexWriter import setMaxFieldLength [as 别名]
def __init__(self, root, storeDir, analyzer):
if not os.path.exists(storeDir):
os.mkdir(storeDir)
store = FSDirectory.getDirectory(storeDir, True)
writer = IndexWriter(store, analyzer, True)
writer.setMaxFieldLength(1048576)
self.indexDocs(root, writer)
print 'optimizing index',
writer.optimize()
writer.close()
print 'done'
示例2: Index
# 需要导入模块: from lucene import IndexWriter [as 别名]
# 或者: from lucene.IndexWriter import setMaxFieldLength [as 别名]
def Index():
field_list, conn, _config_dict = _InitIndexer()
indexDir = _config_dict["indexDir"]
if not os.path.exists(indexDir):
os.mkdir(indexDir)
store = SimpleFSDirectory(lucene.File(indexDir))
# print store
writer = IndexWriter(
store, SmartChineseAnalyzer(lucene.Version.LUCENE_CURRENT), True, IndexWriter.MaxFieldLength.LIMITED
)
writer.setMaxFieldLength(1048576)
try:
ticker = Ticker()
ticker.start()
_IndexDocs(writer, field_list, conn)
ticker.end()
ticker.TimeCost()
except Exception, e:
print "Failed in Indexing...", e
traceback.print_exc()
示例3: IndexCorpus
# 需要导入模块: from lucene import IndexWriter [as 别名]
# 或者: from lucene.IndexWriter import setMaxFieldLength [as 别名]
class IndexCorpus(object):
    """Index a directory tree of numerically named files with Lucene.

    ``index`` walks a directory, recursing into sub-directories, and hands
    every file whose name consists only of digits to ``self.index_file``.
    ``index_file`` is not part of this excerpt and is expected to be
    provided elsewhere on the class.
    """

    # Default document used to train the short-string compressor.
    TRAINING_DATA_PATH = '/Users/tal/corpus/analyzed/5/5344'

    def __init__(self, index_dir, analyzer, compress=False):
        """Open a new index writer on ``index_dir``.

        Args:
            index_dir: Directory holding the index; created when missing.
            analyzer: Lucene analyzer handed to the ``IndexWriter``.
            compress: When true, a compressor is built immediately and
                stored on ``self.compressor``.
        """
        self.metadata = True
        if not os.path.exists(index_dir):
            os.mkdir(index_dir)
        self.compress = compress
        store = SimpleFSDirectory(File(index_dir))
        self.writer = IndexWriter(store, analyzer, True,
                                  IndexWriter.MaxFieldLength.LIMITED)
        # Raise the per-field term limit above the LIMITED default.
        self.writer.setMaxFieldLength(1048576)
        if self.compress:
            self.compressor = self.get_compressor()

    def get_compressor(self, path=None):
        """Return a compressor trained on the UTF-8 text stored at ``path``.

        Args:
            path: Training document to read; defaults to
                ``TRAINING_DATA_PATH`` when omitted.
        """
        if path is None:
            path = self.TRAINING_DATA_PATH
        # The context manager closes the file handle, which the previous
        # bare ``codecs.open(...).read()`` left open.
        with codecs.open(path, encoding='utf8') as training_file:
            training_data = training_file.read()
        return trained_short_string_compressor(training_data.encode('utf8'))

    def finalize(self):
        """Optimize the index and close the writer."""
        self.writer.optimize()
        self.writer.close()

    def index(self, directory):
        """Index every digit-named entry below ``directory``.

        Entries are visited in ascending numeric order. Directories are
        descended into recursively; files go to ``self.index_file``. A
        failure on one file is printed and does not stop the run.
        """
        numeric_names = [x for x in os.listdir(directory) if x.isdigit()]
        for filename in sorted(numeric_names, key=int):
            path = os.path.join(directory, filename)
            if os.path.isdir(path):
                self.index(path)
                continue
            # Progress marker for every file whose number is a multiple of 100.
            if int(filename) % 100 == 0:
                print("%s %s" % (datetime.now().ctime(), filename))
            try:
                self.index_file(path)
            except Exception as e:
                # Best effort: report the failure and keep indexing.
                print("Indexing exception: %s" % (e,))
示例4: SimpleFSDirectory
# 需要导入模块: from lucene import IndexWriter [as 别名]
# 或者: from lucene.IndexWriter import setMaxFieldLength [as 别名]
env=lucene.initVM()
print 'Using Directory: ', STORE_DIR
notExist = 0
# both the main program and the background indexer will share the same directory and analyzer
if not os.path.exists(STORE_DIR):
os.mkdir(STORE_DIR)
notExist = 1
directory = SimpleFSDirectory(File(STORE_DIR))
analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
# we will need a writer
writer = IndexWriter(directory,analyzer,True,IndexWriter.MaxFieldLength.LIMITED)
writer.setMaxFieldLength(1048576)
if notExist == 1:
writer.close()
# and start the indexer
# note the indexer thread is set to daemon causing it to terminate on a SIGINT
folder = "tweets"
indexer = Indexer(STORE_DIR,writer,folder)
indexer.setDaemon(True)
indexer.start()
print 'Starting Indexer in background...'
run(writer, analyzer)
quit_gracefully()