当前位置: 首页>>代码示例>>Python>>正文


Python IndexWriter.setMaxFieldLength方法代码示例

本文整理汇总了Python中lucene.IndexWriter.setMaxFieldLength方法的典型用法代码示例。如果您正苦于以下问题:Python IndexWriter.setMaxFieldLength方法的具体用法?Python IndexWriter.setMaxFieldLength怎么用?Python IndexWriter.setMaxFieldLength使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在lucene.IndexWriter的用法示例。


在下文中一共展示了IndexWriter.setMaxFieldLength方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

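# Required import: from lucene import IndexWriter [as alias]
# Note: setMaxFieldLength is an instance method, so it cannot be imported by itself; call it on an IndexWriter object, e.g. writer.setMaxFieldLength(n)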
    def __init__(self, root, storeDir, analyzer):

        if not os.path.exists(storeDir):
            os.mkdir(storeDir)
        store = FSDirectory.getDirectory(storeDir, True)
        writer = IndexWriter(store, analyzer, True)
        writer.setMaxFieldLength(1048576)
        self.indexDocs(root, writer)
        print 'optimizing index',
        writer.optimize()
        writer.close()
        print 'done'
开发者ID:christineyen,项目名称:location-extraction,代码行数:14,代码来源:IndexFiles.py

示例2: Index

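# Required import: from lucene import IndexWriter [as alias]
# Note: setMaxFieldLength is an instance method, so it cannot be imported by itself; call it on an IndexWriter object, e.g. writer.setMaxFieldLength(n)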
def Index():
    field_list, conn, _config_dict = _InitIndexer()

    indexDir = _config_dict["indexDir"]
    if not os.path.exists(indexDir):
        os.mkdir(indexDir)
    store = SimpleFSDirectory(lucene.File(indexDir))
    # print store
    writer = IndexWriter(
        store, SmartChineseAnalyzer(lucene.Version.LUCENE_CURRENT), True, IndexWriter.MaxFieldLength.LIMITED
    )
    writer.setMaxFieldLength(1048576)
    try:
        ticker = Ticker()
        ticker.start()
        _IndexDocs(writer, field_list, conn)
        ticker.end()
        ticker.TimeCost()
    except Exception, e:
        print "Failed in Indexing...", e
        traceback.print_exc()
开发者ID:BurnedRobot,项目名称:SearchEngine,代码行数:23,代码来源:Indexer.py

示例3: IndexCorpus

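# Required import: from lucene import IndexWriter [as alias]
# Note: setMaxFieldLength is an instance method, so it cannot be imported by itself; call it on an IndexWriter object, e.g. writer.setMaxFieldLength(n)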
class IndexCorpus(object):

    def __init__(self, index_dir, analyzer, compress=False):
        self.metadata = True
        if not os.path.exists(index_dir):
            os.mkdir(index_dir)

        self.compress = compress
        store = SimpleFSDirectory(File(index_dir))
        self.writer = IndexWriter(store, analyzer, True, 
                IndexWriter.MaxFieldLength.LIMITED)
        self.writer.setMaxFieldLength(1048576)

        if self.compress:
            self.compressor = self.get_compressor()
        
    def get_compressor(self):
        path = '/Users/tal/corpus/analyzed/5/5344'
        training_data = codecs.open(path, encoding='utf8').read()
        return trained_short_string_compressor(training_data.encode('utf8'))

    def finalize(self):
        self.writer.optimize()
        self.writer.close()

    def index(self, directory):
        files = [x for x in os.listdir(directory) if x.isdigit()]
        for filename in sorted(files, key=int):
            path = os.path.join(directory, filename)
            if not filename.isdigit():
                continue
            if os.path.isdir(path):
                self.index(path)
            else:
                if int(filename) % 100 == 0:
                    print datetime.now().ctime(), filename
                try:
                    self.index_file(path)
                except Exception, e:
                    print "Indexing exception:", e
开发者ID:TalLinzen,项目名称:hebrew-blog-corpus,代码行数:42,代码来源:build.py

示例4: SimpleFSDirectory

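# Required import: from lucene import IndexWriter [as alias]
# Note: setMaxFieldLength is an instance method, so it cannot be imported by itself; call it on an IndexWriter object, e.g. writer.setMaxFieldLength(n)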
	# Start-up sequence: initialise the Lucene VM, open an index writer on
	# STORE_DIR, start the background Indexer, then hand control to run().
	# (The enclosing scope of this fragment is not visible here.)
	env=lucene.initVM()
	print 'Using Directory: ', STORE_DIR
	
	# set to 1 below when STORE_DIR had to be created by this run
	notExist = 0
        
        # both the main program and the background indexer will share the same directory and analyzer
	if not os.path.exists(STORE_DIR):
		os.mkdir(STORE_DIR)
		notExist = 1
		
	directory = SimpleFSDirectory(File(STORE_DIR))
	analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
	
	# we will need a writer
	writer = IndexWriter(directory,analyzer,True,IndexWriter.MaxFieldLength.LIMITED)
	# replaces the LIMITED setting passed to the constructor just above
	writer.setMaxFieldLength(1048576)
	
	# NOTE(review): when STORE_DIR was just created the writer is closed
	# here, yet the same writer object is still handed to Indexer and
	# run() below -- confirm that this is intended.
	if notExist == 1:
		writer.close()
	
	# and start the indexer
	# note the indexer thread is set to daemon causing it to terminate on a SIGINT
	folder = "tweets"
	indexer = Indexer(STORE_DIR,writer,folder)
	indexer.setDaemon(True)
	indexer.start()
	print 'Starting Indexer in background...'
	
	# run() receives the shared writer and analyzer; quit_gracefully() is
	# called once run() returns (both are defined outside this fragment)
	run(writer, analyzer)
	quit_gracefully()
开发者ID:greedo,项目名称:TweetCrowdRanking,代码行数:32,代码来源:crowdIndexer.py


注:本文中的lucene.IndexWriter.setMaxFieldLength方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。