当前位置: 首页>>代码示例>>Python>>正文


Python IndexWriterConfig.setSimilarity方法代码示例

本文整理汇总了Python中org.apache.lucene.index.IndexWriterConfig.setSimilarity方法的典型用法代码示例。如果您正苦于以下问题:Python IndexWriterConfig.setSimilarity方法的具体用法?Python IndexWriterConfig.setSimilarity怎么用?Python IndexWriterConfig.setSimilarity使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.index.IndexWriterConfig的用法示例。


在下文中一共展示了IndexWriterConfig.setSimilarity方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 注意: setSimilarity 是 IndexWriterConfig 的实例方法,不能单独导入,需通过实例调用,例如 config.setSimilarity(similarity)
    def __init__(self, path, settings):
        """Set up the Lucene index and facet taxonomy stored under *path*.

        Two memory-mapped directories are opened, ``<path>/index`` and
        ``<path>/taxo``.  Unless ``settings.readonly`` is true, an index
        writer and a taxonomy writer are created on them and committed once
        immediately.

        :param path: base directory containing the ``index`` and ``taxo``
            sub-directories.
        :param settings: configuration object; this method reads its
            ``multithreaded``, ``analyzer``, ``similarity``,
            ``maxMergeAtOnce``, ``segmentsPerTier``, ``readonly``,
            ``lruTaxonomyWriterCacheSize`` and ``fieldRegistry`` attributes.
        """
        def openMMap(subdir):
            # Memory-mapped directory at <path>/<subdir> with unmapping disabled.
            directory = MMapDirectory(File(join(path, subdir)))
            directory.setUseUnmap(False)
            return directory

        self._settings = settings
        self._multithreaded = settings.multithreaded
        self._checker = DirectSpellChecker()

        indexDirectory = openMMap('index')
        taxoDirectory = openMMap('taxo')

        # Writer configuration: analyzer and similarity come from the
        # settings, merging is tiered with the configured limits.
        writerConfig = IndexWriterConfig(Version.LUCENE_4_10_0, settings.analyzer)
        writerConfig.setSimilarity(settings.similarity)
        tieredPolicy = TieredMergePolicy()
        tieredPolicy.setMaxMergeAtOnce(settings.maxMergeAtOnce)
        tieredPolicy.setSegmentsPerTier(settings.segmentsPerTier)
        writerConfig.setMergePolicy(tieredPolicy)

        if not settings.readonly:
            # NOTE(review): the immediate commits presumably make sure both
            # stores exist on disk before IndexAndTaxonomy opens them - confirm.
            self._indexWriter = IndexWriter(indexDirectory, writerConfig)
            self._indexWriter.commit()
            taxoCache = LruTaxonomyWriterCache(settings.lruTaxonomyWriterCacheSize)
            self._taxoWriter = DirectoryTaxonomyWriter(
                taxoDirectory,
                IndexWriterConfig.OpenMode.CREATE_OR_APPEND,
                taxoCache)
            self._taxoWriter.commit()

        self._indexAndTaxonomy = IndexAndTaxonomy(settings, indexDirectory, taxoDirectory)
        self._readerSettingsWrapper = self._indexAndTaxonomy._readerSettingsWrapper

        self._facetsConfig = settings.fieldRegistry.facetsConfig

        self._ordinalsReader = CachedOrdinalsReader(DocValuesOrdinalsReader())
开发者ID:jerryba,项目名称:meresco-lucene,代码行数:29,代码来源:index.py

示例2: _get_writer

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 注意: setSimilarity 是 IndexWriterConfig 的实例方法,不能单独导入,需通过实例调用,例如 config.setSimilarity(similarity)
 def _get_writer(self, analyzer=None, create=False):
     """Build an IndexWriter on ``self._store``.

     :param analyzer: analyzer to index with.  When ``None`` (the default)
         the instance-wide ``self._analyzer`` is used.  Previously this
         argument was accepted but ignored.
     :param create: when true the writer is opened in ``CREATE`` mode;
         otherwise the config's open mode is left untouched.
     :return: a new ``IndexWriter``; closing it is up to the caller.
     """
     if analyzer is None:
         analyzer = self._analyzer
     config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
     if create:
         config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
     # Only override the similarity when one was configured on the instance.
     if self._similarity is not None:
         config.setSimilarity(self._similarity)
     return IndexWriter(self._store, config)
开发者ID:XihuanZeng,项目名称:kaggle,代码行数:10,代码来源:LuceneCorpus.py

示例3: createIndexWriter

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 注意: setSimilarity 是 IndexWriterConfig 的实例方法,不能单独导入,需通过实例调用,例如 config.setSimilarity(similarity)
    def createIndexWriter(self, actual_dir, max_field_length=20000000):
        """
            Build and return an IndexWriter for the directory ``actual_dir``.

            The directory is created if needed (via ``ensureDirExists``), the
            writer is opened in CREATE mode with a StandardAnalyzer and a
            FieldAgnosticSimilarity, and ``deleteAll()`` is called on it before
            it is handed back.

            ``max_field_length`` is not used by this implementation; it is kept
            in the signature so existing callers keep working.
        """
        ensureDirExists(actual_dir)
        directory = SimpleFSDirectory(File(actual_dir))

        config = IndexWriterConfig(
            LuceneVersion.LUCENE_CURRENT,
            StandardAnalyzer(LuceneVersion.LUCENE_CURRENT))
        config.setSimilarity(FieldAgnosticSimilarity())
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)

        writer = IndexWriter(directory, config)
        writer.deleteAll()
        return writer
开发者ID:danieldmm,项目名称:minerva,代码行数:20,代码来源:lucene_index.py

示例4: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 注意: setSimilarity 是 IndexWriterConfig 的实例方法,不能单独导入,需通过实例调用,例如 config.setSimilarity(similarity)
    def __init__(self, fileRoot, storeDir, analyzer):

        if not os.path.exists(storeDir):
            os.mkdir(storeDir)

        store    = SimpleFSDirectory(File(storeDir))
        analyzer = LimitTokenCountAnalyzer(analyzer, 1048576)
        config   = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        config.setSimilarity(similarities.BM25Similarity())
    #Available similarity: BM25Similarity, MultiSimilarity, PerFieldSimilarityWrapper, SimilarityBase, TFIDFSimilarity
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        writer   = IndexWriter(store, config)

        self.indexDocs(fileRoot, writer)
        print 'commit index',
        writer.commit()
        writer.close()
        print 'done'
开发者ID:huqiang,项目名称:CS3246-Assignment1,代码行数:20,代码来源:IndexFiles.py

示例5: __init__

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 注意: setSimilarity 是 IndexWriterConfig 的实例方法,不能单独导入,需通过实例调用,例如 config.setSimilarity(similarity)
    def __init__(self, storeDir, aWrapper):

        if not os.path.exists(storeDir):
            os.mkdir(storeDir)

        store = SimpleFSDirectory(File(storeDir))
        aWrapper = LimitTokenCountAnalyzer(aWrapper, 1048576)
        bm25Sim = BM25Similarity(2.0,0.75) #BM25 with these default values: k1 = 1.2, b = 0.75.
        config = IndexWriterConfig(Version.LUCENE_CURRENT, aWrapper)
        config.setSimilarity(bm25Sim)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE)
        writer = IndexWriter(store, config)


        self.indexTable(writer)
        ticker = Ticker()
        print 'commit index'
        threading.Thread(target=ticker.run).start()
        writer.commit()
        writer.close()
        ticker.tick = False
        print 'done'
开发者ID:PhoenixZhao,项目名称:MovieSearchService,代码行数:24,代码来源:IndexMysql.py

示例6: IndexingEngine

# 需要导入模块: from org.apache.lucene.index import IndexWriterConfig [as 别名]
# 注意: setSimilarity 是 IndexWriterConfig 的实例方法,不能单独导入,需通过实例调用,例如 config.setSimilarity(similarity)
class IndexingEngine():

	def __init__(self):
		"""Create the index writer and the per-field FieldTypes.

		All configuration is read from ``settings.ADMINS_ENGINE``: the
		document and index directories, the indexing analyzers, the writer
		open mode and an optional similarity.  After construction the
		instance exposes ``mIndexWriter``, ``mFieldTypes`` (keyed by
		'name', 'parent', 'content' and 'id') and an empty ``mLog``.
		"""
		def makeFieldType(tokenized, indexOptions):
			# Every field here is indexed and stored; only tokenization
			# and the index options differ between them.
			fieldType = FieldType()
			fieldType.setIndexed(True)
			fieldType.setStored(True)
			fieldType.setTokenized(tokenized)
			fieldType.setIndexOptions(indexOptions)
			return fieldType

		self.mDocumentDirectory = settings.ADMINS_ENGINE.mDocumentDirectory
		self.mIndexDirectory = settings.ADMINS_ENGINE.mIndexDirectory
		self.mAnalyzers = settings.ADMINS_ENGINE.getIndexingAnalyzers()

		############################# Writer Configuration #####################################
		# Field name -> analyzer.  'content' uses the default analyzer, which
		# is also the wrapper's fallback for any field not listed here.
		fieldAnalyzers = HashMap()
		fieldAnalyzers.put('name', self.mAnalyzers['name'])
		fieldAnalyzers.put('parent', self.mAnalyzers['parent'])
		fieldAnalyzers.put('content', self.mAnalyzers['default'])
		fieldAnalyzers.put('id', self.mAnalyzers['id'])

		analyzerWrapper = PerFieldAnalyzerWrapper(self.mAnalyzers['default'], fieldAnalyzers)

		self.mWriterConfig = IndexWriterConfig(Version.LUCENE_CURRENT, analyzerWrapper)
		self.mWriterConfig.setOpenMode(settings.ADMINS_ENGINE.mOpenMode)

		# Only override the similarity when one has been configured.
		if settings.ADMINS_ENGINE.mSimilarity is not None:
			self.mWriterConfig.setSimilarity(settings.ADMINS_ENGINE.mSimilarity)
		########################################################################################

		directory = SimpleFSDirectory(File(self.mIndexDirectory))
		self.mIndexWriter = IndexWriter(directory, self.mWriterConfig)

		############################# FieldType Preparation ####################
		docsOnly = FieldInfo.IndexOptions.DOCS_ONLY
		withOffsets = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS

		self.mFieldTypes = {
			'name': makeFieldType(True, docsOnly),
			'parent': makeFieldType(True, docsOnly),
			# 'content' additionally records frequencies, positions and offsets.
			'content': makeFieldType(True, withOffsets),
			# 'id' is indexed as a single untokenized term.
			'id': makeFieldType(False, docsOnly),
		}
		#######################################################################

		self.mLog = ""

	

	def indexing(self, root = settings.ADMINS_ENGINE.mDocumentDirectory, parent = [], docID = 1, parentID = 0, id = 0):

		realPath = os.path.abspath(root)
		for i in os.listdir(realPath):

			path = os.path.join(realPath, i)
			if os.path.isfile(path):
				#index this file
				doc = Document()

				doc.add(Field('name', ("%s %s" % (' '.join(parent), i)).strip(), self.mFieldTypes['name']))
				doc.add(Field('parent', ' '.join(parent), self.mFieldTypes['parent']))
				doc.add(Field('id', str(docID), self.mFieldTypes['id']))
				doc.add(Field('parentID', str(parentID), self.mFieldTypes['id']))

				fd = open(path, 'r')
				content = fd.read()
				fd.close()

				if len(content) > 0:
					doc.add(Field('content', content, self.mFieldTypes['content']))

				self.mIndexWriter.addDocument(doc)
				##################### Logging ##############################
				if IS_DEBUG:
					nameDebug = AnalyzerDebug.debug(self.mAnalyzers['name'], ("%s %s" % (' '.join(parent), i)).strip())
					parentDebug = AnalyzerDebug.debug(self.mAnalyzers['parent'], ' '.join(parent))
					contentDebug = AnalyzerDebug.debug(self.mAnalyzers['default'], content)
					self.mLog = self.mLog + ( "File %s\n   {name - %s}: %s\n   {parent - %s}: %s\n   {content}: %s\n\n" % (path, docID, nameDebug, parentID, parentDebug, contentDebug) )



#.........这里部分代码省略.........
开发者ID:haonguyen14,项目名称:CLIFinder,代码行数:103,代码来源:IndexingEngine.py


注:本文中的org.apache.lucene.index.IndexWriterConfig.setSimilarity方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。