当前位置: 首页>>代码示例>>Java>>正文


Java IndexWriterConfig.setRAMBufferSizeMB方法代码示例

本文整理汇总了Java中org.apache.lucene.index.IndexWriterConfig.setRAMBufferSizeMB方法的典型用法代码示例。如果您正苦于以下问题:Java IndexWriterConfig.setRAMBufferSizeMB方法的具体用法?Java IndexWriterConfig.setRAMBufferSizeMB怎么用?Java IndexWriterConfig.setRAMBufferSizeMB使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.index.IndexWriterConfig的用法示例。


在下文中一共展示了IndexWriterConfig.setRAMBufferSizeMB方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getIndexWriterConfig

import org.apache.lucene.index.IndexWriterConfig; //导入方法依赖的package包/类
/**
 * Builds the {@link IndexWriterConfig} used by the engine's internal writer.
 *
 * @param create whether to create a fresh index ({@code CREATE}) or append to an
 *               existing one ({@code APPEND})
 * @return the fully configured writer config
 */
private IndexWriterConfig getIndexWriterConfig(boolean create) {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(engineConfig.getAnalyzer());
    // Commits are managed explicitly by the engine; never commit implicitly on close.
    writerConfig.setCommitOnClose(false);
    writerConfig.setOpenMode(create
            ? IndexWriterConfig.OpenMode.CREATE
            : IndexWriterConfig.OpenMode.APPEND);
    writerConfig.setIndexDeletionPolicy(deletionPolicy);

    // With tests.verbose set, align Lucene's info stream with the test output;
    // otherwise route Lucene's diagnostics through our logger.
    boolean verboseInfoStream = false;
    try {
        verboseInfoStream = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Exception ignore) {
        // best effort — e.g. a SecurityManager may forbid reading system properties
    }
    writerConfig.setInfoStream(verboseInfoStream ? InfoStream.getDefault() : new LoggerInfoStream(logger));

    writerConfig.setMergeScheduler(mergeScheduler);
    // Wrap the configured policy so old segments can be upgraded during background merges.
    writerConfig.setMergePolicy(new ElasticsearchMergePolicy(config().getMergePolicy()));
    writerConfig.setSimilarity(engineConfig.getSimilarity());
    writerConfig.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
    writerConfig.setCodec(engineConfig.getCodec());
    writerConfig.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
    return writerConfig;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:25,代码来源:InternalEngine.java

示例2: prepareIndex

import org.apache.lucene.index.IndexWriterConfig; //导入方法依赖的package包/类
/**
 * Creates (or opens) the word-frequency index directory and initializes the
 * shared {@code IndexWriter} and {@code DocumentMaker}.
 *
 * @throws IOException if the index directory cannot be opened or the initial
 *                     commit fails
 */
public void prepareIndex() throws IOException {
    File globalWFMDir = new File(Util.GTPM_INDEX_DIR);
    if (!globalWFMDir.exists()) {
        Util.createDirs(Util.GTPM_INDEX_DIR);
    }
    KeywordAnalyzer keywordAnalyzer = new KeywordAnalyzer();
    IndexWriterConfig wfmIndexWriterConfig = new IndexWriterConfig(Version.LUCENE_46, keywordAnalyzer);
    wfmIndexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    // Large RAM buffer (1 GB) keeps segment flushes infrequent during bulk indexing.
    wfmIndexWriterConfig.setRAMBufferSizeMB(1024);

    logger.info("PREPARE INDEX");
    // Fix: previously the IOException was caught and only printed, which left
    // wfmIndexWriter/wfmIndexer null and caused NullPointerExceptions later.
    // The method already declares `throws IOException`, so propagate it.
    wfmIndexWriter = new IndexWriter(FSDirectory.open(globalWFMDir), wfmIndexWriterConfig);
    wfmIndexWriter.commit();
    wfmIndexer = new DocumentMaker(wfmIndexWriter);
}
 
开发者ID:Mondego,项目名称:SourcererCC,代码行数:20,代码来源:WordFrequencyStore.java

示例3: testSampler

import org.apache.lucene.index.IndexWriterConfig; //导入方法依赖的package包/类
/**
 * Uses the sampler aggregation to find the minimum value of a field out of the top 3 scoring documents in a search.
 */
public void testSampler() throws IOException {
    // Documents score by how many times the term "good" appears in "text".
    TextFieldType textFieldType = new TextFieldType();
    textFieldType.setIndexAnalyzer(new NamedAnalyzer("foo", AnalyzerScope.GLOBAL, new StandardAnalyzer()));
    MappedFieldType numericFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG);
    numericFieldType.setName("int");

    IndexWriterConfig writerConfig = newIndexWriterConfig();
    writerConfig.setMaxBufferedDocs(100);
    writerConfig.setRAMBufferSizeMB(100); // flush on open to have a single segment with predictable docIds
    try (Directory directory = newDirectory();
            IndexWriter writer = new IndexWriter(directory, writerConfig)) {
        for (long value : new long[] {7, 3, -10, -6, 5, 50}) {
            // Repeat "good" `value` times so a doc's score tracks its numeric value.
            StringBuilder text = new StringBuilder();
            for (int i = 0; i < value; i++) {
                text.append("good ");
            }
            Document doc = new Document();
            doc.add(new Field("text", text.toString(), textFieldType));
            doc.add(new SortedNumericDocValuesField("int", value));
            writer.addDocument(doc);
        }

        // Sample the top-3 scoring docs and take the minimum "int" among them.
        SamplerAggregationBuilder aggBuilder = new SamplerAggregationBuilder("sampler")
                .shardSize(3)
                .subAggregation(new MinAggregationBuilder("min").field("int"));
        try (IndexReader reader = DirectoryReader.open(writer)) {
            assertEquals("test expects a single segment", 1, reader.leaves().size());
            IndexSearcher searcher = new IndexSearcher(reader);
            Sampler sampler = searchAndReduce(searcher, new TermQuery(new Term("text", "good")), aggBuilder,
                    textFieldType, numericFieldType);
            Min min = sampler.getAggregations().get("min");
            assertEquals(5.0, min.getValue(), 0);
        }
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:40,代码来源:SamplerAggregatorTests.java

示例4: FbEntityIndexer

import org.apache.lucene.index.IndexWriterConfig; //导入方法依赖的package包/类
/**
 * Creates an indexer that writes entity names from {@code namefile} into a
 * Lucene index under {@code outputDir}.
 *
 * @param namefile         file containing the entity names to index
 * @param outputDir        directory in which the Lucene index is written
 * @param indexingStrategy either "exact" (keyword analyzer) or "inexact" (standard analyzer)
 * @throws IOException if the index directory cannot be opened
 */
public FbEntityIndexer(String namefile, String outputDir, String indexingStrategy) throws IOException {

    boolean exact = indexingStrategy.equals("exact");
    if (!exact && !indexingStrategy.equals("inexact"))
      throw new RuntimeException("Bad indexing strategy: " + indexingStrategy);

    // "exact" matches whole names verbatim; "inexact" tokenizes them.
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44,
        exact ? new KeywordAnalyzer() : new StandardAnalyzer(Version.LUCENE_44));
    config.setOpenMode(OpenMode.CREATE);
    config.setRAMBufferSizeMB(256.0); // generous buffer for bulk indexing
    indexer = new IndexWriter(new SimpleFSDirectory(new File(outputDir)), config);

    this.nameFile = namefile;
  }
 
开发者ID:cgraywang,项目名称:TextHIN,代码行数:13,代码来源:FbEntityIndexer.java

示例5: CreateIndex

import org.apache.lucene.index.IndexWriterConfig; //导入方法依赖的package包/类
/**
 * Sets up an index writer over {@code indexDir} using the tree analyzer.
 *
 * @param srcDir   directory containing the source collection
 * @param indexDir directory in which the Lucene index is created
 * @throws IOException if the index directory cannot be opened
 */
public CreateIndex(String srcDir, String indexDir) throws IOException {
	this.srcDirName = srcDir;
	// The tokenizer is reused later; seed it with dummy input for now.
	sentenceTokenizer = new SentenceTokenizer(new BufferedReader(new StringReader("DUMMY")));

	Analyzer analyzer = new TreeAnalyzer(new LRDP(LRDP.PhysicalPayloadFormat.BYTE1111));
	IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42, analyzer);
	config.setRAMBufferSizeMB(1024); // 1 GB RAM buffer for bulk indexing
	writer = new IndexWriter(new MMapDirectory(new File(indexDir)), config);
	ft = createFieldType();
}
 
开发者ID:arne-cl,项目名称:fangorn,代码行数:13,代码来源:CreateIndex.java

示例6: createIndexWriterConfig

import org.apache.lucene.index.IndexWriterConfig; //导入方法依赖的package包/类
/**
 * Builds the {@link IndexWriterConfig} for Linden writers: a 48 MB RAM buffer,
 * an optional plugin-provided merge policy, and a concurrent merge scheduler
 * limited to 8 queued merges with 1 merge thread.
 *
 * @return the fully configured writer config
 * @throws IOException if the analyzer instance cannot be created
 */
public IndexWriterConfig createIndexWriterConfig() throws IOException {
  IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, getIndexAnalyzerInstance());
  config.setRAMBufferSizeMB(48);

  // A null plugin instance means "keep Lucene's default merge policy".
  MergePolicy mergePolicy = getPluginManager().getInstance(LindenConfigBuilder.MERGE_POLICY, MergePolicy.class);
  if (mergePolicy != null) {
    config.setMergePolicy(mergePolicy);
  }
  LOGGER.info("Merge policy : {}", mergePolicy == null ? "Default" : mergePolicy);

  ConcurrentMergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
  mergeScheduler.setMaxMergesAndThreads(8, 1);
  config.setMergeScheduler(mergeScheduler);
  return config;
}
 
开发者ID:XiaoMi,项目名称:linden,代码行数:16,代码来源:LindenConfig.java

示例7: openStreams

import org.apache.lucene.index.IndexWriterConfig; //导入方法依赖的package包/类
/**
 * Opens all the needed streams that the engine needs to work properly.
 *
 * @throws IndexException if the underlying Lucene storage cannot be initialized
 */
private void openStreams() throws IndexException {
	try {
		// Use native OS file locks when configured; otherwise Lucene's default lock factory.
		_storage = _nativeLocking
				? FSDirectory.open(new File(INDEX_DIR), new NativeFSLockFactory(INDEX_DIR))
				: FSDirectory.open(new File(INDEX_DIR));

		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_32, ANALYZER);
		config.setMaxBufferedDocs(_maxDocsBuffer);
		config.setRAMBufferSizeMB(_maxRAMBufferSize);

		_iWriter = new IndexWriter(_storage, config);
	} catch (IOException e) {
		// Release anything partially opened before surfacing the failure.
		closeAll();

		throw new IndexException("Unable to initialize the index", e);
	}
}
 
开发者ID:drftpd-ng,项目名称:drftpd3,代码行数:25,代码来源:LuceneEngine.java

示例8: instantiateWriter

import org.apache.lucene.index.IndexWriterConfig; //导入方法依赖的package包/类
/**
 * Creates a batch-insert Lucene writer over the given index directory.
 *
 * @param directory filesystem location of the index
 * @return a writer suitable for batch insertion
 */
private LuceneIndexWriter instantiateWriter( File directory )
{
    try
    {
        IndexWriterConfig config = new IndexWriterConfig( type.analyzer );
        // Scale the default RAM buffer up to a size suited to batch insertion.
        config.setRAMBufferSizeMB( determineGoodBufferSize( config.getRAMBufferSizeMB() ) );
        return IndexWriterFactories.batchInsert( config ).create( getDirectory( directory, identifier ) );
    }
    catch ( IOException e )
    {
        throw new RuntimeException( e );
    }
}
 
开发者ID:neo4j-contrib,项目名称:neo4j-lucene5-index,代码行数:15,代码来源:LuceneBatchInserterIndex.java

示例9: create_vocabulary_index

import org.apache.lucene.index.IndexWriterConfig; //导入方法依赖的package包/类
/**
 * Builds a Lucene keyword index of the vocabulary, one document per word.
 * Lines are read from {@code vocabulary_file} (gzip-decompressed when the name
 * ends in ".gz"), trimmed, and indexed under the "word" field.
 *
 * @param vocabulary_file plain-text or gzipped file with one word per line
 * @throws IOException if the index or the vocabulary file cannot be accessed
 */
public void create_vocabulary_index(File vocabulary_file) throws IOException{
	File index_dir = new File(_index_dir, "vocab");
	if(index_dir.exists()){
		LOG.info("Vocabulary index already exists in directory '{}'.", index_dir.getAbsolutePath());
		if(_overwrite){
			LOG.info("Overwriting index '{}',", index_dir);
			// Fix: File.delete() cannot remove a non-empty directory, so the old
			// index silently survived. Delete the contained files first, and log
			// when a deletion fails instead of ignoring the return value.
			File[] stale = index_dir.listFiles();
			if(stale != null)
				for(File f : stale)
					if(!f.delete())
						LOG.warn("Could not delete stale index file '{}'.", f.getAbsolutePath());
			if(!index_dir.delete())
				LOG.warn("Could not delete index directory '{}'.", index_dir.getAbsolutePath());
		}
		else
			return;
	}
	index_dir.mkdirs();
	Analyzer analyzer = new KeywordAnalyzer();
	IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
	iwc.setOpenMode(OpenMode.CREATE);
	iwc.setRAMBufferSizeMB(1024.0); // 1 GB buffer for bulk indexing
	Directory directory = new MMapDirectory(index_dir);
	IndexWriter writer_vocab = new IndexWriter(directory, iwc);

	InputStream in = new FileInputStream(vocabulary_file);
	try{
		if(vocabulary_file.getName().endsWith(".gz"))
			in = new GZIPInputStream(in);
		LineIterator iter = new LineIterator(new BufferedReader(new InputStreamReader(in,"UTF-8")));
		// Reuse a single Document/Field pair; only the field value changes per word.
		Document doc = new Document();
		Field f_word = new StringField("word", "", Field.Store.YES); doc.add(f_word);
		long c = 0;
		while(iter.hasNext()){
			if(++c % 10000 == 0)
				LOG.info("Adding {}'th word.", c);
			String line = iter.next();
			try{
				String word = line.trim();
				f_word.setStringValue(word);
				writer_vocab.addDocument(doc);
			}catch(Exception e){
				LOG.warn("Could not process line '{}' in file '{}', malformed line.", line, vocabulary_file, e);
			}
		}

		writer_vocab.forceMergeDeletes();
		writer_vocab.commit();
	}finally{
		// Fix: previously neither the input stream nor the writer was closed on
		// failure, leaking file handles and the Lucene write lock.
		in.close();
		writer_vocab.close();
	}
}
 
开发者ID:tudarmstadt-lt,项目名称:topicrawler,代码行数:44,代码来源:GenerateNgramIndex.java

示例10: init

import org.apache.lucene.index.IndexWriterConfig; //导入方法依赖的package包/类
/**
 * Builds a new {@link FSIndex}.
 *
 * @param name
 *            the index name
 * @param mbeanName
 *            the JMX MBean object name
 * @param path
 *            the directory path
 * @param analyzer
 *            the index writer analyzer
 * @param refresh
 *            the index reader refresh frequency in seconds
 * @param ramBufferMB
 *            the index writer RAM buffer size in MB
 * @param maxMergeMB
 *            the directory max merge size in MB
 * @param maxCachedMB
 *            the directory max cache size in MB
 * @param refreshTask
 *            action to be done during refresh
 * @throws FhirIndexException
 *             if the directory, writer, or NRT machinery cannot be created
 */
public void init(String name, String mbeanName, Path path, Analyzer analyzer, double refresh, int ramBufferMB,
		int maxMergeMB, int maxCachedMB, Runnable refreshTask) {
	try {

		this.path = path;
		this.name = name;

		// Open or create directory
		// NRTCachingDirectory keeps small, recently-flushed segments in RAM
		// (up to maxCachedMB) to speed up near-real-time reopens.
		FSDirectory fsDirectory = FSDirectory.open(path);
		this.directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB);

		// Setup index writer
		IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
		indexWriterConfig.setRAMBufferSizeMB(ramBufferMB);
		indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
		indexWriterConfig.setUseCompoundFile(true);
		indexWriterConfig.setMergePolicy(new TieredMergePolicy());
		this.indexWriter = new IndexWriter(this.directory, indexWriterConfig);

		// Setup NRT search
		// Each new searcher runs the caller-supplied refreshTask (if any) and
		// disables IDF scoring via NoIDFSimilarity.
		SearcherFactory searcherFactory = new SearcherFactory() {
			@Override
			public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) {
				if (refreshTask != null) {
					refreshTask.run();
				}
				IndexSearcher searcher = new IndexSearcher(reader);
				searcher.setSimilarity(new NoIDFSimilarity());
				return searcher;
			}
		};
		TrackingIndexWriter trackingWriter = new TrackingIndexWriter(this.indexWriter);
		this.searcherManager = new SearcherManager(this.indexWriter, true, searcherFactory);
		// Background thread reopens searchers at most/least every `refresh` seconds.
		this.searcherReopener = new ControlledRealTimeReopenThread<>(trackingWriter, this.searcherManager, refresh,
				refresh);
		this.searcherReopener.start();

		// Register JMX MBean
		// NOTE(review): JMX registration is disabled (commented out); mbeanName
		// is currently unused — confirm whether this is intentional.
		// mbean = new ObjectName(mbeanName);
		// ManagementFactory.getPlatformMBeanServer().registerMBean(service,
		// this.mbean);

	} catch (Exception e) {
		throw new FhirIndexException(e, "Error while creating index %s", name);
	}
}
 
开发者ID:jmiddleton,项目名称:cassandra-fhir-index,代码行数:69,代码来源:LuceneService.java

示例11: initialize

import org.apache.lucene.index.IndexWriterConfig; //导入方法依赖的package包/类
/**
 * Prepares a fresh Lucene index directory and writer for k-mer indexing and
 * pre-allocates one reusable Document per worker thread.
 *
 * @param indexPath     directory in which the index is created (contents are cleaned first)
 * @param kmerSize      k-mer length passed to the analyzer
 * @param minStrandKmer analyzer flag — presumably selects canonical-strand k-mers; TODO confirm
 * @param similarity    optional Lucene similarity; ignored when null
 * @param workerThreads number of indexing worker threads
 * @param ramBufferSize writer RAM buffer in MB; ignored when <= 0 (Lucene default applies)
 * @throws Exception if the directory or writer cannot be created
 */
private void initialize(File indexPath, int kmerSize, boolean minStrandKmer, Similarity similarity, int workerThreads, int ramBufferSize) throws Exception {
    if(!indexPath.exists()) {
        indexPath.mkdirs();
    }
    
    // NOTE(review): after mkdirs() above, exists() is effectively always true,
    // so the directory is always cleaned — confirm this is the intent.
    if(indexPath.exists()) {
        cleanUpDirectory(indexPath);
    }
    
    this.indexPath = indexPath;
    this.minStrandKmer = minStrandKmer;
    this.analyzer = new KmerIndexAnalyzer(kmerSize, minStrandKmer);
    Directory dir = new MMapDirectory(this.indexPath.toPath()); 
    IndexWriterConfig config = new IndexWriterConfig(this.analyzer); 
    if(similarity != null) {
        config.setSimilarity(similarity);
    }
    
    this.workerThreads = workerThreads;
    
    if(ramBufferSize > 0) {
        config.setRAMBufferSizeMB(ramBufferSize);
    }
    
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    this.indexWriter = new IndexWriter(dir, config);
    
    // Bounded executor: at most workerThreads running plus workerThreads*2 queued tasks.
    this.executor = new BlockingExecutor(this.workerThreads, this.workerThreads * 2);
    
    // Pre-build one reusable Document (with empty field values) per worker;
    // workers take them from freeQueue and mutate the field values in place.
    for(int i=0;i<this.workerThreads;i++) {
        Document doc = new Document();
        Field filenameField = new StringField(IndexConstants.FIELD_FILENAME, "", Field.Store.YES);
        Field headerField = new StringField(IndexConstants.FIELD_HEADER, "", Field.Store.YES);
        Field sequenceDirectionField = new StringField(IndexConstants.FIELD_SEQUENCE_DIRECTION, "", Field.Store.YES);
        Field taxonTreeField = new StringField(IndexConstants.FIELD_TAXONOMY_TREE, "", Field.Store.YES);
        Field sequenceField = new TextField(IndexConstants.FIELD_SEQUENCE, "", Field.Store.NO);

        doc.add(filenameField);
        doc.add(headerField);
        doc.add(sequenceDirectionField);
        doc.add(taxonTreeField);
        doc.add(sequenceField);
        
        this.freeQueue.offer(doc);
    }
}
 
开发者ID:iychoi,项目名称:biospectra,代码行数:47,代码来源:Indexer.java

示例12: build

import org.apache.lucene.index.IndexWriterConfig; //导入方法依赖的package包/类
/**
 * Construye un índice a partir de una colección de documentos de texto plano.
 * Usando Lucene
 * 
 * @param inputCollectionPath ruta de la carpeta en la que se encuentran los documentos a indexar
 * @param outputIndexPath la ruta de la carpeta en la que almacenar el índice creado
 * @param textParser parser de texto que procesará el texto de los documentos para su indexación
 */
/**
 * Builds an index from a collection of plain-text documents, using Lucene.
 *
 * @param inputCollectionPath path of the zip file containing the documents to index
 * @param outputIndexPath     path of the folder in which to store the created index
 * @param textParser          text parser that processes document text for indexing
 */
@Override
public void build(String inputCollectionPath, String outputIndexPath, TextParser textParser) {
    
    ZipFile zip = null;
    IndexWriter writer = null;
    try {
        
        Directory dir = FSDirectory.open(new File(outputIndexPath));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
        zip = new ZipFile(inputCollectionPath);
        Enumeration<? extends ZipEntry> entries = zip.entries();
        
        // Create or replace the index
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        iwc.setRAMBufferSizeMB(512.0);
        writer = new IndexWriter(dir, iwc);

        while (entries.hasMoreElements()) {
            
            ZipEntry entry = entries.nextElement();
            
            // Skip directories; only index HTML files
            if (!entry.isDirectory()) {
                final String name = entry.getName();
                
                if (name.endsWith(".html") || name.endsWith(".htm")) {
                    
                    // Read and parse the entry's content
                    String html = this.getDocumentText(zip.getInputStream(entry));
                    String text = textParser.parse(html);
                    
                    // Add it to the index
                    Document doc = new Document();
                    
                    // Name (stored, not analyzed)
                    Field nameField = new Field("name", name, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
                    doc.add(nameField);
                    
                    // Content (stored, analyzed, with positional term vectors)
                    Field textField = new Field("content", text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
                    doc.add(textField);
                    
                    writer.addDocument(doc);
                }
            }
        }
       
    } catch (Exception e) {
        System.out.println("No se pudo acceder a la colección \"" + inputCollectionPath + "\"");
    } finally {
        // Fix: previously writer.close() was not in a finally block and the
        // ZipFile was never closed, leaking file handles and the Lucene write
        // lock whenever an exception occurred mid-indexing.
        try { if (writer != null) writer.close(); } catch (IOException e) { /* best effort */ }
        try { if (zip != null) zip.close(); } catch (IOException e) { /* best effort */ }
    }
}
 
开发者ID:garnachod,项目名称:mineria2,代码行数:64,代码来源:LuceneIndexing.java


注:本文中的org.apache.lucene.index.IndexWriterConfig.setRAMBufferSizeMB方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。