Java AtomicReader.getLiveDocs Method Code Examples

This article collects and summarizes typical usage examples of the Java method org.apache.lucene.index.AtomicReader.getLiveDocs. If you are wondering what exactly AtomicReader.getLiveDocs does, how to call it, or what it looks like in real code, the curated method examples here may help. You can also explore further usage examples of the enclosing class, org.apache.lucene.index.AtomicReader.


The following presents 10 code examples of the AtomicReader.getLiveDocs method, sorted by popularity by default.
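All ten examples share one basic pattern, so it is worth seeing in isolation first. getLiveDocs() returns a Bits instance over the range [0, maxDoc()) in which deleted documents are cleared, or null when the reader holds no deletions, so callers must handle both cases. Below is a minimal sketch of that pattern; the method name visitLiveDocs and the processing step are placeholders, not taken from any of the projects below:

import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.util.Bits;

// Visit every live (non-deleted) document of an already-opened AtomicReader.
void visitLiveDocs(AtomicReader reader) throws IOException {
  final int maxDoc = reader.maxDoc();          // one past the highest doc ID
  final Bits liveDocs = reader.getLiveDocs();  // null means "no deletions"
  for (int docID = 0; docID < maxDoc; docID++) {
    if (liveDocs != null && !liveDocs.get(docID)) {
      continue; // skip deleted docs
    }
    // ... process the live document, e.g. reader.document(docID) ...
  }
}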

Example 1: merge

import org.apache.lucene.index.AtomicReader; // import the package/class this method depends on
/** Merges in the stored fields from the readers in 
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument()},
 *  {@link #writeField(FieldInfo, IndexableField)}, and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  for (AtomicReader reader : mergeState.readers) {
    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();
    for (int i = 0; i < maxDoc; i++) {
      if (liveDocs != null && !liveDocs.get(i)) {
        // skip deleted docs
        continue;
      }
      // TODO: this could be more efficient using
      // FieldVisitor instead of loading/writing entire
      // doc; ie we just have to renumber the field number
      // on the fly?
      // NOTE: it's very important to first assign to doc then pass it to
      // fieldsWriter.addDocument; see LUCENE-1282
      Document doc = reader.document(i);
      addDocument(doc, mergeState.fieldInfos);
      docCount++;
      mergeState.checkAbort.work(300);
    }
  }
  finish(mergeState.fieldInfos, docCount);
  return docCount;
}
 
Developer ID: lamsfoundation, Project: lams, Lines of code: 33, Source file: StoredFieldsWriter.java

Example 2: merge

import org.apache.lucene.index.AtomicReader; // import the package/class this method depends on
/** Merges in the term vectors from the readers in 
 *  <code>mergeState</code>. The default implementation skips
 *  over deleted documents, and uses {@link #startDocument(int)},
 *  {@link #startField(FieldInfo, int, boolean, boolean, boolean)}, 
 *  {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
 *  and {@link #finish(FieldInfos, int)},
 *  returning the number of documents that were written.
 *  Implementations can override this method for more sophisticated
 *  merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  for (int i = 0; i < mergeState.readers.size(); i++) {
    final AtomicReader reader = mergeState.readers.get(i);
    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();

    for (int docID = 0; docID < maxDoc; docID++) {
      if (liveDocs != null && !liveDocs.get(docID)) {
        // skip deleted docs
        continue;
      }
      // NOTE: it's very important to first assign to vectors then pass it to
      // termVectorsWriter.addAllDocVectors; see LUCENE-1282
      Fields vectors = reader.getTermVectors(docID);
      addAllDocVectors(vectors, mergeState);
      docCount++;
      mergeState.checkAbort.work(300);
    }
  }
  finish(mergeState.fieldInfos, docCount);
  return docCount;
}
 
Developer ID: lamsfoundation, Project: lams, Lines of code: 33, Source file: TermVectorsWriter.java

Example 3: getDeletes

import org.apache.lucene.index.AtomicReader; // import the package/class this method depends on
private PackedLongValues getDeletes(List<AtomicReader> readers) {
  PackedLongValues.Builder deletes = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
  int deleteCount = 0;
  for (AtomicReader reader : readers) {
    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();
    for (int i = 0; i < maxDoc; ++i) {
      if (liveDocs != null && !liveDocs.get(i)) {
        ++deleteCount;
      } else {
        deletes.add(deleteCount);
      }
    }
  }
  return deletes.build();
}
 
Developer ID: europeana, Project: search, Lines of code: 17, Source file: SortingMergePolicy.java
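Example 3 builds one entry per live document: entry i stores the number of deleted documents that precede the i-th live document. A plausible way the result is consumed (the variable names here are illustrative, not from the original source) is mapping a doc ID in the compacted, deletions-removed numbering back to the reader's original doc ID space:

// 'deletes' is the PackedLongValues returned by getDeletes(readers)
long oldDocID = newDocID + deletes.get(newDocID); // re-insert the skipped deleted slots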

Example 4: merge

import org.apache.lucene.index.AtomicReader; // import the package/class this method depends on
@Override
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  // Used for bulk-reading raw bytes for stored fields
  int rawDocLengths[] = new int[MAX_RAW_MERGE_DOCS];
  int idx = 0;
  
  for (AtomicReader reader : mergeState.readers) {
    final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
    Lucene40StoredFieldsReader matchingFieldsReader = null;
    if (matchingSegmentReader != null) {
      final StoredFieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();
      // we can only bulk-copy if the matching reader is also a Lucene40FieldsReader
      if (fieldsReader != null && fieldsReader instanceof Lucene40StoredFieldsReader) {
        matchingFieldsReader = (Lucene40StoredFieldsReader) fieldsReader;
      }
    }
  
    if (reader.getLiveDocs() != null) {
      docCount += copyFieldsWithDeletions(mergeState,
                                          reader, matchingFieldsReader, rawDocLengths);
    } else {
      docCount += copyFieldsNoDeletions(mergeState,
                                        reader, matchingFieldsReader, rawDocLengths);
    }
  }
  finish(mergeState.fieldInfos, docCount);
  return docCount;
}
 
Developer ID: lamsfoundation, Project: lams, Lines of code: 30, Source file: Lucene40StoredFieldsWriter.java

Example 5: merge

import org.apache.lucene.index.AtomicReader; // import the package/class this method depends on
@Override
public final int merge(MergeState mergeState) throws IOException {
  // Used for bulk-reading raw bytes for term vectors
  int rawDocLengths[] = new int[MAX_RAW_MERGE_DOCS];
  int rawDocLengths2[] = new int[MAX_RAW_MERGE_DOCS];

  int idx = 0;
  int numDocs = 0;
  for (int i = 0; i < mergeState.readers.size(); i++) {
    final AtomicReader reader = mergeState.readers.get(i);

    final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
    Lucene40TermVectorsReader matchingVectorsReader = null;
    if (matchingSegmentReader != null) {
      TermVectorsReader vectorsReader = matchingSegmentReader.getTermVectorsReader();

      if (vectorsReader != null && vectorsReader instanceof Lucene40TermVectorsReader) {
        matchingVectorsReader = (Lucene40TermVectorsReader) vectorsReader;
      }
    }
    if (reader.getLiveDocs() != null) {
      numDocs += copyVectorsWithDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
    } else {
      numDocs += copyVectorsNoDeletions(mergeState, matchingVectorsReader, reader, rawDocLengths, rawDocLengths2);
    }
  }
  finish(mergeState.fieldInfos, numDocs);
  return numDocs;
}
 
Developer ID: lamsfoundation, Project: lams, Lines of code: 30, Source file: Lucene40TermVectorsWriter.java

Example 6: copyVectorsWithDeletions

import org.apache.lucene.index.AtomicReader; // import the package/class this method depends on
private int copyVectorsWithDeletions(MergeState mergeState,
                                      final Lucene40TermVectorsReader matchingVectorsReader,
                                      final AtomicReader reader,
                                      int rawDocLengths[],
                                      int rawDocLengths2[])
        throws IOException {
  final int maxDoc = reader.maxDoc();
  final Bits liveDocs = reader.getLiveDocs();
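  // NOTE: liveDocs is non-null here; this method is only invoked when
  // reader.getLiveDocs() != null (see the caller in Example 5)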
  int totalNumDocs = 0;
  if (matchingVectorsReader != null) {
    // We can bulk-copy because the fieldInfos are "congruent"
    for (int docNum = 0; docNum < maxDoc;) {
      if (!liveDocs.get(docNum)) {
        // skip deleted docs
        ++docNum;
        continue;
      }
      // We can optimize this case (doing a bulk byte copy) since the field
      // numbers are identical
      int start = docNum, numDocs = 0;
      do {
        docNum++;
        numDocs++;
        if (docNum >= maxDoc) break;
        if (!liveDocs.get(docNum)) {
          docNum++;
          break;
        }
      } while(numDocs < MAX_RAW_MERGE_DOCS);
      
      matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, numDocs);
      addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs);
      totalNumDocs += numDocs;
      mergeState.checkAbort.work(300 * numDocs);
    }
  } else {
    for (int docNum = 0; docNum < maxDoc; docNum++) {
      if (!liveDocs.get(docNum)) {
        // skip deleted docs
        continue;
      }
      
      // NOTE: it's very important to first assign to vectors then pass it to
      // termVectorsWriter.addAllDocVectors; see LUCENE-1282
      Fields vectors = reader.getTermVectors(docNum);
      addAllDocVectors(vectors, mergeState);
      totalNumDocs++;
      mergeState.checkAbort.work(300);
    }
  }
  return totalNumDocs;
}
 
Developer ID: lamsfoundation, Project: lams, Lines of code: 53, Source file: Lucene40TermVectorsWriter.java

Example 7: merge

import org.apache.lucene.index.AtomicReader; // import the package/class this method depends on
@Override
public int merge(MergeState mergeState) throws IOException {
  int docCount = 0;
  int idx = 0;

  for (AtomicReader reader : mergeState.readers) {
    final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
    CompressingStoredFieldsReader matchingFieldsReader = null;
    if (matchingSegmentReader != null) {
      final StoredFieldsReader fieldsReader = matchingSegmentReader.getFieldsReader();
      // we can only bulk-copy if the matching reader is also a CompressingStoredFieldsReader
      if (fieldsReader != null && fieldsReader instanceof CompressingStoredFieldsReader) {
        matchingFieldsReader = (CompressingStoredFieldsReader) fieldsReader;
      }
    }

    final int maxDoc = reader.maxDoc();
    final Bits liveDocs = reader.getLiveDocs();

    if (matchingFieldsReader == null
        || matchingFieldsReader.getVersion() != VERSION_CURRENT // means reader version is not the same as the writer version
        || matchingFieldsReader.getCompressionMode() != compressionMode
        || matchingFieldsReader.getChunkSize() != chunkSize) { // the way data is decompressed depends on the chunk size
      // naive merge...
      for (int i = nextLiveDoc(0, liveDocs, maxDoc); i < maxDoc; i = nextLiveDoc(i + 1, liveDocs, maxDoc)) {
        Document doc = reader.document(i);
        addDocument(doc, mergeState.fieldInfos);
        ++docCount;
        mergeState.checkAbort.work(300);
      }
    } else {
      int docID = nextLiveDoc(0, liveDocs, maxDoc);
      if (docID < maxDoc) {
        // not all docs were deleted
        final ChunkIterator it = matchingFieldsReader.chunkIterator(docID);
        int[] startOffsets = new int[0];
        do {
          // go to the next chunk that contains docID
          it.next(docID);
          // transform lengths into offsets
          if (startOffsets.length < it.chunkDocs) {
            startOffsets = new int[ArrayUtil.oversize(it.chunkDocs, 4)];
          }
          for (int i = 1; i < it.chunkDocs; ++i) {
            startOffsets[i] = startOffsets[i - 1] + it.lengths[i - 1];
          }

          // decompress
          it.decompress();
          if (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1] != it.bytes.length) {
            throw new CorruptIndexException("Corrupted: expected chunk size=" + (startOffsets[it.chunkDocs - 1] + it.lengths[it.chunkDocs - 1]) + ", got " + it.bytes.length); // parenthesized so the message prints the sum, not two concatenated ints
          }
          // copy non-deleted docs
          for (; docID < it.docBase + it.chunkDocs; docID = nextLiveDoc(docID + 1, liveDocs, maxDoc)) {
            final int diff = docID - it.docBase;
            startDocument();
            bufferedDocs.writeBytes(it.bytes.bytes, it.bytes.offset + startOffsets[diff], it.lengths[diff]);
            numStoredFieldsInDoc = it.numStoredFields[diff];
            finishDocument();
            ++docCount;
            mergeState.checkAbort.work(300);
          }
        } while (docID < maxDoc);

        it.checkIntegrity();
      }
    }
  }
  finish(mergeState.fieldInfos, docCount);
  return docCount;
}
 
Developer ID: lamsfoundation, Project: lams, Lines of code: 72, Source file: CompressingStoredFieldsWriter.java
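Example 7 depends on a private helper nextLiveDoc(int, Bits, int) that is not part of the snippet above. Judging from how it is called, it returns the first live doc ID at or after the given one, or maxDoc when none remains; the body below is a sketch consistent with that contract, not the verbatim Lucene source:

private static int nextLiveDoc(int doc, Bits liveDocs, int maxDoc) {
  if (liveDocs == null) {
    return doc; // no deletions: every doc is live
  }
  while (doc < maxDoc && !liveDocs.get(doc)) {
    ++doc; // skip deleted docs
  }
  return doc;
}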

Example 8: split

import org.apache.lucene.index.AtomicReader; // import the package/class this method depends on
FixedBitSet[] split(AtomicReaderContext readerContext) throws IOException {
  AtomicReader reader = readerContext.reader();
  FixedBitSet[] docSets = new FixedBitSet[numPieces];
  for (int i=0; i<docSets.length; i++) {
    docSets[i] = new FixedBitSet(reader.maxDoc());
  }
  Bits liveDocs = reader.getLiveDocs();

  Fields fields = reader.fields();
  Terms terms = fields==null ? null : fields.terms(field.getName());
  TermsEnum termsEnum = terms==null ? null : terms.iterator(null);
  if (termsEnum == null) return docSets;

  BytesRef term = null;
  DocsEnum docsEnum = null;

  CharsRef idRef = new CharsRef();
  for (;;) {
    term = termsEnum.next();
    if (term == null) break;

    // figure out the hash for the term

    // FUTURE: if conversion to strings costs too much, we could
    // specialize and use the hash function that can work over bytes.
    field.getType().indexedToReadable(term, idRef);
    String idString = idRef.toString();

    if (splitKey != null) {
      // todo have composite routers support these kind of things instead
      String part1 = getRouteKey(idString);
      if (part1 == null)
        continue;
      if (!splitKey.equals(part1))  {
        continue;
      }
    }

    int hash = 0;
    if (hashRouter != null) {
      hash = hashRouter.sliceHash(idString, null, null, null);
    }

    docsEnum = termsEnum.docs(liveDocs, docsEnum, DocsEnum.FLAG_NONE);
    for (;;) {
      int doc = docsEnum.nextDoc();
      if (doc == DocIdSetIterator.NO_MORE_DOCS) break;
      if (ranges == null) {
        docSets[currPartition].set(doc);
        currPartition = (currPartition + 1) % numPieces;
      } else  {
        for (int i=0; i<rangesArr.length; i++) {      // inner-loop: use array here for extra speed.
          if (rangesArr[i].includes(hash)) {
            docSets[i].set(doc);
          }
        }
      }
    }
  }

  return docSets;
}
 
Developer ID: europeana, Project: search, Lines of code: 63, Source file: SolrIndexSplitter.java
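Example 8 references several instance fields (numPieces, field, splitKey, hashRouter, ranges, rangesArr, currPartition) that are defined elsewhere in SolrIndexSplitter. A rough sketch of their likely declarations and roles, added for readability (an assumption inferred from how they are used above, not the verbatim source):

int numPieces;                 // number of target partitions
SchemaField field;             // unique-key field whose terms are routed
String splitKey;               // optional route-key prefix filter; may be null
HashBasedRouter hashRouter;    // hash router; null disables hash routing
List<DocRouter.Range> ranges;  // hash range per partition; null means round-robin
DocRouter.Range[] rangesArr;   // array copy of ranges for the inner loop
int currPartition;             // round-robin cursor used when ranges == null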

Example 9: getDocSet

import org.apache.lucene.index.AtomicReader; // import the package/class this method depends on
/**
 * Returns the set of document ids matching all queries.
 * This method is cache-aware and attempts to retrieve the answer from the cache if possible.
 * If the answer was not cached, it may have been inserted into the cache as a result of this call.
 * This method can handle negative queries.
 * <p>
 * The DocSet returned should <b>not</b> be modified.
 */
public DocSet getDocSet(List<Query> queries) throws IOException {

  if(queries != null) {
    for(Query q : queries) {
      if(q instanceof ScoreFilter) {
        return getDocSetScore(queries);
      }
    }
  }

  ProcessedFilter pf = getProcessedFilter(null, queries);
  if (pf.answer != null) return pf.answer;


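  // maxDoc()>>6 is the "small set" size threshold: DocSetCollector keeps sets
  // smaller than maxDoc/64 as a sorted int array rather than a full bitset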
  DocSetCollector setCollector = new DocSetCollector(maxDoc()>>6, maxDoc());
  Collector collector = setCollector;
  if (pf.postFilter != null) {
    pf.postFilter.setLastDelegate(collector);
    collector = pf.postFilter;
  }

  for (final AtomicReaderContext leaf : leafContexts) {
    final AtomicReader reader = leaf.reader();
    final Bits liveDocs = reader.getLiveDocs();   // TODO: the filter may already only have liveDocs...
    DocIdSet idSet = null;
    if (pf.filter != null) {
      idSet = pf.filter.getDocIdSet(leaf, liveDocs);
      if (idSet == null) continue;
    }
    DocIdSetIterator idIter = null;
    if (idSet != null) {
      idIter = idSet.iterator();
      if (idIter == null) continue;
    }

    collector.setNextReader(leaf);
    int max = reader.maxDoc();

    if (idIter == null) {
      for (int docid = 0; docid<max; docid++) {
        if (liveDocs != null && !liveDocs.get(docid)) continue;
        collector.collect(docid);
      }
    } else {
      for (int docid = -1; (docid = idIter.advance(docid+1)) < max; ) {
        collector.collect(docid);
      }
    }
  }

  if(collector instanceof DelegatingCollector) {
    ((DelegatingCollector) collector).finish();
  }

  return setCollector.getDocSet();
}
 
Developer ID: europeana, Project: search, Lines of code: 65, Source file: SolrIndexSearcher.java

Example 10: getTopFilter

import org.apache.lucene.index.AtomicReader; // import the package/class this method depends on
@Override
public Filter getTopFilter() {
  final FixedBitSet bs = getBits();

  return new Filter() {
    @Override
    public DocIdSet getDocIdSet(final AtomicReaderContext context, Bits acceptDocs) {
      AtomicReader reader = context.reader();
      // all Solr DocSets that are used as filters only include live docs
      final Bits acceptDocs2 = acceptDocs == null ? null : (reader.getLiveDocs() == acceptDocs ? null : acceptDocs);

      if (context.isTopLevel) {
        return BitsFilteredDocIdSet.wrap(bs, acceptDocs);
      }

      final int base = context.docBase;
      final int maxDoc = reader.maxDoc();
      final int max = base + maxDoc;   // one past the max doc in this segment.

      return BitsFilteredDocIdSet.wrap(new DocIdSet() {
        @Override
        public DocIdSetIterator iterator() {
          return new DocIdSetIterator() {
            int pos=base-1;
            int adjustedDoc=-1;

            @Override
            public int docID() {
              return adjustedDoc;
            }

            @Override
            public int nextDoc() {
              pos = bs.nextSetBit(pos+1);
              return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
            }

            @Override
            public int advance(int target) {
              if (target==NO_MORE_DOCS) return adjustedDoc=NO_MORE_DOCS;
              pos = bs.nextSetBit(target+base);
              return adjustedDoc = (pos>=0 && pos<max) ? pos-base : NO_MORE_DOCS;
            }

            @Override
            public long cost() {
              return bs.length();
            }
          };
        }

        @Override
        public boolean isCacheable() {
          return true;
        }

        @Override
        public long ramBytesUsed() {
          return bs.ramBytesUsed();
        }

        @Override
        public Bits bits() {
          // sparse filters should not use random access
          return null;
        }

      }, acceptDocs2);
    }
  };
}
 
Developer ID: europeana, Project: search, Lines of code: 72, Source file: DocSetBase.java


Note: The org.apache.lucene.index.AtomicReader.getLiveDocs method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by the community; copyright in the source code remains with the original authors, and any use or distribution should follow the corresponding project's license. Do not reproduce without permission.