

Java PostingsEnum Class Code Examples

This article collects typical usage examples of the Java class org.apache.lucene.index.PostingsEnum. If you are wondering what PostingsEnum is for, how to use it, or what real-world code that uses it looks like, the curated class examples below should help.


The PostingsEnum class belongs to the org.apache.lucene.index package. Fifteen code examples of the class are shown below, ordered roughly by the popularity of their source projects.
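
Before the examples, here is a minimal, self-contained sketch of the pattern most of them follow: walk a field's terms with a TermsEnum, ask it for a (reusable) PostingsEnum, and iterate the matching documents. This sketch is not taken from any of the projects below; it assumes a Lucene 5.x+ API, and the class and method names PostingsEnumSketch/dumpPostings are made up for illustration.

import java.io.IOException;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

public final class PostingsEnumSketch {
    /** Prints every (term, doc, freq) triple of the given field in one segment. */
    public static void dumpPostings(LeafReader reader, String field) throws IOException {
        Terms terms = reader.terms(field);
        if (terms == null) {
            return; // field is not indexed in this segment
        }
        TermsEnum termsEnum = terms.iterator();
        PostingsEnum postings = null; // reused across terms, as in the examples below
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
            postings = termsEnum.postings(postings, PostingsEnum.FREQS);
            for (int doc = postings.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postings.nextDoc()) {
                System.out.println(term.utf8ToString() + " -> doc " + doc + ", freq " + postings.freq());
            }
        }
    }
}

Requesting only PostingsEnum.FREQS (or PostingsEnum.NONE when frequencies are not needed) keeps the enumeration as cheap as possible; the richer flags (POSITIONS, OFFSETS, PAYLOADS, ALL) appear in the examples that read per-position data.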

Example 1: buildFromTerms

import org.apache.lucene.index.PostingsEnum; // import the required package/class
/**
 * This method iterates all terms in the given {@link TermsEnum} and
 * associates each terms ordinal with the terms documents. The caller must
 * exhaust the returned {@link BytesRefIterator} which returns all values
 * where the first returned value is associated with the ordinal <tt>1</tt>
 * etc.
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values the terms
 * enum should be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)}
 * or {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. If
 * the {@link TermsEnum} is not wrapped the returned
 * {@link BytesRefIterator} will contain partial precision terms rather than
 * only full-precision terms.
 * </p>
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        private PostingsEnum docsEnum = null;

        @Override
        public BytesRef next() throws IOException {
            BytesRef ref;
            if ((ref = termsEnum.next()) != null) {
                docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
                nextOrdinal();
                int docId;
                while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    addDoc(docId);
                }
            }
            return ref;
        }
    };
}
 
Developer: justor, Project: elasticsearch_my, Lines: 35, Source: OrdinalsBuilder.java

Example 2: FreqTermsEnum

import org.apache.lucene.index.PostingsEnum; // import the required package/class
public FreqTermsEnum(IndexReader reader, String field, boolean needDocFreq, boolean needTotalTermFreq, @Nullable Query filter, BigArrays bigArrays) throws IOException {
    super(reader, field, needTotalTermFreq ? PostingsEnum.FREQS : PostingsEnum.NONE, filter);
    this.bigArrays = bigArrays;
    this.needDocFreqs = needDocFreq;
    this.needTotalTermFreqs = needTotalTermFreq;
    if (needDocFreq) {
        termDocFreqs = bigArrays.newIntArray(INITIAL_NUM_TERM_FREQS_CACHED, false);
    } else {
        termDocFreqs = null;
    }
    if (needTotalTermFreq) {
        termsTotalFreqs = bigArrays.newLongArray(INITIAL_NUM_TERM_FREQS_CACHED, false);
    } else {
        termsTotalFreqs = null;
    }
    cachedTermOrds = new BytesRefHash(INITIAL_NUM_TERM_FREQS_CACHED, bigArrays);
}
 
Developer: justor, Project: elasticsearch_my, Lines: 18, Source: FreqTermsEnum.java

Example 3: writeTermWithDocsAndPos

import org.apache.lucene.index.PostingsEnum; // import the required package/class
private PostingsEnum writeTermWithDocsAndPos(TermsEnum iterator, PostingsEnum docsAndPosEnum, boolean positions,
                                                     boolean offsets, boolean payloads) throws IOException {
    docsAndPosEnum = iterator.postings(docsAndPosEnum, PostingsEnum.ALL);
    // for each term (iterator next) in this field (field)
    // iterate over the docs (should only be one)
    int nextDoc = docsAndPosEnum.nextDoc();
    assert nextDoc != DocIdSetIterator.NO_MORE_DOCS;
    final int freq = docsAndPosEnum.freq();
    writeFreq(freq);
    for (int j = 0; j < freq; j++) {
        int curPos = docsAndPosEnum.nextPosition();
        if (positions) {
            writePosition(curPos);
        }
        if (offsets) {
            writeOffsets(docsAndPosEnum.startOffset(), docsAndPosEnum.endOffset());
        }
        if (payloads) {
            writePayload(docsAndPosEnum.getPayload());
        }
    }
    nextDoc = docsAndPosEnum.nextDoc();
    assert nextDoc == DocIdSetIterator.NO_MORE_DOCS;
    return docsAndPosEnum;
}
 
Developer: justor, Project: elasticsearch_my, Lines: 26, Source: TermVectorsWriter.java

Example 4: buildTerm

import org.apache.lucene.index.PostingsEnum; // import the required package/class
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // start term, optimized writing
    BytesRef term = termIter.next();
    spare.copyUTF8Bytes(term);
    builder.startObject(spare.toString());
    buildTermStatistics(builder, termIter);
    // finally write the term vectors
    PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
    int termFreq = posEnum.freq();
    builder.field(FieldStrings.TERM_FREQ, termFreq);
    initMemory(curTerms, termFreq);
    initValues(curTerms, posEnum, termFreq);
    buildValues(builder, curTerms, termFreq);
    buildScore(builder, boostAtt);
    builder.endObject();
}
 
Developer: justor, Project: elasticsearch_my, Lines: 17, Source: TermVectorsResponse.java

Example 5: initValues

import org.apache.lucene.index.PostingsEnum; // import the required package/class
private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException {
    for (int j = 0; j < termFreq; j++) {
        int nextPos = posEnum.nextPosition();
        if (curTerms.hasPositions()) {
            currentPositions[j] = nextPos;
        }
        if (curTerms.hasOffsets()) {
            currentStartOffset[j] = posEnum.startOffset();
            currentEndOffset[j] = posEnum.endOffset();
        }
        if (curTerms.hasPayloads()) {
            BytesRef curPayload = posEnum.getPayload();
            if (curPayload != null) {
                currentPayloads[j] = new BytesArray(curPayload.bytes, 0, curPayload.length);
            } else {
                currentPayloads[j] = null;
            }
        }
    }
}
 
Developer: justor, Project: elasticsearch_my, Lines: 21, Source: TermVectorsResponse.java

Example 6: PostingsAndFreq

import org.apache.lucene.index.PostingsEnum; // import the required package/class
public PostingsAndFreq(PostingsEnum postings, int position, Term... terms) {
  this.postings = postings;
  this.position = position;
  nTerms = terms==null ? 0 : terms.length;
  if (nTerms>0) {
    if (terms.length==1) {
      this.terms = terms;
    } else {
      Term[] terms2 = new Term[terms.length];
      System.arraycopy(terms, 0, terms2, 0, terms.length);
      Arrays.sort(terms2);
      this.terms = terms2;
    }
  } else {
    this.terms = null;
  }
}
 
Developer: sing1ee, Project: lucene-custom-query, Lines: 18, Source: PostingsAndFreq.java

Example 7: prepareBackground

import org.apache.lucene.index.PostingsEnum; // import the required package/class
/**
 * Creates the TermsEnum (if not already created) and must be called before any calls to getBackgroundFrequency
 * @param context The aggregation context 
 * @return The number of documents in the index (after an optional filter might have been applied)
 */
public long prepareBackground(AggregationContext context) {
    if (termsEnum != null) {
        // already prepared - return 
        return termsEnum.getNumDocs();
    }
    SearchContext searchContext = context.searchContext();
    IndexReader reader = searchContext.searcher().getIndexReader();
    try {
        if (numberOfAggregatorsCreated == 1) {
            // Setup a termsEnum for sole use by one aggregator
            termsEnum = new FilterableTermsEnum(reader, indexedFieldName, PostingsEnum.NONE, filter);
        } else {
            // When we have > 1 agg we have possibility of duplicate term frequency lookups 
            // and so use a TermsEnum that caches results of all term lookups
            termsEnum = new FreqTermsEnum(reader, indexedFieldName, true, false, filter, searchContext.bigArrays());
        }
    } catch (IOException e) {
        throw new ElasticsearchException("failed to build terms enumeration", e);
    }
    return termsEnum.getNumDocs();
}
 
Developer: baidu, Project: Elasticsearch, Lines: 27, Source: SignificantTermsAggregatorFactory.java

Example 8: buildFromTerms

import org.apache.lucene.index.PostingsEnum; // import the required package/class
/**
 * This method iterates all terms in the given {@link TermsEnum} and
 * associates each terms ordinal with the terms documents. The caller must
 * exhaust the returned {@link BytesRefIterator} which returns all values
 * where the first returned value is associated with the ordinal <tt>1</tt>
 * etc.
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values the terms
 * enum should be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)}
 * or {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. If
 * the {@link TermsEnum} is not wrapped the returned
 * {@link BytesRefIterator} will contain partial precision terms rather than
 * only full-precision terms.
 * </p>
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        private PostingsEnum docsEnum = null;

        @Override
        public BytesRef next() throws IOException {
            BytesRef ref;
            if ((ref = termsEnum.next()) != null) {
                docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
                nextOrdinal();
                int docId;
                while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    addDoc(docId);
                }
            }
            return ref;
        }
    };
}
 
Developer: baidu, Project: Elasticsearch, Lines: 35, Source: OrdinalsBuilder.java

Example 9: buildEntryValue

import org.apache.lucene.index.PostingsEnum; // import the required package/class
private NamedList<Object> buildEntryValue(long count, Term t, List<Entry<LeafReader, Bits>> leaves) throws IOException {
  NamedList<Object> entry = new NamedList<>();
  entry.add("count", count);
  int i = -1;
  for (Entry<LeafReader, Bits> e : leaves) {
    PostingsEnum postings = e.getKey().postings(t, PostingsEnum.PAYLOADS);
    Bits liveDocs = e.getValue();
    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      if (!liveDocs.get(postings.docID())) {
        continue;
      }
      i++;
      NamedList<Object> documentEntry = new NamedList<>();
      entry.add("doc" + i, documentEntry);
      for (int j = 0; j < postings.freq(); j++) {
        postings.nextPosition();
        String extra = postings.getPayload().utf8ToString();
        documentEntry.add("position" + j, extra);
      }
    }
  }
  return entry;
}
 
Developer: upenn-libraries, Project: solrplugins, Lines: 24, Source: ProofOfConceptPayloadHandler.java

Example 10: getEntropy

import org.apache.lucene.index.PostingsEnum; // import the required package/class
/**
 * Gets the 1 - entropy (i.e. 1+ plogp) of a term,
 * a function that favors terms that are focally distributed
 * We use the definition of log-entropy weighting provided in
 * Martin and Berry (2007):
 * Entropy = 1 + sum ((Pij log2(Pij)) /  log2(n))
 * where Pij = frequency of term i in doc j / global frequency of term i
 * 		 n	 = number of documents in collection
 * @param term whose entropy you want
 * Thanks to Vidya Vasuki for adding the hash table to
 * eliminate redundant calculation
 */
private float getEntropy(Term term) {
  if (termEntropy.containsKey(term))
    return termEntropy.get(term);
  int gf = getGlobalTermFreq(term);
  double entropy = 0;
  try {
    PostingsEnum docsEnum = this.getDocsForTerm(term);
    while ((docsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
      double p = docsEnum.freq(); //frequency in this document
      p = p / gf;    //frequency across all documents
      entropy += p * (Math.log(p) / Math.log(2)); //sum of Plog(P)
    }
    int n = this.getNumDocs();
    double log2n = Math.log(n) / Math.log(2);
    entropy = entropy / log2n;
  } catch (IOException e) {
    logger.info("Couldn't get term entropy for term " + term.text());
  }
  termEntropy.put(term, 1 + (float) entropy);
  return (float) (1 + entropy);
}
 
Developer: semanticvectors, Project: semanticvectors, Lines: 34, Source: LuceneUtils.java
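
Restating the javadoc formula above in conventional notation (a reformulation only, using nothing beyond what the comment and code already state):

w(t) = 1 + \sum_{j} \frac{P_{tj}\,\log_2 P_{tj}}{\log_2 n}, \qquad P_{tj} = \frac{\mathrm{tf}_{tj}}{\mathrm{gf}_t}

where tf_{tj} is the frequency of term t in document j, gf_t its global frequency (the gf variable in the code), and n the number of documents. Each P log_2 P term is non-positive, so the accumulated sum is negative and the cached weight 1 + entropy lies in [0, 1], favoring terms concentrated in few documents.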

Example 11: reconstructNoPositions

import org.apache.lucene.index.PostingsEnum; // import the required package/class
public String reconstructNoPositions(TermsEnum te, int docid, Bits liveDocs) throws IOException{
  List<String> textList = new ArrayList<String>();
  BytesRef text;
  PostingsEnum postings = null;
  while ((text = te.next()) != null) {
    postings = te.postings(postings, PostingsEnum.FREQS);
    int iterDoc = postings.advance(docid);
    if (iterDoc == docid) {
      textList.add(text.utf8ToString());
    }
  }
  StringBuilder buf = new StringBuilder();
  for (String s : textList) {
    buf.append(s+" ");
  }
  return buf.toString();
}
 
Developer: javasoze, Project: clue, Lines: 18, Source: ReconstructCommand.java

Example 12: postings

import org.apache.lucene.index.PostingsEnum; // import the required package/class
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
    if ((flags & PostingsEnum.POSITIONS) != 0) {
        thrower.maybeThrow(Flags.DocsAndPositionsEnum);
    } else {
        thrower.maybeThrow(Flags.DocsEnum);
    }
    return super.postings(reuse, flags);
}
 
Developer: justor, Project: elasticsearch_my, Lines: 10, Source: ThrowingLeafReaderWrapper.java

Example 13: getTermsEnum

import org.apache.lucene.index.PostingsEnum; // import the required package/class
private FilterableTermsEnum getTermsEnum(String field) throws IOException {
    if (termsEnum != null) {
        return termsEnum;
    }
    IndexReader reader = context.searcher().getIndexReader();
    if (numberOfAggregatorsCreated > 1) {
        termsEnum = new FreqTermsEnum(reader, field, true, false, filter, context.bigArrays());
    } else {
        termsEnum = new FilterableTermsEnum(reader, indexedFieldName, PostingsEnum.NONE, filter);
    }
    return termsEnum;
}
 
Developer: justor, Project: elasticsearch_my, Lines: 13, Source: SignificantTermsAggregatorFactory.java

Example 14: build

import org.apache.lucene.index.PostingsEnum; // import the required package/class
/**
 * Returns a DocIdSet per segments containing the matching docs for the specified slice.
 */
private DocIdSet build(LeafReader reader) throws IOException {
    final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
    final Terms terms = reader.terms(getField());
    final TermsEnum te = terms.iterator();
    PostingsEnum docsEnum = null;
    for (BytesRef term = te.next(); term != null; term = te.next()) {
        int hashCode = term.hashCode();
        if (contains(hashCode)) {
            docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
            builder.add(docsEnum);
        }
    }
    return builder.build();
}
 
Developer: justor, Project: elasticsearch_my, Lines: 18, Source: TermsSliceQuery.java

Example 15: convertToLuceneFlags

import org.apache.lucene.index.PostingsEnum; // import the required package/class
private int convertToLuceneFlags(int flags) {
    int lucenePositionsFlags = PostingsEnum.NONE;
    lucenePositionsFlags |= (flags & IndexLookup.FLAG_FREQUENCIES) > 0 ? PostingsEnum.FREQS : 0x0;
    lucenePositionsFlags |= (flags & IndexLookup.FLAG_POSITIONS) > 0 ? PostingsEnum.POSITIONS : 0x0;
    lucenePositionsFlags |= (flags & IndexLookup.FLAG_PAYLOADS) > 0 ? PostingsEnum.PAYLOADS : 0x0;
    lucenePositionsFlags |= (flags & IndexLookup.FLAG_OFFSETS) > 0 ? PostingsEnum.OFFSETS : 0x0;
    return lucenePositionsFlags;
}
 
Developer: justor, Project: elasticsearch_my, Lines: 9, Source: IndexFieldTerm.java
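
For illustration only (not part of the original project): the int produced above is the kind of value you would pass straight to TermsEnum.postings. The PostingsEnum constants are bit flags, so several can be combined in a single request; POSITIONS already implies FREQS in Lucene's flag encoding. The helper name and the already-positioned termsEnum parameter below are assumptions for the sketch.

import java.io.IOException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;

final class FlagsUsageSketch {
    /** Hypothetical helper: request frequencies, positions and payloads in one call. */
    static PostingsEnum positionsAndPayloads(TermsEnum termsEnum) throws IOException {
        int flags = PostingsEnum.POSITIONS | PostingsEnum.PAYLOADS;
        return termsEnum.postings(null, flags);
    }
}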


Note: The org.apache.lucene.index.PostingsEnum class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their respective developers; copyright remains with the original authors. Consult each project's License before redistributing or reusing the code, and do not reproduce this compilation without permission.