本文整理汇总了Java中org.apache.lucene.index.PostingsEnum类的典型用法代码示例。如果您正苦于以下问题：Java PostingsEnum类的具体用法？Java PostingsEnum怎么用？Java PostingsEnum使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
PostingsEnum类属于org.apache.lucene.index包，在下文中一共展示了PostingsEnum类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: buildFromTerms
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Wraps the given {@link TermsEnum} in a {@link BytesRefIterator} that, for
 * every term it hands out, opens a new ordinal via {@code nextOrdinal()} and
 * registers each document of that term via {@code addDoc(int)}.
 * <p>
 * The work happens lazily inside {@link BytesRefIterator#next()}, so the
 * caller must exhaust the returned iterator. The first value returned is
 * associated with the ordinal {@code 1}, the second with the next ordinal,
 * and so on.
 * </p>
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values it should
 * be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)} or
 * {@link #wrapNumeric64Bit(TermsEnum)}, depending on its precision.
 * Otherwise the returned iterator will also contain partial precision terms
 * rather than only full-precision terms.
 * </p>
 *
 * @param termsEnum the terms to iterate
 * @return an iterator over the terms; returns {@code null} once the terms are exhausted
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        // Handed back to postings() on every call so the enum can be reused.
        private PostingsEnum reusablePostings = null;

        @Override
        public BytesRef next() throws IOException {
            final BytesRef term = termsEnum.next();
            if (term == null) {
                return null;
            }
            // Only doc ids are consumed below, hence PostingsEnum.NONE.
            reusablePostings = termsEnum.postings(reusablePostings, PostingsEnum.NONE);
            nextOrdinal();
            for (int doc = reusablePostings.nextDoc();
                 doc != DocIdSetIterator.NO_MORE_DOCS;
                 doc = reusablePostings.nextDoc()) {
                addDoc(doc);
            }
            return term;
        }
    };
}
示例2: FreqTermsEnum
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Creates a terms enum with optional caches for per-term statistics.
 * <p>
 * The superclass is asked for {@link PostingsEnum#FREQS} when total term
 * frequencies are needed and for {@link PostingsEnum#NONE} otherwise.
 * </p>
 *
 * @param reader            index reader passed to the superclass
 * @param field             field name passed to the superclass
 * @param needDocFreq       whether an int array for document frequencies is allocated
 * @param needTotalTermFreq whether a long array for total term frequencies is allocated
 * @param filter            optional filter passed to the superclass, may be {@code null}
 * @param bigArrays         allocator used for the statistic arrays and the term hash
 */
public FreqTermsEnum(IndexReader reader, String field, boolean needDocFreq, boolean needTotalTermFreq, @Nullable Query filter, BigArrays bigArrays) throws IOException {
    super(reader, field, needTotalTermFreq ? PostingsEnum.FREQS : PostingsEnum.NONE, filter);
    this.bigArrays = bigArrays;
    this.needDocFreqs = needDocFreq;
    this.needTotalTermFreqs = needTotalTermFreq;
    // A statistic that was not requested keeps a null array instead of an allocation.
    this.termDocFreqs = needDocFreq
            ? bigArrays.newIntArray(INITIAL_NUM_TERM_FREQS_CACHED, false)
            : null;
    this.termsTotalFreqs = needTotalTermFreq
            ? bigArrays.newLongArray(INITIAL_NUM_TERM_FREQS_CACHED, false)
            : null;
    this.cachedTermOrds = new BytesRefHash(INITIAL_NUM_TERM_FREQS_CACHED, bigArrays);
}
示例3: writeTermWithDocsAndPos
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Writes the frequency of the current term of {@code iterator} and, for each
 * of its occurrences, the requested position, offsets and payload.
 * <p>
 * The postings are expected to contain exactly one document; both ends of
 * that expectation are checked with assertions.
 * </p>
 *
 * @param iterator       terms enum positioned on the term to write
 * @param docsAndPosEnum enum offered to {@code postings()} for reuse, may be {@code null}
 * @param positions      whether to write each position
 * @param offsets        whether to write each start/end offset pair
 * @param payloads       whether to write each payload
 * @return the postings enum obtained from {@code iterator}, so the caller can offer it for reuse again
 */
private PostingsEnum writeTermWithDocsAndPos(TermsEnum iterator, PostingsEnum docsAndPosEnum, boolean positions,
                                             boolean offsets, boolean payloads) throws IOException {
    docsAndPosEnum = iterator.postings(docsAndPosEnum, PostingsEnum.ALL);

    // Move onto the single expected document.
    final int firstDoc = docsAndPosEnum.nextDoc();
    assert firstDoc != DocIdSetIterator.NO_MORE_DOCS;

    final int freq = docsAndPosEnum.freq();
    writeFreq(freq);

    int remaining = freq;
    while (remaining-- > 0) {
        // The position is always consumed, even if it is not written, so that
        // the offsets and payload read below belong to this occurrence.
        final int position = docsAndPosEnum.nextPosition();
        if (positions) {
            writePosition(position);
        }
        if (offsets) {
            writeOffsets(docsAndPosEnum.startOffset(), docsAndPosEnum.endOffset());
        }
        if (payloads) {
            writePayload(docsAndPosEnum.getPayload());
        }
    }

    // There must be no second document.
    final int afterLast = docsAndPosEnum.nextDoc();
    assert afterLast == DocIdSetIterator.NO_MORE_DOCS;
    return docsAndPosEnum;
}
示例4: buildTerm
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Advances {@code termIter} to its next term and writes that term as one
 * object into {@code builder}: term statistics, term frequency, the values
 * loaded from the postings, and the score.
 *
 * @param builder  output builder the term object is written to
 * @param spare    scratch buffer used to decode the term bytes into text
 * @param curTerms terms of the current field
 * @param termIter iterator over {@code curTerms}; advanced by one term
 * @param boostAtt boost attribute handed to {@code buildScore}
 */
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // The decoded term text is used as the name of the object.
    final BytesRef termBytes = termIter.next();
    spare.copyUTF8Bytes(termBytes);
    final String termText = spare.toString();
    builder.startObject(termText);

    buildTermStatistics(builder, termIter);

    // NOTE(review): freq() is read without a preceding nextDoc(); presumably the
    // postings implementation used here allows that - confirm before reusing
    // this with a generic PostingsEnum.
    final PostingsEnum postings = termIter.postings(null, PostingsEnum.ALL);
    final int freq = postings.freq();
    builder.field(FieldStrings.TERM_FREQ, freq);

    initMemory(curTerms, freq);
    initValues(curTerms, postings, freq);
    buildValues(builder, curTerms, freq);
    buildScore(builder, boostAtt);

    builder.endObject();
}
示例5: initValues
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Reads {@code termFreq} occurrences from {@code posEnum} into the
 * {@code currentPositions}, {@code currentStartOffset},
 * {@code currentEndOffset} and {@code currentPayloads} buffers, filling only
 * the buffers for features that {@code curTerms} reports as present.
 *
 * @param curTerms terms of the current field; decides which buffers are filled
 * @param posEnum  postings enum to read the occurrences from
 * @param termFreq number of occurrences to read
 */
private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException {
    for (int j = 0; j < termFreq; j++) {
        // Always advance, even when positions are not stored: the offsets and
        // payload read below belong to the occurrence selected by this call.
        int nextPos = posEnum.nextPosition();
        if (curTerms.hasPositions()) {
            currentPositions[j] = nextPos;
        }
        if (curTerms.hasOffsets()) {
            currentStartOffset[j] = posEnum.startOffset();
            currentEndOffset[j] = posEnum.endOffset();
        }
        if (curTerms.hasPayloads()) {
            BytesRef curPayload = posEnum.getPayload();
            // The valid bytes of a BytesRef start at its offset, not necessarily
            // at index 0 of the backing array, so the offset must be honoured.
            currentPayloads[j] = curPayload == null
                    ? null
                    : new BytesArray(curPayload.bytes, curPayload.offset, curPayload.length);
        }
    }
}
示例6: PostingsAndFreq
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
public PostingsAndFreq(PostingsEnum postings, int position, Term... terms) {
this.postings = postings;
this.position = position;
nTerms = terms==null ? 0 : terms.length;
if (nTerms>0) {
if (terms.length==1) {
this.terms = terms;
} else {
Term[] terms2 = new Term[terms.length];
System.arraycopy(terms, 0, terms2, 0, terms.length);
Arrays.sort(terms2);
this.terms = terms2;
}
} else {
this.terms = null;
}
}
示例7: prepareBackground
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Lazily creates the terms enum used for background frequency lookups. Must
 * be called before any call to getBackgroundFrequency.
 * <p>
 * Once the enum exists, later calls only report its document count.
 * </p>
 *
 * @param context The aggregation context
 * @return The number of documents in the index (after an optional filter might have been applied)
 */
public long prepareBackground(AggregationContext context) {
    if (termsEnum == null) {
        final SearchContext searchContext = context.searchContext();
        final IndexReader reader = searchContext.searcher().getIndexReader();
        try {
            if (numberOfAggregatorsCreated != 1) {
                // With more than one aggregator the same term may be looked up
                // repeatedly, so use the enum that caches term lookups.
                termsEnum = new FreqTermsEnum(reader, indexedFieldName, true, false, filter, searchContext.bigArrays());
            } else {
                // A sole aggregator gets a plain enum for its exclusive use.
                termsEnum = new FilterableTermsEnum(reader, indexedFieldName, PostingsEnum.NONE, filter);
            }
        } catch (IOException e) {
            throw new ElasticsearchException("failed to build terms enumeration", e);
        }
    }
    return termsEnum.getNumDocs();
}
示例8: buildFromTerms
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Iterates all terms of the given {@link TermsEnum} and associates each
 * term's ordinal with the documents of that term.
 * <p>
 * The association is performed while the returned {@link BytesRefIterator}
 * is being consumed, so the caller must exhaust it. The first returned value
 * is associated with the ordinal {@code 1}, and so on.
 * </p>
 * <p>
 * A {@link TermsEnum} holding prefix coded numerical values should be wrapped
 * with {@link #wrapNumeric32Bit(TermsEnum)} or
 * {@link #wrapNumeric64Bit(TermsEnum)} according to its precision. Without
 * the wrapper the returned iterator contains partial precision terms in
 * addition to the full-precision ones.
 * </p>
 *
 * @param termsEnum the terms to iterate
 * @return an iterator over the terms that yields {@code null} when no term is left
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        // Kept between calls so postings() can reuse it.
        private PostingsEnum postings = null;

        @Override
        public BytesRef next() throws IOException {
            final BytesRef current = termsEnum.next();
            if (current != null) {
                postings = termsEnum.postings(postings, PostingsEnum.NONE);
                nextOrdinal();
                int doc = postings.nextDoc();
                while (doc != DocIdSetIterator.NO_MORE_DOCS) {
                    addDoc(doc);
                    doc = postings.nextDoc();
                }
            }
            return current;
        }
    };
}
示例9: buildEntryValue
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Builds a response entry for term {@code t}: a {@code "count"} value followed
 * by one {@code "docN"} sub-list per live matching document, each holding the
 * payload text of every position of the term in that document under
 * {@code "positionJ"}.
 *
 * @param count  value stored under {@code "count"}
 * @param t      the term whose postings are listed
 * @param leaves pairs of leaf reader and live-docs bits; null bits are treated
 *               as "every document is live"
 * @return the populated entry
 */
private NamedList<Object> buildEntryValue(long count, Term t, List<Entry<LeafReader, Bits>> leaves) throws IOException {
    NamedList<Object> entry = new NamedList<>();
    entry.add("count", count);
    int docIndex = 0;
    for (Entry<LeafReader, Bits> leaf : leaves) {
        PostingsEnum postings = leaf.getKey().postings(t, PostingsEnum.PAYLOADS);
        if (postings == null) {
            // The field or term does not exist in this leaf.
            continue;
        }
        Bits liveDocs = leaf.getValue();
        while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            if (liveDocs != null && !liveDocs.get(postings.docID())) {
                continue;
            }
            NamedList<Object> documentEntry = new NamedList<>();
            entry.add("doc" + docIndex, documentEntry);
            docIndex++;
            // The frequency is fixed for the current document; read it once.
            final int freq = postings.freq();
            for (int j = 0; j < freq; j++) {
                postings.nextPosition();
                // A position may carry no payload; record null instead of failing.
                // getPayload() is called twice to avoid introducing a new type name here.
                String extra = postings.getPayload() == null ? null : postings.getPayload().utf8ToString();
                documentEntry.add("position" + j, extra);
            }
        }
    }
    return entry;
}
示例10: getEntropy
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Gets 1 - entropy (i.e. 1 + sum of p log p) of a term, a function that
 * favors terms that are focally distributed.
 * <p>
 * Uses the definition of log-entropy weighting provided in Martin and Berry
 * (2007):
 * <pre>
 *   Entropy = 1 + sum((Pij * log2(Pij)) / log2(n))
 *   Pij = frequency of term i in doc j / global frequency of term i
 *   n   = number of documents in collection
 * </pre>
 * Results are memoized in {@code termEntropy} (thanks to Vidya Vasuki for
 * adding the hash table to eliminate redundant calculation). A value computed
 * from a failed read is returned but not memoized, so a later call can retry.
 * With fewer than two documents the normalizer {@code log2(n)} is not a
 * positive number; the entropy term is then taken as 0 and the result is 1.
 *
 * @param term whose entropy you want
 * @return the log-entropy weight of the term
 */
private float getEntropy(Term term) {
    // Single lookup instead of containsKey() followed by get().
    final Float cached = termEntropy.get(term);
    if (cached != null) {
        return cached;
    }
    final int gf = getGlobalTermFreq(term);
    double entropy = 0;
    try {
        PostingsEnum docsEnum = this.getDocsForTerm(term);
        // NOTE(review): presumably null when the term is absent - treated as "no documents".
        if (docsEnum != null) {
            while ((docsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
                double p = docsEnum.freq(); //frequency in this document
                p = p / gf; //frequency across all documents
                entropy += p * (Math.log(p) / Math.log(2)); //sum of Plog(P)
            }
        }
        int n = this.getNumDocs();
        if (n > 1) {
            double log2n = Math.log(n) / Math.log(2);
            entropy = entropy / log2n;
        } else {
            // log2(1) == 0 would turn the division into NaN.
            entropy = 0;
        }
    } catch (IOException e) {
        logger.info("Couldn't get term entropy for term " + term.text());
        // Best effort: hand back what was accumulated, but do not cache a partial result.
        return (float) (1 + entropy);
    }
    // Cache exactly the value that is returned so repeated calls agree bit-for-bit.
    final float weight = (float) (1 + entropy);
    termEntropy.put(term, weight);
    return weight;
}
示例11: reconstructNoPositions
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Collects the text of every term of {@code te} that occurs in document
 * {@code docid} and concatenates the terms in enumeration order, each one
 * followed by a single space.
 *
 * @param te       terms enum to walk; it is consumed until exhausted
 * @param docid    id of the document to reconstruct
 * @param liveDocs not read by this method
 * @return the space-terminated terms of the document, or an empty string if none match
 */
public String reconstructNoPositions(TermsEnum te, int docid, Bits liveDocs) throws IOException{
    final StringBuilder reconstructed = new StringBuilder();
    PostingsEnum reused = null;
    for (BytesRef term = te.next(); term != null; term = te.next()) {
        reused = te.postings(reused, PostingsEnum.FREQS);
        // advance() returns the first doc at or after docid; equality means the term is in the doc.
        if (reused.advance(docid) == docid) {
            reconstructed.append(term.utf8ToString()).append(" ");
        }
    }
    return reconstructed.toString();
}
示例12: postings
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Gives {@code thrower} a chance to throw before delegating to the superclass.
 * Requests that include {@link PostingsEnum#POSITIONS} are reported as
 * {@code Flags.DocsAndPositionsEnum}, all others as {@code Flags.DocsEnum}.
 */
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
    final boolean wantsPositions = (flags & PostingsEnum.POSITIONS) != 0;
    thrower.maybeThrow(wantsPositions ? Flags.DocsAndPositionsEnum : Flags.DocsEnum);
    return super.postings(reuse, flags);
}
示例13: getTermsEnum
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Returns the terms enum, creating it on first use.
 * <p>
 * The enum is created once and then reused; on later calls the cached
 * instance is returned regardless of the {@code field} argument. When more
 * than one aggregator has been created, a {@link FreqTermsEnum} is used,
 * otherwise a plain {@link FilterableTermsEnum}.
 * </p>
 *
 * @param field name of the field the terms enum is built over
 * @return the shared terms enum
 */
private FilterableTermsEnum getTermsEnum(String field) throws IOException {
    if (termsEnum != null) {
        return termsEnum;
    }
    IndexReader reader = context.searcher().getIndexReader();
    if (numberOfAggregatorsCreated > 1) {
        termsEnum = new FreqTermsEnum(reader, field, true, false, filter, context.bigArrays());
    } else {
        // Use the requested field here as well; previously this branch ignored
        // the parameter and always read indexedFieldName.
        termsEnum = new FilterableTermsEnum(reader, field, PostingsEnum.NONE, filter);
    }
    return termsEnum;
}
示例14: build
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Returns a DocIdSet per segments containing the matching docs for the specified slice.
 * <p>
 * Every term of the field is hashed; the documents of terms whose hash code
 * is accepted by {@code contains(int)} are added to the set. A segment that
 * has no terms for the field yields an empty set.
 * </p>
 *
 * @param reader the segment reader to build the set for
 * @return the documents of this segment that belong to the slice
 */
private DocIdSet build(LeafReader reader) throws IOException {
    final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
    final Terms terms = reader.terms(getField());
    if (terms == null) {
        // The field has no terms in this segment, so nothing can match.
        return builder.build();
    }
    final TermsEnum te = terms.iterator();
    PostingsEnum docsEnum = null;
    for (BytesRef term = te.next(); term != null; term = te.next()) {
        if (contains(term.hashCode())) {
            // Only doc ids are needed, so no frequencies or positions are requested.
            docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
            builder.add(docsEnum);
        }
    }
    return builder.build();
}
示例15: convertToLuceneFlags
import org.apache.lucene.index.PostingsEnum; //导入依赖的package包/类
/**
 * Translates a bit set of {@code IndexLookup.FLAG_*} values into the
 * corresponding bit set of {@link PostingsEnum} flags.
 *
 * @param flags combination of {@code IndexLookup} flags
 * @return the matching {@link PostingsEnum} flags, {@link PostingsEnum#NONE} if none is set
 */
private int convertToLuceneFlags(int flags) {
    int luceneFlags = PostingsEnum.NONE;
    if ((flags & IndexLookup.FLAG_FREQUENCIES) > 0) {
        luceneFlags |= PostingsEnum.FREQS;
    }
    if ((flags & IndexLookup.FLAG_POSITIONS) > 0) {
        luceneFlags |= PostingsEnum.POSITIONS;
    }
    if ((flags & IndexLookup.FLAG_PAYLOADS) > 0) {
        luceneFlags |= PostingsEnum.PAYLOADS;
    }
    if ((flags & IndexLookup.FLAG_OFFSETS) > 0) {
        luceneFlags |= PostingsEnum.OFFSETS;
    }
    return luceneFlags;
}