This article collects typical usage examples of the Java class org.apache.lucene.index.TermsEnum. If you are wondering what TermsEnum is for, how to use it, or are looking for working examples, the curated class examples below should help.
The TermsEnum class belongs to the org.apache.lucene.index package. Fifteen code examples are shown below, sorted by popularity.
Example 1: DfsOnlyRequest
import org.apache.lucene.index.TermsEnum; // import the required package/class
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);
    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
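The loop above uses the core TermsEnum idiom: call next() until it returns null, reading term() (or the BytesRef returned by next()) at each step. As a standalone sketch of the same idiom — a hypothetical dumpTerms helper, assuming the Lucene 5/6 API used in this example, where Terms.iterator() takes no argument:

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

// Print every term of a field together with its document frequency.
static void dumpTerms(IndexReader reader, String field) throws IOException {
    Terms terms = MultiFields.getTerms(reader, field); // merged view across segments
    if (terms == null) {
        return; // field absent or not indexed with terms
    }
    TermsEnum te = terms.iterator();
    BytesRef term;
    while ((term = te.next()) != null) {
        System.out.println(term.utf8ToString() + " docFreq=" + te.docFreq());
    }
}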
Example 2: buildFromTerms
import org.apache.lucene.index.TermsEnum; // import the required package/class
/**
 * This method iterates all terms in the given {@link TermsEnum} and
 * associates each term's ordinal with the term's documents. The caller must
 * exhaust the returned {@link BytesRefIterator}, which returns all values
 * where the first returned value is associated with the ordinal <tt>1</tt>
 * etc.
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values the terms
 * enum should be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)}
 * or {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. If
 * the {@link TermsEnum} is not wrapped, the returned
 * {@link BytesRefIterator} will contain partial precision terms rather than
 * only full-precision terms.
 * </p>
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        private PostingsEnum docsEnum = null;

        @Override
        public BytesRef next() throws IOException {
            BytesRef ref;
            if ((ref = termsEnum.next()) != null) {
                docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
                nextOrdinal();
                int docId;
                while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    addDoc(docId);
                }
            }
            return ref;
        }
    };
}
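Per the Javadoc contract above, a caller is expected to drain the returned iterator completely; the ordinal and document bookkeeping (nextOrdinal()/addDoc()) happens as a side effect of each next() call. A minimal sketch, assuming builder is an instance of the enclosing class (presumably Elasticsearch's OrdinalsBuilder, from which this example appears to be drawn):

BytesRefIterator it = builder.buildFromTerms(termsEnum);
while (it.next() != null) {
    // intentionally empty: each next() associates one ordinal with its documents
}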
Example 3: filter
import org.apache.lucene.index.TermsEnum; // import the required package/class
protected TermsEnum filter(Terms terms, TermsEnum iterator, LeafReader reader) throws IOException {
    if (iterator == null) {
        return null;
    }
    int docCount = terms.getDocCount();
    if (docCount == -1) {
        docCount = reader.maxDoc();
    }
    if (docCount >= minSegmentSize) {
        final int minFreq = minFrequency > 1.0
            ? (int) minFrequency
            : (int) (docCount * minFrequency);
        final int maxFreq = maxFrequency > 1.0
            ? (int) maxFrequency
            : (int) (docCount * maxFrequency);
        if (minFreq > 1 || maxFreq < docCount) {
            iterator = new FrequencyFilter(iterator, minFreq, maxFreq);
        }
    }
    return iterator;
}
Example 4: writeTermWithDocsAndPos
import org.apache.lucene.index.TermsEnum; // import the required package/class
private PostingsEnum writeTermWithDocsAndPos(TermsEnum iterator, PostingsEnum docsAndPosEnum, boolean positions,
        boolean offsets, boolean payloads) throws IOException {
    docsAndPosEnum = iterator.postings(docsAndPosEnum, PostingsEnum.ALL);
    // for each term (iterator next) in this field (field)
    // iterate over the docs (should only be one)
    int nextDoc = docsAndPosEnum.nextDoc();
    assert nextDoc != DocIdSetIterator.NO_MORE_DOCS;
    final int freq = docsAndPosEnum.freq();
    writeFreq(freq);
    for (int j = 0; j < freq; j++) {
        int curPos = docsAndPosEnum.nextPosition();
        if (positions) {
            writePosition(curPos);
        }
        if (offsets) {
            writeOffsets(docsAndPosEnum.startOffset(), docsAndPosEnum.endOffset());
        }
        if (payloads) {
            writePayload(docsAndPosEnum.getPayload());
        }
    }
    nextDoc = docsAndPosEnum.nextDoc();
    assert nextDoc == DocIdSetIterator.NO_MORE_DOCS;
    return docsAndPosEnum;
}
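Stripped of the writer callbacks, the underlying postings walk looks like the following sketch (a hypothetical walkPostings helper; te is assumed to be a TermsEnum already positioned on a term via next() or seekExact()):

import java.io.IOException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

static void walkPostings(TermsEnum te) throws IOException {
    PostingsEnum pe = te.postings(null, PostingsEnum.ALL); // request freqs, positions, offsets, payloads
    while (pe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        int freq = pe.freq();
        for (int i = 0; i < freq; i++) {
            int pos = pe.nextPosition();        // must be called exactly freq() times per document
            int startOffset = pe.startOffset(); // -1 if offsets were not indexed
            int endOffset = pe.endOffset();
            BytesRef payload = pe.getPayload(); // null if no payload at this position
        }
    }
}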
Example 5: buildTerm
import org.apache.lucene.index.TermsEnum; // import the required package/class
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // start term, optimized writing
    BytesRef term = termIter.next();
    spare.copyUTF8Bytes(term);
    builder.startObject(spare.toString());
    buildTermStatistics(builder, termIter);
    // finally write the term vectors
    PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
    int termFreq = posEnum.freq();
    builder.field(FieldStrings.TERM_FREQ, termFreq);
    initMemory(curTerms, termFreq);
    initValues(curTerms, posEnum, termFreq);
    buildValues(builder, curTerms, termFreq);
    buildScore(builder, boostAtt);
    builder.endObject();
}
Example 6: testNRTSearchOnClosedWriter
import org.apache.lucene.index.TermsEnum; // import the required package/class
public void testNRTSearchOnClosedWriter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    DirectoryReader reader = DirectoryReader.open(indexWriter);
    for (int i = 0; i < 100; i++) {
        Document document = new Document();
        TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES);
        field.setBoost(i);
        document.add(field);
        indexWriter.addDocument(document);
    }
    reader = refreshReader(reader);
    indexWriter.close();
    TermsEnum termDocs = SlowCompositeReaderWrapper.wrap(reader).terms("_id").iterator();
    termDocs.next();
}
Example 7: UnionDocsAndPositionsEnum
import org.apache.lucene.index.TermsEnum; // import the required package/class
public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
    List<DocsAndPositionsEnum> docsEnums = new LinkedList<>();
    for (int i = 0; i < terms.length; i++) {
        final Term term = terms[i];
        TermState termState = termContexts.get(term).get(context.ord);
        if (termState == null) {
            // Term doesn't exist in reader
            continue;
        }
        termsEnum.seekExact(term.bytes(), termState);
        DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
        if (postings == null) {
            // term does exist, but has no positions
            throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
        }
        cost += postings.cost();
        docsEnums.add(postings);
    }
    _queue = new DocsQueue(docsEnums);
    _posList = new IntQueue();
}
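Note the seekExact(BytesRef, TermState) variant used here: it repositions the enum from a TermState that was cached earlier (typically while building the TermContext during query rewrite), avoiding the term-dictionary lookup that a plain seekExact(BytesRef) would repeat for every segment.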
Example 8: TermRangeTermsEnum
import org.apache.lucene.index.TermsEnum; // import the required package/class
/**
 * Enumerates all terms greater/equal than <code>lowerTerm</code>
 * but less/equal than <code>upperTerm</code>.
 *
 * If an endpoint is null, it is said to be "open". Either or both
 * endpoints may be open. Open endpoints may not be exclusive
 * (you can't select all but the first or last term without
 * explicitly specifying the term to exclude.)
 *
 * @param tenum
 *          TermsEnum to filter
 * @param lowerTerm
 *          The term text at the lower end of the range
 * @param upperTerm
 *          The term text at the upper end of the range
 * @param includeLower
 *          If true, the <code>lowerTerm</code> is included in the range.
 * @param includeUpper
 *          If true, the <code>upperTerm</code> is included in the range.
 */
public TermRangeTermsEnum(TermsEnum tenum, BytesRef lowerTerm, BytesRef upperTerm,
        boolean includeLower, boolean includeUpper) {
    super(tenum);
    // do a little bit of normalization...
    // open ended range queries should always be inclusive.
    if (lowerTerm == null) {
        this.lowerBytesRef = new BytesRef();
        this.includeLower = true;
    } else {
        this.lowerBytesRef = lowerTerm;
        this.includeLower = includeLower;
    }
    if (upperTerm == null) {
        this.includeUpper = true;
        upperBytesRef = null;
    } else {
        this.includeUpper = includeUpper;
        upperBytesRef = upperTerm;
    }
    setInitialSeekTerm(lowerBytesRef);
    termComp = getComparator();
}
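Used directly, the enum behaves like any other filtered TermsEnum. A sketch over an assumed Terms instance, matching the Lucene 4.x API of this example, for the range ["apple", "banana") — lower bound included, upper excluded:

TermsEnum range = new TermRangeTermsEnum(terms.iterator(null),
        new BytesRef("apple"), new BytesRef("banana"), true, false);
BytesRef t;
while ((t = range.next()) != null) {
    System.out.println(t.utf8ToString());
}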
Example 9: QueryAutoStopWordAnalyzer
import org.apache.lucene.index.TermsEnum; // import the required package/class
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
        Analyzer delegate,
        IndexReader indexReader,
        Collection<String> fields,
        int maxDocFreq) throws IOException {
    super(delegate.getReuseStrategy());
    this.delegate = delegate;
    for (String field : fields) {
        Set<String> stopWords = new HashSet<>();
        Terms terms = MultiFields.getTerms(indexReader, field);
        CharsRefBuilder spare = new CharsRefBuilder();
        if (terms != null) {
            TermsEnum te = terms.iterator(null);
            BytesRef text;
            while ((text = te.next()) != null) {
                if (te.docFreq() > maxDocFreq) {
                    spare.copyUTF8Bytes(text);
                    stopWords.add(spare.toString());
                }
            }
        }
        stopWordsPerField.put(field, stopWords);
    }
}
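Wiring it up is a one-liner. A hedged sketch (reader is an assumed open IndexReader; "body" and the threshold 200 are illustrative — terms of "body" occurring in more than 200 documents are dropped from this analyzer's token streams):

import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.query.QueryAutoStopWordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

Analyzer stopAware = new QueryAutoStopWordAnalyzer(
        new StandardAnalyzer(), reader, Collections.singleton("body"), 200);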
Example 10: getTermsEnum
import org.apache.lucene.index.TermsEnum; // import the required package/class
/** Return a {@link TermsEnum} intersecting the provided {@link Terms}
 *  with the terms accepted by this automaton. */
public TermsEnum getTermsEnum(Terms terms) throws IOException {
    switch (type) {
        case NONE:
            return TermsEnum.EMPTY;
        case ALL:
            return terms.iterator(null);
        case SINGLE:
            return new SingleTermsEnum(terms.iterator(null), term);
        case PREFIX:
            // TODO: this is very likely faster than .intersect,
            // but we should test and maybe cutover
            return new PrefixTermsEnum(terms.iterator(null), term);
        case NORMAL:
            return terms.intersect(this, null);
        default:
            // unreachable
            throw new RuntimeException("unhandled case");
    }
}
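For the NORMAL case, terms.intersect(this, null) hands the compiled automaton to the codec, which can walk the term dictionary and the automaton in lock-step instead of pulling every term and testing it one at a time; that trade-off is exactly what the TODO on the PREFIX case suggests measuring.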
Example 11: visitMatchingTerms
import org.apache.lucene.index.TermsEnum; // import the required package/class
@Override
public void visitMatchingTerms(
        IndexReader reader,
        String fieldName,
        MatchingTermVisitor mtv) throws IOException {
    /* check term presence in index here for symmetry with other SimpleTerm's */
    Terms terms = MultiFields.getTerms(reader, fieldName);
    if (terms != null) {
        TermsEnum termsEnum = terms.iterator(null);
        TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getTermText()));
        if (status == TermsEnum.SeekStatus.FOUND) {
            mtv.visitMatchingTerm(getLuceneTerm(fieldName));
        }
    }
}
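seekCeil positions the enum on the smallest term greater than or equal to the target and reports how it landed. A compact sketch of the three outcomes (te is any TermsEnum, assumed; "foo" is illustrative):

TermsEnum.SeekStatus status = te.seekCeil(new BytesRef("foo"));
switch (status) {
    case FOUND:     break; // exact term exists; enum is positioned on it
    case NOT_FOUND: break; // enum is on the next greater term
    case END:       break; // no term >= target; enum is unpositioned
}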
Example 12: createCandidateQuery
import org.apache.lucene.index.TermsEnum; // import the required package/class
Query createCandidateQuery(IndexReader indexReader) throws IOException {
    List<BytesRef> extractedTerms = new ArrayList<>();
    LeafReader reader = indexReader.leaves().get(0).reader();
    Fields fields = reader.fields();
    for (String field : fields) {
        Terms terms = fields.terms(field);
        if (terms == null) {
            continue;
        }
        BytesRef fieldBr = new BytesRef(field);
        TermsEnum tenum = terms.iterator();
        for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
            BytesRefBuilder builder = new BytesRefBuilder();
            builder.append(fieldBr);
            builder.append(FIELD_VALUE_SEPARATOR);
            builder.append(term);
            extractedTerms.add(builder.toBytesRef());
        }
    }
    Query extractionSuccess = new TermInSetQuery(queryTermsField.name(), extractedTerms);
    // include extractionResultField:failed, because docs with this term have no extractedTermsField
    // and otherwise we would fail to return these docs. Docs that failed query term extraction
    // always need to be verified by MemoryIndex:
    Query extractionFailure = new TermQuery(new Term(extractionResultField.name(), EXTRACTION_FAILED));
    return new BooleanQuery.Builder()
        .add(extractionSuccess, Occur.SHOULD)
        .add(extractionFailure, Occur.SHOULD)
        .build();
}
Example 13: build
import org.apache.lucene.index.TermsEnum; // import the required package/class
/**
 * Returns a DocIdSet per segment containing the matching docs for the specified slice.
 */
private DocIdSet build(LeafReader reader) throws IOException {
    final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
    final Terms terms = reader.terms(getField());
    final TermsEnum te = terms.iterator();
    PostingsEnum docsEnum = null;
    for (BytesRef term = te.next(); term != null; term = te.next()) {
        int hashCode = term.hashCode();
        if (contains(hashCode)) {
            docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
            builder.add(docsEnum);
        }
    }
    return builder.build();
}
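The contains(int) check is not shown in this excerpt; in Elasticsearch's sliced-scroll implementation, which this method resembles, a term belongs to a slice when the term hash modulo the number of slices equals the slice id, so each slice collects a disjoint subset of the field's terms.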
Example 14: RamAccountingTermsEnum
import org.apache.lucene.index.TermsEnum; // import the required package/class
public RamAccountingTermsEnum(TermsEnum termsEnum, CircuitBreaker breaker, AbstractIndexFieldData.PerValueEstimator estimator,
        String fieldName) {
    super(termsEnum);
    this.breaker = breaker;
    this.termsEnum = termsEnum;
    this.estimator = estimator;
    this.fieldName = fieldName;
    this.totalBytes = 0;
    this.flushBuffer = 0;
}
Example 15: wrapGeoPointTerms
import org.apache.lucene.index.TermsEnum; // import the required package/class
/**
 * A {@link TermsEnum} that iterates only the highest-resolution geo prefix coded terms.
 *
 * @see #buildFromTerms(TermsEnum)
 */
public static TermsEnum wrapGeoPointTerms(TermsEnum termsEnum) {
    return new FilteredTermsEnum(termsEnum, false) {
        @Override
        protected AcceptStatus accept(BytesRef term) throws IOException {
            // accept only the max resolution terms
            // todo is this necessary?
            return GeoPointField.getPrefixCodedShift(term) == GeoPointField.PRECISION_STEP * 4 ?
                AcceptStatus.YES : AcceptStatus.END;
        }
    };
}