This article collects typical usage examples of the Java method org.apache.lucene.index.Fields.terms. If you are unsure what Fields.terms does, how to call it, or where to find working examples, the curated snippets below should help. You can also explore the enclosing class, org.apache.lucene.index.Fields, for further usage.
The following 15 code examples of Fields.terms are listed, ordered by popularity by default.
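Before the examples, a quick orientation. The following minimal sketch is not taken from any project below; the reader and field name are placeholders, and a Lucene 5.x/6.x API level is assumed (where Terms.iterator() takes no argument). It shows the typical call pattern around Fields.terms: fetch the per-field Terms, null-check it, then walk its TermsEnum.

import java.io.IOException;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class TermsDump {
    /** Prints every term of the given field; a no-op if the field is absent. */
    static void dumpTerms(IndexReader reader, String field) throws IOException {
        Fields fields = MultiFields.getFields(reader); // merged view over all segments
        if (fields == null) {
            return; // index has no postings at all
        }
        Terms terms = fields.terms(field); // null if this field was never indexed
        if (terms == null) {
            return;
        }
        TermsEnum iterator = terms.iterator();
        for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
            System.out.println(term.utf8ToString());
        }
    }
}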
Example 1: DfsOnlyRequest
import org.apache.lucene.index.Fields; // import the package/class the method depends on
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);
    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
Example 2: estimateStringFieldData
import org.apache.lucene.index.Fields; // import the package/class the method depends on
/**
 * @return the estimate for loading the entire term set into field data, or 0 if unavailable
 */
public long estimateStringFieldData() {
    try {
        LeafReader reader = context.reader();
        Terms terms = reader.terms(getFieldName());
        Fields fields = reader.fields();
        final Terms fieldTerms = fields.terms(getFieldName());
        if (fieldTerms instanceof FieldReader) {
            final Stats stats = ((FieldReader) fieldTerms).getStats();
            long totalTermBytes = stats.totalTermBytes;
            if (logger.isTraceEnabled()) {
                logger.trace("totalTermBytes: {}, terms.size(): {}, terms.getSumDocFreq(): {}",
                        totalTermBytes, terms.size(), terms.getSumDocFreq());
            }
            long totalBytes = totalTermBytes + (2 * terms.size()) + (4 * terms.getSumDocFreq());
            return totalBytes;
        }
    } catch (Exception e) {
        logger.warn("Unable to estimate memory overhead", e);
    }
    return 0;
}
Example 3: getAnyTokenStream
import org.apache.lucene.index.Fields; // import the package/class the method depends on
/**
 * A convenience method that first tries to get a TermPositionVector for the
 * specified docId, then falls back to using the passed-in
 * {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
 * This is useful when you already have the document, but would prefer to use
 * the vector first.
 *
 * @param reader The {@link org.apache.lucene.index.IndexReader} to use to try
 *        and get the vector from
 * @param docId The docId to retrieve.
 * @param field The field to retrieve on the document
 * @param doc The document to fall back on
 * @param analyzer The analyzer to use for creating the TokenStream if the
 *        vector doesn't exist
 * @return The {@link org.apache.lucene.analysis.TokenStream} for the
 *         {@link org.apache.lucene.index.IndexableField} on the
 *         {@link org.apache.lucene.document.Document}
 * @throws IOException if there was an error loading
 */
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
        String field, Document doc, Analyzer analyzer) throws IOException {
    TokenStream ts = null;
    Fields vectors = reader.getTermVectors(docId);
    if (vectors != null) {
        Terms vector = vectors.terms(field);
        if (vector != null) {
            ts = getTokenStream(vector);
        }
    }
    // No token info stored so fall back to analyzing raw content
    if (ts == null) {
        ts = getTokenStream(doc, field, analyzer);
    }
    return ts;
}
Example 4: getTokenStreamWithOffsets
import org.apache.lucene.index.Fields; // import the package/class the method depends on
/**
 * Returns a {@link TokenStream} with positions and offsets constructed from
 * field termvectors. If the field has no termvectors, or positions or offsets
 * are not included in the termvector, return null.
 * @param reader the {@link IndexReader} to retrieve term vectors from
 * @param docId the document to retrieve termvectors for
 * @param field the field to retrieve termvectors for
 * @return a {@link TokenStream}, or null if positions and offsets are not available
 * @throws IOException If there is a low-level I/O error
 */
public static TokenStream getTokenStreamWithOffsets(IndexReader reader, int docId,
        String field) throws IOException {
    Fields vectors = reader.getTermVectors(docId);
    if (vectors == null) {
        return null;
    }
    Terms vector = vectors.terms(field);
    if (vector == null) {
        return null;
    }
    if (!vector.hasPositions() || !vector.hasOffsets()) {
        return null;
    }
    return getTokenStream(vector);
}
Example 5: createWeight
import org.apache.lucene.index.Fields; // import the package/class the method depends on
@Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
    long sumTotalTermFreq = 0;
    for (AtomicReaderContext readerContext : searcher.getTopReaderContext().leaves()) {
        Fields fields = readerContext.reader().fields();
        if (fields == null) continue;
        Terms terms = fields.terms(indexedField);
        if (terms == null) continue;
        long v = terms.getSumTotalTermFreq();
        if (v == -1) {
            // -1 means the statistic is unavailable for this segment, so it is unavailable overall
            sumTotalTermFreq = -1;
            break;
        } else {
            sumTotalTermFreq += v;
        }
    }
    final long ttf = sumTotalTermFreq;
    context.put(this, new LongDocValues(this) {
        @Override
        public long longVal(int doc) {
            return ttf;
        }
    });
}
Example 6: getFirstMatch
import org.apache.lucene.index.Fields; // import the package/class the method depends on
/**
 * Returns the first document number containing the term <code>t</code>,
 * or -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
    Fields fields = atomicReader.fields();
    if (fields == null) return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null) return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
    if (docs == null) return -1;
    int id = docs.nextDoc();
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Example 7: getFirstMatch
import org.apache.lucene.index.Fields; // import the package/class the method depends on
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
    Fields fields = MultiFields.getFields(r);
    if (fields == null) return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null) return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
    int id = docs.nextDoc();
    if (id != DocIdSetIterator.NO_MORE_DOCS) {
        int next = docs.nextDoc();
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
    }
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Example 8: getFirstMatch
import org.apache.lucene.index.Fields; // import the package/class the method depends on
/**
 * Returns the first document number containing the term <code>t</code>,
 * or -1 if no document was found. This method is primarily intended for
 * clients that want to fetch documents using a unique identifier.
 *
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
    Fields fields = atomicReader.fields();
    if (fields == null)
        return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null)
        return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes, false)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
    if (docs == null)
        return -1;
    int id = docs.nextDoc();
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Example 9: getFirstMatch
import org.apache.lucene.index.Fields; // import the package/class the method depends on
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
    Fields fields = MultiFields.getFields(r);
    if (fields == null) return -1;
    Terms terms = fields.terms(t.field());
    if (terms == null) return -1;
    BytesRef termBytes = t.bytes();
    final TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(termBytes, false)) {
        return -1;
    }
    DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
    int id = docs.nextDoc();
    if (id != DocIdSetIterator.NO_MORE_DOCS) {
        int next = docs.nextDoc();
        assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
    }
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
Example 10: getIterableRow
import org.apache.lucene.index.Fields; // import the package/class the method depends on
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException {
    IndexReader indexReader = searcher.getIndexReader();
    BytesRef rowIdRef = new BytesRef(rowId);
    List<AtomicReaderTermsEnum> possibleRowIds = new ArrayList<AtomicReaderTermsEnum>();
    for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
        AtomicReader atomicReader = atomicReaderContext.reader();
        Fields fields = atomicReader.fields();
        if (fields == null) {
            continue;
        }
        Terms terms = fields.terms(BlurConstants.ROW_ID);
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator(null);
        if (!termsEnum.seekExact(rowIdRef, true)) {
            continue;
        }
        // need atomic read as well...
        possibleRowIds.add(new AtomicReaderTermsEnum(atomicReader, termsEnum));
    }
    if (possibleRowIds.isEmpty()) {
        return null;
    }
    return new IterableRow(rowId, getRecords(possibleRowIds));
}
Example 11: createCandidateQuery
import org.apache.lucene.index.Fields; // import the package/class the method depends on
Query createCandidateQuery(IndexReader indexReader) throws IOException {
    List<BytesRef> extractedTerms = new ArrayList<>();
    LeafReader reader = indexReader.leaves().get(0).reader();
    Fields fields = reader.fields();
    for (String field : fields) {
        Terms terms = fields.terms(field);
        if (terms == null) {
            continue;
        }
        BytesRef fieldBr = new BytesRef(field);
        TermsEnum tenum = terms.iterator();
        for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
            BytesRefBuilder builder = new BytesRefBuilder();
            builder.append(fieldBr);
            builder.append(FIELD_VALUE_SEPARATOR);
            builder.append(term);
            extractedTerms.add(builder.toBytesRef());
        }
    }
    Query extractionSuccess = new TermInSetQuery(queryTermsField.name(), extractedTerms);
    // include extractionResultField:failed, because docs with this term have no extractedTermsField
    // and otherwise we would fail to return these docs. Docs that failed query term extraction
    // always need to be verified by MemoryIndex:
    Query extractionFailure = new TermQuery(new Term(extractionResultField.name(), EXTRACTION_FAILED));
    return new BooleanQuery.Builder()
            .add(extractionSuccess, Occur.SHOULD)
            .add(extractionFailure, Occur.SHOULD)
            .build();
}
Example 12: completionStats
import org.apache.lucene.index.Fields; // import the package/class the method depends on
/**
 * Returns total in-heap bytes used by all suggesters. This method has CPU cost <code>O(numIndexedFields)</code>.
 *
 * @param fieldNamePatterns if non-null, any completion field name matching any of these patterns will break out its in-heap bytes
 *                          separately in the returned {@link CompletionStats}
 */
public static CompletionStats completionStats(IndexReader indexReader, String... fieldNamePatterns) {
    long sizeInBytes = 0;
    ObjectLongHashMap<String> completionFields = null;
    if (fieldNamePatterns != null && fieldNamePatterns.length > 0) {
        completionFields = new ObjectLongHashMap<>(fieldNamePatterns.length);
    }
    for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
        LeafReader atomicReader = atomicReaderContext.reader();
        try {
            Fields fields = atomicReader.fields();
            for (String fieldName : fields) {
                Terms terms = fields.terms(fieldName);
                if (terms instanceof CompletionTerms) {
                    // TODO: currently we load up the suggester for reporting its size
                    long fstSize = ((CompletionTerms) terms).suggester().ramBytesUsed();
                    if (fieldNamePatterns != null && fieldNamePatterns.length > 0 && Regex.simpleMatch(fieldNamePatterns, fieldName)) {
                        completionFields.addTo(fieldName, fstSize);
                    }
                    sizeInBytes += fstSize;
                }
            }
        } catch (IOException ioe) {
            throw new ElasticsearchException(ioe);
        }
    }
    return new CompletionStats(sizeInBytes, completionFields == null ? null : new FieldMemoryStats(completionFields));
}
Example 13: retrieveTerms
import org.apache.lucene.index.Fields; // import the package/class the method depends on
/**
 * Find words for a more-like-this query former.
 *
 * @param docNum the id of the lucene document from which to find terms
 */
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
    Map<String, Int> termFreqMap = new HashMap<>();
    for (String fieldName : fieldNames) {
        final Fields vectors = ir.getTermVectors(docNum);
        final Terms vector;
        if (vectors != null) {
            vector = vectors.terms(fieldName);
        } else {
            vector = null;
        }
        // field does not store term vector info
        if (vector == null) {
            Document d = ir.document(docNum);
            IndexableField[] fields = d.getFields(fieldName);
            for (IndexableField field : fields) {
                final String stringValue = field.stringValue();
                if (stringValue != null) {
                    addTermFrequencies(new FastStringReader(stringValue), termFreqMap, fieldName);
                }
            }
        } else {
            addTermFrequencies(termFreqMap, vector, fieldName);
        }
    }
    return createQueue(termFreqMap);
}
Example 14: buildField
import org.apache.lucene.index.Fields; // import the package/class the method depends on
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
    String fieldName = fieldIter.next();
    builder.startObject(fieldName);
    Terms curTerms = theFields.terms(fieldName);
    // write field statistics
    buildFieldStatistics(builder, curTerms);
    builder.startObject(FieldStrings.TERMS);
    TermsEnum termIter = curTerms.iterator();
    BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class);
    for (int i = 0; i < curTerms.size(); i++) {
        buildTerm(builder, spare, curTerms, termIter, boostAtt);
    }
    builder.endObject();
    builder.endObject();
}
Example 15: checkBrownFoxTermVector
import org.apache.lucene.index.Fields; // import the package/class the method depends on
private void checkBrownFoxTermVector(Fields fields, String fieldName, boolean withPayloads) throws IOException {
    String[] values = {"brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the"};
    int[] freq = {1, 1, 1, 1, 1, 1, 1, 2};
    int[][] pos = {{2}, {8}, {3}, {4}, {7}, {5}, {1}, {0, 6}};
    int[][] startOffset = {{10}, {40}, {16}, {20}, {35}, {26}, {4}, {0, 31}};
    int[][] endOffset = {{15}, {43}, {19}, {25}, {39}, {30}, {9}, {3, 34}};
    Terms terms = fields.terms(fieldName);
    assertThat(terms.size(), equalTo(8L));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        assertThat(next, notNullValue());
        // do not test ttf or doc frequency, because here we have many
        // shards and do not know how documents are distributed
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            if (withPayloads) {
                assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
            }
        }
    }
    assertThat(iterator.next(), nullValue());
}