本文整理汇总了 Java 中 org.apache.lucene.index.Terms 类的典型用法代码示例。如果您正苦于以下问题:Java Terms 类的具体用法?Java Terms 怎么用?Java Terms 使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
Terms类属于org.apache.lucene.index包,在下文中一共展示了Terms类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: DfsOnlyRequest
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Creates a request whose wrapped search request queries for every term found in the
 * supplied term vector fields, combined as "should" clauses of one bool query.
 *
 * @param termVectorsFields fields whose terms are turned into term queries
 * @param indices           indices handed to the parent constructor and to the wrapped search request
 * @param types             types set on the wrapped search request
 * @param selectedFields    when non-null, only fields contained in this set contribute terms
 * @throws IOException if reading the terms fails
 */
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);
    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        if (terms == null) {
            // Fields.terms may return null for a field; there is nothing to add for it
            continue;
        }
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
示例2: buildTerm
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Advances {@code termIter} by one term and writes that term as an object on
 * {@code builder}. The object is named after the term text and is filled by the
 * buildTermStatistics, buildValues and buildScore helpers plus the term frequency field.
 *
 * @param builder  output builder
 * @param spare    scratch buffer used to decode the term bytes into a string
 * @param curTerms terms of the field currently being written
 * @param termIter enum positioned before the term to write
 * @param boostAtt boost attribute forwarded to buildScore
 * @throws IOException if writing or reading fails
 */
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // move to the next term and open an object keyed by its text
    final BytesRef nextTerm = termIter.next();
    spare.copyUTF8Bytes(nextTerm);
    final String termText = spare.toString();
    builder.startObject(termText);
    buildTermStatistics(builder, termIter);

    // term vector part: frequency first, then the per-position values
    final PostingsEnum postings = termIter.postings(null, PostingsEnum.ALL);
    final int freq = postings.freq();
    builder.field(FieldStrings.TERM_FREQ, freq);
    initMemory(curTerms, freq);
    initValues(curTerms, postings, freq);
    buildValues(builder, curTerms, freq);
    buildScore(builder, boostAtt);

    builder.endObject();
}
示例3: estimateStringFieldData
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Estimates the number of bytes needed to load the whole term set of this field.
 * <p>
 * The estimate is only produced when the field's {@link Terms} is a {@code FieldReader},
 * because the total term byte count is taken from its stats. Any exception raised while
 * estimating is logged at warn level and turned into a result of 0.
 *
 * @return the estimate for loading the entire term set into field data, or 0 if unavailable
 */
public long estimateStringFieldData() {
    try {
        LeafReader reader = context.reader();
        // Look the field up once so the instanceof check and the statistics below
        // are guaranteed to refer to the same Terms instance.
        final Terms terms = reader.terms(getFieldName());
        if (terms instanceof FieldReader) {
            final Stats stats = ((FieldReader) terms).getStats();
            long totalTermBytes = stats.totalTermBytes;
            final long termCount = terms.size();
            final long sumDocFreq = terms.getSumDocFreq();
            if (logger.isTraceEnabled()) {
                logger.trace("totalTermBytes: {}, terms.size(): {}, terms.getSumDocFreq(): {}",
                        totalTermBytes, termCount, sumDocFreq);
            }
            // term bytes + 2 per term + 4 per (term, document) pair
            return totalTermBytes + (2 * termCount) + (4 * sumDocFreq);
        }
    } catch (Exception e) {
        // best effort: an estimate that cannot be computed is reported as 0
        logger.warn("Unable to estimate memory overhead", e);
    }
    return 0;
}
示例4: filter
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Optionally wraps {@code iterator} in a {@code FrequencyFilter}.
 * <p>
 * The document count is taken from {@code terms}, falling back to {@code reader.maxDoc()}
 * when it is reported as -1. Filtering is only considered when that count reaches
 * {@code minSegmentSize}. A frequency setting above 1.0 is used as an absolute count,
 * otherwise it is multiplied by the document count.
 *
 * @param terms    terms the iterator was created from
 * @param iterator enum to filter, may be null
 * @param reader   reader supplying the fallback document count
 * @return null when {@code iterator} is null, otherwise the iterator or a filtering wrapper
 * @throws IOException if reading the statistics fails
 */
protected TermsEnum filter(Terms terms, TermsEnum iterator, LeafReader reader) throws IOException {
    if (iterator == null) {
        return null;
    }

    final int reportedDocCount = terms.getDocCount();
    final int docCount = (reportedDocCount == -1) ? reader.maxDoc() : reportedDocCount;

    if (docCount >= minSegmentSize) {
        final int minFreq;
        if (minFrequency > 1.0) {
            minFreq = (int) minFrequency;
        } else {
            minFreq = (int) (docCount * minFrequency);
        }

        final int maxFreq;
        if (maxFrequency > 1.0) {
            maxFreq = (int) maxFrequency;
        } else {
            maxFreq = (int) (docCount * maxFrequency);
        }

        // only wrap when the bounds can actually exclude something
        if (minFreq > 1 || maxFreq < docCount) {
            return new FrequencyFilter(iterator, minFreq, maxFreq);
        }
    }
    return iterator;
}
示例5: rewrite
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Rewrites this query. If the parent rewrite produced a different query, that query is
 * returned. Otherwise this query is kept only when at least one leaf has payloads for
 * the term's field; if none has, a plain {@code TermQuery} on the term is returned.
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
    final Query fromSuper = super.rewrite(reader);
    if (fromSuper != this) {
        return fromSuper;
    }
    for (LeafReaderContext leaf : reader.leaves()) {
        final Terms leafTerms = leaf.reader().terms(term.field());
        if (leafTerms != null && leafTerms.hasPayloads()) {
            // payloads exist somewhere, so this query has to stay as it is
            return this;
        }
    }
    // if the terms does not exist we could return a MatchNoDocsQuery but this would break the unified highlighter
    // which rewrites query with an empty reader.
    return new TermQuery(term);
}
示例6: wrap
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Returns a reader that exposes versions as doc values.
 * <p>
 * The given reader is returned unchanged when the version field already has doc values,
 * or when the _uid field is missing or has no payloads. Otherwise it is wrapped in a
 * {@code VersionFieldUpgrader}.
 *
 * @param reader segment reader to inspect
 * @return {@code reader} itself or an upgrading wrapper around it
 * @throws IOException if reading the terms fails
 */
static CodecReader wrap(CodecReader reader) throws IOException {
    final FieldInfo versionInfo = reader.getFieldInfos().fieldInfo(VersionFieldMapper.NAME);
    final boolean versionsStoredAsDocValues =
            versionInfo != null && versionInfo.getDocValuesType() != DocValuesType.NONE;
    if (versionsStoredAsDocValues) {
        // recent segment: versions are already in a numeric doc values field
        return reader;
    }

    // old segment: versions, if any, live in the payloads of the _uid field
    final Terms uidTerms = reader.terms(UidFieldMapper.NAME);
    final boolean uidHasPayloads = uidTerms != null && uidTerms.hasPayloads();
    if (uidHasPayloads) {
        // convert _uid payloads -> _version docvalues
        return new VersionFieldUpgrader(reader);
    }

    // No _uid field or no payloads: don't try to do anything clever. If any other
    // segment has versions, all versions of this segment will be initialized to 0.
    return reader;
}
示例7: buildFieldStatistics
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Writes the field statistics object (sum of doc freqs, doc count, sum of total term
 * freqs) when the doc count is positive. A doc count of -1 writes nothing. Any other
 * doc count is rejected.
 *
 * @param builder  output builder
 * @param curTerms terms supplying the statistics
 * @throws IOException           if writing fails
 * @throws IllegalStateException if the doc count is neither positive nor -1
 */
private void buildFieldStatistics(XContentBuilder builder, Terms curTerms) throws IOException {
    final long sumDocFreq = curTerms.getSumDocFreq();
    final int docCount = curTerms.getDocCount();
    final long sumTotalTermFrequencies = curTerms.getSumTotalTermFreq();

    if (docCount > 0) {
        assert ((sumDocFreq > 0)) : "docCount >= 0 but sumDocFreq ain't!";
        assert ((sumTotalTermFrequencies > 0)) : "docCount >= 0 but sumTotalTermFrequencies ain't!";
        builder.startObject(FieldStrings.FIELD_STATISTICS);
        builder.field(FieldStrings.SUM_DOC_FREQ, sumDocFreq);
        builder.field(FieldStrings.DOC_COUNT, docCount);
        builder.field(FieldStrings.SUM_TTF, sumTotalTermFrequencies);
        builder.endObject();
        return;
    }

    if (docCount == -1) {
        // -1 should only occur when field statistics were not requested at all,
        // in which case all three values are expected to be -1
        assert ((sumDocFreq == -1)) : "docCount was -1 but sumDocFreq ain't!";
        assert ((sumTotalTermFrequencies == -1)) : "docCount was -1 but sumTotalTermFrequencies ain't!";
        return;
    }

    throw new IllegalStateException(
            "Something is wrong with the field statistics of the term vector request: Values are " + "\n"
                    + FieldStrings.SUM_DOC_FREQ + " " + sumDocFreq + "\n" + FieldStrings.DOC_COUNT + " " + docCount + "\n"
                    + FieldStrings.SUM_TTF + " " + sumTotalTermFrequencies);
}
示例8: buildValues
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Writes the tokens array for the current term: one object per occurrence holding the
 * position, the start/end offsets and the payload, each only when the field has that
 * kind of data. Nothing is written when the field has none of the three.
 * <p>
 * A payload is written only when the entry is present and non-empty. Entries may be
 * null for occurrences without a payload, so they are null-checked before use.
 *
 * @param builder  output builder
 * @param curTerms terms of the field, consulted for positions/offsets/payloads support
 * @param termFreq number of occurrences to write from the current* arrays
 * @throws IOException if writing fails
 */
private void buildValues(XContentBuilder builder, Terms curTerms, int termFreq) throws IOException {
    // these flags do not change while iterating, so read them once
    final boolean hasPositions = curTerms.hasPositions();
    final boolean hasOffsets = curTerms.hasOffsets();
    final boolean hasPayloads = curTerms.hasPayloads();
    if (!(hasPayloads || hasOffsets || hasPositions)) {
        return;
    }
    builder.startArray(FieldStrings.TOKENS);
    for (int i = 0; i < termFreq; i++) {
        builder.startObject();
        if (hasPositions) {
            builder.field(FieldStrings.POS, currentPositions[i]);
        }
        if (hasOffsets) {
            builder.field(FieldStrings.START_OFFSET, currentStartOffset[i]);
            builder.field(FieldStrings.END_OFFSET, currentEndOffset[i]);
        }
        // a null entry means this occurrence has no payload; skip it instead of failing
        if (hasPayloads && currentPayloads[i] != null && (currentPayloads[i].length() > 0)) {
            builder.field(FieldStrings.PAYLOAD, currentPayloads[i]);
        }
        builder.endObject();
    }
    builder.endArray();
}
示例9: initValues
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Reads {@code termFreq} occurrences from {@code posEnum} into the current* arrays.
 * Positions, offsets and payloads are stored only when {@code curTerms} reports having
 * them. An occurrence without a payload is stored as null.
 *
 * @param curTerms terms of the field, consulted for positions/offsets/payloads support
 * @param posEnum  postings positioned on the document whose occurrences are read
 * @param termFreq number of occurrences to read
 * @throws IOException if reading the postings fails
 */
private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException {
    for (int j = 0; j < termFreq; j++) {
        // nextPosition() is always called: it advances the enum to the next occurrence
        int nextPos = posEnum.nextPosition();
        if (curTerms.hasPositions()) {
            currentPositions[j] = nextPos;
        }
        if (curTerms.hasOffsets()) {
            currentStartOffset[j] = posEnum.startOffset();
            currentEndOffset[j] = posEnum.endOffset();
        }
        if (curTerms.hasPayloads()) {
            BytesRef curPayload = posEnum.getPayload();
            if (curPayload != null) {
                // A BytesRef is a slice: its valid bytes start at 'offset', not at index 0.
                // NOTE(review): this wraps the enum's array without copying - confirm the
                // enum does not reuse the buffer between positions.
                currentPayloads[j] = new BytesArray(curPayload.bytes, curPayload.offset, curPayload.length);
            } else {
                currentPayloads[j] = null;
            }
        }
    }
}
示例10: terms
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Returns the term vector terms of {@code field}, or null when the field is unknown or
 * has no entry in the field-number-to-index map.
 */
@Override
public Terms terms(String field) throws IOException {
    final FieldInfo info = fieldInfos.fieldInfo(field);
    if (info != null) {
        final Integer index = fieldNumberToIndex.get(info.number);
        if (index != null) {
            return new TVTerms(fieldFPs[index]);
        }
        // else: term vectors were not indexed for this field
    }
    // else: no such field
    return null;
}
示例11: shardOperation
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Collects field statistics on one shard for every requested field that has terms.
 *
 * @param request shard-level request naming the shard and the fields
 * @return response carrying the shard id and the per-field statistics
 * @throws IllegalArgumentException if a requested field has no mapping
 */
@Override
protected FieldStatsShardResponse shardOperation(FieldStatsShardRequest request) {
    final ShardId shardId = request.shardId();
    final Map<String, FieldStats> statsByField = new HashMap<>();
    final IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex());
    final MapperService mapperService = indexService.mapperService();
    final IndexShard shard = indexService.shardSafe(shardId.id());
    try (Engine.Searcher searcher = shard.acquireSearcher("fieldstats")) {
        for (String field : request.getFields()) {
            final MappedFieldType fieldType = mapperService.fullName(field);
            if (fieldType == null) {
                throw new IllegalArgumentException("field [" + field + "] doesn't exist");
            }
            final IndexReader reader = searcher.reader();
            final Terms terms = MultiFields.getTerms(reader, field);
            if (terms == null) {
                // mapped field without terms on this shard: no stats entry
                continue;
            }
            statsByField.put(field, fieldType.stats(terms, reader.maxDoc()));
        }
    } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
    }
    return new FieldStatsShardResponse(shardId, statsByField);
}
示例12: QueryAutoStopWordAnalyzer
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Builds an analyzer whose stopwords are computed per field: every term of a given
 * field whose document frequency is greater than {@code maxDocFreq} becomes a stopword
 * for that field.
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
        Analyzer delegate,
        IndexReader indexReader,
        Collection<String> fields,
        int maxDocFreq) throws IOException {
    super(delegate.getReuseStrategy());
    this.delegate = delegate;

    for (String field : fields) {
        final Set<String> fieldStopWords = new HashSet<>();
        final Terms fieldTerms = MultiFields.getTerms(indexReader, field);
        final CharsRefBuilder scratch = new CharsRefBuilder();
        if (fieldTerms != null) {
            final TermsEnum termsEnum = fieldTerms.iterator(null);
            for (BytesRef termBytes = termsEnum.next(); termBytes != null; termBytes = termsEnum.next()) {
                if (termsEnum.docFreq() > maxDocFreq) {
                    scratch.copyUTF8Bytes(termBytes);
                    fieldStopWords.add(scratch.toString());
                }
            }
        }
        // a field without terms still gets an (empty) entry
        stopWordsPerField.put(field, fieldStopWords);
    }
}
示例13: getTermsEnum
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Return a {@link TermsEnum} intersecting the provided {@link Terms}
 * with the terms accepted by this automaton.
 *
 * @param terms terms to enumerate
 * @return an enum chosen according to this automaton's type
 * @throws IOException if creating the enum fails
 */
public TermsEnum getTermsEnum(Terms terms) throws IOException {
    switch (type) {
        case NONE: {
            return TermsEnum.EMPTY;
        }
        case ALL: {
            return terms.iterator(null);
        }
        case SINGLE: {
            final TermsEnum all = terms.iterator(null);
            return new SingleTermsEnum(all, term);
        }
        case PREFIX: {
            // TODO: this is very likely faster than .intersect,
            // but we should test and maybe cutover
            final TermsEnum all = terms.iterator(null);
            return new PrefixTermsEnum(all, term);
        }
        case NORMAL: {
            return terms.intersect(this, null);
        }
    }
    // unreachable
    throw new RuntimeException("unhandled case");
}
示例14: visitMatchingTerms
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Reports this term to {@code mtv} when the exact term text is present in
 * {@code fieldName} of the index. Does nothing when the field has no terms.
 */
@Override
public void visitMatchingTerms(
        IndexReader reader,
        String fieldName,
        MatchingTermVisitor mtv) throws IOException {
    /* check term presence in index here for symmetry with other SimpleTerm's */
    final Terms fieldTerms = MultiFields.getTerms(reader, fieldName);
    if (fieldTerms == null) {
        return;
    }
    final TermsEnum termsEnum = fieldTerms.iterator(null);
    final BytesRef wanted = new BytesRef(getTermText());
    // seekCeil may stop on a later term; only an exact hit counts
    if (termsEnum.seekCeil(wanted) == TermsEnum.SeekStatus.FOUND) {
        mtv.visitMatchingTerm(getLuceneTerm(fieldName));
    }
}
示例15: loadDirect
import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Loads geo point field data for one segment.
 * <p>
 * When the field has no terms, an empty instance sized by {@code maxDoc} is returned and
 * reported to the estimator. Otherwise loading goes to the legacy loader for indices
 * created before 2.2.0 and to the 2.2 loader for all others.
 */
@Override
public AtomicGeoPointFieldData loadDirect(LeafReaderContext context) throws Exception {
    final LeafReader leafReader = context.reader();
    final Terms fieldTerms = leafReader.terms(getFieldNames().indexName());
    // TODO: Use an actual estimator to estimate before loading.
    final NonEstimatingEstimator estimator = new NonEstimatingEstimator(breakerService.getBreaker(CircuitBreaker.FIELDDATA));

    if (fieldTerms == null) {
        final AtomicGeoPointFieldData empty = AbstractAtomicGeoPointFieldData.empty(leafReader.maxDoc());
        estimator.afterLoad(null, empty.ramBytesUsed());
        return empty;
    }

    // the loaders are handed a null data reference, exactly as before
    final AtomicGeoPointFieldData data = null;
    if (Version.indexCreated(indexSettings).before(Version.V_2_2_0)) {
        return loadLegacyFieldData(leafReader, estimator, fieldTerms, data);
    }
    return loadFieldData22(leafReader, estimator, fieldTerms, data);
}