当前位置: 首页>>代码示例>>Java>>正文


Java Terms类代码示例

本文整理汇总了Java中org.apache.lucene.index.Terms的典型用法代码示例。如果您正苦于以下问题：Java Terms类的具体用法？Java Terms怎么用？Java Terms使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


Terms类属于org.apache.lucene.index包，在下文中一共展示了Terms类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: DfsOnlyRequest

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Builds a request whose wrapped search query is a boolean query with one "should" term clause
 * for every term found in the supplied term-vector fields.
 *
 * @param termVectorsFields fields whose terms are turned into term queries
 * @param indices indices passed to the superclass constructor and to the wrapped search request
 * @param types types set on the wrapped search request
 * @param selectedFields when non-null, only fields contained in this set contribute terms
 * @throws IOException declared because the term lookups and iteration may throw it
 */
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);

    // Collect one "should" clause per (field, term) pair.
    final BoolQueryBuilder allTermsQuery = boolQuery();
    for (String field : termVectorsFields) {
        final boolean fieldWanted = (selectedFields == null) || selectedFields.contains(field);
        if (fieldWanted) {
            final TermsEnum termsEnum = termVectorsFields.terms(field).iterator();
            while (termsEnum.next() != null) {
                allTermsQuery.should(QueryBuilders.termQuery(field, termsEnum.term().utf8ToString()));
            }
        }
    }

    // Wrap the collected query in a search request object.
    this.searchRequest = new SearchRequest(indices)
            .types(types)
            .source(new SearchSourceBuilder().query(allTermsQuery));
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:20,代码来源:DfsOnlyRequest.java

示例2: buildTerm

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Advances {@code termIter} to its next term and writes that term as an object named after the
 * term text: term statistics, term frequency, per-position values and finally the score.
 *
 * @param builder output builder the term object is written to
 * @param spare scratch buffer used to decode the term bytes into text
 * @param curTerms terms of the field currently being written
 * @param termIter iterator that is advanced by exactly one term
 * @param boostAtt boost attribute handed to {@code buildScore}
 * @throws IOException declared because the iterator and builder calls may throw it
 */
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // Open an object keyed by the UTF-8 decoded text of the next term.
    final BytesRef termBytes = termIter.next();
    spare.copyUTF8Bytes(termBytes);
    final String termText = spare.toString();
    builder.startObject(termText);

    buildTermStatistics(builder, termIter);

    // Term vector section: frequency first, then the per-position values.
    final PostingsEnum postings = termIter.postings(null, PostingsEnum.ALL);
    final int frequency = postings.freq();
    builder.field(FieldStrings.TERM_FREQ, frequency);
    initMemory(curTerms, frequency);
    initValues(curTerms, postings, frequency);
    buildValues(builder, curTerms, frequency);

    buildScore(builder, boostAtt);
    builder.endObject();
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:17,代码来源:TermVectorsResponse.java

示例3: estimateStringFieldData

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Estimates the number of bytes needed to load the field's entire term set into field data.
 *
 * @return the estimate, or 0 when the field's terms are not a {@code FieldReader} or when any
 *         exception is raised while computing it (the exception is logged at warn level)
 */
public long estimateStringFieldData() {
    try {
        final LeafReader leafReader = context.reader();
        final Terms terms = leafReader.terms(getFieldName());
        final Terms fieldTerms = leafReader.fields().terms(getFieldName());

        // Term byte statistics are only read from FieldReader instances.
        if (!(fieldTerms instanceof FieldReader)) {
            return 0;
        }

        final long totalTermBytes = ((FieldReader) fieldTerms).getStats().totalTermBytes;
        if (logger.isTraceEnabled()) {
            logger.trace("totalTermBytes: {}, terms.size(): {}, terms.getSumDocFreq(): {}",
                    totalTermBytes, terms.size(), terms.getSumDocFreq());
        }
        // Term bytes plus 2 per term plus 4 per document-frequency entry.
        return totalTermBytes + (2 * terms.size()) + (4 * terms.getSumDocFreq());
    } catch (Exception e) {
        logger.warn("Unable to estimate memory overhead", e);
    }
    return 0;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:27,代码来源:PagedBytesIndexFieldData.java

示例4: filter

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Wraps {@code iterator} in a {@code FrequencyFilter} when the segment is large enough and the
 * configured frequency bounds would actually exclude something.
 *
 * @param terms terms whose document count is used to scale the frequency bounds
 * @param iterator terms enum to filter; {@code null} is returned unchanged
 * @param reader reader whose {@code maxDoc()} is used when the terms report a doc count of -1
 * @return the original iterator, a filtering wrapper around it, or {@code null} for a null input
 * @throws IOException declared because reading the document count may throw it
 */
protected TermsEnum filter(Terms terms, TermsEnum iterator, LeafReader reader) throws IOException {
    if (iterator == null) {
        return null;
    }

    final int reportedDocCount = terms.getDocCount();
    final int docCount = (reportedDocCount == -1) ? reader.maxDoc() : reportedDocCount;

    final boolean segmentLargeEnough = docCount >= minSegmentSize;
    if (!segmentLargeEnough) {
        return iterator;
    }

    // Bounds above 1.0 are used as given; otherwise they are multiplied by the document count.
    final int minFreq;
    if (minFrequency > 1.0) {
        minFreq = (int) minFrequency;
    } else {
        minFreq = (int) (docCount * minFrequency);
    }
    final int maxFreq;
    if (maxFrequency > 1.0) {
        maxFreq = (int) maxFrequency;
    } else {
        maxFreq = (int) (docCount * maxFrequency);
    }

    // Skip the wrapper when the bounds cannot exclude any term.
    if (minFreq <= 1 && maxFreq >= docCount) {
        return iterator;
    }
    return new FrequencyFilter(iterator, minFreq, maxFreq);
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:22,代码来源:AbstractIndexOrdinalsFieldData.java

示例5: rewrite

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Rewrites this query to a plain {@code TermQuery} unless the term's field has payloads in at
 * least one leaf of the reader.
 *
 * @param reader reader whose leaves are inspected for payloads
 * @return the superclass rewrite result if it differs from this query; otherwise this query when
 *         payloads are present, or a new {@code TermQuery} on the same term when they are not
 * @throws IOException declared because the superclass rewrite and term lookups may throw it
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
    final Query superRewritten = super.rewrite(reader);
    if (superRewritten != this) {
        return superRewritten;
    }

    for (LeafReaderContext leaf : reader.leaves()) {
        final Terms fieldTerms = leaf.reader().terms(term.field());
        if (fieldTerms != null && fieldTerms.hasPayloads()) {
            // One leaf with payloads is enough to keep this query as is.
            return this;
        }
    }

    // if the terms does not exist we could return a MatchNoDocsQuery but this would break the unified highlighter
    // which rewrites query with an empty reader.
    return new TermQuery(term);
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:24,代码来源:AllTermQuery.java

示例6: wrap

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Returns a reader that exposes versions as doc values, wrapping {@code reader} in a
 * {@code VersionFieldUpgrader} only when that is both needed and possible.
 *
 * @param reader segment reader to inspect
 * @return {@code reader} itself when it already has a version field with doc values, or when its
 *         _uid field is missing or has no payloads; otherwise an upgrading wrapper
 * @throws IOException declared because the term lookups may throw it
 */
static CodecReader wrap(CodecReader reader) throws IOException {
    final FieldInfo versionInfo = reader.getFieldInfos().fieldInfo(VersionFieldMapper.NAME);
    final boolean versionsInDocValues =
            versionInfo != null && versionInfo.getDocValuesType() != DocValuesType.NONE;
    if (versionsInDocValues) {
        // Recent segment: versions are already stored in a numeric doc values field.
        return reader;
    }

    // Old segment: fall back to the _uid field.
    final Terms uidTerms = reader.terms(UidFieldMapper.NAME);
    final boolean uidHasPayloads = uidTerms != null && uidTerms.hasPayloads();
    if (!uidHasPayloads) {
        // No _uid field or no payloads: don't try to do anything clever. If any other
        // segment has versions, all versions of this segment will be initialized to 0.
        return reader;
    }

    // convert _uid payloads -> _version docvalues
    return new VersionFieldUpgrader(reader);
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:20,代码来源:VersionFieldUpgrader.java

示例7: buildFieldStatistics

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Writes the field statistics object (sum of doc freqs, doc count, sum of total term freqs) for
 * {@code curTerms}.
 *
 * <p>Nothing is written when the doc count is -1, which the code treats as "statistics were not
 * requested". Any other non-positive doc count is rejected.
 *
 * @param builder output builder the statistics object is written to
 * @param curTerms terms whose statistics are read
 * @throws IOException declared because reading the statistics and writing may throw it
 * @throws IllegalStateException if the doc count is neither positive nor -1
 */
private void buildFieldStatistics(XContentBuilder builder, Terms curTerms) throws IOException {
    final long sumDocFreq = curTerms.getSumDocFreq();
    final int docCount = curTerms.getDocCount();
    final long sumTotalTermFrequencies = curTerms.getSumTotalTermFreq();

    if (docCount == -1) {
        // this should only be -1 if the field statistics were not requested at all.
        // In this case all 3 values should be -1
        assert ((sumDocFreq == -1)) : "docCount was -1 but sumDocFreq ain't!";
        assert ((sumTotalTermFrequencies == -1)) : "docCount was -1 but sumTotalTermFrequencies ain't!";
        return;
    }

    if (docCount <= 0) {
        throw new IllegalStateException(
                "Something is wrong with the field statistics of the term vector request: Values are " + "\n"
                        + FieldStrings.SUM_DOC_FREQ + " " + sumDocFreq + "\n" + FieldStrings.DOC_COUNT + " " + docCount + "\n"
                        + FieldStrings.SUM_TTF + " " + sumTotalTermFrequencies);
    }

    assert ((sumDocFreq > 0)) : "docCount >= 0 but sumDocFreq ain't!";
    assert ((sumTotalTermFrequencies > 0)) : "docCount >= 0 but sumTotalTermFrequencies ain't!";
    builder.startObject(FieldStrings.FIELD_STATISTICS);
    builder.field(FieldStrings.SUM_DOC_FREQ, sumDocFreq);
    builder.field(FieldStrings.DOC_COUNT, docCount);
    builder.field(FieldStrings.SUM_TTF, sumTotalTermFrequencies);
    builder.endObject();
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:25,代码来源:TermVectorsResponse.java

示例8: buildValues

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Writes the tokens array for the current term: one object per occurrence holding its position,
 * start/end offsets and payload, depending on what {@code curTerms} provides.
 *
 * <p>Nothing is written when the terms have neither positions, offsets nor payloads. A payload is
 * only written when the entry for that occurrence is non-null and non-empty; a {@code null} entry
 * is treated as "no payload" rather than dereferenced.
 *
 * @param builder output builder the tokens array is written to
 * @param curTerms terms of the current field, queried for positions/offsets/payloads support
 * @param termFreq number of occurrences to write; the first {@code termFreq} entries of the
 *        current* arrays are read
 * @throws IOException declared because the capability checks and builder calls may throw it
 */
private void buildValues(XContentBuilder builder, Terms curTerms, int termFreq) throws IOException {
    // The capability flags do not change while writing, so read them once.
    final boolean hasPositions = curTerms.hasPositions();
    final boolean hasOffsets = curTerms.hasOffsets();
    final boolean hasPayloads = curTerms.hasPayloads();
    if (!(hasPayloads || hasOffsets || hasPositions)) {
        return;
    }

    builder.startArray(FieldStrings.TOKENS);
    for (int i = 0; i < termFreq; i++) {
        builder.startObject();
        if (hasPositions) {
            builder.field(FieldStrings.POS, currentPositions[i]);
        }
        if (hasOffsets) {
            builder.field(FieldStrings.START_OFFSET, currentStartOffset[i]);
            builder.field(FieldStrings.END_OFFSET, currentEndOffset[i]);
        }
        // Guard against null entries: an occurrence without a payload must not cause an NPE.
        if (hasPayloads && currentPayloads[i] != null && currentPayloads[i].length() > 0) {
            builder.field(FieldStrings.PAYLOAD, currentPayloads[i]);
        }
        builder.endObject();
    }
    builder.endArray();
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:23,代码来源:TermVectorsResponse.java

示例9: initValues

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Reads the next {@code termFreq} occurrences from {@code posEnum} and stores their position,
 * offsets and payload into the current* arrays, depending on what {@code curTerms} provides.
 *
 * <p>{@code nextPosition()} is called for every occurrence even when positions are not stored.
 * When payloads are enabled but an occurrence has none, the payload entry is set to {@code null}.
 *
 * @param curTerms terms of the current field, queried for positions/offsets/payloads support
 * @param posEnum postings enum that is advanced by {@code termFreq} positions
 * @param termFreq number of occurrences to read
 * @throws IOException declared because the postings enum calls may throw it
 */
private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException {
    for (int j = 0; j < termFreq; j++) {
        int nextPos = posEnum.nextPosition();
        if (curTerms.hasPositions()) {
            currentPositions[j] = nextPos;
        }
        if (curTerms.hasOffsets()) {
            currentStartOffset[j] = posEnum.startOffset();
            currentEndOffset[j] = posEnum.endOffset();
        }
        if (curTerms.hasPayloads()) {
            BytesRef curPayload = posEnum.getPayload();
            if (curPayload != null) {
                // Honor the BytesRef offset: the valid bytes start at offset, not at index 0.
                currentPayloads[j] = new BytesArray(curPayload.bytes, curPayload.offset, curPayload.length);
            } else {
                currentPayloads[j] = null;
            }
        }
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:21,代码来源:TermVectorsResponse.java

示例10: terms

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Looks up the term vector terms of a field.
 *
 * @param field name of the field
 * @return a new {@code TVTerms} for the field, or {@code null} when the field is unknown or no
 *         term vectors were indexed for it
 * @throws IOException declared by the overridden method
 */
@Override
public Terms terms(String field) throws IOException {
  final FieldInfo info = fieldInfos.fieldInfo(field);
  if (info != null) {
    final Integer index = fieldNumberToIndex.get(info.number);
    if (index != null) {
      return new TVTerms(fieldFPs[index]);
    }
  }
  // Either there is no such field, or term vectors were not indexed for it.
  return null;
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:17,代码来源:Lucene3xTermVectorsReader.java

示例11: shardOperation

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Computes field statistics on one shard for every field named in the request.
 *
 * <p>Fields whose terms are absent from the reader are left out of the response.
 *
 * @param request shard-level request carrying the shard id and the field names
 * @return response holding the shard id and the statistics per field
 * @throws IllegalArgumentException if a requested field has no mapped field type
 */
@Override
protected FieldStatsShardResponse shardOperation(FieldStatsShardRequest request) {
    final ShardId shardId = request.shardId();
    final Map<String, FieldStats> statsByField = new HashMap<>();
    final IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex());
    final MapperService mapperService = indexService.mapperService();
    final IndexShard shard = indexService.shardSafe(shardId.id());

    try (Engine.Searcher searcher = shard.acquireSearcher("fieldstats")) {
        for (String field : request.getFields()) {
            final MappedFieldType fieldType = mapperService.fullName(field);
            if (fieldType == null) {
                throw new IllegalArgumentException("field [" + field + "] doesn't exist");
            }

            final IndexReader reader = searcher.reader();
            final Terms terms = MultiFields.getTerms(reader, field);
            if (terms == null) {
                continue;
            }
            statsByField.put(field, fieldType.stats(terms, reader.maxDoc()));
        }
    } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
    }
    return new FieldStatsShardResponse(shardId, statsByField);
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:26,代码来源:TransportFieldStatsTransportAction.java

示例12: QueryAutoStopWordAnalyzer

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Creates a new QueryAutoStopWordAnalyzer whose stopwords are, for each of the given fields,
 * the terms with a document frequency strictly greater than {@code maxDocFreq}.
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader the stopwords are identified from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency a term must exceed in order to be a stopword
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;

  for (String fieldName : fields) {
    final Set<String> frequentTerms = new HashSet<>();
    final Terms fieldTerms = MultiFields.getTerms(indexReader, fieldName);
    final CharsRefBuilder scratch = new CharsRefBuilder();
    if (fieldTerms != null) {
      final TermsEnum termsEnum = fieldTerms.iterator(null);
      for (BytesRef termBytes = termsEnum.next(); termBytes != null; termBytes = termsEnum.next()) {
        if (termsEnum.docFreq() <= maxDocFreq) {
          continue;
        }
        scratch.copyUTF8Bytes(termBytes);
        frequentTerms.add(scratch.toString());
      }
    }
    // A field without terms still gets an (empty) stopword set.
    stopWordsPerField.put(fieldName, frequentTerms);
  }
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:37,代码来源:QueryAutoStopWordAnalyzer.java

示例13: getTermsEnum

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Return a {@link TermsEnum} intersecting the provided {@link Terms}
 * with the terms accepted by this automaton.
 *
 * @param terms terms to intersect with
 * @return the enum chosen according to this automaton's {@code type}
 * @throws IOException declared because creating the iterator or intersection may throw it
 */
public TermsEnum getTermsEnum(Terms terms) throws IOException {
  final TermsEnum result;
  switch (type) {
    case NONE:
      result = TermsEnum.EMPTY;
      break;
    case ALL:
      result = terms.iterator(null);
      break;
    case SINGLE:
      result = new SingleTermsEnum(terms.iterator(null), term);
      break;
    case PREFIX:
      // TODO: this is very likely faster than .intersect,
      // but we should test and maybe cutover
      result = new PrefixTermsEnum(terms.iterator(null), term);
      break;
    case NORMAL:
      result = terms.intersect(this, null);
      break;
    default:
      // unreachable
      throw new RuntimeException("unhandled case");
  }
  return result;
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:22,代码来源:CompiledAutomaton.java

示例14: visitMatchingTerms

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Reports this query's term to the visitor if, and only if, the term is present in the given
 * field of the index.
 *
 * @param reader index reader to look the term up in
 * @param fieldName field whose terms are searched
 * @param mtv visitor notified when the term is found
 * @throws IOException declared because the term lookup and seek may throw it
 */
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* check term presence in index here for symmetry with other SimpleTerm's */
  final Terms fieldTerms = MultiFields.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return;
  }

  final TermsEnum.SeekStatus seekResult =
      fieldTerms.iterator(null).seekCeil(new BytesRef(getTermText()));
  if (seekResult == TermsEnum.SeekStatus.FOUND) {
    mtv.visitMatchingTerm(getLuceneTerm(fieldName));
  }
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:18,代码来源:SrndTermQuery.java

示例15: loadDirect

import org.apache.lucene.index.Terms; //导入依赖的package包/类
/**
 * Loads geo-point field data for one leaf.
 *
 * <p>When the field has no terms in this leaf, an empty instance sized by {@code maxDoc()} is
 * returned and reported to the estimator. Otherwise loading is delegated to the legacy loader
 * for indices created before version 2.2.0, and to the 2.2 loader for all others.
 *
 * @param context leaf to load from
 * @return the loaded, or empty, field data
 * @throws Exception declared by the overridden method
 */
@Override
public AtomicGeoPointFieldData loadDirect(LeafReaderContext context) throws Exception {
    final LeafReader leafReader = context.reader();
    final Terms terms = leafReader.terms(getFieldNames().indexName());

    AtomicGeoPointFieldData data = null;
    // TODO: Use an actual estimator to estimate before loading.
    final NonEstimatingEstimator estimator =
            new NonEstimatingEstimator(breakerService.getBreaker(CircuitBreaker.FIELDDATA));

    if (terms != null) {
        final boolean createdBefore22 = Version.indexCreated(indexSettings).before(Version.V_2_2_0);
        if (createdBefore22) {
            return loadLegacyFieldData(leafReader, estimator, terms, data);
        }
        return loadFieldData22(leafReader, estimator, terms, data);
    }

    data = AbstractAtomicGeoPointFieldData.empty(leafReader.maxDoc());
    estimator.afterLoad(null, data.ramBytesUsed());
    return data;
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:17,代码来源:GeoPointArrayIndexFieldData.java


注:本文中的org.apache.lucene.index.Terms类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。