当前位置: 首页>>代码示例>>Java>>正文


Java Terms.size方法代码示例

本文整理汇总了Java中org.apache.lucene.index.Terms.size方法的典型用法代码示例。如果您正苦于以下问题:Java Terms.size方法的具体用法?Java Terms.size怎么用?Java Terms.size使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.index.Terms的用法示例。


在下文中一共展示了Terms.size方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: estimateStringFieldData

import org.apache.lucene.index.Terms; //导入方法依赖的package包/类
/**
 * @return the estimate for loading the entire term set into field data, or 0 if unavailable
 */
public long estimateStringFieldData() {
    try {
        LeafReader reader = context.reader();
        Terms terms = reader.terms(getFieldName());

        Fields fields = reader.fields();
        final Terms fieldTerms = fields.terms(getFieldName());

        if (fieldTerms instanceof FieldReader) {
            final Stats stats = ((FieldReader) fieldTerms).getStats();
            long totalTermBytes = stats.totalTermBytes;
            if (logger.isTraceEnabled()) {
                logger.trace("totalTermBytes: {}, terms.size(): {}, terms.getSumDocFreq(): {}",
                        totalTermBytes, terms.size(), terms.getSumDocFreq());
            }
            long totalBytes = totalTermBytes + (2 * terms.size()) + (4 * terms.getSumDocFreq());
            return totalBytes;
        }
    } catch (Exception e) {
        logger.warn("Unable to estimate memory overhead", e);
    }
    return 0;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:27,代码来源:PagedBytesIndexFieldData.java

示例2: WordScorer

import org.apache.lucene.index.Terms; //导入方法依赖的package包/类
public WordScorer(IndexReader reader, Terms terms, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
    this.field = field;
    if (terms == null) {
        throw new IllegalArgumentException("Field: [" + field + "] does not exist");
    }
    this.terms = terms;
    final long vocSize = terms.getSumTotalTermFreq();
    this.vocabluarySize =  vocSize == -1 ? reader.maxDoc() : vocSize;
    this.useTotalTermFreq = vocSize != -1;
    this.numTerms = terms.size();
    this.termsEnum = new FreqTermsEnum(reader, field, !useTotalTermFreq, useTotalTermFreq, null, BigArrays.NON_RECYCLING_INSTANCE); // non recycling for now
    this.reader = reader;
    this.realWordLikelyhood = realWordLikelyHood;
    this.separator = separator;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:16,代码来源:WordScorer.java

示例3: buildField

import org.apache.lucene.index.Terms; //导入方法依赖的package包/类
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
    String fieldName = fieldIter.next();
    builder.startObject(fieldName);
    Terms curTerms = theFields.terms(fieldName);
    // write field statistics
    buildFieldStatistics(builder, curTerms);
    builder.startObject(FieldStrings.TERMS);
    TermsEnum termIter = curTerms.iterator();
    BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class);
    for (int i = 0; i < curTerms.size(); i++) {
        buildTerm(builder, spare, curTerms, termIter, boostAtt);
    }
    builder.endObject();
    builder.endObject();
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:16,代码来源:TermVectorsResponse.java

示例4: compareTermVectors

import org.apache.lucene.index.Terms; //导入方法依赖的package包/类
private void compareTermVectors(String fieldName, Fields fields0, Fields fields1) throws IOException {
    Terms terms0 = fields0.terms(fieldName);
    Terms terms1 = fields1.terms(fieldName);
    assertThat(terms0, notNullValue());
    assertThat(terms1, notNullValue());
    assertThat(terms0.size(), equalTo(terms1.size()));

    TermsEnum iter0 = terms0.iterator();
    TermsEnum iter1 = terms1.iterator();
    for (int i = 0; i < terms0.size(); i++) {
        BytesRef next0 = iter0.next();
        assertThat(next0, notNullValue());
        BytesRef next1 = iter1.next();
        assertThat(next1, notNullValue());

        // compare field value
        String string0 = next0.utf8ToString();
        String string1 = next1.utf8ToString();
        assertThat("expected: " + string0, string0, equalTo(string1));

        // compare df and ttf
        assertThat("term: " + string0, iter0.docFreq(), equalTo(iter1.docFreq()));
        assertThat("term: " + string0, iter0.totalTermFreq(), equalTo(iter1.totalTermFreq()));

        // compare freq and docs
        PostingsEnum docsAndPositions0 = iter0.postings(null, PostingsEnum.ALL);
        PostingsEnum docsAndPositions1 = iter1.postings(null, PostingsEnum.ALL);
        assertThat("term: " + string0, docsAndPositions0.nextDoc(), equalTo(docsAndPositions1.nextDoc()));
        assertThat("term: " + string0, docsAndPositions0.freq(), equalTo(docsAndPositions1.freq()));

        // compare position, start offsets and end offsets
        for (int j = 0; j < docsAndPositions0.freq(); j++) {
            assertThat("term: " + string0, docsAndPositions0.nextPosition(), equalTo(docsAndPositions1.nextPosition()));
            assertThat("term: " + string0, docsAndPositions0.startOffset(), equalTo(docsAndPositions1.startOffset()));
            assertThat("term: " + string0, docsAndPositions0.endOffset(), equalTo(docsAndPositions1.endOffset()));
        }
    }
    assertThat(iter0.next(), nullValue());
    assertThat(iter1.next(), nullValue());
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:41,代码来源:GetTermVectorsIT.java

示例5: createValue

import org.apache.lucene.index.Terms; //导入方法依赖的package包/类
@Override
protected Accountable createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */)
    throws IOException {

  final int maxDoc = reader.maxDoc();

  Terms terms = reader.terms(key.field);

  final float acceptableOverheadRatio = ((Float) key.custom).floatValue();

  final PagedBytes bytes = new PagedBytes(15);

  int startTermsBPV;

  final int termCountHardLimit;
  if (maxDoc == Integer.MAX_VALUE) {
    termCountHardLimit = Integer.MAX_VALUE;
  } else {
    termCountHardLimit = maxDoc+1;
  }

  // TODO: use Uninvert?
  if (terms != null) {
    // Try for coarse estimate for number of bits; this
    // should be an underestimate most of the time, which
    // is fine -- GrowableWriter will reallocate as needed
    long numUniqueTerms = terms.size();
    if (numUniqueTerms != -1L) {
      if (numUniqueTerms > termCountHardLimit) {
        // app is misusing the API (there is more than
        // one term per doc); in this case we make best
        // effort to load what we can (see LUCENE-2142)
        numUniqueTerms = termCountHardLimit;
      }

      startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
    } else {
      startTermsBPV = 1;
    }
  } else {
    startTermsBPV = 1;
  }

  PackedLongValues.Builder termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
  final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);

  int termOrd = 0;

  // TODO: use Uninvert?

  if (terms != null) {
    final TermsEnum termsEnum = terms.iterator(null);
    DocsEnum docs = null;

    while(true) {
      final BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }
      if (termOrd >= termCountHardLimit) {
        break;
      }

      termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
      docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE);
      while (true) {
        final int docID = docs.nextDoc();
        if (docID == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }
        // Store 1+ ord into packed bits
        docToTermOrd.set(docID, 1+termOrd);
      }
      termOrd++;
    }
  }

  // maybe an int-only impl?
  return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.build(), docToTermOrd.getMutable(), termOrd);
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:81,代码来源:FieldCacheImpl.java


注:本文中的org.apache.lucene.index.Terms.size方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。