

Java TermsEnum Class Code Examples

This article collects typical usage examples of the Java class org.apache.lucene.index.TermsEnum. If you are wondering what TermsEnum is for, how to use it, or what real-world code that uses it looks like, the curated examples below should help.


The TermsEnum class belongs to the org.apache.lucene.index package. Fifteen code examples of the class are shown below, ordered by popularity.
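
Before diving into the examples, here is a minimal sketch of the core pattern they all share: obtain the Terms for a field, get its TermsEnum, and walk each term as a BytesRef. This is illustrative only and targets a Lucene 5.x-era API (MultiFields.getTerms and the no-argument iterator() vary slightly across Lucene versions).

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

static void listTerms(IndexReader reader, String field) throws IOException {
    Terms terms = MultiFields.getTerms(reader, field); // null if the field has no terms
    if (terms == null) {
        return;
    }
    TermsEnum te = terms.iterator();
    BytesRef term;
    while ((term = te.next()) != null) { // next() returns null at the end of the enum
        System.out.println(term.utf8ToString() + " docFreq=" + te.docFreq());
    }
}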

Example 1: DfsOnlyRequest

import org.apache.lucene.index.TermsEnum; // import the required package/class
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);

    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
 
Developer ID: baidu, Project: Elasticsearch, Lines: 20, Source: DfsOnlyRequest.java

Example 2: buildFromTerms

import org.apache.lucene.index.TermsEnum; // import the required package/class
/**
 * This method iterates over all terms in the given {@link TermsEnum} and
 * associates each term's ordinal with the term's documents. The caller must
 * exhaust the returned {@link BytesRefIterator} which returns all values
 * where the first returned value is associated with the ordinal <tt>1</tt>
 * etc.
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values the terms
 * enum should be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)}
 * or {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. If
 * the {@link TermsEnum} is not wrapped the returned
 * {@link BytesRefIterator} will contain partial precision terms rather than
 * only full-precision terms.
 * </p>
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        private PostingsEnum docsEnum = null;

        @Override
        public BytesRef next() throws IOException {
            BytesRef ref;
            if ((ref = termsEnum.next()) != null) {
                docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
                nextOrdinal();
                int docId;
                while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    addDoc(docId);
                }
            }
            return ref;
        }
    };
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 35, Source: OrdinalsBuilder.java
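
A hedged usage sketch of buildFromTerms: the name wrapNumeric32Bit comes from the javadoc above, while the surrounding setup (a LeafReader named reader, an OrdinalsBuilder instance named builder, the field name, and wrapNumeric32Bit being a static helper) is assumed for illustration.

import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRefIterator;

Terms terms = reader.terms("my_int_field");
if (terms != null) {
    // numeric fields store prefix-coded terms, so wrap before building
    // (assuming wrapNumeric32Bit is a static helper on OrdinalsBuilder)
    TermsEnum wrapped = OrdinalsBuilder.wrapNumeric32Bit(terms.iterator());
    BytesRefIterator values = builder.buildFromTerms(wrapped);
    // the javadoc requires exhausting the iterator; ordinals start at 1
    while (values.next() != null) {
        // consume values in order: ordinal 1, 2, 3, ...
    }
}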

Example 3: filter

import org.apache.lucene.index.TermsEnum; // import the required package/class
protected TermsEnum filter(Terms terms, TermsEnum iterator, LeafReader reader) throws IOException {
    if (iterator == null) {
        return null;
    }
    int docCount = terms.getDocCount();
    if (docCount == -1) {
        docCount = reader.maxDoc();
    }
    if (docCount >= minSegmentSize) {
        final int minFreq = minFrequency > 1.0
                ? (int) minFrequency
                : (int)(docCount * minFrequency);
        final int maxFreq = maxFrequency > 1.0
                ? (int) maxFrequency
                : (int)(docCount * maxFrequency);
        if (minFreq > 1 || maxFreq < docCount) {
            iterator = new FrequencyFilter(iterator, minFreq, maxFreq);
        }
    }
    return iterator;
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 22, Source: AbstractIndexOrdinalsFieldData.java
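
A quick worked example of the threshold logic: with minFrequency = 0.01, maxFrequency = 0.5, and a segment where docCount = 10,000, the thresholds resolve to minFreq = 100 and maxFreq = 5,000, so the returned FrequencyFilter only passes terms whose document frequency lies within those bounds. Values above 1.0 are interpreted as absolute document counts rather than ratios of docCount.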

Example 4: writeTermWithDocsAndPos

import org.apache.lucene.index.TermsEnum; // import the required package/class
private PostingsEnum writeTermWithDocsAndPos(TermsEnum iterator, PostingsEnum docsAndPosEnum, boolean positions,
                                                     boolean offsets, boolean payloads) throws IOException {
    docsAndPosEnum = iterator.postings(docsAndPosEnum, PostingsEnum.ALL);
    // for each term (iterator next) in this field (field)
    // iterate over the docs (should only be one)
    int nextDoc = docsAndPosEnum.nextDoc();
    assert nextDoc != DocIdSetIterator.NO_MORE_DOCS;
    final int freq = docsAndPosEnum.freq();
    writeFreq(freq);
    for (int j = 0; j < freq; j++) {
        int curPos = docsAndPosEnum.nextPosition();
        if (positions) {
            writePosition(curPos);
        }
        if (offsets) {
            writeOffsets(docsAndPosEnum.startOffset(), docsAndPosEnum.endOffset());
        }
        if (payloads) {
            writePayload(docsAndPosEnum.getPayload());
        }
    }
    nextDoc = docsAndPosEnum.nextDoc();
    assert nextDoc == DocIdSetIterator.NO_MORE_DOCS;
    return docsAndPosEnum;
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 26, Source: TermVectorsWriter.java
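
The loop above is specialized to term vectors, where each term occurs in exactly one document. Below is a hedged sketch of the same PostingsEnum traversal generalized to a term that may occur in several documents (Lucene 5.x-era API; termsEnum is assumed to be positioned on a term).

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

PostingsEnum pe = termsEnum.postings(null, PostingsEnum.ALL);
int doc;
while ((doc = pe.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    int freq = pe.freq();                   // occurrences of the term in this doc
    for (int i = 0; i < freq; i++) {
        int pos = pe.nextPosition();        // token position of this occurrence
        int start = pe.startOffset();       // -1 when offsets were not indexed
        int end = pe.endOffset();
        BytesRef payload = pe.getPayload(); // null when there is no payload
    }
}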

Example 5: buildTerm

import org.apache.lucene.index.TermsEnum; // import the required package/class
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // start term, optimized writing
    BytesRef term = termIter.next();
    spare.copyUTF8Bytes(term);
    builder.startObject(spare.toString());
    buildTermStatistics(builder, termIter);
    // finally write the term vectors
    PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
    int termFreq = posEnum.freq();
    builder.field(FieldStrings.TERM_FREQ, termFreq);
    initMemory(curTerms, termFreq);
    initValues(curTerms, posEnum, termFreq);
    buildValues(builder, curTerms, termFreq);
    buildScore(builder, boostAtt);
    builder.endObject();
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 17, Source: TermVectorsResponse.java

Example 6: testNRTSearchOnClosedWriter

import org.apache.lucene.index.TermsEnum; // import the required package/class
public void testNRTSearchOnClosedWriter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    DirectoryReader reader = DirectoryReader.open(indexWriter);

    for (int i = 0; i < 100; i++) {
        Document document = new Document();
        TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES);
        field.setBoost(i);
        document.add(field);
        indexWriter.addDocument(document);
    }
    reader = refreshReader(reader);

    indexWriter.close();

    TermsEnum termDocs = SlowCompositeReaderWrapper.wrap(reader).terms("_id").iterator();
    termDocs.next();
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 20, Source: SimpleLuceneTests.java

Example 7: UnionDocsAndPositionsEnum

import org.apache.lucene.index.TermsEnum; // import the required package/class
public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map<Term,TermContext> termContexts, TermsEnum termsEnum) throws IOException {
  List<DocsAndPositionsEnum> docsEnums = new LinkedList<>();
  for (int i = 0; i < terms.length; i++) {
    final Term term = terms[i];
    TermState termState = termContexts.get(term).get(context.ord);
    if (termState == null) {
      // Term doesn't exist in reader
      continue;
    }
    termsEnum.seekExact(term.bytes(), termState);
    DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE);
    if (postings == null) {
      // term does exist, but has no positions
      throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")");
    }
    cost += postings.cost();
    docsEnums.add(postings);
  }

  _queue = new DocsQueue(docsEnums);
  _posList = new IntQueue();
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 23, Source: MultiPhraseQuery.java

Example 8: TermRangeTermsEnum

import org.apache.lucene.index.TermsEnum; // import the required package/class
/**
 * Enumerates all terms greater than or equal to <code>lowerTerm</code>
 * but less than or equal to <code>upperTerm</code>.
 * 
 * If an endpoint is null, it is said to be "open". Either or both 
 * endpoints may be open.  Open endpoints may not be exclusive 
 * (you can't select all but the first or last term without 
 * explicitly specifying the term to exclude.)
 * 
 * @param tenum
 *          TermsEnum to filter
 * @param lowerTerm
 *          The term text at the lower end of the range
 * @param upperTerm
 *          The term text at the upper end of the range
 * @param includeLower
 *          If true, the <code>lowerTerm</code> is included in the range.
 * @param includeUpper
 *          If true, the <code>upperTerm</code> is included in the range.
 */
public TermRangeTermsEnum(TermsEnum tenum, BytesRef lowerTerm, BytesRef upperTerm, 
  boolean includeLower, boolean includeUpper) {
  super(tenum);

  // do a little bit of normalization...
  // open ended range queries should always be inclusive.
  if (lowerTerm == null) {
    this.lowerBytesRef = new BytesRef();
    this.includeLower = true;
  } else {
    this.lowerBytesRef = lowerTerm;
    this.includeLower = includeLower;
  }

  if (upperTerm == null) {
    this.includeUpper = true;
    upperBytesRef = null;
  } else {
    this.includeUpper = includeUpper;
    upperBytesRef = upperTerm;
  }

  setInitialSeekTerm(lowerBytesRef);
  termComp = getComparator();
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 46, Source: TermRangeTermsEnum.java
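
A hedged usage sketch (Lucene 4.x-era API, matching the iterator(null) calls elsewhere in this project): enumerate all terms between "apple" and "banana", both endpoints inclusive. The reader variable and the "body" field are assumptions.

import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.TermRangeTermsEnum;
import org.apache.lucene.util.BytesRef;

Terms terms = MultiFields.getTerms(reader, "body");
if (terms != null) {
    TermsEnum range = new TermRangeTermsEnum(terms.iterator(null),
            new BytesRef("apple"), new BytesRef("banana"),
            true /* includeLower */, true /* includeUpper */);
    BytesRef t;
    while ((t = range.next()) != null) {
        System.out.println(t.utf8ToString());
    }
}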

Example 9: QueryAutoStopWordAnalyzer

import org.apache.lucene.index.TermsEnum; // import the required package/class
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;
  
  for (String field : fields) {
    Set<String> stopWords = new HashSet<>();
    Terms terms = MultiFields.getTerms(indexReader, field);
    CharsRefBuilder spare = new CharsRefBuilder();
    if (terms != null) {
      TermsEnum te = terms.iterator(null);
      BytesRef text;
      while ((text = te.next()) != null) {
        if (te.docFreq() > maxDocFreq) {
          spare.copyUTF8Bytes(text);
          stopWords.add(spare.toString());
        }
      }
    }
    stopWordsPerField.put(field, stopWords);
  }
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 37, Source: QueryAutoStopWordAnalyzer.java
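
A hedged usage sketch of the constructor above: any term appearing in more than 200 documents of the listed fields becomes a query-time stopword. The open IndexReader is assumed, and note that older Lucene 4.x releases require a Version argument for StandardAnalyzer.

import java.util.Arrays;
import org.apache.lucene.analysis.query.QueryAutoStopWordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;

QueryAutoStopWordAnalyzer analyzer = new QueryAutoStopWordAnalyzer(
        new StandardAnalyzer(), reader, Arrays.asList("title", "body"), 200);
// use `analyzer` when parsing user queries; high-frequency terms are dropped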

Example 10: getTermsEnum

import org.apache.lucene.index.TermsEnum; // import the required package/class
/** Return a {@link TermsEnum} intersecting the provided {@link Terms}
 *  with the terms accepted by this automaton. */
public TermsEnum getTermsEnum(Terms terms) throws IOException {
  switch(type) {
  case NONE:
    return TermsEnum.EMPTY;
  case ALL:
    return terms.iterator(null);
  case SINGLE:
    return new SingleTermsEnum(terms.iterator(null), term);
  case PREFIX:
    // TODO: this is very likely faster than .intersect,
    // but we should test and maybe cutover
    return new PrefixTermsEnum(terms.iterator(null), term);
  case NORMAL:
    return terms.intersect(this, null);
  default:
    // unreachable
    throw new RuntimeException("unhandled case");
  }
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 22, Source: CompiledAutomaton.java
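
A hedged sketch of calling getTermsEnum: compile a regular expression into an automaton and intersect it with a field's terms (Lucene 4.x-era API; the reader variable and "body" field are assumptions).

import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RegExp;

CompiledAutomaton ca = new CompiledAutomaton(new RegExp("foo.*").toAutomaton());
Terms terms = MultiFields.getTerms(reader, "body");
if (terms != null) {
    TermsEnum matching = ca.getTermsEnum(terms); // only terms accepted by the automaton
    BytesRef term;
    while ((term = matching.next()) != null) {
        System.out.println(term.utf8ToString());
    }
}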

Example 11: visitMatchingTerms

import org.apache.lucene.index.TermsEnum; // import the required package/class
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* check term presence in index here for symmetry with other SimpleTerm's */
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);

    TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getTermText()));
    if (status == TermsEnum.SeekStatus.FOUND) {
      mtv.visitMatchingTerm(getLuceneTerm(fieldName));
    }
  }
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 18, Source: SrndTermQuery.java

Example 12: createCandidateQuery

import org.apache.lucene.index.TermsEnum; // import the required package/class
Query createCandidateQuery(IndexReader indexReader) throws IOException {
    List<BytesRef> extractedTerms = new ArrayList<>();
    LeafReader reader = indexReader.leaves().get(0).reader();
    Fields fields = reader.fields();
    for (String field : fields) {
        Terms terms = fields.terms(field);
        if (terms == null) {
            continue;
        }

        BytesRef fieldBr = new BytesRef(field);
        TermsEnum tenum = terms.iterator();
        for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
            BytesRefBuilder builder = new BytesRefBuilder();
            builder.append(fieldBr);
            builder.append(FIELD_VALUE_SEPARATOR);
            builder.append(term);
            extractedTerms.add(builder.toBytesRef());
        }
    }
    Query extractionSuccess = new TermInSetQuery(queryTermsField.name(), extractedTerms);
    // include extractionResultField:failed, because docs with this term have no extractedTermsField
    // and otherwise we would fail to return these docs. Docs that failed query term extraction
    // always need to be verified by MemoryIndex:
    Query extractionFailure = new TermQuery(new Term(extractionResultField.name(), EXTRACTION_FAILED));

    return new BooleanQuery.Builder()
            .add(extractionSuccess, Occur.SHOULD)
            .add(extractionFailure, Occur.SHOULD)
            .build();
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 32, Source: PercolatorFieldMapper.java

Example 13: build

import org.apache.lucene.index.TermsEnum; // import the required package/class
/**
 * Returns a DocIdSet per segments containing the matching docs for the specified slice.
 */
private DocIdSet build(LeafReader reader) throws IOException {
    final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
    final Terms terms = reader.terms(getField());
    final TermsEnum te = terms.iterator();
    PostingsEnum docsEnum = null;
    for (BytesRef term = te.next(); term != null; term = te.next()) {
        int hashCode = term.hashCode();
        if (contains(hashCode)) {
            docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
            builder.add(docsEnum);
        }
    }
    return builder.build();
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 18, Source: TermsSliceQuery.java

Example 14: RamAccountingTermsEnum

import org.apache.lucene.index.TermsEnum; // import the required package/class
public RamAccountingTermsEnum(TermsEnum termsEnum, CircuitBreaker breaker, AbstractIndexFieldData.PerValueEstimator estimator,
                              String fieldName) {
    super(termsEnum);
    this.breaker = breaker;
    this.termsEnum = termsEnum;
    this.estimator = estimator;
    this.fieldName = fieldName;
    this.totalBytes = 0;
    this.flushBuffer = 0;
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 11, Source: RamAccountingTermsEnum.java

Example 15: wrapGeoPointTerms

import org.apache.lucene.index.TermsEnum; // import the required package/class
/**
 * A {@link TermsEnum} that iterates only highest resolution geo prefix coded terms.
 *
 * @see #buildFromTerms(TermsEnum)
 */
public static TermsEnum wrapGeoPointTerms(TermsEnum termsEnum) {
    return new FilteredTermsEnum(termsEnum, false) {
        @Override
        protected AcceptStatus accept(BytesRef term) throws IOException {
            // accept only the max resolution terms
            // todo is this necessary?
            return GeoPointField.getPrefixCodedShift(term) == GeoPointField.PRECISION_STEP * 4 ?
                AcceptStatus.YES : AcceptStatus.END;
        }
    };
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 17, Source: OrdinalsBuilder.java


Note: The org.apache.lucene.index.TermsEnum examples in this article were collected from open-source projects hosted on platforms such as GitHub and MSDocs. The snippets were selected from projects contributed by open-source developers; copyright in the code remains with the original authors. For redistribution and use, refer to each project's license. Please do not republish without permission.