当前位置: 首页>>代码示例>>Java>>正文


Java TermsEnum.next方法代码示例

本文整理汇总了Java中org.apache.lucene.index.TermsEnum.next方法的典型用法代码示例。如果您正苦于以下问题:Java TermsEnum.next方法的具体用法?Java TermsEnum.next怎么用?Java TermsEnum.next使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.index.TermsEnum的用法示例。


在下文中一共展示了TermsEnum.next方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: buildTerm

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // start term, optimized writing
    BytesRef term = termIter.next();
    spare.copyUTF8Bytes(term);
    builder.startObject(spare.toString());
    buildTermStatistics(builder, termIter);
    // finally write the term vectors
    PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
    int termFreq = posEnum.freq();
    builder.field(FieldStrings.TERM_FREQ, termFreq);
    initMemory(curTerms, termFreq);
    initValues(curTerms, posEnum, termFreq);
    buildValues(builder, curTerms, termFreq);
    buildScore(builder, boostAtt);
    builder.endObject();
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:17,代码来源:TermVectorsResponse.java

示例2: buildFromTerms

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
/**
 * This method iterates all terms in the given {@link TermsEnum} and
 * associates each terms ordinal with the terms documents. The caller must
 * exhaust the returned {@link BytesRefIterator} which returns all values
 * where the first returned value is associated with the ordinal <tt>1</tt>
 * etc.
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values the terms
 * enum should be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)}
 * or {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. If
 * the {@link TermsEnum} is not wrapped the returned
 * {@link BytesRefIterator} will contain partial precision terms rather than
 * only full-precision terms.
 * </p>
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        private PostingsEnum docsEnum = null;

        @Override
        public BytesRef next() throws IOException {
            BytesRef ref;
            if ((ref = termsEnum.next()) != null) {
                docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
                nextOrdinal();
                int docId;
                while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    addDoc(docId);
                }
            }
            return ref;
        }
    };
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:35,代码来源:OrdinalsBuilder.java

示例3: testNRTSearchOnClosedWriter

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
public void testNRTSearchOnClosedWriter() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    DirectoryReader reader = DirectoryReader.open(indexWriter);

    for (int i = 0; i < 100; i++) {
        Document document = new Document();
        TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES);
        field.setBoost(i);
        document.add(field);
        indexWriter.addDocument(document);
    }
    reader = refreshReader(reader);

    indexWriter.close();

    TermsEnum termDocs = SlowCompositeReaderWrapper.wrap(reader).terms("_id").iterator();
    termDocs.next();
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:20,代码来源:SimpleLuceneTests.java

示例4: buildFromTerms

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
/**
 * This method iterates all terms in the given {@link TermsEnum} and
 * associates each terms ordinal with the terms documents. The caller must
 * exhaust the returned {@link BytesRefIterator} which returns all values
 * where the first returned value is associted with the ordinal <tt>1</tt>
 * etc.
 * <p>
 * If the {@link TermsEnum} contains prefix coded numerical values the terms
 * enum should be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)}
 * or {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. If
 * the {@link TermsEnum} is not wrapped the returned
 * {@link BytesRefIterator} will contain partial precision terms rather than
 * only full-precision terms.
 * </p>
 */
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
    return new BytesRefIterator() {
        private PostingsEnum docsEnum = null;

        @Override
        public BytesRef next() throws IOException {
            BytesRef ref;
            if ((ref = termsEnum.next()) != null) {
                docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
                nextOrdinal();
                int docId;
                while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                    addDoc(docId);
                }
            }
            return ref;
        }
    };
}
 
开发者ID:baidu,项目名称:Elasticsearch,代码行数:35,代码来源:OrdinalsBuilder.java

示例5: QueryAutoStopWordAnalyzer

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;
  
  for (String field : fields) {
    Set<String> stopWords = new HashSet<>();
    Terms terms = MultiFields.getTerms(indexReader, field);
    CharsRefBuilder spare = new CharsRefBuilder();
    if (terms != null) {
      TermsEnum te = terms.iterator(null);
      BytesRef text;
      while ((text = te.next()) != null) {
        if (te.docFreq() > maxDocFreq) {
          spare.copyUTF8Bytes(text);
          stopWords.add(spare.toString());
        }
      }
    }
    stopWordsPerField.put(field, stopWords);
  }
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:37,代码来源:QueryAutoStopWordAnalyzer.java

示例6: build

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
/**
 * Returns a DocIdSet per segments containing the matching docs for the specified slice.
 */
private DocIdSet build(LeafReader reader) throws IOException {
    final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
    final Terms terms = reader.terms(getField());
    final TermsEnum te = terms.iterator();
    PostingsEnum docsEnum = null;
    for (BytesRef term = te.next(); term != null; term = te.next()) {
        int hashCode = term.hashCode();
        if (contains(hashCode)) {
            docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
            builder.add(docsEnum);
        }
    }
    return builder.build();
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:18,代码来源:TermsSliceQuery.java

示例7: addTermFrequencies

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
/**
 * Adds terms and frequencies found in vector into the Map termFreqMap
 *
 * @param termFreqMap a Map of terms and their frequencies
 * @param vector List of terms and their frequencies for a doc/field
 * @param fieldName Optional field name of the terms for skip terms
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName) throws IOException {
    final TermsEnum termsEnum = vector.iterator();
    final CharsRefBuilder spare = new CharsRefBuilder();
    BytesRef text;
    while((text = termsEnum.next()) != null) {
        spare.copyUTF8Bytes(text);
        final String term = spare.toString();
        if (isNoiseWord(term)) {
            continue;
        }
        if (isSkipTerm(fieldName, term)) {
            continue;
        }

        final PostingsEnum docs = termsEnum.postings(null);
        int freq = 0;
        while(docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            freq += docs.freq();
        }

        // increment frequency
        Int cnt = termFreqMap.get(term);
        if (cnt == null) {
            cnt = new Int();
            termFreqMap.put(term, cnt);
            cnt.x = freq;
        } else {
            cnt.x += freq;
        }
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:39,代码来源:XMoreLikeThis.java

示例8: getPrefixTerms

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
    List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext leaf : leaves) {
        Terms _terms = leaf.reader().terms(field);
        if (_terms == null) {
            continue;
        }

        TermsEnum termsEnum = _terms.iterator();
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
        if (TermsEnum.SeekStatus.END == seekStatus) {
            continue;
        }

        for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
            if (!StringHelper.startsWith(term, prefix.bytes())) {
                break;
            }

            terms.add(new Term(field, BytesRef.deepCopyOf(term)));
            if (terms.size() >= maxExpansions) {
                return;
            }
        }
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:29,代码来源:MultiPhrasePrefixQuery.java

示例9: testTermsEnum

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
public void testTermsEnum() throws Exception {
    fillExtendedMvSet();
    writer.forceMerge(1);
    List<LeafReaderContext> atomicReaderContexts = refreshReader();

    IndexOrdinalsFieldData ifd = getForField("value");
    for (LeafReaderContext atomicReaderContext : atomicReaderContexts) {
        AtomicOrdinalsFieldData afd = ifd.load(atomicReaderContext);

        TermsEnum termsEnum = afd.getOrdinalsValues().termsEnum();
        int size = 0;
        while (termsEnum.next() != null) {
            size++;
        }
        assertThat(size, equalTo(12));

        assertThat(termsEnum.seekExact(new BytesRef("10")), is(true));
        assertThat(termsEnum.term().utf8ToString(), equalTo("10"));
        assertThat(termsEnum.next(), nullValue());

        assertThat(termsEnum.seekExact(new BytesRef("08")), is(true));
        assertThat(termsEnum.term().utf8ToString(), equalTo("08"));
        size = 0;
        while (termsEnum.next() != null) {
            size++;
        }
        assertThat(size, equalTo(2));

        termsEnum.seekExact(8);
        assertThat(termsEnum.term().utf8ToString(), equalTo("07"));
        size = 0;
        while (termsEnum.next() != null) {
            size++;
        }
        assertThat(size, equalTo(3));
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:38,代码来源:AbstractStringFieldDataTestCase.java

示例10: visitMatchingTerms

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* inspired by PrefixQuery.rewrite(): */
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator(null);

    boolean skip = false;
    TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));
    if (status == TermsEnum.SeekStatus.FOUND) {
      mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
    } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
      if (StringHelper.startsWith(termsEnum.term(), prefixRef)) {
        mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString()));
      } else {
        skip = true;
      }
    } else {
      // EOF
      skip = true;
    }

    if (!skip) {
      while(true) {
        BytesRef text = termsEnum.next();
        if (text != null && StringHelper.startsWith(text, prefixRef)) {
          mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString()));
        } else {
          break;
        }
      }
    }
  }
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:39,代码来源:SrndPrefixQuery.java

示例11: checkBrownFoxTermVector

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
private void checkBrownFoxTermVector(Fields fields, String fieldName, boolean withPayloads) throws IOException {
    String[] values = {"brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the"};
    int[] freq = {1, 1, 1, 1, 1, 1, 1, 2};
    int[][] pos = {{2}, {8}, {3}, {4}, {7}, {5}, {1}, {0, 6}};
    int[][] startOffset = {{10}, {40}, {16}, {20}, {35}, {26}, {4}, {0, 31}};
    int[][] endOffset = {{15}, {43}, {19}, {25}, {39}, {30}, {9}, {3, 34}};

    Terms terms = fields.terms(fieldName);
    assertThat(terms.size(), equalTo(8L));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        assertThat(next, notNullValue());
        // do not test ttf or doc frequency, because here we have many
        // shards and do not know how documents are distributed
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            if (withPayloads) {
                assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
            }
        }
    }
    assertThat(iterator.next(), nullValue());
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:40,代码来源:GetTermVectorsIT.java

示例12: compareTermVectors

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
private void compareTermVectors(String fieldName, Fields fields0, Fields fields1) throws IOException {
    Terms terms0 = fields0.terms(fieldName);
    Terms terms1 = fields1.terms(fieldName);
    assertThat(terms0, notNullValue());
    assertThat(terms1, notNullValue());
    assertThat(terms0.size(), equalTo(terms1.size()));

    TermsEnum iter0 = terms0.iterator();
    TermsEnum iter1 = terms1.iterator();
    for (int i = 0; i < terms0.size(); i++) {
        BytesRef next0 = iter0.next();
        assertThat(next0, notNullValue());
        BytesRef next1 = iter1.next();
        assertThat(next1, notNullValue());

        // compare field value
        String string0 = next0.utf8ToString();
        String string1 = next1.utf8ToString();
        assertThat("expected: " + string0, string0, equalTo(string1));

        // compare df and ttf
        assertThat("term: " + string0, iter0.docFreq(), equalTo(iter1.docFreq()));
        assertThat("term: " + string0, iter0.totalTermFreq(), equalTo(iter1.totalTermFreq()));

        // compare freq and docs
        PostingsEnum docsAndPositions0 = iter0.postings(null, PostingsEnum.ALL);
        PostingsEnum docsAndPositions1 = iter1.postings(null, PostingsEnum.ALL);
        assertThat("term: " + string0, docsAndPositions0.nextDoc(), equalTo(docsAndPositions1.nextDoc()));
        assertThat("term: " + string0, docsAndPositions0.freq(), equalTo(docsAndPositions1.freq()));

        // compare position, start offsets and end offsets
        for (int j = 0; j < docsAndPositions0.freq(); j++) {
            assertThat("term: " + string0, docsAndPositions0.nextPosition(), equalTo(docsAndPositions1.nextPosition()));
            assertThat("term: " + string0, docsAndPositions0.startOffset(), equalTo(docsAndPositions1.startOffset()));
            assertThat("term: " + string0, docsAndPositions0.endOffset(), equalTo(docsAndPositions1.endOffset()));
        }
    }
    assertThat(iter0.next(), nullValue());
    assertThat(iter1.next(), nullValue());
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:41,代码来源:GetTermVectorsIT.java

示例13: checkBestTerms

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
private void checkBestTerms(Terms terms, List<String> expectedTerms) throws IOException {
    final TermsEnum termsEnum = terms.iterator();
    List<String> bestTerms = new ArrayList<>();
    BytesRef text;
    while((text = termsEnum.next()) != null) {
        bestTerms.add(text.utf8ToString());
    }
    Collections.sort(expectedTerms);
    Collections.sort(bestTerms);
    assertArrayEquals(expectedTerms.toArray(), bestTerms.toArray());
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:12,代码来源:GetTermVectorsIT.java

示例14: visitMatchingTerms

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  int prefixLength = prefix.length();
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms != null) {
    Matcher matcher = pattern.matcher("");
    try {
      TermsEnum termsEnum = terms.iterator(null);

      TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);
      BytesRef text;
      if (status == TermsEnum.SeekStatus.FOUND) {
        text = prefixRef;
      } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
        text = termsEnum.term();
      } else {
        text = null;
      }

      while(text != null) {
        if (text != null && StringHelper.startsWith(text, prefixRef)) {
          String textString = text.utf8ToString();
          matcher.reset(textString.substring(prefixLength));
          if (matcher.matches()) {
            mtv.visitMatchingTerm(new Term(fieldName, textString));
          }
        } else {
          break;
        }
        text = termsEnum.next();
      }
    } finally {
      matcher.reset();
    }
  }
}
 
开发者ID:lamsfoundation,项目名称:lams,代码行数:41,代码来源:SrndTruncQuery.java

示例15: testArtificialDocWithPreference

import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
public void testArtificialDocWithPreference() throws ExecutionException, InterruptedException, IOException {
    // setup indices
    Settings.Builder settings = Settings.builder()
            .put(indexSettings())
            .put("index.analysis.analyzer", "standard");
    assertAcked(prepareCreate("test")
            .setSettings(settings)
            .addMapping("type1", "field1", "type=text,term_vector=with_positions_offsets"));
    ensureGreen();

    // index document
    indexRandom(true, client().prepareIndex("test", "type1", "1").setSource("field1", "random permutation"));

    // Get search shards
    ClusterSearchShardsResponse searchShardsResponse = client().admin().cluster().prepareSearchShards("test").get();
    List<Integer> shardIds = Arrays.stream(searchShardsResponse.getGroups()).map(s -> s.getShardId().id()).collect(Collectors.toList());

    // request termvectors of artificial document from each shard
    int sumTotalTermFreq = 0;
    int sumDocFreq = 0;
    for (Integer shardId : shardIds) {
        TermVectorsResponse tvResponse = client().prepareTermVectors()
                .setIndex("test")
                .setType("type1")
                .setPreference("_shards:" + shardId)
                .setDoc(jsonBuilder().startObject().field("field1", "random permutation").endObject())
                .setFieldStatistics(true)
                .setTermStatistics(true)
                .get();
        Fields fields = tvResponse.getFields();
        Terms terms = fields.terms("field1");
        assertNotNull(terms);
        TermsEnum termsEnum = terms.iterator();
        while (termsEnum.next() != null) {
            sumTotalTermFreq += termsEnum.totalTermFreq();
            sumDocFreq += termsEnum.docFreq();
        }
    }
    assertEquals("expected to find term statistics in exactly one shard!", 2, sumTotalTermFreq);
    assertEquals("expected to find term statistics in exactly one shard!", 2, sumDocFreq);
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:42,代码来源:GetTermVectorsIT.java


注:本文中的org.apache.lucene.index.TermsEnum.next方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。