

Java Terms.iterator Method Code Examples

This article collects typical usage examples of the Java method org.apache.lucene.index.Terms.iterator. If you have been wondering what Terms.iterator does, how to call it, or where to find working examples, the curated code samples below may help. You can also explore further usage examples of the enclosing class, org.apache.lucene.index.Terms.


The following presents 15 code examples of the Terms.iterator method, ordered by popularity.
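Before the examples, here is a minimal sketch of the pattern they all share: obtain a Terms instance for a field, call iterator(), and walk the enumeration term by term. This is an illustrative sketch only; the reader and field names are placeholders, not taken from any example below. Note that the examples mix two API generations: in Lucene 5.0 and later, Terms.iterator() takes no argument, whereas the older Lucene 4.x code on this page calls terms.iterator(null), passing an optional TermsEnum to reuse.

import java.io.IOException;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

// Minimal sketch (Lucene 5.x+ API): print every term of a field, segment by segment.
static void printAllTerms(IndexReader reader, String field) throws IOException {
    for (LeafReaderContext leaf : reader.leaves()) {
        Terms terms = leaf.reader().terms(field); // null if this segment has no such field
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator();   // on Lucene 4.x: terms.iterator(null)
        BytesRef term;
        while ((term = termsEnum.next()) != null) {
            System.out.println(term.utf8ToString() + " docFreq=" + termsEnum.docFreq());
        }
    }
}

Every example below is a variation on this loop: seeking into the enumeration with seekCeil, wrapping it in a filtering TermsEnum, or feeding the terms into a query builder.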

Example 1: DfsOnlyRequest

import org.apache.lucene.index.Terms; // import the package/class the method depends on
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);

    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
 
Developer ID: baidu, Project: Elasticsearch, Lines: 20, Source: DfsOnlyRequest.java

Example 2: DirectCandidateGenerator

import org.apache.lucene.index.Terms; // import the package/class the method depends on
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader,
        double nonErrorLikelihood, int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
    if (terms == null) {
        throw new IllegalArgumentException("generator field [" + field + "] doesn't exist");
    }
    this.spellchecker = spellchecker;
    this.field = field;
    this.numCandidates = numCandidates;
    this.suggestMode = suggestMode;
    this.reader = reader;
    final long dictSize = terms.getSumTotalTermFreq();
    this.useTotalTermFrequency = dictSize != -1;
    this.dictSize =  dictSize == -1 ? reader.maxDoc() : dictSize;
    this.preFilter = preFilter;
    this.postFilter = postFilter;
    this.nonErrorLikelihood = nonErrorLikelihood;
    float thresholdFrequency = spellchecker.getThresholdFrequency();
    this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency: (int)(dictSize * thresholdFrequency);
    termsEnum = terms.iterator();
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 21, Source: DirectCandidateGenerator.java

Example 3: QueryAutoStopWordAnalyzer

import org.apache.lucene.index.Terms; // import the package/class the method depends on
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;
  
  for (String field : fields) {
    Set<String> stopWords = new HashSet<>();
    Terms terms = MultiFields.getTerms(indexReader, field);
    CharsRefBuilder spare = new CharsRefBuilder();
    if (terms != null) {
      TermsEnum te = terms.iterator(null);
      BytesRef text;
      while ((text = te.next()) != null) {
        if (te.docFreq() > maxDocFreq) {
          spare.copyUTF8Bytes(text);
          stopWords.add(spare.toString());
        }
      }
    }
    stopWordsPerField.put(field, stopWords);
  }
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 37, Source: QueryAutoStopWordAnalyzer.java

Example 4: getTermsEnum

import org.apache.lucene.index.Terms; // import the package/class the method depends on
/** Return a {@link TermsEnum} intersecting the provided {@link Terms}
 *  with the terms accepted by this automaton. */
public TermsEnum getTermsEnum(Terms terms) throws IOException {
  switch(type) {
  case NONE:
    return TermsEnum.EMPTY;
  case ALL:
    return terms.iterator(null);
  case SINGLE:
    return new SingleTermsEnum(terms.iterator(null), term);
  case PREFIX:
    // TODO: this is very likely faster than .intersect,
    // but we should test and maybe cutover
    return new PrefixTermsEnum(terms.iterator(null), term);
  case NORMAL:
    return terms.intersect(this, null);
  default:
    // unreachable
    throw new RuntimeException("unhandled case");
  }
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 22, Source: CompiledAutomaton.java

Example 5: createCandidateQuery

import org.apache.lucene.index.Terms; // import the package/class the method depends on
Query createCandidateQuery(IndexReader indexReader) throws IOException {
    List<BytesRef> extractedTerms = new ArrayList<>();
    LeafReader reader = indexReader.leaves().get(0).reader();
    Fields fields = reader.fields();
    for (String field : fields) {
        Terms terms = fields.terms(field);
        if (terms == null) {
            continue;
        }

        BytesRef fieldBr = new BytesRef(field);
        TermsEnum tenum = terms.iterator();
        for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
            BytesRefBuilder builder = new BytesRefBuilder();
            builder.append(fieldBr);
            builder.append(FIELD_VALUE_SEPARATOR);
            builder.append(term);
            extractedTerms.add(builder.toBytesRef());
        }
    }
    Query extractionSuccess = new TermInSetQuery(queryTermsField.name(), extractedTerms);
    // include extractionResultField:failed, because docs with this term have no extractedTermsField
    // and otherwise we would fail to return these docs. Docs that failed query term extraction
    // always need to be verified by MemoryIndex:
    Query extractionFailure = new TermQuery(new Term(extractionResultField.name(), EXTRACTION_FAILED));

    return new BooleanQuery.Builder()
            .add(extractionSuccess, Occur.SHOULD)
            .add(extractionFailure, Occur.SHOULD)
            .build();
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 32, Source: PercolatorFieldMapper.java

Example 6: beforeLoad

import org.apache.lucene.index.Terms; // import the package/class the method depends on
/**
 * Determine whether the BlockTreeTermsReader.FieldReader can be used
 * for estimating the field data, adding the estimate to the circuit
 * breaker if it can, otherwise wrapping the terms in a
 * RamAccountingTermsEnum to be estimated on a per-term basis.
 *
 * @param terms terms to be estimated
 * @return A possibly wrapped TermsEnum for the terms
 */
@Override
public TermsEnum beforeLoad(Terms terms) throws IOException {
    LeafReader reader = context.reader();

    TermsEnum iterator = terms.iterator();
    TermsEnum filteredIterator = filter(terms, iterator, reader);
    final boolean filtered = iterator != filteredIterator;
    iterator = filteredIterator;

    if (filtered) {
        if (logger.isTraceEnabled()) {
            logger.trace("Filter exists, can't circuit break normally, using RamAccountingTermsEnum");
        }
        return new RamAccountingTermsEnum(iterator, breaker, this, this.fieldName);
    } else {
        estimatedBytes = this.estimateStringFieldData();
        // If we weren't able to estimate, wrap in the RamAccountingTermsEnum
        if (estimatedBytes == 0) {
            iterator = new RamAccountingTermsEnum(iterator, breaker, this, this.fieldName);
        } else {
            breaker.addEstimateBytesAndMaybeBreak(estimatedBytes, fieldName);
        }

        return iterator;
    }
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 36, Source: PagedBytesIndexFieldData.java

Example 7: getTermsEnum

import org.apache.lucene.index.Terms; // import the package/class the method depends on
@Override  
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
  TermsEnum tenum = terms.iterator(null);
  
  if (prefix.bytes().length == 0) {
    // no prefix -- match all terms for this field:
    return tenum;
  }
  return new PrefixTermsEnum(tenum, prefix.bytes());
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 11, Source: PrefixQuery.java

Example 8: getPrefixTerms

import org.apache.lucene.index.Terms; // import the package/class the method depends on
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
    // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
    List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext leaf : leaves) {
        Terms _terms = leaf.reader().terms(field);
        if (_terms == null) {
            continue;
        }

        TermsEnum termsEnum = _terms.iterator();
        TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
        if (TermsEnum.SeekStatus.END == seekStatus) {
            continue;
        }

        for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
            if (!StringHelper.startsWith(term, prefix.bytes())) {
                break;
            }

            terms.add(new Term(field, BytesRef.deepCopyOf(term)));
            if (terms.size() >= maxExpansions) {
                return;
            }
        }
    }
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 29, Source: MultiPhrasePrefixQuery.java

Example 9: buildField

import org.apache.lucene.index.Terms; // import the package/class the method depends on
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
    String fieldName = fieldIter.next();
    builder.startObject(fieldName);
    Terms curTerms = theFields.terms(fieldName);
    // write field statistics
    buildFieldStatistics(builder, curTerms);
    builder.startObject(FieldStrings.TERMS);
    TermsEnum termIter = curTerms.iterator();
    BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class);
    for (int i = 0; i < curTerms.size(); i++) {
        buildTerm(builder, spare, curTerms, termIter, boostAtt);
    }
    builder.endObject();
    builder.endObject();
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 16, Source: TermVectorsResponse.java

Example 10: checkBrownFoxTermVector

import org.apache.lucene.index.Terms; // import the package/class the method depends on
private void checkBrownFoxTermVector(Fields fields, String fieldName, boolean withPayloads) throws IOException {
    String[] values = {"brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the"};
    int[] freq = {1, 1, 1, 1, 1, 1, 1, 2};
    int[][] pos = {{2}, {8}, {3}, {4}, {7}, {5}, {1}, {0, 6}};
    int[][] startOffset = {{10}, {40}, {16}, {20}, {35}, {26}, {4}, {0, 31}};
    int[][] endOffset = {{15}, {43}, {19}, {25}, {39}, {30}, {9}, {3, 34}};

    Terms terms = fields.terms(fieldName);
    assertThat(terms.size(), equalTo(8L));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        assertThat(next, notNullValue());
        // do not test ttf or doc frequency, because here we have many
        // shards and do not know how documents are distributed
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            if (withPayloads) {
                assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
            }
        }
    }
    assertThat(iterator.next(), nullValue());
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 40, Source: GetTermVectorsIT.java

Example 11: testArtificialNoDoc

import org.apache.lucene.index.Terms; // import the package/class the method depends on
public void testArtificialNoDoc() throws IOException {
    // setup indices
    Settings.Builder settings = Settings.builder()
            .put(indexSettings())
            .put("index.analysis.analyzer", "standard");
    assertAcked(prepareCreate("test")
            .setSettings(settings)
            .addMapping("type1", "field1", "type=text"));
    ensureGreen();

    // request tvs from artificial document
    String text = "the quick brown fox jumps over the lazy dog";
    TermVectorsResponse resp = client().prepareTermVectors()
            .setIndex("test")
            .setType("type1")
            .setDoc(jsonBuilder()
                    .startObject()
                    .field("field1", text)
                    .endObject())
            .setOffsets(true)
            .setPositions(true)
            .setFieldStatistics(true)
            .setTermStatistics(true)
            .get();
    assertThat(resp.isExists(), equalTo(true));
    checkBrownFoxTermVector(resp.getFields(), "field1", false);

    // Since the index is empty, all of artificial document's "term_statistics" should be 0/absent
    Terms terms = resp.getFields().terms("field1");
    assertEquals("sumDocFreq should be 0 for a non-existing field!", 0, terms.getSumDocFreq());
    assertEquals("sumTotalTermFreq should be 0 for a non-existing field!", 0, terms.getSumTotalTermFreq());
    TermsEnum termsEnum = terms.iterator(); // we're guaranteed to receive terms for that field
    while (termsEnum.next() != null) {
        String term = termsEnum.term().utf8ToString();
        assertEquals("term [" + term + "] does not exist in the index; ttf should be 0!", 0, termsEnum.totalTermFreq());
    }
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 38, Source: GetTermVectorsIT.java

Example 12: visitMatchingTerms

import org.apache.lucene.index.Terms; // import the package/class the method depends on
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  int prefixLength = prefix.length();
  Terms terms = MultiFields.getTerms(reader, fieldName);
  if (terms != null) {
    Matcher matcher = pattern.matcher("");
    try {
      TermsEnum termsEnum = terms.iterator(null);

      TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);
      BytesRef text;
      if (status == TermsEnum.SeekStatus.FOUND) {
        text = prefixRef;
      } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
        text = termsEnum.term();
      } else {
        text = null;
      }

      while (text != null) {
        if (StringHelper.startsWith(text, prefixRef)) {
          String textString = text.utf8ToString();
          matcher.reset(textString.substring(prefixLength));
          if (matcher.matches()) {
            mtv.visitMatchingTerm(new Term(fieldName, textString));
          }
        } else {
          break;
        }
        text = termsEnum.next();
      }
    } finally {
      matcher.reset();
    }
  }
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 41, Source: SrndTruncQuery.java

Example 13: getTermsEnum

import org.apache.lucene.index.Terms; // import the package/class the method depends on
@Override
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
  if (maxEdits == 0 || prefixLength >= term.text().length()) {  // can only match if it's exact
    return new SingleTermsEnum(terms.iterator(null), term.bytes());
  }
  return new FuzzyTermsEnum(terms, atts, getTerm(), maxEdits, prefixLength, transpositions);
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 8, Source: FuzzyQuery.java

Example 14: termsEnum

import org.apache.lucene.index.Terms; // import the package/class the method depends on
@Override
public TermsEnum termsEnum(Terms terms) throws IOException {
  return terms.iterator(null);
}
 
Developer ID: lamsfoundation, Project: lams, Lines: 5, Source: FieldCache.java

Example 15: checkTermTexts

import org.apache.lucene.index.Terms; // import the package/class the method depends on
private void checkTermTexts(Terms terms, String[] expectedTexts) throws IOException {
    final TermsEnum termsEnum = terms.iterator();
    for (String expectedText : expectedTexts) {
        assertThat(termsEnum.next().utf8ToString(), equalTo(expectedText));
    }
}
 
Developer ID: justor, Project: elasticsearch_my, Lines: 7, Source: MultiTermVectorsIT.java


Note: The org.apache.lucene.index.Terms.iterator method examples on this page were compiled by 纯净天空 from open-source code hosted on GitHub, MSDocs, and similar platforms. The snippets were selected from open-source projects contributed by the community; copyright remains with the original authors. Please consult each project's License before distributing or using the code, and do not republish without permission.