

Java Fields.terms Method Code Examples

This article collects typical usage examples of the Java method org.apache.lucene.index.Fields.terms. If you have been wondering what exactly Fields.terms does, how to call it, or what real-world uses look like, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.lucene.index.Fields.


Fifteen code examples of the Fields.terms method are shown below, drawn from open-source projects and sorted by popularity by default.
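All fifteen examples share one pattern: obtain a Fields instance (from a leaf reader, a merged reader view, or a document's term vectors), look up a single field with Fields.terms(String), and walk the returned Terms with a TermsEnum, checking for null at each step. The following is a minimal sketch of that pattern against the Lucene 4.x-era API that most of the examples below target; it is our own illustration (the class and method names are invented), not code from any of the listed projects.

import java.io.IOException;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class FieldsTermsSketch {

    /** Prints every indexed term of the given field; a no-op if the field has no terms. */
    public static void dumpTerms(IndexReader reader, String field) throws IOException {
        Fields fields = MultiFields.getFields(reader); // merged view over all segments
        if (fields == null) {
            return; // the index holds no postings at all
        }
        Terms terms = fields.terms(field); // null if the field was never indexed
        if (terms == null) {
            return;
        }
        TermsEnum iterator = terms.iterator(null); // Lucene 4.x signature; iterator() in 5.x+
        for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
            System.out.println(field + ": " + term.utf8ToString());
        }
    }
}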

Example 1: DfsOnlyRequest

import org.apache.lucene.index.Fields; // import the package/class this method depends on
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);

    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
 
Developer: baidu, Project: Elasticsearch, Lines: 20, Source: DfsOnlyRequest.java

Example 2: estimateStringFieldData

import org.apache.lucene.index.Fields; // import the package/class this method depends on
/**
 * @return the estimate for loading the entire term set into field data, or 0 if unavailable
 */
public long estimateStringFieldData() {
    try {
        LeafReader reader = context.reader();
        Terms terms = reader.terms(getFieldName());

        Fields fields = reader.fields();
        final Terms fieldTerms = fields.terms(getFieldName());
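        // only the block-tree terms dictionary (FieldReader) exposes the on-disk Stats used below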

        if (fieldTerms instanceof FieldReader) {
            final Stats stats = ((FieldReader) fieldTerms).getStats();
            long totalTermBytes = stats.totalTermBytes;
            if (logger.isTraceEnabled()) {
                logger.trace("totalTermBytes: {}, terms.size(): {}, terms.getSumDocFreq(): {}",
                        totalTermBytes, terms.size(), terms.getSumDocFreq());
            }
            long totalBytes = totalTermBytes + (2 * terms.size()) + (4 * terms.getSumDocFreq());
            return totalBytes;
        }
    } catch (Exception e) {
        logger.warn("Unable to estimate memory overhead", e);
    }
    return 0;
}
 
Developer: justor, Project: elasticsearch_my, Lines: 27, Source: PagedBytesIndexFieldData.java

Example 3: getAnyTokenStream

import org.apache.lucene.index.Fields; // import the package/class this method depends on
/**
 * A convenience method that tries to first get a TermPositionVector for the
 * specified docId, then, falls back to using the passed in
 * {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
 * This is useful when you already have the document, but would prefer to use
 * the vector first.
 * 
 * @param reader The {@link org.apache.lucene.index.IndexReader} to use to try
 *        and get the vector from
 * @param docId The docId to retrieve.
 * @param field The field to retrieve on the document
 * @param doc The document to fall back on
 * @param analyzer The analyzer to use for creating the TokenStream if the
 *        vector doesn't exist
 * @return The {@link org.apache.lucene.analysis.TokenStream} for the
 *         {@link org.apache.lucene.index.IndexableField} on the
 *         {@link org.apache.lucene.document.Document}
 * @throws IOException if there was an error loading
 */

public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
    String field, Document doc, Analyzer analyzer) throws IOException {
  TokenStream ts = null;

  Fields vectors = reader.getTermVectors(docId);
  if (vectors != null) {
    Terms vector = vectors.terms(field);
    if (vector != null) {
      ts = getTokenStream(vector);
    }
  }

  // No token info stored so fall back to analyzing raw content
  if (ts == null) {
    ts = getTokenStream(doc, field, analyzer);
  }
  return ts;
}
 
Developer: europeana, Project: search, Lines: 39, Source: TokenSources.java

Example 4: getTokenStreamWithOffsets

import org.apache.lucene.index.Fields; // import the package/class this method depends on
/**
 * Returns a {@link TokenStream} with positions and offsets constructed from
 * field termvectors.  If the field has no termvectors, or positions or offsets
 * are not included in the termvector, return null.
 * @param reader the {@link IndexReader} to retrieve term vectors from
 * @param docId the document to retrieve termvectors for
 * @param field the field to retrieve termvectors for
 * @return a {@link TokenStream}, or null if positions and offsets are not available
 * @throws IOException If there is a low-level I/O error
 */
public static TokenStream getTokenStreamWithOffsets(IndexReader reader, int docId,
                                                    String field) throws IOException {

  Fields vectors = reader.getTermVectors(docId);
  if (vectors == null) {
    return null;
  }

  Terms vector = vectors.terms(field);
  if (vector == null) {
    return null;
  }

  if (!vector.hasPositions() || !vector.hasOffsets()) {
    return null;
  }
  
  return getTokenStream(vector);
}
 
Developer: europeana, Project: search, Lines: 30, Source: TokenSources.java
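Examples 3 and 4 are the two TokenSources entry points that highlighters typically combine: try the offsets-aware term-vector stream first (highlighting needs offsets), then fall back to re-analyzing the stored content. A hedged usage sketch, assuming a reader, docId, doc, and analyzer are already in scope and that the field is named "body" (our assumption, not taken from the projects above):

TokenStream ts = TokenSources.getTokenStreamWithOffsets(reader, docId, "body");
if (ts == null) {
    // no term vectors, or positions/offsets missing: fall back to the stored value
    ts = TokenSources.getAnyTokenStream(reader, docId, "body", doc, analyzer);
}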

Example 5: createWeight

import org.apache.lucene.index.Fields; // import the package/class this method depends on
@Override
public void createWeight(Map context, IndexSearcher searcher) throws IOException {
  long sumTotalTermFreq = 0;
  for (AtomicReaderContext readerContext : searcher.getTopReaderContext().leaves()) {
    Fields fields = readerContext.reader().fields();
    if (fields == null) continue;
    Terms terms = fields.terms(indexedField);
    if (terms == null) continue;
    long v = terms.getSumTotalTermFreq();
    if (v == -1) {
      sumTotalTermFreq = -1;
      break;
    } else {
      sumTotalTermFreq += v;
    }
  }
  final long ttf = sumTotalTermFreq;
  context.put(this, new LongDocValues(this) {
    @Override
    public long longVal(int doc) {
      return ttf;
    }
  });
}
 
Developer: europeana, Project: search, Lines: 25, Source: SumTotalTermFreqValueSource.java

Example 6: getFirstMatch

import org.apache.lucene.index.Fields; // import the package/class this method depends on
/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
  Fields fields = atomicReader.fields();
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
  if (docs == null) return -1;
  int id = docs.nextDoc();
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
 
Developer: europeana, Project: search, Lines: 23, Source: SolrIndexSearcher.java

Example 7: getFirstMatch

import org.apache.lucene.index.Fields; // import the package/class this method depends on
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
 
Developer: europeana, Project: search, Lines: 19, Source: TestRTGBase.java

Example 8: getFirstMatch

import org.apache.lucene.index.Fields; // import the package/class this method depends on
/**
 * Returns the first document number containing the term <code>t</code>. Returns -1 if no
 * document was found. This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 * 
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
	Fields fields = atomicReader.fields();
	if(fields == null)
		return -1;
	Terms terms = fields.terms(t.field());
	if(terms == null)
		return -1;
	BytesRef termBytes = t.bytes();
	final TermsEnum termsEnum = terms.iterator(null);
	if(!termsEnum.seekExact(termBytes, false)) {
		return -1;
	}
	DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
	if(docs == null)
		return -1;
	int id = docs.nextDoc();
	return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
 
Developer: netboynb, Project: search-core, Lines: 26, Source: SolrIndexSearcher.java

Example 9: getFirstMatch

import org.apache.lucene.index.Fields; // import the package/class this method depends on
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes, false)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
 
Developer: pkarmstr, Project: NYBC, Lines: 19, Source: TestRTGBase.java
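Examples 6 through 9 show the same lookup against the Lucene 4.x API: terms.iterator(null) takes a reuse argument, seekExact has a boolean overload, and postings are read through DocsEnum. On the Lucene 6-era API used by Examples 11 and 15, the same logic would look roughly like the sketch below. This is our own adaptation, not code from any of the projects above; note that without the liveDocs argument, skipping deleted documents becomes the caller's responsibility.

protected int getFirstMatch(IndexReader r, Term t) throws IOException {
    Terms terms = MultiFields.getTerms(r, t.field()); // shortcut for getFields(r).terms(...)
    if (terms == null) return -1;
    TermsEnum termsEnum = terms.iterator();           // no reuse argument anymore
    if (!termsEnum.seekExact(t.bytes())) {            // the boolean overload was removed
        return -1;
    }
    PostingsEnum docs = termsEnum.postings(null, PostingsEnum.NONE); // DocsEnum folded into PostingsEnum
    int id = docs.nextDoc();                          // may include deleted docs; check live docs if needed
    return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}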

Example 10: getIterableRow

import org.apache.lucene.index.Fields; // import the package/class this method depends on
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException {
  IndexReader indexReader = searcher.getIndexReader();
  BytesRef rowIdRef = new BytesRef(rowId);
  List<AtomicReaderTermsEnum> possibleRowIds = new ArrayList<AtomicReaderTermsEnum>();
  for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
    AtomicReader atomicReader = atomicReaderContext.reader();
    Fields fields = atomicReader.fields();
    if (fields == null) {
      continue;
    }
    Terms terms = fields.terms(BlurConstants.ROW_ID);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator(null);
    if (!termsEnum.seekExact(rowIdRef, true)) {
      continue;
    }
    // need atomic read as well...
    possibleRowIds.add(new AtomicReaderTermsEnum(atomicReader, termsEnum));
  }
  if (possibleRowIds.isEmpty()) {
    return null;
  }
  return new IterableRow(rowId, getRecords(possibleRowIds));
}
 
Developer: apache, Project: incubator-blur, Lines: 27, Source: MutatableAction.java

Example 11: createCandidateQuery

import org.apache.lucene.index.Fields; // import the package/class this method depends on
Query createCandidateQuery(IndexReader indexReader) throws IOException {
    List<BytesRef> extractedTerms = new ArrayList<>();
    LeafReader reader = indexReader.leaves().get(0).reader();
    Fields fields = reader.fields();
    for (String field : fields) {
        Terms terms = fields.terms(field);
        if (terms == null) {
            continue;
        }

        BytesRef fieldBr = new BytesRef(field);
        TermsEnum tenum = terms.iterator();
        for (BytesRef term = tenum.next(); term != null; term = tenum.next()) {
            BytesRefBuilder builder = new BytesRefBuilder();
            builder.append(fieldBr);
            builder.append(FIELD_VALUE_SEPARATOR);
            builder.append(term);
            extractedTerms.add(builder.toBytesRef());
        }
    }
    Query extractionSuccess = new TermInSetQuery(queryTermsField.name(), extractedTerms);
    // include extractionResultField:failed, because docs with this term have no extractedTermsField
    // and otherwise we would fail to return these docs. Docs that failed query term extraction
    // always need to be verified by MemoryIndex:
    Query extractionFailure = new TermQuery(new Term(extractionResultField.name(), EXTRACTION_FAILED));

    return new BooleanQuery.Builder()
            .add(extractionSuccess, Occur.SHOULD)
            .add(extractionFailure, Occur.SHOULD)
            .build();
}
 
Developer: justor, Project: elasticsearch_my, Lines: 32, Source: PercolatorFieldMapper.java

Example 12: completionStats

import org.apache.lucene.index.Fields; // import the package/class this method depends on
/**
 * Returns total in-heap bytes used by all suggesters.  This method has CPU cost <code>O(numIndexedFields)</code>.
 *
 * @param fieldNamePatterns if non-null, any completion field name matching any of these patterns will break out its in-heap bytes
 * separately in the returned {@link CompletionStats}
 */
public static CompletionStats completionStats(IndexReader indexReader, String ... fieldNamePatterns) {
    long sizeInBytes = 0;
    ObjectLongHashMap<String> completionFields = null;
    if (fieldNamePatterns != null  && fieldNamePatterns.length > 0) {
        completionFields = new ObjectLongHashMap<>(fieldNamePatterns.length);
    }
    for (LeafReaderContext atomicReaderContext : indexReader.leaves()) {
        LeafReader atomicReader = atomicReaderContext.reader();
        try {
            Fields fields = atomicReader.fields();
            for (String fieldName : fields) {
                Terms terms = fields.terms(fieldName);
                if (terms instanceof CompletionTerms) {
                    // TODO: currently we load up the suggester for reporting its size
                    long fstSize = ((CompletionTerms) terms).suggester().ramBytesUsed();
                    if (fieldNamePatterns != null && fieldNamePatterns.length > 0 && Regex.simpleMatch(fieldNamePatterns, fieldName)) {
                        completionFields.addTo(fieldName, fstSize);
                    }
                    sizeInBytes += fstSize;
                }
            }
        } catch (IOException ioe) {
            throw new ElasticsearchException(ioe);
        }
    }
    return new CompletionStats(sizeInBytes, completionFields == null ? null : new FieldMemoryStats(completionFields));
}
 
Developer: justor, Project: elasticsearch_my, Lines: 34, Source: CompletionFieldStats.java

Example 13: retrieveTerms

import org.apache.lucene.index.Fields; // import the package/class this method depends on
/**
 * Find words for a more-like-this query former.
 *
 * @param docNum the id of the lucene document from which to find terms
 */
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
    Map<String, Int> termFreqMap = new HashMap<>();
    for (String fieldName : fieldNames) {
        final Fields vectors = ir.getTermVectors(docNum);
        final Terms vector;
        if (vectors != null) {
            vector = vectors.terms(fieldName);
        } else {
            vector = null;
        }

        // field does not store term vector info
        if (vector == null) {
            Document d = ir.document(docNum);
            IndexableField fields[] = d.getFields(fieldName);
            for (IndexableField field : fields) {
                final String stringValue = field.stringValue();
                if (stringValue != null) {
                    addTermFrequencies(new FastStringReader(stringValue), termFreqMap, fieldName);
                }
            }
        } else {
            addTermFrequencies(termFreqMap, vector, fieldName);
        }
    }

    return createQueue(termFreqMap);
}
 
Developer: justor, Project: elasticsearch_my, Lines: 34, Source: XMoreLikeThis.java

Example 14: buildField

import org.apache.lucene.index.Fields; // import the package/class this method depends on
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException {
    String fieldName = fieldIter.next();
    builder.startObject(fieldName);
    Terms curTerms = theFields.terms(fieldName);
    // write field statistics
    buildFieldStatistics(builder, curTerms);
    builder.startObject(FieldStrings.TERMS);
    TermsEnum termIter = curTerms.iterator();
    BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class);
    for (int i = 0; i < curTerms.size(); i++) {
        buildTerm(builder, spare, curTerms, termIter, boostAtt);
    }
    builder.endObject();
    builder.endObject();
}
 
Developer: justor, Project: elasticsearch_my, Lines: 16, Source: TermVectorsResponse.java

Example 15: checkBrownFoxTermVector

import org.apache.lucene.index.Fields; // import the package/class this method depends on
private void checkBrownFoxTermVector(Fields fields, String fieldName, boolean withPayloads) throws IOException {
    String[] values = {"brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the"};
    int[] freq = {1, 1, 1, 1, 1, 1, 1, 2};
    int[][] pos = {{2}, {8}, {3}, {4}, {7}, {5}, {1}, {0, 6}};
    int[][] startOffset = {{10}, {40}, {16}, {20}, {35}, {26}, {4}, {0, 31}};
    int[][] endOffset = {{15}, {43}, {19}, {25}, {39}, {30}, {9}, {3, 34}};

    Terms terms = fields.terms(fieldName);
    assertThat(terms.size(), equalTo(8L));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        assertThat(next, notNullValue());
        // do not test ttf or doc frequency, because here we have many
        // shards and do not know how documents are distributed
        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            if (withPayloads) {
                assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
            }
        }
    }
    assertThat(iterator.next(), nullValue());
}
 
Developer: justor, Project: elasticsearch_my, Lines: 40, Source: GetTermVectorsIT.java


Note: The org.apache.lucene.index.Fields.terms examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from projects contributed by open-source developers, and copyright in the source code remains with the original authors. Consult each project's license before using or redistributing the code; do not reproduce this compilation without permission.