This article collects typical usage examples of the Java method org.apache.lucene.index.TermsEnum.docFreq. If you are unsure what TermsEnum.docFreq does or how to use it, the curated code samples below may help. You can also explore further usage of the containing class, org.apache.lucene.index.TermsEnum.
The following presents 4 code examples of TermsEnum.docFreq, ordered by popularity by default.
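Before the collected examples, here is a minimal, self-contained sketch of the method itself (not from the original article): iterating all terms of a field and printing each term's document frequency. It assumes a Lucene 5.x-era API (MultiFields.getTerms, the no-argument Terms.iterator()); the class and method names are illustrative.

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class DocFreqExample {
  // Prints every term of the given field together with its document frequency.
  static void printDocFreqs(IndexReader reader, String field) throws IOException {
    Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) {
      return; // field does not exist or has no indexed terms
    }
    TermsEnum te = terms.iterator();
    BytesRef term;
    while ((term = te.next()) != null) {
      // docFreq() = number of documents that contain the current term
      System.out.println(term.utf8ToString() + " -> " + te.docFreq());
    }
  }
}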
Example 1: QueryAutoStopWordAnalyzer
import org.apache.lucene.index.TermsEnum; // import the package/class the method depends on
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields, from terms with a document frequency greater than
 * the given maxDocFreq.
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxDocFreq Terms with a document frequency above this value are treated as stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;
  for (String field : fields) {
    Set<String> stopWords = new HashSet<>();
    Terms terms = MultiFields.getTerms(indexReader, field);
    CharsRefBuilder spare = new CharsRefBuilder();
    if (terms != null) {
      TermsEnum te = terms.iterator(null);
      BytesRef text;
      while ((text = te.next()) != null) {
        // Any term occurring in more than maxDocFreq documents becomes a stopword.
        if (te.docFreq() > maxDocFreq) {
          spare.copyUTF8Bytes(text);
          stopWords.add(spare.toString());
        }
      }
    }
    stopWordsPerField.put(field, stopWords);
  }
}
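A minimal usage sketch of this constructor (not from the original article), assuming a Lucene 5.x-era StandardAnalyzer delegate; the "body" field name and the 100-document threshold are illustrative. The analyzer only reads the index during construction, so the reader can be closed afterwards.

static Analyzer buildQueryAnalyzer(Directory directory) throws IOException {
  try (IndexReader reader = DirectoryReader.open(directory)) {
    // Terms of the hypothetical "body" field occurring in more than
    // 100 documents become stopwords for query-time analysis.
    return new QueryAutoStopWordAnalyzer(
        new StandardAnalyzer(), reader, Collections.singletonList("body"), 100);
  }
}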
Example 2: writeTermStatistics
import org.apache.lucene.index.TermsEnum; // import the package/class the method depends on
private void writeTermStatistics(TermsEnum topLevelIterator) throws IOException {
  // docFreq() and totalTermFreq() return -1 when the statistic is unavailable,
  // so both are written with helpers that can encode that sentinel value.
  int docFreq = topLevelIterator.docFreq();
  assert (docFreq >= -1);
  writePotentiallyNegativeVInt(docFreq);
  long ttf = topLevelIterator.totalTermFreq();
  assert (ttf >= -1);
  writePotentiallyNegativeVLong(ttf);
}
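The helpers themselves are not shown in the article. A plausible sketch of what they might look like (an assumption, not the confirmed Elasticsearch implementation): variable-length ints and longs only encode non-negative values, so shifting by +1 maps the -1 "not available" sentinel onto 0. The `output` stream is assumed to offer writeVInt/writeVLong.

// Hypothetical sketch: shift by +1 so that -1 ("statistic not available")
// becomes 0 and fits the non-negative VInt/VLong encoding.
private void writePotentiallyNegativeVInt(int value) throws IOException {
  output.writeVInt(Math.max(0, value + 1));
}

private void writePotentiallyNegativeVLong(long value) throws IOException {
  output.writeVLong(Math.max(0, value + 1));
}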
Example 3: getTermStatistics
import org.apache.lucene.index.TermsEnum; // import the package/class the method depends on
private TermStatistics getTermStatistics(TermsEnum termsEnum, Term term) throws IOException {
  // Prefer precomputed distributed frequency statistics (dfs) when available;
  // otherwise fall back to the shard-local statistics exposed by the enum.
  if (dfs != null) {
    return dfs.termStatistics().get(term);
  }
  return new TermStatistics(termsEnum.term(), termsEnum.docFreq(), termsEnum.totalTermFreq());
}
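A hedged sketch of a possible call site (names illustrative, not from the article): the enum must be positioned on the term, for example with seekExact, before term(), docFreq() and totalTermFreq() return meaningful values.

// Hypothetical call site: seek the enum to the term before reading statistics.
TermsEnum termsEnum = terms.iterator();
if (termsEnum.seekExact(term.bytes())) {
  TermStatistics stats = getTermStatistics(termsEnum, term);
  // stats now carries the term's docFreq and totalTermFreq
}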
Example 4: testArtificialDocWithPreference
import org.apache.lucene.index.TermsEnum; // import the package/class the method depends on
public void testArtificialDocWithPreference() throws ExecutionException, InterruptedException, IOException {
  // set up the index
  Settings.Builder settings = Settings.builder()
      .put(indexSettings())
      .put("index.analysis.analyzer", "standard");
  assertAcked(prepareCreate("test")
      .setSettings(settings)
      .addMapping("type1", "field1", "type=text,term_vector=with_positions_offsets"));
  ensureGreen();
  // index a document
  indexRandom(true, client().prepareIndex("test", "type1", "1").setSource("field1", "random permutation"));
  // get the search shards
  ClusterSearchShardsResponse searchShardsResponse = client().admin().cluster().prepareSearchShards("test").get();
  List<Integer> shardIds = Arrays.stream(searchShardsResponse.getGroups()).map(s -> s.getShardId().id()).collect(Collectors.toList());
  // request term vectors of an artificial document from each shard
  int sumTotalTermFreq = 0;
  int sumDocFreq = 0;
  for (Integer shardId : shardIds) {
    TermVectorsResponse tvResponse = client().prepareTermVectors()
        .setIndex("test")
        .setType("type1")
        .setPreference("_shards:" + shardId)
        .setDoc(jsonBuilder().startObject().field("field1", "random permutation").endObject())
        .setFieldStatistics(true)
        .setTermStatistics(true)
        .get();
    Fields fields = tvResponse.getFields();
    Terms terms = fields.terms("field1");
    assertNotNull(terms);
    TermsEnum termsEnum = terms.iterator();
    while (termsEnum.next() != null) {
      sumTotalTermFreq += termsEnum.totalTermFreq();
      sumDocFreq += termsEnum.docFreq();
    }
  }
  // "random permutation" yields two terms; only the shard that holds the indexed
  // document reports non-zero statistics, so each sum across shards is exactly 2.
  assertEquals("expected to find term statistics in exactly one shard!", 2, sumTotalTermFreq);
  assertEquals("expected to find term statistics in exactly one shard!", 2, sumDocFreq);
}