This article collects typical usage examples of the Java method org.apache.lucene.index.TermsEnum.docFreq. If you have been wondering what TermsEnum.docFreq does or how to use it, the curated code examples below may help. You can also explore further usage examples of its declaring class, org.apache.lucene.index.TermsEnum.
The following presents 4 code examples of the TermsEnum.docFreq method, ordered by popularity by default.
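Before the examples, here is a minimal orientation sketch (not taken from this page): docFreq() reports how many documents contain the term the enum is currently positioned on, and may return -1 when the statistic is unavailable (e.g., for term-vector-backed enums, as example 2 below assumes). The helper name printDocFreqs, and the reader/field arguments, are placeholders; the sketch assumes the Lucene 5.x-style Terms.iterator() (Lucene 4.x takes a reuse argument, as in example 1 below).

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

// Iterate all terms of a field and print each term's document frequency.
static void printDocFreqs(IndexReader reader, String field) throws IOException {
    Terms terms = MultiFields.getTerms(reader, field);
    if (terms == null) {
        return; // field has no indexed terms
    }
    TermsEnum te = terms.iterator();
    BytesRef term;
    while ((term = te.next()) != null) {
        // docFreq() is only valid while the enum is positioned on a term
        System.out.println(term.utf8ToString() + " appears in " + te.docFreq() + " docs");
    }
}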
Example 1: QueryAutoStopWordAnalyzer
import org.apache.lucene.index.TermsEnum; // import required by the method below
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency greater than
 * the given maxDocFreq.
 *
 * @param delegate    Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields      Selection of fields to calculate stopwords for
 * @param maxDocFreq  Document frequency terms should be above in order to be stopwords
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
        Analyzer delegate,
        IndexReader indexReader,
        Collection<String> fields,
        int maxDocFreq) throws IOException {
    super(delegate.getReuseStrategy());
    this.delegate = delegate;

    for (String field : fields) {
        Set<String> stopWords = new HashSet<>();
        Terms terms = MultiFields.getTerms(indexReader, field);
        CharsRefBuilder spare = new CharsRefBuilder();
        if (terms != null) {
            TermsEnum te = terms.iterator(null);
            BytesRef text;
            while ((text = te.next()) != null) {
                if (te.docFreq() > maxDocFreq) {
                    spare.copyUTF8Bytes(text);
                    stopWords.add(spare.toString());
                }
            }
        }
        stopWordsPerField.put(field, stopWords);
    }
}
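As a usage sketch for the constructor above: the directory, field names, and the threshold of 100 are hypothetical, and depending on your Lucene version StandardAnalyzer may require a Version argument. Note that the stopword sets are computed eagerly in the constructor, so the reader can be closed afterwards.

import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;

Analyzer buildQueryAnalyzer(Directory directory) throws IOException {
    IndexReader reader = DirectoryReader.open(directory);
    try {
        return new QueryAutoStopWordAnalyzer(
                new StandardAnalyzer(),         // delegate whose TokenStream is filtered
                reader,
                Arrays.asList("title", "body"), // fields to scan for frequent terms
                100);                           // terms in more than 100 docs become stopwords
    } finally {
        reader.close(); // stopword sets were computed eagerly in the constructor
    }
}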
Example 2: writeTermStatistics
import org.apache.lucene.index.TermsEnum; // import required by the method below
private void writeTermStatistics(TermsEnum topLevelIterator) throws IOException {
    // docFreq() and totalTermFreq() return -1 when the statistic is unavailable,
    // hence the "potentially negative" encoders below.
    int docFreq = topLevelIterator.docFreq();
    assert (docFreq >= -1);
    writePotentiallyNegativeVInt(docFreq);

    long ttf = topLevelIterator.totalTermFreq();
    assert (ttf >= -1);
    writePotentiallyNegativeVLong(ttf);
}
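The two write helpers are not shown on this page. A plausible sketch consistent with the asserts above, assuming a StreamOutput-style field named output (an assumption, not necessarily the real Elasticsearch implementation): vInt/vLong encodings handle only non-negative values, so shifting by one maps the -1 "unavailable" sentinel onto 0.

// Sketch of assumed helpers: shift by one so the -1 sentinel fits an unsigned encoding.
private void writePotentiallyNegativeVInt(int value) throws IOException {
    output.writeVInt(Math.max(0, value + 1)); // -1 -> 0, 0 -> 1, ...
}

private void writePotentiallyNegativeVLong(long value) throws IOException {
    output.writeVLong(Math.max(0, value + 1));
}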
Example 3: getTermStatistics
import org.apache.lucene.index.TermsEnum; // import required by the method below
private TermStatistics getTermStatistics(TermsEnum termsEnum, Term term) throws IOException {
    if (dfs != null) {
        // distributed frequencies were gathered up front; prefer them over shard-local values
        return dfs.termStatistics().get(term);
    }
    // fall back to the statistics of the term the enum is currently positioned on
    return new TermStatistics(termsEnum.term(), termsEnum.docFreq(), termsEnum.totalTermFreq());
}
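Example 3 falls back to shard-local statistics when no pre-aggregated distributed frequencies (dfs) are available. A minimal sketch of the same per-term lookup against a plain IndexReader, using seekExact to position the enum before calling docFreq(); the helper name and null handling are my own:

import java.io.IOException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.TermStatistics;

// Hypothetical helper: shard-local statistics for a single term, no dfs involved.
static TermStatistics localTermStatistics(IndexReader reader, Term term) throws IOException {
    Terms terms = MultiFields.getTerms(reader, term.field());
    if (terms == null) {
        return null; // field has no indexed terms
    }
    TermsEnum termsEnum = terms.iterator();
    if (!termsEnum.seekExact(term.bytes())) {
        return null; // term does not occur in this reader
    }
    return new TermStatistics(termsEnum.term(), termsEnum.docFreq(), termsEnum.totalTermFreq());
}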
Example 4: testArtificialDocWithPreference
import org.apache.lucene.index.TermsEnum; // import required by the method below
public void testArtificialDocWithPreference() throws ExecutionException, InterruptedException, IOException {
    // setup indices
    Settings.Builder settings = Settings.builder()
            .put(indexSettings())
            .put("index.analysis.analyzer", "standard");
    assertAcked(prepareCreate("test")
            .setSettings(settings)
            .addMapping("type1", "field1", "type=text,term_vector=with_positions_offsets"));
    ensureGreen();

    // index document
    indexRandom(true, client().prepareIndex("test", "type1", "1").setSource("field1", "random permutation"));

    // Get search shards
    ClusterSearchShardsResponse searchShardsResponse = client().admin().cluster().prepareSearchShards("test").get();
    List<Integer> shardIds = Arrays.stream(searchShardsResponse.getGroups()).map(s -> s.getShardId().id()).collect(Collectors.toList());

    // request termvectors of artificial document from each shard
    int sumTotalTermFreq = 0;
    int sumDocFreq = 0;
    for (Integer shardId : shardIds) {
        TermVectorsResponse tvResponse = client().prepareTermVectors()
                .setIndex("test")
                .setType("type1")
                .setPreference("_shards:" + shardId)
                .setDoc(jsonBuilder().startObject().field("field1", "random permutation").endObject())
                .setFieldStatistics(true)
                .setTermStatistics(true)
                .get();
        Fields fields = tvResponse.getFields();
        Terms terms = fields.terms("field1");
        assertNotNull(terms);
        TermsEnum termsEnum = terms.iterator();
        while (termsEnum.next() != null) {
            sumTotalTermFreq += termsEnum.totalTermFreq();
            sumDocFreq += termsEnum.docFreq();
        }
    }
    assertEquals("expected to find term statistics in exactly one shard!", 2, sumTotalTermFreq);
    assertEquals("expected to find term statistics in exactly one shard!", 2, sumDocFreq);
}
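Why the final assertions expect exactly 2: only the single shard that actually holds document 1 can report non-zero statistics for the artificial document's two terms ("random" and "permutation"), so summing docFreq and totalTermFreq across all shards yields 2 in each case, one occurrence of each term in one document on one shard.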