本文整理汇总了Java中org.apache.lucene.index.MultiFields.getTerms方法的典型用法代码示例。如果您正苦于以下问题：Java MultiFields.getTerms方法的具体用法？Java MultiFields.getTerms怎么用？Java MultiFields.getTerms使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.index.MultiFields的用法示例。
在下文中一共展示了MultiFields.getTerms方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: QueryAutoStopWordAnalyzer
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Builds a QueryAutoStopWordAnalyzer whose per-field stopword sets are derived
 * from the index: for each requested field, every term whose document
 * frequency is strictly greater than {@code maxDocFreq} is recorded as a
 * stopword. A field for which the index reports no terms is registered with
 * an empty stopword set.
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader the term statistics are read from
 * @param fields fields to calculate stopwords for
 * @param maxDocFreq document frequency a term must exceed to become a stopword
 * @throws IOException can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    int maxDocFreq) throws IOException {
  super(delegate.getReuseStrategy());
  this.delegate = delegate;
  for (String fieldName : fields) {
    final Set<String> frequentTerms = new HashSet<>();
    final Terms fieldTerms = MultiFields.getTerms(indexReader, fieldName);
    // Reused scratch buffer for converting each UTF-8 term to a String.
    final CharsRefBuilder scratch = new CharsRefBuilder();
    if (fieldTerms != null) {
      final TermsEnum termsEnum = fieldTerms.iterator(null);
      for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
        if (termsEnum.docFreq() <= maxDocFreq) {
          continue; // not frequent enough to be a stopword
        }
        scratch.copyUTF8Bytes(term);
        frequentTerms.add(scratch.toString());
      }
    }
    stopWordsPerField.put(fieldName, frequentTerms);
  }
}
示例2: visitMatchingTerms
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Passes this term to the visitor when the given field of the index contains
 * exactly the term text; does nothing if the field has no terms or the seek
 * does not land on an exact match.
 *
 * @param reader index to look the term up in
 * @param fieldName field whose terms are searched
 * @param mtv visitor notified of the matching term
 * @throws IOException if reading the index fails
 */
@Override
public void visitMatchingTerms(
    IndexReader reader,
    String fieldName,
    MatchingTermVisitor mtv) throws IOException
{
  /* check term presence in index here for symmetry with other SimpleTerm's */
  final Terms fieldTerms = MultiFields.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return;
  }
  final TermsEnum termsEnum = fieldTerms.iterator(null);
  final boolean present =
      termsEnum.seekCeil(new BytesRef(getTermText())) == TermsEnum.SeekStatus.FOUND;
  if (present) {
    mtv.visitMatchingTerm(getLuceneTerm(fieldName));
  }
}
示例3: shardOperation
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Gathers field statistics on one shard for every field named in the request.
 * Fields whose terms are absent from the shard's reader are left out of the
 * response.
 *
 * @param request shard-level request carrying the shard id and field names
 * @return response holding the shard id and the statistics keyed by field name
 * @throws IllegalArgumentException if the mapper service has no field type for
 *         a requested field
 */
@Override
protected FieldStatsShardResponse shardOperation(FieldStatsShardRequest request) {
  final ShardId shardId = request.shardId();
  final Map<String, FieldStats> statsByField = new HashMap<>();
  final IndexService indexService = indicesService.indexServiceSafe(shardId.getIndex());
  final MapperService mappers = indexService.mapperService();
  final IndexShard indexShard = indexService.shardSafe(shardId.id());
  try (Engine.Searcher searcher = indexShard.acquireSearcher("fieldstats")) {
    for (String field : request.getFields()) {
      final MappedFieldType fieldType = mappers.fullName(field);
      if (fieldType == null) {
        throw new IllegalArgumentException("field [" + field + "] doesn't exist");
      }
      final IndexReader reader = searcher.reader();
      final Terms terms = MultiFields.getTerms(reader, field);
      if (terms == null) {
        continue; // nothing indexed for this field on this shard
      }
      statsByField.put(field, fieldType.stats(terms, reader.maxDoc()));
    }
  } catch (IOException e) {
    throw ExceptionsHelper.convertToElastic(e);
  }
  return new FieldStatsShardResponse(shardId, statsByField);
}
示例4: assignClass
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * {@inheritDoc}
 * <p>
 * Every term of the class field is treated as a candidate class and valued as
 * {@code calculateLogPrior(c) + calculateLogLikelihood(tokens, c)}; the
 * candidate with the greatest value is returned. If the class field has no
 * terms at all, no candidate is evaluated and the result carries an empty
 * {@link BytesRef}.
 *
 * @param inputDocument text to classify
 * @return the best class found together with a score of {@code 10 / |max|}
 * @throws IOException if {@code atomicReader} is null (the classifier must be
 *         trained first) or if reading the index fails
 */
@Override
public ClassificationResult<BytesRef> assignClass(String inputDocument) throws IOException {
  if (atomicReader == null) {
    throw new IOException("You must first call Classifier#train");
  }
  double max = -Double.MAX_VALUE;
  BytesRef foundClass = new BytesRef();

  // MultiFields.getTerms returns null when the field has no terms; iterate
  // over an empty enum in that case instead of failing with an NPE.
  Terms terms = MultiFields.getTerms(atomicReader, classFieldName);
  TermsEnum termsEnum = terms == null ? TermsEnum.EMPTY : terms.iterator(null);
  BytesRef next;
  String[] tokenizedDoc = tokenizeDoc(inputDocument);
  while ((next = termsEnum.next()) != null) {
    double clVal = calculateLogPrior(next) + calculateLogLikelihood(tokenizedDoc, next);
    if (clVal > max) {
      max = clVal;
      // The enum may reuse the returned BytesRef, so keep a private copy.
      foundClass = BytesRef.deepCopyOf(next);
    }
  }
  double score = 10 / Math.abs(max);
  return new ClassificationResult<>(foundClass, score);
}
示例5: countTerms
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Counts the terms enumerated by the query's TermsEnum over the query's field,
 * asserting that each term compares strictly greater than the one before it.
 *
 * @param q query whose term enumeration is walked
 * @return number of terms enumerated, or 0 if the field has no terms
 */
private int countTerms(MultiTermQuery q) throws Exception {
  final Terms fieldTerms = MultiFields.getTerms(reader, q.getField());
  if (fieldTerms == null) {
    return 0;
  }
  final TermsEnum termEnum = q.getTermsEnum(fieldTerms);
  assertNotNull(termEnum);
  int seen = 0;
  BytesRef previous = null;
  for (BytesRef current = termEnum.next(); current != null; current = termEnum.next()) {
    seen++;
    if (previous != null) {
      assertTrue(previous.compareTo(current) < 0);
    }
    previous = BytesRef.deepCopyOf(current);
  }
  // LUCENE-3314: the results of calling next() again after it has already
  // returned null are undefined, so a further next() is deliberately not
  // asserted to be null here.
  return seen;
}
示例6: testAllDocs
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Verifies that TermRangeQuery instances without an effective lower or upper
 * bound do not enumerate through a TermRangeTermsEnum and match all four
 * documents, whereas a query with lower bound "B" does use a
 * TermRangeTermsEnum and matches three documents.
 */
public void testAllDocs() throws Exception {
  initializeIndex(new String[]{"A", "B", "C", "D"});
  // try-with-resources so the reader is closed even when an assertion fails.
  try (IndexReader reader = DirectoryReader.open(dir)) {
    IndexSearcher searcher = newSearcher(reader);
    TermRangeQuery query = new TermRangeQuery("content", null, null, true, true);
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "content");
    assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
    assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
    query = new TermRangeQuery("content", null, null, false, false);
    assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
    assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
    query = TermRangeQuery.newStringRange("content", "", null, true, false);
    assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
    assertEquals(4, searcher.search(query, null, 1000).scoreDocs.length);
    // and now another one
    query = TermRangeQuery.newStringRange("content", "B", null, true, false);
    assertTrue(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
    assertEquals(3, searcher.search(query, null, 1000).scoreDocs.length);
  }
}
示例7: testPrefixTerm
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Tests if a WildcardQuery that has only a trailing * in the term is
 * rewritten to a single PrefixQuery. The boost and rewriteMethod should be
 * preserved.
 * <p>
 * Also checks that the bare "*" wildcard uses neither a PrefixTermsEnum nor
 * an enum whose class name contains "AutomatonTermsEnum".
 */
public void testPrefixTerm() throws IOException {
  // Resources are closed in reverse order (reader, then directory), also when
  // an assertion fails part-way through the test.
  try (Directory indexStore = getIndexStore("field", new String[]{"prefix", "prefixx"});
       IndexReader reader = DirectoryReader.open(indexStore)) {
    IndexSearcher searcher = newSearcher(reader);

    MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*"));
    assertMatches(searcher, wq, 2);
    Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
    assertTrue(wq.getTermsEnum(terms) instanceof PrefixTermsEnum);

    wq = new WildcardQuery(new Term("field", "*"));
    assertMatches(searcher, wq, 2);
    assertFalse(wq.getTermsEnum(terms) instanceof PrefixTermsEnum);
    assertFalse(wq.getTermsEnum(terms).getClass().getSimpleName().contains("AutomatonTermsEnum"));
  }
}
示例8: assignClass
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * {@inheritDoc}
 * <p>
 * Walks all terms of the class field, values each one as
 * {@code calculateLogPrior(c) + calculateLogLikelihood(tokens, c)} and keeps
 * the one with the largest value. A class field without any terms yields a
 * result holding an empty {@link BytesRef} rather than a failure.
 *
 * @param inputDocument text to classify
 * @return the best class found together with a score of {@code 10 / |max|}
 * @throws IOException if {@code atomicReader} is null (the classifier must be
 *         trained first) or if reading the index fails
 */
@Override
public ClassificationResult<BytesRef> assignClass(String inputDocument) throws IOException {
  if (atomicReader == null) {
    throw new IOException("You must first call Classifier#train");
  }
  double max = -Double.MAX_VALUE;
  BytesRef foundClass = new BytesRef();

  // getTerms may return null (field has no terms); substitute an empty enum
  // so the loop below simply does not execute instead of throwing an NPE.
  Terms terms = MultiFields.getTerms(atomicReader, classFieldName);
  TermsEnum termsEnum = terms == null ? TermsEnum.EMPTY : terms.iterator(null);
  BytesRef next;
  String[] tokenizedDoc = tokenizeDoc(inputDocument);
  while ((next = termsEnum.next()) != null) {
    double clVal = calculateLogPrior(next) + calculateLogLikelihood(tokenizedDoc, next);
    if (clVal > max) {
      max = clVal;
      // Copy, because the enum may overwrite the BytesRef on the next call.
      foundClass = BytesRef.deepCopyOf(next);
    }
  }
  double score = 10 / Math.abs(max);
  return new ClassificationResult<BytesRef>(foundClass, score);
}
示例9: visitMatchingTerms
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Visits every term of the field that starts with {@code prefixRef} and whose
 * remainder (the text after the first {@code prefix.length()} characters)
 * fully matches {@code pattern}.
 *
 * @param reader index whose terms are scanned
 * @param fieldName field to scan
 * @param mtv visitor notified of each matching term
 * @throws IOException if reading the index fails
 */
@Override
public void visitMatchingTerms(
    IndexReader reader,
    String fieldName,
    MatchingTermVisitor mtv) throws IOException
{
  final int prefixLength = prefix.length();
  final Terms fieldTerms = MultiFields.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return;
  }
  final Matcher matcher = pattern.matcher("");
  try {
    final TermsEnum termsEnum = fieldTerms.iterator(null);
    final TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);

    // Starting term: the prefix itself on an exact hit, the enum's current
    // term when the seek landed elsewhere, nothing for any other status.
    BytesRef current = null;
    if (status == TermsEnum.SeekStatus.FOUND) {
      current = prefixRef;
    } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
      current = termsEnum.term();
    }

    // Stop at the end of the enum or at the first term lacking the prefix.
    for (; current != null && StringHelper.startsWith(current, prefixRef);
         current = termsEnum.next()) {
      final String termText = current.utf8ToString();
      matcher.reset(termText.substring(prefixLength));
      if (matcher.matches()) {
        mtv.visitMatchingTerm(new Term(fieldName, termText));
      }
    }
  } finally {
    matcher.reset();
  }
}
示例10: visitMatchingTerms
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Visits every term of the field that starts with the prefix: first the term
 * the seek lands on (if it carries the prefix), then each following term
 * until one without the prefix, or the end of the enum, is reached.
 *
 * @param reader index whose terms are scanned
 * @param fieldName field to scan
 * @param mtv visitor notified of each matching term
 * @throws IOException if reading the index fails
 */
@Override
public void visitMatchingTerms(
    IndexReader reader,
    String fieldName,
    MatchingTermVisitor mtv) throws IOException
{
  /* inspired by PrefixQuery.rewrite(): */
  final Terms fieldTerms = MultiFields.getTerms(reader, fieldName);
  if (fieldTerms == null) {
    return;
  }
  final TermsEnum termsEnum = fieldTerms.iterator(null);
  final TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));

  if (status == TermsEnum.SeekStatus.FOUND) {
    mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
  } else if (status == TermsEnum.SeekStatus.NOT_FOUND
      && StringHelper.startsWith(termsEnum.term(), prefixRef)) {
    mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString()));
  } else {
    // EOF, or the first term at/after the prefix does not carry it.
    return;
  }

  for (BytesRef text = termsEnum.next();
       text != null && StringHelper.startsWith(text, prefixRef);
       text = termsEnum.next()) {
    mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString()));
  }
}
示例11: testEmptyOptimization
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Test handling of the empty language: a query built from the empty automaton
 * must hand back the shared {@code TermsEnum.EMPTY} instance and produce zero
 * hits.
 */
public void testEmptyOptimization() throws IOException {
  final AutomatonQuery emptyQuery = new AutomatonQuery(newTerm("bogus"), Automata.makeEmpty());
  // not yet available: assertTrue(aq.getEnum(searcher.getIndexReader())
  // instanceof EmptyTermEnum);
  final Terms fieldTerms = MultiFields.getTerms(searcher.getIndexReader(), FN);
  final TermsEnum actualEnum = emptyQuery.getTermsEnum(fieldTerms);
  assertSame(TermsEnum.EMPTY, actualEnum);
  assertEquals(0, automatonQueryNrHits(emptyQuery));
}
示例12: getValues
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Returns per-document int values: for each document, the term it holds in
 * {@code field} is looked up among the terms of {@code qfield} in the
 * top-level reader, and that term's document frequency is the value (0 when
 * the term is not found there).
 *
 * @param context not used by this method
 * @param readerContext segment context the per-document terms are read from
 * @throws IOException if the terms cannot be obtained
 */
@Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException
{
  final BinaryDocValues docTerms = cache.getTerms(readerContext.reader(), field, false, PackedInts.FAST);
  final IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
  final Terms queryFieldTerms = MultiFields.getTerms(topReader, qfield);

  final TermsEnum termsEnum;
  if (queryFieldTerms == null) {
    termsEnum = TermsEnum.EMPTY;
  } else {
    termsEnum = queryFieldTerms.iterator(null);
  }

  return new IntDocValues(this) {
    @Override
    public int intVal(int doc)
    {
      try {
        return termsEnum.seekExact(docTerms.get(doc)) ? termsEnum.docFreq() : 0;
      } catch (IOException e) {
        throw new RuntimeException("caught exception in function "+description()+" : doc="+doc, e);
      }
    }
  };
}
示例13: getEntryIterator
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Returns an iterator over the terms of {@code field} in {@code reader}, or
 * {@code InputIterator.EMPTY} when the field has no terms.
 *
 * @throws IOException if the terms cannot be read
 */
@Override
public final InputIterator getEntryIterator() throws IOException {
  final Terms fieldTerms = MultiFields.getTerms(reader, field);
  if (fieldTerms == null) {
    return InputIterator.EMPTY;
  }
  return new InputIterator.InputIteratorWrapper(fieldTerms.iterator(null));
}
示例14: HighFrequencyIterator
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
HighFrequencyIterator() throws IOException {
Terms terms = MultiFields.getTerms(reader, field);
if (terms != null) {
termsEnum = terms.iterator(null);
} else {
termsEnum = null;
}
minNumDocs = (int)(thresh * (float)reader.numDocs());
}
示例15: getTextTermFreqForClass
import org.apache.lucene.index.MultiFields; //导入方法依赖的package包/类
/**
 * Estimates the number of unique terms in the text fields across the
 * documents labelled with class {@code c}: the sum over all text fields of
 * (term/doc pairs divided by documents having the field), multiplied by the
 * number of documents containing {@code c} in the class field.
 * <p>
 * Text fields for which the index has no terms contribute nothing.
 *
 * @param c class label, as a term of the class field
 * @return average unique terms per document over the text fields, times the
 *         number of documents with class {@code c}
 * @throws IOException if reading the index fails
 */
private double getTextTermFreqForClass(BytesRef c) throws IOException {
  double avgNumberOfUniqueTerms = 0;
  for (String textFieldName : textFieldNames) {
    Terms terms = MultiFields.getTerms(atomicReader, textFieldName);
    if (terms == null) {
      // getTerms returns null when the field has no terms; skip it rather
      // than fail with a NullPointerException.
      continue;
    }
    long numPostings = terms.getSumDocFreq(); // number of term/doc pairs
    avgNumberOfUniqueTerms += numPostings / (double) terms.getDocCount(); // avg # of unique terms per doc
  }
  int docsWithC = atomicReader.docFreq(new Term(classFieldName, c));
  return avgNumberOfUniqueTerms * docsWithC; // avg # of unique terms in text fields per doc * # docs with c
}