本文整理汇总了Java中org.apache.lucene.index.TermsEnum.next方法的典型用法代码示例。如果您正苦于以下问题:Java TermsEnum.next方法的具体用法?Java TermsEnum.next怎么用?Java TermsEnum.next使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.index.TermsEnum
的用法示例。
在下文中一共展示了TermsEnum.next方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: buildTerm
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
// start term, optimized writing
BytesRef term = termIter.next();
spare.copyUTF8Bytes(term);
builder.startObject(spare.toString());
buildTermStatistics(builder, termIter);
// finally write the term vectors
PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
int termFreq = posEnum.freq();
builder.field(FieldStrings.TERM_FREQ, termFreq);
initMemory(curTerms, termFreq);
initValues(curTerms, posEnum, termFreq);
buildValues(builder, curTerms, termFreq);
buildScore(builder, boostAtt);
builder.endObject();
}
示例2: buildFromTerms
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
/**
* This method iterates all terms in the given {@link TermsEnum} and
* associates each terms ordinal with the terms documents. The caller must
* exhaust the returned {@link BytesRefIterator} which returns all values
* where the first returned value is associated with the ordinal <tt>1</tt>
* etc.
* <p>
* If the {@link TermsEnum} contains prefix coded numerical values the terms
* enum should be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)}
* or {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. If
* the {@link TermsEnum} is not wrapped the returned
* {@link BytesRefIterator} will contain partial precision terms rather than
* only full-precision terms.
* </p>
*/
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
return new BytesRefIterator() {
private PostingsEnum docsEnum = null;
@Override
public BytesRef next() throws IOException {
BytesRef ref;
if ((ref = termsEnum.next()) != null) {
docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
nextOrdinal();
int docId;
while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
addDoc(docId);
}
}
return ref;
}
};
}
示例3: testNRTSearchOnClosedWriter
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
public void testNRTSearchOnClosedWriter() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
DirectoryReader reader = DirectoryReader.open(indexWriter);
for (int i = 0; i < 100; i++) {
Document document = new Document();
TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES);
field.setBoost(i);
document.add(field);
indexWriter.addDocument(document);
}
reader = refreshReader(reader);
indexWriter.close();
TermsEnum termDocs = SlowCompositeReaderWrapper.wrap(reader).terms("_id").iterator();
termDocs.next();
}
示例4: buildFromTerms
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
/**
* This method iterates all terms in the given {@link TermsEnum} and
* associates each terms ordinal with the terms documents. The caller must
* exhaust the returned {@link BytesRefIterator} which returns all values
* where the first returned value is associted with the ordinal <tt>1</tt>
* etc.
* <p>
* If the {@link TermsEnum} contains prefix coded numerical values the terms
* enum should be wrapped with either {@link #wrapNumeric32Bit(TermsEnum)}
* or {@link #wrapNumeric64Bit(TermsEnum)} depending on its precision. If
* the {@link TermsEnum} is not wrapped the returned
* {@link BytesRefIterator} will contain partial precision terms rather than
* only full-precision terms.
* </p>
*/
public BytesRefIterator buildFromTerms(final TermsEnum termsEnum) throws IOException {
return new BytesRefIterator() {
private PostingsEnum docsEnum = null;
@Override
public BytesRef next() throws IOException {
BytesRef ref;
if ((ref = termsEnum.next()) != null) {
docsEnum = termsEnum.postings(docsEnum, PostingsEnum.NONE);
nextOrdinal();
int docId;
while ((docId = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
addDoc(docId);
}
}
return ref;
}
};
}
示例5: QueryAutoStopWordAnalyzer
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
/**
* Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
* given selection of fields from terms with a document frequency greater than
* the given maxDocFreq
*
* @param delegate Analyzer whose TokenStream will be filtered
* @param indexReader IndexReader to identify the stopwords from
* @param fields Selection of fields to calculate stopwords for
* @param maxDocFreq Document frequency terms should be above in order to be stopwords
* @throws IOException Can be thrown while reading from the IndexReader
*/
public QueryAutoStopWordAnalyzer(
Analyzer delegate,
IndexReader indexReader,
Collection<String> fields,
int maxDocFreq) throws IOException {
super(delegate.getReuseStrategy());
this.delegate = delegate;
for (String field : fields) {
Set<String> stopWords = new HashSet<>();
Terms terms = MultiFields.getTerms(indexReader, field);
CharsRefBuilder spare = new CharsRefBuilder();
if (terms != null) {
TermsEnum te = terms.iterator(null);
BytesRef text;
while ((text = te.next()) != null) {
if (te.docFreq() > maxDocFreq) {
spare.copyUTF8Bytes(text);
stopWords.add(spare.toString());
}
}
}
stopWordsPerField.put(field, stopWords);
}
}
示例6: build
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
/**
* Returns a DocIdSet per segments containing the matching docs for the specified slice.
*/
private DocIdSet build(LeafReader reader) throws IOException {
final DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
final Terms terms = reader.terms(getField());
final TermsEnum te = terms.iterator();
PostingsEnum docsEnum = null;
for (BytesRef term = te.next(); term != null; term = te.next()) {
int hashCode = term.hashCode();
if (contains(hashCode)) {
docsEnum = te.postings(docsEnum, PostingsEnum.NONE);
builder.add(docsEnum);
}
}
return builder.build();
}
示例7: addTermFrequencies
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
/**
* Adds terms and frequencies found in vector into the Map termFreqMap
*
* @param termFreqMap a Map of terms and their frequencies
* @param vector List of terms and their frequencies for a doc/field
* @param fieldName Optional field name of the terms for skip terms
*/
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName) throws IOException {
final TermsEnum termsEnum = vector.iterator();
final CharsRefBuilder spare = new CharsRefBuilder();
BytesRef text;
while((text = termsEnum.next()) != null) {
spare.copyUTF8Bytes(text);
final String term = spare.toString();
if (isNoiseWord(term)) {
continue;
}
if (isSkipTerm(fieldName, term)) {
continue;
}
final PostingsEnum docs = termsEnum.postings(null);
int freq = 0;
while(docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
freq += docs.freq();
}
// increment frequency
Int cnt = termFreqMap.get(term);
if (cnt == null) {
cnt = new Int();
termFreqMap.put(term, cnt);
cnt.x = freq;
} else {
cnt.x += freq;
}
}
}
示例8: getPrefixTerms
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
// SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms
// instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually.
List<LeafReaderContext> leaves = reader.leaves();
for (LeafReaderContext leaf : leaves) {
Terms _terms = leaf.reader().terms(field);
if (_terms == null) {
continue;
}
TermsEnum termsEnum = _terms.iterator();
TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes());
if (TermsEnum.SeekStatus.END == seekStatus) {
continue;
}
for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) {
if (!StringHelper.startsWith(term, prefix.bytes())) {
break;
}
terms.add(new Term(field, BytesRef.deepCopyOf(term)));
if (terms.size() >= maxExpansions) {
return;
}
}
}
}
示例9: testTermsEnum
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
public void testTermsEnum() throws Exception {
fillExtendedMvSet();
writer.forceMerge(1);
List<LeafReaderContext> atomicReaderContexts = refreshReader();
IndexOrdinalsFieldData ifd = getForField("value");
for (LeafReaderContext atomicReaderContext : atomicReaderContexts) {
AtomicOrdinalsFieldData afd = ifd.load(atomicReaderContext);
TermsEnum termsEnum = afd.getOrdinalsValues().termsEnum();
int size = 0;
while (termsEnum.next() != null) {
size++;
}
assertThat(size, equalTo(12));
assertThat(termsEnum.seekExact(new BytesRef("10")), is(true));
assertThat(termsEnum.term().utf8ToString(), equalTo("10"));
assertThat(termsEnum.next(), nullValue());
assertThat(termsEnum.seekExact(new BytesRef("08")), is(true));
assertThat(termsEnum.term().utf8ToString(), equalTo("08"));
size = 0;
while (termsEnum.next() != null) {
size++;
}
assertThat(size, equalTo(2));
termsEnum.seekExact(8);
assertThat(termsEnum.term().utf8ToString(), equalTo("07"));
size = 0;
while (termsEnum.next() != null) {
size++;
}
assertThat(size, equalTo(3));
}
}
示例10: visitMatchingTerms
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
@Override
public void visitMatchingTerms(
IndexReader reader,
String fieldName,
MatchingTermVisitor mtv) throws IOException
{
/* inspired by PrefixQuery.rewrite(): */
Terms terms = MultiFields.getTerms(reader, fieldName);
if (terms != null) {
TermsEnum termsEnum = terms.iterator(null);
boolean skip = false;
TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getPrefix()));
if (status == TermsEnum.SeekStatus.FOUND) {
mtv.visitMatchingTerm(getLucenePrefixTerm(fieldName));
} else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
if (StringHelper.startsWith(termsEnum.term(), prefixRef)) {
mtv.visitMatchingTerm(new Term(fieldName, termsEnum.term().utf8ToString()));
} else {
skip = true;
}
} else {
// EOF
skip = true;
}
if (!skip) {
while(true) {
BytesRef text = termsEnum.next();
if (text != null && StringHelper.startsWith(text, prefixRef)) {
mtv.visitMatchingTerm(new Term(fieldName, text.utf8ToString()));
} else {
break;
}
}
}
}
}
示例11: checkBrownFoxTermVector
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
private void checkBrownFoxTermVector(Fields fields, String fieldName, boolean withPayloads) throws IOException {
String[] values = {"brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the"};
int[] freq = {1, 1, 1, 1, 1, 1, 1, 2};
int[][] pos = {{2}, {8}, {3}, {4}, {7}, {5}, {1}, {0, 6}};
int[][] startOffset = {{10}, {40}, {16}, {20}, {35}, {26}, {4}, {0, 31}};
int[][] endOffset = {{15}, {43}, {19}, {25}, {39}, {30}, {9}, {3, 34}};
Terms terms = fields.terms(fieldName);
assertThat(terms.size(), equalTo(8L));
TermsEnum iterator = terms.iterator();
for (int j = 0; j < values.length; j++) {
String string = values[j];
BytesRef next = iterator.next();
assertThat(next, notNullValue());
assertThat("expected " + string, string, equalTo(next.utf8ToString()));
assertThat(next, notNullValue());
// do not test ttf or doc frequency, because here we have many
// shards and do not know how documents are distributed
PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
assertThat(docsAndPositions.nextDoc(), equalTo(0));
assertThat(freq[j], equalTo(docsAndPositions.freq()));
int[] termPos = pos[j];
int[] termStartOffset = startOffset[j];
int[] termEndOffset = endOffset[j];
assertThat(termPos.length, equalTo(freq[j]));
assertThat(termStartOffset.length, equalTo(freq[j]));
assertThat(termEndOffset.length, equalTo(freq[j]));
for (int k = 0; k < freq[j]; k++) {
int nextPosition = docsAndPositions.nextPosition();
assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
if (withPayloads) {
assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
}
}
}
assertThat(iterator.next(), nullValue());
}
示例12: compareTermVectors
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
private void compareTermVectors(String fieldName, Fields fields0, Fields fields1) throws IOException {
Terms terms0 = fields0.terms(fieldName);
Terms terms1 = fields1.terms(fieldName);
assertThat(terms0, notNullValue());
assertThat(terms1, notNullValue());
assertThat(terms0.size(), equalTo(terms1.size()));
TermsEnum iter0 = terms0.iterator();
TermsEnum iter1 = terms1.iterator();
for (int i = 0; i < terms0.size(); i++) {
BytesRef next0 = iter0.next();
assertThat(next0, notNullValue());
BytesRef next1 = iter1.next();
assertThat(next1, notNullValue());
// compare field value
String string0 = next0.utf8ToString();
String string1 = next1.utf8ToString();
assertThat("expected: " + string0, string0, equalTo(string1));
// compare df and ttf
assertThat("term: " + string0, iter0.docFreq(), equalTo(iter1.docFreq()));
assertThat("term: " + string0, iter0.totalTermFreq(), equalTo(iter1.totalTermFreq()));
// compare freq and docs
PostingsEnum docsAndPositions0 = iter0.postings(null, PostingsEnum.ALL);
PostingsEnum docsAndPositions1 = iter1.postings(null, PostingsEnum.ALL);
assertThat("term: " + string0, docsAndPositions0.nextDoc(), equalTo(docsAndPositions1.nextDoc()));
assertThat("term: " + string0, docsAndPositions0.freq(), equalTo(docsAndPositions1.freq()));
// compare position, start offsets and end offsets
for (int j = 0; j < docsAndPositions0.freq(); j++) {
assertThat("term: " + string0, docsAndPositions0.nextPosition(), equalTo(docsAndPositions1.nextPosition()));
assertThat("term: " + string0, docsAndPositions0.startOffset(), equalTo(docsAndPositions1.startOffset()));
assertThat("term: " + string0, docsAndPositions0.endOffset(), equalTo(docsAndPositions1.endOffset()));
}
}
assertThat(iter0.next(), nullValue());
assertThat(iter1.next(), nullValue());
}
示例13: checkBestTerms
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
private void checkBestTerms(Terms terms, List<String> expectedTerms) throws IOException {
final TermsEnum termsEnum = terms.iterator();
List<String> bestTerms = new ArrayList<>();
BytesRef text;
while((text = termsEnum.next()) != null) {
bestTerms.add(text.utf8ToString());
}
Collections.sort(expectedTerms);
Collections.sort(bestTerms);
assertArrayEquals(expectedTerms.toArray(), bestTerms.toArray());
}
示例14: visitMatchingTerms
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
@Override
public void visitMatchingTerms(
IndexReader reader,
String fieldName,
MatchingTermVisitor mtv) throws IOException
{
int prefixLength = prefix.length();
Terms terms = MultiFields.getTerms(reader, fieldName);
if (terms != null) {
Matcher matcher = pattern.matcher("");
try {
TermsEnum termsEnum = terms.iterator(null);
TermsEnum.SeekStatus status = termsEnum.seekCeil(prefixRef);
BytesRef text;
if (status == TermsEnum.SeekStatus.FOUND) {
text = prefixRef;
} else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
text = termsEnum.term();
} else {
text = null;
}
while(text != null) {
if (text != null && StringHelper.startsWith(text, prefixRef)) {
String textString = text.utf8ToString();
matcher.reset(textString.substring(prefixLength));
if (matcher.matches()) {
mtv.visitMatchingTerm(new Term(fieldName, textString));
}
} else {
break;
}
text = termsEnum.next();
}
} finally {
matcher.reset();
}
}
}
示例15: testArtificialDocWithPreference
import org.apache.lucene.index.TermsEnum; //导入方法依赖的package包/类
public void testArtificialDocWithPreference() throws ExecutionException, InterruptedException, IOException {
// setup indices
Settings.Builder settings = Settings.builder()
.put(indexSettings())
.put("index.analysis.analyzer", "standard");
assertAcked(prepareCreate("test")
.setSettings(settings)
.addMapping("type1", "field1", "type=text,term_vector=with_positions_offsets"));
ensureGreen();
// index document
indexRandom(true, client().prepareIndex("test", "type1", "1").setSource("field1", "random permutation"));
// Get search shards
ClusterSearchShardsResponse searchShardsResponse = client().admin().cluster().prepareSearchShards("test").get();
List<Integer> shardIds = Arrays.stream(searchShardsResponse.getGroups()).map(s -> s.getShardId().id()).collect(Collectors.toList());
// request termvectors of artificial document from each shard
int sumTotalTermFreq = 0;
int sumDocFreq = 0;
for (Integer shardId : shardIds) {
TermVectorsResponse tvResponse = client().prepareTermVectors()
.setIndex("test")
.setType("type1")
.setPreference("_shards:" + shardId)
.setDoc(jsonBuilder().startObject().field("field1", "random permutation").endObject())
.setFieldStatistics(true)
.setTermStatistics(true)
.get();
Fields fields = tvResponse.getFields();
Terms terms = fields.terms("field1");
assertNotNull(terms);
TermsEnum termsEnum = terms.iterator();
while (termsEnum.next() != null) {
sumTotalTermFreq += termsEnum.totalTermFreq();
sumDocFreq += termsEnum.docFreq();
}
}
assertEquals("expected to find term statistics in exactly one shard!", 2, sumTotalTermFreq);
assertEquals("expected to find term statistics in exactly one shard!", 2, sumDocFreq);
}