本文整理汇总了Java中org.apache.lucene.search.spans.Spans.next方法的典型用法代码示例。如果您正苦于以下问题:Java Spans.next方法的具体用法?Java Spans.next怎么用?Java Spans.next使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.search.spans.Spans
的用法示例。
在下文中一共展示了Spans.next方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getPayloads
import org.apache.lucene.search.spans.Spans; //导入方法依赖的package包/类
/**
 * Collects the payloads of every span matching {@code query} across all
 * leaves of this searcher's reader context.
 *
 * @param payloads collector that receives each payload's raw bytes
 * @param query    the span query whose matches are inspected
 * @throws IOException if reading the index fails
 */
private void getPayloads(Collection<byte []> payloads, SpanQuery query)
        throws IOException {
    Map<Term,TermContext> termContexts = new HashMap<>();
    TreeSet<Term> terms = new TreeSet<>();
    query.extractTerms(terms);
    for (Term term : terms) {
        termContexts.put(term, TermContext.build(context, term));
    }
    for (AtomicReaderContext atomicReaderContext : context.leaves()) {
        final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader().getLiveDocs(), termContexts);
        // next() already returns a boolean; the former `== true` was redundant.
        while (spans.next()) {
            if (spans.isPayloadAvailable()) {
                // addAll replaces the former element-by-element copy loop.
                payloads.addAll(spans.getPayload());
            }
        }
    }
}
示例2: setCandidateList
import org.apache.lucene.search.spans.Spans; //导入方法依赖的package包/类
@Override
protected boolean setCandidateList (List<CandidateSpan> candidateList,
        Spans candidate, boolean hasMoreCandidates,
        List<CandidateSpan> targetList) throws IOException {
    // No pending target: nothing to collect.
    if (targetList.isEmpty()) {
        return hasMoreCandidates;
    }
    CandidateSpan target = targetList.get(0);
    // Walk candidates in the target's document; record those whose element
    // position resolves and which lie within the maximum distance. The first
    // resolvable candidate that falls out of range ends the scan.
    while (hasMoreCandidates && candidate.doc() == target.getDoc()) {
        int pos = findElementPosition(candidate);
        if (pos != -1) {
            CandidateSpan span = new CandidateSpan(candidate, pos);
            if (!isWithinMaxDistance(target, span)) {
                break;
            }
            candidateList.add(span);
        }
        hasMoreCandidates = candidate.next();
    }
    return hasMoreCandidates;
}
示例3: getPayloads
import org.apache.lucene.search.spans.Spans; //导入方法依赖的package包/类
/**
 * Collects the payloads of every span matching {@code query} across all
 * leaves of this searcher's reader context.
 *
 * @param payloads collector that receives each payload's raw bytes
 * @param query    the span query whose matches are inspected
 * @throws IOException if reading the index fails
 */
private void getPayloads(Collection<byte []> payloads, SpanQuery query)
        throws IOException {
    // Diamond operator: the file's sibling example already uses it.
    Map<Term,TermContext> termContexts = new HashMap<>();
    TreeSet<Term> terms = new TreeSet<>();
    query.extractTerms(terms);
    for (Term term : terms) {
        termContexts.put(term, TermContext.build(context, term, true));
    }
    for (AtomicReaderContext atomicReaderContext : context.leaves()) {
        final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader().getLiveDocs(), termContexts);
        // next() already returns a boolean; the former `== true` was redundant.
        while (spans.next()) {
            if (spans.isPayloadAvailable()) {
                // addAll replaces the former element-by-element copy loop.
                payloads.addAll(spans.getPayload());
            }
        }
    }
}
示例4: getPayloads
import org.apache.lucene.search.spans.Spans; //导入方法依赖的package包/类
/**
 * Collects the payloads of every span matching {@code query} across all
 * leaves of this searcher's reader context.
 *
 * @param payloads collector that receives each payload's raw bytes
 * @param query    the span query whose matches are inspected
 * @throws IOException if reading the index fails
 */
private void getPayloads(Collection<byte []> payloads, SpanQuery query)
        throws IOException {
    // Diamond operator: the file's sibling example already uses it.
    Map<Term,TermContext> termContexts = new HashMap<>();
    TreeSet<Term> terms = new TreeSet<>();
    query.extractTerms(terms);
    for (Term term : terms) {
        termContexts.put(term, TermContext.build(context, term));
    }
    for (AtomicReaderContext atomicReaderContext : context.leaves()) {
        final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader().getLiveDocs(), termContexts);
        // next() already returns a boolean; the former `== true` was redundant.
        while (spans.next()) {
            if (spans.isPayloadAvailable()) {
                // addAll replaces the former element-by-element copy loop.
                payloads.addAll(spans.getPayload());
            }
        }
    }
}
示例5: setCandidateList
import org.apache.lucene.search.spans.Spans; //导入方法依赖的package包/类
@Override
protected boolean setCandidateList (List<CandidateSpan> candidateList,
        Spans candidate, boolean hasMoreCandidates,
        List<CandidateSpan> targetList) throws IOException {
    // No pending target: nothing to collect.
    if (targetList.isEmpty()) {
        return hasMoreCandidates;
    }
    CandidateSpan target = targetList.get(0);
    // Collect candidates from the target's document while they stay within
    // the allowed distance; stop at the first out-of-range candidate or
    // when the document changes.
    while (hasMoreCandidates
            && candidate.doc() == target.getDoc()
            && isWithinMaxDistance(target, candidate)) {
        candidateList.add(new CandidateSpan(candidate));
        hasMoreCandidates = candidate.next();
    }
    return hasMoreCandidates;
}
示例6: getSpanInfo
import org.apache.lucene.search.spans.Spans; //导入方法依赖的package包/类
/**
 * Enumerates every match of {@code query} in {@code reader} and renders
 * each one as a human-readable line: document id, start-end offsets, and
 * the decoded payloads of the match.
 *
 * @param reader the index reader to search
 * @param query  the span query to enumerate
 * @return one formatted string per matching span
 * @throws IOException if reading the index fails
 */
public static List<String> getSpanInfo (IndexReader reader, SpanQuery query)
        throws IOException {
    // NOTE(review): termContexts is passed to getSpans while still empty;
    // presumably the query builds what it needs — confirm against the
    // SpanQuery implementation in use.
    Map<Term, TermContext> termContexts = new HashMap<>();
    List<String> spanArray = new ArrayList<>();
    for (LeafReaderContext atomic : reader.leaves()) {
        Bits bitset = atomic.reader().getLiveDocs();
        Spans spans = query.getSpans(atomic, bitset, termContexts);
        while (spans.next()) {
            // StringBuilder: this buffer is method-local, so the
            // synchronized StringBuffer bought nothing.
            StringBuilder payloadString = new StringBuilder();
            int docid = atomic.docBase + spans.doc();
            if (spans.isPayloadAvailable()) {
                for (byte[] payload : spans.getPayload()) {
                    /* retrieve payload for current matching span */
                    payloadString.append(byte2int(payload)).append(",");
                    payloadString.append(byte2int(payload, 2));
                    payloadString.append(" (").append(payload.length).append(")");
                    payloadString.append(" | ");
                }
            }
            spanArray.add("Doc: " + docid + " with " + spans.start() + "-"
                    + spans.end() + " || " + payloadString.toString());
        }
    }
    return spanArray;
}
示例7: testMultipleMatchesPerDoc
import org.apache.lucene.search.spans.Spans; //导入方法依赖的package包/类
/**
 * Checks score distribution and span count for a payload term query on a
 * field where "seventy" occurs in every document: 100 hits total, a max
 * score of 4.0, exactly ten documents scoring 4.0, and 200 span matches
 * (two per hit).
 */
public void testMultipleMatchesPerDoc() throws Exception {
    PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
            new MaxPayloadFunction());
    TopDocs hits = searcher.search(query, null, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
    //they should all have the exact same score, because they all contain seventy once, and we set
    //all the other similarity factors to be 1
    //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
    assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
    //there should be exactly 10 items that score a 4, all the rest should score a 2
    //The 10 items are: 70 + i*100 where i in [0-9]
    int numTens = 0;
    for (int i = 0; i < hits.scoreDocs.length; i++) {
        ScoreDoc doc = hits.scoreDocs[i];
        if (doc.doc % 10 == 0) {
            numTens++;
            assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
        } else {
            assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
        }
    }
    assertTrue(numTens + " does not equal: " + 10, numTens == 10);
    CheckHits.checkExplanations(query, "field", searcher, true);
    Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
    assertTrue("spans is null and it shouldn't be", spans != null);
    //should be two matches per document
    int count = 0;
    //100 hits times 2 matches per hit, we should have 200 in count
    while (spans.next()) {
        count++;
    }
    assertTrue(count + " does not equal: " + 200, count == 200);
}
示例8: testIgnoreSpanScorer
import org.apache.lucene.search.spans.Spans; //导入方法依赖的package包/类
/**
 * Exercises a payload term query constructed with includeSpanScore=false
 * and checks the same hit-count/score distribution as the other payload
 * tests.
 */
public void testIgnoreSpanScorer() throws Exception {
    PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
            new MaxPayloadFunction(), false);
    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher theSearcher = newSearcher(reader);
    theSearcher.setSimilarity(new FullSimilarity());
    // NOTE(review): theSearcher is configured with FullSimilarity but never
    // used below — the class-level 'searcher' runs the query instead.
    // Confirm whether the FullSimilarity setup is supposed to apply here.
    TopDocs hits = searcher.search(query, null, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
    //they should all have the exact same score, because they all contain seventy once, and we set
    //all the other similarity factors to be 1
    //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
    assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
    //there should be exactly 10 items that score a 4, all the rest should score a 2
    //The 10 items are: 70 + i*100 where i in [0-9]
    int numTens = 0;
    for (int i = 0; i < hits.scoreDocs.length; i++) {
        ScoreDoc doc = hits.scoreDocs[i];
        if (doc.doc % 10 == 0) {
            numTens++;
            assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
        } else {
            assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
        }
    }
    assertTrue(numTens + " does not equal: " + 10, numTens == 10);
    CheckHits.checkExplanations(query, "field", searcher, true);
    Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
    assertTrue("spans is null and it shouldn't be", spans != null);
    //should be two matches per document
    int count = 0;
    //100 hits times 2 matches per hit, we should have 200 in count
    while (spans.next()) {
        count++;
    }
    // NOTE(review): reader is only closed on the success path; an assertion
    // failure above leaks it. Acceptable in a test, but worth a try/finally.
    reader.close();
}
示例9: getSnippet
import org.apache.lucene.search.spans.Spans; //导入方法依赖的package包/类
/**
 * Builds a snippet for the current document by locating the positions and
 * payload-encoded weights of each span term query inside that document.
 *
 * @return the rendered snippet, or an empty string if index access fails
 *         (best-effort: the IOException is logged, not propagated)
 * @throws InterruptedException if the extraction thread is interrupted
 */
@Override
public String getSnippet() throws InterruptedException {
    try {
        List<Query> stqs = new ArrayList<>();
        List<Query> nstqs = new ArrayList<>();
        getSpanTermQueries(query, stqs, nstqs);
        List<Span> formSpans = new ArrayList<>();
        for (Query q : stqs) {
            for (AtomicReaderContext context : indexReader.leaves()) {
                if (Thread.currentThread().isInterrupted()) {
                    throw new InterruptedException("Snippet extraction thread interrupted during processing");
                }
                // Typed empty map via target-typed diamond, replacing the
                // former raw `new HashMap()` (unchecked warning).
                Spans spans = ((SpanTermQuery) q).getSpans(context, null, new HashMap<>());
                // Position the enumeration just before the target document.
                spans.skipTo(docNumber - context.docBase - 1);
                boolean cont = true;
                boolean contextFound = false;
                while (cont) {
                    int contextSpanDocNumber = context.docBase + spans.doc();
                    if (docNumber == contextSpanDocNumber) {
                        contextFound = true;
                        // NOTE(review): this is always true when the ids are
                        // equal — looks like a leftover condition; confirm.
                        cont = docNumber - contextSpanDocNumber >= 0;
                        Collection<byte[]> payloads = spans.getPayload();
                        // The first payload carries the term weight encoded
                        // as short bytes.
                        formSpans.add(new Span(spans.doc(), q.toString(), spans.start(), cz.muni.fi.mias.math.PayloadHelper.decodeFloatFromShortBytes(payloads.iterator().next())));
                    }
                    if (!spans.next()) {
                        cont = false;
                    }
                }
                // The document lives in exactly one leaf; stop once found.
                if (contextFound) {
                    break;
                }
            }
        }
        return getSnippet(formSpans, nstqs);
    } catch (IOException ex) {
        // Deliberate best-effort: snippet failures must not break search.
        LOG.fatal(ex);
    }
    return "";
}
示例10: testIgnoreSpanScorer
import org.apache.lucene.search.spans.Spans; //导入方法依赖的package包/类
/**
 * Exercises a payload term query constructed with includeSpanScore=false
 * and checks the same hit-count/score distribution as the other payload
 * tests.
 */
public void testIgnoreSpanScorer() throws Exception {
    PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
            new MaxPayloadFunction(), false);
    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher theSearcher = new IndexSearcher(reader);
    theSearcher.setSimilarity(new FullSimilarity());
    // NOTE(review): theSearcher is configured with FullSimilarity but never
    // used below — the class-level 'searcher' runs the query instead.
    // Confirm whether the FullSimilarity setup is supposed to apply here.
    TopDocs hits = searcher.search(query, null, 100);
    assertTrue("hits is null and it shouldn't be", hits != null);
    assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);
    //they should all have the exact same score, because they all contain seventy once, and we set
    //all the other similarity factors to be 1
    //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
    assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
    //there should be exactly 10 items that score a 4, all the rest should score a 2
    //The 10 items are: 70 + i*100 where i in [0-9]
    int numTens = 0;
    for (int i = 0; i < hits.scoreDocs.length; i++) {
        ScoreDoc doc = hits.scoreDocs[i];
        if (doc.doc % 10 == 0) {
            numTens++;
            assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
        } else {
            assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
        }
    }
    assertTrue(numTens + " does not equal: " + 10, numTens == 10);
    CheckHits.checkExplanations(query, "field", searcher, true);
    Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
    assertTrue("spans is null and it shouldn't be", spans != null);
    //should be two matches per document
    int count = 0;
    //100 hits times 2 matches per hit, we should have 200 in count
    while (spans.next()) {
        count++;
    }
    // NOTE(review): reader is only closed on the success path; an assertion
    // failure above leaks it. Acceptable in a test, but worth a try/finally.
    reader.close();
}
示例11: extract
import org.apache.lucene.search.spans.Spans; //导入方法依赖的package包/类
/**
 * Runs the configured span queries for every loaded domain against the
 * index and gathers the results into an ExtractedRecordCollector.
 *
 * The order is significant: taboo spans are collected first (polite mode),
 * hard overrides next, then per-domain content and title queries, each of
 * which respects the taboo spans where applicable.
 *
 * @param searcher          the searcher over the index to mine; closed here
 * @param prePostProcessor  pre/post processing hooks for the collector
 * @return the populated record collector
 * @throws Exception if querying or span enumeration fails
 */
public ExtractedRecordCollector extract(IndexSearcher searcher, PrePostProcessor prePostProcessor)
        throws Exception {
    ExtractedRecordCollector erc = new ExtractedRecordCollector(prePostProcessor);
    IndexReader ir = searcher.getIndexReader();
    // Per-document list of spans that must NOT be extracted; only built in
    // polite mode, otherwise stays null and taboo filtering is skipped.
    Map<Integer, List<HitSpan>> tabooSpans = null;
    if (polite) {
        tabooSpans = new HashMap<Integer, List<HitSpan>>();
        TopDocs topDocs = searcher.search(tabooSQ, 5000);
        if (topDocs.totalHits > 0) {
            Spans spans = tabooSQ.getSpans(ir);
            while (spans.next()) {
                List<HitSpan> ms = tabooSpans.get(spans.doc());
                if (ms != null) {
                    ms.add(new HitSpan(spans.start(), spans.end()));
                } else {
                    ms = new LinkedList<HitSpan>();
                    ms.add(new HitSpan(spans.start(), spans.end()));
                    tabooSpans.put(spans.doc(), ms);
                }
            }
        }
    }
    // Hard overrides are extracted unconditionally (no taboo filtering).
    runSpan(searcher, hardOverridesSQ, -1, "", erc, null,
            ExtractedReviewRecord.TYPE_OVERRIDE,
            null);
    for (String domain : loadedDomains) {
        logger.debug("Domain: " + domain);
        Map<Byte, SpanQuery> type2query = queries.get(domain);
        // Positive content query: taboo spans filter its matches.
        SpanQuery sq = type2query.get(PLUS_ONE);
        if (sq != null) {
            runSpan(searcher, sq, 1, domain, erc,
                    formSetForHighLighting(sq,
                            vocabs.get(domain).getWordsOfInterest()),
                    ExtractedReviewRecord.TYPE_CONTENT,
                    tabooSpans);
        }
        // Negative content query: same treatment with polarity -1.
        sq = type2query.get(MINUS_ONE);
        if (sq != null) {
            runSpan(searcher, sq, -1, domain, erc,
                    formSetForHighLighting(sq,
                            vocabs.get(domain).getWordsOfInterest()),
                    ExtractedReviewRecord.TYPE_CONTENT,
                    tabooSpans);
        }
        // Title queries: no highlighting set and no taboo filtering.
        sq = type2query.get(TITLE_PLUS_ONE);
        if (sq != null) {
            runSpan(searcher, sq, 1, domain, erc, null,
                    ExtractedReviewRecord.TYPE_TITLE,
                    null);
        }
        sq = type2query.get(TITLE_MINUS_ONE);
        if (sq != null) {
            runSpan(searcher, sq, -1, domain, erc, null,
                    ExtractedReviewRecord.TYPE_TITLE,
                    null);
        }
    }
    try {
        searcher.close();
    } catch (IOException ioe) {
        logger.error("Problem closing indexSearcher", ioe);
    }
    return erc;
}
示例12: addSpan
import org.apache.lucene.search.spans.Spans; //导入方法依赖的package包/类
/**
 * Adds all the spans occurring in the current document, as
 * CandidateSpans to the specified candidate list, and tells if
 * the enumeration of the spans has finished, or not.
 *
 * @param span
 *            a Span
 * @param list
 *            a candidateList
 * @param hasMoreSpan
 *            a boolean telling if the span enumeration still has
 *            more spans to visit, or not.
 * @return <code>true</code> if there are still more spans to
 *         enumerate,
 *         <code>false</code> if the enumeration has finished.
 * @throws IOException
 */
private boolean addSpan (Spans span, List<CandidateSpan> list,
        boolean hasMoreSpan) throws IOException {
    int position;
    while (hasMoreSpan && span.doc() == currentDocNum) {
        position = findElementPosition(span);
        if (position != -1) {
            list.add(new CandidateSpan(span, position));
            // A candidate was added: advance once and hand control back,
            // reporting whether more spans remain.
            hasMoreSpan = span.next();
            return hasMoreSpan;
        }
        // No element position at this span; keep scanning the document.
        hasMoreSpan = span.next();
    }
    return hasMoreSpan;
}