

Java Spans.next Method Code Examples

This article collects typical usage examples of the Java method org.apache.lucene.search.spans.Spans.next. If you are wondering what exactly Spans.next does, how to call it, or where to find examples of it in use, the curated method code examples below may help. You can also explore further usage examples of the enclosing class, org.apache.lucene.search.spans.Spans.


A total of 12 code examples of the Spans.next method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
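
Before diving into the collected examples, here is a minimal, self-contained sketch of the iteration pattern they all share, written against the Lucene 4.x Spans API that these examples target (newer Lucene releases replaced the boolean Spans.next() with a different iteration API, so this pattern does not carry over unchanged). The class name SpansNextSketch and the printMatches helper are illustrative only and do not come from any of the projects below.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeSet;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.Spans;

public class SpansNextSketch {

  /** Prints one line per span match of the given query. */
  public static void printMatches(IndexReader reader, SpanQuery query) throws IOException {
    // Build the Term -> TermContext map the same way the examples below do.
    Map<Term, TermContext> termContexts = new HashMap<>();
    TreeSet<Term> terms = new TreeSet<>();
    query.extractTerms(terms);
    for (Term term : terms) {
      termContexts.put(term, TermContext.build(reader.getContext(), term));
    }
    // Iterate segment by segment; Spans.next() advances to the next match
    // and returns false once the enumeration is exhausted.
    for (AtomicReaderContext leaf : reader.leaves()) {
      Spans spans = query.getSpans(leaf, leaf.reader().getLiveDocs(), termContexts);
      while (spans.next()) {
        int docId = leaf.docBase + spans.doc();   // segment-local doc -> index-wide doc
        System.out.println("doc=" + docId
            + " start=" + spans.start()
            + " end=" + spans.end());
      }
    }
  }
}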

Example 1: getPayloads

import org.apache.lucene.search.spans.Spans; // import the package/class that the method depends on
private void getPayloads(Collection<byte []> payloads, SpanQuery query)
    throws IOException {
  Map<Term,TermContext> termContexts = new HashMap<>();
  TreeSet<Term> terms = new TreeSet<>();
  query.extractTerms(terms);
  for (Term term : terms) {
    termContexts.put(term, TermContext.build(context, term));
  }
  for (AtomicReaderContext atomicReaderContext : context.leaves()) {
    final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader().getLiveDocs(), termContexts);
    while (spans.next() == true) {
      if (spans.isPayloadAvailable()) {
        Collection<byte[]> payload = spans.getPayload();
        for (byte [] bytes : payload) {
          payloads.add(bytes);
        }
      }
    }
  }
}
 
Developer ID: lamsfoundation, Project: lams, Lines of code: 21, Source file: PayloadSpanUtil.java

Example 2: setCandidateList

import org.apache.lucene.search.spans.Spans; // import the package/class that the method depends on
@Override
protected boolean setCandidateList (List<CandidateSpan> candidateList,
        Spans candidate, boolean hasMoreCandidates,
        List<CandidateSpan> targetList) throws IOException {

    if (!targetList.isEmpty()) {
        CandidateSpan cs;
        CandidateSpan target = targetList.get(0);
        int position;
        while (hasMoreCandidates && candidate.doc() == target.getDoc()) {
            position = findElementPosition(candidate);
            if (position != -1) {
                cs = new CandidateSpan(candidate, position);

                if (isWithinMaxDistance(target, cs)) {
                    candidateList.add(cs);
                }
                else
                    break;
            }
            hasMoreCandidates = candidate.next();
        }
    }
    return hasMoreCandidates;
}
 
Developer ID: KorAP, Project: Krill, Lines of code: 26, Source file: UnorderedElementDistanceSpans.java

Example 3: getPayloads

import org.apache.lucene.search.spans.Spans; // import the package/class that the method depends on
private void getPayloads(Collection<byte []> payloads, SpanQuery query)
    throws IOException {
  Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
  TreeSet<Term> terms = new TreeSet<Term>();
  query.extractTerms(terms);
  for (Term term : terms) {
    termContexts.put(term, TermContext.build(context, term, true));
  }
  for (AtomicReaderContext atomicReaderContext : context.leaves()) {
    final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader().getLiveDocs(), termContexts);
    while (spans.next() == true) {
      if (spans.isPayloadAvailable()) {
        Collection<byte[]> payload = spans.getPayload();
        for (byte [] bytes : payload) {
          payloads.add(bytes);
        }
      }
    }
  }
}
 
Developer ID: pkarmstr, Project: NYBC, Lines of code: 21, Source file: PayloadSpanUtil.java

Example 4: getPayloads

import org.apache.lucene.search.spans.Spans; // import the package/class that the method depends on
private void getPayloads(Collection<byte []> payloads, SpanQuery query)
    throws IOException {
  Map<Term,TermContext> termContexts = new HashMap<Term,TermContext>();
  TreeSet<Term> terms = new TreeSet<Term>();
  query.extractTerms(terms);
  for (Term term : terms) {
    termContexts.put(term, TermContext.build(context, term));
  }
  for (AtomicReaderContext atomicReaderContext : context.leaves()) {
    final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader().getLiveDocs(), termContexts);
    while (spans.next() == true) {
      if (spans.isPayloadAvailable()) {
        Collection<byte[]> payload = spans.getPayload();
        for (byte [] bytes : payload) {
          payloads.add(bytes);
        }
      }
    }
  }
}
 
Developer ID: yintaoxue, Project: read-open-source-code, Lines of code: 21, Source file: PayloadSpanUtil.java

Example 5: setCandidateList

import org.apache.lucene.search.spans.Spans; // import the package/class that the method depends on
@Override
protected boolean setCandidateList (List<CandidateSpan> candidateList,
        Spans candidate, boolean hasMoreCandidates,
        List<CandidateSpan> targetList) throws IOException {

    if (!targetList.isEmpty()) {
        CandidateSpan target = targetList.get(0);
        while (hasMoreCandidates && candidate.doc() == target.getDoc()
                && isWithinMaxDistance(target, candidate)) {
            candidateList.add(new CandidateSpan(candidate));
            hasMoreCandidates = candidate.next();
        }
    }
    return hasMoreCandidates;
}
 
Developer ID: KorAP, Project: Krill, Lines of code: 16, Source file: UnorderedTokenDistanceSpans.java

Example 6: getSpanInfo

import org.apache.lucene.search.spans.Spans; // import the package/class that the method depends on
public static List<String> getSpanInfo (IndexReader reader, SpanQuery query)
        throws IOException {
    Map<Term, TermContext> termContexts = new HashMap<>();
    List<String> spanArray = new ArrayList<>();

    for (LeafReaderContext atomic : reader.leaves()) {
        Bits bitset = atomic.reader().getLiveDocs();
        // Spans spans = NearSpansOrdered();
        Spans spans = query.getSpans(atomic, bitset, termContexts);

        while (spans.next()) {
            StringBuffer payloadString = new StringBuffer();
            int docid = atomic.docBase + spans.doc();
            if (spans.isPayloadAvailable()) {
                for (byte[] payload : spans.getPayload()) {
                    /* retrieve payload for current matching span */

                    payloadString.append(byte2int(payload)).append(",");
                    payloadString.append(byte2int(payload, 2));
                    //			payloadString.append(byte2int(payload, 1));
                    payloadString.append(" (" + payload.length + ")");
                    payloadString.append(" | ");
                };
            };
            spanArray.add("Doc: " + docid + " with " + spans.start() + "-"
                    + spans.end() + " || " + payloadString.toString());
        };
    };
    return spanArray;
}
 
Developer ID: KorAP, Project: Krill, Lines of code: 31, Source file: TestSimple.java

Example 7: testMultipleMatchesPerDoc

import org.apache.lucene.search.spans.Spans; // import the package/class that the method depends on
public void testMultipleMatchesPerDoc() throws Exception {
  PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
          new MaxPayloadFunction());
  TopDocs hits = searcher.search(query, null, 100);
  assertTrue("hits is null and it shouldn't be", hits != null);
  assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);

  //they should all have the exact same score, because they all contain seventy once, and we set
  //all the other similarity factors to be 1

  //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
  assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
  //there should be exactly 10 items that score a 4, all the rest should score a 2
  //The 10 items are: 70 + i*100 where i in [0-9]
  int numTens = 0;
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    ScoreDoc doc = hits.scoreDocs[i];
    if (doc.doc % 10 == 0) {
      numTens++;
      assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
    } else {
      assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
    }
  }
  assertTrue(numTens + " does not equal: " + 10, numTens == 10);
  CheckHits.checkExplanations(query, "field", searcher, true);
  Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
  assertTrue("spans is null and it shouldn't be", spans != null);
  //should be two matches per document
  int count = 0;
  //100 hits times 2 matches per hit, we should have 200 in count
  while (spans.next()) {
    count++;
  }
  assertTrue(count + " does not equal: " + 200, count == 200);
}
 
Developer ID: europeana, Project: search, Lines of code: 37, Source file: TestPayloadTermQuery.java

Example 8: testIgnoreSpanScorer

import org.apache.lucene.search.spans.Spans; // import the package/class that the method depends on
public void testIgnoreSpanScorer() throws Exception {
  PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
          new MaxPayloadFunction(), false);

  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher theSearcher = newSearcher(reader);
  theSearcher.setSimilarity(new FullSimilarity());
  TopDocs hits = searcher.search(query, null, 100);
  assertTrue("hits is null and it shouldn't be", hits != null);
  assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);

  //they should all have the exact same score, because they all contain seventy once, and we set
  //all the other similarity factors to be 1

  //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
  assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
  //there should be exactly 10 items that score a 4, all the rest should score a 2
  //The 10 items are: 70 + i*100 where i in [0-9]
  int numTens = 0;
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    ScoreDoc doc = hits.scoreDocs[i];
    if (doc.doc % 10 == 0) {
      numTens++;
      assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
    } else {
      assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
    }
  }
  assertTrue(numTens + " does not equal: " + 10, numTens == 10);
  CheckHits.checkExplanations(query, "field", searcher, true);
  Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
  assertTrue("spans is null and it shouldn't be", spans != null);
  //should be two matches per document
  int count = 0;
  //100 hits times 2 matches per hit, we should have 200 in count
  while (spans.next()) {
    count++;
  }
  reader.close();
}
 
Developer ID: europeana, Project: search, Lines of code: 41, Source file: TestPayloadTermQuery.java

Example 9: getSnippet

import org.apache.lucene.search.spans.Spans; // import the package/class that the method depends on
@Override
    public String getSnippet() throws InterruptedException {
        try {
            List<Query> stqs = new ArrayList<>();
            List<Query> nstqs = new ArrayList<>();
            getSpanTermQueries(query, stqs, nstqs);
            List<Span> formSpans = new ArrayList<>();
            for (Query q : stqs) {
                for (AtomicReaderContext context : indexReader.leaves()) {
                    if (Thread.currentThread().isInterrupted()) {
                        throw new InterruptedException("Snippet extraction thread interrupted during processing");
                    }
                    Spans spans = ((SpanTermQuery) q).getSpans(context, null, new HashMap());
                    spans.skipTo(docNumber - context.docBase - 1);
                    boolean cont = true;
                    boolean contextFound = false;
                    while (cont) {
                        int contextSpanDocNumber = context.docBase + spans.doc();
//                        System.out.println(contextSpanDocNumber);
                        if (docNumber == contextSpanDocNumber) {
                            contextFound = true;
                            cont = docNumber - contextSpanDocNumber >= 0;
                            Collection<byte[]> payloads = spans.getPayload();
                            formSpans.add(new Span(spans.doc(), q.toString(), spans.start(), cz.muni.fi.mias.math.PayloadHelper.decodeFloatFromShortBytes(payloads.iterator().next())));
                        }
                        if (!spans.next()) {
                            cont = false;
                        }
                    }
                    if (contextFound) {
                        break;
                    }
                }
            }
            return getSnippet(formSpans, nstqs);
        } catch (IOException ex) {
            LOG.fatal(ex);
        }
        return "";
    }
 
Developer ID: martinliska, Project: MIaS, Lines of code: 41, Source file: NiceSnippetExtractor.java

Example 10: testIgnoreSpanScorer

import org.apache.lucene.search.spans.Spans; // import the package/class that the method depends on
public void testIgnoreSpanScorer() throws Exception {
  PayloadTermQuery query = new PayloadTermQuery(new Term(PayloadHelper.MULTI_FIELD, "seventy"),
          new MaxPayloadFunction(), false);

  IndexReader reader = DirectoryReader.open(directory);
  IndexSearcher theSearcher = new IndexSearcher(reader);
  theSearcher.setSimilarity(new FullSimilarity());
  TopDocs hits = searcher.search(query, null, 100);
  assertTrue("hits is null and it shouldn't be", hits != null);
  assertTrue("hits Size: " + hits.totalHits + " is not: " + 100, hits.totalHits == 100);

  //they should all have the exact same score, because they all contain seventy once, and we set
  //all the other similarity factors to be 1

  //System.out.println("Hash: " + seventyHash + " Twice Hash: " + 2*seventyHash);
  assertTrue(hits.getMaxScore() + " does not equal: " + 4.0, hits.getMaxScore() == 4.0);
  //there should be exactly 10 items that score a 4, all the rest should score a 2
  //The 10 items are: 70 + i*100 where i in [0-9]
  int numTens = 0;
  for (int i = 0; i < hits.scoreDocs.length; i++) {
    ScoreDoc doc = hits.scoreDocs[i];
    if (doc.doc % 10 == 0) {
      numTens++;
      assertTrue(doc.score + " does not equal: " + 4.0, doc.score == 4.0);
    } else {
      assertTrue(doc.score + " does not equal: " + 2, doc.score == 2);
    }
  }
  assertTrue(numTens + " does not equal: " + 10, numTens == 10);
  CheckHits.checkExplanations(query, "field", searcher, true);
  Spans spans = MultiSpansWrapper.wrap(searcher.getTopReaderContext(), query);
  assertTrue("spans is null and it shouldn't be", spans != null);
  //should be two matches per document
  int count = 0;
  //100 hits times 2 matches per hit, we should have 200 in count
  while (spans.next()) {
    count++;
  }
  reader.close();
}
 
Developer ID: pkarmstr, Project: NYBC, Lines of code: 41, Source file: TestPayloadTermQuery.java

Example 11: extract

import org.apache.lucene.search.spans.Spans; // import the package/class that the method depends on
public ExtractedRecordCollector extract(IndexSearcher searcher, PrePostProcessor prePostProcessor)
        throws Exception {

    ExtractedRecordCollector erc = new ExtractedRecordCollector(prePostProcessor);
    IndexReader ir = searcher.getIndexReader();

    Map<Integer, List<HitSpan>> tabooSpans = null;

    if (polite) {
        tabooSpans = new HashMap<Integer, List<HitSpan>>();

        TopDocs topDocs = searcher.search(tabooSQ, 5000);
        if (topDocs.totalHits > 0) {
            Spans spans = tabooSQ.getSpans(ir);
            while (spans.next()) {
                List<HitSpan> ms = tabooSpans.get(spans.doc());
                if (ms != null) {
                    ms.add(new HitSpan(spans.start(), spans.end()));
                } else {
                    ms = new LinkedList<HitSpan>();
                    ms.add(new HitSpan(spans.start(), spans.end()));
                    tabooSpans.put(spans.doc(), ms);
                }
            }
        }
    }

    runSpan(searcher, hardOverridesSQ, -1, "", erc, null,
            ExtractedReviewRecord.TYPE_OVERRIDE,
            null);

    for (String domain : loadedDomains) {
        logger.debug("Domain: " + domain);
        Map<Byte, SpanQuery> type2query = queries.get(domain);

        SpanQuery sq = type2query.get(PLUS_ONE);
        if (sq != null) {
            runSpan(searcher, sq, 1, domain, erc,
                    formSetForHighLighting(sq,
                            vocabs.get(domain).getWordsOfInterest()),
                    ExtractedReviewRecord.TYPE_CONTENT,
                    tabooSpans);
        }

        sq = type2query.get(MINUS_ONE);
        if (sq != null) {
            runSpan(searcher, sq, -1, domain, erc,
                    formSetForHighLighting(sq,
                            vocabs.get(domain).getWordsOfInterest()),
                    ExtractedReviewRecord.TYPE_CONTENT,
                    tabooSpans);
        }

        sq = type2query.get(TITLE_PLUS_ONE);
        if (sq != null) {
            runSpan(searcher, sq, 1, domain, erc, null,
                    ExtractedReviewRecord.TYPE_TITLE,
                    null);
        }

        sq = type2query.get(TITLE_MINUS_ONE);
        if (sq != null) {
            runSpan(searcher, sq, -1, domain, erc, null,
                    ExtractedReviewRecord.TYPE_TITLE,
                    null);
        }
    }

    try {
        searcher.close();
    } catch (IOException ioe) {
        logger.error("Problem closing indexSearcher", ioe);
    }
    return erc;
}
 
Developer ID: groupon, Project: nakala, Lines of code: 76, Source file: ReviewExtractor.java

Example 12: addSpan

import org.apache.lucene.search.spans.Spans; // import the package/class that the method depends on
/**
 * Adds all the spans occurring in the current document, as
 * CandidateSpans, to the specified candidate list, and reports
 * whether the span enumeration still has more spans.
 * 
 * @param span
 *            a Span
 * @param list
 *            a candidateList
 * @param hasMoreSpan
 *            a boolean describing whether the span enumeration
 *            still has more spans.
 * @return <code>true</code> if there are more spans to
 *         enumerate,
 *         <code>false</code> if the enumeration has finished.
 * @throws IOException
 */
private boolean addSpan (Spans span, List<CandidateSpan> list,
        boolean hasMoreSpan) throws IOException {
    int position;
    while (hasMoreSpan && span.doc() == currentDocNum) {
        position = findElementPosition(span);
        if (position != -1) {
            list.add(new CandidateSpan(span, position));
            hasMoreSpan = span.next();
            return hasMoreSpan;
        }
        hasMoreSpan = span.next();
    }
    return hasMoreSpan;
}
 
Developer ID: KorAP, Project: Krill, Lines of code: 32, Source file: UnorderedElementDistanceSpans.java


Note: The org.apache.lucene.search.spans.Spans.next method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by their respective developers, and the source code copyright belongs to the original authors. Please consult the license of the corresponding project before distributing or using the code. Do not reproduce this article without permission.