当前位置: 首页>>代码示例>>Java>>正文


Java PostingsEnum.nextPosition方法代码示例

本文整理汇总了Java中org.apache.lucene.index.PostingsEnum.nextPosition方法的典型用法代码示例。如果您正苦于以下问题:Java PostingsEnum.nextPosition方法的具体用法?Java PostingsEnum.nextPosition怎么用?Java PostingsEnum.nextPosition使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.lucene.index.PostingsEnum的用法示例。


在下文中一共展示了PostingsEnum.nextPosition方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: initValues

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Reads {@code termFreq} positions from {@code posEnum} and stores the per-position
 * data in the {@code current*} arrays at the matching index.
 *
 * <p>Positions, offsets and payloads are only stored when {@code curTerms} reports that
 * the field carries them. A position without a payload is stored as {@code null}.
 *
 * @param curTerms terms of the current field, queried for which data kinds are present
 * @param posEnum  postings enum to read from; {@code nextPosition()} is called
 *                 {@code termFreq} times
 * @param termFreq number of positions to consume
 * @throws IOException if the postings enum fails to read
 */
private void initValues(Terms curTerms, PostingsEnum posEnum, int termFreq) throws IOException {
    // NOTE(review): assumes the current* arrays already hold at least termFreq slots — confirm at the call site.
    for (int j = 0; j < termFreq; j++) {
        // nextPosition() must be called for every occurrence, even when positions are not stored,
        // because it also advances the offsets/payload of the enum.
        int nextPos = posEnum.nextPosition();
        if (curTerms.hasPositions()) {
            currentPositions[j] = nextPos;
        }
        if (curTerms.hasOffsets()) {
            currentStartOffset[j] = posEnum.startOffset();
            currentEndOffset[j] = posEnum.endOffset();
        }
        if (curTerms.hasPayloads()) {
            BytesRef curPayload = posEnum.getPayload();
            if (curPayload != null) {
                // The valid bytes of a BytesRef start at its offset, not necessarily at index 0.
                currentPayloads[j] = new BytesArray(curPayload.bytes, curPayload.offset, curPayload.length);
            } else {
                currentPayloads[j] = null;
            }
        }
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:21,代码来源:TermVectorsResponse.java

示例2: buildEntryValue

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Builds the entry for term {@code t}: a {@code "count"} value followed by one
 * {@code "docN"} sub-list per live document containing the term, each holding the payload
 * of every position as {@code "positionJ"}.
 *
 * @param count  value stored under the {@code "count"} key
 * @param t      term whose postings are read
 * @param leaves pairs of segment reader and the bits used to filter its documents
 * @return the populated entry
 * @throws IOException if reading postings fails
 */
private NamedList<Object> buildEntryValue(long count, Term t, List<Entry<LeafReader, Bits>> leaves) throws IOException {
  NamedList<Object> entry = new NamedList<>();
  entry.add("count", count);
  // Running document counter across all leaves; the first live document becomes "doc0".
  int i = -1;
  for (Entry<LeafReader, Bits> e : leaves) {
    PostingsEnum postings = e.getKey().postings(t, PostingsEnum.PAYLOADS);
    if (postings == null) {
      // The term does not occur in this segment.
      continue;
    }
    Bits liveDocs = e.getValue();
    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      // NOTE(review): a null Bits is treated as "nothing is deleted", which is the Lucene
      // convention for LeafReader.getLiveDocs() — confirm that is where the value comes from.
      if (liveDocs != null && !liveDocs.get(postings.docID())) {
        continue;
      }
      i++;
      NamedList<Object> documentEntry = new NamedList<>();
      entry.add("doc" + i, documentEntry);
      int freq = postings.freq();
      for (int j = 0; j < freq; j++) {
        postings.nextPosition();
        // A position may carry no payload; record null instead of failing.
        org.apache.lucene.util.BytesRef payload = postings.getPayload();
        String extra = payload == null ? null : payload.utf8ToString();
        documentEntry.add("position" + j, extra);
      }
    }
  }
  return entry;
}
 
开发者ID:upenn-libraries,项目名称:solrplugins,代码行数:24,代码来源:ProofOfConceptPayloadHandler.java

示例3: checkBrownFoxTermVector

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Asserts that the term vector stored for {@code fieldName} matches the hard-coded
 * expectations below: eight terms in sorted order, each with its frequency, positions and
 * character offsets, and optionally the payload {@code "word"} on every position.
 *
 * @param fields       term vector fields to inspect
 * @param fieldName    name of the field whose terms are checked
 * @param withPayloads whether every position must carry the payload {@code "word"}
 * @throws IOException if reading the term vector fails
 */
private void checkBrownFoxTermVector(Fields fields, String fieldName, boolean withPayloads) throws IOException {
    final String[] expectedTerms = {"brown", "dog", "fox", "jumps", "lazy", "over", "quick", "the"};
    final int[] expectedFreqs = {1, 1, 1, 1, 1, 1, 1, 2};
    final int[][] expectedPositions = {{2}, {8}, {3}, {4}, {7}, {5}, {1}, {0, 6}};
    final int[][] expectedStarts = {{10}, {40}, {16}, {20}, {35}, {26}, {4}, {0, 31}};
    final int[][] expectedEnds = {{15}, {43}, {19}, {25}, {39}, {30}, {9}, {3, 34}};

    Terms terms = fields.terms(fieldName);
    assertThat(terms.size(), equalTo(8L));
    TermsEnum termsEnum = terms.iterator();

    int termIndex = 0;
    for (String expectedTerm : expectedTerms) {
        BytesRef actualTerm = termsEnum.next();
        assertThat(actualTerm, notNullValue());
        assertThat("expected " + expectedTerm, expectedTerm, equalTo(actualTerm.utf8ToString()));
        assertThat(actualTerm, notNullValue());

        // ttf and doc frequency are deliberately not tested: with many shards the
        // distribution of documents is unknown.
        PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL);
        assertThat(postings.nextDoc(), equalTo(0));

        final int expectedFreq = expectedFreqs[termIndex];
        assertThat(expectedFreq, equalTo(postings.freq()));

        final int[] positions = expectedPositions[termIndex];
        final int[] starts = expectedStarts[termIndex];
        final int[] ends = expectedEnds[termIndex];
        assertThat(positions.length, equalTo(expectedFreq));
        assertThat(starts.length, equalTo(expectedFreq));
        assertThat(ends.length, equalTo(expectedFreq));

        final String reason = "term: " + expectedTerm;
        for (int occurrence = 0; occurrence < expectedFreq; occurrence++) {
            int actualPosition = postings.nextPosition();
            assertThat(reason, actualPosition, equalTo(positions[occurrence]));
            assertThat(reason, postings.startOffset(), equalTo(starts[occurrence]));
            assertThat(reason, postings.endOffset(), equalTo(ends[occurrence]));
            if (withPayloads) {
                assertThat(reason, postings.getPayload(), equalTo(new BytesRef("word")));
            }
        }
        termIndex++;
    }
    // No terms beyond the expected eight.
    assertThat(termsEnum.next(), nullValue());
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:40,代码来源:GetTermVectorsIT.java

示例4: getFeatures

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Computes the two features of a phrase within a single document.
 *
 * <p>{@code features[0]} is {@code Commons.calcTfIdf(freq, docSize, docFreq, numDocs)} and
 * {@code features[1]} is {@code Commons.calcFirstOccurrence(pos, docSize)}, where
 * {@code freq} is the phrase frequency in the document and {@code pos} is its first position.
 *
 * @param ir        index reader to query
 * @param fieldName field holding the phrase
 * @param rawPhrase phrase term bytes
 * @param docId     document whose features are computed
 * @param docSize   size of the document, passed through to the {@code Commons} helpers
 * @param numDocs   number of documents, passed through to {@code calcTfIdf}
 * @param inc       when true, both the document frequency and {@code numDocs} are
 *                  incremented by one before computing TF-IDF
 * @return the two features, or {@code null} when the phrase occurs fewer than two times
 *         in the document
 * @throws IOException      if reading the index fails
 * @throws RuntimeException if the phrase has no postings or does not occur in {@code docId}
 */
static double[] getFeatures(IndexReader ir, String fieldName, BytesRef rawPhrase, int docId, int docSize, int numDocs, boolean inc)
    throws IOException {
  PostingsEnum de = MultiFields.getTermDocsEnum(ir, fieldName, rawPhrase);
  advanceExactlyTo(de, docId);
  int freq = de.freq();
  if (freq < 2) {
    return null;
  }

  PostingsEnum pe = MultiFields.getTermPositionsEnum(ir, fieldName, rawPhrase);
  advanceExactlyTo(pe, docId);

  // Only the first position is needed for the first-occurrence feature.
  int pos = pe.nextPosition();
  int docFreq = ir.docFreq(new Term(fieldName, rawPhrase));
  if (inc) {
    docFreq++;
    numDocs++;
  }

  double[] features = new double[2];
  features[0] = Commons.calcTfIdf(freq, docSize, docFreq, numDocs);
  features[1] = Commons.calcFirstOccurrence(pos, docSize);
  return features;
}

/**
 * Positions {@code postings} on {@code docId}, failing when the term is not in that document.
 * {@code advance} returns the first document at or after the target, so landing on a
 * later document means the term is absent from {@code docId}.
 */
private static void advanceExactlyTo(PostingsEnum postings, int docId) throws IOException {
  if (postings == null) {
    throw new RuntimeException("no postings for the requested term (doc " + docId + ")");
  }
  int landed = postings.advance(docId);
  if (landed == PostingsEnum.NO_MORE_DOCS) {
    throw new RuntimeException("no more docs...");
  }
  if (landed != docId) {
    throw new RuntimeException("term does not occur in doc " + docId + " (advance stopped at doc " + landed + ")");
  }
}
 
开发者ID:kojisekig,项目名称:KEA-lucene,代码行数:32,代码来源:KeyphraseExtractor2.java

示例5: printFieldTermsWithInfo

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Prints the terms indexed under the given fields with full postings information.
 *
 * <p>For every term, each document is printed with its frequency and positions. When at
 * least one position of the document carries a payload, every position is printed on its
 * own line together with its payload. A field with no terms in this reader only gets its
 * header line.
 *
 * @param reader the leaf reader to read terms and postings from
 * @param fields names of the fields to print
 * @throws IOException if reading terms or postings fails
 */
public static void printFieldTermsWithInfo(LeafReader reader, String... fields) throws IOException {
    for (final String field : fields) {
        System.out.println(format("Terms for field [%s], with positional info:", field));
        final org.apache.lucene.index.Terms terms = reader.terms(field);
        if (terms == null) {
            // The field does not exist in this reader, so there is nothing to list.
            continue;
        }
        final TermsEnum te = terms.iterator();
        BytesRef scratch;
        PostingsEnum postings = null;
        while ((scratch = te.next()) != null) {
            System.out.println(format("  %s", scratch.utf8ToString()));
            // Pass the previous enum back in so it can be reused.
            postings = te.postings(postings, PostingsEnum.ALL);
            for (postings.nextDoc(); postings.docID() != DocIdSetIterator.NO_MORE_DOCS; postings.nextDoc()) {
                final int freq = postings.freq();
                // Sorted by position; the value is the payload at that position, or null.
                final Map<Integer, BytesRef> positions = Maps.newTreeMap();
                boolean addedPayload = false;
                for (int i = 0; i < freq; i++) {
                    final int pos = postings.nextPosition();
                    final BytesRef payload = postings.getPayload();
                    if (payload != null) {
                        // Copy the payload because it is kept in the map past the next nextPosition() call.
                        positions.put(pos, BytesRef.deepCopyOf(payload));
                        addedPayload = true;
                    } else {
                        positions.put(pos, null);
                    }
                }
                if (addedPayload) {
                    System.out.println(format("    doc=%d, freq=%d", postings.docID(), freq));
                    for (final Entry<Integer, BytesRef> e : positions.entrySet()) {
                        System.out.println(format("      pos=%d, payload=%s", e.getKey(), e.getValue()));
                    }
                } else {
                    System.out.println(format("    doc=%d, freq=%d, pos=%s", postings.docID(), freq,
                            positions.keySet()));
                }
            }
        }
    }
}
 
开发者ID:shaie,项目名称:lucenelab,代码行数:37,代码来源:IndexUtils.java

示例6: printAnnotations

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Prints the annotations stored as payloads on the postings of {@code term}.
 *
 * <p>Each payload is decoded as two consecutive VInts, printed as {@code start} and
 * {@code length}. Positions without a payload are skipped. If the term has no postings in
 * this reader, only the header line is printed.
 *
 * @param reader the leaf reader to read postings from
 * @param term   the term whose payloads are printed
 * @throws IOException if reading postings fails
 */
public static void printAnnotations(LeafReader reader, Term term) throws IOException {
    System.out.println("Annotations for " + term);
    final PostingsEnum postings = reader.postings(term, PostingsEnum.PAYLOADS);
    if (postings == null) {
        // The term does not occur in this reader.
        return;
    }
    final ByteArrayDataInput in = new ByteArrayDataInput();
    for (int docID = postings.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = postings.nextDoc()) {
        final int freq = postings.freq();
        System.out.println("  doc=" + docID + ", freq=" + freq);
        for (int i = 0; i < freq; i++) {
            postings.nextPosition();
            final BytesRef payload = postings.getPayload();
            if (payload == null) {
                // No payload at this position, hence no annotation to decode.
                continue;
            }
            in.reset(payload.bytes, payload.offset, payload.length);
            System.out.println("    start=" + in.readVInt() + ", length=" + in.readVInt());
        }
    }
}
 
开发者ID:shaie,项目名称:lucenelab,代码行数:16,代码来源:AnnotationsUtils.java

示例7: getTermVectorWithException

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Fetches the term vector of one field of one document and returns it as a map from
 * token position to term text.
 *
 * @param field the field whose term vector is requested
 * @param id    the document id
 * @return position-to-term map; empty when the response contains no terms for the field
 * @throws IOException if reading the term vector fails
 */
private Map<Integer,String> getTermVectorWithException(String field, String id) throws IOException {
    TermVectorsResponse response = client.prepareTermVector(indexName, documentType, id)
            .setOffsets(false)
            .setPositions(true)
            .setFieldStatistics(false)
            .setTermStatistics(false)
            .setSelectedFields(field)
            .execute()
            .actionGet();

    Map<Integer,String> positionToTerm = new HashMap<>();
    Terms terms = response.getFields().terms(field);
    if (terms == null) {
        return positionToTerm;
    }

    TermsEnum termsEnum = terms.iterator();
    PostingsEnum postings = null;
    BytesRef termBytes;
    while ((termBytes = termsEnum.next()) != null) {
        String term = termBytes.utf8ToString();
        postings = termsEnum.postings(postings, PostingsEnum.ALL);

        // The request targets a single id, so there is exactly one document to step onto.
        postings.nextDoc();

        for (int remaining = postings.freq(); remaining > 0; remaining--) {
            positionToTerm.put(postings.nextPosition(), term);
        }
    }
    return positionToTerm;
}
 
开发者ID:cheng-li,项目名称:pyramid,代码行数:35,代码来源:ESIndex.java

示例8: collectTermOffsets

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Collects, for every candidate term in the given term vector, one
 * {@code MWESentenceContext} per occurrence, carrying the term text, sentence id and
 * character offsets.
 *
 * <p>Empty terms and terms not contained in {@code allCandidates} are skipped. The
 * sentence id is decoded from the payload; it is {@code -1} when there is no payload.
 *
 * @param termVectorLookup term vector to scan
 * @return the collected contexts, sorted by their natural order
 * @throws IOException if reading the term vector fails
 */
private List<MWESentenceContext> collectTermOffsets(Terms termVectorLookup) throws IOException {
    List<MWESentenceContext> contexts = new ArrayList<>();

    TermsEnum termsEnum = termVectorLookup.iterator();
    for (BytesRef termBytes = termsEnum.next(); termBytes != null; termBytes = termsEnum.next()) {
        if (termBytes.length == 0) {
            continue;
        }
        String termText = termBytes.utf8ToString();
        if (!allCandidates.contains(termText)) {
            continue;
        }

        PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL);
        // A term vector covers a single document, so at most one doc is expected here.
        if (postings.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
            continue;
        }

        int occurrences = postings.freq();
        for (int seen = 0; seen < occurrences; seen++) {
            postings.nextPosition();
            int start = postings.startOffset();
            int end = postings.endOffset();
            BytesRef payload = postings.getPayload();
            int sentenceId = payload == null
                    ? -1
                    : new SentenceContext(MWEMetadata.deserialize(payload.utf8ToString())).getSentenceId();
            contexts.add(new MWESentenceContext(termText, sentenceId, start, end));
        }
    }

    Collections.sort(contexts);
    return contexts;
}
 
开发者ID:ziqizhang,项目名称:jate,代码行数:41,代码来源:FrequencyCtxSentenceBasedFBWorker.java

示例9: executeNeedleTests

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Indexes the analyzer's needle into a multi-valued field, then checks that the needle can
 * be rebuilt from the stored field values using the offsets of its postings.
 *
 * <p>The directory and reader are closed even when index access or an assertion fails.
 *
 * @param analyzer analyzer used to build the index and to supply the offset gap
 * @throws Exception if indexing, reading or closing fails
 */
private void executeNeedleTests(Analyzer analyzer) throws Exception {

    String needle = getNeedle(analyzer);
    int numFieldValues = 23;
    int numTests = 0;

    // Resources close in reverse order: the reader first, then the directory.
    try (Directory directory = buildNeedleIndex(needle, analyzer, numFieldValues);
         IndexReader reader = DirectoryReader.open(directory)) {

        LeafReaderContext ctx = reader.leaves().get(0);
        LeafReader r = ctx.reader();

        PostingsEnum dpe = r.postings(new Term(FIELD, needle), PostingsEnum.ALL);
        while (dpe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            int frq = dpe.freq();
            int advanced = 0;

            String[] fieldValues = r.document(dpe.docID()).getValues(FIELD);
            // NOTE(review): the pre-increment makes this loop visit frq - 1 positions, which
            // matches the "numFieldValues - 1" expectation below — confirm that skipping the
            // last occurrence is intended.
            while (++advanced < frq) {
                dpe.nextPosition();
                String rebuilt = SimpleAnalyzerUtil.substringFromMultiValuedFields(dpe.startOffset(),
                    dpe.endOffset(), fieldValues, analyzer.getOffsetGap(FIELD), " | ");
                assertEquals(needle, rebuilt);
                numTests++;
            }
        }
    }
    assertEquals("number of tests", numFieldValues - 1, numTests);
}
 
开发者ID:tballison,项目名称:lucene-addons,代码行数:35,代码来源:TestSimpleAnalyzerUtil.java

示例10: validateResponse

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Compares an Elasticsearch term vectors response with the term vectors read directly from
 * Lucene for the same document.
 *
 * <p>Fields that were not selected must be absent from the response. For every other field
 * the terms, total term frequencies and per-document frequencies must match. Positions,
 * offsets and payloads must match when they are both stored and requested; otherwise the
 * response must report {@code -1} for positions and offsets and {@code null} for payloads.
 *
 * @param esResponse   response under test
 * @param luceneFields reference term vectors
 * @param testConfig   describes the document, the selected fields and the requested data
 * @throws IOException if reading either term vector fails
 */
protected void validateResponse(TermVectorsResponse esResponse, Fields luceneFields, TestConfig testConfig) throws IOException {
    assertThat(esResponse.getIndex(), equalTo(testConfig.doc.index));
    TestDoc testDoc = testConfig.doc;

    HashSet<String> selectedFields;
    if (testConfig.selectedFields == null) {
        selectedFields = null;
    } else {
        selectedFields = new HashSet<>(Arrays.asList(testConfig.selectedFields));
    }

    Fields esTermVectorFields = esResponse.getFields();
    for (TestFieldSetting field : testDoc.fieldSettings) {
        Terms esTerms = esTermVectorFields.terms(field.name);
        boolean deselected = selectedFields != null && !selectedFields.contains(field.name);
        if (deselected) {
            assertNull(esTerms);
            continue;
        }
        assertNotNull(esTerms);

        TermsEnum esTermEnum = esTerms.iterator();
        TermsEnum luceneTermEnum = luceneFields.terms(field.name).iterator();

        // What the response is expected to carry for this field.
        final boolean expectPositions = field.storedPositions && testConfig.requestPositions;
        final boolean expectOffsets = field.storedOffset && testConfig.requestOffsets;
        final boolean expectPayloads = field.storedPayloads && testConfig.requestPayloads;

        while (esTermEnum.next() != null) {
            assertNotNull(luceneTermEnum.next());
            assertThat(esTermEnum.totalTermFreq(), equalTo(luceneTermEnum.totalTermFreq()));

            PostingsEnum esPostings = esTermEnum.postings(null, PostingsEnum.POSITIONS);
            PostingsEnum lucenePostings = luceneTermEnum.postings(null, PostingsEnum.POSITIONS);
            if (lucenePostings == null) {
                // Only acceptable when the field stores no positional data at all.
                assertFalse(field.storedOffset);
                assertFalse(field.storedPayloads);
                assertFalse(field.storedPositions);
                continue;
            }

            String currentTerm = esTermEnum.term().utf8ToString();
            assertThat("Token mismatch for field: " + field.name, currentTerm, equalTo(luceneTermEnum.term().utf8ToString()));

            esPostings.nextDoc();
            lucenePostings.nextDoc();

            int freq = esPostings.freq();
            assertThat(freq, equalTo(lucenePostings.freq()));

            final String failDesc = " (field:" + field.name + " term:" + currentTerm + ")";
            for (int occurrence = 0; occurrence < freq; occurrence++) {
                int lucenePos = lucenePostings.nextPosition();
                int esPos = esPostings.nextPosition();

                if (expectPositions) {
                    assertThat("Position test failed" + failDesc, lucenePos, equalTo(esPos));
                } else {
                    assertThat("Missing position test failed" + failDesc, esPos, equalTo(-1));
                }

                if (expectOffsets) {
                    assertThat("Offset test failed" + failDesc, lucenePostings.startOffset(), equalTo(esPostings.startOffset()));
                    assertThat("Offset test failed" + failDesc, lucenePostings.endOffset(), equalTo(esPostings.endOffset()));
                } else {
                    assertThat("Missing offset test failed" + failDesc, esPostings.startOffset(), equalTo(-1));
                    assertThat("Missing offset test failed" + failDesc, esPostings.endOffset(), equalTo(-1));
                }

                if (expectPayloads) {
                    assertThat("Payload test failed" + failDesc, lucenePostings.getPayload(), equalTo(esPostings.getPayload()));
                } else {
                    assertThat("Missing payload test failed" + failDesc, esPostings.getPayload(), equalTo(null));
                }
            }
        }
        assertNull("Es returned terms are done but lucene isn't", luceneTermEnum.next());
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:69,代码来源:AbstractTermVectorsTestCase.java

示例11: checkWithoutFieldStatistics

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Requests term vectors for document {@code i} with term statistics enabled and field
 * statistics disabled, then verifies both the Lucene-style view and the JSON rendering.
 *
 * <p>Field-level statistics must be reported as {@code -1}; per-term statistics must
 * reflect {@code numDocs}.
 *
 * @param numDocs     number of indexed documents, used for the expected ttf and doc freq
 * @param values      expected terms in iteration order
 * @param freq        expected in-document frequency per term
 * @param pos         expected positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document to request
 * @throws IOException if reading the response or building the JSON fails
 */
private void checkWithoutFieldStatistics(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset, int[][] endOffset,
        int i) throws IOException {
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i)).setPayloads(true).setOffsets(true)
            .setPositions(true).setTermStatistics(true).setFieldStatistics(false).setSelectedFields();
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    // Field statistics were not requested, so all three are reported as -1.
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) -1));
    assertThat(terms.getDocCount(), Matchers.equalTo(-1));
    assertThat(terms.getSumDocFreq(), equalTo((long) -1));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));
        // "the" occurs twice per document, every other term once.
        if (string.equals("the")) {
            assertThat("expected ttf of " + string, numDocs * 2, equalTo((int) iterator.totalTermFreq()));
        } else {
            assertThat("expected ttf of " + string, numDocs, equalTo((int) iterator.totalTermFreq()));
        }

        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(numDocs));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());

    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    // Pass the empty params object rather than null, as checkAllInfo does.
    response.toXContent(xBuilder, ToXContent.EMPTY_PARAMS);
    // "took" varies between runs, so it is removed before comparing.
    String utf8 = xBuilder.bytes().utf8ToString().replaceFirst("\"took\":\\d+,", "");
    // NOTE(review): the expected JSON hard-codes doc_freq 15, so this presumably only passes
    // when numDocs is 15 — confirm against the caller.
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\""
            + i
            + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));

}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:56,代码来源:GetTermVectorsCheckDocFreqIT.java

示例12: checkWithoutTermStatistics

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Requests term vectors for document {@code i} with field statistics enabled and term
 * statistics disabled, then verifies both the Lucene-style view and the JSON rendering.
 *
 * <p>Per-term statistics (ttf and doc freq) must be reported as {@code -1}; field-level
 * statistics must reflect {@code numDocs}.
 *
 * @param numDocs     number of indexed documents, used for the expected field statistics
 * @param values      expected terms in iteration order
 * @param freq        expected in-document frequency per term
 * @param pos         expected positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document to request
 * @throws IOException if reading the response or building the JSON fails
 */
private void checkWithoutTermStatistics(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset, int[][] endOffset,
        int i) throws IOException {
    TermVectorsRequestBuilder resp = client().prepareTermVectors("test", "type1", Integer.toString(i)).setPayloads(true).setOffsets(true)
            .setPositions(true).setTermStatistics(false).setFieldStatistics(true).setSelectedFields();
    assertThat(resp.request().termStatistics(), equalTo(false));
    TermVectorsResponse response = resp.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));
    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));
    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    // The expected sum of term frequencies is nine per document.
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));
    TermsEnum iterator = terms.iterator();
    for (int j = 0; j < values.length; j++) {
        String string = values[j];
        BytesRef next = iterator.next();
        assertThat(next, Matchers.notNullValue());
        assertThat("expected " + string, string, equalTo(next.utf8ToString()));

        // Term statistics were not requested, so ttf and doc freq are reported as -1.
        assertThat("expected ttf of " + string, -1, equalTo((int) iterator.totalTermFreq()));

        PostingsEnum docsAndPositions = iterator.postings(null, PostingsEnum.ALL);
        assertThat(docsAndPositions.nextDoc(), equalTo(0));
        assertThat(freq[j], equalTo(docsAndPositions.freq()));
        assertThat(iterator.docFreq(), equalTo(-1));
        int[] termPos = pos[j];
        int[] termStartOffset = startOffset[j];
        int[] termEndOffset = endOffset[j];
        assertThat(termPos.length, equalTo(freq[j]));
        assertThat(termStartOffset.length, equalTo(freq[j]));
        assertThat(termEndOffset.length, equalTo(freq[j]));
        for (int k = 0; k < freq[j]; k++) {
            int nextPosition = docsAndPositions.nextPosition();
            assertThat("term: " + string, nextPosition, equalTo(termPos[k]));
            assertThat("term: " + string, docsAndPositions.startOffset(), equalTo(termStartOffset[k]));
            assertThat("term: " + string, docsAndPositions.endOffset(), equalTo(termEndOffset[k]));
            assertThat("term: " + string, docsAndPositions.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(iterator.next(), Matchers.nullValue());

    XContentBuilder xBuilder = XContentFactory.jsonBuilder();
    // Pass the empty params object rather than null, as checkAllInfo does.
    response.toXContent(xBuilder, ToXContent.EMPTY_PARAMS);
    // "took" varies between runs, so it is removed before comparing.
    String utf8 = xBuilder.bytes().utf8ToString().replaceFirst("\"took\":\\d+,", "");
    // NOTE(review): the expected JSON hard-codes the field statistics of 15 documents, so this
    // presumably only passes when numDocs is 15 — confirm against the caller.
    String expectedString = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\""
            + i
            + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(utf8, equalTo(expectedString));

}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:54,代码来源:GetTermVectorsCheckDocFreqIT.java

示例13: checkAllInfo

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Requests term vectors for document {@code i} with both field and term statistics
 * enabled, then verifies the Lucene-style view and the JSON rendering of the response.
 *
 * @param numDocs     number of indexed documents, used for the expected statistics
 * @param values      expected terms in iteration order
 * @param freq        expected in-document frequency per term
 * @param pos         expected positions per term
 * @param startOffset expected start offsets per term
 * @param endOffset   expected end offsets per term
 * @param i           id of the document to request
 * @throws IOException if reading the response or building the JSON fails
 */
private void checkAllInfo(int numDocs, String[] values, int[] freq, int[][] pos, int[][] startOffset, int[][] endOffset, int i)
        throws IOException {
    TermVectorsRequestBuilder requestBuilder = client().prepareTermVectors("test", "type1", Integer.toString(i))
            .setPayloads(true)
            .setOffsets(true)
            .setPositions(true)
            .setFieldStatistics(true)
            .setTermStatistics(true)
            .setSelectedFields();
    assertThat(requestBuilder.request().fieldStatistics(), equalTo(true));

    TermVectorsResponse response = requestBuilder.execute().actionGet();
    assertThat("doc id: " + i + " doesn't exists but should", response.isExists(), equalTo(true));

    Fields fields = response.getFields();
    assertThat(fields.size(), equalTo(1));

    Terms terms = fields.terms("field");
    assertThat(terms.size(), equalTo(8L));
    assertThat(terms.getSumTotalTermFreq(), Matchers.equalTo((long) (9 * numDocs)));
    assertThat(terms.getDocCount(), Matchers.equalTo(numDocs));
    assertThat(terms.getSumDocFreq(), equalTo((long) numDocs * values.length));

    TermsEnum termsEnum = terms.iterator();
    for (int termIndex = 0; termIndex < values.length; termIndex++) {
        final String expectedTerm = values[termIndex];
        BytesRef actualTerm = termsEnum.next();
        assertThat(actualTerm, Matchers.notNullValue());
        assertThat("expected " + expectedTerm, expectedTerm, equalTo(actualTerm.utf8ToString()));
        assertThat(actualTerm, Matchers.notNullValue());

        // "the" occurs twice per document, every other term once.
        final int expectedTtf = expectedTerm.equals("the") ? numDocs * 2 : numDocs;
        assertThat("expected ttf of " + expectedTerm, expectedTtf, equalTo((int) termsEnum.totalTermFreq()));

        PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL);
        assertThat(postings.nextDoc(), equalTo(0));
        assertThat(freq[termIndex], equalTo(postings.freq()));
        assertThat(termsEnum.docFreq(), equalTo(numDocs));

        final int[] positions = pos[termIndex];
        final int[] starts = startOffset[termIndex];
        final int[] ends = endOffset[termIndex];
        assertThat(positions.length, equalTo(freq[termIndex]));
        assertThat(starts.length, equalTo(freq[termIndex]));
        assertThat(ends.length, equalTo(freq[termIndex]));

        final String reason = "term: " + expectedTerm;
        for (int occurrence = 0; occurrence < freq[termIndex]; occurrence++) {
            int actualPosition = postings.nextPosition();
            assertThat(reason, actualPosition, equalTo(positions[occurrence]));
            assertThat(reason, postings.startOffset(), equalTo(starts[occurrence]));
            assertThat(reason, postings.endOffset(), equalTo(ends[occurrence]));
            assertThat(reason, postings.getPayload(), equalTo(new BytesRef("word")));
        }
    }
    assertThat(termsEnum.next(), Matchers.nullValue());

    XContentBuilder jsonBuilder = XContentFactory.jsonBuilder();
    response.toXContent(jsonBuilder, ToXContent.EMPTY_PARAMS);
    // "took" varies between runs, so it is removed before comparing.
    String actualJson = jsonBuilder.bytes().utf8ToString().replaceFirst("\"took\":\\d+,", "");
    String expectedJson = "{\"_index\":\"test\",\"_type\":\"type1\",\"_id\":\""
            + i
            + "\",\"_version\":1,\"found\":true,\"term_vectors\":{\"field\":{\"field_statistics\":{\"sum_doc_freq\":120,\"doc_count\":15,\"sum_ttf\":135},\"terms\":{\"brown\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":2,\"start_offset\":10,\"end_offset\":15,\"payload\":\"d29yZA==\"}]},\"dog\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":8,\"start_offset\":40,\"end_offset\":43,\"payload\":\"d29yZA==\"}]},\"fox\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":3,\"start_offset\":16,\"end_offset\":19,\"payload\":\"d29yZA==\"}]},\"jumps\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":4,\"start_offset\":20,\"end_offset\":25,\"payload\":\"d29yZA==\"}]},\"lazy\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":7,\"start_offset\":35,\"end_offset\":39,\"payload\":\"d29yZA==\"}]},\"over\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":5,\"start_offset\":26,\"end_offset\":30,\"payload\":\"d29yZA==\"}]},\"quick\":{\"doc_freq\":15,\"ttf\":15,\"term_freq\":1,\"tokens\":[{\"position\":1,\"start_offset\":4,\"end_offset\":9,\"payload\":\"d29yZA==\"}]},\"the\":{\"doc_freq\":15,\"ttf\":30,\"term_freq\":2,\"tokens\":[{\"position\":0,\"start_offset\":0,\"end_offset\":3,\"payload\":\"d29yZA==\"},{\"position\":6,\"start_offset\":31,\"end_offset\":34,\"payload\":\"d29yZA==\"}]}}}}}";
    assertThat(actualJson, equalTo(expectedJson));
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:56,代码来源:GetTermVectorsCheckDocFreqIT.java

示例14: map

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Sorts the terms of a term vector into the context windows surrounding the current span.
 *
 * <p>Five inclusive, non-overlapping position windows are derived from {@code spanStart} /
 * {@code spanEnd} and the window sizes {@code primaryWS}, {@code adjWS} and {@code secWS}:
 * the primary window around the span, one adjacent window on each side of it, and one
 * secondary window beyond each adjacent window. Every occurrence of every non-entity term
 * that falls in one of the windows is added to the matching list on {@code passage}.
 * Bigrams are recorded for the primary window only.
 *
 * @param terms term vector to read; {@code null} means there is nothing to map
 * @param spans not read by this method
 * @throws IOException if reading the term vector fails
 */
public void map(Terms terms, Spans spans) throws IOException {
    if (terms == null) {
        // The original tested for null but then dereferenced terms anyway.
        return;
    }

    // start and end of the primary window
    final int primStart = spanStart - primaryWS;
    final int primEnd = spanEnd + primaryWS;
    // start and end of the adjacent previous and following windows
    final int adjLBStart = primStart - adjWS;
    final int adjLBEnd = primStart - 1; // don't overlap the primary window
    final int adjUBStart = primEnd + 1; // don't overlap the primary window
    final int adjUBEnd = primEnd + adjWS;
    // start and end of the secondary previous and following windows
    final int secLBStart = adjLBStart - secWS;
    final int secLBEnd = adjLBStart - 1; // don't overlap the adjacent window
    final int secUBStart = adjUBEnd + 1;
    final int secUBEnd = adjUBEnd + secWS;

    // Offsets are nice to have but not required; ask the term vector whether they exist
    // instead of null-checking a postings enum that has already been dereferenced.
    final boolean hasOffsets = terms.hasOffsets();

    WindowTerm lastWT = null;
    TermsEnum termsEnum = terms.iterator();
    BytesRef termref;
    while ((termref = termsEnum.next()) != null) {
        String term = termref.utf8ToString();
        // Filter out the named-entity type markers before paying for a postings enum.
        if (term.startsWith(NameFilter.NE_PREFIX)
                || term.startsWith(PassageRankingComponent.NE_PREFIX_LOWER)) {
            continue;
        }

        PostingsEnum postings = termsEnum.postings(null, PostingsEnum.PAYLOADS | PostingsEnum.OFFSETS);
        // NO_MORE_DOCS is inherited from DocIdSetIterator; positions are undefined past it.
        if (postings.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
            continue;
        }

        // Visit every occurrence of the term, not just the first one.
        final int freq = postings.freq();
        for (int i = 0; i < freq; i++) {
            int position = postings.nextPosition();
            // Upfront inclusive check so anything outside all windows is skipped.
            if (position < secLBStart || position > secUBEnd) {
                continue;
            }

            WindowTerm wt = hasOffsets
                    ? new WindowTerm(term, position, postings.startOffset(), postings.endOffset())
                    : new WindowTerm(term, position);

            if (position >= primStart && position <= primEnd) { // primary window
                passage.terms.add(wt);
                // Bigrams are only kept for the primary window; offsets don't matter for them.
                if (lastWT != null) {
                    passage.bigrams.add(new WindowTerm(lastWT.term + "," + term, lastWT.position));
                }
                lastWT = wt;
            } else if (position >= secLBStart && position <= secLBEnd) { // secondary previous
                passage.secPrevTerms.add(wt);
            } else if (position >= secUBStart && position <= secUBEnd) { // secondary following
                passage.secFollowTerms.add(wt);
            } else if (position >= adjLBStart && position <= adjLBEnd) { // adjacent previous
                passage.prevTerms.add(wt);
            } else if (position >= adjUBStart && position <= adjUBEnd) { // adjacent following
                passage.followTerms.add(wt);
            }
        }
    }
}
 
开发者ID:asmehra95,项目名称:wiseowl,代码行数:69,代码来源:WindowBuildingTVM.java

示例15: buildEntryValue

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Builds the response entry for one term: its document count, a "self" section and, when
 * present, the references decoded from the term's payloads.
 *
 * <p>For every live document containing {@code t}, each distinct payload is processed once.
 * A payload containing {@code JsonReferencePayloadTokenizer.PAYLOAD_ATTR_SEPARATOR} is split
 * into a reference type (before the separator) and a target (after it), and the counter for
 * that type/display-name pair is incremented. An occurrence without a payload counts towards
 * the term itself, at most once per document. Payloads without the separator are ignored.
 *
 * @param term   parsed form of the term, supplying filing and optional prefix for "self"
 * @param count  document count reported under {@code KEY_COUNT}
 * @param t      term whose postings are read
 * @param leaves index leaves paired with their live-docs bits ({@code null} bits = all live)
 * @return the populated entry; {@code KEY_REFS} is added only if at least one reference was found
 * @throws IOException if reading postings fails
 */
private NamedList<Object> buildEntryValue(MultiPartString term, long count, Term t, List<Entry<LeafReader, Bits>> leaves) throws IOException {
  NamedList<Object> entry = new NamedList<>();

  // document count for this term
  entry.add(KEY_COUNT, count);

  NamedList<Object> self = new NamedList<>();
  entry.add(KEY_SELF, self);

  self.add(KEY_COUNT, 0L);
  overwriteInNamedList(self, KEY_FILING, term.getFiling());
  if (term.getPrefix() != null) {
    overwriteInNamedList(self, KEY_PREFIX, term.getPrefix());
  }

  NamedList<Object> refs = new NamedList<>();
  Set<BytesRef> trackDuplicates = new HashSet<>();

  for (Entry<LeafReader, Bits> e : leaves) {
    PostingsEnum postings = e.getKey().postings(t, PostingsEnum.PAYLOADS);
    if (postings == null) {
      continue; // term does not occur in this leaf
    }
    Bits liveDocs = e.getValue();
    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      if (liveDocs != null && !liveDocs.get(postings.docID())) {
        continue; // deleted document
      }
      trackDuplicates.clear();
      final int freq = postings.freq(); // constant for the current document
      for (int j = 0; j < freq; j++) {
        postings.nextPosition();

        BytesRef payload = postings.getPayload();
        // The enum may return the same BytesRef instance over a reused buffer on every
        // position, so the set must own a private copy of the bytes to compare reliably.
        // A null payload is tracked as null, which limits "self" to one count per document.
        BytesRef dedupKey = payload == null ? null : BytesRef.deepCopyOf(payload);
        if (!trackDuplicates.add(dedupKey)) {
          continue;
        }

        if (payload == null) {
          // no payload means term is for self, so increment count
          incrementLongInNamedList(self, KEY_COUNT);
          continue;
        }

        String payloadStr = payload.utf8ToString();
        int pos = payloadStr.indexOf(JsonReferencePayloadTokenizer.PAYLOAD_ATTR_SEPARATOR);
        if (pos == -1) {
          continue; // not a "type<separator>target" payload
        }
        String referenceType = payloadStr.substring(0, pos);
        String target = payloadStr.substring(pos + 1);

        MultiPartString multiPartString = MultiPartString.parseFilingAndPrefix(target);
        String displayName = multiPartString.getDisplay();

        NamedList<Object> displayNameStructs = getOrCreateNamedListValue(refs, referenceType);
        NamedList<Object> nameStruct = getOrCreateNamedListValue(displayNameStructs, displayName);

        incrementLongInNamedList(nameStruct, KEY_COUNT);
        overwriteInNamedList(nameStruct, KEY_FILING, multiPartString.getFiling());
        if (multiPartString.getPrefix() != null) {
          overwriteInNamedList(nameStruct, KEY_PREFIX, multiPartString.getPrefix());
        }

        // NOTE: reading a ReferenceAttribute through postings.attributes() was tried here and
        // yielded nothing, which is why the reference is decoded from the payload instead.
      }
    }
  }

  if (refs.size() > 0) {
    entry.add(KEY_REFS, refs);
  }

  return entry;
}
 
开发者ID:upenn-libraries,项目名称:solrplugins,代码行数:80,代码来源:JsonReferencePayloadHandler.java


注:本文中的org.apache.lucene.index.PostingsEnum.nextPosition方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。