当前位置: 首页>>代码示例>>Java>>正文


Java PostingsEnum.freq方法代码示例

本文整理汇总了Java中org.apache.lucene.index.PostingsEnum.freq方法的典型用法代码示例。如果您正苦于以下问题:Java PostingsEnum.freq方法的具体用法?Java PostingsEnum.freq怎么用?Java PostingsEnum.freq使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.index.PostingsEnum的用法示例。


在下文中一共展示了PostingsEnum.freq方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: buildTerm

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Writes the next term of {@code termIter} as an object keyed by the term text.
 * The object holds the term statistics, the term frequency, the values prepared by
 * {@code initValues}/{@code buildValues}, and the score.
 */
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // Advance to the next term and open an object named after its UTF-8 text.
    final BytesRef termBytes = termIter.next();
    spare.copyUTF8Bytes(termBytes);
    final String termText = spare.toString();
    builder.startObject(termText);
    buildTermStatistics(builder, termIter);

    // Term-vector part: the frequency is written first and sizes everything that follows.
    final PostingsEnum postings = termIter.postings(null, PostingsEnum.ALL);
    final int frequency = postings.freq();
    builder.field(FieldStrings.TERM_FREQ, frequency);
    initMemory(curTerms, frequency);
    initValues(curTerms, postings, frequency);
    buildValues(builder, curTerms, frequency);
    buildScore(builder, boostAtt);

    builder.endObject();
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:17,代码来源:TermVectorsResponse.java

示例2: buildEntryValue

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Builds a response entry for term {@code t}.
 *
 * <p>The entry starts with {@code "count"} and then holds one {@code "docN"} sub-list per
 * matching live document (N counts up from 0 across all leaves). Each sub-list maps
 * {@code "positionJ"} to the UTF-8 text of the payload at the J-th position, or to
 * {@code null} when that position carries no payload.
 *
 * @param count  value stored under {@code "count"}
 * @param t      term whose postings are listed
 * @param leaves leaf readers paired with their live-docs bits; a {@code null} bits value
 *               is treated as "every document is live"
 * @return the populated entry
 * @throws IOException if reading the postings fails
 */
private NamedList<Object> buildEntryValue(long count, Term t, List<Entry<LeafReader, Bits>> leaves) throws IOException {
  NamedList<Object> entry = new NamedList<>();
  entry.add("count", count);
  int docIndex = -1;
  for (Entry<LeafReader, Bits> e : leaves) {
    PostingsEnum postings = e.getKey().postings(t, PostingsEnum.PAYLOADS);
    if (postings == null) {
      // This leaf does not contain the term (or the field) at all.
      continue;
    }
    Bits liveDocs = e.getValue();
    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      // Null live-docs means the leaf has no deletions to filter out.
      if (liveDocs != null && !liveDocs.get(postings.docID())) {
        continue;
      }
      docIndex++;
      NamedList<Object> documentEntry = new NamedList<>();
      entry.add("doc" + docIndex, documentEntry);
      final int freq = postings.freq();
      for (int j = 0; j < freq; j++) {
        postings.nextPosition();
        // getPayload() is null for positions indexed without a payload; it is read twice
        // here only to avoid naming a type this method did not previously reference.
        String extra = postings.getPayload() == null ? null : postings.getPayload().utf8ToString();
        documentEntry.add("position" + j, extra);
      }
    }
  }
  return entry;
}
 
开发者ID:upenn-libraries,项目名称:solrplugins,代码行数:24,代码来源:ProofOfConceptPayloadHandler.java

示例3: getEntropy

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Gets the 1 - entropy (i.e. 1 + p log p) weight of a term,
 * a function that favors terms that are focally distributed.
 * We use the definition of log-entropy weighting provided in
 * Martin and Berry (2007):
 * Entropy = 1 + sum ((Pij log2(Pij)) / log2(n))
 * where Pij = frequency of term i in doc j / global frequency of term i
 *         n = number of documents in collection
 *
 * <p>Results are cached in {@code termEntropy}. When the term has no postings, its global
 * frequency is not positive, or the collection holds fewer than two documents, the
 * normalised sum is taken as 0 and the weight is 1. This avoids NaN/Infinity results from
 * dividing by a zero frequency or by log2(1) = 0.
 *
 * @param term whose entropy you want
 * @return the entropy weight; if reading the postings fails, the failure is logged and the
 *         weight is computed from whatever was accumulated so far
 * Thanks to Vidya Vasuki for adding the hash table to
 * eliminate redundant calculation
 */
private float getEntropy(Term term) {
  if (termEntropy.containsKey(term)) {
    return termEntropy.get(term);
  }
  int gf = getGlobalTermFreq(term);
  double entropy = 0;
  try {
    PostingsEnum docsEnum = this.getDocsForTerm(term);
    // Without postings or a positive global frequency there is nothing to sum.
    if (docsEnum != null && gf > 0) {
      while ((docsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
        double p = docsEnum.freq(); //frequency in this document
        p = p / gf;    //frequency across all documents
        entropy += p * (Math.log(p) / Math.log(2)); //sum of Plog(P)
      }
    }
    int n = this.getNumDocs();
    if (n > 1) {
      double log2n = Math.log(n) / Math.log(2);
      entropy = entropy / log2n;
    } else {
      // log2(n) is 0 (or undefined) here; dividing would yield NaN or Infinity.
      entropy = 0;
    }
  } catch (IOException e) {
    logger.info("Couldn't get term entropy for term " + term.text());
  }
  termEntropy.put(term, 1 + (float) entropy);
  return (float) (1 + entropy);
}
 
开发者ID:semanticvectors,项目名称:semanticvectors,代码行数:34,代码来源:LuceneUtils.java

示例4: addTermFrequencies

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Accumulates the term frequencies of a term vector into {@code termFreqMap}.
 * Terms rejected by {@code isNoiseWord} or {@code isSkipTerm} are ignored.
 *
 * @param termFreqMap map from term text to its running frequency counter
 * @param vector terms (with frequencies) of one doc/field
 * @param fieldName optional field name, passed to the skip-term check
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName) throws IOException {
    final TermsEnum terms = vector.iterator();
    final CharsRefBuilder scratch = new CharsRefBuilder();
    for (BytesRef bytes = terms.next(); bytes != null; bytes = terms.next()) {
        scratch.copyUTF8Bytes(bytes);
        final String word = scratch.toString();
        if (isNoiseWord(word) || isSkipTerm(fieldName, word)) {
            continue;
        }

        // Sum the frequency over every posting of this term; a missing enum counts as zero.
        int total = 0;
        final PostingsEnum postings = terms.postings(null);
        if (postings != null) {
            while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                total += postings.freq();
            }
        }

        // Create the counter on first sight of the term, otherwise add to it.
        Int counter = termFreqMap.get(word);
        if (counter != null) {
            counter.x += total;
        } else {
            counter = new Int();
            termFreqMap.put(word, counter);
            counter.x = total;
        }
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:39,代码来源:XMoreLikeThis.java

示例5: compareTermVectors

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Asserts that the term vectors of {@code fieldName} in {@code fields0} and {@code fields1}
 * agree term by term: same text, docFreq, totalTermFreq, first document, frequency, and the
 * same position and offsets for every occurrence.
 */
private void compareTermVectors(String fieldName, Fields fields0, Fields fields1) throws IOException {
    final Terms expectedTerms = fields0.terms(fieldName);
    final Terms actualTerms = fields1.terms(fieldName);
    assertThat(expectedTerms, notNullValue());
    assertThat(actualTerms, notNullValue());
    assertThat(expectedTerms.size(), equalTo(actualTerms.size()));

    final TermsEnum expectedIter = expectedTerms.iterator();
    final TermsEnum actualIter = actualTerms.iterator();
    for (int termIndex = 0; termIndex < expectedTerms.size(); termIndex++) {
        final BytesRef expectedBytes = expectedIter.next();
        assertThat(expectedBytes, notNullValue());
        final BytesRef actualBytes = actualIter.next();
        assertThat(actualBytes, notNullValue());

        // term text
        final String expectedText = expectedBytes.utf8ToString();
        final String actualText = actualBytes.utf8ToString();
        assertThat("expected: " + expectedText, expectedText, equalTo(actualText));
        final String reason = "term: " + expectedText;

        // document frequency and total term frequency
        assertThat(reason, expectedIter.docFreq(), equalTo(actualIter.docFreq()));
        assertThat(reason, expectedIter.totalTermFreq(), equalTo(actualIter.totalTermFreq()));

        // first document and the frequency within it
        final PostingsEnum expectedPostings = expectedIter.postings(null, PostingsEnum.ALL);
        final PostingsEnum actualPostings = actualIter.postings(null, PostingsEnum.ALL);
        assertThat(reason, expectedPostings.nextDoc(), equalTo(actualPostings.nextDoc()));
        assertThat(reason, expectedPostings.freq(), equalTo(actualPostings.freq()));

        // position, start offset and end offset of every occurrence
        final int occurrences = expectedPostings.freq();
        for (int occurrence = 0; occurrence < occurrences; occurrence++) {
            assertThat(reason, expectedPostings.nextPosition(), equalTo(actualPostings.nextPosition()));
            assertThat(reason, expectedPostings.startOffset(), equalTo(actualPostings.startOffset()));
            assertThat(reason, expectedPostings.endOffset(), equalTo(actualPostings.endOffset()));
        }
    }
    // Both iterators must be exhausted together.
    assertThat(expectedIter.next(), nullValue());
    assertThat(actualIter.next(), nullValue());
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:41,代码来源:GetTermVectorsIT.java

示例6: getFeatures

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Computes the features of a phrase within one document.
 *
 * @param ir         index reader to look the phrase up in
 * @param fieldName  field holding the phrase
 * @param rawPhrase  phrase term bytes
 * @param docId      document the features are computed for
 * @param docSize    size of that document, passed to the feature calculations
 * @param numDocs    number of documents, passed to the tf-idf calculation
 * @param inc        when true, both the document frequency and {@code numDocs} are
 *                   incremented by one before computing tf-idf
 * @return {@code {tfIdf, firstOccurrence}}, or {@code null} when the phrase occurs fewer
 *         than two times in the document
 * @throws RuntimeException if the phrase has no postings or does not occur in {@code docId}
 * @throws IOException if reading the index fails
 */
static double[] getFeatures(IndexReader ir, String fieldName, BytesRef rawPhrase, int docId, int docSize, int numDocs, boolean inc)
    throws IOException {
  PostingsEnum de = MultiFields.getTermDocsEnum(ir, fieldName, rawPhrase);
  if (de == null) {
    throw new RuntimeException("no postings for phrase in field " + fieldName);
  }
  int ret = de.advance(docId);
  if (ret == PostingsEnum.NO_MORE_DOCS) {
    throw new RuntimeException("no more docs...");
  }
  if (ret != docId) {
    // advance() stops at the first doc >= docId; landing elsewhere means the phrase is not
    // in docId, and reading freq() here would describe a different document.
    throw new RuntimeException("phrase does not occur in doc " + docId);
  }

  int freq = de.freq();
  if (freq < 2) {
    return null;
  }

  PostingsEnum pe = MultiFields.getTermPositionsEnum(ir, fieldName, rawPhrase);
  if (pe == null) {
    throw new RuntimeException("no positions for phrase in field " + fieldName);
  }
  int ret2 = pe.advance(docId);
  if (ret2 == PostingsEnum.NO_MORE_DOCS) {
    throw new RuntimeException("no more docs...");
  }
  if (ret2 != docId) {
    throw new RuntimeException("phrase does not occur in doc " + docId);
  }

  double[] features = new double[2];
  int pos = pe.nextPosition();
  int docFreq = ir.docFreq(new Term(fieldName, rawPhrase));
  if (inc) {
    docFreq++;
    numDocs++;
  }
  features[0] = Commons.calcTfIdf(freq, docSize, docFreq, numDocs);
  features[1] = Commons.calcFirstOccurrence(pos, docSize);

  return features;
}
 
开发者ID:kojisekig,项目名称:KEA-lucene,代码行数:32,代码来源:KeyphraseExtractor2.java

示例7: printFieldTermsWithInfo

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Prints the terms indexed under the given fields with full postings information.
 *
 * <p>For every term, each document is printed with its frequency and positions. When at
 * least one position carries a payload, each position is printed on its own line together
 * with its payload. A field that the reader does not contain prints only its header line.
 *
 * @param reader leaf reader to dump
 * @param fields names of the fields to print
 * @throws IOException if reading the index fails
 */
public static void printFieldTermsWithInfo(LeafReader reader, String... fields) throws IOException {
    for (final String field : fields) {
        System.out.println(format("Terms for field [%s], with positional info:", field));
        // terms() is null when the field is absent from this reader; it is looked up twice
        // only to avoid naming a type this method did not previously reference.
        if (reader.terms(field) == null) {
            continue;
        }
        final TermsEnum te = reader.terms(field).iterator();
        BytesRef scratch;
        PostingsEnum postings = null;
        while ((scratch = te.next()) != null) {
            System.out.println(format("  %s", scratch.utf8ToString()));
            postings = te.postings(postings, PostingsEnum.ALL);
            for (postings.nextDoc(); postings.docID() != DocIdSetIterator.NO_MORE_DOCS; postings.nextDoc()) {
                final int freq = postings.freq();
                // Sorted by position; the value stays null for positions without a payload.
                final Map<Integer, BytesRef> positions = Maps.newTreeMap();
                boolean addedPayload = false;
                for (int i = 0; i < freq; i++) {
                    final int pos = postings.nextPosition();
                    final BytesRef payload = postings.getPayload();
                    if (payload != null) {
                        // Deep copy because the enum may reuse the payload's backing bytes.
                        positions.put(pos, BytesRef.deepCopyOf(payload));
                        addedPayload = true;
                    } else {
                        positions.put(pos, null);
                    }
                }
                if (addedPayload) {
                    System.out.println(format("    doc=%d, freq=%d", postings.docID(), freq));
                    for (final Entry<Integer, BytesRef> e : positions.entrySet()) {
                        System.out.println(format("      pos=%d, payload=%s", e.getKey(), e.getValue()));
                    }
                } else {
                    System.out.println(format("    doc=%d, freq=%d, pos=%s", postings.docID(), freq,
                            positions.keySet()));
                }
            }
        }
    }
}
 
开发者ID:shaie,项目名称:lucenelab,代码行数:37,代码来源:IndexUtils.java

示例8: printAnnotations

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Prints the annotations stored as payloads on the postings of {@code term}.
 *
 * <p>Each payload is decoded as two variable-length ints, printed as {@code start} and
 * {@code length}. Positions without a payload are skipped, and a term with no postings in
 * this reader prints only the header line.
 *
 * @param reader leaf reader to read the postings from
 * @param term   term whose annotations are printed
 * @throws IOException if reading the index fails
 */
public static void printAnnotations(LeafReader reader, Term term) throws IOException {
    System.out.println("Annotations for " + term);
    final PostingsEnum postings = reader.postings(term, PostingsEnum.PAYLOADS);
    if (postings == null) {
        // The reader does not contain this term (or its field).
        return;
    }
    final ByteArrayDataInput in = new ByteArrayDataInput();
    for (int docID = postings.nextDoc(); docID != DocIdSetIterator.NO_MORE_DOCS; docID = postings.nextDoc()) {
        final int freq = postings.freq();
        System.out.println("  doc=" + docID + ", freq=" + freq);
        for (int i = 0; i < freq; i++) {
            postings.nextPosition();
            final BytesRef payload = postings.getPayload();
            if (payload == null) {
                // Nothing to decode at this position.
                continue;
            }
            in.reset(payload.bytes, payload.offset, payload.length);
            System.out.println("    start=" + in.readVInt() + ", length=" + in.readVInt());
        }
    }
}
 
开发者ID:shaie,项目名称:lucenelab,代码行数:16,代码来源:AnnotationsUtils.java

示例9: getTermVectorWithException

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Fetches the term vector of {@code field} for document {@code id} (positions only) and
 * returns a map from token position to term text. The map is empty when the response has
 * no terms for the field.
 */
private Map<Integer,String> getTermVectorWithException(String field, String id) throws IOException {
    final TermVectorsResponse response = client.prepareTermVector(indexName, documentType, id)
            .setOffsets(false)
            .setPositions(true)
            .setFieldStatistics(false)
            .setTermStatistics(false)
            .setSelectedFields(field)
            .execute()
            .actionGet();

    final Map<Integer,String> termByPosition = new HashMap<>();
    final Terms fieldTerms = response.getFields().terms(field);
    if (fieldTerms == null) {
        return termByPosition;
    }

    final TermsEnum termsEnum = fieldTerms.iterator();
    PostingsEnum reusable = null;
    BytesRef bytes;
    while ((bytes = termsEnum.next()) != null) {
        final String text = bytes.utf8ToString();
        reusable = termsEnum.postings(reusable, PostingsEnum.ALL);

        // The request targets a single id, so only the first document is read.
        reusable.nextDoc();

        final int occurrences = reusable.freq();
        for (int seen = 0; seen < occurrences; seen++) {
            termByPosition.put(reusable.nextPosition(), text);
        }
    }

    return termByPosition;
}
 
开发者ID:cheng-li,项目名称:pyramid,代码行数:35,代码来源:ESIndex.java

示例10: collectTermOffsets

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Collects one {@code MWESentenceContext} per occurrence of every candidate term found in
 * the given term vector, then returns them sorted. Empty terms and terms not in
 * {@code allCandidates} are skipped. The sentence id is read from the payload, or is -1
 * when the occurrence has no payload.
 */
private List<MWESentenceContext> collectTermOffsets(Terms termVectorLookup) throws IOException {
    final List<MWESentenceContext> contexts = new ArrayList<>();

    final TermsEnum termsEnum = termVectorLookup.iterator();
    for (BytesRef bytes = termsEnum.next(); bytes != null; bytes = termsEnum.next()) {
        if (bytes.length == 0) {
            continue;
        }
        final String text = bytes.utf8ToString();
        if (!allCandidates.contains(text)) {
            continue;
        }

        final PostingsEnum postings = termsEnum.postings(null, PostingsEnum.ALL);
        // Only the first document is read; a term vector is expected to hold just one.
        if (postings.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
            continue;
        }

        final int occurrences = postings.freq();
        for (int occurrence = 0; occurrence < occurrences; occurrence++) {
            postings.nextPosition();
            final int start = postings.startOffset();
            final int end = postings.endOffset();
            final BytesRef payload = postings.getPayload();
            final int sentenceId = payload == null
                    ? -1
                    : new SentenceContext(MWEMetadata.deserialize(payload.utf8ToString())).getSentenceId();
            contexts.add(new MWESentenceContext(text, sentenceId, start, end));
        }
    }
    Collections.sort(contexts);
    return contexts;
}
 
开发者ID:ziqizhang,项目名称:jate,代码行数:41,代码来源:FrequencyCtxSentenceBasedFBWorker.java

示例11: executeNeedleTests

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Indexes the analyzer's needle into a multi-valued field, then checks that the substring
 * rebuilt from each visited occurrence's offsets equals the needle. For every matching
 * document, {@code freq - 1} positions are visited. Finally the number of checks is
 * asserted to be one less than the number of field values.
 */
private void executeNeedleTests(Analyzer analyzer) throws Exception {
    final String needle = getNeedle(analyzer);
    final int numFieldValues = 23;

    final Directory directory = buildNeedleIndex(needle, analyzer, numFieldValues);
    final IndexReader reader = DirectoryReader.open(directory);
    final LeafReader leaf = reader.leaves().get(0).reader();

    final PostingsEnum postings = leaf.postings(new Term(FIELD, needle), PostingsEnum.ALL);
    int checked = 0;
    try {
        while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            final int freq = postings.freq();
            final String[] fieldValues = leaf.document(postings.docID()).getValues(FIELD);

            for (int advanced = 1; advanced < freq; advanced++) {
                postings.nextPosition();
                final int start = postings.startOffset();
                final int end = postings.endOffset();
                final String rebuilt = SimpleAnalyzerUtil.substringFromMultiValuedFields(start,
                        end, fieldValues, analyzer.getOffsetGap(FIELD), " | ");
                assertEquals(needle, rebuilt);
                checked++;
            }
        }
    } finally {
        reader.close();
        directory.close();
    }
    assertEquals("number of tests", numFieldValues - 1, checked);
}
 
开发者ID:tballison,项目名称:lucene-addons,代码行数:35,代码来源:TestSimpleAnalyzerUtil.java

示例12: seekExact

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Seeks every wrapped terms enum to {@code text} and aggregates the statistics of the
 * enums that contain it.
 *
 * <p>Enums without a {@code bits} filter contribute their own {@code docFreq()} and, when
 * {@code docsEnumFlag == PostingsEnum.FREQS}, their {@code totalTermFreq()}. Enums with a
 * {@code bits} filter are iterated document by document, counting only documents whose bit
 * is set.
 *
 * @param text the term to seek to
 * @return {@code true} if at least one document was counted; the current term and
 *         statistics are then updated. Otherwise they are reset to {@code NOT_FOUND} /
 *         {@code null} and {@code false} is returned.
 * @throws IOException if reading from an underlying enum fails
 */
@Override
public boolean seekExact(BytesRef text) throws IOException {
    int docFreq = 0;
    long totalTermFreq = 0;
    for (Holder anEnum : enums) {
        if (anEnum.termsEnum.seekExact(text)) {
            if (anEnum.bits == null) {
                // Unfiltered enum: its precomputed statistics can be used directly.
                docFreq += anEnum.termsEnum.docFreq();
                if (docsEnumFlag == PostingsEnum.FREQS) {
                    long leafTotalTermFreq = anEnum.termsEnum.totalTermFreq();
                    // A -1 on either side makes the running total -1, and this enum's
                    // value is not added.
                    if (totalTermFreq == -1 || leafTotalTermFreq == -1) {
                        totalTermFreq = -1;
                        continue;
                    }
                    totalTermFreq += leafTotalTermFreq;
                }
            } else {
                // Filtered enum: walk the postings (reusing the holder's cached enum) and
                // count only documents accepted by the bits.
                final PostingsEnum docsEnum = anEnum.docsEnum = anEnum.termsEnum.postings(anEnum.docsEnum, docsEnumFlag);
                // 2 choices for performing same heavy loop - one attempts to calculate totalTermFreq and other does not
                if (docsEnumFlag == PostingsEnum.FREQS) {
                    for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                        if (anEnum.bits != null && anEnum.bits.get(docId) == false) {
                            continue;
                        }
                        docFreq++;
                        // docsEnum.freq() returns 1 if doc indexed with IndexOptions.DOCS_ONLY so no way of knowing if value
                        // is really 1 or unrecorded when filtering like this
                        totalTermFreq += docsEnum.freq();
                    }
                } else {
                    for (int docId = docsEnum.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                        if (anEnum.bits != null && anEnum.bits.get(docId) == false) {
                            continue;
                        }
                        // docsEnum.freq() behaviour is undefined if docsEnumFlag==PostingsEnum.FLAG_NONE so don't bother with call
                        docFreq++;
                    }
                }
            }
        }
    }
    // The term counts as found only if some document was counted.
    if (docFreq > 0) {
        currentDocFreq = docFreq;
        currentTotalTermFreq = totalTermFreq;
        current = text;
        return true;
    } else {
        currentDocFreq = NOT_FOUND;
        currentTotalTermFreq = NOT_FOUND;
        current = null;
        return false;
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:54,代码来源:FilterableTermsEnum.java

示例13: getTermFreq

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Returns the frequency of the current term of {@code termsEnum} in the first document of
 * its postings. {@code docsEnum} is passed to {@code postings} as the enum to reuse.
 */
private int getTermFreq(TermsEnum termsEnum, PostingsEnum docsEnum) throws IOException {
    final PostingsEnum postings = termsEnum.postings(docsEnum);
    // Move onto the first document before reading its frequency.
    postings.nextDoc();
    final int frequency = postings.freq();
    return frequency;
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:6,代码来源:TermVectorsFilter.java

示例14: validateResponse

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Asserts that the term vectors in {@code esResponse} match the ones read from Lucene.
 *
 * <p>Fields that were not selected must be absent from the response. For every other field,
 * terms are compared one by one: total term frequency, term text, frequency, and for each
 * occurrence the position, offsets and payload. Attributes that are not both stored and
 * requested are expected to be reported as -1 (or a null payload).
 */
protected void validateResponse(TermVectorsResponse esResponse, Fields luceneFields, TestConfig testConfig) throws IOException {
    assertThat(esResponse.getIndex(), equalTo(testConfig.doc.index));
    final TestDoc doc = testConfig.doc;
    HashSet<String> requestedFields = null;
    if (testConfig.selectedFields != null) {
        requestedFields = new HashSet<>(Arrays.asList(testConfig.selectedFields));
    }
    final Fields esFields = esResponse.getFields();
    for (TestFieldSetting field : doc.fieldSettings) {
        final Terms esTerms = esFields.terms(field.name);
        final boolean notSelected = requestedFields != null && !requestedFields.contains(field.name);
        if (notSelected) {
            assertNull(esTerms);
            continue;
        }

        assertNotNull(esTerms);

        final Terms luceneTerms = luceneFields.terms(field.name);
        final TermsEnum esIter = esTerms.iterator();
        final TermsEnum luceneIter = luceneTerms.iterator();

        while (esIter.next() != null) {
            assertNotNull(luceneIter.next());

            assertThat(esIter.totalTermFreq(), equalTo(luceneIter.totalTermFreq()));
            final PostingsEnum esPostings = esIter.postings(null, PostingsEnum.POSITIONS);
            final PostingsEnum lucenePostings = luceneIter.postings(null, PostingsEnum.POSITIONS);
            if (lucenePostings == null) {
                // No postings on the Lucene side is only expected when nothing was stored.
                assertFalse(field.storedOffset);
                assertFalse(field.storedPayloads);
                assertFalse(field.storedPositions);
                continue;
            }

            final String termText = esIter.term().utf8ToString();

            assertThat("Token mismatch for field: " + field.name, termText, equalTo(luceneIter.term().utf8ToString()));

            esPostings.nextDoc();
            lucenePostings.nextDoc();

            final int occurrences = esPostings.freq();
            assertThat(occurrences, equalTo(lucenePostings.freq()));

            final String failDesc = " (field:" + field.name + " term:" + termText + ")";
            final boolean expectPositions = field.storedPositions && testConfig.requestPositions;
            final boolean expectOffsets = field.storedOffset && testConfig.requestOffsets;
            final boolean expectPayloads = field.storedPayloads && testConfig.requestPayloads;
            for (int occurrence = 0; occurrence < occurrences; occurrence++) {
                final int lucenePos = lucenePostings.nextPosition();
                final int esPos = esPostings.nextPosition();
                if (expectPositions) {
                    assertThat("Position test failed" + failDesc, lucenePos, equalTo(esPos));
                } else {
                    assertThat("Missing position test failed" + failDesc, esPos, equalTo(-1));
                }
                if (expectOffsets) {
                    assertThat("Offset test failed" + failDesc, lucenePostings.startOffset(), equalTo(esPostings.startOffset()));
                    assertThat("Offset test failed" + failDesc, lucenePostings.endOffset(), equalTo(esPostings.endOffset()));
                } else {
                    assertThat("Missing offset test failed" + failDesc, esPostings.startOffset(), equalTo(-1));
                    assertThat("Missing offset test failed" + failDesc, esPostings.endOffset(), equalTo(-1));
                }
                if (expectPayloads) {
                    assertThat("Payload test failed" + failDesc, lucenePostings.getPayload(), equalTo(esPostings.getPayload()));
                } else {
                    assertThat("Missing payload test failed" + failDesc, esPostings.getPayload(), equalTo(null));
                }
            }
        }
        assertNull("Es returned terms are done but lucene isn't", luceneIter.next());
    }
}
 
开发者ID:justor,项目名称:elasticsearch_my,代码行数:69,代码来源:AbstractTermVectorsTestCase.java

示例15: load

import org.apache.lucene.index.PostingsEnum; //导入方法依赖的package包/类
/**
 * Reads a tab-separated training file and, for every line, runs the query in the first
 * column and walks the term vectors of the top 10 hits, computing a tf-idf weight per term.
 *
 * <p>Each line is expected to hold at least three columns: query, KB id and type. The
 * relevance flags and the tf-idf weight are computed but not yet consumed by anything in
 * this method. Hits without stored term vectors are skipped.
 *
 * @param filename path of the tab-separated input file
 * @throws Exception if the file cannot be read or the index access fails; query parse
 *                   errors are reported per line and do not abort the load
 */
public void load(String filename) throws Exception {
    // try-with-resources so the file is closed even when a later step throws.
    try (BufferedReader in = new BufferedReader(new FileReader(filename))) {
        String line = null;
        QueryParser qps = new QueryParser(FreebaseTools.FIELD_NAME_TEXT, tools.getIndexAnalyzer());
        IndexSearcher searcher = tools.getIndexSearcher();
        IndexReader reader = tools.getIndexReader();

        while ((line = in.readLine()) != null) {
            String[] fields = line.split("\t");
            System.out.println("[Query: " + fields[0] + "] [KBid: " + fields[1] + "] [type: " + fields[2] + "]");

            try {
                // execute a Lucene query for the entity, get back 10 docs
                Query q = qps.parse(fields[0]);
                TopDocs results = searcher.search(q, 10);
                ScoreDoc[] hits = results.scoreDocs;
                boolean found = false;
                long Ndocs = reader.numDocs();

                for (ScoreDoc sd : hits) {
                    //   - if d is the relevant doc, then found=true, this one's relevant.
                    boolean rel = false;
                    Document d = tools.getDocumentInMode(sd.doc);
                    String kbid = d.get("subject");
                    // Constant-first comparison: a document without a "subject" is simply
                    // not relevant instead of causing a NullPointerException.
                    if (fields[1].equals(kbid)) {
                        found = true;
                        rel = true;
                    }
                    //   - get its termvector
                    Fields docfields = reader.getTermVectors(sd.doc);
                    if (docfields == null) {
                        // No term vectors were stored for this document.
                        continue;
                    }
                    //   - make it into what jforests wants
                    for (String f : docfields) {
                        TermsEnum t = docfields.terms(f).iterator();
                        while (t.next() != null) {
                            PostingsEnum pe = t.postings(null);
                            int df = t.docFreq();
                            while (pe.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                                int freq = pe.freq();
                                // Floating-point division: the integer quotient would be
                                // truncated before the logarithm is taken.
                                double idf = Math.log((double) Ndocs / df);
                                double tf = 1 + Math.log(freq);
                                double tfidf = tf * idf;
                                // and that's the weight.
                            }
                        }
                    }

                }

            } catch (ParseException e) {
                e.printStackTrace();
            }
        }
    }
}
 
开发者ID:isoboroff,项目名称:basekb-search,代码行数:56,代码来源:LoadTrainingData.java


注:本文中的org.apache.lucene.index.PostingsEnum.freq方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。