当前位置: 首页>>代码示例>>Java>>正文


Java DocsEnum.freq方法代码示例

本文整理汇总了Java中org.apache.lucene.index.DocsEnum.freq方法的典型用法代码示例。如果您正苦于以下问题:Java DocsEnum.freq方法的具体用法?Java DocsEnum.freq怎么用?Java DocsEnum.freq使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.lucene.index.DocsEnum的用法示例。


在下文中一共展示了DocsEnum.freq方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getTotalTermFreq

import org.apache.lucene.index.DocsEnum; //导入方法依赖的package包/类
/**
 * Sums the total frequency of {@code term} across every leaf of {@code reader}.
 * <p>
 * Segments without deletions use the codec's precomputed segment-level
 * statistic when it is available ({@code totalTermFreq != -1}); otherwise the
 * postings are walked and the per-document freqs are added up one by one.
 *
 * @param reader index to inspect
 * @param term   term whose total frequency is wanted
 * @return the summed term frequency over all live segments
 * @throws Exception if reading the index fails
 */
public static long getTotalTermFreq(IndexReader reader, Term term) throws Exception {
  long sum = 0L;
  for (final AtomicReaderContext leaf : reader.leaves()) {
    final AtomicReader segReader = leaf.reader();
    boolean counted = false;
    if (!segReader.hasDeletions()) {
      // Fast path: ask the codec for the segment statistic directly.
      // TODO: we could do this up front, during the scan (next()), instead
      // of after-the-fact here w/ seek, if the codec supports it and there
      // are no deleted docs.
      final long segTotal = segReader.totalTermFreq(term);
      if (segTotal != -1) {
        sum += segTotal;
        counted = true;
      }
    }
    if (!counted) {
      // Slow path: enumerate postings and accumulate freqs.
      // Note: if the field omits freqs, each hit currently counts as 1.
      final DocsEnum postings = segReader.termDocsEnum(term);
      if (postings != null) {
        while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          sum += postings.freq();
        }
      }
    }
  }
  return sum;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:26,代码来源:HighFreqTerms.java

示例2: addToWordcloud

import org.apache.lucene.index.DocsEnum; //导入方法依赖的package包/类
/**
 * Fetches term vectors for all documents in {@code ids2} with one
 * multi-term-vector request and folds each term's frequency into the
 * shared {@code words} map (term -> WordData running count).
 * Terms shorter than 2 characters are skipped.
 *
 * @param ids2 document ids whose term vectors are aggregated
 * @throws IOException if reading the returned term-vector fields fails
 */
private void addToWordcloud(ArrayList<String> ids2) throws IOException {
// Batch all ids into a single multi-term-vectors round trip.
MultiTermVectorsRequestBuilder mtr = client.prepareMultiTermVectors();
for (String id : ids2) {
	// NOTE(review): newTermVectorRequest() is presumably a helper defined
	// elsewhere in this class; verify it sets index/type for the request.
	mtr.add(newTermVectorRequest().id(id));
}
MultiTermVectorsResponse r = mtr.execute().actionGet();

// Reusable enumerators passed to iterator()/docs() for reuse across terms.
TermsEnum e = null;
DocsEnum docsenum = null;

for (MultiTermVectorsItemResponse a : r) {
	TermVectorResponse t = a.getResponse();
	Fields fields = t.getFields();
	for (String f : fields) {
		Terms terms = fields.terms(f);
		TermsEnum it = terms.iterator(e);
		while (it.next() != null) {
			String term = it.term().utf8ToString();
			// Drop single-character tokens; too noisy for a word cloud.
			if (term.length() < 2) {continue;}
			DocsEnum docsit = it.docs(new Bits.MatchAllBits(ids2.size()), docsenum);
			// NOTE(review): freq() is read without first calling
			// docsit.nextDoc(); most DocsEnum implementations require
			// positioning on a doc before freq() is valid — confirm this
			// works for term-vector-backed enums or add nextDoc() here.
			int freq = docsit.freq();
			
            WordData data = words.get(term);
            if (data == null)
                words.put(term, new WordData(term, freq));
            else
                data.count += freq;
		}
	}
}
  }
 
开发者ID:NLeSC,项目名称:benchmarking-elasticsearch,代码行数:32,代码来源:queries.java

示例3: addSingle

import org.apache.lucene.index.DocsEnum; //导入方法依赖的package包/类
/**
 * Fetches the term vector of one document (index "kb", type "doc") for the
 * fields "text_content" and "article_dc_title" and folds each term's
 * frequency into the shared {@code words} map. Positions, offsets, payloads
 * and statistics are disabled to keep the response minimal. Terms shorter
 * than 2 characters are skipped.
 *
 * @param id id of the document whose term vector is aggregated
 * @throws IOException if reading the returned term-vector fields fails
 */
private void addSingle(String id) throws IOException {
  	TermVectorRequestBuilder tr = client.prepareTermVector("kb", "doc", id)
  							.setSelectedFields("text_content", "article_dc_title")
			            .setPositions(false)
			            .setOffsets(false)
			            .setPayloads(false)
			            .setFieldStatistics(false)
			            .setTermStatistics(false);
  	TermVectorResponse t = tr.execute().actionGet();

// Reusable enumerators passed to iterator()/docs() for reuse across terms.
TermsEnum e = null;
DocsEnum docsenum = null;

  	Fields fields = t.getFields();
for (String f : fields) {
	Terms terms = fields.terms(f);
	TermsEnum it = terms.iterator(e);
	while (it.next() != null) {
		String term = it.term().utf8ToString();
		// Drop single-character tokens; too noisy for a word cloud.
		if (term.length() < 2) {continue;}
		DocsEnum docsit = it.docs(new Bits.MatchAllBits(1), docsenum);
		// NOTE(review): freq() is read without first calling
		// docsit.nextDoc(); most DocsEnum implementations require being
		// positioned on a doc before freq() is valid — confirm this works
		// for term-vector-backed enums or add nextDoc() here.
		int freq = docsit.freq();
		
           WordData data = words.get(term);
           if (data == null)
               words.put(term, new WordData(term, freq));
           else
               data.count += freq;
	}
}
  	
  }
 
开发者ID:NLeSC,项目名称:benchmarking-elasticsearch,代码行数:33,代码来源:queries.java

示例4: testReadTokens

import org.apache.lucene.index.DocsEnum; //导入方法依赖的package包/类
/**
 * Verifies ReadTokensTask: the number of tokens it reports for the first
 * NUM_DOCS Reuters docs must equal the number of tokens actually stored in
 * an index built from those same docs.
 */
public void testReadTokens() throws Exception {

  // Both the ReadTokens pass and the indexing pass consume this many docs.
  final int NUM_DOCS = 20;

  // Benchmark algorithm: first read tokens from NUM_DOCS line docs, then
  // build an index from exactly the same docs.
  String algLines1[] = {
    "# ----- properties ",
    "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
    "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
    "docs.file=" + getReuters20LinesFile(),
    "# ----- alg ",
    "{ReadTokens}: " + NUM_DOCS,
    "ResetSystemErase",
    "CreateIndex",
    "{AddDoc}: " + NUM_DOCS,
    "CloseIndex",
  };

  // Execute the algorithm.
  Benchmark benchmark = execBenchmark(algLines1);

  List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();

  // Tally tokens observed by every ReadTokens task instance.
  int tokensFromTask = 0;
  for (final TaskStats stat : stats) {
    if (stat.getTask().getName().equals("ReadTokens")) {
      tokensFromTask += stat.getCount();
    }
  }

  // Independently count the tokens actually present in the index.
  IndexReader reader = DirectoryReader.open(benchmark.getRunData().getDirectory());
  assertEquals(NUM_DOCS, reader.numDocs());

  int tokensFromIndex = 0;
  Fields fields = MultiFields.getFields(reader);
  for (String fieldName : fields) {
    // Bookkeeping fields are not produced by the analyzer; skip them.
    if (fieldName.equals(DocMaker.ID_FIELD)
        || fieldName.equals(DocMaker.DATE_MSEC_FIELD)
        || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
      continue;
    }
    Terms terms = fields.terms(fieldName);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator(null);
    DocsEnum postings = null;
    while (termsEnum.next() != null) {
      postings = TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(reader), postings, DocsEnum.FLAG_FREQS);
      while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        tokensFromIndex += postings.freq();
      }
    }
  }
  reader.close();

  // The two independent counts must agree.
  assertEquals(tokensFromTask, tokensFromIndex);
}
 
开发者ID:europeana,项目名称:search,代码行数:67,代码来源:TestPerfTasksLogic.java

示例5: SortingDocsEnum

import org.apache.lucene.index.DocsEnum; //导入方法依赖的package包/类
/**
 * Eagerly consumes all postings of {@code in}, remaps every doc id through
 * {@code docMap} (old segment order -> sorted order), and sorts the buffered
 * ids (with their freqs, when {@code withFreqs}) so they can be replayed in
 * the new order.
 *
 * @param maxDoc    number of docs in the segment; sizes the sorter
 * @param reuse     previous instance whose buffers/sorter may be recycled,
 *                  or null
 * @param in        postings to consume; fully drained by this constructor
 * @param withFreqs whether per-doc freqs must be captured alongside doc ids
 * @param docMap    old-to-new doc id mapping from the sorter
 * @throws IOException if reading {@code in} fails
 */
SortingDocsEnum(int maxDoc, SortingDocsEnum reuse, final DocsEnum in, boolean withFreqs, final Sorter.DocMap docMap) throws IOException {
  super(in);
  this.maxDoc = maxDoc;
  this.withFreqs = withFreqs;
  if (reuse != null) {
    // Recycle buffers from the previous enum; the sorter is only reusable
    // when it was sized for the same maxDoc.
    if (reuse.maxDoc == maxDoc) {
      sorter = reuse.sorter;
    } else {
      sorter = new DocFreqSorter(maxDoc);
    }
    docs = reuse.docs;
    freqs = reuse.freqs; // maybe null
  } else {
    docs = new int[64];
    sorter = new DocFreqSorter(maxDoc);
  }
  // -1 means iteration has not started yet.
  docIt = -1;
  int i = 0;
  int doc;
  if (withFreqs) {
    // A recycled freqs array may be absent or smaller than docs; resize.
    if (freqs == null || freqs.length < docs.length) {
      freqs = new int[docs.length];
    }
    // Drain postings, growing both parallel arrays in lockstep.
    while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
      if (i >= docs.length) {
        docs = ArrayUtil.grow(docs, docs.length + 1);
        freqs = ArrayUtil.grow(freqs, freqs.length + 1);
      }
      docs[i] = docMap.oldToNew(doc);
      freqs[i] = in.freq();
      ++i;
    }
  } else {
    // Freqs not requested: drop any recycled array and buffer ids only.
    freqs = null;
    while ((doc = in.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
      if (i >= docs.length) {
        docs = ArrayUtil.grow(docs, docs.length + 1);
      }
      docs[i++] = docMap.oldToNew(doc);
    }
  }
  // TimSort can save much time compared to other sorts in case of
  // reverse sorting, or when sorting a concatenation of sorted readers
  sorter.reset(docs, freqs);
  sorter.sort(0, i);
  upto = i;
}
 
开发者ID:europeana,项目名称:search,代码行数:48,代码来源:SortingAtomicReader.java

示例6: testReadTokens

import org.apache.lucene.index.DocsEnum; //导入方法依赖的package包/类
/**
 * Test ReadTokensTask: the token count it reports for the first NUM_DOCS
 * Reuters docs must equal the number of tokens actually stored in an index
 * built from the same docs.
 */
public void testReadTokens() throws Exception {

  // We will call ReadTokens on this many docs
  final int NUM_DOCS = 20;

  // Read tokens from first NUM_DOCS docs from Reuters and
  // then build index from the same docs
  String algLines1[] = {
    "# ----- properties ",
    "analyzer=org.apache.lucene.analysis.core.WhitespaceAnalyzer",
    "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
    "docs.file=" + getReuters20LinesFile(),
    "# ----- alg ",
    "{ReadTokens}: " + NUM_DOCS,
    "ResetSystemErase",
    "CreateIndex",
    "{AddDoc}: " + NUM_DOCS,
    "CloseIndex",
  };

  // Run algo
  Benchmark benchmark = execBenchmark(algLines1);

  List<TaskStats> stats = benchmark.getRunData().getPoints().taskStats();

  // Count how many tokens all ReadTokens saw
  int totalTokenCount1 = 0;
  for (final TaskStats stat : stats) {
    if (stat.getTask().getName().equals("ReadTokens")) {
      totalTokenCount1 += stat.getCount();
    }
  }

  // Separately count how many tokens are actually in the index:
  IndexReader reader = DirectoryReader.open(benchmark.getRunData().getDirectory());
  assertEquals(NUM_DOCS, reader.numDocs());

  int totalTokenCount2 = 0;

  Fields fields = MultiFields.getFields(reader);

  for (String fieldName : fields) {
    // Bookkeeping fields are not analyzer output; skip them.
    if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
      continue;
    }
    Terms terms = fields.terms(fieldName);
    if (terms == null) {
      continue;
    }
    TermsEnum termsEnum = terms.iterator(null);
    DocsEnum docs = null;
    while(termsEnum.next() != null) {
      // Request a (possibly reused) postings enum that exposes freqs.
      docs = _TestUtil.docs(random(), termsEnum, MultiFields.getLiveDocs(reader), docs, DocsEnum.FLAG_FREQS);
      while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        totalTokenCount2 += docs.freq();
      }
    }
  }
  reader.close();

  // Make sure they are the same
  assertEquals(totalTokenCount1, totalTokenCount2);
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:67,代码来源:TestPerfTasksLogic.java


注:本文中的org.apache.lucene.index.DocsEnum.freq方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。