當前位置: 首頁>>代碼示例>>Java>>正文


Java PositionIndexReader類代碼示例

本文整理匯總了Java中org.lemurproject.galago.core.index.disk.PositionIndexReader的典型用法代碼示例。如果您正苦於以下問題:Java PositionIndexReader類的具體用法?Java PositionIndexReader怎麽用?Java PositionIndexReader使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。


PositionIndexReader類屬於org.lemurproject.galago.core.index.disk包,在下文中一共展示了PositionIndexReader類的7個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: testCountIterator

import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
/**
 * Reads the count iterator for term "b" and checks its first two candidates,
 * their counts, and the aggregate node statistics against the dataB fixture.
 *
 * NOTE(review): each dataB row appears to be a document id followed by its
 * positions (hence the {@code length - 1} count) -- confirm against the fixture.
 *
 * @throws Exception if the index cannot be opened or read
 */
@Test
public void testCountIterator() throws Exception {
  PositionIndexReader reader = new PositionIndexReader(tempPath.toString());
  // try/finally: a failing assertion must not leave the reader open.
  try {
    DiskCountIterator termCounts = reader.getTermCounts("b");

    ScoringContext sc = new ScoringContext();

    // First candidate.
    assertEquals(dataB[0][0], termCounts.currentCandidate());
    sc.document = termCounts.currentCandidate();
    assertEquals(dataB[0].length - 1, termCounts.count(sc));
    termCounts.movePast(dataB[0][0]);

    // Second candidate, reached by moving past the first one.
    assertEquals(dataB[1][0], termCounts.currentCandidate());
    sc.document = termCounts.currentCandidate();
    assertEquals(dataB[1].length - 1, termCounts.count(sc));

    NodeStatistics b_stats = termCounts.getStatistics();
    assertEquals(2, b_stats.nodeDocumentCount);
    assertEquals(3, b_stats.nodeFrequency);
  } finally {
    reader.close();
  }
}
 
開發者ID:teanalab,項目名稱:demidovii,代碼行數:23,代碼來源:PositionIndexReaderTest.java

示例2: run

import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
/**
 * Walks every key of the "postings.krovetz" index part and prints, for each
 * term accepted by {@code QueryUtil.keepTerm}, one line of the form
 * {@code term maximumCount nodeDocumentCount nodeFrequency} to standard output.
 *
 * @param argp reads "index" (required string) plus the optional booleans
 *             "stop" and "ignoreDates", both defaulting to true
 * @throws Exception if the index cannot be opened
 */
@Override
public void run(Parameters argp) throws Exception {
  final boolean stop = argp.get("stop", true);
  final boolean ignoreDates = argp.get("ignoreDates", true);

  final DiskIndex index = new DiskIndex(argp.getString("index"));
  PositionIndexReader postings = (PositionIndexReader) index.getIndexPart("postings.krovetz");

  // Per-key callback: filter the term, then print its statistics.
  Operation<PositionIndexReader.KeyIterator> printStatistics =
      new Operation<PositionIndexReader.KeyIterator>() {
        @Override
        public void process(PositionIndexReader.KeyIterator keys) {
          try {
            String term = keys.getKeyString();
            if (!QueryUtil.keepTerm(term, stop, ignoreDates)) {
              return;
            }
            NodeStatistics stats = keys.getValueCountSource().getStatistics();
            System.out.println(term + " " + stats.maximumCount + " " + stats.nodeDocumentCount + " " + stats.nodeFrequency);
          } catch (IOException e) {
            // The callback cannot throw checked exceptions; rethrow unchecked with the cause.
            throw new RuntimeException(e);
          }
        }
      };

  GalagoUtil.forEachKey(postings.getIterator(), printStatistics);
}
 
開發者ID:jjfiv,項目名稱:ecir2015timebooks,代碼行數:24,代碼來源:TermCounts.java

示例3: DistanceCalculator

import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
/**
 * Configures the calculator from the JSON parameters of a TupleFlow stage.
 *
 * Parameters read: "distance" (default 1.0), "directory" and "part" (both
 * required strings locating the index part), "symmetric" (default true) and
 * "method" (default "levenshtein").
 *
 * @param parameters stage parameters; also supplies the "pairs calculated" counter
 * @throws Exception if the index part cannot be opened or this class has no
 *                   public method with the configured name taking two Strings
 */
public DistanceCalculator(TupleFlowParameters parameters) throws Exception {
  maxdistance = (double) parameters.getJSON().get("distance", 1.0F);
  String indexDir = parameters.getJSON().getString("directory");
  String part = parameters.getJSON().getString("part");
  isSymmetric = parameters.getJSON().get("symmetric", true);

  // The reader is intentionally left open: its key iterator is kept in a field.
  IndexPartReader reader = new PositionIndexReader(DiskIndex.getPartPath(indexDir, part));
  counter = parameters.getCounter("pairs calculated");
  iterator = reader.getIterator();

  // Resolve the distance function reflectively by its configured name.
  String methodName = parameters.getJSON().get("method", "levenshtein");
  m = getClass().getMethod(methodName, String.class, String.class);
}
 
開發者ID:teanalab,項目名稱:demidovii,代碼行數:12,代碼來源:DistanceCalculator.java

示例4: testA

import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
/**
 * Runs the shared iterator checks on the extents of term "a" against the
 * dataA fixture and verifies the aggregate node statistics.
 *
 * @throws Exception if the index cannot be opened or read
 */
@Test
public void testA() throws Exception {
  PositionIndexReader reader = new PositionIndexReader(tempPath.toString());
  // try/finally: a failing assertion must not leave the reader open.
  try {
    ExtentIterator termExtents = reader.getTermExtents("a");

    internalTestIterator(termExtents, dataA);
    NodeStatistics a_stats = ((NodeAggregateIterator) termExtents).getStatistics();
    assertEquals(2, a_stats.nodeDocumentCount);
    assertEquals(4, a_stats.nodeFrequency);
  } finally {
    reader.close();
  }
}
 
開發者ID:teanalab,項目名稱:demidovii,代碼行數:12,代碼來源:PositionIndexReaderTest.java

示例5: testB

import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
/**
 * Runs the shared iterator checks on the extents of term "b" against the
 * dataB fixture and verifies the aggregate node statistics.
 *
 * @throws Exception if the index cannot be opened or read
 */
@Test
public void testB() throws Exception {
  PositionIndexReader reader = new PositionIndexReader(tempPath.toString());
  // try/finally: a failing assertion must not leave the reader open.
  try {
    ExtentIterator termExtents = reader.getTermExtents("b");

    internalTestIterator(termExtents, dataB);
    NodeStatistics b_stats = ((NodeAggregateIterator) termExtents).getStatistics();
    assertEquals(2, b_stats.nodeDocumentCount);
    assertEquals(3, b_stats.nodeFrequency);
  } finally {
    reader.close();
  }
}
 
開發者ID:teanalab,項目名稱:demidovii,代碼行數:12,代碼來源:PositionIndexReaderTest.java

示例6: testSkipLists

import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
/**
 * Writes a postings list for term "a" with skipping enabled, then reads it
 * back and exercises syncTo/movePast across the list.
 *
 * The list holds document ids 1, 4, 7, ... below 5000; document d receives
 * positions 1 .. (d / 50) + 1, so its count is (d / 50) + 1.
 *
 * @throws Exception if the skip file cannot be written or read
 */
@Test
public void testSkipLists() throws Exception {
  PositionIndexReader reader = null;
  try {
    // internally fill the skip file
    Parameters p = Parameters.create();
    p.set("filename", skipPath.toString());
    p.set("skipping", true);
    p.set("skipDistance", 20);
    p.set("skipResetDistance", 5);

    PositionIndexWriter writer =
            new PositionIndexWriter(new org.lemurproject.galago.tupleflow.FakeParameters(p));

    writer.processWord(ByteUtil.fromString("a"));
    for (int docid = 1; docid < 5000; docid += 3) {
      writer.processDocument(docid);
      for (int pos = 1; pos < ((docid / 50) + 2); pos++) {
        writer.processPosition(pos);
      }
    }
    writer.close();

    // Now read it
    reader = new PositionIndexReader(skipPath.toString());
    DiskExtentIterator termExtents = reader.getTermExtents("a");
    ScoringContext sc = new ScoringContext();

    assertEquals("a", termExtents.getKeyString());

    // Read first identifier
    assertEquals(1, termExtents.currentCandidate());
    sc.document = termExtents.currentCandidate();
    assertEquals(1, termExtents.count(sc));

    termExtents.syncTo(7);
    assertTrue(termExtents.hasMatch(new ScoringContext(7)));

    // Now move to a doc, but not one we have
    termExtents.syncTo(90);
    assertFalse(termExtents.hasMatch(new ScoringContext(90)));

    // Now move forward one
    termExtents.movePast(93);
    assertEquals(94, termExtents.currentCandidate());
    sc.document = termExtents.currentCandidate();
    assertEquals(2, termExtents.count(sc));

    // One more time, then we read extents
    termExtents.movePast(2543);
    assertEquals(2545, termExtents.currentCandidate());
    sc.document = termExtents.currentCandidate();
    assertEquals(51, termExtents.count(sc));
    ExtentArray ea = termExtents.extents(sc);
    assertEquals(2545, ea.getDocument());
    assertEquals(51, ea.size());
    for (int i = 0; i < ea.size(); i++) {
      assertEquals(i + 1, ea.begin(i));
    }

    // Syncing beyond the last document exhausts the iterator.
    termExtents.syncTo(10005);
    assertFalse(termExtents.hasMatch(new ScoringContext(10005)));
    assertTrue(termExtents.isDone());
  } finally {
    // Clean up even when an assertion fails: close the reader first, then
    // remove the skip file.
    try {
      if (reader != null) {
        reader.close();
      }
    } finally {
      if (skipPath != null) {
        skipPath.delete();
        skipPath = null;
      }
    }
  }
}
 
開發者ID:teanalab,項目名稱:demidovii,代碼行數:65,代碼來源:PositionIndexReaderTest.java

示例7: testIndexStemming

import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
/**
 * Builds two indexes over the same single-document TREC corpus, one with the
 * Porter2 stemmer and one with the Krovetz stemmer, and checks that both
 * stemmed postings parts resolve "warehouse" and stem "having" to "have".
 *
 * @throws Exception if the corpus or either index cannot be created or read
 */
@Test
public void testIndexStemming() throws Exception {
  File trecCorpusFile = null;
  File indexFile1 = null;
  File indexFile2 = null;
  PositionIndexReader porterPart = null;
  PositionIndexReader krovetzPart = null;

  try {
    // create a simple doc file, trec format:
    String trecCorpus = AppTest.trecDocument("1", text);
    trecCorpusFile = FileUtility.createTemporary();
    StreamUtil.copyStringToFile(trecCorpus, trecCorpusFile);

    // now, try to build a Porter2-stemmed index from that
    indexFile1 = FileUtility.createTemporaryDirectory();
    App.main(new String[]{"build", "--indexPath=" + indexFile1.getAbsolutePath(),
              "--inputPath+" + trecCorpusFile.getAbsolutePath(),
              "--stemmerClass/Porter2Stemmer=org.lemurproject.galago.core.parse.stem.Porter2Stemmer"});

    // and a Krovetz-stemmed index from the same corpus
    indexFile2 = FileUtility.createTemporaryDirectory();
    App.main(new String[]{"build", "--indexPath=" + indexFile2.getAbsolutePath(),
              "--inputPath+" + trecCorpusFile.getAbsolutePath(),
              "--stemmerClass/KrovetzStemmer=org.lemurproject.galago.core.parse.stem.KrovetzStemmer"});

    // make sure the indexes exist
    assertTrue(indexFile1.exists());
    assertTrue(indexFile2.exists());

    // open the stemmed postings part of each index
    porterPart = (PositionIndexReader) DiskIndex.openIndexPart(indexFile1 + "/postings.Porter2Stemmer");
    krovetzPart = (PositionIndexReader) DiskIndex.openIndexPart(indexFile2 + "/postings.KrovetzStemmer");

    // ensure nodes can be found
    // (JUnit assertion rather than the `assert` keyword, which is skipped
    // unless the JVM runs with -ea)
    assertTrue(porterPart.getIterator(new Node("counts", "warehouse")) != null);
    assertTrue(krovetzPart.getIterator(new Node("counts", "warehouse")) != null);

    // ensure a second term works (expected value first, per assertEquals' contract)
    assertEquals("have", porterPart.getIterator(new Node("counts", "having")).getKeyString());
    assertEquals("have", krovetzPart.getIterator(new Node("counts", "having")).getKeyString());

  } finally {
    // Close the readers before deleting the directories they read from.
    try {
      if (porterPart != null) {
        porterPart.close();
      }
    } finally {
      try {
        if (krovetzPart != null) {
          krovetzPart.close();
        }
      } finally {
        if (trecCorpusFile != null) {
          trecCorpusFile.delete();
        }
        if (indexFile1 != null) {
          FSUtil.deleteDirectory(indexFile1);
        }
        if (indexFile2 != null) {
          FSUtil.deleteDirectory(indexFile2);
        }
      }
    }
  }
}
 
開發者ID:teanalab,項目名稱:demidovii,代碼行數:53,代碼來源:KrovetzStemmerTest.java


注:本文中的org.lemurproject.galago.core.index.disk.PositionIndexReader類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。