本文整理匯總了Java中org.lemurproject.galago.core.index.disk.PositionIndexReader類的典型用法代碼示例。如果您正苦於以下問題:Java PositionIndexReader類的具體用法?Java PositionIndexReader怎麼用?Java PositionIndexReader使用的例子?那麼, 這裡精選的類代碼示例或許可以為您提供幫助。
PositionIndexReader類屬於org.lemurproject.galago.core.index.disk包,在下文中一共展示了PositionIndexReader類的7個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: testCountIterator
import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
@Test
public void testCountIterator() throws Exception {
PositionIndexReader reader = new PositionIndexReader(tempPath.toString());
DiskCountIterator termCounts = reader.getTermCounts("b");
ScoringContext sc = new ScoringContext();
assertEquals(dataB[0][0], termCounts.currentCandidate());
sc.document = termCounts.currentCandidate();
assertEquals(dataB[0].length - 1, termCounts.count(sc));
termCounts.movePast(dataB[0][0]);
assertEquals(dataB[1][0], termCounts.currentCandidate());
sc.document = termCounts.currentCandidate();
assertEquals(dataB[1].length - 1, termCounts.count(sc));
NodeStatistics b_stats = termCounts.getStatistics();
assertEquals(2, b_stats.nodeDocumentCount);
assertEquals(3, b_stats.nodeFrequency);
reader.close();
}
示例2: run
import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
@Override
public void run(Parameters argp) throws Exception {
final boolean stop = argp.get("stop", true);
final boolean ignoreDates = argp.get("ignoreDates", true);
final DiskIndex index = new DiskIndex(argp.getString("index"));
PositionIndexReader pir = (PositionIndexReader) index.getIndexPart("postings.krovetz");
GalagoUtil.forEachKey(pir.getIterator(), new Operation<PositionIndexReader.KeyIterator>() {
@Override
public void process(PositionIndexReader.KeyIterator obj) {
try {
String term = obj.getKeyString();
if (QueryUtil.keepTerm(term, stop, ignoreDates)) {
NodeStatistics ns = obj.getValueCountSource().getStatistics();
System.out.println(term + " " + ns.maximumCount + " " + ns.nodeDocumentCount + " " + ns.nodeFrequency);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
});
}
示例3: DistanceCalculator
import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
public DistanceCalculator(TupleFlowParameters parameters) throws Exception {
maxdistance = (double) parameters.getJSON().get("distance", 1.0F);
String indexLocation = parameters.getJSON().getString("directory");
String partName = parameters.getJSON().getString("part");
isSymmetric = parameters.getJSON().get("symmetric", true);
IndexPartReader partReader = new PositionIndexReader(DiskIndex.getPartPath(indexLocation, partName));
counter = parameters.getCounter("pairs calculated");
iterator = partReader.getIterator();
String method = parameters.getJSON().get("method", "levenshtein");
m = this.getClass().getMethod(method, String.class, String.class);
}
示例4: testA
import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
@Test
public void testA() throws Exception {
PositionIndexReader reader = new PositionIndexReader(tempPath.toString());
ExtentIterator termExtents = reader.getTermExtents("a");
internalTestIterator(termExtents, dataA);
NodeStatistics a_stats = ((NodeAggregateIterator) termExtents).getStatistics();
assertEquals(2, a_stats.nodeDocumentCount);
assertEquals(4, a_stats.nodeFrequency);
reader.close();
}
示例5: testB
import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
@Test
public void testB() throws Exception {
PositionIndexReader reader = new PositionIndexReader(tempPath.toString());
ExtentIterator termExtents = reader.getTermExtents("b");
internalTestIterator(termExtents, dataB);
NodeStatistics b_stats = ((NodeAggregateIterator) termExtents).getStatistics();
assertEquals(2, b_stats.nodeDocumentCount);
assertEquals(3, b_stats.nodeFrequency);
reader.close();
}
示例6: testSkipLists
import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
@Test
public void testSkipLists() throws Exception {
// internally fill the skip file
Parameters p = Parameters.create();
p.set("filename", skipPath.toString());
p.set("skipping", true);
p.set("skipDistance", 20);
p.set("skipResetDistance", 5);
PositionIndexWriter writer =
new PositionIndexWriter(new org.lemurproject.galago.tupleflow.FakeParameters(p));
writer.processWord(ByteUtil.fromString("a"));
for (int docid = 1; docid < 5000; docid += 3) {
writer.processDocument(docid);
for (int pos = 1; pos < ((docid / 50) + 2); pos++) {
writer.processPosition(pos);
}
}
writer.close();
// Now read it
PositionIndexReader reader = new PositionIndexReader(skipPath.toString());
DiskExtentIterator termExtents = reader.getTermExtents("a");
ScoringContext sc = new ScoringContext();
assertEquals("a", termExtents.getKeyString());
// Read first identifier
assertEquals(1, termExtents.currentCandidate());
sc.document = termExtents.currentCandidate();
assertEquals(1, termExtents.count(sc));
termExtents.syncTo(7);
assertTrue(termExtents.hasMatch(new ScoringContext(7)));
// Now move to a doc, but not one we have
termExtents.syncTo(90);
assertFalse(termExtents.hasMatch(new ScoringContext(90)));
// Now move forward one
termExtents.movePast(93);
assertEquals(94, termExtents.currentCandidate());
sc.document = termExtents.currentCandidate();
assertEquals(2, termExtents.count(sc));
// One more time, then we read extents
termExtents.movePast(2543);
assertEquals(2545, termExtents.currentCandidate());
sc.document = termExtents.currentCandidate();
assertEquals(51, termExtents.count(sc));
ExtentArray ea = termExtents.extents(sc);
assertEquals(2545, ea.getDocument());
assertEquals(51, ea.size());
for (int i = 0; i < ea.size(); i++) {
assertEquals(i + 1, ea.begin(i));
}
termExtents.syncTo(10005);
assertFalse(termExtents.hasMatch(new ScoringContext(10005)));
assertTrue(termExtents.isDone());
skipPath.delete();
skipPath = null;
}
示例7: testIndexStemming
import org.lemurproject.galago.core.index.disk.PositionIndexReader; //導入依賴的package包/類
@Test
public void testIndexStemming() throws Exception {
File trecCorpusFile = null;
File indexFile1 = null;
File indexFile2 = null;
try {
// create a simple doc file, trec format:
String trecCorpus = AppTest.trecDocument("1", text);
trecCorpusFile = FileUtility.createTemporary();
StreamUtil.copyStringToFile(trecCorpus, trecCorpusFile);
// now, try to build an index from that
indexFile1 = FileUtility.createTemporaryDirectory();
App.main(new String[]{"build", "--indexPath=" + indexFile1.getAbsolutePath(),
"--inputPath+" + trecCorpusFile.getAbsolutePath(),
"--stemmerClass/Porter2Stemmer=org.lemurproject.galago.core.parse.stem.Porter2Stemmer"});
// now, try to build an index from that
indexFile2 = FileUtility.createTemporaryDirectory();
App.main(new String[]{"build", "--indexPath=" + indexFile2.getAbsolutePath(),
"--inputPath+" + trecCorpusFile.getAbsolutePath(),
"--stemmerClass/KrovetzStemmer=org.lemurproject.galago.core.parse.stem.KrovetzStemmer"});
// make sure the indexes exists
assertTrue(indexFile1.exists());
assertTrue(indexFile2.exists());
// open stemmedPostings and compare lengths with postings.
PositionIndexReader porterPart = (PositionIndexReader) DiskIndex.openIndexPart(indexFile1 + "/postings.Porter2Stemmer");
PositionIndexReader krovetzPart = (PositionIndexReader) DiskIndex.openIndexPart(indexFile2 + "/postings.KrovetzStemmer");
// ensure nodes can be found
assert (porterPart.getIterator(new Node("counts", "warehouse")) != null);
assert (krovetzPart.getIterator(new Node("counts", "warehouse")) != null);
// ensure a second term works
assertEquals(porterPart.getIterator(new Node("counts", "having")).getKeyString(), "have");
assertEquals(krovetzPart.getIterator(new Node("counts", "having")).getKeyString(), "have");
} finally {
if (trecCorpusFile != null) {
trecCorpusFile.delete();
}
if (indexFile1 != null) {
FSUtil.deleteDirectory(indexFile1);
}
if (indexFile2 != null) {
FSUtil.deleteDirectory(indexFile2);
}
}
}