本文整理汇总了Java中org.lemurproject.galago.core.tools.apps.BuildIndex类的典型用法代码示例。如果您正苦于以下问题：Java BuildIndex类的具体用法？Java BuildIndex怎么用？Java BuildIndex使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
BuildIndex类属于org.lemurproject.galago.core.tools.apps包，在下文中一共展示了BuildIndex类的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testBuildIndexSpecific
import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Builds an index from a single text file while requesting
 * {@code WebDocumentSerializer} through {@code corpusParameters}, then reads
 * the resulting corpus back and checks that:
 * <ul>
 *   <li>the corpus manifest records the requested serializer class, and</li>
 *   <li>the first document exposes non-null text and its seven terms.</li>
 * </ul>
 * The temporary working directory is removed whether or not the test passes.
 */
@Test
public void testBuildIndexSpecific() throws Exception {
  File workDir = FileUtility.createTemporaryDirectory();
  try {
    File source = new File(workDir, "input.txt");
    File indexDir = new File(workDir, "test.galago");
    StreamUtil.copyStringToFile("this is a document of some kind", source);

    // Ask the corpus writer to use the serializer under test.
    String serializerName = WebDocumentSerializer.class.getName();
    Parameters corpusSettings = Parameters.parseArray(
        "documentSerializerClass", serializerName);
    Parameters buildSettings = Parameters.parseArray(
        "inputPath", source,
        "indexPath", indexDir,
        "corpusParameters", corpusSettings);
    BuildIndex.execute(buildSettings, System.out);

    String corpusPath = new File(indexDir, "corpus").getAbsolutePath();
    CorpusReader corpus = new CorpusReader(corpusPath);
    String recordedSerializer = corpus.getManifest().getString("documentSerializerClass");
    assertEquals(serializerName, recordedSerializer);
    System.out.println(corpus.serializer.getClass());

    Document first = corpus.getIterator().getDocument(Document.DocumentComponents.JustTerms);
    assertNotNull(first);
    assertNotNull(first.text);
    assertNotNull(first.terms);
    assertEquals(7, first.terms.size());
    assertEquals("this", first.terms.get(0));
  } finally {
    FSUtil.deleteDirectory(workDir);
  }
}
示例2: serializerClass
import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Builds an index from a single text file while requesting
 * {@code TokenizedDocumentSerializer} through {@code corpusParameters}, then
 * reads the resulting corpus back and checks that:
 * <ul>
 *   <li>the corpus manifest records the requested serializer class, and</li>
 *   <li>the first document comes back with a null {@code text} but with its
 *       seven terms present.</li>
 * </ul>
 * The temporary working directory is removed whether or not the test passes.
 */
@Test
public void serializerClass() throws Exception {
  File workDir = FileUtility.createTemporaryDirectory();
  try {
    File source = new File(workDir, "input.txt");
    File indexDir = new File(workDir, "test.galago");
    StreamUtil.copyStringToFile("this is a document of some kind", source);

    // Ask the corpus writer to use the serializer under test.
    String serializerName = TokenizedDocumentSerializer.class.getName();
    Parameters corpusSettings = Parameters.parseArray(
        "documentSerializerClass", serializerName);
    Parameters buildSettings = Parameters.parseArray(
        "inputPath", source,
        "indexPath", indexDir,
        "corpusParameters", corpusSettings);
    BuildIndex.execute(buildSettings, System.out);

    String corpusPath = new File(indexDir, "corpus").getAbsolutePath();
    CorpusReader corpus = new CorpusReader(corpusPath);
    String recordedSerializer = corpus.getManifest().getString("documentSerializerClass");
    assertEquals(serializerName, recordedSerializer);
    System.out.println(corpus.serializer.getClass());

    Document first = corpus.getIterator().getDocument(Document.DocumentComponents.JustTerms);
    assertNotNull(first);
    assertNull(first.text);
    assertNotNull(first.terms);
    assertEquals(7, first.terms.size());
    assertEquals("this", first.terms.get(0));
  } finally {
    FSUtil.deleteDirectory(workDir);
  }
}
示例3: run
import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Writes every fact of the configured dataset to a temporary file (one
 * {@code Parameters} object per line, with {@code name}, {@code text} and a
 * nested {@code meta.year}) and then runs {@link BuildIndex} over that file.
 *
 * <p>The build parameters are a clone of {@code argp} with {@code fields},
 * {@code filetype} and {@code inputPath} overridden, so the caller's
 * parameters are not modified by this method.
 *
 * <p>The writer is always closed and the temporary file is always deleted,
 * including when dumping the facts or building the index throws.
 *
 * @param argp tool arguments; must contain {@code indexPath} and whatever
 *             {@code Athena.init} needs to locate the dataset
 * @throws Exception if reading the facts, writing the file, or building the
 *                   index fails
 */
@Override
public void run(Parameters argp) throws Exception {
  System.err.println(argp.getString("indexPath"));
  List<FactQuery> facts = Athena.init(argp).getDataset().getAllFacts();
  File temp = FileUtility.createTemporary();
  try {
    PrintWriter pw = new PrintWriter(temp);
    try {
      for (FactQuery fq : facts) {
        // Presumably serialised as JSON by Parameters.toString(), matching
        // the JSONDocParser filetype set below -- confirm against Parameters.
        pw.println(Parameters.parseArray(
            "name", fq.id,
            "text", fq.text,
            "meta", Parameters.parseArray(
                "year", fq.rel
            )
        ));
      }
    } finally {
      // Close even when a fact fails to serialise so the handle is released.
      pw.close();
    }
    Parameters buildP = argp.clone();
    buildP.put("fields", "a");
    buildP.put("filetype", JSONDocParser.class.getName());
    buildP.put("inputPath", temp.getAbsolutePath());
    BuildIndex build = new BuildIndex();
    build.run(buildP, System.out);
  } finally {
    // Remove the temporary dump on both the success and the failure path.
    System.out.println("Cleaned up temporary file:"+temp.delete());
  }
}
示例4: testDefaultyBehavior
import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Indexes a directory holding one file of each kind (txt, xml, trectext,
 * trecweb, twitter) without configuring any parser, then checks the
 * collection statistics and the statistics of the {@code postings} and
 * {@code postings.krovetz} parts.
 *
 * <p>Assertions use JUnit's {@code (expected, actual)} argument order so that
 * a failure reports the right value as "expected".
 */
@Test
public void testDefaultyBehavior() throws Exception {
  File index = FileUtility.createTemporaryDirectory();
  File dataDir = FileUtility.createTemporaryDirectory();
  try {
    createTxtDoc(dataDir, "d1.txt"); // 1 doc
    createXMLDoc(dataDir, "d2.xml"); // 1 doc
    createTrecTextDoc(dataDir, "d3.trectext"); // 10 docs
    createTrecWebDoc(dataDir, "d4.trecweb"); // 10 docs
    createTwitterDoc(dataDir, "d5.twitter"); // 10 docs

    Parameters p = Parameters.create();
    p.set("inputPath", Collections.singletonList(dataDir.getAbsolutePath()));
    p.set("indexPath", index.getAbsolutePath());
    BuildIndex bi = new BuildIndex();
    bi.run(p, System.err);

    Retrieval ret = RetrievalFactory.instance(index.getAbsolutePath(), Parameters.create());
    FieldStatistics cs = ret.getCollectionStatistics("#lengths:part=lengths()");
    assertEquals(553, cs.collectionLength);
    assertEquals(32, cs.documentCount); // 1 + 1 + 10 + 10 + 10
    assertEquals(22, cs.maxLength);
    assertEquals(11, cs.minLength);

    IndexPartStatistics is1 = ret.getIndexPartStatistics("postings");
    assertEquals(553, is1.collectionLength);
    IndexPartStatistics is2 = ret.getIndexPartStatistics("postings.krovetz");
    assertEquals(553, is2.collectionLength);
    // should have about the same vocabs
    assertEquals(is1.vocabCount, is2.vocabCount);
  } finally {
    FSUtil.deleteDirectory(index);
    FSUtil.deleteDirectory(dataDir);
  }
}
示例5: testAllIsOneBehavior
import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Indexes six extension-less files (alternating txt and XML content) with
 * {@code filetype} set to {@code "txt"}, then checks the collection
 * statistics and the statistics of the {@code postings} and
 * {@code postings.krovetz} parts.
 *
 * <p>Assertions use JUnit's {@code (expected, actual)} argument order so that
 * a failure reports the right value as "expected".
 */
@Test
public void testAllIsOneBehavior() throws Exception {
  File index = FileUtility.createTemporaryDirectory();
  File dataDir = FileUtility.createTemporaryDirectory();
  try {
    createTxtDoc(dataDir, "d1"); // 1 doc
    createXMLDoc(dataDir, "d2"); // 1 doc
    createTxtDoc(dataDir, "d3"); // 1 doc
    createXMLDoc(dataDir, "d4"); // 1 doc
    createTxtDoc(dataDir, "d5"); // 1 doc
    createXMLDoc(dataDir, "d6"); // 1 doc

    Parameters p = Parameters.create();
    p.set("inputPath", Collections.singletonList(dataDir.getAbsolutePath()));
    p.set("indexPath", index.getAbsolutePath());
    p.set("filetype", "txt");
    BuildIndex bi = new BuildIndex();
    bi.run(p, System.err);

    Retrieval ret = RetrievalFactory.instance(index.getAbsolutePath(), Parameters.create());
    FieldStatistics cs = ret.getCollectionStatistics("#lengths:part=lengths()");
    assertEquals(129, cs.collectionLength);
    assertEquals(6, cs.documentCount);
    assertEquals(22, cs.maxLength);
    assertEquals(21, cs.minLength);

    IndexPartStatistics is1 = ret.getIndexPartStatistics("postings");
    assertEquals(129, is1.collectionLength);
    IndexPartStatistics is2 = ret.getIndexPartStatistics("postings.krovetz");
    assertEquals(129, is2.collectionLength);
    // should have about the same vocabs
    assertEquals(is1.vocabCount, is2.vocabCount);
  } finally {
    FSUtil.deleteDirectory(index);
    FSUtil.deleteDirectory(dataDir);
  }
}
示例6: testManualOverrideBehavior
import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Indexes a directory while registering {@code externalParsers} that map the
 * file types {@code qqe}, {@code qwe} and {@code trecweb} to explicit parser
 * classes, then checks the collection statistics and the statistics of the
 * {@code postings} and {@code postings.krovetz} parts.
 *
 * <p>Note that {@code trecweb} is deliberately mapped to
 * {@code TrecTextParser}, so the trecweb-formatted file {@code d4.trecweb} is
 * read with the trectext parser.
 *
 * <p>Assertions use JUnit's {@code (expected, actual)} argument order so that
 * a failure reports the right value as "expected".
 */
@Test
public void testManualOverrideBehavior() throws Exception {
  File index = FileUtility.createTemporaryDirectory();
  File dataDir = FileUtility.createTemporaryDirectory();
  try {
    createTrecTextDoc(dataDir, "d1.qqe"); // 10 docs - trectext
    createTrecWebDoc(dataDir, "d2.qwe"); // 10 docs - trecweb
    createTrecTextDoc(dataDir, "d3.trectext"); // 10 docs - trectext
    createTrecWebDoc(dataDir, "d4.trecweb"); // 10 docs - trecweb content, parsed as trectext via the override below
    createTxtDoc(dataDir, "d5.txt"); // 1 docs - txt

    Parameters p = Parameters.create();
    p.set("inputPath", Collections.singletonList(dataDir.getAbsolutePath()));
    p.set("indexPath", index.getAbsolutePath());
    p.set("parser", Parameters.create());

    // One entry per file type whose parser is chosen by hand.
    List<Parameters> kinds = new ArrayList<Parameters>();
    kinds.add(Parameters.parseString("{\"filetype\" : \"qqe\", \"class\" :\"" + TrecTextParser.class.getName() + "\"}"));
    kinds.add(Parameters.parseString("{\"filetype\" : \"qwe\", \"class\" :\"" + TrecWebParser.class.getName() + "\"}"));
    kinds.add(Parameters.parseString("{\"filetype\" : \"trecweb\", \"class\" :\"" + TrecTextParser.class.getName() + "\"}"));
    p.getMap("parser").put("externalParsers", kinds);

    BuildIndex bi = new BuildIndex();
    bi.run(p, System.err);

    Retrieval ret = RetrievalFactory.instance(index.getAbsolutePath(), Parameters.create());
    FieldStatistics cs = ret.getCollectionStatistics("#lengths:part=lengths()");
    assertEquals(622, cs.collectionLength); // trecweb with trectext will be empty
    assertEquals(41, cs.documentCount); // 10 + 10 + 10 + 10 + 1
    assertEquals(22, cs.maxLength);
    assertEquals(20, cs.minLength);

    IndexPartStatistics is1 = ret.getIndexPartStatistics("postings");
    assertEquals(622, is1.collectionLength);
    IndexPartStatistics is2 = ret.getIndexPartStatistics("postings.krovetz");
    assertEquals(622, is2.collectionLength);
    // should have about the same vocabs
    assertEquals(is1.vocabCount, is2.vocabCount);
  } finally {
    FSUtil.deleteDirectory(index);
    FSUtil.deleteDirectory(dataDir);
  }
}
示例7: main
import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Command-line entry point: selects one of the registered tools by the
 * {@code tool} argument and runs it with the parsed arguments.
 *
 * <p>When no {@code tool} argument is given, the help text is shown and the
 * method returns normally. When a name is given but matches no registered
 * tool, the help text is shown and an exception is thrown.
 *
 * @param args raw command-line arguments, parsed with {@code Parameters.parseArgs}
 * @throws IllegalArgumentException if the requested tool name is not registered
 * @throws Exception whatever the selected tool's {@code run} throws
 */
public static void main(String [] args) throws Exception {
  // Registration order is also the lookup order: the first name match wins.
  final Tool[] registry = {
    new ExtractTimexSentences(),
    new ExtractWebTimexSentences(),
    new SentenceCollector(),
    new DocDateLMCollector(),
    new DocDatesBuilder(),
    new WikipediaToHTML(),
    new WikipediaYearFinder(),
    new WikiYearParser(),
    new DateStatsExtractor(),
    new FactsToQrels(),
    new DataCounts(),
    new SampleFacts(),
    new TermCounts(), // aka stopword detection
    new DateDeltaExtractor(),
    new GetPubdate(),
    new CreateFactIndex(),

    // evaluate
    new GenerateFactRun(),
    new GenerateQueryRun(),
    new EvaluateRun(),
    new CompareRuns(),

    // collect publication dates
    new CollectPubDates(),
    new RobustCollectPubDates(),

    // generate ambiguous queries
    new FindBigramEntities(),
    new EntityLinkExtractor(),
    new PageToSentenceTSV(),
    new GalagoTool(new BuildIndex()),
    new GalagoTool(new DumpNamesLengths()),
    new GalagoTool(new DumpKeysFn()),

    // linking
    new ExtractDatedSentences(),

    // new standard-annotation
    new StanfordNERJSONLines(),
  };

  Parameters cli = Parameters.parseArgs(args);
  if (cli.containsKey("tool")) {
    String requested = cli.getString("tool");
    for (int i = 0; i < registry.length; i++) {
      Tool candidate = registry[i];
      if (candidate.getName().equals(requested)) {
        candidate.run(cli);
        return;
      }
    }
    // A name was supplied but nothing matched: show usage, then fail loudly.
    showHelp(registry);
    throw new IllegalArgumentException("No tool found for `"+requested+"'");
  } else {
    // No tool requested at all: usage only, not an error.
    showHelp(registry);
    return;
  }
}