当前位置: 首页>>代码示例>>Java>>正文


Java BuildIndex类代码示例

本文整理汇总了Java中org.lemurproject.galago.core.tools.apps.BuildIndex的典型用法代码示例。如果您正苦于以下问题:Java BuildIndex类的具体用法是什么?Java BuildIndex怎么用?有哪些Java BuildIndex的使用示例?那么,这里精选的类代码示例或许可以为您提供帮助。


BuildIndex类属于org.lemurproject.galago.core.tools.apps包。下文共展示了BuildIndex类的7个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或觉得有用的代码点赞,您的评价将有助于系统推荐出更好的Java代码示例。

示例1: testBuildIndexSpecific

import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Builds an index from a single text file with the corpus configured to use
 * {@code WebDocumentSerializer}, then opens the resulting corpus and checks
 * the serializer class recorded in its manifest and the document returned by
 * the corpus iterator.
 *
 * @throws Exception if creating the fixture, building the index or reading
 *                   the corpus fails
 */
@Test
public void testBuildIndexSpecific() throws Exception {
    final File workDir = FileUtility.createTemporaryDirectory();
    try {
        final File source = new File(workDir, "input.txt");
        final File indexDir = new File(workDir, "test.galago");
        StreamUtil.copyStringToFile("this is a document of some kind", source);

        // The serializer under test is passed to the corpus via "corpusParameters".
        final String serializerName = WebDocumentSerializer.class.getName();
        final Parameters corpusParams = Parameters.parseArray(
                "documentSerializerClass", serializerName);
        final Parameters buildParams = Parameters.parseArray(
                "inputPath", source,
                "indexPath", indexDir,
                "corpusParameters", corpusParams);
        BuildIndex.execute(buildParams, System.out);

        // NOTE(review): the reader is never closed here — confirm whether CorpusReader needs it.
        final String corpusPath = new File(indexDir, "corpus").getAbsolutePath();
        final CorpusReader corpus = new CorpusReader(corpusPath);
        assertEquals(serializerName, corpus.getManifest().getString("documentSerializerClass"));
        System.out.println(corpus.serializer.getClass());

        // With this serializer both the raw text and the terms are expected to be present.
        final Document stored = corpus.getIterator().getDocument(Document.DocumentComponents.JustTerms);
        assertNotNull(stored);
        assertNotNull(stored.text);
        assertNotNull(stored.terms);
        assertEquals(7, stored.terms.size());
        assertEquals("this", stored.terms.get(0));
    } finally {
        // Always remove the scratch directory, even when an assertion fails.
        FSUtil.deleteDirectory(workDir);
    }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:29,代码来源:WebDocumentSerializerTest.java

示例2: serializerClass

import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Builds an index from a single text file with the corpus configured to use
 * {@code TokenizedDocumentSerializer}, then opens the resulting corpus and
 * checks the serializer class recorded in its manifest and the document
 * returned by the corpus iterator.
 *
 * @throws Exception if creating the fixture, building the index or reading
 *                   the corpus fails
 */
@Test
public void serializerClass() throws Exception {
  final File workDir = FileUtility.createTemporaryDirectory();
  try {
    final File source = new File(workDir, "input.txt");
    final File indexDir = new File(workDir, "test.galago");
    StreamUtil.copyStringToFile("this is a document of some kind", source);

    // The serializer under test is passed to the corpus via "corpusParameters".
    final String serializerName = TokenizedDocumentSerializer.class.getName();
    final Parameters corpusParams = Parameters.parseArray(
        "documentSerializerClass", serializerName);
    final Parameters buildParams = Parameters.parseArray(
        "inputPath", source,
        "indexPath", indexDir,
        "corpusParameters", corpusParams);
    BuildIndex.execute(buildParams, System.out);

    // NOTE(review): the reader is never closed here — confirm whether CorpusReader needs it.
    final String corpusPath = new File(indexDir, "corpus").getAbsolutePath();
    final CorpusReader corpus = new CorpusReader(corpusPath);
    assertEquals(serializerName, corpus.getManifest().getString("documentSerializerClass"));
    System.out.println(corpus.serializer.getClass());

    // With this serializer the terms are expected to be present but the raw text is not.
    final Document stored = corpus.getIterator().getDocument(Document.DocumentComponents.JustTerms);
    assertNotNull(stored);
    assertNull(stored.text);
    assertNotNull(stored.terms);
    assertEquals(7, stored.terms.size());
    assertEquals("this", stored.terms.get(0));
  } finally {
    // Always remove the scratch directory, even when an assertion fails.
    FSUtil.deleteDirectory(workDir);
  }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:29,代码来源:TokenizedDocumentSerializerTest.java

示例3: run

import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Writes every fact of the dataset to a temporary file, one serialized
 * {@code Parameters} object per line (keys {@code name}, {@code text} and
 * {@code meta.year}), and then builds an index from that file.
 *
 * <p>The build parameters are a clone of {@code argp} with {@code fields},
 * {@code filetype} and {@code inputPath} overridden, so the caller's
 * parameters are not modified by this method.
 *
 * <p>The temporary file is deleted whether or not writing or indexing
 * succeeds, and the outcome of the deletion is printed to standard output.
 *
 * @param argp tool arguments; must contain {@code indexPath}
 * @throws Exception if the facts cannot be loaded, the temporary file cannot
 *                   be written completely, or the index build fails
 */
@Override
public void run(Parameters argp) throws Exception {
  System.err.println(argp.getString("indexPath"));
  List<FactQuery> facts = Athena.init(argp).getDataset().getAllFacts();

  File temp = FileUtility.createTemporary();
  try {
    PrintWriter pw = new PrintWriter(temp);
    try {
      for (FactQuery fq : facts) {
        pw.println(Parameters.parseArray(
            "name", fq.id,
            "text", fq.text,
            "meta", Parameters.parseArray(
                "year", fq.rel
            )
        ));
      }

      // PrintWriter never throws on a failed write; it only sets an error flag.
      // Check it so that a truncated fact file is not silently indexed.
      if (pw.checkError()) {
        throw new java.io.IOException(
            "Failed to write facts to temporary file " + temp.getAbsolutePath());
      }
    } finally {
      // Release the file handle even if serializing a fact throws.
      pw.close();
    }

    Parameters buildP = argp.clone();
    buildP.put("fields", "a");
    buildP.put("filetype", JSONDocParser.class.getName());
    buildP.put("inputPath", temp.getAbsolutePath());

    BuildIndex build = new BuildIndex();
    build.run(buildP, System.out);
  } finally {
    // Remove the scratch file on both the success and the failure path.
    System.out.println("Cleaned up temporary file:"+temp.delete());
  }
}
 
开发者ID:jjfiv,项目名称:ecir2015timebooks,代码行数:31,代码来源:CreateFactIndex.java

示例4: testDefaultyBehavior

import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Indexes a directory holding one file of each fixture kind (txt, xml,
 * trectext, trecweb, twitter) without setting a {@code filetype}, then checks
 * the collection statistics reported by the resulting index.
 *
 * <p>Assertions use JUnit's {@code (expected, actual)} argument order so that
 * failure messages report the values the right way round.
 *
 * @throws Exception if creating the fixtures, building the index or querying
 *                   the statistics fails
 */
@Test
public void testDefaultyBehavior() throws Exception {
    File index = FileUtility.createTemporaryDirectory();
    File dataDir = FileUtility.createTemporaryDirectory();
    try {

        createTxtDoc(dataDir, "d1.txt"); // 1 doc
        createXMLDoc(dataDir, "d2.xml"); // 1 doc
        createTrecTextDoc(dataDir, "d3.trectext"); // 10 docs
        createTrecWebDoc(dataDir, "d4.trecweb"); // 10 docs
        createTwitterDoc(dataDir, "d5.twitter"); // 10 docs

        Parameters p = Parameters.create();
        p.set("inputPath", Collections.singletonList(dataDir.getAbsolutePath()));
        p.set("indexPath", index.getAbsolutePath());

        BuildIndex bi = new BuildIndex();
        bi.run(p, System.err);

        Retrieval ret = RetrievalFactory.instance(index.getAbsolutePath(), Parameters.create());

        // 1 + 1 + 10 + 10 + 10 documents from the five fixture files.
        FieldStatistics cs = ret.getCollectionStatistics("#lengths:part=lengths()");
        assertEquals(553, cs.collectionLength);
        assertEquals(32, cs.documentCount);
        assertEquals(22, cs.maxLength);
        assertEquals(11, cs.minLength);

        IndexPartStatistics is1 = ret.getIndexPartStatistics("postings");
        assertEquals(553, is1.collectionLength);

        IndexPartStatistics is2 = ret.getIndexPartStatistics("postings.krovetz");
        assertEquals(553, is2.collectionLength);

        // should have about the same vocabs
        assertEquals(is1.vocabCount, is2.vocabCount);

    } finally {
        // Remove both scratch directories regardless of the test outcome.
        FSUtil.deleteDirectory(index);
        FSUtil.deleteDirectory(dataDir);
    }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:42,代码来源:UniversalParserTest.java

示例5: testAllIsOneBehavior

import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Indexes six extension-less fixture files (alternating txt and xml fixtures)
 * with {@code filetype} set to {@code "txt"}, then checks the collection
 * statistics reported by the resulting index; six documents are expected.
 *
 * <p>Assertions use JUnit's {@code (expected, actual)} argument order so that
 * failure messages report the values the right way round.
 *
 * @throws Exception if creating the fixtures, building the index or querying
 *                   the statistics fails
 */
@Test
public void testAllIsOneBehavior() throws Exception {
    File index = FileUtility.createTemporaryDirectory();
    File dataDir = FileUtility.createTemporaryDirectory();
    try {

        createTxtDoc(dataDir, "d1"); // 1 doc
        createXMLDoc(dataDir, "d2"); // 1 doc
        createTxtDoc(dataDir, "d3"); // 1 doc
        createXMLDoc(dataDir, "d4"); // 1 doc
        createTxtDoc(dataDir, "d5"); // 1 doc
        createXMLDoc(dataDir, "d6"); // 1 doc

        Parameters p = Parameters.create();
        p.set("inputPath", Collections.singletonList(dataDir.getAbsolutePath()));
        p.set("indexPath", index.getAbsolutePath());
        // The file names carry no extension, so the type is given explicitly.
        p.set("filetype", "txt");

        BuildIndex bi = new BuildIndex();
        bi.run(p, System.err);

        Retrieval ret = RetrievalFactory.instance(index.getAbsolutePath(), Parameters.create());

        FieldStatistics cs = ret.getCollectionStatistics("#lengths:part=lengths()");
        assertEquals(129, cs.collectionLength);
        assertEquals(6, cs.documentCount);
        assertEquals(22, cs.maxLength);
        assertEquals(21, cs.minLength);

        IndexPartStatistics is1 = ret.getIndexPartStatistics("postings");
        assertEquals(129, is1.collectionLength);

        IndexPartStatistics is2 = ret.getIndexPartStatistics("postings.krovetz");
        assertEquals(129, is2.collectionLength);

        // should have about the same vocabs
        assertEquals(is1.vocabCount, is2.vocabCount);

    } finally {
        // Remove both scratch directories regardless of the test outcome.
        FSUtil.deleteDirectory(index);
        FSUtil.deleteDirectory(dataDir);
    }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:44,代码来源:UniversalParserTest.java

示例6: testManualOverrideBehavior

import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Indexes fixture files while registering {@code externalParsers} that map
 * the {@code qqe}, {@code qwe} and {@code trecweb} file types to explicit
 * parser classes, then checks the collection statistics reported by the
 * resulting index.
 *
 * <p>Assertions use JUnit's {@code (expected, actual)} argument order so that
 * failure messages report the values the right way round.
 *
 * @throws Exception if creating the fixtures, building the index or querying
 *                   the statistics fails
 */
@Test
public void testManualOverrideBehavior() throws Exception {
    File index = FileUtility.createTemporaryDirectory();
    File dataDir = FileUtility.createTemporaryDirectory();
    try {

        createTrecTextDoc(dataDir, "d1.qqe"); // 10 docs - trectext
        createTrecWebDoc(dataDir, "d2.qwe"); // 10 docs - trecweb
        createTrecTextDoc(dataDir, "d3.trectext"); // 10 docs - trectext
        createTrecWebDoc(dataDir, "d4.trecweb"); // 10 docs - mapped to the trectext parser by the override below
        createTxtDoc(dataDir, "d5.txt"); // 1 docs - txt

        Parameters p = Parameters.create();
        p.set("inputPath", Collections.singletonList(dataDir.getAbsolutePath()));
        p.set("indexPath", index.getAbsolutePath());
        p.set("parser", Parameters.create());

        // Each entry maps a file type to the parser class that should handle it.
        List<Parameters> kinds = new ArrayList<Parameters>();
        kinds.add(Parameters.parseString("{\"filetype\" : \"qqe\", \"class\" :\"" + TrecTextParser.class.getName() + "\"}"));
        kinds.add(Parameters.parseString("{\"filetype\" : \"qwe\", \"class\" :\"" + TrecWebParser.class.getName() + "\"}"));
        kinds.add(Parameters.parseString("{\"filetype\" : \"trecweb\", \"class\" :\"" + TrecTextParser.class.getName() + "\"}"));
        p.getMap("parser").put("externalParsers", kinds);

        BuildIndex bi = new BuildIndex();
        bi.run(p, System.err);

        Retrieval ret = RetrievalFactory.instance(index.getAbsolutePath(), Parameters.create());

        FieldStatistics cs = ret.getCollectionStatistics("#lengths:part=lengths()");
        assertEquals(622, cs.collectionLength); // trecweb with trectext will be empty
        assertEquals(41, cs.documentCount);
        assertEquals(22, cs.maxLength);
        assertEquals(20, cs.minLength);

        IndexPartStatistics is1 = ret.getIndexPartStatistics("postings");
        assertEquals(622, is1.collectionLength);

        IndexPartStatistics is2 = ret.getIndexPartStatistics("postings.krovetz");
        assertEquals(622, is2.collectionLength);

        // should have about the same vocabs
        assertEquals(is1.vocabCount, is2.vocabCount);

    } finally {
        // Remove both scratch directories regardless of the test outcome.
        FSUtil.deleteDirectory(index);
        FSUtil.deleteDirectory(dataDir);
    }
}
 
开发者ID:teanalab,项目名称:demidovii,代码行数:49,代码来源:UniversalParserTest.java

示例7: main

import org.lemurproject.galago.core.tools.apps.BuildIndex; //导入依赖的package包/类
/**
 * Command-line entry point. Parses {@code args} into parameters, looks up the
 * tool whose name equals the {@code tool} parameter and runs it with those
 * parameters.
 *
 * <p>When no {@code tool} parameter is given, the help listing is shown and
 * the method returns. When the requested name matches no registered tool, the
 * help listing is shown and an exception is thrown.
 *
 * @param args raw command-line arguments
 * @throws IllegalArgumentException if {@code tool} names no registered tool
 * @throws Exception whatever the selected tool throws while running
 */
public static void main(String [] args) throws Exception {
  final Tool[] registry = new Tool[] {
    new ExtractTimexSentences(),
    new ExtractWebTimexSentences(),

    new SentenceCollector(),
    new DocDateLMCollector(),
    new DocDatesBuilder(),

    new WikipediaToHTML(),

    new WikipediaYearFinder(),
    new WikiYearParser(),

    new DateStatsExtractor(),
    new FactsToQrels(),
    new DataCounts(),
    new SampleFacts(),
    new TermCounts(), // a.k.a. stopword detection
    new DateDeltaExtractor(),
    new GetPubdate(),

    new CreateFactIndex(),

    // evaluation
    new GenerateFactRun(),
    new GenerateQueryRun(),
    new EvaluateRun(),
    new CompareRuns(),

    // publication-date collection
    new CollectPubDates(),
    new RobustCollectPubDates(),

    // ambiguous-query generation
    new FindBigramEntities(),
    new EntityLinkExtractor(),

    new PageToSentenceTSV(),
    new GalagoTool(new BuildIndex()),
    new GalagoTool(new DumpNamesLengths()),
    new GalagoTool(new DumpKeysFn()),

    // linking
    new ExtractDatedSentences(),


    // new standard-annotation
    new StanfordNERJSONLines(),
  };

  final Parameters argp = Parameters.parseArgs(args);

  if (argp.containsKey("tool")) {
    final String requested = argp.getString("tool");

    // Scan in registration order and stop at the first tool with a matching name.
    Tool selected = null;
    for (int i = 0; i < registry.length && selected == null; i++) {
      if (registry[i].getName().equals(requested)) {
        selected = registry[i];
      }
    }

    if (selected != null) {
      selected.run(argp);
      return;
    }

    // Unknown tool name: show what is available, then fail loudly.
    showHelp(registry);
    throw new IllegalArgumentException("No tool found for `"+requested+"'");
  }

  // No tool requested at all: just show the help listing.
  showHelp(registry);
}
 
开发者ID:jjfiv,项目名称:ecir2015timebooks,代码行数:70,代码来源:Main.java


注:本文中的org.lemurproject.galago.core.tools.apps.BuildIndex类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。