当前位置: 首页>>代码示例>>Java>>正文


Java DocumentStreamParser.nextDocument方法代码示例

本文整理汇总了Java中org.lemurproject.galago.core.parse.DocumentStreamParser.nextDocument方法的典型用法代码示例。如果您正苦于以下问题:Java DocumentStreamParser.nextDocument方法的具体用法?Java DocumentStreamParser.nextDocument怎么用?Java DocumentStreamParser.nextDocument使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.lemurproject.galago.core.parse.DocumentStreamParser的用法示例。


在下文中一共展示了DocumentStreamParser.nextDocument方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: forEachDocument

import org.lemurproject.galago.core.parse.DocumentStreamParser; //导入方法依赖的package包/类
/**
 * Passes every document produced by {@code parser} to {@code action}, in the
 * order the parser yields them, and closes the parser afterwards.
 *
 * <p>Iteration stops at the first {@code null} returned by
 * {@code nextDocument()}. The parser is closed even when reading or
 * processing throws.
 *
 * @param parser source of documents; closed by this method
 * @param action callback invoked once per non-null document
 * @throws IOException declared on this method; the visible code does not show
 *     which of the calls made here raises it
 */
public static void forEachDocument(DocumentStreamParser parser, Operation<Document> action) throws IOException {
  try {
    for (Document next = parser.nextDocument(); next != null; next = parser.nextDocument()) {
      action.process(next);
    }
  } finally {
    parser.close();
  }
}
 
开发者ID:jjfiv,项目名称:ecir2015timebooks,代码行数:12,代码来源:GalagoUtil.java

示例2: simpleParse

import org.lemurproject.galago.core.parse.DocumentStreamParser; //导入方法依赖的package包/类
/**
 * Writes three DBpedia abstract triples to a temporary file and checks what
 * {@code DbpediaAbstractParser} yields for them.
 *
 * <p>The first triple (Anarchism) has an empty abstract; the assertions expect
 * it to be skipped, so only the Autism and Achilles documents come back. Once
 * the stream is exhausted, repeated {@code nextDocument()} calls must keep
 * returning {@code null}.
 *
 * @throws IOException if the temporary file cannot be created or written, or
 *     if the parser fails while reading or closing
 */
@Test
public void simpleParse() throws IOException {
  String data = "<http://dbpedia.org/resource/Anarchism> <http://dbpedia.org/ontology/abstract> \"\"@en .\n" +
      "<http://dbpedia.org/resource/Autism> <http://dbpedia.org/ontology/abstract> \"Autism is a disorder of neural development characterized by impaired social interaction and communication, and by restricted and repetitive behavior.\"@en .\n" +
      "<http://dbpedia.org/resource/Achilles> <http://dbpedia.org/ontology/abstract> \"In Greek mythology, Achilles was a Greek hero of the Trojan War and the central character and greatest warrior of Homer's Iliad.\"@en .\n";

  File tmp = File.createTempFile("fake-dbpedia-abstracts", ".ttl");
  try {
    Utility.copyStringToFile(data, tmp);
    DocumentStreamParser ps = new DbpediaAbstractParser(DocumentSplitFactory.file(tmp), Parameters.instance());
    try {
      Document autism = ps.nextDocument();
      assertNotNull(autism);
      Document achilles = ps.nextDocument();
      assertNotNull(achilles);
      // Asked twice on purpose: the end-of-stream signal must be repeatable.
      assertNull(ps.nextDocument());
      assertNull(ps.nextDocument());

      assertEquals("Autism", autism.name);
      assertEquals("Achilles", achilles.name);
      assertEquals("<title>Achilles</title>\n<body>In Greek mythology, Achilles was a Greek hero of the Trojan War and the central character and greatest warrior of Homer's Iliad.</body>", achilles.text);
    } finally {
      // Release the parser before the file is deleted; an open handle can
      // make File.delete() fail on some platforms.
      ps.close();
    }
  } finally {
    assertTrue(tmp.delete());
  }
}
 
开发者ID:jjfiv,项目名称:ecir2015timebooks,代码行数:28,代码来源:DbpediaAbstractParserTest.java

示例3: DocumentStreamIterator

import org.lemurproject.galago.core.parse.DocumentStreamParser; //导入方法依赖的package包/类
public DocumentStreamIterator(DocumentStreamParser parser) throws IOException {
  this.parser = parser;
  this.current = parser.nextDocument();
}
 
开发者ID:jjfiv,项目名称:ecir2015timebooks,代码行数:5,代码来源:GalagoUtil.java

示例4: testSimpleData

import org.lemurproject.galago.core.parse.DocumentStreamParser; //导入方法依赖的package包/类
/**
 * Runs the {@code doc-date-lm-collector} tool with {@code --what=books} and
 * {@code --dataset=none} over four tab-separated input rows, then reads the
 * output back through {@code DocDateSketchParser}.
 *
 * <p>Expected output: three documents. The first two carry book {@code doc0}
 * with years 1981 and 1982 in either order; the third carries book
 * {@code doc1} with year 1783.
 * NOTE(review): presumably the two 1982 rows of doc0 are merged into one
 * document by the collector — confirm against the tool.
 *
 * @throws Exception if the tool, the parser, or temp-file handling fails
 */
@Test
public void testSimpleData() throws Exception {
  File inF = null;
  File outF = null;
  try {
    inF = File.createTempFile("asd", "jkl");
    outF = File.createTempFile("asd", "jkl");

    Utility.copyStringToFile(
      "doc0\t0\t0\t1982\tThis is the way things are, here in 1982.\n" +
      "doc0\t0\t0\t1981\tThis is the way things were, last year, in 1981.\n" +
      "doc0\t0\t0\t1982\tBut 1982 wasn't always this good.\n" +
      "doc1\t0\t0\t1783\t1783 was a year that I keep using for examples.\n",
      inF
    );

    Main.main(new String[]{
      "--tool=doc-date-lm-collector",
      "--dataset=none",
      "--what=books",
      "--input=" + inF.getAbsolutePath(),
      "--output=" + outF.getAbsolutePath()
    });

    DocumentSplit written = DocumentSplitFactory.file(outF);
    DocumentStreamParser docsSP = new DocDateSketchParser(written, Parameters.instance());

    List<Document> docs = new ArrayList<Document>();
    try {
      while (true) {
        Document d = docsSP.nextDocument();
        if (d == null) {
          break;
        }
        docs.add(d);
      }
    } finally {
      // Release the parser before the output file is deleted below.
      docsSP.close();
    }

    Assert.assertEquals(3, docs.size());
    Assert.assertEquals("doc0", docs.get(0).metadata.get("book"));
    Assert.assertEquals("doc0", docs.get(1).metadata.get("book"));
    Assert.assertEquals("doc1", docs.get(2).metadata.get("book"));
    Assert.assertEquals("1783", docs.get(2).metadata.get("year"));
    // The two doc0 entries may come back in either year order.
    String year0 = docs.get(0).metadata.get("year");
    String year1 = docs.get(1).metadata.get("year");
    Assert.assertTrue(("1981".equals(year0) && "1982".equals(year1)) || ("1982".equals(year0) && "1981".equals(year1)));
  } finally {
    // Null-guard instead of asserting non-null: if temp-file creation threw,
    // an assertion here would hide the original exception.
    if (inF != null) {
      Assert.assertTrue(inF.delete());
    }
    if (outF != null) {
      Assert.assertTrue(outF.delete());
    }
  }
}
 
开发者ID:jjfiv,项目名称:ecir2015timebooks,代码行数:53,代码来源:DocDateLMCollectorTest.java

示例5: testByPages

import org.lemurproject.galago.core.parse.DocumentStreamParser; //导入方法依赖的package包/类
/**
 * Runs the {@code doc-date-lm-collector} tool with {@code --what=pages} and
 * {@code --dataset=none} over four tab-separated input rows, then reads the
 * output back through {@code DocDateSketchParser}.
 *
 * <p>Expected output: three documents. The first two carry book
 * {@code doc0_0} with years 1981 and 1982 in either order; the third carries
 * book {@code doc0_1} with year 1783.
 * NOTE(review): the {@code _0}/{@code _1} suffix presumably comes from the
 * second input column (page number) — confirm against the collector.
 *
 * @throws Exception if the tool, the parser, or temp-file handling fails
 */
@Test
public void testByPages() throws Exception {
  File inF = null;
  File outF = null;
  try {
    inF = File.createTempFile("asdf", "jkl");
    outF = File.createTempFile("asdf", "jkl");

    Utility.copyStringToFile(
        "doc0\t0\t0\t1982\tThis is the way things are, here in 1982.\n" +
            "doc0\t0\t0\t1981\tThis is the way things were, last year, in 1981.\n" +
            "doc0\t0\t0\t1982\tBut 1982 wasn't always this good.\n" +
            "doc0\t1\t0\t1783\t1783 was a year that I keep using for examples.\n",
        inF
    );

    Main.main(new String[]{
        "--tool=doc-date-lm-collector",
        "--dataset=none",
        "--what=pages",
        "--input=" + inF.getAbsolutePath(),
        "--output=" + outF.getAbsolutePath()
    });

    DocumentSplit written = DocumentSplitFactory.file(outF);
    DocumentStreamParser docsSP = new DocDateSketchParser(written, Parameters.instance());

    List<Document> docs = new ArrayList<Document>();
    try {
      while (true) {
        Document d = docsSP.nextDocument();
        if (d == null) {
          break;
        }
        docs.add(d);
      }
    } finally {
      // Release the parser before the output file is deleted below.
      docsSP.close();
    }

    Assert.assertEquals(3, docs.size());
    Assert.assertEquals("doc0_0", docs.get(0).metadata.get("book"));
    Assert.assertEquals("doc0_0", docs.get(1).metadata.get("book"));
    Assert.assertEquals("doc0_1", docs.get(2).metadata.get("book"));
    Assert.assertEquals("1783", docs.get(2).metadata.get("year"));
    // The two doc0_0 entries may come back in either year order.
    String year0 = docs.get(0).metadata.get("year");
    String year1 = docs.get(1).metadata.get("year");
    Assert.assertTrue(("1981".equals(year0) && "1982".equals(year1)) || ("1982".equals(year0) && "1981".equals(year1)));
  } finally {
    if (inF != null) {
      Assert.assertTrue(inF.delete());
    }
    if (outF != null) {
      Assert.assertTrue(outF.delete());
    }
  }
}
 
开发者ID:jjfiv,项目名称:ecir2015timebooks,代码行数:55,代码来源:DocDateLMCollectorTest.java

示例6: testFilterYears

import org.lemurproject.galago.core.parse.DocumentStreamParser; //导入方法依赖的package包/类
/**
 * Runs the {@code doc-date-lm-collector} tool with {@code --dataset=books}
 * and {@code --what=books} over four tab-separated input rows, then reads the
 * output back through {@code DocDateSketchParser}.
 *
 * <p>Expected output: a single document for book {@code doc1} with year 1783.
 * NOTE(review): presumably the {@code books} dataset setting filters out the
 * 1981/1982 rows by year — confirm against the collector.
 *
 * @throws Exception if the tool, the parser, or temp-file handling fails
 */
@Test
public void testFilterYears() throws Exception {
  File inF = null;
  File outF = null;
  try {
    inF = File.createTempFile("asdf", "jkl");
    outF = File.createTempFile("asdf", "jkl");

    Utility.copyStringToFile(
      "doc0\t0\t0\t1982\tThis is the way things are, here in 1982.\n" +
        "doc0\t0\t0\t1981\tThis is the way things were, last year, in 1981.\n" +
        "doc0\t0\t0\t1982\tBut 1982 wasn't always this good.\n" +
        "doc1\t0\t0\t1783\t1783 was a year that I keep using for examples.\n",
      inF
    );

    Main.main(new String[]{
      "--tool=doc-date-lm-collector",
      "--dataset=books",
      "--what=books",
      "--input=" + inF.getAbsolutePath(),
      "--output=" + outF.getAbsolutePath()
    });

    DocumentSplit written = DocumentSplitFactory.file(outF);
    DocumentStreamParser docsSP = new DocDateSketchParser(written, Parameters.instance());

    List<Document> docs = new ArrayList<Document>();
    try {
      while (true) {
        Document d = docsSP.nextDocument();
        if (d == null) {
          break;
        }
        docs.add(d);
      }
    } finally {
      // Release the parser before the output file is deleted below.
      docsSP.close();
    }

    Assert.assertEquals(1, docs.size());
    Assert.assertEquals("doc1", docs.get(0).metadata.get("book"));
    Assert.assertEquals("1783", docs.get(0).metadata.get("year"));
  } finally {
    if (inF != null) {
      Assert.assertTrue(inF.delete());
    }
    if (outF != null) {
      Assert.assertTrue(outF.delete());
    }
  }
}
 
开发者ID:jjfiv,项目名称:ecir2015timebooks,代码行数:48,代码来源:DocDateLMCollectorTest.java


注:本文中的org.lemurproject.galago.core.parse.DocumentStreamParser.nextDocument方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。