本文整理汇总了Java中org.lemurproject.galago.core.parse.DocumentStreamParser.nextDocument方法的典型用法代码示例。如果您正苦于以下问题:Java DocumentStreamParser.nextDocument方法的具体用法?Java DocumentStreamParser.nextDocument怎么用?Java DocumentStreamParser.nextDocument使用的例子?那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.lemurproject.galago.core.parse.DocumentStreamParser的用法示例。
在下文中一共展示了DocumentStreamParser.nextDocument方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: forEachDocument
import org.lemurproject.galago.core.parse.DocumentStreamParser; //导入方法依赖的package包/类
/**
 * Hands every document produced by {@code parser} to {@code action}, then closes the parser.
 *
 * <p>Documents are pulled with {@code nextDocument()} until it returns {@code null}. The parser
 * is closed in a {@code finally} clause, so {@code close()} runs whether the loop completes
 * or an exception escapes from it.
 *
 * @param parser source of documents; {@code close()} is called on it before this method returns
 * @param action callback invoked once per non-null document, in the order they are read
 * @throws IOException propagated from the calls made inside this method
 */
public static void forEachDocument(DocumentStreamParser parser, Operation<Document> action) throws IOException {
    try {
        for (Document next = parser.nextDocument(); next != null; next = parser.nextDocument()) {
            action.process(next);
        }
    } finally {
        parser.close();
    }
}
示例2: simpleParse
import org.lemurproject.galago.core.parse.DocumentStreamParser; //导入方法依赖的package包/类
/**
 * Writes three abstract triples to a temporary {@code .ttl} file and parses them back with
 * {@code DbpediaAbstractParser}.
 *
 * <p>The first triple (Anarchism) has an empty abstract; the test expects only two documents
 * (Autism, then Achilles), followed by {@code null} on every further {@code nextDocument()}
 * call. It also pins the exact {@code <title>/<body>} text generated for Achilles.
 *
 * @throws IOException if the temp file cannot be created or written, or the parser fails
 */
@Test
public void simpleParse() throws IOException {
    String data = "<http://dbpedia.org/resource/Anarchism> <http://dbpedia.org/ontology/abstract> \"\"@en .\n" +
        "<http://dbpedia.org/resource/Autism> <http://dbpedia.org/ontology/abstract> \"Autism is a disorder of neural development characterized by impaired social interaction and communication, and by restricted and repetitive behavior.\"@en .\n" +
        "<http://dbpedia.org/resource/Achilles> <http://dbpedia.org/ontology/abstract> \"In Greek mythology, Achilles was a Greek hero of the Trojan War and the central character and greatest warrior of Homer's Iliad.\"@en .\n";
    File tmp = File.createTempFile("fake-dbpedia-abstracts", ".ttl");
    try {
        Utility.copyStringToFile(data, tmp);
        DocumentStreamParser ps = new DbpediaAbstractParser(DocumentSplitFactory.file(tmp), Parameters.instance());
        try {
            Document autism = ps.nextDocument();
            assertNotNull(autism);
            Document achilles = ps.nextDocument();
            assertNotNull(achilles);
            // End of stream must be reported repeatedly, not just once.
            assertNull(ps.nextDocument());
            assertNull(ps.nextDocument());
            assertEquals("Autism", autism.name);
            assertEquals("Achilles", achilles.name);
            assertEquals("<title>Achilles</title>\n<body>In Greek mythology, Achilles was a Greek hero of the Trojan War and the central character and greatest warrior of Homer's Iliad.</body>", achilles.text);
        } finally {
            // Close the parser before the file is deleted below; a still-open
            // file can make File.delete() fail on some platforms.
            ps.close();
        }
    } finally {
        assertTrue(tmp.delete());
    }
}
示例3: DocumentStreamIterator
import org.lemurproject.galago.core.parse.DocumentStreamParser; //导入方法依赖的package包/类
/**
 * Creates an iterator over {@code parser} and immediately reads its first document.
 *
 * <p>After construction, {@code current} holds whatever the first {@code nextDocument()} call
 * returned.
 *
 * @param parser the parser this iterator reads from; stored in {@code this.parser}
 * @throws IOException if the initial {@code nextDocument()} call fails
 */
public DocumentStreamIterator(DocumentStreamParser parser) throws IOException {
    // Read ahead one document up front, then remember the parser.
    this.current = parser.nextDocument();
    this.parser = parser;
}
示例4: testSimpleData
import org.lemurproject.galago.core.parse.DocumentStreamParser; //导入方法依赖的package包/类
/**
 * Runs the {@code doc-date-lm-collector} tool with {@code --what=books} and
 * {@code --dataset=none} over a four-line tab-separated input, then reads the output back
 * with {@code DocDateSketchParser}.
 *
 * <p>Expects three documents: two whose {@code book} metadata is {@code doc0} (years 1981 and
 * 1982, in either order) followed by one for {@code doc1} with year 1783.
 *
 * @throws Exception if file setup, the tool run, or parsing fails
 */
@Test
public void testSimpleData() throws Exception {
    File inF = null;
    File outF = null;
    try {
        inF = File.createTempFile("asd", "jkl");
        outF = File.createTempFile("asd", "jkl");
        Utility.copyStringToFile(
            "doc0\t0\t0\t1982\tThis is the way things are, here in 1982.\n" +
            "doc0\t0\t0\t1981\tThis is the way things were, last year, in 1981.\n" +
            "doc0\t0\t0\t1982\tBut 1982 wasn't always this good.\n" +
            "doc1\t0\t0\t1783\t1783 was a year that I keep using for examples.\n",
            inF
        );
        Main.main(new String[]{
            "--tool=doc-date-lm-collector",
            "--dataset=none",
            "--what=books",
            "--input=" + inF.getAbsolutePath(),
            "--output=" + outF.getAbsolutePath()
        });
        DocumentSplit written = DocumentSplitFactory.file(outF);
        DocumentStreamParser docsSP = new DocDateSketchParser(written, Parameters.instance());
        List<Document> docs = new ArrayList<Document>();
        try {
            while (true) {
                Document d = docsSP.nextDocument();
                if (d == null) {
                    break;
                }
                docs.add(d);
            }
        } finally {
            // Close the parser before the output file is deleted below; a
            // still-open file can make File.delete() fail on some platforms.
            docsSP.close();
        }
        Assert.assertEquals(3, docs.size());
        Assert.assertEquals("doc0", docs.get(0).metadata.get("book"));
        Assert.assertEquals("doc0", docs.get(1).metadata.get("book"));
        Assert.assertEquals("doc1", docs.get(2).metadata.get("book"));
        Assert.assertEquals("1783", docs.get(2).metadata.get("year"));
        // The two doc0 entries may come back in either year order.
        String year0 = docs.get(0).metadata.get("year");
        String year1 = docs.get(1).metadata.get("year");
        Assert.assertTrue(("1981".equals(year0) && "1982".equals(year1)) || ("1982".equals(year0) && "1981".equals(year1)));
    } finally {
        // A null file means createTempFile itself failed; skip it so that the
        // original exception is reported instead of a cleanup assertion.
        // Attempt both deletions before asserting so neither temp file is
        // left behind when the other one cannot be removed.
        boolean inDeleted = inF == null || inF.delete();
        boolean outDeleted = outF == null || outF.delete();
        Assert.assertTrue(inDeleted);
        Assert.assertTrue(outDeleted);
    }
}
示例5: testByPages
import org.lemurproject.galago.core.parse.DocumentStreamParser; //导入方法依赖的package包/类
/**
 * Runs the {@code doc-date-lm-collector} tool with {@code --what=pages} and
 * {@code --dataset=none} over a four-line tab-separated input, then reads the output back
 * with {@code DocDateSketchParser}.
 *
 * <p>Expects three documents: two whose {@code book} metadata is {@code doc0_0} (years 1981
 * and 1982, in either order) followed by one for {@code doc0_1} with year 1783.
 *
 * @throws Exception if file setup, the tool run, or parsing fails
 */
@Test
public void testByPages() throws Exception {
    File inF = null;
    File outF = null;
    try {
        inF = File.createTempFile("asdf", "jkl");
        outF = File.createTempFile("asdf", "jkl");
        Utility.copyStringToFile(
            "doc0\t0\t0\t1982\tThis is the way things are, here in 1982.\n" +
            "doc0\t0\t0\t1981\tThis is the way things were, last year, in 1981.\n" +
            "doc0\t0\t0\t1982\tBut 1982 wasn't always this good.\n" +
            "doc0\t1\t0\t1783\t1783 was a year that I keep using for examples.\n",
            inF
        );
        Main.main(new String[]{
            "--tool=doc-date-lm-collector",
            "--dataset=none",
            "--what=pages",
            "--input=" + inF.getAbsolutePath(),
            "--output=" + outF.getAbsolutePath()
        });
        DocumentSplit written = DocumentSplitFactory.file(outF);
        DocumentStreamParser docsSP = new DocDateSketchParser(written, Parameters.instance());
        List<Document> docs = new ArrayList<Document>();
        try {
            while (true) {
                Document d = docsSP.nextDocument();
                if (d == null) {
                    break;
                }
                docs.add(d);
            }
        } finally {
            // Close the parser before the output file is deleted below; a
            // still-open file can make File.delete() fail on some platforms.
            docsSP.close();
        }
        Assert.assertEquals(3, docs.size());
        Assert.assertEquals("doc0_0", docs.get(0).metadata.get("book"));
        Assert.assertEquals("doc0_0", docs.get(1).metadata.get("book"));
        Assert.assertEquals("doc0_1", docs.get(2).metadata.get("book"));
        Assert.assertEquals("1783", docs.get(2).metadata.get("year"));
        // The two doc0_0 entries may come back in either year order.
        String year0 = docs.get(0).metadata.get("year");
        String year1 = docs.get(1).metadata.get("year");
        Assert.assertTrue(("1981".equals(year0) && "1982".equals(year1)) || ("1982".equals(year0) && "1981".equals(year1)));
    } finally {
        // Attempt both deletions before asserting so neither temp file is
        // left behind when the other one cannot be removed. A null file means
        // it was never created, so there is nothing to delete.
        boolean inDeleted = inF == null || inF.delete();
        boolean outDeleted = outF == null || outF.delete();
        Assert.assertTrue(inDeleted);
        Assert.assertTrue(outDeleted);
    }
}
示例6: testFilterYears
import org.lemurproject.galago.core.parse.DocumentStreamParser; //导入方法依赖的package包/类
/**
 * Runs the {@code doc-date-lm-collector} tool with {@code --what=books} and
 * {@code --dataset=books} over a four-line tab-separated input, then reads the output back
 * with {@code DocDateSketchParser}.
 *
 * <p>Expects a single document, with {@code book} metadata {@code doc1} and year 1783.
 * NOTE(review): presumably the {@code books} dataset setting filters out the 1981/1982 rows;
 * confirm against the tool's implementation.
 *
 * @throws Exception if file setup, the tool run, or parsing fails
 */
@Test
public void testFilterYears() throws Exception {
    File inF = null;
    File outF = null;
    try {
        inF = File.createTempFile("asdf", "jkl");
        outF = File.createTempFile("asdf", "jkl");
        Utility.copyStringToFile(
            "doc0\t0\t0\t1982\tThis is the way things are, here in 1982.\n" +
            "doc0\t0\t0\t1981\tThis is the way things were, last year, in 1981.\n" +
            "doc0\t0\t0\t1982\tBut 1982 wasn't always this good.\n" +
            "doc1\t0\t0\t1783\t1783 was a year that I keep using for examples.\n",
            inF
        );
        Main.main(new String[]{
            "--tool=doc-date-lm-collector",
            "--dataset=books",
            "--what=books",
            "--input=" + inF.getAbsolutePath(),
            "--output=" + outF.getAbsolutePath()
        });
        DocumentSplit written = DocumentSplitFactory.file(outF);
        DocumentStreamParser docsSP = new DocDateSketchParser(written, Parameters.instance());
        List<Document> docs = new ArrayList<Document>();
        try {
            while (true) {
                Document d = docsSP.nextDocument();
                if (d == null) {
                    break;
                }
                docs.add(d);
            }
        } finally {
            // Close the parser before the output file is deleted below; a
            // still-open file can make File.delete() fail on some platforms.
            docsSP.close();
        }
        Assert.assertEquals(1, docs.size());
        Assert.assertEquals("doc1", docs.get(0).metadata.get("book"));
        Assert.assertEquals("1783", docs.get(0).metadata.get("year"));
    } finally {
        // Attempt both deletions before asserting so neither temp file is
        // left behind when the other one cannot be removed. A null file means
        // it was never created, so there is nothing to delete.
        boolean inDeleted = inF == null || inF.delete();
        boolean outDeleted = outF == null || outF.delete();
        Assert.assertTrue(inDeleted);
        Assert.assertTrue(outDeleted);
    }
}