本文整理汇总了Java中de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser.parse方法的典型用法代码示例。如果您正苦于以下问题：Java MediaWikiParser.parse方法的具体用法？Java MediaWikiParser.parse怎么用？Java MediaWikiParser.parse使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser的用法示例。
在下文中一共展示了MediaWikiParser.parse方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: main
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Demo entry point: builds a {@code Wikipedia} object from database settings,
 * parses the text of the page {@code House_(disambiguation)} and prints every
 * nested list of that page, numbered from 1.
 *
 * @param args command-line arguments (not used)
 * @throws WikiApiException declared for the wiki access performed here
 */
public static void main(String[] args) throws WikiApiException {
    // Database connection settings; the literal values are placeholders.
    DatabaseConfiguration config = new DatabaseConfiguration();
    config.setDatabase("DATABASE");
    config.setHost("HOST");
    config.setUser("USER");
    config.setPassword("PASSWORD");
    config.setLanguage(Language.english);

    // Set up the wiki and an English-language parser.
    Wikipedia wikipedia = new Wikipedia(config);
    MediaWikiParser wikiParser = new MediaWikiParserFactory(Language.english).createParser();

    // Fetch the markup of 'House_(disambiguation)' and parse it.
    String markup = wikipedia.getPage("House_(disambiguation)").getText();
    ParsedPage parsed = wikiParser.parse(markup);

    // Print each nested list, prefixed with its 1-based position.
    int listNumber = 0;
    for (NestedList nestedList : parsed.getNestedLists()) {
        listNumber++;
        System.out.println(listNumber + ": \n" + outputNestedList(nestedList, 0));
    }
}
示例2: main
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Demo entry point: parses the sample document and prints every link as
 * {@code left-context<LINK TEXT>right-context}, with the link text upper-cased.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args){
    // load a sample document (the contents are equal to "DarmstadtWikipediaArticle.txt")
    String documentText = TestFile.getFileText();

    // get a ParsedPage object
    MediaWikiParserFactory pf = new MediaWikiParserFactory();
    MediaWikiParser parser = pf.createParser();
    ParsedPage pp = parser.parse(documentText);

    // Link Context (return 1 token left, 2 token right of the link)
    for (Link link : pp.getLinks()) {
        String leftContext = link.getContext(1, 0);
        // Locale.ROOT keeps the upper-casing independent of the JVM's default
        // locale (e.g. a Turkish default would turn "i" into a dotted capital I).
        String upperText = link.getText().toString().toUpperCase(java.util.Locale.ROOT);
        String rightContext = link.getContext(0, 2);
        System.out.println(leftContext + "<" + upperText + ">" + rightContext);
    }
}
示例3: main
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Demo entry point: parses the sample document and prints, for every section,
 * its title and the number of paragraphs, tables, nested lists and definition
 * lists it contains.
 *
 * @param args command-line arguments (not used)
 * @throws IOException declared by this demo; presumably raised while loading
 *         the sample file -- confirm against {@code TestFile}
 */
public static void main(String[] args) throws IOException {
    // The sample text has the same contents as "DarmstadtWikipediaArticle.txt".
    String sampleMarkup = TestFile.getFileText();

    // Parse it with a default-configured parser.
    MediaWikiParser wikiParser = new MediaWikiParserFactory().createParser();
    ParsedPage parsed = wikiParser.parse(sampleMarkup);

    // One statistics group per section.
    for (Section current : parsed.getSections()) {
        System.out.println("section : " + current.getTitle());
        System.out.println(" nr of paragraphs : " + current.nrOfParagraphs());
        System.out.println(" nr of tables : " + current.nrOfTables());
        System.out.println(" nr of nested lists : " + current.nrOfNestedLists());
        System.out.println(" nr of definition lists: " + current.nrOfDefinitionLists());
    }
}
示例4: main
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Demo entry point: parses the sample document, prints the targets of its
 * language links, and then prints the targets of the internal links of each
 * section under that section's title.
 *
 * @param args command-line arguments (not used)
 * @throws WikiApiException declared by this demo's signature
 */
public static void main(String[] args) throws WikiApiException {
    // The sample text has the same contents as "DarmstadtWikipediaArticle.txt".
    String sampleMarkup = TestFile.getFileText();

    // Parse it with a default-configured parser.
    MediaWikiParser wikiParser = new MediaWikiParserFactory().createParser();
    ParsedPage parsed = wikiParser.parse(sampleMarkup);

    // Links that point to other Wikipedia language editions.
    for (Link languageLink : parsed.getLanguages()) {
        System.out.println(languageLink.getTarget());
    }

    // Internal links, grouped by the section they occur in.
    for (Section current : parsed.getSections()) {
        System.out.println("Section: " + current.getTitle());
        for (Link internalLink : current.getLinks(Link.type.INTERNAL)) {
            System.out.println(" " + internalLink.getTarget());
        }
    }
}
示例5: main
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Demo entry point: parses the sample document with a parser that has "Image"
 * registered as an image identifier, renders the parsed page to HTML and
 * writes it to {@code htmlFileDemo.html}.
 *
 * @param argv command-line arguments (not used)
 * @throws Exception declared by this demo's signature
 */
public static void main( String[] argv ) throws Exception{
    // The sample text has the same contents as "DarmstadtWikipediaArticle.txt".
    final String sampleMarkup = TestFile.getFileText();

    // Individually parametrized parser: register the image identifier first.
    MediaWikiParserFactory factory = new MediaWikiParserFactory();
    factory.getImageIdentifers().add("Image");
    ParsedPage parsed = factory.createParser().parse(sampleMarkup);

    // Render to HTML and write the result out.
    final String targetFile = "htmlFileDemo.html";
    String html = HtmlWriter.parsedPageToHtml(parsed);
    HtmlWriter.writeFile(targetFile, "UTF8", html);
    System.out.println("Writing output to file: " + targetFile);
}
示例6: main
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Demo entry point: parses the sample document and prints every link as
 * {@code left-context<LINK TEXT>right-context}, with the link text upper-cased.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args){
    // load a sample document (the contents are equal to "DarmstadtWikipediaArticle.txt")
    String documentText = TestFile.getFileText();

    // get a ParsedPage object
    MediaWikiParserFactory pf = new MediaWikiParserFactory();
    MediaWikiParser parser = pf.createParser();
    ParsedPage pp = parser.parse(documentText);

    // Link Context (return 1 token left, 2 token right of the link)
    for (Link link : pp.getLinks()) {
        String leftContext = link.getContext(1, 0);
        // Locale.ROOT keeps the upper-casing independent of the JVM's default
        // locale (e.g. a Turkish default would turn "i" into a dotted capital I).
        String upperText = link.getText().toString().toUpperCase(java.util.Locale.ROOT);
        String rightContext = link.getContext(0, 2);
        System.out.println(leftContext + "<" + upperText + ">" + rightContext);
    }
}
示例7: main
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Demo entry point: parses the sample document and prints, for every section,
 * its title and the number of paragraphs, tables, nested lists and definition
 * lists it contains.
 *
 * @param args command-line arguments (not used)
 * @throws IOException declared by this demo; presumably raised while loading
 *         the sample file -- confirm against {@code TestFile}
 */
public static void main(String[] args) throws IOException {
    // The sample text has the same contents as "DarmstadtWikipediaArticle.txt".
    String sampleMarkup = TestFile.getFileText();

    // Parse it with a default-configured parser.
    MediaWikiParser wikiParser = new MediaWikiParserFactory().createParser();
    ParsedPage parsed = wikiParser.parse(sampleMarkup);

    // One statistics group per section.
    for (Section current : parsed.getSections()) {
        System.out.println("section : " + current.getTitle());
        System.out.println(" nr of paragraphs : " + current.nrOfParagraphs());
        System.out.println(" nr of tables : " + current.nrOfTables());
        System.out.println(" nr of nested lists : " + current.nrOfNestedLists());
        System.out.println(" nr of definition lists: " + current.nrOfDefinitionLists());
    }
}
示例8: main
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Demo entry point: parses the sample document, prints the targets of its
 * language links, and then prints the targets of the internal links of each
 * section under that section's title.
 *
 * @param args command-line arguments (not used)
 * @throws WikiApiException declared by this demo's signature
 */
public static void main(String[] args) throws WikiApiException {
    // The sample text has the same contents as "DarmstadtWikipediaArticle.txt".
    String sampleMarkup = TestFile.getFileText();

    // Parse it with a default-configured parser.
    MediaWikiParser wikiParser = new MediaWikiParserFactory().createParser();
    ParsedPage parsed = wikiParser.parse(sampleMarkup);

    // Links that point to other Wikipedia language editions.
    for (Link languageLink : parsed.getLanguages()) {
        System.out.println(languageLink.getTarget());
    }

    // Internal links, grouped by the section they occur in.
    for (Section current : parsed.getSections()) {
        System.out.println("Section: " + current.getTitle());
        for (Link internalLink : current.getLinks(Link.type.INTERNAL)) {
            System.out.println(" " + internalLink.getTarget());
        }
    }
}
示例9: main
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Demo entry point: parses the sample document with a parser that has "Image"
 * registered as an image identifier, renders the parsed page to HTML and
 * writes it to {@code htmlFileDemo.html}.
 *
 * @param argv command-line arguments (not used)
 * @throws Exception declared by this demo's signature
 */
public static void main( String[] argv ) throws Exception{
    // The sample text has the same contents as "DarmstadtWikipediaArticle.txt".
    final String sampleMarkup = TestFile.getFileText();

    // Individually parametrized parser: register the image identifier first.
    MediaWikiParserFactory factory = new MediaWikiParserFactory();
    factory.getImageIdentifers().add("Image");
    ParsedPage parsed = factory.createParser().parse(sampleMarkup);

    // Render to HTML and write the result out.
    final String targetFile = "htmlFileDemo.html";
    String html = HtmlWriter.parsedPageToHtml(parsed);
    HtmlWriter.writeFile(targetFile, "UTF8", html);
    System.out.println("Writing output to file: " + targetFile);
}
示例10: getSectionsWithJWPL
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Parses talk page markup with the JWPL MediaWiki parser (source-span
 * calculation switched on) and converts each parsed section into an
 * {@code ExtractedSection}.
 *
 * @param text the talk page text with markup
 * @return one {@code ExtractedSection} per parsed section, built from the
 *         section's title and paragraphs, with its nested lists added
 */
public static List<ExtractedSection> getSectionsWithJWPL(String text){
    MediaWikiParserFactory factory = new MediaWikiParserFactory();
    factory.setCalculateSrcSpans(true);
    ParsedPage parsed = factory.createParser().parse(text);

    List<ExtractedSection> result = new ArrayList<>();
    for (Section current : parsed.getSections()) {
        ExtractedSection extracted =
                new ExtractedSection(current.getTitle(), current.getParagraphs());
        extracted.addNestedLists(current.getNestedLists());
        result.add(extracted);
    }
    return result;
}
示例11: main
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Demo entry point: builds a {@code Wikipedia} object from database settings,
 * parses the text of the page "Dog" and prints the title of each section.
 *
 * @param args command-line arguments (not used)
 * @throws WikiApiException declared for the wiki access performed here
 */
public static void main(String[] args) throws WikiApiException {
    // Database connection settings; the literal values are placeholders.
    DatabaseConfiguration config = new DatabaseConfiguration();
    config.setDatabase("DATABASE");
    config.setHost("HOST");
    config.setUser("USER");
    config.setPassword("PASSWORD");
    config.setLanguage(Language.english);

    // Set up the wiki and fetch the page 'Dog'.
    Wikipedia wikipedia = new Wikipedia(config);
    Page dogPage = wikipedia.getPage("Dog");

    // Parse the page text with a default-configured parser.
    MediaWikiParser wikiParser = new MediaWikiParserFactory().createParser();
    ParsedPage parsed = wikiParser.parse(dogPage.getText());

    // Print the title of every section of the page.
    for (Section current : parsed.getSections()) {
        System.out.println(current.getTitle());
    }
}
示例12: main
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Demo entry point: builds a {@code Wikipedia} object from database settings,
 * parses the page "Dog" with a parser configured to filter template and image
 * elements, and prints the resulting text.
 *
 * @param args command-line arguments (not used)
 * @throws WikiApiException declared for the wiki access performed here
 */
public static void main(String[] args) throws WikiApiException {
    // Database connection settings; the literal values are placeholders.
    DatabaseConfiguration config = new DatabaseConfiguration();
    config.setDatabase("DATABASE");
    config.setHost("HOST");
    config.setUser("USER");
    config.setPassword("PASSWORD");
    config.setLanguage(Language.english);

    // Set up the wiki and fetch the page 'Dog'.
    Wikipedia wikipedia = new Wikipedia(config);
    Page dogPage = wikipedia.getPage("Dog");

    // Configure the parser factory.
    MediaWikiParserFactory factory = new MediaWikiParserFactory();
    // Filtering TEMPLATE elements.
    factory.setTemplateParserClass(FlushTemplates.class);
    // Filtering image elements. Replace the name with the image template name
    // of your Wiki language edition, e.g. "Image" in English.
    String imageIdentifier = "Image";
    factory.getImageIdentifers().add(imageIdentifier);

    // Parse the page text and print the result.
    ParsedPage parsed = factory.createParser().parse(dogPage.getText());
    System.out.println(parsed.getText());
}
示例13: testParsedPage
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Parses the wiki page "Wikipedia API" and checks the first two links of its
 * first section as well as the text of the parsed page.
 */
@Test
public void testParsedPage(){
    final String title = "Wikipedia API";

    Page page = null;
    try {
        page = wiki.getPage(title);
    } catch (WikiApiException e) {
        e.printStackTrace();
        fail("A WikiApiException occured while getting the page " + title);
    }

    // Expected text of the parsed page: two lines joined by a line feed.
    final String expectedText = "Wikipedia API ist die wichtigste Software überhaupt." + "\n" +
            "Wikipedia API. Nicht zu übertreffen. Unglaublich http://www.ukp.tu-darmstadt.de en:Wikipedia API";

    MediaWikiParser wikiParser = new MediaWikiParserFactory(Language.english).createParser();
    ParsedPage parsed = wikiParser.parse(page.getText());

    // Only the first two links of section 0 are checked; later links are ignored.
    int position = 0;
    for (Link link : parsed.getSection(0).getLinks()) {
        switch (position) {
            case 0:
                assertEquals("Software", link.getText());
                break;
            case 1:
                assertEquals("Wikipedia API", link.getText());
                assertEquals("JWPL", link.getTarget());
                break;
            default:
                break;
        }
        position++;
    }

    assertEquals(expectedText, parsed.getText());
}
示例14: parseInternalLinks
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Parses the given markup and collects the targets of all links whose type is
 * {@code Link.type.INTERNAL}.
 *
 * @param text the markup to parse
 * @return the targets of the internal links in the order the parser reports
 *         them; an empty list if the parser returns {@code null}
 */
public static List<String> parseInternalLinks(String text) {
    ParsedPage parsed = new MediaWikiParserFactory().createParser().parse(text);

    List<String> targets = new LinkedList<>();
    if (parsed == null) {
        // Nothing was parsed, so there are no links to report.
        return targets;
    }

    for (Link candidate : parsed.getLinks()) {
        if (candidate.getType() != Link.type.INTERNAL) {
            continue;
        }
        targets.add(candidate.getTarget());
    }
    return targets;
}
示例15: main
import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParser; //导入方法依赖的package包/类
/**
 * Demo entry point: builds a {@code Wikipedia} object from database settings,
 * parses the page "Dog" with an English-language parser configured to filter
 * template and image elements, and prints the resulting text.
 *
 * @param args command-line arguments (not used)
 * @throws WikiApiException declared for the wiki access performed here
 */
public static void main(String[] args) throws WikiApiException {
    // Database connection settings; the literal values are placeholders.
    DatabaseConfiguration config = new DatabaseConfiguration();
    config.setDatabase("DATABASE");
    config.setHost("HOST");
    config.setUser("USER");
    config.setPassword("PASSWORD");
    config.setLanguage(Language.english);

    // Set up the wiki and fetch the page 'Dog'.
    Wikipedia wikipedia = new Wikipedia(config);
    Page dogPage = wikipedia.getPage("Dog");

    // Configure the parser factory for the English language edition.
    MediaWikiParserFactory factory = new MediaWikiParserFactory(Language.english);
    // Filtering TEMPLATE elements.
    factory.setTemplateParserClass(FlushTemplates.class);
    // Filtering image elements. Replace the name with the image template name
    // of your Wiki language edition, e.g. "Image" in English.
    String imageIdentifier = "Image";
    factory.getImageIdentifers().add(imageIdentifier);

    // Parse the page text and print the result.
    ParsedPage parsed = factory.createParser().parse(dogPage.getText());
    System.out.println(parsed.getText());
}