当前位置: 首页>>代码示例>>Java>>正文


Java MediaWikiParserFactory.createParser方法代码示例

本文整理汇总了Java中de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory.createParser方法的典型用法代码示例。如果您正苦于以下问题：Java MediaWikiParserFactory.createParser方法的具体用法？Java MediaWikiParserFactory.createParser怎么用？Java MediaWikiParserFactory.createParser使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory的用法示例。


在下文中一共展示了MediaWikiParserFactory.createParser方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Opens a Wikipedia database, parses the page {@code House_(disambiguation)} and
 * prints every nested list found on it, numbered from 1.
 *
 * @param args command-line arguments (unused)
 * @throws WikiApiException declared by the signature; presumably raised when the
 *         wiki cannot be opened or the page cannot be fetched
 */
public static void main(String[] args) throws WikiApiException {

    // Database connection settings (the values below look like placeholders).
    DatabaseConfiguration config = new DatabaseConfiguration();
    config.setDatabase("DATABASE");
    config.setHost("HOST");
    config.setUser("USER");
    config.setPassword("PASSWORD");
    config.setLanguage(Language.english);

    // Open the wiki and build a parser for English markup.
    Wikipedia wikipedia = new Wikipedia(config);
    MediaWikiParser wikiParser = new MediaWikiParserFactory(Language.english).createParser();

    // Fetch and parse the page 'House_(disambiguation)'.
    String markup = wikipedia.getPage("House_(disambiguation)").getText();
    ParsedPage parsed = wikiParser.parse(markup);

    // Print all nested lists of the page, prefixed with a running number.
    int position = 0;
    for (NestedList list : parsed.getNestedLists()) {
        position++;
        System.out.println(position + ": \n" + outputNestedList(list, 0));
    }
}
 
开发者ID:dkpro,项目名称:dkpro-jwpl,代码行数:27,代码来源:T6_NestedLists.java

示例2: main

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Parses the sample document and prints every link together with its context:
 * the result of {@code getContext(1, 0)}, the upper-cased link text in angle
 * brackets, and the result of {@code getContext(0, 2)}.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args){

    // load a sample document (the contents are equal to "DarmstadtWikipediaArticle.txt")
    String documentText = TestFile.getFileText();

    // get a ParsedPage object
    MediaWikiParserFactory pf = new MediaWikiParserFactory();
    MediaWikiParser parser = pf.createParser();
    ParsedPage pp = parser.parse(documentText);

    // Link Context (return 1 token left, 2 token right of the link)
    for (Link link : pp.getLinks()) {
        String left = link.getContext(1, 0);
        // Locale.ROOT makes the upper-casing independent of the JVM's default locale
        // (e.g. a Turkish default locale would otherwise map 'i' to a dotted capital I).
        String linkText = link.getText().toString().toUpperCase(java.util.Locale.ROOT);
        String right = link.getContext(0, 2);
        System.out.println(left + "<" + linkText + ">" + right);
    }
}
 
开发者ID:dkpro,项目名称:dkpro-jwpl,代码行数:20,代码来源:T3_LinkContexts.java

示例3: main

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Parses the sample document and prints, for each section, its title followed by
 * the number of paragraphs, tables, nested lists and definition lists it holds.
 *
 * @param args command-line arguments (unused)
 * @throws IOException declared by the signature
 */
public static void main(String[] args) throws IOException {

    // Sample document (the contents are equal to "DarmstadtWikipediaArticle.txt").
    String markup = TestFile.getFileText();

    // Build a default parser and turn the markup into a ParsedPage.
    MediaWikiParser wikiParser = new MediaWikiParserFactory().createParser();
    ParsedPage page = wikiParser.parse(markup);

    // Report the per-section statistics.
    for (Section current : page.getSections()) {
        System.out.println("section : " + current.getTitle());
        System.out.println(" nr of paragraphs      : " + current.nrOfParagraphs());
        System.out.println(" nr of tables          : " + current.nrOfTables());
        System.out.println(" nr of nested lists    : " + current.nrOfNestedLists());
        System.out.println(" nr of definition lists: " + current.nrOfDefinitionLists());
    }
}
 
开发者ID:dkpro,项目名称:dkpro-jwpl,代码行数:24,代码来源:T1_SimpleParserDemo.java

示例4: main

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Parses the sample document and prints the targets of its language links,
 * followed by the targets of the internal links of every section.
 *
 * @param args command-line arguments (unused)
 * @throws WikiApiException declared by the signature
 */
public static void main(String[] args) throws WikiApiException {

    // Sample document (the contents are equal to "DarmstadtWikipediaArticle.txt").
    String markup = TestFile.getFileText();

    // Build a default parser and turn the markup into a ParsedPage.
    MediaWikiParser wikiParser = new MediaWikiParserFactory().createParser();
    ParsedPage page = wikiParser.parse(markup);

    // Links pointing to other Wikipedia language editions.
    for (Link languageLink : page.getLanguages()) {
        System.out.println(languageLink.getTarget());
    }

    // Internal links, grouped under the title of the section they belong to.
    for (Section current : page.getSections()) {
        System.out.println("Section: " + current.getTitle());

        for (Link internalLink : current.getLinks(Link.type.INTERNAL)) {
            System.out.println("  " + internalLink.getTarget());
        }
    }
}
 
开发者ID:dkpro,项目名称:dkpro-jwpl,代码行数:30,代码来源:T2_InternalLinks.java

示例5: main

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Parses the sample document with a parser that treats "Image" as an image
 * identifier, writes the HTML rendering to {@code htmlFileDemo.html} and then
 * prints the name of the output file.
 *
 * @param argv command-line arguments (unused)
 * @throws Exception declared broadly by the signature
 */
public static void main(String[] argv) throws Exception {

    // Sample document (the contents are equal to "DarmstadtWikipediaArticle.txt").
    String markup = TestFile.getFileText();

    // Individually parametrized MediaWikiParser: register the image identifier first.
    MediaWikiParserFactory factory = new MediaWikiParserFactory();
    factory.getImageIdentifers().add("Image");
    MediaWikiParser wikiParser = factory.createParser();

    ParsedPage page = wikiParser.parse(markup);

    String targetFile = "htmlFileDemo.html";
    HtmlWriter.writeFile(targetFile, "UTF8", HtmlWriter.parsedPageToHtml(page));

    System.out.println("Writing output to file: " + targetFile);
}
 
开发者ID:dkpro,项目名称:dkpro-jwpl,代码行数:18,代码来源:HtmlFileDemo.java

示例6: main

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Parses the sample document and prints every link together with its context:
 * the result of {@code getContext(1, 0)}, the upper-cased link text in angle
 * brackets, and the result of {@code getContext(0, 2)}.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args){

    // load a sample document (the contents are equal to "DarmstadtWikipediaArticle.txt")
    String documentText = TestFile.getFileText();

    // get a ParsedPage object
    MediaWikiParserFactory pf = new MediaWikiParserFactory();
    MediaWikiParser parser = pf.createParser();
    ParsedPage pp = parser.parse(documentText);

    // Link Context (return 1 token left, 2 token right of the link)
    for (Link link : pp.getLinks()) {
        String left = link.getContext(1, 0);
        // Locale.ROOT makes the upper-casing independent of the JVM's default locale
        // (e.g. a Turkish default locale would otherwise map 'i' to a dotted capital I).
        String linkText = link.getText().toString().toUpperCase(java.util.Locale.ROOT);
        String right = link.getContext(0, 2);
        System.out.println(left + "<" + linkText + ">" + right);
    }
}
 
开发者ID:fauconnier,项目名称:LaToe,代码行数:20,代码来源:T3_LinkContexts.java

示例7: main

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Parses the sample document and prints, for each section, its title followed by
 * the number of paragraphs, tables, nested lists and definition lists it holds.
 *
 * @param args command-line arguments (unused)
 * @throws IOException declared by the signature
 */
public static void main(String[] args) throws IOException {

    // Sample document (the contents are equal to "DarmstadtWikipediaArticle.txt").
    String markup = TestFile.getFileText();

    // Build a default parser and turn the markup into a ParsedPage.
    MediaWikiParser wikiParser = new MediaWikiParserFactory().createParser();
    ParsedPage page = wikiParser.parse(markup);

    // Report the per-section statistics.
    for (Section current : page.getSections()) {
        System.out.println("section : " + current.getTitle());
        System.out.println(" nr of paragraphs      : " + current.nrOfParagraphs());
        System.out.println(" nr of tables          : " + current.nrOfTables());
        System.out.println(" nr of nested lists    : " + current.nrOfNestedLists());
        System.out.println(" nr of definition lists: " + current.nrOfDefinitionLists());
    }
}
 
开发者ID:fauconnier,项目名称:LaToe,代码行数:25,代码来源:T1_SimpleParserDemo.java

示例8: main

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Parses the sample document and prints the targets of its language links,
 * followed by the targets of the internal links of every section.
 *
 * @param args command-line arguments (unused)
 * @throws WikiApiException declared by the signature
 */
public static void main(String[] args) throws WikiApiException {

    // Sample document (the contents are equal to "DarmstadtWikipediaArticle.txt").
    String markup = TestFile.getFileText();

    // Build a default parser and turn the markup into a ParsedPage.
    MediaWikiParser wikiParser = new MediaWikiParserFactory().createParser();
    ParsedPage page = wikiParser.parse(markup);

    // Links pointing to other Wikipedia language editions.
    for (Link languageLink : page.getLanguages()) {
        System.out.println(languageLink.getTarget());
    }

    // Internal links, grouped under the title of the section they belong to.
    for (Section current : page.getSections()) {
        System.out.println("Section: " + current.getTitle());

        for (Link internalLink : current.getLinks(Link.type.INTERNAL)) {
            System.out.println("  " + internalLink.getTarget());
        }
    }
}
 
开发者ID:fauconnier,项目名称:LaToe,代码行数:30,代码来源:T2_InternalLinks.java

示例9: main

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Parses the sample document with a parser that treats "Image" as an image
 * identifier, writes the HTML rendering to {@code htmlFileDemo.html} and then
 * prints the name of the output file.
 *
 * @param argv command-line arguments (unused)
 * @throws Exception declared broadly by the signature
 */
public static void main(String[] argv) throws Exception {

    // Sample document (the contents are equal to "DarmstadtWikipediaArticle.txt").
    String markup = TestFile.getFileText();

    // Individually parametrized MediaWikiParser: register the image identifier first.
    MediaWikiParserFactory factory = new MediaWikiParserFactory();
    factory.getImageIdentifers().add("Image");
    MediaWikiParser wikiParser = factory.createParser();

    ParsedPage page = wikiParser.parse(markup);

    String targetFile = "htmlFileDemo.html";
    HtmlWriter.writeFile(targetFile, "UTF8", HtmlWriter.parsedPageToHtml(page));

    System.out.println("Writing output to file: " + targetFile);
}
 
开发者ID:fauconnier,项目名称:LaToe,代码行数:18,代码来源:HtmlFileDemo.java

示例10: getSectionsWithJWPL

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Parses wiki markup with the JWPL MediaWiki parser (source-span calculation
 * enabled) and converts every parsed section into an {@code ExtractedSection}.
 *
 * @param text the talk page text with markup
 * @return one {@code ExtractedSection} per parsed section, each built from the
 *         section's title and paragraphs and extended with its nested lists
 */
public static List<ExtractedSection> getSectionsWithJWPL(String text){
    MediaWikiParserFactory factory = new MediaWikiParserFactory();
    factory.setCalculateSrcSpans(true);
    ParsedPage page = factory.createParser().parse(text);

    List<ExtractedSection> result = new ArrayList<>();
    for (Section parsedSection : page.getSections()) {
        ExtractedSection extracted =
                new ExtractedSection(parsedSection.getTitle(), parsedSection.getParagraphs());
        extracted.addNestedLists(parsedSection.getNestedLists());
        result.add(extracted);
    }
    return result;
}
 
开发者ID:DiscourseDB,项目名称:discoursedb-core,代码行数:20,代码来源:WikitextParseUtils.java

示例11: main

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Opens a Wikipedia database, parses the page "Dog" and prints the title of each
 * of its sections.
 *
 * @param args command-line arguments (unused)
 * @throws WikiApiException declared by the signature; presumably raised when the
 *         wiki cannot be opened or the page cannot be fetched
 */
public static void main(String[] args) throws WikiApiException {
    // Database connection settings (the values below look like placeholders).
    DatabaseConfiguration config = new DatabaseConfiguration();
    config.setDatabase("DATABASE");
    config.setHost("HOST");
    config.setUser("USER");
    config.setPassword("PASSWORD");
    config.setLanguage(Language.english);

    // Open the wiki and fetch the page 'Dog'.
    Wikipedia wikipedia = new Wikipedia(config);
    Page dogPage = wikipedia.getPage("Dog");

    // Parse the page text with a default parser.
    MediaWikiParser wikiParser = new MediaWikiParserFactory().createParser();
    ParsedPage parsed = wikiParser.parse(dogPage.getText());

    // Print the title of every section of the page.
    for (Section current : parsed.getSections()) {
        System.out.println(current.getTitle());
    }
}
 
开发者ID:dkpro,项目名称:dkpro-jwpl,代码行数:28,代码来源:T4_InterfacingWithWikipedia.java

示例12: main

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Opens a Wikipedia database, parses the page "Dog" with a parser configured with
 * {@code FlushTemplates} as template parser and "Image" as image identifier, and
 * prints the resulting page text.
 *
 * @param args command-line arguments (unused)
 * @throws WikiApiException declared by the signature; presumably raised when the
 *         wiki cannot be opened or the page cannot be fetched
 */
public static void main(String[] args) throws WikiApiException {

    // Database connection settings (the values below look like placeholders).
    DatabaseConfiguration config = new DatabaseConfiguration();
    config.setDatabase("DATABASE");
    config.setHost("HOST");
    config.setUser("USER");
    config.setPassword("PASSWORD");
    config.setLanguage(Language.english);

    // Open the wiki and fetch the page 'Dog'.
    Wikipedia wikipedia = new Wikipedia(config);
    Page dogPage = wikipedia.getPage("Dog");

    // Configure the parser factory: filter TEMPLATE elements ...
    MediaWikiParserFactory factory = new MediaWikiParserFactory();
    factory.setTemplateParserClass(FlushTemplates.class);

    // ... and Image elements. Replace the identifier with the image template name
    // of your Wiki language edition, e.g. "Image" in English.
    String imageIdentifier = "Image";
    factory.getImageIdentifers().add(imageIdentifier);

    // Parse the page text and print the result.
    ParsedPage parsed = factory.createParser().parse(dogPage.getText());
    System.out.println(parsed.getText());
}
 
开发者ID:dkpro,项目名称:dkpro-jwpl,代码行数:33,代码来源:T5_CleaningTemplateImage.java

示例13: testParsedPage

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Checks that the page "Wikipedia API" parses into the expected plain text and
 * that the first two links of its first section carry the expected text and target.
 */
@Test
public void testParsedPage(){
    String title = "Wikipedia API";
    Page p = null;
    try {
        p = wiki.getPage(title);
    } catch (WikiApiException e) {
        e.printStackTrace();
        fail("A WikiApiException occured while getting the page " + title);
    }

    String LF = "\n";
    String text = "Wikipedia API ist die wichtigste Software überhaupt." + LF +
        "Wikipedia API. Nicht zu übertreffen. Unglaublich http://www.ukp.tu-darmstadt.de en:Wikipedia API";

    MediaWikiParserFactory pf = new MediaWikiParserFactory(Language.english);
    MediaWikiParser parser = pf.createParser();

    ParsedPage pp = parser.parse(p.getText());

    // Check the first two links of the first section; later links are only counted.
    int i = 0;
    for (Link link : pp.getSection(0).getLinks()) {
        if (i == 0) {
            assertEquals("Software", link.getText());
        }
        else if (i == 1) {
            assertEquals("Wikipedia API", link.getText());
            assertEquals("JWPL", link.getTarget());
        }
        i++;
    }
    // Without this guard the link assertions above are silently skipped when the
    // section yields fewer than two links, and the test would pass vacuously.
    if (i < 2) {
        fail("Expected at least 2 links in the first section but found " + i);
    }
    assertEquals(text, pp.getText());
}
 
开发者ID:dkpro,项目名称:dkpro-jwpl,代码行数:36,代码来源:ParsedPageTest.java

示例14: WikipediaTemplateInfoGenerator

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Stores the supplied settings and creates a MediaWiki parser for the language of
 * the given database configuration, using {@code ShowTemplateNamesAndParameters}
 * as its template parser.
 *
 * @param dbc database configuration; its language selects the parser language
 * @param pageBuffer value stored in the {@code pageBuffer} field
 * @param charset value stored in the {@code charset} field
 * @param outputPath value stored in the {@code outputPath} field
 * @param maxAllowedPacket accepted but not stored by this constructor
 * @param pageFilter filter stored in the {@code pageFilter} field
 * @param revisionFilter filter stored in the {@code revisionFilter} field
 * @param mode value stored in the {@code mode} field
 * @throws WikiApiException declared by the signature
 */
public WikipediaTemplateInfoGenerator(DatabaseConfiguration dbc, int pageBuffer,
        String charset, String outputPath, long maxAllowedPacket,
        TemplateFilter pageFilter, TemplateFilter revisionFilter,
        GeneratorMode mode)
    throws WikiApiException
{
    // Plain settings (maxAllowedPacket is intentionally not kept).
    this.dbConf = dbc;
    this.pageBuffer = pageBuffer;
    this.charset = charset;
    this.outputPath = outputPath;
    this.mode = mode;

    // Filters
    this.pageFilter = pageFilter;
    this.revisionFilter = revisionFilter;

    // Parser for the configured language, with the template parser class set above.
    MediaWikiParserFactory factory = new MediaWikiParserFactory(dbc.getLanguage());
    factory.setTemplateParserClass(ShowTemplateNamesAndParameters.class);
    parser = factory.createParser();
}
 
开发者ID:dkpro,项目名称:dkpro-jwpl,代码行数:28,代码来源:WikipediaTemplateInfoGenerator.java

示例15: initialize

import de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory; //导入方法依赖的package包/类
/**
 * Reads the category and image prefix parameters from the UIMA context, falling
 * back to "Category" and "Image" (with a warning) when a parameter is null or
 * empty, and creates the MediaWiki parser configured with those identifiers.
 *
 * @param context the UIMA context supplying the configuration parameters
 * @throws ResourceInitializationException declared by the signature
 * @see org.apache.uima.analysis_component.AnalysisComponent_ImplBase#initialize(org.apache.uima.UimaContext)
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);

    // Category prefix: use the configured value or fall back to the default.
    String categoryPrefix = (String) context.getConfigParameterValue(PARAM_NAME_PREFIX_CATEGORY);
    if (categoryPrefix == null || categoryPrefix.isEmpty()) {
        log.warn("Parameter " + PARAM_NAME_PREFIX_CATEGORY + " not set for UIMA AE " + MediaWikiMarkupDetector.class.getName() + ". Defaulting to 'Category'.");
        categoryPrefix = "Category";
    }
    prefixCategoryLength = categoryPrefix.length();

    // Image prefix: same fallback handling as for the category prefix.
    String imagePrefix = (String) context.getConfigParameterValue(PARAM_NAME_PREFIX_IMAGE);
    if (imagePrefix == null || imagePrefix.isEmpty()) {
        log.warn("Parameter " + PARAM_NAME_PREFIX_IMAGE + " not set for UIMA AE " + MediaWikiMarkupDetector.class.getName() + ". Defaulting to 'Image'.");
        imagePrefix = "Image";
    }

    // Parser with source spans, the resolved identifiers and image text enabled.
    MediaWikiParserFactory factory = new MediaWikiParserFactory();
    factory.setCalculateSrcSpans(true);
    factory.setImageIdentifers(Arrays.asList(imagePrefix));
    factory.setCategoryIdentifers(Arrays.asList(categoryPrefix));
    factory.setShowImageText(true);
    parser = factory.createParser();
}
 
开发者ID:ag-csw,项目名称:ExpertFinder,代码行数:31,代码来源:MediaWikiMarkupDetector.java


注:本文中的de.tudarmstadt.ukp.wikipedia.parser.mediawiki.MediaWikiParserFactory.createParser方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。