本文整理汇总了Java中org.ccil.cowan.tagsoup.HTMLSchema类的典型用法代码示例。如果您正苦于以下问题：Java HTMLSchema类的具体用法？Java HTMLSchema怎么用？Java HTMLSchema使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
HTMLSchema类属于org.ccil.cowan.tagsoup包，在下文中一共展示了HTMLSchema类的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: xmlizeInputStream
import org.ccil.cowan.tagsoup.HTMLSchema; //导入依赖的package包/类
/**
 * Parses the given stream with a TagSoup {@code Parser} configured with an
 * {@link HTMLSchema} and re-serializes the resulting SAX events through an
 * {@code XMLWriter} into an in-memory buffer.
 *
 * <p>On success {@code original} is closed and a new stream over the buffered
 * output is returned. On any exception the stack trace is printed and
 * {@code original} itself is returned as a best-effort fallback; in that case
 * it is not closed here.
 *
 * <p>NOTE(review): when the failure happens during parsing, {@code original}
 * may already have been partly consumed, so the fallback stream may not start
 * at the beginning of the data. Confirm that callers can tolerate this.
 *
 * @param original the stream to read the markup from
 * @return a stream over the serialized output, or {@code original} if
 *         anything went wrong
 */
static InputStream xmlizeInputStream(InputStream original) {
    try {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        XMLReader reader = new Parser();
        // TODO walk through the javadoc and tune more settings
        // see tagsoup javadoc for details
        reader.setProperty(Parser.schemaProperty, new HTMLSchema());
        reader.setFeature(Parser.bogonsEmptyFeature, false);
        reader.setFeature(Parser.ignorableWhitespaceFeature, true);
        reader.setFeature(Parser.ignoreBogonsFeature, false);

        // Encode with an explicit charset so the produced bytes do not vary
        // with the JVM's platform default encoding.
        Writer writer = new OutputStreamWriter(buffer, java.nio.charset.StandardCharsets.UTF_8);
        reader.setContentHandler(new XMLWriter(writer));
        reader.parse(new InputSource(original));

        // OutputStreamWriter buffers encoded bytes; flush explicitly rather
        // than relying on the content handler to have done so.
        writer.flush();
        original.close();
        return new ByteArrayInputStream(buffer.toByteArray());
    } catch (Exception ex) {
        ex.printStackTrace();
        return original;
    }
}
示例2: generate
import org.ccil.cowan.tagsoup.HTMLSchema; //导入依赖的package包/类
/**
 * Generates clean HTML from wikitext that contains improper HTML tags
 * (e.g. tags that are not opened or closed properly) using the TagSoup library.
 *
 * <p>The TagSoup parser removes a close tag that has no open tag, and generates
 * the missing close tag for an open tag that has none. However, the part of
 * the text nested by the generated tags may not be correct.
 *
 * <p>The text is handed to the parser as a character stream, so it is never
 * converted to bytes with the platform default charset on the way in.
 *
 * @param wikitext the text to clean
 * @param segment  if {@code true}, the text is split on {@code "\n\n"} and
 *                 each piece is parsed separately; if {@code false}, the whole
 *                 text is parsed in one pass
 * @return the text written by the content handler, with every
 *         {@code <html><body>} removed and every {@code </body></html>}
 *         replaced by a newline
 * @throws IOException  if the parser fails while reading its input
 * @throws SAXException if the parser or the content handler reports an error
 */
public String generate(String wikitext, boolean segment) throws IOException, SAXException {
    theSchema = new HTMLSchema();
    XMLReader r = new Parser();
    r.setFeature(Parser.namespacesFeature, false); // omit namespace
    r.setProperty(Parser.schemaProperty, theSchema);
    Writer w = new StringWriter();
    ContentHandler h = chooseContentHandler(w);
    r.setContentHandler(h);

    // Process per paragraph when requested, because the correction of improper
    // tags would otherwise be accumulated and repeated until the end of the
    // given text.
    String[] chunks = segment ? wikitext.split("\n\n") : new String[] {wikitext};
    for (String chunk : chunks) {
        // A character stream avoids a String -> bytes -> chars round trip
        // through whatever charset the platform happens to default to.
        r.parse(new InputSource(new java.io.StringReader(chunk)));
    }

    // Strip the document wrapper emitted for each parse; the closing wrapper
    // becomes a newline.
    return StringUtils.replaceEach(w.toString(),
            new String[] {"<html><body>", "</body></html>"},
            new String[] {"", "\n"});
}
示例3: getParsingSchema
import org.ccil.cowan.tagsoup.HTMLSchema; //导入依赖的package包/类
/**
 * Returns the schema held in {@code parsingSchema}, creating a new
 * {@link HTMLSchema} and storing it in that field first if it is still
 * {@code null}.
 *
 * @return the stored schema; never {@code null} after this call
 */
public Schema getParsingSchema() {
    if (this.parsingSchema != null) {
        return this.parsingSchema;
    }
    // First request: create the schema and keep it for later calls.
    this.parsingSchema = new HTMLSchema();
    return this.parsingSchema;
}
示例4: getSchema
import org.ccil.cowan.tagsoup.HTMLSchema; //导入依赖的package包/类
/**
 * Returns the {@code schema} reference of the enclosing class.
 *
 * <p>NOTE(review): the declaration and initialization of {@code schema} are
 * outside this snippet, so whether it can be {@code null} here is not visible.
 *
 * @return the current value of {@code schema}
 */
public static HTMLSchema getSchema() {
return schema;
}