本文整理汇总了 Java 中 org.jsoup.safety.Whitelist.addTags 方法的典型用法代码示例。如果您正苦于以下问题:Java Whitelist.addTags 方法的具体用法?Java Whitelist.addTags 怎么用?Java Whitelist.addTags 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.jsoup.safety.Whitelist 的用法示例。
在下文中一共展示了Whitelist.addTags方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: cleanContent
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Sanitizes an HTML fragment so that only a fixed set of formatting tags survives.
 *
 * <p>The whitelist starts from {@code Whitelist.simpleText()} (b, em, i, strong, u) and
 * additionally permits br, cite, em, i, p, strong, img, li, ul, ol, sup, sub and s, plus
 * every tag named in {@code extraTags}. The {@code style} attribute is kept on {@code p};
 * {@code src}, {@code style} and {@code class} are kept on {@code img}; {@code href} and
 * {@code target} are kept on {@code a} only when "a" is one of the extra tags.
 *
 * @param content html content to sanitize
 * @param extraTags any other tags that should be kept, e.g. "a"
 * @return the inner HTML of the cleaned document's body
 */
public String cleanContent(String content, String ... extraTags) {
    Whitelist permitted = Whitelist.simpleText()
            .addTags("br", "cite", "em", "i", "p", "strong", "img", "li", "ul", "ol", "sup", "sub", "s")
            .addTags(extraTags)
            // style on <p> is needed for left/right alignment
            .addAttributes("p", "style")
            .addAttributes("img", "src", "style", "class");

    boolean linksRequested = Arrays.asList(extraTags).contains("a");
    if (linksRequested) {
        permitted.addAttributes("a", "href", "target");
    }

    Document sanitized = new Cleaner(permitted).clean(Jsoup.parseBodyFragment(content, ""));
    // xhtml escape mode: UTF-8 characters are not escaped
    sanitized.outputSettings().escapeMode(EscapeMode.xhtml);
    return sanitized.body().html();
}
示例2: cleaner
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Applies a few literal substitutions to the raw HTML and then sanitizes it with jsoup,
 * keeping only p, span, table, tbody, tr, td and img (img keeps its src attribute).
 *
 * @param rs raw HTML to clean
 * @return the sanitized HTML returned by {@code Jsoup.clean}
 */
protected String cleaner(String rs) {
    String prepared = rs
            .replace(" 廣告", "")
            // presumably the real image URL lives in data-original (lazy loading) - confirm
            .replace("data-original=", "src=")
            // the marker text becomes the start of an HTML comment; presumably this hides
            // whatever follows it from the cleaned output - confirm against real pages
            .replace("相關新聞", "<!--");
    // A <span> -> <p> rewrite was present here in commented-out form and stays disabled.

    Whitelist allowed = new Whitelist()
            .addTags("p", "span")
            .addTags("table", "tbody", "tr", "td")
            .addTags("img")
            .addAttributes("img", "src");
    return Jsoup.clean(prepared, allowed);
}
示例3: htmlTextToPlainText
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Converts HTML text to plain text: every tag except <code>br</code> is stripped,
 * each remaining <code>br</code> is replaced by a CRLF line break, and entities such
 * as <code>&amp;quot;</code> are unescaped.
 *
 * <p>The <code>br</code> tags are replaced <em>before</em> entities are unescaped. Doing it
 * the other way round would turn escaped literal text such as <code>&amp;lt;br&amp;gt;</code>
 * into <code>&lt;br&gt;</code> first and then wrongly convert it into a line break.
 *
 * @param htmlText the HTML to convert
 * @return the plain-text rendering, with <code>\r\n</code> in place of each <code>br</code>
 */
String htmlTextToPlainText(final String htmlText) {
    final Whitelist whitelist = Whitelist.none();
    whitelist.addTags("br");
    final Cleaner cleaner = new Cleaner(whitelist);
    final Document cleanedDocument = cleaner.clean(Jsoup.parse(htmlText));
    cleanedDocument
            .outputSettings()
            .prettyPrint(false)
            .escapeMode(EscapeMode.xhtml)
            .charset(StandardCharsets.UTF_8);
    // At this point any '<' that belongs to text is still escaped, so the pattern can only
    // match the real <br> elements that survived cleaning.
    final String withLineBreaks = cleanedDocument.body().html().trim()
            .replaceAll("<br(?: ?/)?>", "\r\n");
    return Parser.unescapeEntities(withLineBreaks, true);
}
示例4: process
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Parses an HTML fragment, drops navigation and the {@code div#pdfurl} element, sanitizes
 * the rest against a relaxed whitelist and removes childless elements that carry no text.
 *
 * @param html the HTML fragment to process
 * @return the HTML of the cleaned document
 */
@Override
public String process(String html) {
    Document parsed = Jsoup.parseBodyFragment(html);
    parsed.select("nav").remove();
    parsed.select("div#pdfurl").remove();

    // Relaxed whitelist, extended with the structural tags and link/image attributes.
    Whitelist allowed = Whitelist.relaxed()
            .addTags("div", "span", "p", "h1", "h2", "h3", "ul", "ol", "li", "a", "img")
            .preserveRelativeLinks(true)
            .addAttributes("img", "src")
            .addAttributes("a", "href");

    Document cleaned = new Cleaner(allowed).clean(parsed);
    cleaned.outputSettings().escapeMode(Entities.EscapeMode.xhtml);

    // Tags that may be discarded when they turn out to be empty.
    Set<String> removable = new HashSet<>(
            Arrays.asList("div", "span", "strong", "p", "h1", "h2", "h3", "ul", "ol", "li", "a"));

    // NOTE(review): the character inside this pattern may originally have been a
    // non-breaking space - confirm against the upstream source.
    cleaned.select("p:matchesOwn((?is) )").remove();

    for (Element candidate : cleaned.getAllElements()) {
        if (!candidate.children().isEmpty()) {
            continue; // only childless elements are considered
        }
        // Non-breaking spaces count as blank content.
        boolean blank = !candidate.hasText()
                || candidate.text().replaceAll("\u00a0", "").trim().equals("");
        if (blank && removable.contains(candidate.tagName())) {
            candidate.remove();
        }
    }

    return cleaned.html();
}
示例5: buildWhiteList
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Builds a fresh jsoup whitelist from scratch: the permitted tags, the attributes allowed
 * per tag, the URL protocols accepted on link-like attributes, and the attributes that are
 * forced onto every {@code a} element.
 *
 * @return a newly created, fully configured {@code Whitelist}
 */
private static synchronized Whitelist buildWhiteList() {
    Whitelist result = new Whitelist().addTags(
            "a", "b", "blockquote", "br", "caption", "cite", "code", "col", "colgroup", "dd", "div", "dl", "dt", "em", "h1",
            "h2", "h3", "h4", "h5", "h6", "i", "iframe", "img", "li", "ol", "p", "pre", "q", "small", "strike", "strong", "sub", "sup",
            "table", "tbody", "td", "tfoot", "th", "thead", "tr", "u", "ul");

    // Text direction is allowed on these block-level containers.
    for (String tag : new String[] {"div", "pre", "code", "table", "p"}) {
        result.addAttributes(tag, "dir");
    }

    result
            .addAttributes("a", "href", "title")
            .addAttributes("blockquote", "cite")
            .addAttributes("col", "span", "width")
            .addAttributes("colgroup", "span", "width")
            .addAttributes("iframe", "src", "height", "width", "allowfullscreen", "frameborder", "style")
            .addAttributes("img", "align", "alt", "height", "src", "title", "width", "style")
            .addAttributes("ol", "start", "type")
            .addAttributes("q", "cite")
            .addAttributes("table", "border", "bordercolor", "summary", "width")
            .addAttributes("td", "border", "bordercolor", "abbr", "axis", "colspan", "rowspan", "width")
            .addAttributes("th", "border", "bordercolor", "abbr", "axis", "colspan", "rowspan", "scope", "width")
            .addAttributes("ul", "type");

    // Restrict URL-bearing attributes to known protocols.
    result
            .addProtocols("a", "href", "ftp", "http", "https", "magnet", "mailto")
            .addProtocols("blockquote", "cite", "http", "https")
            .addProtocols("img", "src", "http", "https")
            .addProtocols("q", "cite", "http", "https");

    // Every link is emitted with these attribute values.
    result
            .addEnforcedAttribute("a", "target", "_blank")
            .addEnforcedAttribute("a", "rel", "noreferrer");

    return result;
}
示例6: cleaner
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Replaces four specific opening tags with the start of an HTML comment and then sanitizes
 * the result with jsoup, keeping only p, h4, table, tbody, tr, td and img (with src).
 *
 * @param rs raw HTML to clean
 * @return the sanitized HTML returned by {@code Jsoup.clean}
 */
protected String cleaner(String rs) {
    // Each marker is swapped for "<!--"; presumably this hides the markup that follows
    // it from the cleaned output - confirm against real pages.
    String prepared = rs
            .replace("<div class=\"social_bar\"> ", "<!--")
            .replace("<div id=\"set_font_size\" class=\"only_web\">", "<!--")
            .replace("<a href=\"####\" class=\"photo_pop_icon\">", "<!--")
            .replace("<div class=\"photo_pop\">", "<!--");

    Whitelist allowed = new Whitelist()
            .addTags("p", "h4")
            .addTags("table", "tbody", "tr", "td")
            .addTags("img")
            .addAttributes("img", "src");
    return Jsoup.clean(prepared, allowed);
}
示例7: cleaner
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Removes one specific logo image tag from the raw HTML and then sanitizes it with jsoup,
 * keeping only p, br, strong and img (img keeps its src attribute).
 *
 * @param rs raw HTML to clean
 * @return the sanitized HTML returned by {@code Jsoup.clean}
 */
protected String cleaner(String rs) {
    String withoutLogo = rs.replace(
            "<img src=\"https://d2e7nuz2r6mjca.cloudfront.net/assets/img/logo_bottom.png\" class=\"logo\">",
            "");
    // A rewrite of relative "uploads" image URLs and the table tag group were present
    // here in commented-out form and stay disabled.

    Whitelist allowed = new Whitelist()
            .addTags("p", "br", "strong")
            .addTags("img")
            .addAttributes("img", "src");
    return Jsoup.clean(withoutLogo, allowed);
}
示例8: cleaner
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Turns {@code <br>} into {@code <p>}, prefixes protocol-relative {@code src} URLs with
 * {@code http:}, and then sanitizes the HTML with jsoup, keeping only txt, p, table,
 * tbody, tr, td and img (img keeps its src attribute).
 *
 * @param rs raw HTML to clean
 * @return the sanitized HTML returned by {@code Jsoup.clean}
 */
protected String cleaner(String rs) {
    String prepared = rs
            .replace("<br>", "<p>")
            .replace("src=\"//", "src=\"http://");

    Whitelist allowed = new Whitelist()
            .addTags("txt", "p")
            .addTags("table", "tbody", "tr", "td")
            .addTags("img")
            .addAttributes("img", "src");
    return Jsoup.clean(prepared, allowed);
}
示例9: cleaner
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Sanitizes the HTML with jsoup, keeping only p, figcaption, table, tbody, tr, td and img
 * (img keeps its src attribute).
 *
 * @param rs raw HTML to clean
 * @return the sanitized HTML returned by {@code Jsoup.clean}
 */
protected String cleaner(String rs) {
    Whitelist allowed = new Whitelist()
            .addTags("p", "figcaption")
            .addTags("table", "tbody", "tr", "td")
            .addTags("img")
            .addAttributes("img", "src");
    return Jsoup.clean(rs, allowed);
}
示例10: cleaner
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Sanitizes the HTML with jsoup, keeping only p, table, tbody, tr, td and img
 * (img keeps its src attribute).
 *
 * @param rs raw HTML to clean
 * @return the sanitized HTML returned by {@code Jsoup.clean}
 */
protected String cleaner(String rs) {
    final String[] tableTags = {"table", "tbody", "tr", "td"};
    Whitelist allowed = new Whitelist();
    allowed.addTags("p").addTags(tableTags);
    allowed.addTags("img");
    allowed.addAttributes("img", "src");
    return Jsoup.clean(rs, allowed);
}
示例11: cleaner
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Sanitizes the HTML with jsoup, keeping only p and img (img keeps its src attribute).
 *
 * @param rs raw HTML to clean
 * @return the sanitized HTML returned by {@code Jsoup.clean}
 */
protected String cleaner(String rs) {
    // The table tag group was present here in commented-out form and stays disabled.
    Whitelist allowed = new Whitelist()
            .addTags("p")
            .addTags("img")
            .addAttributes("img", "src");
    return Jsoup.clean(rs, allowed);
}
示例12: cleaner
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Sanitizes the HTML with jsoup, keeping only txt, p, table, tbody, tr, td and img
 * (img keeps its src attribute).
 *
 * @param rs raw HTML to clean
 * @return the sanitized HTML returned by {@code Jsoup.clean}
 */
protected String cleaner(String rs) {
    Whitelist allowed = new Whitelist();
    allowed.addTags("txt", "p")
            .addTags("table", "tbody", "tr", "td")
            .addTags("img");
    allowed.addAttributes("img", "src");
    String sanitized = Jsoup.clean(rs, allowed);
    return sanitized;
}
示例13: cleaner
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Sanitizes the HTML with jsoup, keeping only p, table, tbody, tr, td and img
 * (img keeps its src attribute).
 *
 * @param rs raw HTML to clean
 * @return the sanitized HTML returned by {@code Jsoup.clean}
 */
protected String cleaner(String rs) {
    // A replacement of a "related reports" marker with "<!--" was present here in
    // commented-out form and stays disabled.
    Whitelist allowed = new Whitelist()
            .addTags("p")
            .addTags("table", "tbody", "tr", "td");
    allowed.addTags("img").addAttributes("img", "src");
    return Jsoup.clean(rs, allowed);
}
示例14: cleaner
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Swaps the image source attributes and then sanitizes the HTML with jsoup, keeping only
 * p, ul, li, div, table, tbody, tr, td and img (img keeps its src attribute).
 *
 * <p>Every {@code src=} is first renamed to {@code xxx=} (which the whitelist then drops),
 * after which {@code src-lg=} is renamed to {@code src=} so its value becomes the image
 * source that survives cleaning.
 *
 * @param rs raw HTML to clean
 * @return the sanitized HTML returned by {@code Jsoup.clean}
 */
protected String cleaner(String rs) {
    // Order matters: the original src must be neutralized before src-lg takes its name.
    String prepared = rs
            .replace("src=", "xxx=")
            .replace("src-lg=", "src=");

    Whitelist allowed = new Whitelist()
            .addTags("p", "ul", "li", "div")
            .addTags("table", "tbody", "tr", "td")
            .addTags("img")
            .addAttributes("img", "src");
    return Jsoup.clean(prepared, allowed);
}
示例15: cleaner
import org.jsoup.safety.Whitelist; //导入方法依赖的package包/类
/**
 * Turns {@code <br>} into {@code <p>}, replaces a marker phrase with the start of an HTML
 * comment, and then sanitizes the HTML with jsoup, keeping only p, table, tbody, tr, td
 * and img (img keeps its src attribute).
 *
 * @param rs raw HTML to clean
 * @return the sanitized HTML returned by {@code Jsoup.clean}
 */
protected String cleaner(String rs) {
    String prepared = rs
            .replace("<br>", "<p>")
            // presumably hides the trailing "you may also like" section - confirm
            .replace("※你可能還想看:", "<!--");

    Whitelist allowed = new Whitelist()
            .addTags("p")
            .addTags("table", "tbody", "tr", "td")
            .addTags("img")
            .addAttributes("img", "src");
    return Jsoup.clean(prepared, allowed);
}