本文整理匯總了Java中org.jsoup.safety.Whitelist.addAttributes方法的典型用法代碼示例。如果您正苦於以下問題:Java Whitelist.addAttributes方法的具體用法?Java Whitelist.addAttributes怎麼用?Java Whitelist.addAttributes使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.jsoup.safety.Whitelist的用法示例。
在下文中一共展示了Whitelist.addAttributes方法的4個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: cleanContent
import org.jsoup.safety.Whitelist; //導入方法依賴的package包/類
/**
 * Sanitizes an HTML fragment so that only a fixed set of tags survives:
 * b, em, i, strong, u, br, cite, p, img, li, ul, ol, sup, sub, s, plus any caller-supplied extras.
 *
 * @param content html content to sanitize
 * @param extraTags any other tags that should be kept, e.g. "a"
 * @return the inner HTML of the sanitized document body
 */
public String cleanContent(String content, String ... extraTags) {
    // Start from the simple-text whitelist (b, em, i, strong, u) and widen it step by step.
    Whitelist permitted = Whitelist.simpleText()
            .addTags("br", "cite", "em", "i", "p", "strong", "img", "li", "ul", "ol", "sup", "sub", "s")
            .addTags(extraTags)
            .addAttributes("p", "style") // needed for left/right alignment
            .addAttributes("img", "src", "style", "class");

    // Link attributes are only whitelisted when the caller explicitly asked to keep anchors.
    // NOTE(review): no protocol restriction is registered for a[href] — confirm whether
    // schemes such as "javascript:" should be rejected here.
    boolean anchorsRequested = false;
    for (String tag : extraTags) {
        if ("a".equals(tag)) {
            anchorsRequested = true;
            break;
        }
    }
    if (anchorsRequested) {
        permitted.addAttributes("a", "href", "target");
    }

    Document sanitized = new Cleaner(permitted).clean(Jsoup.parseBodyFragment(content, ""));
    // xhtml escape mode, so that UTF-8 characters are not entity-escaped in the output
    sanitized.outputSettings().escapeMode(EscapeMode.xhtml);
    return sanitized.body().html();
}
示例2: process
import org.jsoup.safety.Whitelist; //導入方法依賴的package包/類
/**
 * Removes {@code nav} and {@code div#pdfurl} elements from an HTML fragment, sanitizes the
 * rest against a relaxed whitelist and then drops leaf elements that carry no text.
 *
 * @param html the HTML fragment to process
 * @return the HTML of the cleaned document, intended for display
 */
@Override
public String process(String html) {
    // Parse the fragment and discard the parts that are never wanted in the output.
    Document parsed = Jsoup.parseBodyFragment(html);
    parsed.select("nav").remove();
    parsed.select("div#pdfurl").remove();

    // Whitelist used for sanitizing; relative links are kept as written.
    Whitelist allowed = Whitelist.relaxed()
            .addTags("div", "span", "p", "h1", "h2", "h3", "ul", "ol", "li", "a", "img")
            .preserveRelativeLinks(true)
            .addAttributes("img", "src")
            .addAttributes("a", "href");

    Document cleaned = new Cleaner(allowed).clean(parsed);
    cleaned.outputSettings().escapeMode(Entities.EscapeMode.xhtml);

    // NOTE(review): the whitespace character inside this pattern should be verified against
    // the upstream source — it may originally have been a non-breaking space.
    cleaned.select("p:matchesOwn((?is) )").remove();

    // Tags that may be dropped when they end up as empty leaves.
    Set<String> prunable = new HashSet<>(Arrays.asList("div", "span", "strong", "p", "h1", "h2", "h3", "ul", "ol", "li", "a"));
    for (Element candidate : cleaned.getAllElements()) {
        if (!candidate.children().isEmpty()) {
            continue; // only leaf elements are considered
        }
        // Non-breaking spaces alone do not count as content.
        boolean blank = !candidate.hasText() || candidate.text().replaceAll("\u00a0", "").trim().equals("");
        if (blank && prunable.contains(candidate.tagName())) {
            candidate.remove();
        }
    }

    return cleaned.html();
}
示例3: buildWhiteList
import org.jsoup.safety.Whitelist; //導入方法依賴的package包/類
/**
 * Builds a fresh whitelist describing the tags, attributes, URL protocols and enforced
 * attributes that are accepted.
 *
 * @return a newly created, fully configured whitelist
 */
private static synchronized Whitelist buildWhiteList() {
    Whitelist rules = new Whitelist().addTags(
            "a", "b", "blockquote", "br", "caption", "cite", "code", "col", "colgroup", "dd", "div", "dl", "dt", "em", "h1",
            "h2", "h3", "h4", "h5", "h6", "i", "iframe", "img", "li", "ol", "p", "pre", "q", "small", "strike", "strong", "sub", "sup",
            "table", "tbody", "td", "tfoot", "th", "thead", "tr", "u", "ul");

    // Elements that may carry a text-direction attribute.
    for (String tag : new String[] {"div", "pre", "code", "table", "p"}) {
        rules.addAttributes(tag, "dir");
    }

    // Column elements share the same attribute set.
    for (String tag : new String[] {"col", "colgroup"}) {
        rules.addAttributes(tag, "span", "width");
    }

    // Quoting elements: a cite attribute restricted to http/https.
    for (String tag : new String[] {"blockquote", "q"}) {
        rules.addAttributes(tag, "cite");
        rules.addProtocols(tag, "cite", "http", "https");
    }

    rules.addAttributes("a", "href", "title")
            .addAttributes("iframe", "src", "height", "width", "allowfullscreen", "frameborder", "style")
            .addAttributes("img", "align", "alt", "height", "src", "title", "width", "style")
            .addAttributes("ol", "start", "type")
            .addAttributes("ul", "type")
            .addAttributes("table", "border", "bordercolor", "summary", "width")
            .addAttributes("td", "border", "bordercolor", "abbr", "axis", "colspan", "rowspan", "width")
            .addAttributes("th", "border", "bordercolor", "abbr", "axis", "colspan", "rowspan", "scope", "width");

    rules.addProtocols("a", "href", "ftp", "http", "https", "magnet", "mailto")
            .addProtocols("img", "src", "http", "https");

    // Every surviving link gets these attributes forced onto it.
    rules.addEnforcedAttribute("a", "target", "_blank")
            .addEnforcedAttribute("a", "rel", "noreferrer");

    return rules;
}
示例4: getData
import org.jsoup.safety.Whitelist; //導入方法依賴的package包/類
/**
 * Makes the query to Google and returns the sanitized result page.
 *
 * @param query
 *            text to search for on Google
 * @return the response cleaned against the basic whitelist (with {@code span[class]}
 *         additionally allowed), or {@code null} if an {@link IOException} occurred
 * @throws CaptchaException
 *             if the response status is 503; the captcha id and image are fetched first
 * @throws EmptyQueryException
 *             if the {@code query} field of this instance is {@code null} or empty
 * @throws UnsupportedEncodingException
 *             if the "UTF-8" encoding is rejected while URL-encoding the request
 */
private Document getData(String query) throws CaptchaException, EmptyQueryException, UnsupportedEncodingException {
    // The null check has to come first; the reverse order dereferences a null field.
    // NOTE(review): this validates the field, while the request below is built from the
    // parameter — confirm that this is intended.
    if (this.query == null || this.query.isEmpty()) {
        throw new EmptyQueryException();
    }
    Document doc = null;
    String request = "https://www.google.com/search?q=" + URLEncoder.encode(stripXSS(query), "UTF-8");
    if (!tokenCookie.isEmpty()) {
        request = request + "&google_abuse=" + URLEncoder.encode(tokenCookie, "UTF-8");
    }
    try {
        Connection conn = Jsoup
                .connect(request)
                .method(Method.GET)
                .userAgent("Mozilla/5.0 (Windows NT 6.3; WOW64; rv:36.0) Gecko/20100101 Firefox/48.0")
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                .header("Cookie", tokenCookie)
                .header("Connection", "keep-alive")
                .ignoreHttpErrors(true) // a 503 must reach the status check below instead of throwing
                .timeout(5000);
        if (!referer.isEmpty()) {
            conn.header("Referer", referer);
        }
        Connection.Response response = conn.execute();
        if (response.statusCode() == 503) {
            // Remember where the captcha was raised and fetch its image before signalling the caller.
            referer = response.url().toString();
            idCaptcha = getIDCaptcha(response.parse());
            getCaptcha("https://ipv4.google.com/sorry/image?id=" + idCaptcha + "&hl=es&" + referer.substring(referer.indexOf('?') + 1));
            throw new CaptchaException();
        }
        doc = Jsoup.parse(response.body());
        // Clean the response; basic() is a static factory, so no throwaway instance is needed.
        Whitelist wl = Whitelist.basic();
        wl.addAttributes("span", "class");
        doc = new Cleaner(wl).clean(doc);
    } catch (IOException e) {
        // Best effort: the failure is reported and the caller receives null.
        e.printStackTrace();
    }
    return doc;
}