本文整理汇总了Java中org.jsoup.nodes.Entities.EscapeMode类的典型用法代码示例。如果您正苦于以下问题：Java EscapeMode类的具体用法？Java EscapeMode怎么用？Java EscapeMode使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
EscapeMode是org.jsoup.nodes.Entities类中的嵌套枚举，在下文中一共展示了EscapeMode的10个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: cleanContent
import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Sanitizes an HTML fragment, keeping only a fixed set of formatting tags.
 * <p>
 * The base whitelist is {@code Whitelist.simpleText()} (b, em, i, strong, u); on top of it the
 * tags br, cite, em, i, p, strong, img, li, ul, ol, sup, sub and s are allowed, plus whatever
 * the caller passes in {@code extraTags}. The {@code style} attribute is kept on {@code p},
 * and {@code src}, {@code style} and {@code class} are kept on {@code img}. When "a" is among
 * the extra tags, its {@code href} and {@code target} attributes are kept as well.
 *
 * @param content html content
 * @param extraTags any other tags that should survive the cleaning, e.g. "a"
 * @return the inner HTML of the cleaned document's body
 */
public String cleanContent(String content, String ... extraTags) {
    // Start from the simple-text whitelist and widen it step by step.
    Whitelist permitted = Whitelist.simpleText()
            .addTags("br", "cite", "em", "i", "p", "strong", "img", "li", "ul", "ol", "sup", "sub", "s")
            .addTags(extraTags)
            .addAttributes("p", "style") // needed for left/right alignment
            .addAttributes("img", "src", "style", "class");

    boolean linksAllowed = Arrays.asList(extraTags).contains("a");
    if (linksAllowed) {
        permitted.addAttributes("a", "href", "target");
    }

    Document untrusted = Jsoup.parseBodyFragment(content, "");
    Document sanitized = new Cleaner(permitted).clean(untrusted);
    // Per the original author's note: xhtml mode does not escape UTF-8 characters.
    sanitized.outputSettings().escapeMode(EscapeMode.xhtml);
    return sanitized.body().html();
}
示例2: extractPackSection
import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Parses {@code mHtmlStr} into {@code mDoc} and prints the text of every sibling element
 * located between the paragraph containing "Packungen" and the paragraph containing
 * "Zulassungsinhaberin".
 * <p>
 * Nothing is printed when either marker paragraph is missing. The walk also stops when the
 * siblings run out before the stop paragraph is reached.
 *
 * @return always the empty string; the visited text is only written to standard output
 */
public String extractPackSection() {
    mDoc = Jsoup.parse(mHtmlStr);
    mDoc.outputSettings().escapeMode(EscapeMode.xhtml);
    String pack_section = "";
    // Find all information between X and X+1.
    // (An alternative would be to select by id: p[id=section18] / p[id=section19].)
    Element start_elem = mDoc.select("p:contains(Packungen)").first();
    Element stop_elem = mDoc.select("p:contains(Zulassungsinhaberin)").first();
    // Check the markers BEFORE dereferencing start_elem; first() yields null on no match.
    if (start_elem != null && stop_elem != null) {
        Element pe = start_elem.nextElementSibling();
        // Guard against pe == null: stop_elem is not guaranteed to be a later sibling.
        while (pe != null && pe != stop_elem) {
            System.out.println(pe.text());
            pe = pe.nextElementSibling();
        }
    }
    return pack_section;
}
示例3: htmlTextToPlainText
import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Converts HTML text to plain text: every tag except <code>br</code> is stripped, entities
 * (for example {@code &quot;}) are unescaped, and each remaining <code>br</code> is replaced
 * by a CR/LF line break.
 *
 * @param htmlText the HTML text to convert
 * @return the trimmed, unescaped body text with line breaks in place of <code>br</code> tags
 */
String htmlTextToPlainText(final String htmlText) {
    // Only <br> survives the cleaning pass.
    final Whitelist brOnly = Whitelist.none();
    brOnly.addTags("br");

    final Document stripped = new Cleaner(brOnly).clean(Jsoup.parse(htmlText));

    final Document.OutputSettings settings = stripped.outputSettings();
    settings.prettyPrint(false);
    settings.escapeMode(EscapeMode.xhtml);
    settings.charset(StandardCharsets.UTF_8);

    final String bodyHtml = stripped.body().html().trim();
    final String unescaped = Parser.unescapeEntities(bodyHtml, true);
    // Matches <br>, <br/> and <br />.
    return unescaped.replaceAll("<br(?: ?/)?>", "\r\n");
}
示例4: stripXSS
import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Strips potential XSS payloads from a value.
 * <p>
 * The value is canonicalized through ESAPI, run through a fixed sequence of regex
 * replacements, and finally cleaned by jsoup with an empty whitelist, so no tags remain.
 *
 * @param value the raw input
 * @return the stripped value, or {@code null} when the input is blank
 */
public String stripXSS(String value) {
    if (StringUtils.isBlank(value)) {
        return null;
    }

    // Use the ESAPI library to avoid encoded attacks.
    final String canonical = ESAPI.encoder().canonicalize(value);

    // The order of the replacements matters: each step works on the output of the previous one.
    // NOTE(review): the replacement texts contain a space after '&' ("& lt;", "& #40;", ...);
    // confirm whether that is intentional.
    final String neutralized = canonical
            .replaceAll("\0", "") // avoid null characters
            .replaceAll("<", "& lt;")
            .replaceAll(">", "& gt;")
            .replaceAll("\\(", "& #40;")
            .replaceAll("\\)", "& #41;")
            .replaceAll("'", "& #39;")
            .replaceAll("eval\\((.*)\\)", "")
            .replaceAll("[\\\"\\\'][\\s]*javascript:(.*)[\\\"\\\']", "\"\"")
            .replaceAll("script", "");

    // Clean out HTML: nothing is whitelisted.
    final Document.OutputSettings settings = new Document.OutputSettings();
    settings.escapeMode(EscapeMode.xhtml);
    settings.prettyPrint(false);
    return Jsoup.clean(neutralized, "", Whitelist.none(), settings);
}
示例5: parseContent
import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Parses HTML content into a jsoup document configured for XML-style output
 * (xhtml escape mode, xml syntax) and removes all {@code script} elements.
 *
 * @param content the HTML to parse
 * @return the parsed document without script elements
 */
Document parseContent(final String content) {
    final Document parsed = Jsoup.parse(content);
    parsed.outputSettings()
            .escapeMode(EscapeMode.xhtml)
            .syntax(Document.OutputSettings.Syntax.xml);
    // Script tags are not supported in pdf and can make the document
    // not well formed (e.g. <\/script> - an escaped script tag), so drop them.
    parsed.select("script").remove();
    return parsed;
}
示例6: parse
import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Parses the given string with jsoup and wraps the result in a {@code Document}.
 * When {@code NOT_USE_HTML_ENCODE} is set, the output escape mode is switched to xhtml first.
 *
 * @param s the markup to parse
 * @return the wrapped document
 */
public static IDocument parse(String s) {
    final org.jsoup.nodes.Document parsed = Jsoup.parse(s);
    if (!NOT_USE_HTML_ENCODE) {
        return new Document(parsed);
    }
    parsed.outputSettings().escapeMode(EscapeMode.xhtml);
    return new Document(parsed);
}
示例7: clean
import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Cleans an HTML string against the whitelist {@code wl} and returns the result.
 *
 * @param html Input HTML string.
 * @return The cleaned document serialized back to a string, using xhtml escape mode.
 */
public String clean(String html)
{
    // Parse first, then run the whitelist cleaner over the parsed document.
    final Document parsed = Jsoup.parse(html);
    final Document sanitized = new Cleaner(wl).clean(parsed);
    // Switch the escape mode before serializing.
    sanitized.outputSettings().escapeMode(EscapeMode.xhtml);
    return sanitized.html();
}
示例8: ScheduleDocument
import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Creates an empty document shell with xhtml escape mode, an {@code html} doctype
 * prepended, and a fixed {@code class} attribute set on the {@code html} element.
 */
public ScheduleDocument(){
    final String htmlClasses = "js no-touch geolocation backgroundsize csstransforms csstransforms3d audio localstorage inlinesvg pointerevents webaudio mediaqueries getusermedia";

    doc = Document.createShell("");
    doc.outputSettings().escapeMode(EscapeMode.xhtml);
    // Doctype with empty public/system ids.
    doc.prependChild(new DocumentType("html", "", "", ""));
    doc.select("html").attr("class", htmlClasses);
}
示例9: addHeaderToXml
import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Wraps the given markup in a {@code kompendium} element, prepends a {@code date} element
 * holding the current date and a {@code lang} element (filled with "DE" or "FR" depending on
 * {@code DB_LANGUAGE}), and returns the pretty-printed result.
 * <p>
 * Along the way {@code tbody} wrappers are unwrapped, {@code img} elements without a
 * {@code src} attribute are unwrapped, and the remaining {@code img} elements are renamed
 * to {@code image}.
 *
 * @param xml_str the markup to wrap
 * @return the outer HTML of the {@code kompendium} element
 */
static String addHeaderToXml(String xml_str) {
    Document xmlDoc = Jsoup.parse("<kompendium>\n" + xml_str + "</kompendium>");
    xmlDoc.outputSettings()
            .escapeMode(EscapeMode.xhtml)
            .prettyPrint(true)
            .indentAmount(4);

    // Add date
    String timestamp = new Date().toString();
    xmlDoc.select("kompendium").first().prependElement("date");
    xmlDoc.select("date").first().text(timestamp);

    // Add language
    xmlDoc.select("date").after("<lang></lang>");
    String langLabel = DB_LANGUAGE.equals("de") ? "DE"
            : (DB_LANGUAGE.equals("fr") ? "FR" : null);
    if (langLabel != null) {
        xmlDoc.select("lang").first().text(langLabel);
    }

    // Undo the tbody wrappers (presumably introduced by jsoup's parse step).
    xmlDoc.select("tbody").unwrap();

    // Unwrap images that carry no source, then rename the remaining ones.
    for (Element img : xmlDoc.select("img")) {
        if (img.hasAttr("src")) {
            continue;
        }
        img.unwrap();
    }
    xmlDoc.select("img").tagName("image");

    return xmlDoc.select("kompendium").first().outerHtml();
}
示例10: extractHtmlSection
import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Builds the final HTML content for a medical information entry and returns it together
 * with the entry's registration number(s).
 * <p>
 * Steps: extract the registration number from the title (language-dependent), sanitize the
 * sections when the entry is not version "1" or its content starts with an xml marker,
 * update the "Packungen" section, inject the stylesheet link and charset meta tag after
 * {@code <head>}, and patch the style and authorization-holder fields of {@code m}.
 * The entry {@code m} is modified in place (content, style, auth holder).
 *
 * @param m the medical information entry to process
 * @return a two-element array: {@code [0]} the registration number string, {@code [1]} the
 *         updated HTML content
 */
static String[] extractHtmlSection(MedicalInformations.MedicalInformation m) {
    // Extract section titles and section ids
    // NOTE(review): med_sections / med_section_list / doc are never read below; kept because
    // the calls are on project types whose side effects are not visible here.
    MedicalInformations.MedicalInformation.Sections med_sections = m.getSections();
    List<MedicalInformations.MedicalInformation.Sections.Section> med_section_list = med_sections.getSection();
    Document doc = Jsoup.parse(m.getContent());
    doc.outputSettings().escapeMode(EscapeMode.xhtml);
    // Clean html code
    HtmlUtils html_utils = new HtmlUtils(m.getContent());
    html_utils.clean();
    // Extract registration number (swissmedic no5)
    String regnr_str = "";
    if (DB_LANGUAGE.equals("de"))
        regnr_str = html_utils.extractRegNrDE(m.getTitle());
    else if (DB_LANGUAGE.equals("fr"))
        regnr_str = html_utils.extractRegNrFR(m.getTitle());
    // Sanitize html
    String html_sanitized = "";
    // First check for bad boys (version=1! but actually version>1!)
    boolean needs_sanitizing = !m.getVersion().equals("1");
    if (!needs_sanitizing) {
        // Only look at the first (up to) 20 characters; clamp the end index so that
        // content shorter than 20 characters does not throw StringIndexOutOfBoundsException.
        String raw_content = m.getContent();
        String content_head = raw_content.substring(0, Math.min(20, raw_content.length()));
        needs_sanitizing = content_head.contains("xml");
    }
    if (needs_sanitizing) {
        StringBuilder sections = new StringBuilder();
        for (int i=1; i<22; ++i) {
            sections.append(html_utils.sanitizeSection(i, m.getTitle(), DB_LANGUAGE));
        }
        html_sanitized = "<div id=\"monographie\">" + sections + "</div>" ;
    } else {
        html_sanitized = m.getContent();
    }
    // Update "Packungen" section and extract therapeutisches index
    List<String> mTyIndex_list = new ArrayList<String>();
    String mContent_str = updateSectionPackungen(m.getTitle(), package_info, regnr_str, html_sanitized, mTyIndex_list);
    // Add meta-tag and link
    mContent_str = mContent_str.replaceAll("<head>", "<head>" +
            "<link href=\"amiko_stylesheet.css\" rel=\"stylesheet\" type=\"text/css\"></>" +
            "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">");
    m.setContent(mContent_str);
    // Fix problem with wrong div class in original Swissmedic file
    if (DB_LANGUAGE.equals("de")) {
        m.setStyle(m.getStyle().replaceAll("untertitel", "untertitle"));
        // After the replacement above no "untertitel" is left, so this one cannot match anymore.
        m.setStyle(m.getStyle().replaceAll("untertitel1", "untertitle1"));
    }
    // Correct formatting error introduced by Swissmedic
    // NOTE(review): pattern and replacement are identical here, so this call changes nothing;
    // the pattern was probably an escaped entity that got lost - confirm against the upstream source.
    m.setAuthHolder(m.getAuthHolder().replaceAll("&","&"));
    // Return *all* registration numbers (not only the first one) together with the content
    String[] swno5_content_map = {regnr_str, mContent_str};
    return swno5_content_map;
}