当前位置: 首页>>代码示例>>Java>>正文


Java EscapeMode类代码示例

本文整理汇总了Java中org.jsoup.nodes.Entities.EscapeMode的典型用法代码示例。如果您正苦于以下问题:Java EscapeMode类的具体用法?Java EscapeMode怎么用?Java EscapeMode使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


EscapeMode类属于org.jsoup.nodes.Entities包,在下文中一共展示了EscapeMode类的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: cleanContent

import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Sanitizes html content so that only a fixed set of tags survives:
 * b, em, i, strong, u, br, cite, p, img, li, ul, ol, sup, sub, s.
 *
 * @param content html content to sanitize
 * @param extraTags any other tags that should be kept, e.g. "a"
 * @return the inner html of the cleaned document body
 */
public String cleanContent(String content, String ... extraTags) {
	// Whitelist.simpleText() allows only simple text formatting (b, em, i, strong, u);
	// every other tag and attribute is removed unless added here.
	Whitelist permitted = Whitelist.simpleText()
		.addTags("br", "cite", "em", "i", "p", "strong", "img", "li", "ul", "ol", "sup", "sub", "s")
		.addTags(extraTags)
		.addAttributes("p", "style") // needed for left/right alignment
		.addAttributes("img", "src", "style", "class");
	boolean linksRequested = Arrays.asList(extraTags).contains("a");
	if (linksRequested) {
		permitted.addAttributes("a", "href", "target");
	}
	Document sanitized = new Cleaner(permitted).clean(Jsoup.parseBodyFragment(content, ""));
	// Per the original note: xhtml mode does not escape utf-8 characters
	sanitized.outputSettings().escapeMode(EscapeMode.xhtml);
	return sanitized.body().html();
}
 
开发者ID:xtianus,项目名称:yadaframework,代码行数:23,代码来源:YadaWebUtil.java

示例2: extractPackSection

import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Parses the stored html and prints, to standard output, the text of every sibling
 * element located after the first paragraph containing "Packungen" and before the
 * first paragraph containing "Zulassungsinhaberin".
 *
 * If either marker paragraph is missing nothing is printed. If the stop marker is not
 * a later sibling of the start marker, all remaining siblings are printed.
 *
 * @return always an empty string; the section text is printed, not collected
 */
public String extractPackSection() {
	mDoc = Jsoup.parse(mHtmlStr);
	mDoc.outputSettings().escapeMode(EscapeMode.xhtml);

	String pack_section = "";
	// Find all information between X and X+1
	Element start_elem = mDoc.select("p:contains(Packungen)").first();
	Element stop_elem = mDoc.select("p:contains(Zulassungsinhaberin)").first();
	// Alternative selectors: p[id=section18] and p[id=section19]

	// Check the markers BEFORE dereferencing them; first() yields null when nothing matches
	if (start_elem == null || stop_elem == null) {
		return pack_section;
	}

	// The pe != null guard ends the walk when the siblings run out before the stop marker
	for (Element pe = start_elem.nextElementSibling();
			pe != null && pe != stop_elem;
			pe = pe.nextElementSibling()) {
		System.out.println(pe.text());
	}
	return pack_section;
}
 
开发者ID:zdavatz,项目名称:aips2xml,代码行数:23,代码来源:HtmlUtils.java

示例3: htmlTextToPlainText

import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Converts html text to plain text: all tags except <code>br</code> are stripped,
 * entities (such as <code>&amp;quot;</code>) are unescaped, and every remaining
 * <code>br</code> tag is replaced by a CRLF line break.
 *
 * @param htmlText the html text to convert
 * @return the plain-text form of {@code htmlText}
 */
String htmlTextToPlainText(final String htmlText) {
    final Cleaner brOnlyCleaner = new Cleaner(Whitelist.none().addTags("br"));
    final Document stripped = brOnlyCleaner.clean(Jsoup.parse(htmlText));

    final Document.OutputSettings settings = stripped.outputSettings();
    settings.prettyPrint(false);
    settings.escapeMode(EscapeMode.xhtml);
    settings.charset(StandardCharsets.UTF_8);

    final String bodyHtml = stripped.body().html().trim();
    final String unescaped = Parser.unescapeEntities(bodyHtml, true);
    return unescaped.replaceAll("<br(?: ?/)?>", "\r\n");
}
 
开发者ID:EuregJUG-Maas-Rhine,项目名称:site,代码行数:21,代码来源:RegistrationService.java

示例4: stripXSS

import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Strips potential XSS threats out of the value.
 *
 * The value is canonicalized through ESAPI, run through a fixed sequence of regex
 * substitutions, and finally cleaned by Jsoup using {@code Whitelist.none()}.
 *
 * @param value the raw input
 * @return the filtered text, or {@code null} when {@code value} is blank
 */
public String stripXSS(String value) {
	if (StringUtils.isBlank(value)) {
		return null;
	}

	// Use the ESAPI library to avoid encoded attacks.
	String filtered = ESAPI.encoder().canonicalize(value);

	// Each substitution below operates on the result of the previous one; keep the order.
	// Avoid null characters
	filtered = filtered.replaceAll("\0", "");
	// NOTE(review): the replacement texts contain a space after '&' ("& lt;", "& #40;", ...);
	// this looks like it may be a mangled entity -- confirm against the intended output.
	filtered = filtered.replaceAll("<", "& lt;");
	filtered = filtered.replaceAll(">", "& gt;");
	filtered = filtered.replaceAll("\\(", "& #40;");
	filtered = filtered.replaceAll("\\)", "& #41;");
	filtered = filtered.replaceAll("'", "& #39;");
	filtered = filtered.replaceAll("eval\\((.*)\\)", "");
	filtered = filtered.replaceAll("[\\\"\\\'][\\s]*javascript:(.*)[\\\"\\\']", "\"\"");
	filtered = filtered.replaceAll("script", "");

	// Clean out HTML
	Document.OutputSettings settings = new Document.OutputSettings()
		.escapeMode(EscapeMode.xhtml)
		.prettyPrint(false);
	return Jsoup.clean(filtered, "", Whitelist.none(), settings);
}
 
开发者ID:xuegongzi,项目名称:rabbitframework,代码行数:36,代码来源:XSSFilter.java

示例5: parseContent

import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Parses html content into a document whose output uses xhtml escaping and xml
 * syntax, with every script element removed.
 *
 * @param content the html to parse
 * @return the parsed, script-free document
 */
Document parseContent(final String content) {
    final Document parsed = Jsoup.parse(content);
    parsed.outputSettings()
            .escapeMode(EscapeMode.xhtml)
            .syntax(Document.OutputSettings.Syntax.xml);

    // Script tags are not supported in pdf and can produce a document that is
    // not well formed (e.g. <\/script> - an escaped script tag), so drop them.
    parsed.select("script").remove();
    return parsed;
}
 
开发者ID:aleksandr-m,项目名称:struts2-pdfstream,代码行数:12,代码来源:PdfStreamResult.java

示例6: parse

import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Parses an html string with Jsoup and wraps the result in a {@code Document}.
 * When {@code NOT_USE_HTML_ENCODE} is set, the output escape mode is switched to xhtml.
 *
 * @param s the html text to parse
 * @return the wrapped parsed document
 */
public static IDocument parse(String s) {
	org.jsoup.nodes.Document jsoupDoc = Jsoup.parse(s);
	if (NOT_USE_HTML_ENCODE) {
		org.jsoup.nodes.Document.OutputSettings settings = jsoupDoc.outputSettings();
		settings.escapeMode(EscapeMode.xhtml);
	}
	return new Document(jsoupDoc);
}
 
开发者ID:Taulukko,项目名称:taulukko-commons-parsers,代码行数:10,代码来源:HTMLParser.java

示例7: clean

import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Runs an HTML string through the configured cleaner and serializes the result.
 *
 * @param html Input HTML string.
 * @return The cleaned document rendered as a string, using xhtml escape mode.
 */
public String clean(String html)
{
	Document parsed = Jsoup.parse(html);
	Document cleaned = new Cleaner(wl).clean(parsed);
	// Escape mode must be set on the cleaned document, since that is the one serialized
	cleaned.outputSettings().escapeMode(EscapeMode.xhtml);
	return cleaned.html();
}
 
开发者ID:emir-munoz,项目名称:uraptor,代码行数:19,代码来源:HtmlCleaner.java

示例8: ScheduleDocument

import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Creates an empty shell document with xhtml escaping, a leading html doctype
 * and a fixed class attribute on the html element.
 */
public ScheduleDocument(){
	final String htmlClasses = "js no-touch geolocation backgroundsize csstransforms csstransforms3d audio localstorage inlinesvg pointerevents webaudio mediaqueries getusermedia";

	doc = Document.createShell("");
	doc.outputSettings().escapeMode(EscapeMode.xhtml);
	doc.prependChild(new DocumentType("html", "", "", ""));
	doc.select("html").attr("class", htmlClasses);
}
 
开发者ID:arscyper,项目名称:adan,代码行数:9,代码来源:ScheduleDocument.java

示例9: addHeaderToXml

import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Wraps an xml fragment in a kompendium element, prepends a date element (current
 * time) followed by a lang element, and returns the kompendium element as a string.
 *
 * @param xml_str the xml fragment to wrap
 * @return the outer html of the kompendium element, pretty-printed with indent 4
 */
static String addHeaderToXml(String xml_str) {
	Document wrapped = Jsoup.parse("<kompendium>\n" + xml_str + "</kompendium>");
	wrapped.outputSettings()
		.escapeMode(EscapeMode.xhtml)
		.prettyPrint(true)
		.indentAmount(4);

	// Add date
	String timestamp = new Date().toString();
	wrapped.select("kompendium").first().prependElement("date");
	wrapped.select("date").first().text(timestamp);

	// Add language; the lang element stays empty for any language other than de/fr
	wrapped.select("date").after("<lang></lang>");
	if (DB_LANGUAGE.equals("de")) {
		wrapped.select("lang").first().text("DE");
	} else if (DB_LANGUAGE.equals("fr")) {
		wrapped.select("lang").first().text("FR");
	}

	// Work around jsoup.parse, which (per the original note) has a "life" of its own:
	// unwrap tbody elements and img elements lacking a src, then rename img to image.
	wrapped.select("tbody").unwrap();
	for (Element image : wrapped.select("img")) {
		if (!image.hasAttr("src")) {
			image.unwrap();
		}
	}
	wrapped.select("img").tagName("image");

	return wrapped.select("kompendium").first().outerHtml();
}
 
开发者ID:zdavatz,项目名称:aips2xml,代码行数:32,代码来源:Aips2Xml.java

示例10: extractHtmlSection

import org.jsoup.nodes.Entities.EscapeMode; //导入依赖的package包/类
/**
 * Produces the final html content for one medical information entry and pairs it
 * with the entry's registration number string.
 *
 * The entry is modified in place: its content is replaced by the updated html, its
 * style is corrected (German only) and "&#038;" in its authorization holder is
 * replaced by "&".
 *
 * @param m the medical information entry to process
 * @return a two-element array: index 0 is the registration number string (all
 *         numbers, as extracted from the title), index 1 is the updated html content
 */
static String[] extractHtmlSection(MedicalInformations.MedicalInformation m) {
	String original_content = m.getContent();

	// Clean html code
	HtmlUtils html_utils = new HtmlUtils(original_content);
	html_utils.clean();

	// Extract registration number (swissmedic no5)
	String regnr_str = "";
	if (DB_LANGUAGE.equals("de")) {
		regnr_str = html_utils.extractRegNrDE(m.getTitle());
	} else if (DB_LANGUAGE.equals("fr")) {
		regnr_str = html_utils.extractRegNrFR(m.getTitle());
	}

	// Sanitize html
	String html_sanitized;
	// First check for bad boys (version=1! but actually version>1!).
	// The prefix length is clamped so content shorter than 20 chars cannot throw.
	if (!m.getVersion().equals("1")
			|| original_content.substring(0, Math.min(20, original_content.length())).contains("xml")) {
		StringBuilder sections = new StringBuilder("<div id=\"monographie\">");
		for (int i = 1; i < 22; ++i) {
			sections.append(html_utils.sanitizeSection(i, m.getTitle(), DB_LANGUAGE));
		}
		sections.append("</div>");
		html_sanitized = sections.toString();
	} else {
		html_sanitized = original_content;
	}

	// Update "Packungen" section and extract therapeutisches index
	List<String> mTyIndex_list = new ArrayList<String>();
	String mContent_str = updateSectionPackungen(m.getTitle(), package_info, regnr_str, html_sanitized, mTyIndex_list);

	// Add meta-tag and link
	mContent_str = mContent_str.replaceAll("<head>", "<head>" +
			"<link href=\"amiko_stylesheet.css\" rel=\"stylesheet\" type=\"text/css\"></>" +
			"<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">");

	m.setContent(mContent_str);

	// Fix problem with wrong div class in original Swissmedic file.
	// Replacing "untertitel" also rewrites every "untertitel1", so one pass is enough.
	if (DB_LANGUAGE.equals("de")) {
		m.setStyle(m.getStyle().replaceAll("untertitel", "untertitle"));
	}

	// Correct formatting error introduced by Swissmedic
	m.setAuthHolder(m.getAuthHolder().replaceAll("&#038;", "&"));

	// Return *all* registration numbers together with the content
	return new String[] {regnr_str, mContent_str};
}
 
开发者ID:zdavatz,项目名称:aips2xml,代码行数:62,代码来源:Aips2Xml.java


注:本文中的org.jsoup.nodes.Entities.EscapeMode类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。