本文整理汇总了Java中org.jsoup.Jsoup.parseBodyFragment方法的典型用法代码示例。如果您正苦于以下问题：Java Jsoup.parseBodyFragment方法的具体用法？Java Jsoup.parseBodyFragment怎么用？Java Jsoup.parseBodyFragment使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.jsoup.Jsoup的用法示例。
在下文中一共展示了Jsoup.parseBodyFragment方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: parseZhihuTopics1
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Handles one page of the topic-list JSON response: queues a request for every topic found
 * and then queues the request for the next page of topics.
 *
 * <p>The page content is parsed as a JSON object whose {@code "msg"} array holds one HTML
 * fragment per topic. For each fragment the first {@code a[target]} inside the first
 * {@code div.item} is located and a GET request for
 * {@code "https://www.zhihu.com" + href + "/newest"} is added to {@code result}. Fragments
 * that do not contain such an anchor are skipped instead of aborting the whole page.
 *
 * <p>When the {@code "msg"} array is missing or empty the result is marked as skipped and
 * no follow-up request is queued. Otherwise a POST request for the next page is added, with
 * the {@code "offset"} attachment of the current request advanced by 20.
 *
 * @param page   the fetched page; its request must carry the {@code "topic_id"} and an
 *               {@code Integer} {@code "offset"} attachment
 * @param result collector that receives the follow-up requests
 */
public static void parseZhihuTopics1(Page page, Result result) {
    String json = page.getContent();
    JSONObject object = JSON.parseObject(json);
    JSONArray array = (object == null) ? null : object.getJSONArray("msg");
    // A missing "msg" array is handled like an empty one: there is nothing to crawl.
    if (array == null || array.size() == 0) {
        result.setSkip(true);
        return;
    }

    for (int i = 0; i < array.size(); i++) {
        String topicStr = array.getString(i);
        Document doc = Jsoup.parseBodyFragment(topicStr);
        // first() returns null when the selector matches nothing, so check each step.
        Element item = doc.body().select("div.item").first();
        if (item == null) {
            continue;
        }
        Element a = item.select("a[target]").first();
        if (a == null) {
            continue;
        }
        String href = "https://www.zhihu.com" + a.attr("href") + "/newest";
        result.addRequest(new Request(href, HttpMethod.GET));
    }

    // Read the paging state once; it is sent both as a form parameter and as an attachment.
    Object topicId = page.getRequest().getAddch("topic_id");
    Integer nextOffset = Integer.valueOf(((Integer) page.getRequest().getAddch("offset")) + 20);

    JSONObject object1 = new JSONObject();
    object1.put("topic_id", topicId);
    object1.put("offset", nextOffset);
    object1.put("hash_id", "22e50cd21ed9df7085ff76d62175e986");

    Request request = new Request("https://www.zhihu.com/node/TopicsPlazzaListV2", HttpMethod.POST);
    request.addParame("method", "next")
            .addParame("params", object1.toJSONString())
            .addAttach("offset", nextOffset)
            .addAttach("topic_id", topicId);
    result.addRequest(request);
}
示例2: cleanContent
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Sanitizes an HTML fragment so that only a fixed set of formatting tags survives.
 *
 * <p>The allowed set starts from {@code Whitelist.simpleText()} (b, em, i, strong, u) and is
 * extended with br, cite, em, i, p, strong, img, li, ul, ol, sup, sub and s, plus whatever
 * the caller passes in {@code extraTags}. The {@code style} attribute is kept on
 * {@code p}, and {@code src}, {@code style} and {@code class} are kept on {@code img}.
 *
 * @param content   html content
 * @param extraTags any other tags that you may want to keep, e.g. {@code "a"}; when
 *                  {@code "a"} is included its {@code href} and {@code target} attributes
 *                  are kept as well
 * @return the inner HTML of the cleaned document body
 */
public String cleanContent(String content, String ... extraTags) {
    // simpleText() allows only simple text formatting; all other tags and attributes are removed.
    Whitelist permitted = Whitelist.simpleText()
            .addTags("br", "cite", "em", "i", "p", "strong", "img", "li", "ul", "ol", "sup", "sub", "s")
            .addTags(extraTags)
            .addAttributes("p", "style") // needed for left/right alignment
            .addAttributes("img", "src", "style", "class");

    boolean keepsAnchors = Arrays.asList(extraTags).contains("a");
    if (keepsAnchors) {
        permitted.addAttributes("a", "href", "target");
    }

    Document cleaned = new Cleaner(permitted).clean(Jsoup.parseBodyFragment(content, ""));
    // Original author's note: this mode does not escape UTF-8 characters.
    cleaned.outputSettings().escapeMode(EscapeMode.xhtml);
    return cleaned.body().html();
}
示例3: assertContainsLink
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Asserts that the given markup contains an anchor whose text and {@code href} attribute
 * both equal {@code expected}. Only the first {@code <a>} element is inspected.
 *
 * @param expected the URI expected as both link text and link target
 * @param actual   the markup to parse as an HTML body fragment
 */
public static void assertContainsLink(String expected, StringBuffer actual) {
    Element firstAnchor = Jsoup.parseBodyFragment(actual.toString()).select("a").first();

    assertNotNull("No <a> element found", firstAnchor);
    assertEquals(expected, firstAnchor.text());
    assertEquals(expected, firstAnchor.attr("href"));
}
示例4: assertLinkOnly
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Asserts that the first {@code <a>} element of the given markup has {@code expected} as
 * both its text and its {@code href}, then delegates to
 * {@code assertAnchorElementIsSoleContent} (defined elsewhere; presumably it verifies that
 * the anchor is the only content of the document).
 *
 * @param expected the URI expected as both link text and link target
 * @param actual   the markup to parse as an HTML body fragment
 */
public static void assertLinkOnly(String expected, StringBuffer actual) {
    final Document parsed = Jsoup.parseBodyFragment(actual.toString());
    final Element firstAnchor = parsed.select("a").first();

    assertNotNull("No <a> element found", firstAnchor);
    assertEquals(expected, firstAnchor.text());
    assertEquals(expected, firstAnchor.attr("href"));

    assertAnchorElementIsSoleContent(parsed, firstAnchor);
}
示例5: handle
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Parses HTML into a jsoup {@code Document}, either as a body fragment or as a full document.
 *
 * <p>The base URI is read from {@code Docx4jProperties} under
 * {@code Docx4jConstants.DOCX4J_JSOUP_PARSE_BASEURI}, defaulting to the empty string.
 * Depending on {@code fragment} the call is routed to
 * {@code Jsoup.parseBodyFragment(bodyHtml, baseUri)} or {@code Jsoup.parse(html, baseUri)}.
 *
 * @param html     the HTML text to parse
 * @param fragment {@code true} to parse as a body fragment, {@code false} to parse as a
 *                 complete document
 * @return the parsed document
 */
@Override
public Document handle( String html,boolean fragment) throws IOException{
    // Fetch the configured jsoup base URI (empty when not configured).
    final String baseUri = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_BASEURI, "");

    // Convert the HTML into a Document with the parser that matches the input kind.
    if (fragment) {
        return Jsoup.parseBodyFragment(html, baseUri);
    }
    return Jsoup.parse(html, baseUri);
}
示例6: postProcess
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Sanitizes the given HTML with the configured whitelist, runs every registered
 * {@code HtmlTransformer} over the cleaned document, and returns the resulting body HTML.
 *
 * @param html the HTML fragment to post-process
 * @return the inner HTML of the cleaned and transformed body
 */
@Override
public String postProcess(String html) {
    // Use a faked baseURI, otherwise all relative urls will be stripped out
    Document sanitized = new Cleaner(whiteList)
            .clean(Jsoup.parseBodyFragment(html, "http://localhost/sanitize"));

    for (HtmlTransformer transformer : htmlTransformers) {
        transformer.transform(sanitized);
    }

    return sanitized.body().html();
}
示例7: formatToXHtml
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Uses Jsoup to convert from HTML to XHTML.
 *
 * <p>The input is parsed as a body fragment, the document's output syntax is switched to
 * XML, and the serialized document is encoded with the given charset.
 *
 * @param html    the HTML fragment to convert
 * @param charset the charset used both for the document's output settings and for encoding
 *                the returned bytes
 * @return the serialized document as bytes in {@code charset}
 */
private byte[] formatToXHtml(String html, Charset charset) {
    final Document xhtml = Jsoup.parseBodyFragment(html);
    xhtml.outputSettings()
            .syntax(Document.OutputSettings.Syntax.xml)
            .charset(charset);
    return xhtml.toString().getBytes(charset);
}
示例8: generateFormattedTextObjects
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Parses the given text as an HTML body fragment with pretty-printing disabled and hands
 * the resulting body element to {@code parseFormattedMessageNode} together with a fresh,
 * empty {@code LinkedList}.
 *
 * @param text the formatted (HTML) text to parse
 * @throws IllegalArgumentException declared by the original signature; raised by the code
 *                                  this method calls, not by this method directly
 */
private void generateFormattedTextObjects(String text) throws IllegalArgumentException {
    final Document.OutputSettings compactOutput = new Document.OutputSettings().prettyPrint(false);

    final Document parsed = Jsoup.parseBodyFragment(text);
    parsed.outputSettings(compactOutput);

    parseFormattedMessageNode(parsed.body(), new LinkedList<>());
}
示例9: getText
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Returns the text of an HTML block, obtained by parsing the block's characters as an HTML
 * body fragment and taking the text of the resulting document.
 *
 * @param node the HTML block whose characters are parsed
 * @return the text of the parsed document
 */
String getText(final HtmlBlock node) {
    final String rawHtml = node.getChars().toString();
    return Jsoup.parseBodyFragment(rawHtml).text();
}
示例10: htmlNodeToMap
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Converts the table rows found in an HTML block into parameter descriptions keyed by id.
 *
 * <p>Column positions are taken from the {@code <th>} cells of the first row: "Name",
 * "Code" and "Form Part Name" mark the id column; "Description" the description column;
 * "Media type", "Type", "Content Type" and "Value" the type column; "Required?" the
 * required column. Unknown header texts are logged at debug level and ignored. When the
 * first row has no {@code <th>} cells, the id is assumed to be column 0 and the remaining
 * columns are guessed from the number of {@code <td>} cells (2: description; 3: type,
 * description).
 *
 * <p>Every row with at least two {@code <td>} cells yields one entry. A column index that
 * lies beyond the cells of a particular row is treated as absent for that row rather than
 * raising an exception, and rows for which no id can be resolved are skipped.
 *
 * @param htmlBlock the HTML block containing the table; it is passed through
 *                  {@code prepareHTML} before parsing
 * @return a map from parameter id to its description; empty when the block has no rows
 */
private Map<String, ParameterDescription> htmlNodeToMap(final HtmlBlock htmlBlock) {
    final String htmlBlockBody = prepareHTML(htmlBlock);
    final Document document = Jsoup.parseBodyFragment(htmlBlockBody);
    final Elements trs = document.select("tr");
    final HashMap<String, ParameterDescription> result = new HashMap<>();
    // Without any row there is no header to inspect; trs.get(0) would throw.
    if (trs.isEmpty()) {
        return result;
    }

    Integer nameIdx = null;
    Integer descriptionIdx = null;
    Integer typeIdx = null;
    Integer requiredIdx = null;
    final Elements ths = trs.get(0).select("th");
    if (ths.size() == 0) {
        // Workaround for missing table header
        nameIdx = 0;
        switch (trs.get(0).select("td").size()) {
            case 2:
                descriptionIdx = 1;
                break;
            case 3:
                typeIdx = 1;
                descriptionIdx = 2;
                break;
        }
    }
    for (int i = 0; i < ths.size(); i++) {
        final Element element = ths.get(i);
        switch (element.text()) {
            case "Name":
            case "Code":
            case "Form Part Name":
                nameIdx = i;
                break;
            case "Description":
                descriptionIdx = i;
                break;
            case "Media type":
            case "Type":
            case "Content Type":
            case "Value":
                typeIdx = i;
                break;
            case "Required?":
                requiredIdx = i;
                break;
            default:
                log.debug("Fieldname unknown: " + element.text());
                break;
        }
    }

    for (final Element tr : trs) {
        final Elements tds = tr.select("td");
        // Header rows (only <th>) and single-cell rows carry no parameter data.
        if (tds.size() >= 2) {
            final ParameterDescription.ParameterDescriptionBuilder builder = ParameterDescription.builder();
            cellText(tds, nameIdx).ifPresent(builder::id);
            cellText(tds, descriptionIdx).ifPresent(builder::description);
            cellText(tds, typeIdx).ifPresent(builder::type);
            cellText(tds, requiredIdx).map("Yes"::equals).ifPresent(builder::required);
            final ParameterDescription parameterDescription = builder.build();
            // A row without a resolvable id would otherwise be stored under a null key.
            if (parameterDescription.getId() != null) {
                result.put(parameterDescription.getId(), parameterDescription);
            }
        }
    }
    return result;
}

/** Returns the text of the cell at {@code idx}, or empty when the index is unknown or out of range for this row. */
private static Optional<String> cellText(final Elements tds, final Integer idx) {
    return Optional.ofNullable(idx)
            .filter(i -> i >= 0 && i < tds.size())
            .map(tds::get)
            .map(Element::text);
}