当前位置: 首页>>代码示例>>Java>>正文


Java Jsoup.parseBodyFragment方法代码示例

本文整理汇总了Java中org.jsoup.Jsoup.parseBodyFragment方法的典型用法代码示例。如果您正苦于以下问题:Java Jsoup.parseBodyFragment方法的具体用法?Java Jsoup.parseBodyFragment怎么用?Java Jsoup.parseBodyFragment使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.jsoup.Jsoup的用法示例。


在下文中一共展示了Jsoup.parseBodyFragment方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: parseZhihuTopics1

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Parses one JSON page of topic fragments and schedules the follow-up requests.
 *
 * <p>The page content is parsed as a JSON object whose {@code "msg"} array holds HTML
 * fragments. For every fragment, the first {@code a[target]} anchor inside the first
 * {@code div.item} is turned into a GET request for {@code <href>/newest}. Afterwards a
 * POST request for the next page (offset + 20) is queued, carrying the same
 * {@code topic_id} and the new offset as attachments.
 *
 * <p>When the {@code "msg"} array is empty, the result is flagged via
 * {@code setSkip(true)} and nothing is queued.
 *
 * @param page   the fetched page; its request must carry the {@code "topic_id"} and an
 *               {@code Integer} {@code "offset"} attachment
 * @param result collector that receives the generated requests
 */
public static void parseZhihuTopics1(Page page, Result result) {
    String json = page.getContent();
    JSONObject object = JSON.parseObject(json);
    JSONArray array = object.getJSONArray("msg");
    if(array.size()==0) {
        result.setSkip(true);
        return;
    }
    for (int i = 0; i < array.size(); i++) {
        String topicStr = array.getString(i);
        Document doc = Jsoup.parseBodyFragment(topicStr);
        // Elements.first() returns null for an empty selection, so a fragment without
        // the expected markup is skipped instead of aborting the whole page with an NPE.
        Element item = doc.body().select("div.item").first();
        if (item == null) {
            continue;
        }
        Element a = item.select("a[target]").first();
        if (a == null) {
            continue;
        }
        String href = "https://www.zhihu.com" + a.attr("href")+"/newest";
        result.addRequest(new Request(href, HttpMethod.GET));
    }

    // Compute the next page offset once; it is used both in the POST params and as attachment.
    Integer nextOffset = ((Integer) page.getRequest().getAddch("offset")) + 20;

    Request request = new Request("https://www.zhihu.com/node/TopicsPlazzaListV2", HttpMethod.POST);
    JSONObject object1 = new JSONObject();
    object1.put("topic_id", page.getRequest().getAddch("topic_id"));
    object1.put("offset", nextOffset);
    object1.put("hash_id", "22e50cd21ed9df7085ff76d62175e986");
    request.addParame("method", "next")
            .addParame("params", object1.toJSONString())
            .addAttach("offset", nextOffset)
            .addAttach("topic_id", page.getRequest().getAddch("topic_id"));
    result.addRequest(request);
}
 
开发者ID:StevenKin,项目名称:ZhihuQuestionsSpider,代码行数:25,代码来源:ParseRegularUtil.java

示例2: cleanContent

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Sanitizes HTML so that only a fixed set of formatting tags survives:
 * b, em, i, strong, u, br, cite, p, img, li, ul, ol, sup, sub, s.
 * The {@code style} attribute is kept on {@code p}; {@code src}, {@code style} and
 * {@code class} are kept on {@code img}.
 *
 * @param content html content
 * @param extraTags any other tags that you may want to keep, e. g. "a"; when "a" is
 *        among them, its {@code href} and {@code target} attributes are kept as well
 * @return the inner HTML of the cleaned document body
 */
public String cleanContent(String content, String ... extraTags) {
	// simpleText() allows only b, em, i, strong, u; everything else is added explicitly below.
	Whitelist whitelist = Whitelist.simpleText()
		.addTags("br", "cite", "em", "i", "p", "strong", "img", "li", "ul", "ol", "sup", "sub", "s")
		.addTags(extraTags)
		.addAttributes("p", "style") // needed for left/right alignment
		.addAttributes("img", "src", "style", "class");
	boolean keepAnchors = Arrays.asList(extraTags).contains("a");
	if (keepAnchors) {
		whitelist.addAttributes("a", "href", "target");
	}

	Document sanitized = new Cleaner(whitelist).clean(Jsoup.parseBodyFragment(content, ""));
	// xhtml escape mode: utf-8 characters are not escaped
	sanitized.outputSettings().escapeMode(EscapeMode.xhtml);
	return sanitized.body().html();
}
 
开发者ID:xtianus,项目名称:yadaframework,代码行数:23,代码来源:YadaWebUtil.java

示例3: assertContainsLink

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Asserts that the linkified text contains an {@code <a>} element and that the first
 * such element has both its text and its {@code href} attribute equal to {@code expected}.
 *
 * @param expected the URI expected as link text and link target
 * @param actual   buffer holding the linkified HTML fragment
 */
public static void assertContainsLink(String expected, StringBuffer actual) {
    Element firstAnchor = Jsoup.parseBodyFragment(actual.toString()).select("a").first();

    assertNotNull("No <a> element found", firstAnchor);
    assertEquals(expected, firstAnchor.text());
    assertEquals(expected, firstAnchor.attr("href"));
}
 
开发者ID:philipwhiuk,项目名称:q-mail,代码行数:9,代码来源:UriParserTestHelper.java

示例4: assertLinkOnly

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Asserts that the first {@code <a>} element of the linkified text has both its text and
 * its {@code href} equal to {@code expected}, then delegates to
 * {@code assertAnchorElementIsSoleContent} with the parsed document and that anchor.
 *
 * @param expected the URI expected as link text and link target
 * @param actual   buffer holding the linkified HTML fragment
 */
public static void assertLinkOnly(String expected, StringBuffer actual) {
    Document parsed = Jsoup.parseBodyFragment(actual.toString());
    Element firstAnchor = parsed.select("a").first();

    assertNotNull("No <a> element found", firstAnchor);
    assertEquals(expected, firstAnchor.text());
    assertEquals(expected, firstAnchor.attr("href"));

    assertAnchorElementIsSoleContent(parsed, firstAnchor);
}
 
开发者ID:philipwhiuk,项目名称:q-mail,代码行数:11,代码来源:UriParserTestHelper.java

示例5: handle

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Converts an HTML string into a jsoup {@link Document}.
 *
 * <p>Depending on {@code fragment}, one of the following is used:
 * <ul>
 *   <li>{@code Jsoup.parseBodyFragment(bodyHtml, baseUri)} when {@code fragment} is true</li>
 *   <li>{@code Jsoup.parse(html, baseUri)} otherwise</li>
 * </ul>
 *
 * @param html     the HTML text to parse
 * @param fragment whether {@code html} is a body fragment rather than a full document
 * @return the parsed document
 */
@Override
public Document handle( String html,boolean fragment) throws IOException{
	// Read the jsoup base URI from the docx4j properties ("" is passed as second argument).
	String baseUri = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_BASEURI,"");
	// Let jsoup turn the HTML into a Document, as a fragment or as a whole page.
	if (fragment) {
		return Jsoup.parseBodyFragment(html, baseUri);
	}
	return Jsoup.parse(html, baseUri);
}
 
开发者ID:vindell,项目名称:docx4j-template,代码行数:16,代码来源:XHTMLDocumentHandler.java

示例6: postProcess

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Sanitizes the given HTML with the configured whitelist, runs every registered
 * {@code HtmlTransformer} over the cleaned document and returns the resulting body HTML.
 *
 * @param html the HTML fragment to post-process
 * @return inner HTML of the cleaned and transformed body
 */
@Override
public String postProcess(String html) {
	// A fake base URI is supplied on purpose: without one, relative urls would be stripped out.
	Document parsed = Jsoup.parseBodyFragment(html, "http://localhost/sanitize");
	Document sanitized = new Cleaner(whiteList).clean(parsed);

	for (HtmlTransformer transformer : htmlTransformers) {
		transformer.transform(sanitized);
	}

	return sanitized.body().html();
}
 
开发者ID:jmfgdev,项目名称:gitplex-mit,代码行数:13,代码来源:DefaultMarkdownManager.java

示例7: formatToXHtml

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Converts an HTML body fragment to XHTML using Jsoup.
 *
 * @param html    the HTML fragment to convert
 * @param charset charset used both for the document output settings and for encoding the result
 * @return the serialized document, encoded with {@code charset}
 */
private byte[] formatToXHtml(String html, Charset charset) {
    Document xhtml = Jsoup.parseBodyFragment(html);
    // Switch the serializer to XML syntax and emit the requested charset.
    xhtml.outputSettings()
            .syntax(Document.OutputSettings.Syntax.xml)
            .charset(charset);
    String serialized = xhtml.toString();
    return serialized.getBytes(charset);
}
 
开发者ID:Asymmetrik,项目名称:nifi-nars,代码行数:10,代码来源:GetWebpage.java

示例8: generateFormattedTextObjects

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Parses {@code text} as an HTML body fragment and passes its body node, together with a
 * new empty {@link LinkedList}, to {@code parseFormattedMessageNode}.
 *
 * @param text the formatted (HTML) message text
 * @throws IllegalArgumentException declared by this method; not thrown directly here
 */
private void generateFormattedTextObjects(String text) throws IllegalArgumentException {
	Document parsed = Jsoup.parseBodyFragment(text);

	// Install fresh output settings with pretty-printing turned off.
	Document.OutputSettings settings = new Document.OutputSettings();
	settings.prettyPrint(false);
	parsed.outputSettings(settings);

	parseFormattedMessageNode(parsed.body(), new LinkedList<>());
}
 
开发者ID:Gurgy,项目名称:Cypher,代码行数:7,代码来源:EventListItemPresenter.java

示例9: getText

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Returns the plain text of an HTML block, obtained by parsing the block's characters as
 * an HTML body fragment and reading the document text.
 *
 * @param node the HTML block to extract text from
 * @return the text of the parsed document
 */
String getText(final HtmlBlock node) {
  final String html = node.getChars().toString();
  return Jsoup.parseBodyFragment(html).text();
}
 
开发者ID:camunda,项目名称:camunda-bpm-swagger,代码行数:5,代码来源:HtmlDocumentInterpreter.java

示例10: htmlNodeToMap

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Converts the table rows of an HTML block into parameter descriptions keyed by id.
 *
 * <p>Column positions are taken from the {@code <th>} cells of the first row. If the first
 * row has no header cells, the first column is used as the name and the remaining columns
 * are guessed from the number of {@code <td>} cells in that row (2: name, description;
 * 3: name, type, description). Every row with at least two {@code <td>} cells produces one
 * entry; a cell is only read when the row actually has that column.
 *
 * @param htmlBlock the HTML block, passed through {@code prepareHTML} before parsing
 * @return map from parameter id to its description; empty when the block has no
 *         {@code <tr>} rows
 */
private Map<String, ParameterDescription> htmlNodeToMap(final HtmlBlock htmlBlock) {
  final String htmlBlockBody = prepareHTML(htmlBlock);
  final Document document = Jsoup.parseBodyFragment(htmlBlockBody);
  final Elements trs = document.select("tr");
  final HashMap<String, ParameterDescription> result = new HashMap<>();
  if (trs.isEmpty()) {
    // No rows at all: nothing to describe (previously trs.get(0) threw IndexOutOfBoundsException).
    return result;
  }

  Integer nameIdx = null;
  Integer descriptionIdx = null;
  Integer typeIdx = null;
  Integer requiredIdx = null;
  final Element firstRow = trs.get(0);
  final Elements ths = firstRow.select("th");

  if(ths.size() == 0) {
    // Workaround for missing table header
    nameIdx = 0;
    switch(firstRow.select("td").size()) {
    case 2:
      descriptionIdx = 1;
      break;
    case 3:
      typeIdx = 1;
      descriptionIdx = 2;
      break;
    }
  }
  for (int i = 0; i < ths.size(); i++) {
    final Element element = ths.get(i);
    switch(element.text()) {
    case "Name":
    case "Code":
    case "Form Part Name":
      nameIdx = i;
      break;
    case "Description":
      descriptionIdx = i;
      break;
    case "Media type":
    case "Type":
    case "Content Type":
    case "Value":
      typeIdx = i;
      break;
    case "Required?":
      requiredIdx = i;
      break;
    default:
      log.debug("Fieldname unknown: " + element.text());
      break;
    }
  }
  for (final Element tr : trs) {
    final Elements tds = tr.select("td");
    if (tds.size() >= 2) {
      final ParameterDescription.ParameterDescriptionBuilder builder = ParameterDescription.builder();
      cellText(tds, nameIdx).ifPresent(builder::id);
      cellText(tds, descriptionIdx).ifPresent(builder::description);
      cellText(tds, typeIdx).ifPresent(builder::type);
      cellText(tds, requiredIdx).map(o -> o.equals("Yes")).ifPresent(builder::required);
      final ParameterDescription parameterDescription = builder.build();
      result.put(parameterDescription.getId(), parameterDescription);
    }
  }
  return result;
}

/**
 * Returns the text of the cell at {@code idx}, or empty when the column is unknown
 * ({@code idx} is null) or the row is too short to contain it.
 */
private static Optional<String> cellText(final Elements tds, final Integer idx) {
  return Optional.ofNullable(idx)
      .filter(i -> i < tds.size())
      .map(tds::get)
      .map(Element::text);
}
 
开发者ID:camunda,项目名称:camunda-bpm-swagger,代码行数:64,代码来源:HtmlDocumentInterpreter.java


注:本文中的org.jsoup.Jsoup.parseBodyFragment方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。