当前位置: 首页>>代码示例>>Java>>正文


Java Jsoup.clean方法代码示例

本文整理汇总了Java中org.jsoup.Jsoup.clean方法的典型用法代码示例。如果您正苦于以下问题:Java Jsoup.clean方法的具体用法?Java Jsoup.clean怎么用?Java Jsoup.clean使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.jsoup.Jsoup的用法示例。


在下文中一共展示了Jsoup.clean方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: main

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Demo: builds a small document, mutates it through the jsoup DOM API,
 * prints the resulting HTML, then prints the same HTML after it has been
 * passed through {@code Jsoup.clean} with the relaxed whitelist and
 * pretty-printing disabled.
 *
 * @param args unused
 */
public static void main(String[] args) {

    String d = "<span><div>test</div></span>";
    Document doc = Jsoup.parse(d);
    Element div = doc.select("div").first(); // <div>test</div>
    div.html("<p>lorem ipsum</p>"); // <div><p>lorem ipsum</p></div>
    div.prepend("<p>First</p>");
    div.append("<p>Last</p>");
    // now: <div><p>First</p><p>lorem ipsum</p><p>Last</p></div>
    // `d` is an HTML fragment, so it must go through append(String), which
    // parses markup. appendElement(String) expects a bare tag name and would
    // create an element literally named "<span><div>test</div></span>".
    div.append(d);
    Element span = doc.select("span").first(); // first <span> in document order (the outer one)
    span.wrap("<li><a href='http://example.com/'></a></li>");
    // now: <li><a href="http://example.com/"><span>...</span></a></li>
    System.out.println(doc.html());

    String s = Jsoup.clean(doc.html(), "", Whitelist.relaxed(), new OutputSettings().prettyPrint(false));

    System.out.println(s);
}
 
开发者ID:bluetata,项目名称:crawler-jsoup-maven,代码行数:20,代码来源:JsoupTest.java

示例2: main

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Demo: cleans the same anchor snippet with each of jsoup's built-in
 * whitelists, from the most restrictive to the most permissive, and prints
 * every result with a "*******" line between consecutive results.
 *
 * @param args unused
 */
public static void main(String[] args) {
	final String baseUri = "http://www.baidu.com";
	final String html = "<a href=\"http://www.baidu.com/gaoji/preferences.html\"name=\"tj_setting\">搜索设置</a>";

	final Whitelist[] policies = {
		Whitelist.none(),
		Whitelist.simpleText(),
		Whitelist.basic(),
		Whitelist.basicWithImages(),
		Whitelist.relaxed()
	};

	for (int i = 0; i < policies.length; i++) {
		// The separator goes between results only, never before the first one.
		if (i > 0) {
			System.out.println("*******");
		}
		System.out.println(Jsoup.clean(html, baseUri, policies[i]));
	}
}
 
开发者ID:vindell,项目名称:docx4j-template,代码行数:20,代码来源:XHTMLDocumentHandler.java

示例3: createReply

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Creates and stores a reply written by {@code user}.
 *
 * The reply content is sanitized with the basic-with-images whitelist and
 * then post-processed by {@code updateAtUser} before the reply is saved.
 *
 * @param replyDTO incoming reply data; its user id is overwritten with the id of {@code user}
 * @param user     author of the reply
 * @return the object returned by the repository's {@code save} call
 */
public Reply createReply(ReplyDTO replyDTO, User user) {
    replyDTO.setUserId(user.getId());
    final Reply reply = replyDTO.toReply();

    final String sanitized = Jsoup.clean(reply.getContent(), Whitelist.basicWithImages());
    reply.setContent(updateAtUser(sanitized));
    reply.setStatus(ReplyStatus.ACTIVE);

    final Reply saved = replyRepository.save(reply);

    // The user is attached only after saving, and to the local reply object
    // that is handed to the post-creation hook.
    reply.setUser(user);
    afterCreatingReply(reply);

    return saved;
}
 
开发者ID:ugouku,项目名称:shoucang,代码行数:18,代码来源:ReplyService.java

示例4: getDescription

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Fetches an image page and extracts its description as plain text.
 *
 * @param page URL of the image page to fetch
 * @return the content of the page's {@code og:title} meta tag, a newline, and
 *         the description with all HTML tags removed; {@code null} when the
 *         page cannot be fetched or contains no description cell
 */
public String getDescription(String page) {
    try {
        // Fetch the image page
        Response resp = Http.url(page)
                .referrer(this.url)
                .response();
        cookies.putAll(resp.cookies());

        // Parse the response body exactly once and reuse the document:
        // parsing twice is wasted work, and recent jsoup versions refuse to
        // re-read a body stream that has already been consumed.
        Document documentz = resp.parse();

        // Try to find the description
        Elements els = documentz.select("td[class=alt1][width=\"70%\"]");
        if (els.isEmpty()) {
            logger.debug("No description at " + page);
            throw new IOException("No description found");
        }
        logger.debug("Description found!");
        Element ele = els.get(0); // This is where the description is.
        // Would break completely if FurAffinity changed site layout.
        documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
        // Insert literal backslash-n markers (two characters) so the intended
        // line breaks can be restored as real separators just before cleaning.
        ele.select("br").append("\\n");
        ele.select("p").prepend("\\n\\n");
        logger.debug("Returning description at " + page);
        String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
        return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
    } catch (IOException ioe) {
        logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
        return null;
    }
}
 
开发者ID:RipMeApp,项目名称:ripme,代码行数:30,代码来源:FuraffinityRipper.java

示例5: stripHtmlFrom

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Cleans the given text with jsoup's simple-text whitelist.
 *
 * @param original text that may contain HTML
 * @return {@code original} unchanged when {@code Check.isEmpty} reports it
 *         as empty, otherwise the cleaned text
 */
public String stripHtmlFrom(String original)
	{
		// Nothing to clean: hand the input straight back.
		return Check.isEmpty(original)
			? original
			: Jsoup.clean(original, Whitelist.simpleText());
	}
 
开发者ID:equella,项目名称:Equella,代码行数:14,代码来源:FlickrSearchResultsSection.java

示例6: createPost

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Creates a post from the DTO, filling title, content and domain from the
 * JSON document returned by {@code getWebPost}.
 *
 * The content is sanitized with the relaxed whitelist and the title is
 * limited to 255 characters before the post is tagged and saved.
 *
 * @param postDTO data describing the post to create
 * @return the created post, or an empty Optional when {@code getWebPost} returned {@code null}
 * @throws JSONException if the response is not valid JSON or lacks an expected key
 */
@Transactional
@Timed
@Caching(evict = {
    @CacheEvict(value = TagService.CACHE_COUNT_USER, key = "#postDTO.userId.toString().concat('_posts_count')"),
    @CacheEvict(value = CACHE_COUNT_USER_TAG_POSTS, key = "#postDTO.userId.toString().concat('_tags_posts_count')", allEntries = true),
})
public Optional<Post> createPost(PostDTO postDTO) throws JSONException {
    final Post post = postMapper.postDTOToPost(postDTO);

    final String rawResponse = getWebPost(String.format(SERVER_URL, post.getUrl()));
    if (rawResponse == null) {
        return Optional.empty();
    }

    final JSONObject payload = new JSONObject(rawResponse);

    // Filter html tags
    final String sanitizedContent = Jsoup.clean(payload.getString("content"), Whitelist.relaxed());

    // Store the title first, then re-read it through the getter and cut it
    // down to at most 255 characters.
    post.setTitle(payload.getString("title"));
    final String storedTitle = post.getTitle();
    post.setTitle(storedTitle.substring(0, Math.min(255, storedTitle.length())));

    post.setContent(sanitizedContent);
    post.setDomain(payload.getString("host"));

    updateTags(post, null);
    saveNewPost(post);

    return Optional.of(post);
}
 
开发者ID:ugouku,项目名称:shoucang,代码行数:30,代码来源:PostService.java

示例7: getArticle

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Get full text of a post.
 *
 * Requests the article through {@code postService.getWebPost}, sanitizes the
 * JSON {@code content} field with the relaxed whitelist and saves it on the
 * post loaded from the repository. Every failure is logged and swallowed;
 * nothing is thrown to the caller.
 *
 * @param post post whose id and url identify the article to fetch
 */
public void getArticle(Post post) {
    log.debug("Handle crawling article full text from source site, id=" + post.getId() + " , url=" + post.getUrl());

    try {
        String result = postService.getWebPost(String.format(PostService.SERVER_URL, post.getUrl()));
        if (result == null) {
            log.error("Failed to get article full text, id=" + post.getId());
            return;
        }

        // Re-load the post by id; it may have been removed in the meantime.
        Post resultPost = postRepository.findOne(post.getId());
        if (resultPost == null) {
            log.warn("Cancel crawling article full text of post id=" + post.getId() + ", because the post does not exist.");
            return;
        }

        JSONObject json = new JSONObject(result);
        String content = json.getString("content");
        content = Jsoup.clean(content, Whitelist.relaxed());
        resultPost.setContent(content);

        postService.saveNewPost(resultPost);
    } catch (Exception e) {
        // Pass the exception itself as the throwable argument so the stack
        // trace is logged, not only the message text.
        // NOTE(review): assumes `log` offers error(String, Throwable), as SLF4J,
        // Log4j and commons-logging do - confirm against the field declaration.
        log.error("Failed to resolve article full text, id=" + post.getId() + ", url=" + post.getUrl() + ", exception: " + e.getMessage(), e);
    }
}
 
开发者ID:ugouku,项目名称:shoucang,代码行数:30,代码来源:RabbitConsumer.java

示例8: forClass

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Builds a {@code SimClassInfo} for the given class from its runtime Javadoc.
 *
 * @param cls class to describe
 * @return info holding the class's canonical name and its Javadoc comment
 *         cleaned with the basic whitelist; the comment is cleaned from an
 *         empty string when no Javadoc or comment is available
 */
public static SimClassInfo forClass(Class<?> cls) {
    final String rawComment = RuntimeJavadoc.getJavadoc(cls)
            .map(ClassJavadoc::getComment)
            .map(Comment::toString)
            .orElse("");
    final String safeComment = Jsoup.clean(rawComment, Whitelist.basic());

    return new SimClassInfo(cls.getCanonicalName(), safeComment);
}
 
开发者ID:etomica,项目名称:etomica,代码行数:8,代码来源:SimClassInfo.java

示例9: formatHtml

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Extracts the first {@code div[dir='ltr']} element from the e-mail HTML,
 * drops its trailing {@code <br>} children and returns its inner HTML
 * cleaned with the basic whitelist.
 *
 * @param content raw HTML of the e-mail
 * @return the sanitized content, or an empty string when no matching div exists
 */
@Override
protected String formatHtml(String content) {
    final Element contentDiv = Jsoup.parse(content).body().select("div[dir='ltr']").first();
    if (contentDiv == null) {
        log.warn("Found no valid content in e-mail from Gmail, returning empty");
        return "";
    }

    // Remove <br> children from the end. last() yields null once no child
    // elements remain, which ends the loop.
    for (Element trailing = contentDiv.children().last();
         trailing != null && trailing.is("br");
         trailing = contentDiv.children().last()) {
        trailing.remove();
    }

    return Jsoup.clean(contentDiv.html(), Whitelist.basic());
}
 
开发者ID:kamax-io,项目名称:matrix-appservice-email,代码行数:16,代码来源:GmailClientFormatter.java

示例10: formatHtml

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Removes {@code blockquote[cite]} and {@code div.moz-cite-prefix} elements
 * from the e-mail body, drops trailing {@code <br>} children of the body and
 * returns the remaining HTML cleaned with the basic whitelist.
 *
 * @param content raw HTML of the e-mail
 * @return the sanitized body HTML
 */
@Override
protected String formatHtml(String content) {
    final Element body = Jsoup.parse(content).body();

    body.select("blockquote[cite]").remove();
    body.select("div.moz-cite-prefix").remove();

    // Remove <br> children from the end of the body. last() yields null
    // once no child elements remain, which ends the loop.
    Element tail;
    while ((tail = body.children().last()) != null && tail.is("br")) {
        tail.remove();
    }

    final String remainingHtml = body.html();
    return Jsoup.clean(remainingHtml, Whitelist.basic());
}
 
开发者ID:kamax-io,项目名称:matrix-appservice-email,代码行数:13,代码来源:ThunderbirdClientFormatter.java

示例11: extractText

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Cleans the given HTML with jsoup's {@code Whitelist.none()} policy.
 *
 * @param html HTML to clean
 * @return the result of {@code Jsoup.clean} for that policy
 */
public static String extractText(String html) {
    final Whitelist textOnly = Whitelist.none();
    return Jsoup.clean(html, textOnly);
}
 
开发者ID:philipwhiuk,项目名称:q-mail,代码行数:4,代码来源:HtmlHelper.java

示例12: htmlFilter

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Cleans HTML against {@code myWhiteList}, using {@code baseUri} as the base URI.
 *
 * @param html    HTML to clean
 * @param baseUri base URI passed to {@code Jsoup.clean}
 * @return an empty string when {@code StringUtil.isBlank(html)} is true,
 *         otherwise the cleaned HTML
 */
private static String htmlFilter(String html, String baseUri) {
	return StringUtil.isBlank(html)
		? ""
		: Jsoup.clean(html, baseUri, myWhiteList);
}
 
开发者ID:wrayzheng,项目名称:webpage-update-subscribe,代码行数:5,代码来源:PageParser.java

示例13: clean

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Thin wrapper around {@code Jsoup.clean(String, Whitelist)}.
 *
 * @param s    HTML to clean
 * @param list whitelist to clean against
 * @return the cleaned HTML
 */
public static String clean(String s, Whitelist list) {
	final String sanitized = Jsoup.clean(s, list);
	return sanitized;
}
 
开发者ID:rafjordao,项目名称:Nird2,代码行数:4,代码来源:HtmlUtils.java

示例14: getServerConsole

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Loads the {@code console_log} panel view for the given id, validates it,
 * and returns its body cleaned with {@code Whitelist.none()} and
 * pretty-printing disabled.
 *
 * @param subId id passed to {@code getPanelView}
 * @return the cleaned console output
 * @throws IOException declared by the original signature; presumably raised when loading the panel view fails
 */
@Retryable(backoff = @Backoff(2000L))
public String getServerConsole(String subId) throws IOException {
    final String consoleMarkup = validate(getPanelView(subId, "console_log"))
            .body()
            .toString();
    final Document.OutputSettings compactOutput = new Document.OutputSettings().prettyPrint(false);

    return Jsoup.clean(consoleMarkup, "", Whitelist.none(), compactOutput);
}
 
开发者ID:quanticc,项目名称:sentry,代码行数:6,代码来源:GameAdminService.java

示例15: getEscapedValue

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Returns this tag's value cleaned with jsoup's basic whitelist.
 *
 * @return the result of {@code Jsoup.clean(getValue(), Whitelist.basic())}
 */
public String getEscapedValue() {
	final String rawValue = getValue();
	return Jsoup.clean(rawValue, Whitelist.basic());
}
 
开发者ID:kalsowerus,项目名称:Guestbook9001,代码行数:4,代码来源:EscapeTag.java


注:本文中的org.jsoup.Jsoup.clean方法示例由纯净天空整理自GitHub、MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。