当前位置: 首页>>代码示例>>Java>>正文


Java Jsoup.parse方法代码示例

本文整理汇总了Java中org.jsoup.Jsoup.parse方法的典型用法代码示例。如果您正苦于以下问题:Java Jsoup.parse方法的具体用法?Java Jsoup.parse怎么用?Java Jsoup.parse使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.jsoup.Jsoup的用法示例。


在下文中一共展示了Jsoup.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getFormMap_Kingo

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Collects the submittable fields of an HTML page into a name/value map.
 *
 * <p>The map always carries the key {@code "formAction"}, holding the {@code action}
 * attribute of the form at {@code formIndex}. Every named {@code input} contributes its
 * {@code value}; every named {@code select} contributes the {@code value} of its first
 * {@code option} (an empty string when it has no option). Fields are gathered from the
 * whole document, not only from the chosen form.
 *
 * @param html      markup to parse
 * @param formIndex zero-based index of the form whose action is reported
 * @return the populated map, or {@code null} when anything fails (for example when
 *         {@code formIndex} is out of range)
 */
public static Map<String, String> getFormMap_Kingo(String html, int formIndex) {
	Map<String, String> retVal = new HashMap<String, String>();
	try {
		Document doc = Jsoup.parse(html);
		Element formElement = doc.select("form").get(formIndex);
		retVal.put("formAction", formElement.attr("action"));
		// "select" must be part of the query: with "input" alone the select branch below can never run.
		for (Element element : doc.select("input, select")) {
			String name = element.attr("name");
			if (name.isEmpty()) {
				// Unnamed controls cannot be addressed in the map.
				continue;
			}
			if (element.nodeName().equals("select")) {
				// Look for the option inside this select only, not across every collected element.
				Element firstOption = element.select("option").first();
				retVal.put(name, firstOption == null ? "" : firstOption.attr("value"));
			} else {
				retVal.put(name, element.attr("value"));
			}
		}
	} catch (Exception e) {
		// Best-effort contract kept from the original: any failure is reported as null.
		retVal = null;
	}
	return retVal;
}
 
开发者ID:liaojiacan,项目名称:zhkuas_ssm_maven,代码行数:26,代码来源:HTMLUtil.java

示例2: parseProxys

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Builds a proxy list from the rows matched by {@code div#list table tbody tr}.
 *
 * <p>For each row the text of the first child is used as the IP and the text of the second
 * child is parsed as the port.
 *
 * @param content HTML of the proxy listing page
 * @return one {@code Proxy} per matched row, in document order
 */
@Override
public List<Proxy> parseProxys(String content) {
    List<Proxy> proxies = new ArrayList<>();
    Elements rows = Jsoup.parse(content).select("div#list table tbody tr");
    for (Element row : rows) {
        Elements cells = row.children();
        String host = cells.get(0).text().trim();
        Integer portNumber = Integer.valueOf(cells.get(1).text());
        proxies.add(new Proxy(host, portNumber));
    }
    return proxies;
}
 
开发者ID:StevenKin,项目名称:ZhihuQuestionsSpider,代码行数:14,代码来源:KuaidailiProxySite.java

示例3: getClozeTestQuestionContentValidationResult

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Validates the cloze-test markup stored in the {@code "question"} field of {@code node}.
 *
 * <p>Checks run in this order and the first failure wins: presence of the literal
 * {@code cloze="true"}, uniqueness of the {@code id} attributes of the
 * {@code span[cloze=true]} elements, a non-empty parsable {@code precision} attribute on each
 * of them, and parsable text for those whose {@code numeric} attribute equals {@code "true"}.
 *
 * @param node JSON node holding the question text
 * @return {@code null} when every check passes, otherwise a short reason
 */
@Transient
private String getClozeTestQuestionContentValidationResult(JsonNode node) {
    String questionText = node.get("question").asText();
    if (!questionText.contains("cloze=\"true\"")) {
        return "no embedded answers";
    }

    Elements answers = Jsoup.parse(questionText).select("span[cloze=true]");

    long distinctIdCount = answers.stream().map(a -> a.attr("id")).distinct().count();
    if (distinctIdCount != answers.size()) {
        return "duplicate ids found";
    }

    for (Element answer : answers) {
        String precision = answer.attr("precision");
        if (precision.isEmpty() || !NumberUtils.isParsable(precision)) {
            return "invalid precision found";
        }
    }

    for (Element answer : answers) {
        if (answer.attr("numeric").equals("true") && !NumberUtils.isParsable(answer.text())) {
            return "non-numeric correct answer for numeric question";
        }
    }

    return null;
}
 
开发者ID:CSCfi,项目名称:exam,代码行数:26,代码来源:Question.java

示例4: getContent

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Extracts album entries from the anchors matched by {@code #pins a:has(img)} and stores
 * them in {@code resultMap}.
 *
 * <p>Each anchor's {@code href} becomes the album URL. The {@code data-original} attribute
 * of its first {@code img} becomes the picture URL; a leading {@code http://} scheme is
 * upgraded to {@code https://}, and any other URL is kept unchanged.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl stored under {@code CURRENT_URL}
 * @param result     page bytes, decoded as UTF-8
 * @param resultMap  map that receives {@code CURRENT_URL} and {@code RESULT}
 * @return the same {@code resultMap} instance
 * @throws UnsupportedEncodingException declared because the charset is given by name
 */
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<AlbumInfo> data = new ArrayList<>();
    Document document = Jsoup.parse(new String(result, "utf-8"));
    for (Element link : document.select("#pins a:has(img)")) {
        AlbumInfo album = new AlbumInfo();
        album.setAlbumUrl(link.attr("href"));
        Element image = link.select("img").first();
        if (image != null) {
            String picUrl = image.attr("data-original");
            Log.e("Mzitu", "getContent: " + picUrl);
            // Rewrite the scheme only. A blanket replace("http", "https") turns an existing
            // "https://" into "httpss://" and also touches "http" elsewhere in the URL.
            if (picUrl.startsWith("http://")) {
                picUrl = "https://" + picUrl.substring("http://".length());
            }
            album.setPicUrl(picUrl);
        }
        data.add(album);
    }
    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, data);
    return resultMap;
}
 
开发者ID:lanyuanxiaoyao,项目名称:PicKing,代码行数:20,代码来源:Mzitu.java

示例5: doAnalysis

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Pulls the result text out of the first table of a page.
 *
 * <p>The text comes from the {@code font} elements found under the {@code td} cells of the
 * first {@code table}. A single match is returned as its inner HTML; several matches are
 * concatenated, each followed by {@code </br>}.
 *
 * @param html page markup; {@code null} yields {@code null}
 * @return the extracted HTML, or a fixed message when no table or no font element exists
 */
public String doAnalysis(String html){
	if (html == null) {
		return null;
	}

	Elements tables = Jsoup.parse(html).select("table");
	if (tables.isEmpty()) {
		return "获取不到选课结果";
	}

	Elements fonts = tables.get(0).select("td").select("font");
	if (fonts.isEmpty()) {
		return "获取不到选课结果";
	}
	if (fonts.size() == 1) {
		return fonts.get(0).html();
	}

	StringBuilder joined = new StringBuilder();
	for (Element font : fonts) {
		joined.append(font.html()).append("</br>");
	}
	return joined.toString();
}
 
开发者ID:liaojiacan,项目名称:zhkuas_ssm_maven,代码行数:21,代码来源:SubmitCourseResultAnalysiser.java

示例6: getContent

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Extracts album entries from the anchors matched by
 * {@code #post-list-posts li div.inner a} and stores them in {@code resultMap}.
 *
 * <p>The album URL is {@code baseUrl} followed by the anchor's {@code href}. The picture URL
 * is the {@code src} of the first {@code img} inside the anchor, when there is one.
 *
 * @param baseUrl    prefix prepended to every {@code href}
 * @param currentUrl stored under {@code CURRENT_URL}
 * @param result     page bytes, decoded as UTF-8
 * @param resultMap  map that receives {@code CURRENT_URL} and {@code RESULT}
 * @return the same {@code resultMap} instance
 * @throws UnsupportedEncodingException declared because the charset is given by name
 */
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    String html = new String(result, "utf-8");
    Elements links = Jsoup.parse(html).select("#post-list-posts li div.inner a");

    List<AlbumInfo> albums = new ArrayList<>();
    for (Element link : links) {
        AlbumInfo album = new AlbumInfo();
        album.setAlbumUrl(baseUrl + link.attr("href"));
        Elements images = link.select("img");
        if (!images.isEmpty()) {
            album.setPicUrl(images.get(0).attr("src"));
        }
        albums.add(album);
    }

    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, albums);
    return resultMap;
}
 
开发者ID:lanyuanxiaoyao,项目名称:PicKing,代码行数:19,代码来源:Yande.java

示例7: getDetailContent

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Extracts the pictures of a detail page and stores them in {@code resultMap}.
 *
 * <p>Every {@code #big-pic img} yields one {@code PicInfo} built from its {@code src}. All
 * of them share the text of the first {@code #photos h1} as title and the text of the first
 * {@code .tsmaincont-desc span} as time; either is an empty string when nothing matches.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl stored under {@code CURRENT_URL}
 * @param result     page bytes, decoded as UTF-8
 * @param resultMap  map that receives {@code CURRENT_URL} and {@code RESULT}
 * @return the same {@code resultMap} instance
 * @throws UnsupportedEncodingException declared because the charset is given by name
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    Document page = Jsoup.parse(new String(result, "utf-8"));

    Elements titleNodes = page.select("#photos h1");
    String albumTitle = titleNodes.isEmpty() ? "" : titleNodes.get(0).text();

    Elements timeNodes = page.select(".tsmaincont-desc span");
    String albumTime = timeNodes.isEmpty() ? "" : timeNodes.get(0).text();

    List<PicInfo> pictures = new ArrayList<>();
    for (Element image : page.select("#big-pic img")) {
        PicInfo picture = new PicInfo(image.attr("src")).setTitle(albumTitle).setTime(albumTime);
        pictures.add(picture);
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
 
开发者ID:lanyuanxiaoyao,项目名称:PicKing,代码行数:24,代码来源:Aitaotu.java

示例8: getSelectorByName

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Finds a {@code select} element by its {@code name} attribute, either directly in the page
 * markup or inside script text when the page fills the select in through JavaScript.
 *
 * @param html       page markup; {@code null} yields {@code null}
 * @param selectName value of the {@code name} attribute to look for
 * @return the matching element, or {@code null} when none is found
 */
public static Element getSelectorByName(String html,String selectName){
	if (html == null) {
		return null;
	}

	String query = "select[name="+selectName+"]";
	Document page = Jsoup.parse(html);

	// First choice: a select already present in the markup, provided it has at least one
	// option and some non-blank text.
	Elements inPage = page.select(query);
	if (inPage != null && inPage.select("option").size() > 0) {
		String text = inPage.text();
		if (text != null && !text.trim().equals("")) {
			return inPage.first();
		}
	}

	// Fallback: cut a <select ...>...</select> fragment out of the script contents with the
	// regex below, parse that fragment and search it instead.
	Elements scripts = page.select("script");
	if (scripts == null || scripts.size() == 0) {
		return null;
	}
	String embedded = scripts.html().replaceAll("[\\s\\S]*(<select[\\w\\W]*>[\\w\\W]+</select>)", "$1");
	return Jsoup.parse("<html>"+embedded+"</html>").select(query).first();
}
 
开发者ID:liaojiacan,项目名称:zhkuas_ssm_maven,代码行数:25,代码来源:HTMLUtil.java

示例9: jsoupIOTest03

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Parses a small HTML snippet that contains malformed markup and prints the inner HTML of
 * the resulting body to standard output.
 *
 * @throws IOException declared by the signature; nothing in the body performs file I/O
 */
public static void jsoupIOTest03() throws IOException{
    String markup = String.join("",
            " <html>",
            "   <head>",
            "     <title>JsoupInputAndOutput</title>",
            "   </head>",
            "     <body> hhhh<ACTxxx<body>",
            " </html>");

    Document parsed = Jsoup.parse(markup);
    System.out.println(parsed.body().html());
}
 
开发者ID:bluetata,项目名称:crawler-jsoup-maven,代码行数:29,代码来源:CSDNQA.java

示例10: getContentNext

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Builds the URL of the next page from the first pagination link whose own text contains
 * {@code 下一页} or {@code >}.
 *
 * <p>The link's {@code href} is appended to the part of {@code currentUrl} matched by the
 * regex {@code http.*&#47;}.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl URL of the page being processed
 * @param result     page bytes, decoded as gb2312
 * @return the next-page URL, or an empty string when no link matches or {@code currentUrl}
 *         does not match the regex
 * @throws UnsupportedEncodingException if the gb2312 charset is unavailable
 */
@Override
public String getContentNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    String html = new String(result, "gb2312");
    Elements nextLinks = Jsoup.parse(html).select("div.page a:containsOwn(下一页),div.page a:containsOwn(>)");
    if (nextLinks.isEmpty()) {
        return "";
    }

    Matcher prefix = Pattern.compile("http.*/").matcher(currentUrl);
    return prefix.find() ? prefix.group() + nextLinks.get(0).attr("href") : "";
}
 
开发者ID:lanyuanxiaoyao,项目名称:PicKing,代码行数:16,代码来源:Meizi4493.java

示例11: getSnpPageUrlWithWrongPage

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Expects {@code getSnpPageUrl()} to throw {@code NotLoggedInErrorException} when the page
 * returned by {@code getPageByUrl} is the document parsed from the
 * {@code OcenyWszystkie-semester.html} fixture.
 */
@Test(expected = NotLoggedInErrorException.class)
public void getSnpPageUrlWithWrongPage() throws Exception {
    // Parse the fixture that stands in for the page served by the stubbed getPageByUrl.
    Document wrongPageDocument = Jsoup.parse(
            FixtureHelper.getAsString(getClass().getResourceAsStream("OcenyWszystkie-semester.html"))
    );

    // Canned answers for the collaborating calls; getId() runs its real implementation.
    Mockito.when(snp.getPageByUrl(Mockito.anyString())).thenReturn(wrongPageDocument);
    Mockito.when(snp.getStartPageUrl()).thenReturn("http://wulkan.io");
    Mockito.when(snp.getId()).thenCallRealMethod();

    // The method under test must execute its real code on the otherwise stubbed object.
    Mockito.when(snp.getSnpPageUrl()).thenCallRealMethod();

    // The exception declared in @Test(expected = ...) is expected to come from this call.
    snp.getSnpPageUrl();
}
 
开发者ID:wulkanowy,项目名称:wulkanowy,代码行数:15,代码来源:StudentAndParentTest.java

示例12: onResponse

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Handles the HTML of an article page: parses it, picks an extraction routine based on
 * which elements are present, adjusts the header and toolbar, and hands the extracted
 * items to the adapter.
 *
 * <p>Selection order: an element with id {@code content} selects the blog path; otherwise
 * an element with id {@code hors_format} yields an empty item list; otherwise a page
 * without {@code article} tags is treated as a video; anything else is a standard article.
 *
 * @param response HTML text passed to {@code Jsoup.parse}
 */
@Override
public void onResponse(String response) {
    // Hide icon
    findViewById(R.id.noNetwork).setVisibility(View.INVISIBLE);

    Document doc = Jsoup.parse(response);

    // If article was loaded from an external App, no image was passed from MainActivity,
    // so it must be fetched in the Collapsing Toolbar
    if (Intent.ACTION_VIEW.equals(getIntent().getAction())) {
        // The image URL is taken from the page's Open Graph meta tag.
        Elements image = doc.select("meta[property=og:image]");
        if (atLeastOneChild(image)) {
            Picasso.with(ArticleActivity.this)
                    .load(image.first().attr("content"))
                    .into((ImageView) findViewById(R.id.imageArticle));
        }
    }

    // Article is from a hosted blog
    List<Model> items;
    Element content = doc.getElementById("content");
    if (content != null) {
        items = extractBlogArticle(content);
        setTagInHeader(R.string.blog_article, R.color.accent_complementary, Color.WHITE);
    } else {
        // Not a blog page: use the category banner text, when present, as the activity title.
        Elements category = doc.select("div.tt_rubrique_ombrelle");
        if (atLeastOneChild(category)) {
            Log.d(TAG, "Cat: " + category.text());
            setTitle(category.text());
        }
        Elements articles = doc.getElementsByTag("article");
        Element largeFormat = doc.getElementById("hors_format");
        if (largeFormat != null) {
            // "hors_format" pages produce no items here; only the header tag is set.
            items = new ArrayList<>();
            setTagInHeader(R.string.large_article, R.color.primary_dark, Color.WHITE);
        } else if (articles.isEmpty()) {
            // Video
            items = extractVideo(doc);
            setTagInHeader(R.string.video_article, R.color.accent_complementary, Color.WHITE);
        } else {
            // Standard article
            items = extractStandardArticle(articles);
            // Full article is restricted to paid members
            if (doc.getElementById("teaser_article") != null) {
                if (menu != null) {
                    MenuItem menuItem = menu.findItem(R.id.action_share);
                    if (menuItem != null) {
                        menuItem.setIcon(getResources().getDrawable(R.drawable.ic_share_black));
                    }
                } else {
                    Log.e(TAG, "menu should not be null at this point!");
                }

                // Restyle the toolbar for the paid-article case: scrim colour, header tag and up arrow.
                CollapsingToolbarLayout collapsingToolbar = findViewById(R.id.collapsing_toolbar);
                collapsingToolbar.setContentScrimResource(R.color.accent);
                setTagInHeader(R.string.paid_article, R.color.accent, Color.BLACK);

                if (getSupportActionBar() != null) {
                    final Drawable upArrow = getResources().getDrawable(R.drawable.ic_arrow_back_black_24dp);
                    getSupportActionBar().setHomeAsUpIndicator(upArrow);
                }
            }
            // After parsing the article, start a new request for comments
            Element react = doc.getElementById("liste_reactions");
            if (react != null) {
                // "[^data-aj-uri]" is jsoup's attribute-name-prefix selector: it matches elements
                // that have an attribute whose name starts with "data-aj-uri".
                Elements dataAjURI = react.select("[^data-aj-uri]");
                if (atLeastOneChild(dataAjURI)) {
                    String commentPreviewURI = Constants.BASE_URL2 + dataAjURI.first().attr("data-aj-uri");
                    REQUEST_QUEUE.add(new StringRequest(Request.Method.GET, commentPreviewURI, commentsReceived, errorResponse));
                }
            }
        }
    }
    // Reached by every branch above: publish the items and hide the loader.
    articleAdapter.insertItems(items);
    findViewById(R.id.articleLoader).setVisibility(View.GONE);
}
 
开发者ID:MBach,项目名称:LeMondeRssReader,代码行数:77,代码来源:ArticleActivity.java

示例13: fetch

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Downloads {@code url} and, when the response is an HTML page, queues the links found in it.
 *
 * <p>A host is skipped once its visit counter exceeds 100. Responses with a status other
 * than 200, without a {@code Content-Type} header, or whose media subtype is not
 * {@code html} are closed and ignored. Only links whose top private domain is
 * {@code google.com} are queued, with their fragment removed.
 *
 * @param url page to download
 * @throws IOException propagated from executing the call or reading the response body
 */
public void fetch(HttpUrl url) throws IOException {
  // Skip hosts that we've visited many times.
  AtomicInteger freshCounter = new AtomicInteger();
  AtomicInteger existingCounter = hostnames.putIfAbsent(url.host(), freshCounter);
  AtomicInteger visits = existingCounter == null ? freshCounter : existingCounter;
  if (visits.incrementAndGet() > 100) {
    return;
  }

  Response response = client.newCall(new Request.Builder().url(url).build()).execute();

  // Only a 200 response with a parseable Content-Type of subtype "html" is crawled.
  String contentType = response.header("Content-Type");
  MediaType mediaType = null;
  if (response.code() == 200 && contentType != null) {
    mediaType = MediaType.parse(contentType);
  }
  if (mediaType == null || !mediaType.subtype().equalsIgnoreCase("html")) {
    response.body().close();
    return;
  }

  Document document = Jsoup.parse(response.body().string(), url.toString());
  HttpUrl pageUrl = response.request().url();
  for (Element anchor : document.select("a[href]")) {
    HttpUrl link = pageUrl.resolve(anchor.attr("href"));
    // A null link means the URL is either invalid or its scheme isn't http/https.
    if (link != null && "google.com".equals(link.topPrivateDomain())) {
      queue.add(link.newBuilder().fragment(null).build());
    }
  }
}
 
开发者ID:dave-r12,项目名称:okhttp-byte-counter,代码行数:36,代码来源:Crawler.java

示例14: getContentNext

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Builds the URL of the next page from the first {@code #pageNum} link whose own text
 * contains {@code 下一页}.
 *
 * @param baseUrl    prefix prepended to the link's {@code href}
 * @param currentUrl not used by this implementation
 * @param result     page bytes, decoded as UTF-8
 * @return {@code baseUrl} plus the link's {@code href}, or an empty string when no link matches
 * @throws UnsupportedEncodingException declared because the charset is given by name
 */
@Override
public String getContentNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    String html = new String(result, "utf-8");
    Elements nextLinks = Jsoup.parse(html).select("#pageNum a:containsOwn(下一页)");
    return nextLinks.isEmpty() ? "" : baseUrl + nextLinks.get(0).attr("href");
}
 
开发者ID:lanyuanxiaoyao,项目名称:PicKing,代码行数:9,代码来源:Aitaotu.java

示例15: createOrGetModel

import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Returns the cached node model, building it on first use.
 *
 * <p>The raw text is parsed as HTML against the base URL and wrapped through
 * {@code SIPNode.e}. If that attempt throws or yields no document, the raw text is wrapped
 * through {@code SIPNode.t} instead.
 *
 * @return the model held in {@code model}
 */
@Override
public SipNodes createOrGetModel() {
    if (model != null) {
        return model;
    }

    SipNodes built = null;
    try {
        Document document = Jsoup.parse(getRawText(), getBaseUrl());
        if (document != null) {
            built = new SipNodes(SIPNode.e(document));
        }
    } catch (Exception ignored) {
        // Any failure while building the element model falls through to the text fallback below.
    }

    if (built == null) {
        built = new SipNodes(SIPNode.t(getRawText()));
    }
    model = built;
    return model;
}
 
开发者ID:virjar,项目名称:vscrawler,代码行数:16,代码来源:XpathNode.java


注:本文中的org.jsoup.Jsoup.parse方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。