本文整理汇总了Java中org.jsoup.Jsoup.parse方法的典型用法代码示例。如果您正苦于以下问题：Java Jsoup.parse方法的具体用法？Java Jsoup.parse怎么用？Java Jsoup.parse使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.jsoup.Jsoup的用法示例。
在下文中一共展示了Jsoup.parse方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getFormMap_Kingo
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Collects the submittable fields of an HTML page into a name/value map.
 *
 * <p>The {@code action} attribute of the form at {@code formIndex} is stored under the key
 * {@code "formAction"}. Every named {@code <input>} contributes its {@code value} attribute and
 * every named {@code <select>} contributes the {@code value} of its first {@code <option>}.
 * Controls are gathered from the whole document, not only from the selected form.
 *
 * @param html      markup to parse
 * @param formIndex zero-based index of the form whose action is recorded
 * @return the collected fields, or {@code null} when anything fails (for example when
 *         {@code formIndex} is out of range)
 */
public static Map<String, String> getFormMap_Kingo(String html, int formIndex) {
    Map<String, String> retVal = new HashMap<String, String>();
    try {
        Document doc = Jsoup.parse(html);
        Element formElement = doc.select("form").get(formIndex);
        retVal.put("formAction", formElement.attr("action"));
        // Query both control kinds: matching only "input" made the <select> branch unreachable.
        for (Element element : doc.select("input, select")) {
            String name = element.attr("name");
            // attr() yields "" (never null) for a missing attribute; unnamed controls are skipped.
            if (name.isEmpty()) {
                continue;
            }
            if (element.nodeName().equals("select")) {
                // Search the options of this <select> only; it may legitimately have none.
                Element firstOption = element.select("option").first();
                if (firstOption != null) {
                    retVal.put(name, firstOption.attr("value"));
                }
            } else {
                retVal.put(name, element.attr("value"));
            }
        }
    } catch (Exception e) {
        // Kept for backward compatibility: failure is reported as a null map.
        retVal = null;
    }
    return retVal;
}
示例2: parseProxys
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Extracts proxies from the rows matched by {@code div#list table tbody tr}.
 *
 * <p>The first cell of a row is read as the IP and the second as the port. Rows with fewer than
 * two cells, or whose port is not an integer, are skipped so that a single malformed row does
 * not abort the whole parse.
 *
 * @param content HTML of the proxy listing page
 * @return the proxies found, in document order; empty when no row qualifies
 */
@Override
public List<Proxy> parseProxys(String content) {
    List<Proxy> proxyList = new ArrayList<>();
    Document doc = Jsoup.parse(content);
    for (Element tr : doc.select("div#list table tbody tr")) {
        Elements tds = tr.children();
        if (tds.size() < 2) {
            continue;
        }
        String ip = tds.get(0).text().trim();
        Integer port;
        try {
            port = Integer.parseInt(tds.get(1).text().trim());
        } catch (NumberFormatException e) {
            // Not a usable proxy row (blank or decorated port cell); ignore it.
            continue;
        }
        proxyList.add(new Proxy(ip, port));
    }
    return proxyList;
}
示例3: getClozeTestQuestionContentValidationResult
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Validates the HTML of a cloze-test question and reports the first problem found.
 *
 * <p>Checks run in this order: presence of at least one {@code cloze="true"} marker, uniqueness
 * of the {@code id} attributes on {@code span[cloze=true]} elements, a parsable
 * {@code precision} attribute on each of them, and a parsable text for every span flagged
 * {@code numeric="true"}.
 *
 * @param node JSON node whose {@code "question"} field holds the question markup
 * @return a short description of the first failed check, or {@code null} when all pass
 */
@Transient
private String getClozeTestQuestionContentValidationResult(JsonNode node) {
    String questionText = node.get("question").asText();
    if (!questionText.contains("cloze=\"true\"")) {
        return "no embedded answers";
    }

    Elements clozeSpans = Jsoup.parse(questionText).select("span[cloze=true]");

    long uniqueIds = clozeSpans.stream().map(span -> span.attr("id")).distinct().count();
    if (uniqueIds != clozeSpans.size()) {
        return "duplicate ids found";
    }

    for (Element span : clozeSpans) {
        String precision = span.attr("precision");
        if (precision.isEmpty() || !NumberUtils.isParsable(precision)) {
            return "invalid precision found";
        }
    }

    for (Element span : clozeSpans) {
        boolean numeric = span.attr("numeric").equals("true");
        if (numeric && !NumberUtils.isParsable(span.text())) {
            return "non-numeric correct answer for numeric question";
        }
    }

    return null;
}
示例4: getContent
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Builds the album list from a listing page and stores it in {@code resultMap}.
 *
 * <p>Every {@code #pins a:has(img)} anchor becomes one {@code AlbumInfo}: the anchor's
 * {@code href} is the album URL and the {@code data-original} attribute of its first image is
 * the picture URL, upgraded from {@code http://} to {@code https://} when needed.
 *
 * @param baseUrl    site base URL (not used by this implementation)
 * @param currentUrl URL the page was fetched from; echoed back under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as UTF-8
 * @param resultMap  map that receives {@code CURRENT_URL} and {@code RESULT}
 * @return the same {@code resultMap} instance
 * @throws UnsupportedEncodingException if the "utf-8" charset name is not supported
 */
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<AlbumInfo> data = new ArrayList<>();
    Document document = Jsoup.parse(new String(result, "utf-8"));
    for (Element link : document.select("#pins a:has(img)")) {
        AlbumInfo album = new AlbumInfo();
        album.setAlbumUrl(link.attr("href"));
        Element image = link.select("img").first();
        if (image != null) {
            String picUrl = image.attr("data-original");
            Log.e("Mzitu", "getContent: " + picUrl);
            // Upgrade the scheme only. A blanket replace("http", "https") turned existing
            // "https://" links into "httpss://" and rewrote "http" inside the path as well.
            if (picUrl.startsWith("http://")) {
                picUrl = "https://" + picUrl.substring("http://".length());
            }
            album.setPicUrl(picUrl);
        }
        data.add(album);
    }
    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, data);
    return resultMap;
}
示例5: doAnalysis
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Extracts the course-selection result message from a response page.
 *
 * <p>Reads the {@code <font>} elements found inside the cells of the first {@code <table>}.
 * A single element is returned as its inner HTML; several are concatenated, each followed by
 * {@code "</br>"}.
 *
 * @param html response markup, may be {@code null}
 * @return {@code null} for {@code null} input, a fixed "no result" message when the page has no
 *         table or no font element, otherwise the extracted message
 */
public String doAnalysis(String html) {
    if (html == null) {
        return null;
    }

    Elements tables = Jsoup.parse(html).select("table");
    if (tables.isEmpty()) {
        return "获取不到选课结果";
    }

    Elements messages = tables.first().select("td").select("font");
    switch (messages.size()) {
        case 0:
            return "获取不到选课结果";
        case 1:
            return messages.first().html();
        default:
            StringBuilder joined = new StringBuilder();
            for (Element message : messages) {
                joined.append(message.html()).append("</br>");
            }
            return joined.toString();
    }
}
示例6: getContent
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Builds the album list from a post-list page and stores it in {@code resultMap}.
 *
 * <p>Each {@code #post-list-posts li div.inner a} anchor yields one {@code AlbumInfo} whose
 * album URL is {@code baseUrl} followed by the anchor's {@code href}; the {@code src} of the
 * anchor's first image, when there is one, becomes the picture URL.
 *
 * @param baseUrl    prefix prepended to each relative album link
 * @param currentUrl URL the page was fetched from; echoed back under {@code CURRENT_URL}
 * @param result     raw page bytes, decoded as UTF-8
 * @param resultMap  map that receives {@code CURRENT_URL} and {@code RESULT}
 * @return the same {@code resultMap} instance
 * @throws UnsupportedEncodingException if the "utf-8" charset name is not supported
 */
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<AlbumInfo> albums = new ArrayList<>();
    String html = new String(result, "utf-8");

    for (Element anchor : Jsoup.parse(html).select("#post-list-posts li div.inner a")) {
        AlbumInfo album = new AlbumInfo();
        album.setAlbumUrl(baseUrl + anchor.attr("href"));

        Elements thumbnails = anchor.select("img");
        if (!thumbnails.isEmpty()) {
            album.setPicUrl(thumbnails.first().attr("src"));
        }
        albums.add(album);
    }

    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, albums);
    return resultMap;
}
示例7: getDetailContent
import org.jsoup.Jsoup; //导入方法依赖的package包/类
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
List<PicInfo> urls = new ArrayList<>();
Document document = Jsoup.parse(new String(result, "utf-8"));
Elements title = document.select("#photos h1");
String sTitle = "";
if (title.size() > 0)
sTitle = title.get(0).text();
Elements time = document.select(".tsmaincont-desc span");
String sTime = "";
if (time.size() > 0)
sTime = time.get(0).text();
Elements elements = document.select("#big-pic img");
for (Element element : elements) {
urls.add(new PicInfo(element.attr("src")).setTitle(sTitle).setTime(sTime));
}
resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
resultMap.put(DetailActivity.parameter.RESULT, urls);
return resultMap;
}
示例8: getSelectorByName
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Finds a {@code <select>} element by name, including selects whose markup is only present
 * inside a {@code <script>} block because the page fills them in dynamically.
 *
 * <p>The parsed page is searched first. If it holds no matching select with options and
 * non-blank text, the contents of the page's script elements are reduced to the embedded
 * {@code <select>...</select>} markup, re-parsed, and searched again.
 *
 * @param html       page markup, may be {@code null}
 * @param selectName value of the {@code name} attribute to look for
 * @return the first matching select element, or {@code null} if none is found
 */
public static Element getSelectorByName(String html, String selectName) {
    if (html == null) {
        return null;
    }
    Document page = Jsoup.parse(html);
    String query = "select[name=" + selectName + "]";

    // First try the page itself; it only counts when the select is actually populated.
    Elements inPage = page.select(query);
    if (inPage != null && !inPage.select("option").isEmpty()) {
        String visibleText = inPage.text();
        if (visibleText != null && !visibleText.trim().equals("")) {
            return inPage.first();
        }
    }

    // Otherwise fall back to select markup embedded in the page's scripts.
    Elements scripts = page.select("script");
    if (scripts == null || scripts.size() == 0) {
        return null;
    }
    String embeddedMarkup = scripts.html().replaceAll("[\\s\\S]*(<select[\\w\\W]*>[\\w\\W]+</select>)", "$1");
    Document embedded = Jsoup.parse("<html>" + embeddedMarkup + "</html>");
    return embedded.select(query).first();
}
示例9: jsoupIOTest03
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Scratch demo: parses a small, deliberately malformed HTML document (a stray {@code <ACTxxx}
 * tag and an unclosed body) and prints the inner HTML of the body that jsoup produces.
 *
 * @throws IOException declared by the signature; nothing in the body throws it
 */
public static void jsoupIOTest03() throws IOException {
    StringBuilder markup = new StringBuilder();
    markup.append(" <html>");
    markup.append(" <head>");
    markup.append(" <title>JsoupInputAndOutput</title>");
    markup.append(" </head>");
    markup.append(" <body> hhhh<ACTxxx<body>");
    markup.append(" </html>");

    Document parsed = Jsoup.parse(markup.toString());
    System.out.println(parsed.body().html());
}
示例10: getContentNext
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Resolves the URL of the next listing page.
 *
 * <p>Looks for a {@code div.page} link whose own text contains "下一页" or ">" and appends its
 * {@code href} to the directory part of {@code currentUrl} (everything matched by
 * {@code http.*}{@code /}, i.e. up to and including the last slash).
 *
 * @param baseUrl    site base URL (not used by this implementation)
 * @param currentUrl URL of the page being parsed
 * @param result     raw page bytes, decoded as GB2312
 * @return the next page URL, or the empty string when there is no next link or
 *         {@code currentUrl} contains no {@code http.../} prefix
 * @throws UnsupportedEncodingException if the "gb2312" charset name is not supported
 */
@Override
public String getContentNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    String html = new String(result, "gb2312");
    Elements nextLinks = Jsoup.parse(html).select("div.page a:containsOwn(下一页),div.page a:containsOwn(>)");
    if (nextLinks.isEmpty()) {
        return "";
    }

    Matcher directory = Pattern.compile("http.*/").matcher(currentUrl);
    if (!directory.find()) {
        return "";
    }
    return directory.group() + nextLinks.get(0).attr("href");
}
示例11: getSnpPageUrlWithWrongPage
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Verifies that {@code getSnpPageUrl()} throws {@code NotLoggedInErrorException} when the page
 * returned for the start URL is the "OcenyWszystkie-semester.html" fixture instead of the page
 * it expects.
 *
 * <p>NOTE(review): {@code snp} is declared outside this method; the stubbing below presumes it
 * is a Mockito mock or spy — confirm against the test class setup.
 */
@Test(expected = NotLoggedInErrorException.class)
public void getSnpPageUrlWithWrongPage() throws Exception {
// Parse the fixture that plays the role of the unexpected page.
Document wrongPageDocument = Jsoup.parse(
FixtureHelper.getAsString(getClass().getResourceAsStream("OcenyWszystkie-semester.html"))
);
// Any URL lookup returns the fixture document; the start page URL is a fixed placeholder.
Mockito.when(snp.getPageByUrl(Mockito.anyString())).thenReturn(wrongPageDocument);
Mockito.when(snp.getStartPageUrl()).thenReturn("http://wulkan.io");
// Only the methods under test run their real implementation.
Mockito.when(snp.getId()).thenCallRealMethod();
Mockito.when(snp.getSnpPageUrl()).thenCallRealMethod();
// Expected to throw; the assertion is the 'expected' attribute of @Test.
snp.getSnpPageUrl();
}
示例12: onResponse
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Handles the HTML of a loaded article.
 *
 * <p>Hides the "no network" view, parses {@code response} with jsoup, and chooses how to extract
 * the content from the shape of the page: an element with id {@code content} goes through
 * {@code extractBlogArticle}; otherwise an element with id {@code hors_format} yields an empty
 * item list, a page without {@code <article>} tags goes through {@code extractVideo}, and any
 * other page goes through {@code extractStandardArticle}. The resulting items are inserted into
 * {@code articleAdapter} and the loader view is hidden.
 *
 * <p>For standard articles only, the method also restyles the toolbar when a
 * {@code teaser_article} element is present and queues a request for the comments preview.
 *
 * @param response HTML of the article page
 */
@Override
public void onResponse(String response) {
// Hide icon
findViewById(R.id.noNetwork).setVisibility(View.INVISIBLE);
Document doc = Jsoup.parse(response);
// If article was loaded from an external App, no image was passed from MainActivity,
// so it must be fetched in the Collapsing Toolbar
if (Intent.ACTION_VIEW.equals(getIntent().getAction())) {
Elements image = doc.select("meta[property=og:image]");
// NOTE(review): atLeastOneChild is defined elsewhere; presumably true for a non-empty selection — confirm.
if (atLeastOneChild(image)) {
Picasso.with(ArticleActivity.this)
.load(image.first().attr("content"))
.into((ImageView) findViewById(R.id.imageArticle));
}
}
// Article is from a hosted blog
List<Model> items;
Element content = doc.getElementById("content");
if (content != null) {
items = extractBlogArticle(content);
setTagInHeader(R.string.blog_article, R.color.accent_complementary, Color.WHITE);
} else {
// Not a blog page: use the category banner text, when present, as the screen title.
Elements category = doc.select("div.tt_rubrique_ombrelle");
if (atLeastOneChild(category)) {
Log.d(TAG, "Cat: " + category.text());
setTitle(category.text());
}
Elements articles = doc.getElementsByTag("article");
Element largeFormat = doc.getElementById("hors_format");
if (largeFormat != null) {
// Large-format pages are only tagged in the header; no items are extracted.
items = new ArrayList<>();
setTagInHeader(R.string.large_article, R.color.primary_dark, Color.WHITE);
} else if (articles.isEmpty()) {
// Video
items = extractVideo(doc);
setTagInHeader(R.string.video_article, R.color.accent_complementary, Color.WHITE);
} else {
// Standard article
items = extractStandardArticle(articles);
// Full article is restricted to paid members
if (doc.getElementById("teaser_article") != null) {
// Switch the share icon, toolbar scrim, header tag and up arrow to the paid-article styling.
if (menu != null) {
MenuItem menuItem = menu.findItem(R.id.action_share);
if (menuItem != null) {
menuItem.setIcon(getResources().getDrawable(R.drawable.ic_share_black));
}
} else {
Log.e(TAG, "menu should not be null at this point!");
}
CollapsingToolbarLayout collapsingToolbar = findViewById(R.id.collapsing_toolbar);
collapsingToolbar.setContentScrimResource(R.color.accent);
setTagInHeader(R.string.paid_article, R.color.accent, Color.BLACK);
if (getSupportActionBar() != null) {
final Drawable upArrow = getResources().getDrawable(R.drawable.ic_arrow_back_black_24dp);
getSupportActionBar().setHomeAsUpIndicator(upArrow);
}
}
// After parsing the article, start a new request for comments
Element react = doc.getElementById("liste_reactions");
if (react != null) {
// jsoup's [^prefix] selector matches elements having an attribute whose name starts with the prefix.
Elements dataAjURI = react.select("[^data-aj-uri]");
if (atLeastOneChild(dataAjURI)) {
String commentPreviewURI = Constants.BASE_URL2 + dataAjURI.first().attr("data-aj-uri");
REQUEST_QUEUE.add(new StringRequest(Request.Method.GET, commentPreviewURI, commentsReceived, errorResponse));
}
}
}
}
// Every branch above assigns items, so the adapter always receives a list (possibly empty).
articleAdapter.insertItems(items);
findViewById(R.id.articleLoader).setVisibility(View.GONE);
}
示例13: fetch
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Downloads one page and queues the links it contains.
 *
 * <p>A host is skipped once it has been requested more than 100 times. Responses that are not
 * status 200 with an HTML media type are closed and ignored. From an HTML page, every
 * {@code a[href]} target that resolves against the final request URL and whose top private
 * domain is {@code google.com} is added to {@code queue} with its fragment removed.
 *
 * @param url page to fetch
 * @throws IOException if the HTTP call or reading the body fails
 */
public void fetch(HttpUrl url) throws IOException {
    // Count this visit against the host, reusing the counter another caller may have registered.
    AtomicInteger freshCounter = new AtomicInteger();
    AtomicInteger registered = hostnames.putIfAbsent(url.host(), freshCounter);
    AtomicInteger visits = registered != null ? registered : freshCounter;
    if (visits.incrementAndGet() > 100) {
        return;
    }

    Request request = new Request.Builder().url(url).build();
    Response response = client.newCall(request).execute();

    String contentType = response.header("Content-Type");
    boolean isHtml = false;
    if (response.code() == 200 && contentType != null) {
        MediaType mediaType = MediaType.parse(contentType);
        isHtml = mediaType != null && mediaType.subtype().equalsIgnoreCase("html");
    }
    if (!isHtml) {
        response.body().close();
        return;
    }

    Document document = Jsoup.parse(response.body().string(), url.toString());
    HttpUrl resolvedFrom = response.request().url();
    for (Element anchor : document.select("a[href]")) {
        HttpUrl link = resolvedFrom.resolve(anchor.attr("href"));
        // A null link means the URL is invalid or its scheme isn't http/https.
        if (link != null && "google.com".equals(link.topPrivateDomain())) {
            queue.add(link.newBuilder().fragment(null).build());
        }
    }
}
示例14: getContentNext
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Resolves the URL of the next listing page.
 *
 * @param baseUrl    prefix prepended to the relative link of the next page
 * @param currentUrl URL of the page being parsed (not used by this implementation)
 * @param result     raw page bytes, decoded as UTF-8
 * @return {@code baseUrl} followed by the {@code href} of the first {@code #pageNum} link whose
 *         own text contains "下一页", or the empty string when there is no such link
 * @throws UnsupportedEncodingException if the "utf-8" charset name is not supported
 */
@Override
public String getContentNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    String html = new String(result, "utf-8");
    Elements nextLinks = Jsoup.parse(html).select("#pageNum a:containsOwn(下一页)");
    return nextLinks.isEmpty() ? "" : baseUrl + nextLinks.first().attr("href");
}
示例15: createOrGetModel
import org.jsoup.Jsoup; //导入方法依赖的package包/类
/**
 * Returns the cached node model, building it on first use.
 *
 * <p>The raw text is parsed as HTML against the base URL and wrapped via {@code SIPNode.e}.
 * If parsing yields no document, or anything in that path throws, the raw text is wrapped via
 * {@code SIPNode.t} instead.
 *
 * @return the model for this item
 */
@Override
public SipNodes createOrGetModel() {
    if (model != null) {
        return model;
    }

    SipNodes parsedModel = null;
    try {
        Document document = Jsoup.parse(getRawText(), getBaseUrl());
        if (document != null) {
            parsedModel = new SipNodes(SIPNode.e(document));
        }
    } catch (Exception ignored) {
        // Deliberate best-effort: any failure falls through to the raw-text model below.
    }

    model = parsedModel != null ? parsedModel : new SipNodes(SIPNode.t(getRawText()));
    return model;
}