当前位置: 首页>>代码示例>>Java>>正文


Java Document.getElementsByClass方法代码示例

本文整理汇总了Java中org.jsoup.nodes.Document.getElementsByClass方法的典型用法代码示例。如果您正苦于以下问题:Java Document.getElementsByClass方法的具体用法?Java Document.getElementsByClass怎么用?Java Document.getElementsByClass使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.jsoup.nodes.Document的用法示例。


在下文中一共展示了Document.getElementsByClass方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getTopTopics

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Builds one {@link Topic} per element returned by
 * {@code doc.getElementsByClass("col-12 col-sm-6 col-md-4 mb-4")}.
 *
 * <p>For each matched element:
 * <ul>
 *   <li>id is the part of the first {@code a} element's {@code href} after the last {@code '/'};</li>
 *   <li>name and description are the first text node of the first and second
 *       {@code a > p} elements respectively;</li>
 *   <li>image is the {@code src} of the first {@code a > img}, or {@code null} when there is none.</li>
 * </ul>
 *
 * <p>NOTE(review): the class argument holds several space-separated names; confirm the
 * jsoup version in use matches it against the whole {@code class} attribute as intended.
 *
 * @param doc parsed page to read the topic cards from
 * @return the topics in document order
 * @throws Exception declared by the signature; missing anchors, paragraphs or text nodes
 *         surface as unchecked exceptions from the lookups below
 */
private ArrayList<Topic> getTopTopics(Document doc) throws Exception {
    ArrayList<Topic> result = new ArrayList<>();
    for (Element card : doc.getElementsByClass("col-12 col-sm-6 col-md-4 mb-4")) {
        Element link = card.select("a").first();
        Element picture = card.select("a > img").first();
        // Title and description are the first two paragraphs inside the link.
        Elements paragraphs = card.select("a > p");
        Element titleParagraph = paragraphs.get(0);
        Element descParagraph = paragraphs.get(1);

        String href = link.attr("href");
        String topicId = href.substring(href.lastIndexOf("/") + 1);
        String topicName = titleParagraph.textNodes().get(0).text();
        String topicDesc = descParagraph.textNodes().get(0).text();
        String topicImage = null;
        if (picture != null) {
            topicImage = picture.attr("src");
        }

        result.add(new Topic()
                .setId(topicId)
                .setName(topicName)
                .setDesc(topicDesc)
                .setImage(topicImage));
    }
    return result;
}
 
开发者ID:ThirtyDegreesRay,项目名称:OpenHub,代码行数:25,代码来源:TopicsPresenter.java

示例2: getFeaturedTopics

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Builds one {@link Topic} per element returned by
 * {@code doc.getElementsByClass("py-4 border-bottom")}.
 *
 * <p>For each matched element the id is the tail of the first link's {@code href}
 * (everything after the last {@code '/'}), the name and description are the first text
 * node of the first and second {@code a > div > p} elements, and the image is the
 * {@code src} of the first {@code a > img} ({@code null} when no image is present).
 *
 * <p>NOTE(review): the class argument holds two space-separated names; confirm the
 * jsoup version in use matches it against the whole {@code class} attribute as intended.
 *
 * @param doc parsed page to read the featured topics from
 * @return the topics in document order
 * @throws Exception declared by the signature; missing anchors, paragraphs or text nodes
 *         surface as unchecked exceptions from the lookups below
 */
private ArrayList<Topic> getFeaturedTopics(Document doc) throws Exception {
    ArrayList<Topic> featured = new ArrayList<>();
    Elements rows = doc.getElementsByClass("py-4 border-bottom");
    for (Element row : rows) {
        Element anchor = row.select("a").first();
        Element thumbnail = row.select("a > img").first();
        // Both text paragraphs live inside the same wrapper div, so query it once.
        Elements textBlocks = row.select("a > div > p");
        Element titleBlock = textBlocks.get(0);
        Element descBlock = textBlocks.get(1);

        String link = anchor.attr("href");
        int lastSlash = link.lastIndexOf("/");

        Topic topic = new Topic()
                .setId(link.substring(lastSlash + 1))
                .setName(titleBlock.textNodes().get(0).text())
                .setDesc(descBlock.textNodes().get(0).text())
                .setImage(thumbnail != null ? thumbnail.attr("src") : null);
        featured.add(topic);
    }
    return featured;
}
 
开发者ID:ThirtyDegreesRay,项目名称:OpenHub,代码行数:25,代码来源:TopicsPresenter.java

示例3: getSynonyms

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Returns the synonym table, filling it from thesaurus.com when it is still empty.
 *
 * <p>For every entry of {@code wordList} the page
 * {@code http://www.thesaurus.com/browse/<word>} is fetched and the text of each
 * {@code .text} element found under {@code .relevancy-block .relevancy-list} is stored
 * as that word's synonym list. The lookup is best-effort: a word whose page cannot be
 * fetched is reported on standard error and left out of the table, and the remaining
 * words are still processed.
 *
 * <p>NOTE(review): the word is appended to the URL without any encoding; verify that
 * {@code wordList} only contains URL-safe words.
 *
 * @return the map from word to its scraped synonyms (the same map instance on every call)
 * @throws IOException declared by the signature; fetch failures are handled per word
 * @throws InterruptedException declared by the signature; not raised by this code
 */
public HashMap<String, ArrayList<String>> getSynonyms() throws IOException, InterruptedException {
    if (synonyms.isEmpty()) {
        for (String word : wordList) {
            try {
                Document doc = Jsoup.connect("http://www.thesaurus.com/browse/" + word)
                        .userAgent("Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0")
                        .timeout(3000)
                        .get();
                Elements text = doc.getElementsByClass("relevancy-block")
                        .select(".relevancy-list")
                        .select(".text");
                ArrayList<String> synonymList = new ArrayList<>(text.size());
                for (int j = 0; j < text.size(); j++) {
                    synonymList.add(text.get(j).text());
                }
                synonyms.put(word, synonymList);
            } catch (IOException e) {
                // Keep going with the other words, but do not hide the failure.
                System.err.println("Could not fetch synonyms for \"" + word + "\": " + e);
            }
        }
    }
    return synonyms;
}
 
开发者ID:jatanrathod,项目名称:Idea-Plagiarism,代码行数:23,代码来源:Thesaurus.java

示例4: takeHistoryNews

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Extracts the history-news entries from an HTML page.
 *
 * <p>The first element with class {@code miwen} is used as the container and one entry
 * is produced for every {@code ul} element found in it. Title and link come from the
 * anchors inside the item's first {@code info} element, the time from its first
 * {@code time} element, and the image from its {@code img} tags. Link and image paths
 * are prefixed with {@code AppUtils.Constants.URL_ILISHI}.
 *
 * @param str HTML source to parse
 * @return the extracted entries in document order; empty when the page has no
 *         {@code miwen} element. Items lacking an {@code info} or {@code time} element
 *         are skipped instead of aborting the whole parse.
 */
public List<IHistoryHistoryNews> takeHistoryNews(String str) {
    Document document = Jsoup.parse(str);
    List<IHistoryHistoryNews> historyNewses = new ArrayList<>();
    Elements containers = document.getElementsByClass("miwen");
    if (containers.isEmpty()) {
        // Page layout differs from what is expected: nothing to extract.
        return historyNewses;
    }
    for (Element item : containers.get(0).getElementsByTag("ul")) {
        Elements info = item.getElementsByClass("info");
        Elements time = item.getElementsByClass("time");
        if (info.isEmpty() || time.isEmpty()) {
            continue; // malformed item, skip it
        }
        // The same anchors provide both the title text and the link target.
        Elements anchors = info.get(0).getElementsByTag("a");
        IHistoryHistoryNews historyNews = new IHistoryHistoryNews();
        historyNews.setTitle(anchors.text()); // title
        historyNews.setTime(time.get(0).text()); // time
        historyNews.setHref(AppUtils.Constants.URL_ILISHI + anchors.attr("href")); // href
        historyNews.setImgHref(AppUtils.Constants.URL_ILISHI + item.getElementsByTag("img").attr("src")); // imgHref
        historyNewses.add(historyNews);
    }
    return historyNewses;
}
 
开发者ID:shenhuanet,项目名称:OpenEyesReading-android,代码行数:21,代码来源:HttpApiImpl.java

示例5: getVacancies

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Collects the vacancies for a search string by walking the result pages.
 *
 * <p>Pages are requested through {@code getDocument(searchString, pageNum)} starting at
 * page 0. Every element with class {@code job} becomes one {@link Vacancy}. Paging stops
 * at the first page that has no such element, and also when a page cannot be loaded:
 * continuing after a failed load would either dereference a missing document or
 * re-process the previous page indefinitely.
 *
 * @param searchString query passed on to {@code getDocument}
 * @return the vacancies gathered from all pages read so far; never {@code null}
 */
@Override
public List<Vacancy> getVacancies(String searchString)
{
    List<Vacancy> result = new ArrayList<>();
    int pageNum = 0;
    while (true)
    {
        Document doc;
        try {
            doc = getDocument(searchString, pageNum);
        } catch (IOException e) {
            e.printStackTrace();
            break; // cannot load this page: return what has been collected
        }
        if (doc == null) break;

        Elements vacancies = doc.getElementsByClass("job");
        if (vacancies.isEmpty()) break;

        for (Element element : vacancies)
        {
            Vacancy vac = new Vacancy();
            vac.setTitle(element.getElementsByAttributeValue("class", "title").text());
            vac.setCompanyName(element.getElementsByAttributeValue("class", "company_name").text());
            vac.setSiteName(URL_FORMAT);
            vac.setUrl("https://moikrug.ru" + element.select("a[class=job_icon]").attr("href"));
            // text() yields an empty string when nothing matches, so no fallback is needed.
            vac.setSalary(element.getElementsByAttributeValue("class", "salary").text());
            vac.setCity(element.getElementsByAttributeValue("class", "location").text());
            result.add(vac);
        }
        pageNum++;
    }
    return result;
}
 
开发者ID:avedensky,项目名称:JavaRushTasks,代码行数:36,代码来源:HHStrategy.java

示例6: getCategoryList

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Downloads {@code http://www.aihuhua.com/baike/} and reads the flower categories from
 * the first element with class {@code catelist}.
 *
 * <p>Every child node of that element that itself has children yields one
 * {@link FlowerCategory}: an {@code a} child supplies the URL ({@code href}) and, when it
 * has a second child node, the image path ({@code src} of that node); an {@code h2}
 * child supplies the name ({@code title}).
 *
 * @return the categories found; empty when the page cannot be fetched (the error is
 *         printed) or contains no {@code catelist} element
 */
private static List<FlowerCategory> getCategoryList() {

	List<FlowerCategory> categories = new ArrayList<FlowerCategory>();

	try {
		Document doc = Jsoup.connect("http://www.aihuhua.com/baike/").get();
		Element cates = doc.getElementsByClass("catelist").first();
		if (cates == null) {
			// first() returns null when nothing matched; avoid dereferencing it.
			return categories;
		}
		for (Node node : cates.childNodes()) {
			List<Node> childs = node.childNodes();
			if (childs == null || childs.isEmpty()) {
				continue;
			}
			FlowerCategory category = new FlowerCategory();
			for (Node child : childs) {
				if ("a".equals(child.nodeName())) {
					category.setUrl(child.attr("href"));
					// The image is read from the anchor's second child node; skip it
					// when the anchor has fewer children instead of failing.
					if (child.childNodeSize() > 1) {
						category.setImgPath(child.childNode(1).attr("src"));
					}
				} else if ("h2".equals(child.nodeName())) {
					category.setName(child.attr("title"));
				}
			}
			categories.add(category);
		}
	} catch (IOException e) {
		e.printStackTrace();
	}

	return categories;
}
 
开发者ID:handexing,项目名称:frameworkAggregate,代码行数:33,代码来源:MyJsoup.java

示例7: getHtmlByClass

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Requests {@code url} with an HTTP POST and returns the string form of all elements
 * whose class matches {@code eleName}.
 *
 * @param url     address of the page to request
 * @param eleName class name passed to {@code getElementsByClass}
 * @return {@code toString()} of the matched elements, or an empty string when the
 *         request fails with an {@link IOException} (the stack trace is printed)
 */
public static String getHtmlByClass(String url, String eleName) {
    try {
        return Jsoup.connect(url)
                .post()
                .getElementsByClass(eleName)
                .toString();
    } catch (IOException requestFailure) {
        requestFailure.printStackTrace();
        return "";
    }
}
 
开发者ID:dragon-yuan,项目名称:Ins_fb_pictureSpider_WEB,代码行数:12,代码来源:DomUtil.java

示例8: takeClickRank

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Extracts the click-ranking entries from an HTML page.
 *
 * <p>The first element with class {@code click} is used as the container and one entry
 * is produced for every {@code li} element found in it. The title is the text of the
 * item's anchors and the link is their {@code href} prefixed with
 * {@code AppUtils.Constants.URL_ILISHI}.
 *
 * @param str HTML source to parse
 * @return the extracted entries in document order; empty when the page has no
 *         {@code click} element
 */
public List<IHistoryClickRank> takeClickRank(String str) {
    Document document = Jsoup.parse(str);
    List<IHistoryClickRank> clickRankList = new ArrayList<>();
    Elements containers = document.getElementsByClass("click");
    if (containers.isEmpty()) {
        // Page layout differs from what is expected: nothing to extract.
        return clickRankList;
    }
    for (Element item : containers.get(0).getElementsByTag("li")) {
        // The same anchors provide both the title text and the link target.
        Elements anchors = item.getElementsByTag("a");
        IHistoryClickRank clickRank = new IHistoryClickRank();
        clickRank.setTitle(anchors.text()); // title
        clickRank.setHref(AppUtils.Constants.URL_ILISHI + anchors.attr("href")); // href
        clickRankList.add(clickRank);
    }
    return clickRankList;
}
 
开发者ID:shenhuanet,项目名称:OpenEyesReading-android,代码行数:19,代码来源:HttpApiImpl.java

示例9: takeProposeRead

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Extracts the recommended-reading entries from an HTML page.
 *
 * <p>The first element with class {@code oldpic} is used as the container and one entry
 * is produced for every {@code li} element found in it. The title is the {@code title}
 * attribute of the item's image, the link is the anchor's {@code href} and the image
 * link is the image's {@code src}; both links are prefixed with
 * {@code AppUtils.Constants.URL_ILISHI}.
 *
 * @param str HTML source to parse
 * @return the extracted entries in document order; empty when the page has no
 *         {@code oldpic} element
 */
public List<IHistoryOldPhoto> takeProposeRead(String str) {
    Document document = Jsoup.parse(str);
    List<IHistoryOldPhoto> proposeReads = new ArrayList<>();
    Elements containers = document.getElementsByClass("oldpic");
    if (containers.isEmpty()) {
        // Page layout differs from what is expected: nothing to extract.
        return proposeReads;
    }
    for (Element item : containers.get(0).getElementsByTag("li")) {
        // The image supplies both the title and the picture address.
        Elements images = item.getElementsByTag("img");
        IHistoryOldPhoto proposeRead = new IHistoryOldPhoto();
        proposeRead.setTitle(images.attr("title")); // title
        proposeRead.setHref(AppUtils.Constants.URL_ILISHI + item.getElementsByTag("a").attr("href")); // href
        proposeRead.setImgHref(AppUtils.Constants.URL_ILISHI + images.attr("src")); // imgHref
        proposeReads.add(proposeRead);
    }
    return proposeReads;
}
 
开发者ID:shenhuanet,项目名称:OpenEyesReading-android,代码行数:20,代码来源:HttpApiImpl.java

示例10: convertBrToDoc

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Reads an HTML document from a reader and extracts the values described by a list of
 * element definitions.
 *
 * <p>The reader is consumed line by line (lines are re-joined with {@code "\r\n"}) and is
 * always closed, even when reading fails; a read failure is printed and whatever was read
 * up to that point is still parsed. For each definition the matching elements are looked
 * up by class name ({@code ElementType} equal to {@code "CLASS"}) or by tag name
 * ({@code "TAG"}); definitions with any other type are ignored. Depending on
 * {@code ElementValueType} the result line is {@code ElementName + "\t"} followed by the
 * elements' HTML ({@code "html"}) or their text ({@code "text"}); other value types
 * produce no output.
 *
 * @param br   source of the HTML; closed by this method
 * @param list element definitions to evaluate, in order
 * @return one tab-separated {@code name<TAB>value} line per definition that produced output
 */
public ArrayList<String> convertBrToDoc(BufferedReader br, ArrayList<ElementDEF> list){
	StringBuilder buf = new StringBuilder();
	ArrayList<String> retList = new ArrayList<String>();

	try {
		String line;
		while ((line = br.readLine()) != null) {
			buf.append(line);
			buf.append("\r\n");
		}
	} catch (IOException e) {
		e.printStackTrace();
	} finally {
		// Close in finally so the reader is released even when readLine fails.
		try {
			br.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	Document doc = Jsoup.parse(buf.toString());

	for (ElementDEF eDef : list) {
		Elements eleList = null;
		// Constant-first comparisons keep a definition with a missing type from failing.
		if ("CLASS".equals(eDef.ElementType)) {
			eleList = doc.getElementsByClass(eDef.ElementValue);
		} else if ("TAG".equals(eDef.ElementType)) {
			eleList = doc.getElementsByTag(eDef.ElementValue);
		}

		if (eleList == null) {
			continue;
		}
		if ("html".equals(eDef.ElementValueType)) {
			retList.add(eDef.ElementName + "\t" + eleList.toString());
		} else if ("text".equals(eDef.ElementValueType)) {
			retList.add(eDef.ElementName + "\t" + eleList.text());
		}
	}
	return retList;
}
 
开发者ID:onycom-ankus,项目名称:ankus_crawler,代码行数:49,代码来源:ParseHTML.java

示例11: findNewIsinFromHTML

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Reads the ISIN code from a fund page.
 *
 * <p>Takes the first {@code p} element inside the first element with class
 * {@code fund-block}, splits its text on single spaces and expects exactly two parts;
 * the second part is the ISIN candidate, which must pass {@code isinCodeValid}.
 *
 * @param doc parsed fund page
 * @return the validated ISIN code
 * @throws SourceHTMLChangedException when the fund block or its paragraph is missing, or
 *         the paragraph text does not split into exactly two parts
 * @throws IsinNotFoundException when the extracted value fails {@code isinCodeValid}
 * @throws IOException declared by the signature; not raised directly by this code
 */
private static String findNewIsinFromHTML(Document doc) throws IOException, ComparisonException {
    Elements fundBlocks = doc.getElementsByClass("fund-block");
    if (fundBlocks == null || fundBlocks.isEmpty()) {
        throw new SourceHTMLChangedException();
    }

    Element isinParagraph = fundBlocks.first().select("p").first();
    if (isinParagraph == null) {
        throw new SourceHTMLChangedException();
    }

    // Expected layout: "<label> <isin>".
    String[] parts = isinParagraph.text().split(" ");
    if (parts.length != 2) {
        throw new SourceHTMLChangedException();
    }

    String candidate = parts[1];
    if (isinCodeValid(candidate)) {
        return candidate;
    }
    throw new IsinNotFoundException("Invalid ISIN format scraped from source page");
}
 
开发者ID:TulevaEE,项目名称:onboarding-service,代码行数:19,代码来源:PensionikeskusCodeToIsin.java

示例12: crawlQuoraPage

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Prints one report line for a Quora question page and optionally follows its related
 * questions one level deep.
 *
 * <p>The URL is recorded in {@code uniqueQuoraLinks} before the page is loaded through
 * {@code getDocument}; a {@code null} document ends the call. For every
 * {@code div.QuestionStats} element the running {@code count}, the URL and the text of
 * at most the first two {@code strong} elements are printed, tab-separated. Then each
 * {@code QuestionLastActivityTime} element's text is printed with the
 * {@code "Last asked: "} prefix removed and {@code ", 2016"} appended when the text does
 * not contain {@code "201"}; when there is no such element only a line break is printed.
 *
 * @param url          address of the question page
 * @param crawlRelated when {@code true}, links found under {@code question_related}
 *                     elements (limited through {@code Utils.getSublist}) that are not
 *                     yet in {@code uniqueQuoraLinks} are crawled too, without following
 *                     their own related questions
 */
private void crawlQuoraPage(String url, boolean crawlRelated) {
	uniqueQuoraLinks.add(url);
	Document page = getDocument(url);
	if (page == null) {
		return;
	}

	for (Element stats : page.select("div.QuestionStats")) {
		System.out.print(count++ + "\t" + url);
		Elements strongs = stats.getElementsByTag("strong");
		// Only the first two values are reported.
		int shown = Math.min(2, strongs.size());
		for (int k = 0; k < shown; k++) {
			System.out.print("\t" + strongs.get(k).getElementsByTag("strong").text());
		}
	}

	Elements activity = page.getElementsByClass("QuestionLastActivityTime");
	if (activity.isEmpty()) {
		// Terminate the report line even when no activity time is available.
		System.out.println();
	} else {
		for (Element entry : activity) {
			String raw = StringUtils.remove(entry.text(), "Last asked: ");
			String stamped = raw.contains("201") ? raw : raw + ", 2016";
			System.out.println("\t" + stamped);
		}
	}

	if (!crawlRelated) {
		return;
	}

	List<Element> relatedLinks = new ArrayList<>();
	for (Element related : page.getElementsByClass("question_related")) {
		relatedLinks.addAll(related.select("a[href]"));
	}

	relatedLinks = Utils.getSublist(relatedLinks, QuoraConstants.numReLatedQuestion);
	for (Element link : relatedLinks) {
		String relatedUrl = link.absUrl("href");
		if (uniqueQuoraLinks.contains(relatedUrl)) {
			continue;
		}
		crawlQuoraPage(relatedUrl, false);
	}
}
 
开发者ID:thekosmix,项目名称:Quora-Marketing,代码行数:58,代码来源:QuoraCrawler.java


注:本文中的org.jsoup.nodes.Document.getElementsByClass方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。