本文整理汇总了Java中org.jsoup.nodes.Document.getElementsByClass方法的典型用法代码示例。如果您正苦于以下问题:Java Document.getElementsByClass方法的具体用法?Java Document.getElementsByClass怎么用?Java Document.getElementsByClass使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.jsoup.nodes.Document的用法示例。
在下文中一共展示了Document.getElementsByClass方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getTopTopics
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Collects the "top topic" cards of a page into {@code Topic} objects.
 *
 * <p>For every element returned by
 * {@code getElementsByClass("col-12 col-sm-6 col-md-4 mb-4")} the first link supplies the id
 * (everything after the last '/' of its {@code href}), the first two {@code a > p} paragraphs
 * supply name and description (first text node of each), and the first {@code a > img}, when
 * present, supplies the image source.
 *
 * @param doc parsed page to scan
 * @return the topics built from the matched cards, in the order they were matched
 * @throws Exception declared by the signature; nothing in this body throws a checked exception
 */
private ArrayList<Topic> getTopTopics(Document doc) throws Exception{
    ArrayList<Topic> result = new ArrayList<>();
    // NOTE(review): a space-separated class list is passed as a single class name — confirm
    // this matches the intended cards with the jsoup version in use.
    for (Element card : doc.getElementsByClass("col-12 col-sm-6 col-md-4 mb-4")) {
        Element link = card.select("a").first();
        Element picture = card.select("a > img").first();
        // Name and description are the first two paragraphs directly under the link.
        Elements paragraphs = card.select("a > p");
        Element nameParagraph = paragraphs.get(0);
        Element descParagraph = paragraphs.get(1);

        String href = link.attr("href");
        String id = href.substring(href.lastIndexOf("/") + 1);
        String name = nameParagraph.textNodes().get(0).text();
        String desc = descParagraph.textNodes().get(0).text();
        String image = null;
        if (picture != null) {
            image = picture.attr("src");
        }

        result.add(new Topic()
                .setId(id)
                .setName(name)
                .setDesc(desc)
                .setImage(image));
    }
    return result;
}
示例2: getFeaturedTopics
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Collects the "featured topic" rows of a page into {@code Topic} objects.
 *
 * <p>For every element returned by {@code getElementsByClass("py-4 border-bottom")} the first
 * link supplies the id (everything after the last '/' of its {@code href}), the first two
 * {@code a > div > p} paragraphs supply name and description (first text node of each), and
 * the first {@code a > img}, when present, supplies the image source.
 *
 * @param doc parsed page to scan
 * @return the topics built from the matched rows, in the order they were matched
 * @throws Exception declared by the signature; nothing in this body throws a checked exception
 */
private ArrayList<Topic> getFeaturedTopics(Document doc) throws Exception{
    ArrayList<Topic> featured = new ArrayList<>();
    // NOTE(review): a space-separated class list is passed as a single class name — confirm
    // this matches the intended rows with the jsoup version in use.
    Elements rows = doc.getElementsByClass("py-4 border-bottom");
    for (Element row : rows) {
        Element anchor = row.select("a").first();
        Element img = row.select("a > img").first();
        // Both text paragraphs live under the same selector; query it once.
        Elements texts = row.select("a > div > p");
        Element nameNode = texts.get(0);
        Element descNode = texts.get(1);

        String target = anchor.attr("href");
        String id = target.substring(target.lastIndexOf("/") + 1);
        String name = nameNode.textNodes().get(0).text();
        String desc = descNode.textNodes().get(0).text();
        String image;
        if (img == null) {
            image = null;
        } else {
            image = img.attr("src");
        }

        Topic topic = new Topic()
                .setId(id)
                .setName(name)
                .setDesc(desc)
                .setImage(image);
        featured.add(topic);
    }
    return featured;
}
示例3: getSynonyms
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Returns the synonym table, filling it from thesaurus.com the first time it is requested.
 *
 * <p>While the {@code synonyms} map is empty, each entry of {@code wordList} is looked up at
 * {@code http://www.thesaurus.com/browse/<word>}; the text of every {@code .text} element found
 * under the {@code .relevancy-list} elements of the {@code relevancy-block} elements is stored
 * as that word's synonym list. A word whose page cannot be fetched is reported on
 * {@code System.err} and left out of the map, and the remaining words are still processed.
 *
 * @return the map from word to its scraped synonyms
 * @throws IOException kept for interface compatibility; fetch failures are handled per word
 * @throws InterruptedException kept for interface compatibility; not raised by this body
 */
public HashMap<String, ArrayList<String>> getSynonyms() throws IOException, InterruptedException {
    if (!synonyms.isEmpty()) {
        // Already populated by an earlier call.
        return synonyms;
    }
    for (String word : wordList) {
        try {
            Document doc = Jsoup.connect("http://www.thesaurus.com/browse/" + word)
                    .userAgent("Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0")
                    .timeout(3000)
                    .get();
            Elements text = doc.getElementsByClass("relevancy-block")
                    .select(".relevancy-list")
                    .select(".text");
            ArrayList<String> synonymList = new ArrayList<>(text.size());
            for (int j = 0; j < text.size(); j++) {
                synonymList.add(text.get(j).text());
            }
            synonyms.put(word, synonymList);
        } catch (IOException e) {
            // Best effort: one failed lookup must not abort the others, but it should not
            // vanish silently either.
            System.err.println("Failed to fetch synonyms for \"" + word + "\": " + e);
        }
    }
    return synonyms;
}
示例4: takeHistoryNews
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Extracts the "history news" entries from a page.
 *
 * <p>The first element with class {@code miwen} is the container; every {@code ul} inside it
 * is one entry. Title and link come from the anchors of the entry's first {@code info}
 * element, the time from its first {@code time} element, and the image from its {@code img}
 * tags. Link and image paths are prefixed with {@code AppUtils.Constants.URL_ILISHI}.
 *
 * <p>If the page has no {@code miwen} container an empty list is returned, and an entry that
 * lacks an {@code info} or {@code time} element is skipped, so one malformed entry does not
 * abort the whole parse.
 *
 * @param str HTML source of the page
 * @return the parsed entries; empty when the container is missing
 */
public List<IHistoryHistoryNews> takeHistoryNews(String str) {
    List<IHistoryHistoryNews> historyNewses = new ArrayList<>();
    Document document = Jsoup.parse(str);
    Elements containers = document.getElementsByClass("miwen");
    if (containers.isEmpty()) {
        return historyNewses;
    }
    for (Element entry : containers.get(0).getElementsByTag("ul")) {
        Elements infoBlocks = entry.getElementsByClass("info");
        Elements timeBlocks = entry.getElementsByClass("time");
        if (infoBlocks.isEmpty() || timeBlocks.isEmpty()) {
            // Malformed entry: nothing to read the title/time from.
            continue;
        }
        // The same anchors supply both the title text and the link target.
        Elements links = infoBlocks.get(0).getElementsByTag("a");

        IHistoryHistoryNews historyNews = new IHistoryHistoryNews();
        historyNews.setTitle(links.text());
        historyNews.setTime(timeBlocks.get(0).text());
        historyNews.setHref(AppUtils.Constants.URL_ILISHI + links.attr("href"));
        historyNews.setImgHref(AppUtils.Constants.URL_ILISHI + entry.getElementsByTag("img").attr("src"));
        historyNewses.add(historyNews);
    }
    return historyNewses;
}
示例5: getVacancies
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Collects all vacancies for a search string by walking the result pages in order.
 *
 * <p>Pages are requested through {@code getDocument(searchString, pageNum)} starting at page 0.
 * Paging stops at the first page that contains no element with class {@code job}, when a page
 * cannot be loaded, or when no document is returned. Stopping on a load failure matters:
 * continuing would either dereference a null document or re-read the previous page forever.
 *
 * @param searchString query passed on to {@code getDocument}
 * @return the vacancies gathered from every page read successfully; never {@code null}
 */
@Override
public List<Vacancy> getVacancies(String searchString) {
    List<Vacancy> result = new ArrayList<>();
    for (int pageNum = 0; ; pageNum++) {
        Document doc;
        try {
            doc = getDocument(searchString, pageNum);
        } catch (IOException e) {
            e.printStackTrace();
            // Keep what was collected so far instead of looping on a stale or null page.
            break;
        }
        if (doc == null) {
            break;
        }
        Elements jobs = doc.getElementsByClass("job");
        if (jobs.isEmpty()) {
            break;
        }
        for (Element job : jobs) {
            Vacancy vac = new Vacancy();
            vac.setTitle(job.getElementsByAttributeValue("class", "title").text());
            vac.setCompanyName(job.getElementsByAttributeValue("class", "company_name").text());
            vac.setSiteName(URL_FORMAT);
            vac.setUrl("https://moikrug.ru" + job.select("a[class=job_icon]").attr("href"));
            // text() yields "" when nothing matches, so no extra emptiness check is needed.
            vac.setSalary(job.getElementsByAttributeValue("class", "salary").text());
            vac.setCity(job.getElementsByAttributeValue("class", "location").text());
            result.add(vac);
        }
    }
    return result;
}
示例6: getCategoryList
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Downloads {@code http://www.aihuhua.com/baike/} and reads the flower categories from it.
 *
 * <p>The first element with class {@code catelist} is the container. Each of its child nodes
 * that has children of its own becomes one category: an {@code a} child supplies the URL
 * ({@code href}) and, through its second child node, the image path ({@code src}); an
 * {@code h2} child supplies the name ({@code title}).
 *
 * <p>An empty list is returned when the page cannot be fetched (the error is printed) or when
 * it has no {@code catelist} element. A link with fewer than two child nodes leaves the image
 * path unset instead of failing.
 *
 * @return the categories found; never {@code null}
 */
private static List<FlowerCategory> getCategoryList() {
    List<FlowerCategory> categories = new ArrayList<FlowerCategory>();
    try {
        Document doc = Jsoup.connect("http://www.aihuhua.com/baike/").get();
        Element cates = doc.getElementsByClass("catelist").first();
        if (cates == null) {
            // Page layout changed or the container is missing: nothing to read.
            return categories;
        }
        for (Node node : cates.childNodes()) {
            List<Node> childs = node.childNodes();
            if (childs.isEmpty()) {
                // Leaf nodes carry no category data.
                continue;
            }
            FlowerCategory category = new FlowerCategory();
            for (Node child : childs) {
                if ("a".equals(child.nodeName())) {
                    category.setUrl(child.attr("href"));
                    // The image is read from the link's second child node; guard the index.
                    if (child.childNodes().size() > 1) {
                        category.setImgPath(child.childNode(1).attr("src"));
                    }
                } else if ("h2".equals(child.nodeName())) {
                    category.setName(child.attr("title"));
                }
            }
            categories.add(category);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return categories;
}
示例7: getHtmlByClass
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Requests {@code url} with an HTTP POST and returns the markup of every element carrying the
 * given class.
 *
 * @param url page to request
 * @param eleName class name to look up
 * @return the string form of the matching elements, or {@code ""} when the request fails with
 *     an {@code IOException} (the stack trace is printed)
 */
public static String getHtmlByClass(String url,String eleName){
    try {
        Document page = Jsoup.connect(url).post();
        return page.getElementsByClass(eleName).toString();
    } catch (IOException e) {
        e.printStackTrace();
        return "";
    }
}
示例8: takeClickRank
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Extracts the "click ranking" list from a page.
 *
 * <p>The first element with class {@code click} is the container; every {@code li} inside it
 * becomes one entry whose title is the text of its anchors and whose link is their
 * {@code href} prefixed with {@code AppUtils.Constants.URL_ILISHI}.
 *
 * @param str HTML source of the page
 * @return the ranking entries; empty when the page has no {@code click} container
 */
public List<IHistoryClickRank> takeClickRank(String str) {
    List<IHistoryClickRank> clickRankList = new ArrayList<>();
    Document document = Jsoup.parse(str);
    Elements containers = document.getElementsByClass("click");
    if (containers.isEmpty()) {
        // No ranking section on this page; get(0) would throw.
        return clickRankList;
    }
    for (Element item : containers.get(0).getElementsByTag("li")) {
        // The same anchors supply both the title text and the link target.
        Elements links = item.getElementsByTag("a");
        IHistoryClickRank clickRank = new IHistoryClickRank();
        clickRank.setTitle(links.text());
        clickRank.setHref(AppUtils.Constants.URL_ILISHI + links.attr("href"));
        clickRankList.add(clickRank);
    }
    return clickRankList;
}
示例9: takeProposeRead
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Extracts the "recommended reading" list from a page.
 *
 * <p>The first element with class {@code oldpic} is the container; every {@code li} inside it
 * becomes one entry. The title is the {@code title} attribute of the entry's images, the link
 * is the {@code href} of its anchors and the image link is the {@code src} of its images, the
 * latter two prefixed with {@code AppUtils.Constants.URL_ILISHI}.
 *
 * @param str HTML source of the page
 * @return the recommended entries; empty when the page has no {@code oldpic} container
 */
public List<IHistoryOldPhoto> takeProposeRead(String str) {
    List<IHistoryOldPhoto> proposeReads = new ArrayList<>();
    Document document = Jsoup.parse(str);
    Elements containers = document.getElementsByClass("oldpic");
    if (containers.isEmpty()) {
        // No recommended-reading section on this page; get(0) would throw.
        return proposeReads;
    }
    for (Element item : containers.get(0).getElementsByTag("li")) {
        // The images supply both the title and the picture source.
        Elements images = item.getElementsByTag("img");
        IHistoryOldPhoto proposeRead = new IHistoryOldPhoto();
        proposeRead.setTitle(images.attr("title"));
        proposeRead.setHref(AppUtils.Constants.URL_ILISHI + item.getElementsByTag("a").attr("href"));
        proposeRead.setImgHref(AppUtils.Constants.URL_ILISHI + images.attr("src"));
        proposeReads.add(proposeRead);
    }
    return proposeReads;
}
示例10: convertBrToDoc
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Reads an HTML document from a reader and extracts one value per element definition.
 *
 * <p>The reader is consumed line by line (lines re-joined with {@code "\r\n"}) and is always
 * closed, even when reading fails; on an {@code IOException} the stack trace is printed and
 * whatever was read so far is still parsed. For each definition, elements are looked up by
 * class when {@code ElementType} is {@code "CLASS"} or by tag when it is {@code "TAG"}; the
 * result is appended as {@code ElementName + "\t" + value}, where the value is the elements'
 * markup for {@code ElementValueType} {@code "html"} or their text for {@code "text"}.
 * Definitions with any other (or missing) type are skipped.
 *
 * @param br source of the HTML; closed by this method
 * @param list element definitions to evaluate, in order
 * @return one tab-separated "name, value" line per definition that produced output
 */
public ArrayList<String> convertBrToDoc(BufferedReader br, ArrayList<ElementDEF> list){
    StringBuilder buf = new StringBuilder();
    // try-with-resources closes the reader on every path, not only after a clean read.
    try (BufferedReader reader = br) {
        String line;
        while ((line = reader.readLine()) != null) {
            buf.append(line).append("\r\n");
        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    Document doc = Jsoup.parse(buf.toString());
    ArrayList<String> retList = new ArrayList<String>();
    for (ElementDEF eDef : list) {
        Elements eleList = null;
        // Constant-first equals: a definition with a null field is skipped, not a crash.
        if ("CLASS".equals(eDef.ElementType)) {
            eleList = doc.getElementsByClass(eDef.ElementValue);
        } else if ("TAG".equals(eDef.ElementType)) {
            eleList = doc.getElementsByTag(eDef.ElementValue);
        }
        if (eleList == null) {
            continue;
        }
        if ("html".equals(eDef.ElementValueType)) {
            retList.add(eDef.ElementName + "\t" + eleList.toString());
        } else if ("text".equals(eDef.ElementValueType)) {
            retList.add(eDef.ElementName + "\t" + eleList.text());
        }
    }
    return retList;
}
示例11: findNewIsinFromHTML
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Reads the ISIN from the first paragraph of the page's first {@code fund-block} element.
 *
 * <p>The paragraph text must consist of exactly two space-separated parts; the second part is
 * the ISIN and has to pass {@code isinCodeValid}.
 *
 * @param doc parsed source page
 * @return the validated ISIN
 * @throws SourceHTMLChangedException when the block or paragraph is missing, or the paragraph
 *     text does not split into exactly two parts
 * @throws IsinNotFoundException when the extracted value fails {@code isinCodeValid}
 * @throws IOException declared by the signature
 * @throws ComparisonException declared by the signature
 */
private static String findNewIsinFromHTML(Document doc) throws IOException, ComparisonException {
    Elements fundBlocks = doc.getElementsByClass("fund-block");
    if (fundBlocks == null || fundBlocks.isEmpty()) {
        throw new SourceHTMLChangedException();
    }

    Element firstParagraph = fundBlocks.first().select("p").first();
    if (firstParagraph == null) {
        throw new SourceHTMLChangedException();
    }

    // Expected shape: "<label> <isin>".
    String[] parts = firstParagraph.text().split(" ");
    if (parts.length != 2) {
        throw new SourceHTMLChangedException();
    }

    String candidate = parts[1];
    if (isinCodeValid(candidate)) {
        return candidate;
    }
    throw new IsinNotFoundException("Invalid ISIN format scraped from source page");
}
示例12: crawlQuoraPage
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Crawls one Quora question page, prints its statistics as a tab-separated line and optionally
 * follows its related questions one level deep.
 *
 * <p>The URL is recorded in {@code uniqueQuoraLinks} before the page is loaded. For every
 * {@code div.QuestionStats} element the running {@code count}, the URL and the text of at most
 * the first two {@code strong} elements are printed. The line is finished with the "last
 * asked" text of each {@code QuestionLastActivityTime} element, or with a bare newline when
 * there is none.
 *
 * @param url page to crawl
 * @param crawlRelated when {@code true}, links found under {@code question_related} elements
 *     (limited through {@code Utils.getSublist}) are crawled too, with related-crawling
 *     switched off for them
 */
private void crawlQuoraPage(String url, boolean crawlRelated) {
    uniqueQuoraLinks.add(url);
    Document doc = getDocument(url);
    if (doc == null) {
        return;
    }

    for (Element stats : doc.select("div.QuestionStats")) {
        System.out.print(count++ + "\t" + url);
        Elements values = stats.getElementsByTag("strong");
        // Only the first two statistics are reported.
        int shown = Math.min(2, values.size());
        for (int k = 0; k < shown; k++) {
            System.out.print("\t" + values.get(k).getElementsByTag("strong").text());
        }
    }

    Elements activityElements = doc.getElementsByClass("QuestionLastActivityTime");
    if (activityElements.isEmpty()) {
        System.out.println();
    } else {
        for (Element activity : activityElements) {
            String lastAsked = StringUtils.remove(activity.text(), "Last asked: ");
            // NOTE(review): dates without "201" get ", 2016" appended — presumably the site
            // omits the year for current-year dates; confirm the hard-coded year.
            if (!lastAsked.contains("201")) {
                lastAsked = lastAsked + ", 2016";
            }
            System.out.println("\t" + lastAsked);
        }
    }

    if (!crawlRelated) {
        return;
    }

    List<Element> relatedLinks = new ArrayList<>();
    for (Element related : doc.getElementsByClass("question_related")) {
        relatedLinks.addAll(related.select("a[href]"));
    }
    relatedLinks = Utils.getSublist(relatedLinks, QuoraConstants.numReLatedQuestion);
    for (Element link : relatedLinks) {
        String relatedUrl = link.absUrl("href");
        // Skip pages already visited in this crawl.
        if (!uniqueQuoraLinks.contains(relatedUrl)) {
            crawlQuoraPage(relatedUrl, false);
        }
    }
}