当前位置: 首页>>代码示例>>Java>>正文


Java Document.getElementsByTag方法代码示例

本文整理汇总了Java中org.jsoup.nodes.Document.getElementsByTag方法的典型用法代码示例。如果您正苦于以下问题:Java Document.getElementsByTag方法的具体用法?Java Document.getElementsByTag怎么用?Java Document.getElementsByTag使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.jsoup.nodes.Document的用法示例。


在下文中一共展示了Document.getElementsByTag方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: test6

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Parses a locally saved page, takes the first {@code <script>} whose data mentions
 * {@code ticketInfoForPassengerForm}, evaluates it (cut off at the last occurrence of
 * {@code "var"}) in a JavaScript engine and prints the object's {@code purpose_codes} entry.
 *
 * @throws Exception if the file cannot be read, no usable script is found, or evaluation fails
 */
@Test
    public void test6() throws Exception {
        Document document = Jsoup.parse(FileUtils.readFileToString(new File("/Users/eddy/Desktop/content")));
        Elements elements = document.getElementsByTag("script");

        ScriptEngineManager scriptEngineManager = new ScriptEngineManager();
        ScriptEngine engine = scriptEngineManager.getEngineByExtension("js");
        // getEngineByExtension returns null when no engine is registered for "js".
        if (engine == null) {
            throw new IllegalStateException("No JavaScript script engine is available");
        }

        String ticketInfo = elements.stream().filter(e -> e.data().contains("ticketInfoForPassengerForm") && e.childNodes().size() > 0)
                .findFirst().map(e -> e.childNode(0).outerHtml()).orElse(StringUtils.EMPTY);

        // When no script matched (empty string) or the script holds no "var", lastIndexOf
        // yields -1 and substring(0, -1) would fail with an opaque index exception.
        int lastVar = ticketInfo.lastIndexOf("var");
        if (lastVar < 0) {
            throw new IllegalStateException("No usable ticketInfoForPassengerForm script found in the page");
        }
        ticketInfo = ticketInfo.substring(0, lastVar);

        engine.eval(ticketInfo);
        ScriptObjectMirror o2 = (ScriptObjectMirror) engine.get("ticketInfoForPassengerForm");
        System.out.println(o2.get("purpose_codes"));

    }
 
开发者ID:justice-code,项目名称:Thrush,代码行数:23,代码来源:LoginTest.java

示例2: getResourceArrayMap

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Builds a map from resource name to the texts of its {@code <item>} children, for every
 * element with the given tag in the document returned by {@code getValuesXmlDocument()}.
 *
 * @param tag tag name of the array resources to collect
 * @return map keyed by each element's {@code name} attribute; one list entry per {@code <item>}
 */
protected Map<String, List<String>> getResourceArrayMap(String tag) {
    Map<String, List<String>> map = new HashMap<>();

    Document document = getValuesXmlDocument();
    Elements elements = document.getElementsByTag(tag);

    for (Element element : elements) {
        List<String> itemsText = new ArrayList<>();

        for (Element item : element.getElementsByTag("item")) {
            if (item.childNodeSize() > 0 && item.childNode(0) instanceof TextNode) {
                itemsText.add(((TextNode) item.childNode(0)).text());
            } else {
                // An empty <item/> has no child node and a nested element is not a TextNode;
                // the unguarded cast used to throw here. Fall back to the element's own text
                // ("" for an empty item) so every item keeps its position in the list.
                itemsText.add(item.text());
            }
        }

        map.put(element.attr("name"), itemsText);
    }
    return map;
}
 
开发者ID:kkmike999,项目名称:KBUnitTest,代码行数:23,代码来源:ShadowResources.java

示例3: visit

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Visits a crawled page: for every {@code <img>} element, passes its {@code src} value to
 * {@code download.DownloadS} and, when a name comes back, hands that name to
 * {@code DetectFace.Start} together with the "Save" directory.
 */
@Override
public void visit(Page page, CrawlDatums venus) {
	page.getUrl();
	Document doc = page.doc();
	Elements images = doc.getElementsByTag("img");
	download downloader = new download();
	DetectFace detector = new DetectFace();

	// Create the "Save" directory if it does not exist yet.
	File saveDir = new File("Save");
	if (!saveDir.exists()) {
		saveDir.mkdirs();
	}

	for (Element image : images) {
		// The image address is taken from the element's src attribute.
		String src = (String) image.attr("src");
		String fileName = downloader.DownloadS(src);
		if (fileName == null) {
			// No name was returned for this image; report it and move on.
			System.out.println("ͼƬ��ȱʧ");
			continue;
		}
		detector.Start("Save", fileName);
	}
}
 
开发者ID:DoVenus,项目名称:FaceSpider,代码行数:27,代码来源:webCollector.java

示例4: getStringResNameAndValueMap

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Builds the resource name to value map for strings.xml.
 *
 * @return map keyed by the {@code name} attribute of each {@code <string>} element, holding the
 *         text of its first child node; elements whose first child is not a text node are omitted
 */
protected Map<String, String> getStringResNameAndValueMap() {
    Map<String, String> nameToValue = new HashMap<>();

    Document document = getValuesXmlDocument();

    for (Element entry : document.getElementsByTag("string")) {
        boolean startsWithText = entry.childNodeSize() > 0 && entry.childNode(0) instanceof TextNode;
        if (!startsWithText) {
            continue;
        }
        TextNode firstChild = (TextNode) entry.childNode(0);
        nameToValue.put(entry.attr("name"), firstChild.text());
    }

    return nameToValue;
}
 
开发者ID:kkmike999,项目名称:KBUnitTest,代码行数:25,代码来源:ShadowResources.java

示例5: run

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Downloads the page at {@code href}, scans the data of every {@code <script>} element with a
 * regular expression and prints {@code teamCity}, the capitalized {@code teamName} and the
 * upper-cased captured value, comma separated, for each data node that matches.
 */
@Override
public void run() {
    Document parsedDocument = null;
    try {
        parsedDocument = Jsoup.connect(href).timeout(600 * 1000).get();
    } catch (IOException e) {
        e.printStackTrace();
    }

    // After a failed download there is no document; scanning it used to dereference null.
    if (parsedDocument != null) {
        Elements scriptElements = parsedDocument.getElementsByTag("script");

        Pattern pattern = Pattern.compile(".*value\":\"(.*)\"\\},\\{\"name.*");
        for (Element element : scriptElements) {
            for (DataNode node : element.dataNodes()) {
                // Line breaks are removed first because '.' does not match them.
                Matcher matcher = pattern.matcher(node.getWholeData().replaceAll("\n", ""));
                if (matcher.matches()) {
                    System.out.println(teamCity + "," + StringUtils.capitalize(teamName) + "," + matcher.group(1).toUpperCase());
                }
            }
        }
    }
    // NOTE(review): kept from the original; why the current thread is interrupted at the end
    // of the run is not visible from this method - confirm against the caller.
    Thread.currentThread().interrupt();
}
 
开发者ID:riteshakya037,项目名称:Android-Scrapper,代码行数:22,代码来源:ExampleUnitTest.java

示例6: appendGames

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Looks through the {@code <script>} data nodes of the document for the one that starts with
 * {@code window.espn.scoreboardData}, parses the captured JSON into an {@code EspnJson} and
 * adds its teams to {@code teamsList}. Does nothing when {@code document} is {@code null}.
 *
 * @param document parsed page, may be {@code null}
 */
private void appendGames(Document document) {
    if (document == null) {
        return;
    }

    Elements scripts = document.getElementsByTag("script");
    Pattern scoreboardPattern = Pattern.compile("window.espn.scoreboardData[\\s\t]*= (.*);.*window.espn.scoreboardSettings.*");

    for (Element script : scripts) {
        for (DataNode dataNode : script.dataNodes()) {
            String data = dataNode.getWholeData();
            if (!data.startsWith("window.espn.scoreboardData")) {
                continue;
            }
            Matcher scoreboardMatcher = scoreboardPattern.matcher(data);
            if (!scoreboardMatcher.matches()) {
                continue;
            }
            // Group 1 is the text assigned to window.espn.scoreboardData.
            EspnJson parsed = new Gson().fromJson(scoreboardMatcher.group(1), EspnJson.class);
            teamsList.putAll(parsed.getTeams());
        }
    }
}
 
开发者ID:riteshakya037,项目名称:Android-Scrapper,代码行数:18,代码来源:EspnScoreboardParser.java

示例7: getRawData

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Extracts the table data of a document as one string array per {@code <tr>} row.
 * Cells whose text is exactly {@code "Add"} or a number in parentheses (the ratings,
 * e.g. {@code "(12)"}) are left out.
 *
 * @param doc parsed page containing the table rows
 * @return one array per {@code <tr>}, holding the text of each kept {@code <td>} in order;
 *         a row without kept cells yields an empty array
 */
public static String[][] getRawData(Document doc){
    // Look the rows up once instead of once for sizing and once for iterating.
    java.util.List<Element> rows = doc.getElementsByTag("tr");
    String[][] rawData = new String[rows.size()][];
    int j = 0;
    for (Element curr : rows) {
        java.util.List<Element> cells = curr.getElementsByTag("td");
        // Size for the worst case and trim afterwards: the old fixed "size - 2" threw for rows
        // with fewer than two cells (e.g. header rows) and overflowed or left trailing nulls
        // whenever the number of skipped cells was not exactly two.
        String[] kept = new String[cells.size()];
        int i = 0;
        for (Element info : cells) {
            String text = info.text();
            if(text.equals("Add")) continue;
            if(text.matches("\\(\\d+\\)")) continue;//number between parenthesis, the ratings
            kept[i] = text;
            i++;
        }
        rawData[j] = java.util.Arrays.copyOf(kept, i);
        j++;
    }
    return rawData;
}
 
开发者ID:asdiamond,项目名称:CodeMineProject1,代码行数:17,代码来源:PartPickerScraper.java

示例8: login

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Logs in to the work-report system.
 *
 * @return {@code true} when a {@code TITLE} element of the response page contains "Success";
 *         {@code false} otherwise, including when an exception occurs (it is logged)
 */
public boolean login() {
	HttpPost request = new HttpPost(Api.loginUrl);
	List<NameValuePair> form = new ArrayList<NameValuePair>();
	form.add(new BasicNameValuePair("username", SessionUtil.getUsername()));
	form.add(new BasicNameValuePair("password", SessionUtil.getPassword()));
	try {
		request.setEntity(new UrlEncodedFormEntity(form, Consts.UTF_8));
		// Send the login request.
		HttpResponse response = client.execute(request);

		// Use the charset announced by the response, or utf-8 when none is reported.
		String charset = HttpHeaderUtil.getResponseCharset(response);
		if (charset == null) {
			charset = "utf-8";
		}
		String html = StringUtil.removeEmptyLine(response.getEntity().getContent(), charset);

		for (Element title : Jsoup.parse(html).getElementsByTag("TITLE")) {
			boolean loggedIn = title.hasText() && title.text().contains("Success");
			if (loggedIn) {
				// Login succeeded.
				return true;
			}
		}
	} catch (Exception e) {
		logger.error("登陆失败:", e);
	}
	return false;
}
 
开发者ID:ichatter,项目名称:dcits-report,代码行数:27,代码来源:UserService.java

示例9: onResponse

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Handles the HTML of an article page: parses it with jsoup, decides which kind of article
 * it is (hosted blog, large format, video, or standard/paid article), updates the header and
 * toolbar accordingly, optionally queues a follow-up request for the comments, and finally
 * passes the extracted items to {@code articleAdapter}.
 *
 * @param response HTML source of the article page
 */
@Override
public void onResponse(String response) {
    // Hide icon
    findViewById(R.id.noNetwork).setVisibility(View.INVISIBLE);

    Document doc = Jsoup.parse(response);

    // If article was loaded from an external App, no image was passed from MainActivity,
    // so it must be fetched in the Collapsing Toolbar
    if (Intent.ACTION_VIEW.equals(getIntent().getAction())) {
        Elements image = doc.select("meta[property=og:image]");
        if (atLeastOneChild(image)) {
            Picasso.with(ArticleActivity.this)
                    .load(image.first().attr("content"))
                    .into((ImageView) findViewById(R.id.imageArticle));
        }
    }

    // Article is from a hosted blog
    // (recognized by the presence of an element with id "content")
    List<Model> items;
    Element content = doc.getElementById("content");
    if (content != null) {
        items = extractBlogArticle(content);
        setTagInHeader(R.string.blog_article, R.color.accent_complementary, Color.WHITE);
    } else {
        // Use the category block, when present, as the activity title
        Elements category = doc.select("div.tt_rubrique_ombrelle");
        if (atLeastOneChild(category)) {
            Log.d(TAG, "Cat: " + category.text());
            setTitle(category.text());
        }
        Elements articles = doc.getElementsByTag("article");
        Element largeFormat = doc.getElementById("hors_format");
        if (largeFormat != null) {
            // Large-format page: no items are extracted, only the header tag is set
            items = new ArrayList<>();
            setTagInHeader(R.string.large_article, R.color.primary_dark, Color.WHITE);
        } else if (articles.isEmpty()) {
            // Video
            items = extractVideo(doc);
            setTagInHeader(R.string.video_article, R.color.accent_complementary, Color.WHITE);
        } else {
            // Standard article
            items = extractStandardArticle(articles);
            // Full article is restricted to paid members
            if (doc.getElementById("teaser_article") != null) {
                if (menu != null) {
                    MenuItem menuItem = menu.findItem(R.id.action_share);
                    if (menuItem != null) {
                        menuItem.setIcon(getResources().getDrawable(R.drawable.ic_share_black));
                    }
                } else {
                    Log.e(TAG, "menu should not be null at this point!");
                }

                CollapsingToolbarLayout collapsingToolbar = findViewById(R.id.collapsing_toolbar);
                collapsingToolbar.setContentScrimResource(R.color.accent);
                setTagInHeader(R.string.paid_article, R.color.accent, Color.BLACK);

                if (getSupportActionBar() != null) {
                    final Drawable upArrow = getResources().getDrawable(R.drawable.ic_arrow_back_black_24dp);
                    getSupportActionBar().setHomeAsUpIndicator(upArrow);
                }
            }
            // After parsing the article, start a new request for comments
            Element react = doc.getElementById("liste_reactions");
            if (react != null) {
                // jsoup's [^prefix] selector matches elements that have an attribute whose
                // name starts with the given prefix
                Elements dataAjURI = react.select("[^data-aj-uri]");
                if (atLeastOneChild(dataAjURI)) {
                    String commentPreviewURI = Constants.BASE_URL2 + dataAjURI.first().attr("data-aj-uri");
                    REQUEST_QUEUE.add(new StringRequest(Request.Method.GET, commentPreviewURI, commentsReceived, errorResponse));
                }
            }
        }
    }
    // Every branch above assigns items; hand them to the adapter and hide the loader
    articleAdapter.insertItems(items);
    findViewById(R.id.articleLoader).setVisibility(View.GONE);
}
 
开发者ID:MBach,项目名称:LeMondeRssReader,代码行数:77,代码来源:ArticleActivity.java

示例10: getTicketInfoForPassengerForm

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Reads one entry of the JavaScript object {@code ticketInfoForPassengerForm} that is declared
 * in a {@code <script>} element of the given page.
 *
 * <p>The first script whose data mentions the object is cut off at the last occurrence of
 * {@code "var"} and evaluated in a JavaScript engine; the resulting global object is then queried.
 *
 * @param html page source to search
 * @param key  name of the entry to read from the object
 * @return the value stored under {@code key}, cast to {@code String}
 * @throws ScriptException if no usable script is found, no JavaScript engine is available,
 *                         the script fails to evaluate, or it does not define the object
 */
private static String getTicketInfoForPassengerForm(String html, String key) throws ScriptException {
    Document document = Jsoup.parse(html);

    Elements elements = document.getElementsByTag("script");
    String jsContent = elements.stream().filter(e -> e.data().contains("ticketInfoForPassengerForm") && e.childNodes().size() > 0)
            .findFirst().map(e -> e.childNode(0).outerHtml()).orElse(StringUtils.EMPTY);

    // With no matching script (empty string) or no "var" in it, lastIndexOf yields -1 and
    // substring(0, -1) used to fail with a StringIndexOutOfBoundsException.
    int lastVar = jsContent.lastIndexOf("var");
    if (lastVar < 0) {
        throw new ScriptException("No usable ticketInfoForPassengerForm script found in the page");
    }
    jsContent = jsContent.substring(0, lastVar);

    ScriptEngineManager scriptEngineManager = new ScriptEngineManager();
    ScriptEngine engine = scriptEngineManager.getEngineByExtension("js");
    // getEngineByExtension returns null when no engine is registered for "js".
    if (engine == null) {
        throw new ScriptException("No JavaScript script engine is available");
    }
    engine.eval(jsContent);

    ScriptObjectMirror objectMirror = (ScriptObjectMirror) engine.get("ticketInfoForPassengerForm");
    if (objectMirror == null) {
        throw new ScriptException("Script did not define ticketInfoForPassengerForm");
    }
    return (String) objectMirror.get(key);
}
 
开发者ID:justice-code,项目名称:Thrush,代码行数:15,代码来源:TokenUtil.java

示例11: collectDetail

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Extracts the detail information of a torrent page.
 *
 * @param body HTML source of the detail page
 * @return the collected detail, or {@code null} when the page contains no {@code <dl>} element;
 *         the link and intro are only set when the corresponding elements exist
 */
private TorrentDetail collectDetail(String body) {
    TorrentDetail detail = new TorrentDetail();
    Map<String, String> info = new HashMap<>();

    // Collect the torrent info (dt/dd pairs)
    Document doc = Jsoup.parse(body);

    if(doc.select("dl").size() < 1) {
        return null;
    }

    Elements dtEles = doc.getElementsByTag("dt");
    Elements ddEles = doc.getElementsByTag("dd");
    if(dtEles.size() == ddEles.size()) {
        // NOTE(review): the last dt/dd pair is skipped, as in the original; confirm this is intended.
        for(int i = 0; i < dtEles.size() - 1; i++) {
            String key = dtEles.get(i).text().trim();
            if(key.isEmpty()) {
                // Nothing to strip and nothing to key on; substring(0, -1) would throw.
                continue;
            }
            // Drop the last character of the label (presumably a trailing colon).
            key = key.substring(0, key.length() - 1);
            String value = ddEles.get(i).text().trim();
            info.put(key, value);
        }
    }
    detail.setInfo(info);

    // Collect the torrent's magnet link; first() is null when nothing matches
    Element linkEle = doc.select("div.download a").first();
    if(linkEle != null) {
        detail.setLink(linkEle.attr("href"));
    }

    // Collect the torrent's description; first() is null when nothing matches
    Element introEle = doc.select("div.nfo pre").first();
    if(introEle != null) {
        detail.setIntro(introEle.text());
    }

    return detail;
}
 
开发者ID:zhiqing-lee,项目名称:thepiratebayapi,代码行数:36,代码来源:ThePirateBaySpider.java

示例12: getURLsFromPage

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Collects one entry per {@code <post>} element of the page, formed as the absolute
 * {@code file_url} attribute, a {@code #} and the element's {@code id} attribute.
 *
 * @param page parsed page to scan
 * @return the collected entries in document order
 */
@Override
public List<String> getURLsFromPage(Document page) {
    List<String> urls = new ArrayList<>(100);
    for (Element post : page.getElementsByTag("post")) {
        String fileUrl = post.absUrl("file_url");
        String postId = post.attr("id");
        urls.add(fileUrl + "#" + postId);
    }
    return urls;
}
 
开发者ID:RipMeApp,项目名称:ripme,代码行数:9,代码来源:XbooruRipper.java

示例13: getURLs

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Fetches the page at {@code url} and collects the URLs of its album pictures
 * ({@code img.album-image}) and album videos ({@code video.album-video}).
 *
 * @param url page to fetch
 * @return picture URLs followed by video URLs, in document order
 * @throws IOException if the page cannot be fetched or parsed, or a collected address is not a valid URL
 */
public static List<URL> getURLs(URL url) throws IOException{

        Response resp = Http.url(url)
                            .ignoreContentType()
                            .response();

        Document doc = resp.parse();

        List<URL> URLs = new ArrayList<>();
        //Pictures
        Elements imgs = doc.getElementsByTag("img");
        for (Element img : imgs) {
            if (img.hasClass("album-image")) {
                String imageURL = img.attr("src");
                if (imageURL.isEmpty()) {
                    // No address to download.
                    continue;
                }
                // Sources without a scheme get "https:" prepended; an address that already
                // carries one must not be prefixed a second time.
                if (!imageURL.startsWith("http:") && !imageURL.startsWith("https:")) {
                    imageURL = "https:" + imageURL;
                }
                URLs.add(new URL(imageURL));
            }
        }
        //Videos
        Elements vids = doc.getElementsByTag("video");
        for (Element vid : vids) {
            if (vid.hasClass("album-video")) {
                // first() is null when the video has no <source> child.
                Element firstSource = vid.getElementsByTag("source").first();
                if (firstSource == null) {
                    continue;
                }
                String videoURL = firstSource.attr("src");
                URLs.add(new URL(videoURL));
            }
        }

        return URLs;
    }
 
开发者ID:RipMeApp,项目名称:ripme,代码行数:31,代码来源:EromeRipper.java

示例14: parseAnything

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Downloads a news page, picks the {@code <p>} element with the longest text that passes
 * {@code isIllegalStringInTag}, and logs the tag-stripped text of every acceptable element
 * under that paragraph's parent, one per line.
 *
 * @throws IOException if the page cannot be downloaded
 */
@Test
public void parseAnything() throws IOException{

	final String url = "http://www.pcgameshardware.de/Internet-Thema-34041/News/VPN-schuetzt-nicht-1149669";

	final Document doc = Jsoup.connect(url)
			.userAgent(USER_AGENT)
			.timeout(15000)
			.get();
	final Elements pTags = doc.getElementsByTag("p");
	int maxLength = 0;
	Element biggestElement = null;
	for (final Element pTag : pTags){
		final int length = pTag.text().length();
		if (length > maxLength && !isIllegalStringInTag(pTag)){
			maxLength = length;
			biggestElement = pTag;
		}
	}

	// No paragraph qualified (none present, all empty or all rejected); the parent lookup
	// below used to fail with a bare NullPointerException in that case.
	if (biggestElement == null) {
		throw new IllegalStateException("No usable <p> element found at " + url);
	}

	// Build the text in a StringBuilder instead of repeated String concatenation.
	final StringBuilder mainText = new StringBuilder();
	for (final Element candidate : biggestElement.parent().getAllElements()){
		if (!isIllegalStringInTag(candidate)){
			mainText.append(HtmlUtil.removeTags(candidate.text(), false)).append("\n");
		}
	}

	LOGGER.debug(mainText.toString());
}
 
开发者ID:XMBomb,项目名称:InComb,代码行数:39,代码来源:JSoupTest.java

示例15: findImages

import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Collects the addresses of all images on a page, taken from the {@code abs:src}
 * attribute of each {@code <img>} element.
 *
 * @param html parsed page to inspect
 * @return set of the {@code abs:src} values of the page's {@code <img>} elements;
 *         empty when the page has none
 */
public static Set<String> findImages(Document html) {

    Elements imgElements = html.getElementsByTag("img");
    Set<String> sources = new HashSet<String>();

    if (imgElements == null || imgElements.size() == 0) {
        return sources;
    }

    for (Element imgElement : imgElements) {
        sources.add(imgElement.attr("abs:src"));
    }
    return sources;
}
 
开发者ID:xuxueli,项目名称:xxl-crawler,代码行数:21,代码来源:JsoupUtil.java


注:本文中的org.jsoup.nodes.Document.getElementsByTag方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。