本文整理汇总了Java中org.jsoup.nodes.Document.getElementsByTag方法的典型用法代码示例。如果您正苦于以下问题：Java Document.getElementsByTag方法的具体用法？Java Document.getElementsByTag怎么用？Java Document.getElementsByTag使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.jsoup.nodes.Document的用法示例。
在下文中一共展示了Document.getElementsByTag方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: test6
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Parses a locally saved HTML page, evaluates the inline script that mentions
 * {@code ticketInfoForPassengerForm} (minus everything from its last {@code var} onwards)
 * and prints the {@code purpose_codes} entry of the resulting script object.
 *
 * @throws Exception if the file cannot be read or the script fails to evaluate
 */
@Test
public void test6() throws Exception {
    Document document = Jsoup.parse(FileUtils.readFileToString(new File("/Users/eddy/Desktop/content")));
    Elements elements = document.getElementsByTag("script");

    String ticketInfo = elements.stream()
            .filter(e -> e.data().contains("ticketInfoForPassengerForm") && e.childNodes().size() > 0)
            .findFirst()
            .map(e -> e.childNode(0).outerHtml())
            .orElse(StringUtils.EMPTY);

    // Without this check a page lacking the script fails with an opaque
    // StringIndexOutOfBoundsException from substring(0, -1).
    int lastVar = ticketInfo.lastIndexOf("var");
    if (lastVar < 0) {
        throw new AssertionError("no inline script with a 'var' statement mentions ticketInfoForPassengerForm");
    }

    ScriptEngine engine = new ScriptEngineManager().getEngineByExtension("js");
    if (engine == null) {
        throw new AssertionError("no script engine is registered for the 'js' extension");
    }

    // Only the part of the script before its last 'var' statement is evaluated.
    engine.eval(ticketInfo.substring(0, lastVar));
    ScriptObjectMirror o2 = (ScriptObjectMirror) engine.get("ticketInfoForPassengerForm");
    System.out.println(o2.get("purpose_codes"));
}
示例2: getResourceArrayMap
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Builds a map from the {@code name} attribute of every element with the given tag to the
 * texts of that element's {@code item} descendants, in document order.
 *
 * <p>An item's text is taken from its first child when that child is a text node; otherwise
 * (empty item, or an item whose first child is an element) the item's combined text is used,
 * so such items no longer cause an exception.
 *
 * @param tag tag name of the array resource elements to collect
 * @return map of resource name to the list of item texts
 */
protected Map<String, List<String>> getResourceArrayMap(String tag) {
    Map<String, List<String>> map = new HashMap<>();
    Document document = getValuesXmlDocument();
    for (Element element : document.getElementsByTag(tag)) {
        List<String> itemsText = new ArrayList<>();
        for (Element item : element.getElementsByTag("item")) {
            boolean startsWithText = item.childNodeSize() > 0 && item.childNode(0) instanceof TextNode;
            String text = startsWithText ? ((TextNode) item.childNode(0)).text() : item.text();
            itemsText.add(text);
        }
        map.put(element.attr("name"), itemsText);
    }
    return map;
}
示例3: visit
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Visits a crawled page: passes the {@code src} of every {@code img} element to the
 * downloader and, when a file name comes back, hands it to the face detector together with
 * the "Save" directory name.
 *
 * @param page  the crawled page whose document is scanned for images
 * @param venus crawl datums supplied by the crawler (not used here)
 */
@Override
public void visit(Page page, CrawlDatums venus) {
    page.getUrl();
    Elements images = page.doc().getElementsByTag("img");
    download downloader = new download();
    DetectFace faceDetector = new DetectFace();

    // Make sure the "Save" directory exists before any image is processed.
    File saveDir = new File("Save");
    if (!saveDir.exists()) {
        saveDir.mkdirs();
    }

    for (Element image : images) {
        // NOTE(review): DownloadS presumably returns the stored file name, or null on failure — confirm.
        String savedName = downloader.DownloadS(image.attr("src"));
        if (savedName == null) {
            System.out.println("ͼƬ��ȱʧ");
            continue;
        }
        faceDetector.Start("Save", savedName);
    }
}
示例4: getStringResNameAndValueMap
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Builds the resource-name to value lookup table for the {@code string} elements of the
 * values XML document.
 *
 * <p>Only elements whose first child node is a text node are included; the value is that
 * text node's text.
 *
 * @return map of string resource name to its text value
 */
protected Map<String, String> getStringResNameAndValueMap() {
    Map<String, String> nameToValue = new HashMap<>();
    for (Element stringElement : getValuesXmlDocument().getElementsByTag("string")) {
        boolean startsWithText =
                stringElement.childNodeSize() > 0 && stringElement.childNode(0) instanceof TextNode;
        if (!startsWithText) {
            continue;
        }
        TextNode valueNode = (TextNode) stringElement.childNode(0);
        nameToValue.put(stringElement.attr("name"), valueNode.text());
    }
    return nameToValue;
}
示例5: run
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Downloads the page at {@code href}, scans the data of every {@code script} element for
 * the {@code value":"..."} pattern and prints one CSV-style line (team city, capitalized
 * team name, upper-cased captured value) per matching data node.
 *
 * <p>If the page cannot be fetched the stack trace is printed and the method returns
 * without scanning, instead of dereferencing a null document.
 */
@Override
public void run() {
    Document parsedDocument;
    try {
        parsedDocument = Jsoup.connect(href).timeout(600 * 1000).get();
    } catch (IOException e) {
        e.printStackTrace();
        return; // nothing was downloaded, so there is nothing to scan
    }

    Pattern pattern = Pattern.compile(".*value\":\"(.*)\"\\},\\{\"name.*");
    for (Element element : parsedDocument.getElementsByTag("script")) {
        for (DataNode node : element.dataNodes()) {
            // Newlines are removed so that '.' in the pattern can span the whole script.
            Matcher matcher = pattern.matcher(node.getWholeData().replace("\n", ""));
            if (matcher.matches()) {
                System.out.println(teamCity + "," + StringUtils.capitalize(teamName) + "," + matcher.group(1).toUpperCase());
            }
        }
    }
    // NOTE(review): sets the interrupt flag of the running thread once the scan is done;
    // kept from the original — confirm that callers actually rely on this.
    Thread.currentThread().interrupt();
}
示例6: appendGames
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Looks for script data that starts with {@code window.espn.scoreboardData}, parses the
 * assigned JSON into an {@code EspnJson} and adds its teams to {@code teamsList}.
 * Does nothing when {@code document} is null.
 *
 * @param document the parsed scoreboard page, may be null
 */
private void appendGames(Document document) {
    if (document == null) {
        return;
    }
    Pattern scoreboardPattern = Pattern.compile("window.espn.scoreboardData[\\s\t]*= (.*);.*window.espn.scoreboardSettings.*");
    for (Element script : document.getElementsByTag("script")) {
        for (DataNode dataNode : script.dataNodes()) {
            String scriptData = dataNode.getWholeData();
            if (!scriptData.startsWith("window.espn.scoreboardData")) {
                continue;
            }
            Matcher scoreboardMatcher = scoreboardPattern.matcher(scriptData);
            if (!scoreboardMatcher.matches()) {
                continue;
            }
            // Group 1 is the JSON literal assigned to window.espn.scoreboardData.
            EspnJson parsed = new Gson().fromJson(scoreboardMatcher.group(1), EspnJson.class);
            teamsList.putAll(parsed.getTeams());
        }
    }
}
示例7: getRawData
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Extracts the cell texts of every table row in the document.
 *
 * <p>For each {@code tr}, the texts of its {@code td} descendants are collected in order,
 * skipping cells whose text is exactly {@code "Add"} or a number in parentheses (the
 * ratings). A row's array has at least as many slots as kept cells; for rows with more than
 * two skipped cells it keeps the original length of {@code tdCount - 2} with trailing
 * {@code null}s, so existing callers see the same shape. Rows with fewer than two
 * {@code td} cells, or fewer than two skipped cells, no longer throw.
 *
 * @param doc the parsed document containing the table rows
 * @return one array of cell texts per {@code tr}, in document order
 */
public static String[][] getRawData(Document doc) {
    String[][] rawData = new String[doc.getElementsByTag("tr").size()][];
    int rowIndex = 0;
    for (Element row : doc.getElementsByTag("tr")) {
        int cellCount = row.getElementsByTag("td").size();
        // Scratch buffer large enough for the case where no cell is skipped.
        String[] kept = new String[cellCount];
        int keptCount = 0;
        for (Element cell : row.getElementsByTag("td")) {
            String text = cell.text();
            if (text.equals("Add")) continue;
            if (text.matches("\\(\\d+\\)")) continue; // number between parentheses, the ratings
            kept[keptCount] = text;
            keptCount++;
        }
        // The original sized each row as cellCount - 2; never go below the kept count (or zero).
        String[] rowData = new String[Math.max(keptCount, cellCount - 2)];
        System.arraycopy(kept, 0, rowData, 0, keptCount);
        rawData[rowIndex] = rowData;
        rowIndex++;
    }
    return rawData;
}
示例8: login
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Logs in to the work-reporting system.
 *
 * <p>Posts the session's username and password to {@code Api.loginUrl} and treats the
 * login as successful when any {@code TITLE} element of the response page contains
 * "Success". Any exception is logged and reported as a failed login.
 *
 * @return {@code true} if the response page indicates success, {@code false} otherwise
 */
public boolean login() {
    HttpPost loginRequest = new HttpPost(Api.loginUrl);
    List<NameValuePair> credentials = new ArrayList<NameValuePair>();
    credentials.add(new BasicNameValuePair("username", SessionUtil.getUsername()));
    credentials.add(new BasicNameValuePair("password", SessionUtil.getPassword()));
    try {
        loginRequest.setEntity(new UrlEncodedFormEntity(credentials, Consts.UTF_8));
        HttpResponse response = client.execute(loginRequest); // perform the login
        String declaredCharset = HttpHeaderUtil.getResponseCharset(response);
        // Fall back to UTF-8 when the response does not declare a charset.
        String effectiveCharset = declaredCharset != null ? declaredCharset : "utf-8";
        String html = StringUtil.removeEmptyLine(response.getEntity().getContent(), effectiveCharset);
        for (Element title : Jsoup.parse(html).getElementsByTag("TITLE")) {
            if (!title.hasText()) {
                continue;
            }
            if (title.text().contains("Success")) {
                return true; // login succeeded
            }
        }
    } catch (Exception e) {
        logger.error("登陆失败:", e);
    }
    return false;
}
示例9: onResponse
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Handles the downloaded HTML of an article page.
 *
 * <p>Parses the response with Jsoup, loads the header image when the activity was opened
 * through an ACTION_VIEW intent, builds the list of items according to the kind of page
 * (hosted blog, large format, video or standard article), and passes that list to the
 * article adapter. For standard articles it also queues a request for the comments when
 * the page exposes a comment URI.
 *
 * @param response the HTML text of the article page
 */
@Override
public void onResponse(String response) {
// Hide icon
findViewById(R.id.noNetwork).setVisibility(View.INVISIBLE);
Document doc = Jsoup.parse(response);
// If article was loaded from an external App, no image was passed from MainActivity,
// so it must be fetched in the Collapsing Toolbar
if (Intent.ACTION_VIEW.equals(getIntent().getAction())) {
Elements image = doc.select("meta[property=og:image]");
if (atLeastOneChild(image)) {
Picasso.with(ArticleActivity.this)
.load(image.first().attr("content"))
.into((ImageView) findViewById(R.id.imageArticle));
}
}
// Article is from a hosted blog
List<Model> items;
Element content = doc.getElementById("content");
if (content != null) {
items = extractBlogArticle(content);
setTagInHeader(R.string.blog_article, R.color.accent_complementary, Color.WHITE);
} else {
// Not a blog page: use the category banner text, when present, as the activity title
Elements category = doc.select("div.tt_rubrique_ombrelle");
if (atLeastOneChild(category)) {
Log.d(TAG, "Cat: " + category.text());
setTitle(category.text());
}
Elements articles = doc.getElementsByTag("article");
Element largeFormat = doc.getElementById("hors_format");
if (largeFormat != null) {
// Large-format page: no items are extracted, only the header tag is set
items = new ArrayList<>();
setTagInHeader(R.string.large_article, R.color.primary_dark, Color.WHITE);
} else if (articles.isEmpty()) {
// Video
items = extractVideo(doc);
setTagInHeader(R.string.video_article, R.color.accent_complementary, Color.WHITE);
} else {
// Standard article
items = extractStandardArticle(articles);
// Full article is restricted to paid members
if (doc.getElementById("teaser_article") != null) {
if (menu != null) {
MenuItem menuItem = menu.findItem(R.id.action_share);
if (menuItem != null) {
menuItem.setIcon(getResources().getDrawable(R.drawable.ic_share_black));
}
} else {
Log.e(TAG, "menu should not be null at this point!");
}
// Restyle the toolbar for a paid article: accent scrim, paid tag and black back arrow
CollapsingToolbarLayout collapsingToolbar = findViewById(R.id.collapsing_toolbar);
collapsingToolbar.setContentScrimResource(R.color.accent);
setTagInHeader(R.string.paid_article, R.color.accent, Color.BLACK);
if (getSupportActionBar() != null) {
final Drawable upArrow = getResources().getDrawable(R.drawable.ic_arrow_back_black_24dp);
getSupportActionBar().setHomeAsUpIndicator(upArrow);
}
}
// After parsing the article, start a new request for comments
Element react = doc.getElementById("liste_reactions");
if (react != null) {
// Select elements carrying an attribute whose name starts with "data-aj-uri"
Elements dataAjURI = react.select("[^data-aj-uri]");
if (atLeastOneChild(dataAjURI)) {
String commentPreviewURI = Constants.BASE_URL2 + dataAjURI.first().attr("data-aj-uri");
REQUEST_QUEUE.add(new StringRequest(Request.Method.GET, commentPreviewURI, commentsReceived, errorResponse));
}
}
}
}
// Publish whatever was extracted and hide the loading indicator
articleAdapter.insertItems(items);
findViewById(R.id.articleLoader).setVisibility(View.GONE);
}
示例10: getTicketInfoForPassengerForm
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Reads one property of the {@code ticketInfoForPassengerForm} JavaScript object embedded
 * in an HTML page.
 *
 * <p>The first {@code script} element whose data mentions the object is located, everything
 * from its last {@code var} onwards is cut off, and the remainder is evaluated with a
 * script engine so the object can be read back.
 *
 * @param html the HTML page containing the inline script
 * @param key  the property of {@code ticketInfoForPassengerForm} to return
 * @return the property value cast to {@code String}
 * @throws ScriptException if no suitable script or script engine is found, or if the script
 *                         fails to evaluate
 */
private static String getTicketInfoForPassengerForm(String html, String key) throws ScriptException {
    Document document = Jsoup.parse(html);
    Elements elements = document.getElementsByTag("script");
    String jsContent = elements.stream()
            .filter(e -> e.data().contains("ticketInfoForPassengerForm") && e.childNodes().size() > 0)
            .findFirst()
            .map(e -> e.childNode(0).outerHtml())
            .orElse(StringUtils.EMPTY);

    // lastIndexOf returns -1 when the script is absent; substring(0, -1) would then throw
    // an unhelpful StringIndexOutOfBoundsException.
    int lastVar = jsContent.lastIndexOf("var");
    if (lastVar < 0) {
        throw new ScriptException("no inline script with a 'var' statement mentions ticketInfoForPassengerForm");
    }

    ScriptEngine engine = new ScriptEngineManager().getEngineByExtension("js");
    if (engine == null) {
        throw new ScriptException("no script engine is registered for the 'js' extension");
    }

    // NOTE(review): this evaluates script text taken from the supplied HTML; only call it
    // with pages from a trusted source.
    engine.eval(jsContent.substring(0, lastVar));
    ScriptObjectMirror objectMirror = (ScriptObjectMirror) engine.get("ticketInfoForPassengerForm");
    return (String) objectMirror.get(key);
}
示例11: collectDetail
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Parses a torrent detail page into a {@code TorrentDetail}.
 *
 * <p>The {@code dt}/{@code dd} pairs become the info map (only when both lists have the
 * same size), the first {@code div.download a} supplies the link and the first
 * {@code div.nfo pre} supplies the introduction. A missing link or introduction element
 * leaves that property unset instead of raising a {@code NullPointerException}.
 *
 * @param body the HTML of the detail page
 * @return the populated detail, or {@code null} when the page contains no {@code dl} element
 */
private TorrentDetail collectDetail(String body) {
    TorrentDetail detail = new TorrentDetail();
    Map<String, String> info = new HashMap<>();

    // Torrent information
    Document doc = Jsoup.parse(body);
    if (doc.select("dl").isEmpty()) {
        return null;
    }
    Elements dtEles = doc.getElementsByTag("dt");
    Elements ddEles = doc.getElementsByTag("dd");
    if (dtEles.size() == ddEles.size()) {
        // NOTE(review): the last dt/dd pair is deliberately left out, as in the original
        // loop bound — confirm this is intended and not an off-by-one.
        for (int i = 0; i < dtEles.size() - 1; i++) {
            String key = dtEles.get(i).text().trim();
            if (!key.isEmpty()) {
                // Drop the label's trailing character (presumably a colon — confirm).
                key = key.substring(0, key.length() - 1);
            }
            String value = ddEles.get(i).text().trim();
            info.put(key, value);
        }
    }
    detail.setInfo(info);

    // Download link of the torrent
    Element linkEle = doc.select("div.download a").first();
    if (linkEle != null) {
        detail.setLink(linkEle.attr("href"));
    }

    // Introduction text of the torrent
    Element introEle = doc.select("div.nfo pre").first();
    if (introEle != null) {
        detail.setIntro(introEle.text());
    }
    return detail;
}
示例12: getURLsFromPage
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Builds one entry per {@code post} element of the page, made of the absolute form of its
 * {@code file_url} attribute, a {@code #} separator and its {@code id} attribute.
 *
 * @param page the parsed listing page
 * @return the entries in document order
 */
@Override
public List<String> getURLsFromPage(Document page) {
    List<String> urls = new ArrayList<>(100);
    for (Element post : page.getElementsByTag("post")) {
        String fileUrl = post.absUrl("file_url");
        String postId = post.attr("id");
        urls.add(fileUrl + "#" + postId);
    }
    return urls;
}
示例13: getURLs
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Collects the media URLs of an album page.
 *
 * <p>Images are {@code img} elements with class {@code album-image}; videos are
 * {@code video} elements with class {@code album-video}, whose first {@code source} child
 * supplies the URL. Image sources get an {@code https:} prefix unless they already start
 * with {@code http://} or {@code https://}; protocol-relative video sources get the same
 * prefix. Elements with an empty source, and videos without a {@code source} element, are
 * skipped.
 *
 * @param url the album page to download
 * @return the image URLs followed by the video URLs, each group in document order
 * @throws IOException if the page cannot be fetched or a source is not a valid URL
 */
public static List<URL> getURLs(URL url) throws IOException {
    Response resp = Http.url(url)
            .ignoreContentType()
            .response();
    Document doc = resp.parse();
    List<URL> URLs = new ArrayList<>();

    // Pictures
    for (Element img : doc.getElementsByTag("img")) {
        if (!img.hasClass("album-image")) {
            continue;
        }
        String imageURL = img.attr("src");
        if (imageURL.isEmpty()) {
            continue;
        }
        // Avoid producing "https:https://..." when the source is already absolute.
        if (!imageURL.startsWith("http://") && !imageURL.startsWith("https://")) {
            imageURL = "https:" + imageURL;
        }
        URLs.add(new URL(imageURL));
    }

    // Videos
    for (Element vid : doc.getElementsByTag("video")) {
        if (!vid.hasClass("album-video")) {
            continue;
        }
        Element source = vid.getElementsByTag("source").first();
        if (source == null) {
            continue; // a video without a <source> child has no URL to collect
        }
        String videoURL = source.attr("src");
        if (videoURL.isEmpty()) {
            continue;
        }
        if (videoURL.startsWith("//")) {
            videoURL = "https:" + videoURL;
        }
        URLs.add(new URL(videoURL));
    }
    return URLs;
}
示例14: parseAnything
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Downloads a news article and logs its main text.
 *
 * <p>The longest {@code p} element that passes {@code isIllegalStringInTag} is taken as the
 * anchor; the text of every acceptable element under the anchor's parent is then
 * concatenated, one element per line, and logged at debug level.
 *
 * @throws IOException if the page cannot be fetched
 */
@Test
public void parseAnything() throws IOException {
    final String url = "http://www.pcgameshardware.de/Internet-Thema-34041/News/VPN-schuetzt-nicht-1149669";
    final Document doc = Jsoup.connect(url)
            .userAgent(USER_AGENT)
            .timeout(15000)
            .get();

    int maxLength = 0;
    Element biggestElement = null;
    for (final Element pTag : doc.getElementsByTag("p")) {
        final int length = pTag.text().length();
        if (length > maxLength && !isIllegalStringInTag(pTag)) {
            maxLength = length;
            biggestElement = pTag;
        }
    }
    // Fail with a clear message instead of a NullPointerException when the page has no
    // usable paragraph.
    if (biggestElement == null) {
        throw new AssertionError("no non-empty, legal <p> element found at " + url);
    }

    final StringBuilder mainText = new StringBuilder();
    for (final Element candidate : biggestElement.parent().getAllElements()) {
        if (!isIllegalStringInTag(candidate)) {
            mainText.append(HtmlUtil.removeTags(candidate.text(), false)).append("\n");
        }
    }
    LOGGER.debug(mainText.toString());
}
示例15: findImages
import org.jsoup.nodes.Document; //导入方法依赖的package包/类
/**
 * Collects the addresses of all images on a page, i.e. the {@code abs:src} attribute value
 * of every {@code img} element.
 *
 * @param html the parsed page to scan
 * @return the distinct image addresses; empty when the page has no {@code img} element
 */
public static Set<String> findImages(Document html) {
    Set<String> imageSources = new HashSet<String>();
    for (Element img : html.getElementsByTag("img")) {
        imageSources.add(img.attr("abs:src"));
    }
    return imageSources;
}