当前位置: 首页>>代码示例>>Java>>正文


Java Xsoup类代码示例

本文整理汇总了Java中us.codecraft.xsoup.Xsoup的典型用法代码示例。如果您正苦于以下问题:Java Xsoup类的具体用法?Java Xsoup怎么用?Java Xsoup使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


Xsoup类属于us.codecraft.xsoup包,在下文中一共展示了Xsoup类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getByXpath

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Evaluates every XPath expression in {@code xpath} against {@code doc} and
 * then pauses the calling thread for {@code sleepTime} milliseconds.
 *
 * @param doc       the document the expressions are evaluated against
 * @param xpath     the XPath expressions to evaluate, in order
 * @param sleepTime pause in milliseconds after all expressions were evaluated;
 *                  values {@code <= 0} skip the pause
 * @return one list of matched strings per expression, in the same order as {@code xpath}
 */
public static List<List<String>> getByXpath(Document doc, List<String> xpath, int sleepTime){
		List<List<String>> res = new ArrayList<List<String>>(xpath.size());
		for (String expression : xpath) {
			res.add(Xsoup.select(doc, expression).list());
		}
		// Thread.sleep rejects negative values; skip the pause instead of losing the result.
		if (sleepTime > 0) {
			try {
				Thread.sleep(sleepTime);
			} catch (InterruptedException e) {
				// Restore the interrupt flag so the caller can still observe the interruption.
				Thread.currentThread().interrupt();
				logger.error("Exception",e);
			}
		}
		return res;
	}
 
开发者ID:zrtzrt,项目名称:CrawlerSYS,代码行数:20,代码来源:WebCrawler.java

示例2: process

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Runs every registered XPath handler against the parsed page and collects
 * the tasks they produce.
 *
 * <p>For each entry of {@code handlerChain} the XPath is evaluated on the
 * parsed document and the matched strings are passed to the handler together
 * with a shared result-data list. Tasks rejected by {@code filter} are removed
 * before the result is built.
 *
 * @param task the task that led to {@code page}
 * @param page the fetched page to process
 * @return a result holding the surviving tasks, the shared result data and the page
 * @throws ProcessException wrapping anything thrown while processing
 */
@Override
public Result<Collection<Task>> process(Task task, Page page) throws ProcessException {
    try {
        final Collection<Task> collected = new LinkedHashSet<>();
        final List extracted = new ArrayList();
        final Document parsedPage = parse(page);
        handlerChain.forEach((expression, pageHandler) -> {
            List<String> matches = Xsoup.compile(expression).evaluate(parsedPage).list();
            Collection<Task> produced = pageHandler.handle(task, matches, extracted);
            if (produced == null) {
                return;
            }
            collected.addAll(produced);
        });
        collected.removeIf(filter.negate());
        Result<Collection<Task>> outcome = new Result<>(collected, extracted);
        outcome.setPage(page);
        return outcome;
    } catch (Throwable cause) {
        throw new ProcessException(cause.getMessage(), cause);
    }
}
 
开发者ID:ZhangJiupeng,项目名称:Gospy,代码行数:22,代码来源:XPathProcessor.java

示例3: getRecordFromTr

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Builds a {@code Record} from the {@code td} cells found under {@code tr}.
 *
 * <p>Cell 0 supplies the record code (text of its {@code a} elements), cells
 * 1 and 17 are skipped, and every other cell is converted with
 * {@code TextProcess.getFloat} and appended to the record's fields.
 *
 * @param tr the element whose {@code td} cells are read
 * @return the populated record
 */
private Record getRecordFromTr(Element tr) {
    Record result = new Record();
    Elements cells = Xsoup.select(tr, "//td").getElements();
    List<Float> values = new ArrayList<Float>();
    for (int col = 0; col < cells.size(); col++) {
        if (col == 0) {
            result.setCode(cells.get(col).getElementsByTag("a").text());
            continue;
        }
        boolean skipped = (col == 1) || (col == 17);
        if (!skipped) {
            values.add(TextProcess.getFloat(cells.get(col).text()));
        }
    }
    result.setFields(values);
    return result;
}
 
开发者ID:douglaswei,项目名称:stock,代码行数:21,代码来源:PageProcessor.java

示例4: getSeedCodes

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Crawls {@code url} and extracts the six-digit codes found in the link texts
 * of the table body with id {@code datalist}.
 *
 * <p>For every {@code a} element inside a {@code td} of that table body, the
 * first run of six digits in its text (if any) is collected and logged.
 *
 * @param url the page to crawl
 * @return the extracted codes in document order; empty when nothing matched
 */
private List<String> getSeedCodes(String url) {
    HttpResponse response = PageCrawler.crawl(url, null, 5000, 10000, true, 10);
    List<String> seeds = new ArrayList<String>();
    // NOTE(review): decodes with the platform default charset — confirm the page encoding.
    String content = new String(response.getContent());
    Elements elements = Xsoup.select(content, "//tbody[@id='datalist']//td/a").getElements();
    Pattern p = Pattern.compile("(\\d{6})");
    for (Element element : elements) {
        Matcher m = p.matcher(element.text());
        if (m.find()) {
            String code = m.group(1);
            seeds.add(code);
            logger.info("[{}] get [{}] ", url, code);
        }
    }
    logger.info("[{}] get total {} codes", url, seeds.size());
    return seeds;
}
 
开发者ID:douglaswei,项目名称:stock,代码行数:19,代码来源:StringGeneratorOnline.java

示例5: testByAttribute

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Checks attribute predicates ({@code [@attr]} and {@code [@attr='value']})
 * against the W3C document produced by {@code Xsoup.convertDocument}.
 */
@Test
public void testByAttribute() throws XPathExpressionException {
    final String anchorMarkup = "<a href=\"https://github.com\">github.com</a>";
    final String expectedDiv = "<div id=\"test\">\n" +
            " aaa\n" +
            " <div>\n" +
            "  <a href=\"https://github.com\">github.com</a>\n" +
            " </div>\n" +
            "</div>";

    org.w3c.dom.Document w3cDoc = Xsoup.convertDocument(Jsoup.parse(html));

    // Presence-only predicates: an existing attribute matches, a missing one yields null.
    assertThat(getNodeValue(w3cDoc, "//a[@href]")).isEqualTo(anchorMarkup);
    assertThat(getNodeValue(w3cDoc, "//a[@id]")).isNull();

    //TODO: illegal
    //assertThat(getNodeValue(document,"//div[@id=test]")).isEqualTo(expectedDiv);

    // Value predicates work with either quote style.
    assertThat(getNodeValue(w3cDoc, "//div[@id='test']")).isEqualTo(expectedDiv);
    assertThat(getNodeValue(w3cDoc, "//div[@id=\"test\"]")).isEqualTo(expectedDiv);
}
 
开发者ID:code4craft,项目名称:xsoup,代码行数:24,代码来源:W3cEvaluatorTest.java

示例6: testNth

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Checks positional predicates ({@code [1]}, {@code [2]}) on {@code div}
 * and {@code svg} elements of converted documents.
 */
@Test
public void testNth() throws XPathExpressionException {
    final String firstDiv = "<div class=\"a b c\">\n" +
            " <div>\n" +
            "  <a href=\"https://github.com\">github.com</a>\n" +
            " </div>\n" +
            "</div>";
    final String secondDiv = "<div>\n" +
            " b\n" +
            "</div>";
    final String firstSvg = "<svg>\n" +
            " 1\n" +
            "</svg>";
    final String secondSvg = "<svg>\n" +
            " 2\n" +
            "</svg>";

    org.w3c.dom.Document classDocument = Xsoup.convertDocument(Jsoup.parse(htmlClass));
    assertThat(getNodeValue(classDocument, "//body/div[1]")).isEqualTo(firstDiv);
    assertThat(getNodeValue(classDocument, "//body/div[2]")).isEqualTo(secondDiv);

    String htmlSVG = "<div><svg>1</svg><svg>2</svg></div>";
    org.w3c.dom.Document svgDocument = Xsoup.convertDocument(Jsoup.parse(htmlSVG));
    assertThat(getNodeValue(svgDocument, "//div/svg[1]")).isEqualTo(firstSvg);
    assertThat(getNodeValue(svgDocument, "//div/svg[2]")).isEqualTo(secondSvg);
}
 
开发者ID:code4craft,项目名称:xsoup,代码行数:26,代码来源:W3cEvaluatorTest.java

示例7: analysis

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Fetches {@code url} and extracts the download link from the
 * {@code opratebar-download} anchor inside the {@code opratebar2} div.
 *
 * <p>Failures are logged (with the URL and the causing exception) rather
 * than propagated.
 *
 * @param url the page to analyse
 * @return a single-element list holding the extracted {@code href}, or
 *         {@code null} when fetching or extraction threw an exception
 * @throws Exception declared by the overridden method; not thrown by this body
 */
@Override
public List<String> analysis(String url) throws Exception {
	List<String> relist = null;
	try {
		String node = this.fetch(site, url);
		// NOTE(review): get() presumably yields null when nothing matches, which would
		// put a null entry into the list — confirm callers handle that.
		String downUrl = Xsoup
				.select(node,
						"//DIV[@class='opratebar2']/A[@class='opratebar-download']/@href")
				.get();

		relist = new ArrayList<String>();
		relist.add(downUrl);
	} catch (Exception e) {
		// The message needs a placeholder for url; the trailing throwable keeps the stack trace.
		logger.error("error when get real url at OpenCourse, url={}", url, e);
	}
	return relist;
}
 
开发者ID:caorong,项目名称:MediaCrawler,代码行数:20,代码来源:OpenCourseAnalyzer.java

示例8: main

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Fetches a fixed course page, evaluates the title-cell XPath plus the
 * per-cell link and text XPaths, and prints the elapsed time in milliseconds.
 *
 * @param args unused
 */
public static void main(String[] args) {
	String url = "http://v.163.com/special/opencourse/russian.html";
	Open163Test op = new Open163Test();
	String document = op.fetch(opensourceSite, url);

	// Number of times the extraction is repeated inside the timed section.
	final int iterations = 1;

	long start = System.currentTimeMillis();
	for (int round = 0; round < iterations; round++) {
		List<String> urlNodes = Xsoup
				.compile(
						"//TABLE[@id='list2']/TBODY/TR/TD[@class='u-ctitle']")
				.evaluate(document).list();
		for (String cell : urlNodes) {
			// The extracted values are not used; only the elapsed time is reported.
			Xsoup.compile("//A/@href").evaluate(cell).get();
			Xsoup.compile("//A/text()").evaluate(cell).get();
		}
	}
	long end = System.currentTimeMillis();
	System.out.println(end - start);
}
 
开发者ID:caorong,项目名称:MediaCrawler,代码行数:28,代码来源:Open163Test.java

示例9: processHtml

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Converts every {@code tbody/tr} row of {@code html} into a {@code Record}
 * via {@code getRecordFromTr}, skipping rows for which that call returns null.
 *
 * @param html the markup to scan for table rows
 * @return the records built from the rows, in document order
 */
public List<Record> processHtml(String html) {
    List<Record> parsed = new ArrayList<>();
    Elements rows = Xsoup.select(html, "//tbody/tr").getElements();
    for (int i = 0; i < rows.size(); i++) {
        Record fromRow = getRecordFromTr(rows.get(i));
        if (fromRow == null) {
            continue;
        }
        parsed.add(fromRow);
    }
    return parsed;
}
 
开发者ID:douglaswei,项目名称:stock,代码行数:12,代码来源:GeguPageProcess.java

示例10: verifyLocator

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Verifies whether a locator identifies exactly one element in the tree that
 * contains {@code e}.
 *
 * <p>Locators starting with {@code //} are evaluated as XPath through Xsoup;
 * everything else is evaluated as a selector through {@code Element.select}.
 * When exactly one element matches and {@code e} has no entry in
 * {@code uniqueLocators} yet, the locator is recorded there for {@code e}.
 *
 * @param e       the element the locator was generated for
 * @param locator the selector or XPath expression to verify
 * @return the locator followed by a verdict: {@code UNIQUE}, {@code NON-UNIQUE}
 *         or {@code NOT FOUND - PROBLEM}
 * @throws Exception declared for callers; this body throws no checked exception itself
 */
private static String verifyLocator( Element e, String locator) throws Exception {
	Element rootElement = e.parents().last();
	if (rootElement == null) {
		// e has no ancestors, so it is itself the top of the tree to search.
		rootElement = e;
	}

	if (!locator.startsWith("//")) {
		Elements selected = rootElement.select(locator);
		if (selected.size() == 1) {
			if (!uniqueLocators.containsKey(e)) {
				uniqueLocators.put(e, locator);
			}
			return locator + " UNIQUE = "+selected.first();
		} else if (selected.size() > 1) {
			return locator + " NON-UNIQUE = "+selected;
		} else {
			return locator +" NOT FOUND - PROBLEM";
		}
	}

	// xpath
	Elements matched = Xsoup.select(rootElement, locator).getElements();
	if (matched.size() > 1) {
		return locator + " NON-UNIQUE!!! ";
	}
	if (matched.size() == 0) {
		return locator +" NOT FOUND - PROBLEM";
	}
	if (!uniqueLocators.containsKey(e)) {
		uniqueLocators.put(e, locator);
	}
	return locator + " UNIQUE = "+ matched.get(0);
}
 
开发者ID:persado,项目名称:stevia,代码行数:38,代码来源:SelectorFetcher.java

示例11: testSelect

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Checks attribute selection and node-list selection on a small inline
 * document converted with {@code Xsoup.convertDocument}.
 */
@Test
public void testSelect() throws XPathExpressionException {
    final String markup = "<html><div><a href='https://github.com'>github.com</a></div>" +
            "<table><tr><td>a</td><td>b</td></tr></table></html>";
    org.w3c.dom.Document w3cDoc = Xsoup.convertDocument(Jsoup.parse(markup));

    String href = getStringValue(w3cDoc, "//div/a/@href");
    assertThat(href).isEqualTo("https://github.com");

    List<String> cells = getNodeListValue(w3cDoc, "//tr/td");
    String firstCell = cells.get(0);
    assertThat(firstCell).isEqualTo("<td>a</td>");
    String secondCell = cells.get(1);
    assertThat(secondCell).isEqualTo("<td>b</td>");
}
 
开发者ID:code4craft,项目名称:xsoup,代码行数:15,代码来源:W3cEvaluatorTest.java

示例12: testContains

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Checks that a {@code contains()} predicate on the {@code id} attribute
 * selects the expected {@code div}.
 */
@Test
public void testContains() throws XPathExpressionException {
    final String expectedDiv = "<div id=\"test\">\n" +
            " aaa\n" +
            " <div>\n" +
            "  <a href=\"https://github.com\">github.com</a>\n" +
            " </div>\n" +
            "</div>";

    org.w3c.dom.Document w3cDoc = Xsoup.convertDocument(Jsoup.parse(html));
    String actual = getNodeValue(w3cDoc,"//div[contains(@id,'te')]");

    assertThat(actual).isEqualTo(expectedDiv);
}
 
开发者ID:code4craft,项目名称:xsoup,代码行数:14,代码来源:W3cEvaluatorTest.java

示例13: XpathSelector

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Creates a selector for the given XPath expression.
 *
 * <p>The expression is compiled once with {@code Xsoup.compile} and the
 * result is stored in {@code xPathEvaluator}.
 *
 * @param xpathStr the XPath expression to compile
 */
public XpathSelector(String xpathStr) {
    this.xPathEvaluator = Xsoup.compile(xpathStr);
}
 
开发者ID:fengzhizi715,项目名称:NetDiscovery,代码行数:4,代码来源:XpathSelector.java

示例14: single

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Compiles {@code xpathStr}, evaluates it against {@code doc} and returns
 * the result of {@code get()} on the evaluation.
 *
 * @param xpathStr the XPath expression to evaluate
 * @return the single value produced by the evaluation
 */
public String single(String xpathStr) {
    return Xsoup.compile(xpathStr).evaluate(doc).get();
}
 
开发者ID:xbynet,项目名称:crawler,代码行数:5,代码来源:XpathParser.java

示例15: list

import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Compiles {@code xpathStr}, evaluates it against {@code doc} and returns
 * the result of {@code list()} on the evaluation.
 *
 * @param xpathStr the XPath expression to evaluate
 * @return all values produced by the evaluation
 */
public List<String> list(String xpathStr) {
    return Xsoup.compile(xpathStr).evaluate(doc).list();
}
 
开发者ID:xbynet,项目名称:crawler,代码行数:5,代码来源:XpathParser.java


注:本文中的us.codecraft.xsoup.Xsoup类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。