本文整理汇总了 Java 中 us.codecraft.xsoup.Xsoup 类的典型用法代码示例。如果您正苦于以下问题:Java Xsoup 类的具体用法?Java Xsoup 怎么用?Java Xsoup 使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
Xsoup类属于us.codecraft.xsoup包,在下文中一共展示了Xsoup类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getByXpath
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Evaluates every XPath expression in {@code xpath} against {@code doc}, then sleeps.
 *
 * <p>The sleep happens once, after all expressions have been evaluated
 * (presumably to throttle the caller between pages; confirm against callers).
 *
 * @param doc       document the expressions are evaluated against
 * @param xpath     XPath expressions, evaluated in list order
 * @param sleepTime number of milliseconds to sleep before returning
 * @return one list of matches per expression, in the same order as {@code xpath}
 */
public static List<List<String>> getByXpath(Document doc, List<String> xpath, int sleepTime) {
    List<List<String>> res = new ArrayList<List<String>>(xpath.size());
    for (String expression : xpath) {
        res.add(Xsoup.select(doc, expression).list());
    }
    try {
        Thread.sleep(sleepTime);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so code further up the stack can still
        // observe that this thread was asked to stop.
        Thread.currentThread().interrupt();
        logger.error("Exception", e);
    }
    return res;
}
示例2: process
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Parses the page, runs every entry of {@code handlerChain} against it and
 * gathers the tasks the handlers produce.
 *
 * <p>For each (XPath, handler) pair the XPath is evaluated on the parsed
 * document and the matches are handed to the handler together with a shared
 * result-data list. Tasks rejected by {@code filter} are dropped before the
 * result is built.
 *
 * @param task the task currently being processed; forwarded to every handler
 * @param page the fetched page; parsed once and attached to the returned result
 * @return the surviving tasks plus the data the handlers accumulated
 * @throws ProcessException wrapping anything thrown while parsing or handling
 */
@Override
public Result<Collection<Task>> process(Task task, Page page) throws ProcessException {
    try {
        Document parsed = parse(page);
        Collection<Task> collected = new LinkedHashSet<>();
        List payload = new ArrayList();

        handlerChain.forEach((expression, handler) -> {
            Collection<Task> produced =
                    handler.handle(task, Xsoup.compile(expression).evaluate(parsed).list(), payload);
            if (produced == null) {
                // A handler may yield nothing for this page.
                return;
            }
            collected.addAll(produced);
        });

        // Keep only the tasks the filter accepts.
        collected.removeIf(filter.negate());

        Result<Collection<Task>> outcome = new Result<>(collected, payload);
        outcome.setPage(page);
        return outcome;
    } catch (Throwable failure) {
        throw new ProcessException(failure.getMessage(), failure);
    }
}
示例3: getRecordFromTr
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Builds a {@code Record} from the {@code td} cells of one table row.
 *
 * <p>Cell 0 supplies the record code (text of its {@code a} elements), cells
 * 1 and 17 are ignored, and every other cell is converted with
 * {@code TextProcess.getFloat} and appended to the field list.
 *
 * @param tr the row element whose cells are read
 * @return the populated record
 */
private Record getRecordFromTr(Element tr) {
    Record parsed = new Record();
    Elements cells = Xsoup.select(tr, "//td").getElements();
    List<Float> values = new ArrayList<Float>();

    int column = 0;
    for (Element cell : cells) {
        if (column == 0) {
            parsed.setCode(cell.getElementsByTag("a").text());
        } else if (column != 1 && column != 17) {
            values.add(TextProcess.getFloat(cell.text()));
        }
        column++;
    }

    parsed.setFields(values);
    return parsed;
}
示例4: getSeedCodes
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Crawls {@code url} and extracts six-digit codes from the link texts found
 * under {@code tbody#datalist}.
 *
 * @param url page to crawl
 * @return every six-digit code found, in document order (empty if none matched)
 */
private List<String> getSeedCodes(String url) {
    HttpResponse response = PageCrawler.crawl(url, null, 5000, 10000, true, 10);
    // NOTE(review): the bytes are decoded with the platform default charset;
    // confirm the page encoding and pass it explicitly if it can differ.
    String content = new String(response.getContent());
    Elements elements = Xsoup.select(content, "//tbody[@id='datalist']//td/a").getElements();

    Pattern sixDigits = Pattern.compile("(\\d{6})");
    List<String> seeds = new ArrayList<String>();
    for (Element element : elements) {
        Matcher m = sixDigits.matcher(element.text());
        if (m.find()) {
            String code = m.group(1);
            seeds.add(code);
            logger.info("[{}] get [{}] ", url, code);
        }
    }
    logger.info("[{}] get total {} codes", url, seeds.size());
    return seeds;
}
示例5: testByAttribute
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Checks attribute predicates: attribute presence ({@code [@href]},
 * {@code [@id]}) and attribute equality with single- and double-quoted values.
 */
@Test
public void testByAttribute() throws XPathExpressionException {
    final String expectedAnchor = "<a href=\"https://github.com\">github.com</a>";
    final String expectedTestDiv = "<div id=\"test\">\n" +
            " aaa\n" +
            " <div>\n" +
            " <a href=\"https://github.com\">github.com</a>\n" +
            " </div>\n" +
            "</div>";

    org.w3c.dom.Document dom = Xsoup.convertDocument(Jsoup.parse(html));

    // Attribute presence.
    assertThat(getNodeValue(dom, "//a[@href]")).isEqualTo(expectedAnchor);
    assertThat(getNodeValue(dom, "//a[@id]")).isNull();

    // Attribute equality. The unquoted form //div[@id=test] is marked as
    // illegal (TODO) in the original test and is therefore not asserted.
    assertThat(getNodeValue(dom, "//div[@id='test']")).isEqualTo(expectedTestDiv);
    assertThat(getNodeValue(dom, "//div[@id=\"test\"]")).isEqualTo(expectedTestDiv);
}
示例6: testNth
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Checks positional predicates ({@code [1]}, {@code [2]}) on {@code div}
 * children of {@code body} and on {@code svg} children of a {@code div}.
 */
@Test
public void testNth() throws XPathExpressionException {
    // Positional selection among the body's div children.
    org.w3c.dom.Document classDom = Xsoup.convertDocument(Jsoup.parse(htmlClass));
    String firstDiv = "<div class=\"a b c\">\n" +
            " <div>\n" +
            " <a href=\"https://github.com\">github.com</a>\n" +
            " </div>\n" +
            "</div>";
    String secondDiv = "<div>\n" +
            " b\n" +
            "</div>";
    assertThat(getNodeValue(classDom, "//body/div[1]")).isEqualTo(firstDiv);
    assertThat(getNodeValue(classDom, "//body/div[2]")).isEqualTo(secondDiv);

    // Same check for svg elements.
    String htmlSVG = "<div><svg>1</svg><svg>2</svg></div>";
    org.w3c.dom.Document svgDom = Xsoup.convertDocument(Jsoup.parse(htmlSVG));
    String firstSvg = "<svg>\n" +
            " 1\n" +
            "</svg>";
    String secondSvg = "<svg>\n" +
            " 2\n" +
            "</svg>";
    assertThat(getNodeValue(svgDom, "//div/svg[1]")).isEqualTo(firstSvg);
    assertThat(getNodeValue(svgDom, "//div/svg[2]")).isEqualTo(secondSvg);
}
示例7: analysis
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Resolves the download link of the page at {@code url}.
 *
 * <p>The page is fetched and the {@code href} of the
 * {@code opratebar-download} anchor inside the {@code opratebar2} div is
 * extracted.
 *
 * @param url page to analyse
 * @return a single-element list holding the extracted link (whatever the
 *         XPath lookup returned), or {@code null} if fetching or parsing threw
 * @throws Exception declared by the overridden method; failures inside this
 *                   implementation are logged rather than propagated
 */
@Override
public List<String> analysis(String url) throws Exception {
    try {
        String node = this.fetch(site, url);
        String downUrl = Xsoup
                .select(node,
                        "//DIV[@class='opratebar2']/A[@class='opratebar-download']/@href")
                .get();
        List<String> relist = new ArrayList<String>();
        relist.add(downUrl);
        return relist;
    } catch (Exception e) {
        // The original call had no placeholder and no throwable, so neither the
        // URL nor the cause reached the log. Passing the exception as the last
        // argument makes the logger record its stack trace.
        logger.error("error when get real url at OpenCourse, url={}", url, e);
        // Callers receive null on failure, as before.
        return null;
    }
}
示例8: main
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Ad-hoc timing harness: fetches one page, evaluates the title-cell XPath and
 * the per-cell link/text XPaths, and prints the elapsed milliseconds.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    String url = "http://v.163.com/special/opencourse/russian.html";
    Open163Test op = new Open163Test();
    String document = op.fetch(opensourceSite, url);

    // Number of times the evaluation is repeated inside the timed region.
    final int rounds = 1;

    long start = System.currentTimeMillis();
    for (int round = 0; round < rounds; round++) {
        List<String> urlNodes = Xsoup
                .compile("//TABLE[@id='list2']/TBODY/TR/TD[@class='u-ctitle']")
                .evaluate(document).list();
        for (String urlNode : urlNodes) {
            // Only the evaluation cost is of interest; the results are discarded.
            Xsoup.compile("//A/@href").evaluate(urlNode).get();
            Xsoup.compile("//A/text()").evaluate(urlNode).get();
        }
    }
    long end = System.currentTimeMillis();
    System.out.println(end - start);
}
示例9: processHtml
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Converts every {@code tbody/tr} row of the given HTML into a {@code Record}.
 *
 * @param html markup to scan for table rows
 * @return the records built from the rows, in document order; rows for which
 *         {@code getRecordFromTr} yields {@code null} are left out
 */
public List<Record> processHtml(String html) {
    Elements rows = Xsoup.select(html, "//tbody/tr").getElements();
    List<Record> parsedRows = new ArrayList<>();
    for (Element row : rows) {
        Record parsed = getRecordFromTr(row);
        if (parsed == null) {
            continue;
        }
        parsedRows.add(parsed);
    }
    return parsedRows;
}
示例10: verifyLocator
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Checks whether a locator selects exactly one element in the tree that
 * contains {@code e}, and records it in {@code uniqueLocators} if so.
 *
 * <p>Locators starting with {@code //} are evaluated as XPath through Xsoup;
 * all others are passed to {@code Element.select}. The search starts from
 * the topmost ancestor of {@code e}, or from {@code e} itself when it has no
 * ancestors.
 *
 * @param e       element the locator is meant to identify; used as the key in
 *                {@code uniqueLocators}
 * @param locator selector or XPath expression to verify
 * @return the locator followed by a human-readable verdict (unique,
 *         non-unique, or not found)
 * @throws Exception declared for callers; this body throws no checked
 *                   exception of its own
 */
private static String verifyLocator(Element e, String locator) throws Exception {
    // parents().last() yields null when e has no ancestors; fall back to e so
    // the lookup below does not fail with a NullPointerException.
    Element rootElement = e.parents().last();
    if (rootElement == null) {
        rootElement = e;
    }

    if (!locator.startsWith("//")) {
        Elements selected = rootElement.select(locator);
        if (selected.size() == 1) {
            if (!uniqueLocators.containsKey(e)) {
                uniqueLocators.put(e, locator);
            }
            return locator + " UNIQUE = " + selected.first();
        }
        if (selected.size() > 1) {
            return locator + " NON-UNIQUE = " + selected;
        }
        return locator + " NOT FOUND - PROBLEM";
    }

    // XPath locator. The original re-tested startsWith("//") here, which made
    // its trailing fallback return unreachable; that dead path is removed.
    Elements matched = Xsoup.select(rootElement, locator).getElements();
    if (matched.size() > 1) {
        return locator + " NON-UNIQUE!!! ";
    }
    if (matched.size() == 0) {
        return locator + " NOT FOUND - PROBLEM";
    }
    if (!uniqueLocators.containsKey(e)) {
        uniqueLocators.put(e, locator);
    }
    return locator + " UNIQUE = " + matched.get(0);
}
示例11: testSelect
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Checks that an attribute value and a list of nodes can be read from a
 * converted document.
 */
@Test
public void testSelect() throws XPathExpressionException {
    String markup = "<html><div><a href='https://github.com'>github.com</a></div>" +
            "<table><tr><td>a</td><td>b</td></tr></table></html>";
    org.w3c.dom.Document dom = Xsoup.convertDocument(Jsoup.parse(markup));

    // Attribute value lookup.
    String href = getStringValue(dom, "//div/a/@href");
    assertThat(href).isEqualTo("https://github.com");

    // Node list lookup.
    List<String> cells = getNodeListValue(dom, "//tr/td");
    assertThat(cells.get(0)).isEqualTo("<td>a</td>");
    assertThat(cells.get(1)).isEqualTo("<td>b</td>");
}
示例12: testContains
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Checks that a {@code contains()} predicate on an attribute selects the
 * div whose id contains {@code "te"}.
 */
@Test
public void testContains() throws XPathExpressionException {
    String expectedDiv = "<div id=\"test\">\n" +
            " aaa\n" +
            " <div>\n" +
            " <a href=\"https://github.com\">github.com</a>\n" +
            " </div>\n" +
            "</div>";
    org.w3c.dom.Document dom = Xsoup.convertDocument(Jsoup.parse(html));
    String actual = getNodeValue(dom, "//div[contains(@id,'te')]");
    assertThat(actual).isEqualTo(expectedDiv);
}
示例13: XpathSelector
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Creates a selector by compiling the given XPath expression once, up front.
 *
 * @param xpathStr XPath expression to compile
 */
public XpathSelector(String xpathStr) {
    xPathEvaluator = Xsoup.compile(xpathStr);
}
示例14: single
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Evaluates an XPath expression against {@code doc} and returns a single result.
 *
 * @param xpathStr XPath expression to compile and evaluate
 * @return whatever {@code get()} yields for the evaluation result
 */
public String single(String xpathStr) {
    return Xsoup.compile(xpathStr).evaluate(doc).get();
}
示例15: list
import us.codecraft.xsoup.Xsoup; //导入依赖的package包/类
/**
 * Evaluates an XPath expression against {@code doc} and returns all results.
 *
 * @param xpathStr XPath expression to compile and evaluate
 * @return whatever {@code list()} yields for the evaluation result
 */
public List<String> list(String xpathStr) {
    return Xsoup.compile(xpathStr).evaluate(doc).list();
}