本文整理汇总了Java中com.gargoylesoftware.htmlunit.html.HtmlAnchor.getHrefAttribute方法的典型用法代码示例。如果您正苦于以下问题:Java HtmlAnchor.getHrefAttribute方法的具体用法?Java HtmlAnchor.getHrefAttribute怎么用?Java HtmlAnchor.getHrefAttribute使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类com.gargoylesoftware.htmlunit.html.HtmlAnchor的用法示例。
在下文中一共展示了HtmlAnchor.getHrefAttribute方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getAllTvStationOfCity
import com.gargoylesoftware.htmlunit.html.HtmlAnchor; //导入方法依赖的package包/类
/**
 * Collects the TV stations of a city by following the program links found on a page.
 *
 * <p>Each element matched by the navigation XPath is treated as an anchor. Anchors whose
 * href does not start with {@code /program/} are skipped. For every remaining anchor the
 * method sleeps for {@code getRandomSleepTime()} milliseconds, crawls
 * {@code TV_MAO_URL_PREFIX + href}, and adds whatever {@code getTvStations} returns for
 * the crawled page.
 *
 * <p>A failure while handling one anchor is logged and the loop moves on to the next one.
 * If the thread is interrupted during the sleep, the interrupt status is restored and the
 * stations collected so far are returned.
 *
 * @param htmlPage page that is searched for program links
 * @param city city passed on to {@code getTvStations} and included in error logs
 * @return the stations gathered from all crawled pages; never {@code null}
 */
private List<TvStation> getAllTvStationOfCity(HtmlPage htmlPage, String city) {
    List<TvStation> resultList = new ArrayList<TvStation>();
    List<?> elements = htmlPage
            .getByXPath("//div[@class='chlsnav']//div[@class='plst']/parent::*");
    for (int i = 0, size = elements == null ? 0 : elements.size(); i < size; i++) {
        try {
            // A non-anchor match raises ClassCastException, which is logged below.
            HtmlAnchor anchor = (HtmlAnchor) elements.get(i);
            String href = anchor.getHrefAttribute();
            if (!href.startsWith("/program/")) {
                continue;
            }
            logger.debug(anchor.getTextContent()
                    + " program table of tvmao: " + ", url: " + href);
            // NOTE(review): presumably a throttle between requests -- confirm.
            TimeUnit.MILLISECONDS.sleep(getRandomSleepTime());
            HtmlPage p = (HtmlPage) WebCrawler.crawl(TV_MAO_URL_PREFIX
                    + href);
            resultList.addAll(getTvStations(p, city));
        } catch (InterruptedException e) {
            // Do not swallow the interruption: restore the flag so callers can see
            // it, and stop crawling instead of continuing with the next link.
            Thread.currentThread().interrupt();
            logger.error("interrupted while get all tv station of city: "
                    + city, e);
            break;
        } catch (Exception e) {
            logger.error("error occur while get all tv station of city: "
                    + city, e);
        }
    }
    return resultList;
}
示例2: create
import com.gargoylesoftware.htmlunit.html.HtmlAnchor; //导入方法依赖的package包/类
/**
 * Starts the CSV link creation process.
 *
 * <p>Silences the {@code com.gargoylesoftware} logger, loads {@code forumThreadLink} with a
 * {@code WebClient} emulating Chrome, waits for background JavaScript using
 * {@code waitForJavaScript}, and takes the first element matched by {@code xPath} as an
 * anchor. {@code csvFileLink} is then set to {@code "http://www.pathofexile.com"} followed
 * by the anchor's href with its last character removed.
 *
 * <p>The web client is closed even when one of these steps fails; in that case
 * {@code csvFileLink} keeps its previous value.
 *
 * @throws Exception If there was an error while getting the CSV specific information from the forum thread.
 */
public void create() throws Exception {
    java.util.logging.Logger.getLogger("com.gargoylesoftware").setLevel(java.util.logging.Level.OFF);
    WebClient webClient = new WebClient(BrowserVersion.CHROME);
    try {
        HtmlPage page = (HtmlPage) webClient.getPage(forumThreadLink);
        webClient.waitForBackgroundJavaScriptStartingBefore(waitForJavaScript);
        HtmlAnchor anchor = (HtmlAnchor) page.getByXPath(xPath).get(0);
        String href = anchor.getHrefAttribute();
        // NOTE(review): the trailing character of the href is dropped; the reason is
        // not visible here (possibly a stray suffix in the forum markup) -- confirm.
        csvFileLink = "http://www.pathofexile.com" + href.substring(0, href.length() - 1);
    } finally {
        // Always release the client, including when the page load or lookup throws.
        webClient.close();
    }
}
示例3: crawlProgramTable
import com.gargoylesoftware.htmlunit.html.HtmlAnchor; //导入方法依赖的package包/类
/**
 * Crawls and parses the program table for the station and date carried by a task.
 *
 * <p>The method loads {@code TV_MAO_URL}, lets {@code searchStation} navigate to the
 * station, and, when the requested date is not today but is one of the dates returned by
 * {@code DateUtils.getWeek}, follows the date link whose label matches the requested
 * date. The XML of the resulting page is parsed, handed to
 * {@code MyTvUtils.outputCrawlData}, and every registered listener is notified with a
 * {@code ProgramTableCrawlEndEvent}.
 *
 * @param task crawl task providing the station and the query date
 * @return the parsed program tables, or {@code null} when the station search throws or
 *         yields no page
 */
private List<ProgramTable> crawlProgramTable(TvMaoCrawlTask task) {
    TvStation station = task.tvStation;
    String queryDate = task.date;
    String stationName = station.getName();
    logger.info("crawl program table of " + stationName + " at "
            + queryDate);

    HtmlPage htmlPage = (HtmlPage) WebCrawler.crawl(TV_MAO_URL);
    try {
        htmlPage = searchStation(htmlPage, station);
    } catch (Exception e) {
        logger.error("error occur while search station: " + stationName, e);
        return null;
    }
    if (htmlPage == null) {
        logger.debug("cannot get station data from " + TV_MAO_URL + " of "
                + stationName);
        return null;
    }

    boolean isToday = queryDate.equals(DateUtils.today());
    if (!isToday) {
        // Only dates reported by DateUtils.getWeek are looked up on the page.
        Set<String> weekDates = new HashSet<String>();
        for (String day : DateUtils.getWeek(new Date(), "yyyy-MM-dd")) {
            weekDates.add(day);
        }
        if (weekDates.contains(queryDate)) {
            List<?> dateLinks = htmlPage
                    .getByXPath("//div[@class='pgnav_wrap']//div[@class='epghdc lt']//dl[@class='commtab clear']/dd/a");
            if (dateLinks != null) {
                for (Object node : dateLinks) {
                    HtmlAnchor link = (HtmlAnchor) node;
                    String label = link.getTextContent().trim();
                    if (!label.endsWith(")")) {
                        continue;
                    }
                    // The label text from index 2 up to the closing ")" is combined
                    // with the current year and compared to the query date.
                    String labelDate = Calendar.getInstance().get(Calendar.YEAR)
                            + "-"
                            + label.substring(2, label.length() - 1);
                    if (queryDate.equals(labelDate)) {
                        String target = link.getHrefAttribute();
                        htmlPage = (HtmlPage) WebCrawler.crawl(TV_MAO_URL_PREFIX + target);
                        break;
                    }
                }
            }
        }
    }

    String html = htmlPage.asXml();
    List<ProgramTable> ptList = parseProgramTable(html);
    String outputName = queryDate + Constant.UNDERLINE + getCrawlerName()
            + Constant.UNDERLINE + stationName;
    MyTvUtils.outputCrawlData(queryDate, html, outputName);
    for (CrawlEventListener listener : listeners) {
        ProgramTableCrawlEndEvent endEvent = new ProgramTableCrawlEndEvent(
                this, ptList, station.getName(), queryDate);
        listener.crawlEnd(endEvent);
    }
    return ptList;
}