本文整理汇总了Java中org.htmlparser.Node.toPlainTextString方法的典型用法代码示例。如果您正苦于以下问题：Java Node.toPlainTextString方法的具体用法？Java Node.toPlainTextString怎么用？Java Node.toPlainTextString使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.htmlparser.Node的用法示例。
在下文中一共展示了Node.toPlainTextString方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: processNodeList
import org.htmlparser.Node; //导入方法依赖的package包/类
/**
 * Recursively walks a node list and prints the plain text of every node that
 * has no child list and whose text contains the given keyword.
 *
 * @param list    nodes to traverse
 * @param keyword substring a node's plain text must contain to be printed
 */
private static void processNodeList(NodeList list, String keyword) {
    for (SimpleNodeIterator it = list.elements(); it.hasMoreNodes();) {
        Node current = it.nextNode();
        NodeList children = current.getChildren();
        if (children != null) {
            // The node has a child list: keep descending into it.
            processNodeList(children, keyword);
            continue;
        }
        // No child list: treat the node as a value node and inspect its text.
        String text = current.toPlainTextString();
        if (text.contains(keyword)) {
            System.out.println(text);
        }
    }
}
示例2: parsePageInfo
import org.htmlparser.Node; //导入方法依赖的package包/类
/**
 * Parses the number of pages of the community listing found at the given URL.
 *
 * <p>Every {@code Div} carrying {@code class="pagenumber"} is inspected: the
 * text nodes under its second child are scanned for a text containing
 * {@code "/"}, and whatever follows the first {@code "/"} is parsed as the
 * page count.
 *
 * @param url address of the page to fetch and parse
 * @return the first page count that parses as an integer, or {@code 0} when
 *         none is found
 * @throws IOException     presumably raised while opening the connection
 * @throws ParserException presumably raised when the HTML cannot be parsed
 */
private int parsePageInfo(final String url) throws IOException, ParserException {
    Parser parser = new Parser(CommonHttpURLConnection.getURLConnection(url));
    NodeFilter nodeFilter = new HasAttributeFilter("class", "pagenumber");
    NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
    for (Node node : nodeList.toNodeArray()) {
        if (!(node instanceof Div)) {
            continue;
        }
        // Guard the fixed "second child" lookup: a div with fewer than two
        // children, or whose second child has no children, is skipped instead
        // of failing with a NullPointerException.
        NodeList children = node.getChildren();
        if (children == null || children.size() < 2) {
            continue;
        }
        NodeList innerNodes = children.elementAt(1).getChildren();
        if (innerNodes == null) {
            continue;
        }
        for (Node innerNode : innerNodes.toNodeArray()) {
            if (!(innerNode instanceof TextNode)) {
                continue;
            }
            String pageStr = innerNode.toPlainTextString();
            int slash = pageStr.indexOf("/");
            if (slash < 0) {
                continue;
            }
            try {
                // Trim so surrounding whitespace does not defeat the parse.
                return Integer.parseInt(pageStr.substring(slash + 1).trim());
            } catch (NumberFormatException ignored) {
                // Text after "/" is not a number; keep scanning the remaining nodes.
            }
        }
    }
    return 0;
}
示例3: parsePageInfo
import org.htmlparser.Node; //导入方法依赖的package包/类
/**
 * Crawls the number of pages for the current building.
 *
 * <p>Takes the first node carrying {@code class="spagenext"} and looks through
 * its {@code Span} children. For each span, the text between the first
 * {@code "/"} and one character before {@code "总数"} is trimmed and parsed as
 * the page count; the first span that parses wins.
 *
 * @param url            address of the page to fetch and parse
 * @param departmentInfo only used in the log message
 * @return the parsed page count, or {@code 0} when no matching node or no
 *         parsable span is found
 * @throws ParserException presumably raised when the HTML cannot be parsed
 * @throws IOException     presumably raised while opening the connection
 */
public int parsePageInfo(String url, DepartmentInfo departmentInfo) throws ParserException, IOException {
    Parser parser = new Parser(CommonHttpURLConnection.getURLConnection(url));
    int page = 0;
    // Locate the pager element.
    NodeFilter nodeFilter = new HasAttributeFilter("class", "spagenext");
    NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
    if (nodeList.size() == 0) {
        return page;
    }
    // A pager node without children yields page 0 instead of a NullPointerException.
    NodeList children = nodeList.elementAt(0).getChildren();
    if (children != null) {
        for (Node pageNode : children.toNodeArray()) {
            if (!(pageNode instanceof Span)) {
                continue;
            }
            String tmp = pageNode.toPlainTextString();
            int start = tmp.indexOf("/") + 1;
            int end = tmp.indexOf("总数") - 1;
            if (end < start) {
                // Marker missing or out of order: this span holds no page count.
                continue;
            }
            try {
                page = Integer.parseInt(tmp.substring(start, end).trim());
                break;
            } catch (NumberFormatException ignored) {
                // Not a number; try the next span.
            }
        }
    }
    // Pass the object itself so a null departmentInfo cannot break logging.
    log.info("get total page [{}] for department:[{}]", page, departmentInfo);
    return page;
}
示例4: parseLinkTag
import org.htmlparser.Node; //导入方法依赖的package包/类
/**
 * Parses the building number: returns the plain text of the first
 * {@code LinkTag} found in the list.
 *
 * @param nodeList nodes to search, in order
 * @return the plain text of the first {@code LinkTag}, or
 *         {@code StringUtils.EMPTY} when the list contains none
 */
private String parseLinkTag(NodeList nodeList) {
    String text = StringUtils.EMPTY;
    for (int i = 0, count = nodeList.size(); i < count; i++) {
        Node candidate = nodeList.elementAt(i);
        if (candidate instanceof LinkTag) {
            text = candidate.toPlainTextString();
            break;
        }
    }
    return text;
}
示例5: parseDiv
import org.htmlparser.Node; //导入方法依赖的package包/类
/**
 * Parses the room number / status: returns the plain text of the first
 * {@code Div} found in the list.
 *
 * @param nodeList nodes to search, in order
 * @return the plain text of the first {@code Div}, or
 *         {@code StringUtils.EMPTY} when the list contains none
 */
private String parseDiv(NodeList nodeList) {
    Node[] candidates = nodeList.toNodeArray();
    int index = 0;
    while (index < candidates.length) {
        Node candidate = candidates[index];
        if (candidate instanceof Div) {
            return candidate.toPlainTextString();
        }
        index++;
    }
    return StringUtils.EMPTY;
}
示例6: list
import org.htmlparser.Node; //导入方法依赖的package包/类
/**
 * Action mapped to {@code dstlist} that fills {@code list} with the news
 * entries of the current {@code page} and renders it as JSON.
 *
 * <p>The result is looked up in the {@code "News"} cache under the key
 * {@code "dstlist" + page}. On a miss the remote page is fetched with a POST,
 * every tag whose {@code href} matches {@code show\.aspx\?.+} becomes a
 * {@code News} item (id = href, title = trimmed link text), and the populated
 * list is stored in the cache.
 *
 * @return always {@code SUCCESS}; on failure the stack trace is printed and
 *         {@code list} keeps whatever was collected before the error
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Action(value = "dstlist", results = { @Result(type = "json", params = {
        "root", "list" }) })
public String list() {
    Cache c = CacheManager.getInstance().getCache("News");
    String ckey = "dstlist" + page;
    Element ele = c.get(ckey);
    if (!CommonUtil.isEmpty(ele)) {
        // Cache hit: reuse the stored list.
        list = (List) ele.getObjectValue();
    } else {
        try {
            StringBuffer retstr = CommonUtil.postWebRequest(RD+"/news.aspx?info_lb=822", ("__EVENTTARGET=_ctl0$ContentPlaceHolder1$Pager22&__EVENTARGUMENT="+page).getBytes("utf-8"), "application/x-www-form-urlencoded");
            Parser p = Parser.createParser(retstr.toString(), "utf-8");
            list = new ArrayList<News>();
            NodeList ls = p
                    .extractAllNodesThatMatch(new AttributeRegexFilter(
                            "href", "show\\.aspx\\?.+"));
            SimpleNodeIterator i = ls.elements();
            while (i.hasMoreNodes()) {
                Node n = i.nextNode();
                if (n instanceof TagNode) {
                    TagNode tn = (TagNode) n;
                    News news = new News();
                    String href = tn.getAttribute("href");
                    news.setId(href);
                    news.setTitle(tn.toPlainTextString().trim());
                    // A link without a parent has no sibling span to inspect;
                    // guard it so one such link does not abort the whole page.
                    Node parent = tn.getParent();
                    Node tmp = (parent == null) ? null : parent.getNextSibling();
                    // Walk forward to the first Span after the link's parent.
                    while (tmp != null && !(tmp instanceof Span)) {
                        tmp = tmp.getNextSibling();
                    }
                    if (tmp != null) {
                        String dtstr = tmp.toPlainTextString();
                        if (dtstr != null && dtstr.length() > 2) {
                            // Drop the first and last character of the span text
                            // (presumably surrounding brackets - TODO confirm).
                            news.setPubdate(dtstr.substring(1, dtstr.length() - 1));
                        }
                    }
                    list.add(news);
                }
            }
            c.put(new Element(ckey, list));
        } catch (Exception e) {
            // Best effort: the action still returns SUCCESS with whatever was collected.
            e.printStackTrace();
        }
    }
    return SUCCESS;
}