本文整理汇总了Java中org.htmlparser.util.NodeList类的典型用法代码示例。如果您正苦于以下问题：Java NodeList类的具体用法？Java NodeList怎么用？Java NodeList使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
NodeList类属于org.htmlparser.util包,在下文中一共展示了NodeList类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: parserUrl
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Collects every node whose raw text begins with {@code a href=}.
 *
 * @param parser the parser to extract nodes from
 * @return the matching nodes, or {@code null} when the parser raises a
 *         {@code ParserException} (the stack trace is printed in that case)
 */
@Override
public NodeList parserUrl(Parser parser) {
    NodeFilter anchorFilter = new NodeFilter() {
        @Override
        public boolean accept(Node candidate) {
            // The prefix test already yields the boolean we need.
            return candidate.getText().startsWith("a href=");
        }
    };
    NodeList matches = null;
    try {
        matches = parser.extractAllNodesThatMatch(anchorFilter);
    } catch (ParserException e) {
        e.printStackTrace();
    }
    return matches;
}
示例2: dealTag
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Processes a tag by passing each of its direct children to {@code parserNode}.
 * A tag without children is ignored.
 *
 * @param tag the tag to process
 */
private void dealTag(Node tag) {
    NodeList children = tag.getChildren();
    if (children == null) {
        return; // nothing to descend into
    }
    try {
        for (NodeIterator it = children.elements(); it.hasMoreNodes();) {
            parserNode(it.nextNode()); // recursive analysis of the child node
        }
    } catch (ParserException exc) {
        // Iteration stops at the first failure; only a marker line is printed.
        System.out.println("ParserException");
    }
}
示例3: assertion
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Builds a table tag holding one header (one column) and one row (one column),
 * wraps it in a {@code QueryTable} named "id", and returns the first assertion
 * that table produces.
 */
private Assertion assertion() throws SyntaxError {
    // Header with a single column.
    TableHeader header = new TableHeader();
    NodeList headerCells = new NodeList();
    headerCells.add(new TableColumn());
    header.setChildren(headerCells);

    // Data row with a single column.
    TableRow row = new TableRow();
    NodeList rowCells = new NodeList();
    rowCells.add(new TableColumn());
    row.setChildren(rowCells);

    // Table tag containing the header followed by the row.
    NodeList tableRows = new NodeList();
    tableRows.add(header);
    tableRows.add(row);
    TableTag tag = new TableTag();
    tag.setChildren(tableRows);

    TestPage page = new WikiTestPage(new WikiPageDummy());
    SlimTestContext context = new SlimTestContextImpl(page);
    Table htmlTable = new HtmlTable(tag);
    List<SlimAssertion> assertions = new QueryTable(htmlTable, "id", context).getAssertions();
    return assertions.get(0);
}
示例4: parseDetailInfo
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Builds a map from the link tags found directly under the first node of the list.
 * <p>
 * For every child {@code LinkTag} with a non-blank {@code id} attribute that does
 * not contain "all", the key is the part of the id after its first underscore
 * (the whole id when there is no underscore) and the value is the link's plain text.
 *
 * @param nodeList nodes whose first element holds the links; may be {@code null} or empty
 * @return the collected id/text pairs; empty when there is nothing to read, never {@code null}
 */
private Map<String, String> parseDetailInfo(NodeList nodeList) {
    Map<String, String> infoMap = Maps.newHashMap();
    if (nodeList == null || nodeList.size() == 0) {
        return infoMap;
    }
    // getChildren() yields null for a node without children; treat that as "no links".
    NodeList children = nodeList.elementAt(0).getChildren();
    if (children == null) {
        return infoMap;
    }
    for (Node pageNode : children.toNodeArray()) {
        try {
            if (!(pageNode instanceof LinkTag)) {
                continue;
            }
            String rawId = ((LinkTag) pageNode).getAttribute("id");
            if (StringUtils.isBlank(rawId) || rawId.contains("all")) {
                continue;
            }
            // indexOf returns -1 when there is no underscore, so the whole id is kept.
            String id = rawId.substring(rawId.indexOf("_") + 1);
            infoMap.put(id, pageNode.toPlainTextString());
        } catch (Exception e) {
            // Best effort: one malformed link must not abort the remaining ones.
            log.error("parse parseDetailInfo catch Exception:", e);
        }
    }
    return infoMap;
}
示例5: parseSpan
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Concatenates the values that {@code MappingSet.NUMBER_MAPPING} associates with the
 * {@code class} attribute of every {@code Span} found directly inside the {@code Div}
 * nodes of the list.
 *
 * @param nodeList the nodes to scan
 * @return "0" as soon as a {@code Div} whose plain text is "-" is met; otherwise the
 *         concatenated mapped values (empty string when nothing matched)
 */
private String parseSpan(NodeList nodeList) {
    StringBuilder sb = new StringBuilder();
    for (Node node : nodeList.toNodeArray()) {
        if (!(node instanceof Div)) {
            continue;
        }
        if (StringUtils.equalsIgnoreCase("-", node.toPlainTextString())) {
            return "0";
        }
        // A Div without children reports null here; skip it instead of failing.
        NodeList spanNodeList = node.getChildren();
        if (spanNodeList == null) {
            continue;
        }
        for (Node spanNode : spanNodeList.toNodeArray()) {
            if (spanNode instanceof Span) {
                String attribute = ((Span) spanNode).getAttribute("class");
                // NOTE(review): an unmapped class appends whatever get() returns
                // (possibly "null") - confirm that every class is mapped.
                sb.append(MappingSet.NUMBER_MAPPING.get(attribute));
            }
        }
    }
    return sb.toString();
}
示例6: processNodeList
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Walks the node tree depth-first and prints the plain text of every childless
 * node whose text contains the keyword.
 *
 * @param list    the nodes to walk
 * @param keyword the text to look for
 */
private static void processNodeList(NodeList list, String keyword) {
    for (SimpleNodeIterator it = list.elements(); it.hasMoreNodes();) {
        Node current = it.nextNode();
        NodeList children = current.getChildren();
        if (children != null) {
            // Has children: keep descending.
            processNodeList(children, keyword);
            continue;
        }
        // No children: this is a value node, so inspect its text.
        String text = current.toPlainTextString();
        if (text.contains(keyword)) {
            System.out.println(text);
        }
    }
}
示例7: parseMessage
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Parses the body of an HTML message into its nodes.
 * See <a href="http://htmlparser.sourceforge.net/">HTML Parser</a> for details.
 *
 * @param url the url that the message resulted from (not used by this method)
 * @param message the Message to parse
 * @return a NodeList containing every node of the page, or {@code null} when the
 *         Content-Type header is absent or does not start with text/html, the
 *         body is missing or empty, or parsing fails
 */
public Object parseMessage(HttpUrl url, Message message) {
    String contentType = message.getHeader("Content-Type");
    boolean isHtml = contentType != null && contentType.matches("text/html.*");
    if (!isHtml) {
        return null;
    }
    byte[] content = message.getContent();
    boolean hasBody = content != null && content.length != 0;
    if (!hasBody) {
        return null;
    }
    // NOTE(review): new String(byte[]) decodes with the platform default charset -
    // confirm this matches the charset the message was sent in.
    Parser parser = Parser.createParser(new String(content), null);
    NodeFilter acceptEverything = new NodeFilter() {
        public boolean accept(Node node) {
            return true;
        }
    };
    try {
        return parser.extractAllNodesThatMatch(acceptEverything);
    } catch (ParserException pe) {
        _logger.severe(pe.toString());
        return null;
    }
}
示例8: analyse
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Analyses one response. A 302 response contributes its Location target as an
 * unseen link; any other response is parsed and, when the result is a NodeList
 * (i.e. HTML), handed to {@code processHtml}.
 *
 * @param id       the conversation id (not used by this method)
 * @param request  the request; its URL is the base for resolving links
 * @param response the response to analyse
 * @param origin   the origin of the conversation (not used by this method)
 */
public void analyse(ConversationID id, Request request, Response response, String origin) {
    HttpUrl base = request.getURL();
    if (!response.getStatus().equals("302")) {
        Object parsed = Parser.parse(base, response);
        // instanceof is false for null, so no separate null check is required.
        if (parsed instanceof NodeList) { // the parsed content is HTML
            processHtml(base, (NodeList) parsed);
        } // else maybe it is a parsed Flash document? Anyone? :-)
        return;
    }
    String location = response.getHeader("Location");
    if (location == null) {
        _logger.warning("302 received, but no Location header!");
        return;
    }
    try {
        _model.addUnseenLink(new HttpUrl(base, location), base);
    } catch (MalformedURLException mue) {
        _logger.warning("Badly formed Location header : " + location);
    }
}
示例9: getGangliaAttribute
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Fetches the page for the given cluster and returns the text of every option
 * found directly under a {@code select} element whose id is "metrics-picker".
 *
 * @param clusterName the cluster name substituted into the metric URL wherever
 *                    {@code clusterPattern} matches; inserted literally
 * @return the option texts in document order; empty when no option is found
 * @throws ParserException       if the page cannot be parsed
 * @throws MalformedURLException if the resulting URL is not valid
 * @throws IOException           if the connection cannot be opened
 */
public List<String> getGangliaAttribute(String clusterName)
        throws ParserException, MalformedURLException, IOException {
    // quoteReplacement keeps '$' and '\' in the name from being read as
    // group references by replaceAll.
    String url = gangliaMetricUrl.replaceAll(clusterPattern,
            java.util.regex.Matcher.quoteReplacement(clusterName));
    Parser parser = new Parser(new URL(url).openConnection());
    NodeFilter nodeFilter = new AndFilter(new TagNameFilter("select"),
            new HasAttributeFilter("id", "metrics-picker"));
    NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
    List<String> metricList = new ArrayList<String>();
    SimpleNodeIterator iterator = nodeList.elements();
    while (iterator.hasMoreNodes()) {
        NodeList options = iterator.nextNode().getChildren();
        if (options == null) {
            continue; // a select without children has nothing to offer
        }
        SimpleNodeIterator childIterator = options.elements();
        while (childIterator.hasMoreNodes()) {
            Node child = childIterator.nextNode();
            // Children can include non-option nodes (e.g. text between the
            // options); a blind cast would throw ClassCastException on them.
            if (child instanceof OptionTag) {
                metricList.add(((OptionTag) child).getOptionText());
            }
        }
    }
    return metricList;
}
示例10: main
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Prints the text of every option found directly under a {@code select} element
 * whose id is "metrics-picker".
 *
 * @param args optional; {@code args[0]} overrides the page URL, otherwise the
 *             built-in default URL is used
 * @throws Exception if the page cannot be fetched or parsed
 */
public static void main(String[] args) throws Exception {
    String pageUrl = (args != null && args.length > 0)
            ? args[0]
            : "http://10.8.75.3/ganglia/?r=hour&cs=&ce=&s=by+name&c=Zookeeper_Cluster&tab=m&vn=&hide-hf=false";
    Parser parser = new Parser(new URL(pageUrl).openConnection());
    NodeFilter nodeFilter = new AndFilter(new TagNameFilter("select"),
            new HasAttributeFilter("id", "metrics-picker"));
    NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
    SimpleNodeIterator iterator = nodeList.elements();
    while (iterator.hasMoreNodes()) {
        NodeList options = iterator.nextNode().getChildren();
        if (options == null) {
            continue; // a select without children has nothing to print
        }
        SimpleNodeIterator childIterator = options.elements();
        while (childIterator.hasMoreNodes()) {
            Node child = childIterator.nextNode();
            // Skip non-option children instead of failing on the cast.
            if (child instanceof OptionTag) {
                System.out.println(((OptionTag) child).getOptionText());
            }
        }
    }
}
示例11: splitHtml
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Splits {@code content} into HTML fragments.
 * <p>
 * The fragments produced by {@code recusiveSplitHtml} come first. One more
 * fragment is then appended: the text of {@code content} from
 * {@code startPosition} onwards, prefixed with the opening tag of every tag in
 * {@code tagNodeList} that starts before {@code startPosition} and whose end tag
 * ends at or after it. That fragment is re-parsed and serialised before it is added.
 *
 * @return the fragments; whatever was collected so far when parsing fails
 */
private List<String> splitHtml() {
    List<String> resultList = new ArrayList<String>();
    try {
        Parser parser = Parser.createParser(content, "UTF-8");
        NodeList nodeList = parser.parse(null);
        resultList = recusiveSplitHtml(nodeList);
        StringBuilder lastPageContent = new StringBuilder();
        for (TagNode tagNode : tagNodeList) {
            // A tag without an end tag cannot span startPosition, and calling
            // getEndPosition() on the null end tag would throw.
            if (tagNode.getEndTag() == null) {
                continue;
            }
            boolean startsBefore = tagNode.getStartPosition() < startPosition;
            if (startsBefore && tagNode.getEndTag().getEndPosition() >= startPosition) {
                lastPageContent.append("<").append(tagNode.getText()).append(">");
            }
        }
        lastPageContent.append(content.substring(startPosition));
        Parser lastPageContentParser = Parser.createParser(lastPageContent.toString(), "UTF-8");
        NodeList pageContentNodeList = lastPageContentParser.parse(null);
        resultList.add(pageContentNodeList.toHtml());
    } catch (ParserException e) {
        e.printStackTrace();
    }
    return resultList;
}
示例12: getHtmlRoot
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Replaces the second capture group of every {@code ptnScript} match with a
 * generated placeholder, records the removed text in the given map, and parses
 * the rewritten HTML with the single-argument {@code getHtmlRoot}.
 *
 * @param html
 *            the HTML source
 * @param scriptSegs
 *            receives one entry per match: placeholder id mapped to the text
 *            that the placeholder replaced
 * @return the node list parsed from the rewritten HTML
 */
public static NodeList getHtmlRoot(String html,
        Map<String, String> scriptSegs) {
    Matcher scripts = ptnScript.matcher(html);
    StringBuilder rewritten = new StringBuilder();
    int cursor = 0;
    while (scripts.find(cursor)) {
        ++SCRIPT_ID;
        String placeholder = String.format("___SCRIPT_%d_TPIRCS___", SCRIPT_ID);
        // Text before the match, then group 1, the placeholder, and group 3.
        rewritten.append(html, cursor, scripts.start())
                .append(scripts.group(1))
                .append(placeholder)
                .append(scripts.group(3));
        scriptSegs.put(placeholder, scripts.group(2));
        cursor = scripts.end();
    }
    // Whatever follows the last match is copied unchanged.
    rewritten.append(html, cursor, html.length());
    return getHtmlRoot(rewritten.toString());
}
示例13: getValidHtml
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Re-serialises the HTML from its parsed top-level nodes, leaving out any
 * top-level node that is an end tag.
 *
 * @param html
 *            the HTML to normalise
 * @return the re-serialised HTML, or the input unchanged when anything fails
 */
public static String getValidHtml(String html) {
    try {
        StringBuilder rebuilt = new StringBuilder();
        for (Node node : getHtmlRoot(html).toNodeArray()) {
            boolean isEndTag = node instanceof TagNode && ((TagNode) node).isEndTag();
            if (!isEndTag) {
                rebuilt.append(node.toHtml());
            }
        }
        return rebuilt.toString();
    } catch (Exception e) {
        // do not throw exceptions, just return the input html
        return html;
    }
}
示例14: run
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Parses {@code content}, takes the children of the second root node, drops the
 * first two of them and passes every remaining child that itself has children
 * to {@code toObject}. {@code store} is closed when the method ends, whether or
 * not parsing succeeded.
 */
@Override
public void run() {
    try {
        parser = new Parser(content);
        logger.info(currentThread().getName() + "开始解析Post请求响应的HTML!,并存储到HBASE中!");
        NodeIterator rootList = parser.elements();
        rootList.nextNode(); // the first root node is skipped
        NodeList nodeList = rootList.nextNode().getChildren();
        /*
         * Checks whether the HTML response carries real content; this triggers
         * on errors or once all data has been read. When it triggers, endFlag
         * is set so that no new threads are started.
         */
        if (nodeList.size() <= 4) {
            program.endFlag = true;
        }
        /*
         * Drop the two leading children, then parse the remaining records.
         */
        nodeList.remove(0);
        nodeList.remove(0);
        SimpleNodeIterator childList = nodeList.elements();
        while (childList.hasMoreNodes()) {
            Node node = childList.nextNode();
            if (node.getChildren() != null) {
                toObject(node);
            }
        }
    } catch (Exception e) {
        // Pass the exception itself so the stack trace is not lost.
        logger.error(currentThread().getName() + "解析HTML文件出现异常!\n" + e.getMessage() + "\n", e);
    } finally {
        logger.info(currentThread().getName() + "HTML文件解析结束!");
        store.close();
    }
}
示例15: parsePageInfo
import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Reads a page count from the page at the given URL.
 * <p>
 * Looks at every {@code Div} whose {@code class} attribute is "pagenumber",
 * takes its second child, and returns the integer that follows the first "/"
 * in the first text node under that child that parses as a number.
 *
 * @param url the address of the page to read
 * @return the parsed number, or 0 when no such number is found
 * @throws IOException     if the connection cannot be opened
 * @throws ParserException if the page cannot be parsed
 */
private int parsePageInfo(final String url) throws IOException, ParserException {
    Parser parser = new Parser(CommonHttpURLConnection.getURLConnection(url));
    NodeFilter nodeFilter = new HasAttributeFilter("class", "pagenumber");
    NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
    for (Node node : nodeList.toNodeArray()) {
        if (!(node instanceof Div)) {
            continue;
        }
        // The value lives under the second child; skip blocks that lack one.
        NodeList children = node.getChildren();
        if (children == null || children.size() < 2) {
            continue;
        }
        NodeList innerNodes = children.elementAt(1).getChildren();
        if (innerNodes == null) {
            continue;
        }
        for (Node innerNode : innerNodes.toNodeArray()) {
            if (!(innerNode instanceof TextNode)) {
                continue;
            }
            String pageStr = innerNode.toPlainTextString();
            int slash = pageStr.indexOf("/");
            if (slash < 0) {
                continue;
            }
            try {
                // Surrounding whitespace would make parseInt reject a valid number.
                return Integer.parseInt(pageStr.substring(slash + 1).trim());
            } catch (NumberFormatException ignored) {
                // Not a number after the slash: keep looking in the next text node.
            }
        }
    }
    return 0;
}