当前位置: 首页>>代码示例>>Java>>正文


Java NodeList类代码示例

本文整理汇总了Java中org.htmlparser.util.NodeList的典型用法代码示例。如果您正苦于以下问题:Java NodeList类的具体用法?Java NodeList怎么用?Java NodeList使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


NodeList类属于org.htmlparser.util包,在下文中一共展示了NodeList类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: parserUrl

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Collects every node whose {@code getText()} value starts with
 * {@code a href=}.
 *
 * @param parser the parser to pull nodes from
 * @return the matching nodes, or {@code null} when the parser raises a
 *         {@link ParserException} (its stack trace is printed)
 */
@Override
public NodeList parserUrl(Parser parser) {
	// Accept a node only when its text begins with the literal prefix.
	NodeFilter anchorFilter = new NodeFilter() {
		@Override
		public boolean accept(Node candidate) {
			return candidate.getText().startsWith("a href=");
		}
	};
	NodeList matches = null;
	try {
		matches = parser.extractAllNodesThatMatch(anchorFilter);
	} catch (ParserException failure) {
		failure.printStackTrace();
	}
	return matches;
}
 
开发者ID:PerkinsZhu,项目名称:WebSprider,代码行数:20,代码来源:HtmlParser01.java

示例2: dealTag

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Processes a tag by handing each of its child nodes to {@code parserNode}.
 * A tag without children is ignored. If a {@link ParserException} is raised
 * inside the loop, the text "ParserException" is printed to standard output
 * and the remaining children are skipped.
 *
 * @param tag the tag to process
 */
private void dealTag(Node tag) {
	NodeList children = tag.getChildren();
	if (children == null) {
		return;
	}
	NodeIterator cursor = children.elements();
	try {
		while (cursor.hasMoreNodes()) {
			// presumably parserNode recurses back into dealTag for nested tags; verify
			parserNode(cursor.nextNode());
		}
	} catch (ParserException exc) {
		System.out.println("ParserException");
	}
}
 
开发者ID:uraplutonium,项目名称:hadoop-distributed-crawler,代码行数:21,代码来源:URLAnalyzer.java

示例3: assertion

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Builds a table tag holding a header with one column and a row with one
 * column, wraps it in an {@code HtmlTable}, and returns the first assertion
 * that a {@code QueryTable} with id "id" produces for it.
 */
private Assertion assertion() throws SyntaxError {
   // Header with a single column.
   TableHeader header = new TableHeader();
   header.setChildren( new NodeList( new TableColumn() ) );

   // Row with a single column.
   TableRow bodyRow = new TableRow();
   bodyRow.setChildren( new NodeList( new TableColumn() ) );

   // Table made of the header followed by the row.
   NodeList tableRows = new NodeList();
   tableRows.add( header );
   tableRows.add( bodyRow );
   TableTag htmlTableTag = new TableTag();
   htmlTableTag.setChildren( tableRows );

   TestPage page = new WikiTestPage( new WikiPageDummy() );
   SlimTestContext context = new SlimTestContextImpl( page );
   Table slimTable = new HtmlTable( htmlTableTag );
   List<SlimAssertion> assertions = new QueryTable( slimTable, "id", context ).getAssertions();
   return assertions.get( 0 );
}
 
开发者ID:ZsZs,项目名称:FitNesseLauncher,代码行数:21,代码来源:DelegatingResultsListenerTest.java

示例4: parseDetailInfo

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Extracts detail entries from the link tags that are direct children of the
 * first node in {@code nodeList}.
 *
 * <p>For each {@code LinkTag} child with a non-blank {@code id} attribute that
 * does not contain "all", the part of the id after its first underscore (the
 * whole id when there is no underscore) becomes the key and the link's plain
 * text becomes the value. A failure on one child is logged and does not stop
 * the remaining children from being processed.
 *
 * @param nodeList the nodes to inspect; only the first element is read
 * @return a mutable map of id suffix to link text; empty when
 *         {@code nodeList} is {@code null} or empty, or when its first node
 *         has no children
 */
private Map<String, String> parseDetailInfo(NodeList nodeList) {
    Map<String, String> infoMap = Maps.newHashMap();
    if (nodeList == null || nodeList.size() == 0) {
        return infoMap;
    }
    // getChildren() returns null for a node without children; dereferencing
    // it here used to throw a NullPointerException outside the try block.
    NodeList children = nodeList.elementAt(0).getChildren();
    if (children == null) {
        return infoMap;
    }
    for (Node pageNode : children.toNodeArray()) {
        if (!(pageNode instanceof LinkTag)) {
            continue;
        }
        try {
            String rawId = ((LinkTag) pageNode).getAttribute("id");
            if (StringUtils.isBlank(rawId) || rawId.contains("all")) {
                continue;
            }
            // indexOf yields -1 when there is no underscore, so the whole id is kept.
            String id = rawId.substring(rawId.indexOf('_') + 1);
            infoMap.put(id, pageNode.toPlainTextString());
        } catch (Exception e) {
            // Best effort: one malformed link must not abort the whole parse.
            log.error("parse parseDetailInfo catch Exception:", e);
        }
    }
    return infoMap;
}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:26,代码来源:HouseParser.java

示例5: parseSpan

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Builds a string from the {@code class} attributes of the spans found
 * directly inside each {@code Div} of {@code nodeList}.
 *
 * <p>Each span's class attribute is looked up in
 * {@code MappingSet.NUMBER_MAPPING} and the result is appended in document
 * order. As soon as a div whose plain text equals "-" (ignoring case) is met,
 * "0" is returned and anything accumulated so far is discarded. Divs without
 * children contribute nothing.
 *
 * @param nodeList the nodes to scan
 * @return the concatenated mapped values, or "0" when a "-" div is present
 */
private String parseSpan(NodeList nodeList) {
    StringBuilder sb = new StringBuilder();
    for (Node node : nodeList.toNodeArray()) {
        if (!(node instanceof Div)) {
            continue;
        }
        if (StringUtils.equalsIgnoreCase("-", node.toPlainTextString())) {
            return "0";
        }
        // getChildren() is null for a div with no children; calling
        // toNodeArray() on it used to throw a NullPointerException.
        NodeList spanNodeList = node.getChildren();
        if (spanNodeList == null) {
            continue;
        }
        for (Node spanNode : spanNodeList.toNodeArray()) {
            if (spanNode instanceof Span) {
                String attribute = ((Span) spanNode).getAttribute("class");
                // NOTE(review): if NUMBER_MAPPING has no entry for this class the
                // lookup result is presumably null and the text "null" is appended;
                // confirm whether unknown classes should be skipped instead.
                sb.append(MappingSet.NUMBER_MAPPING.get(attribute));
            }
        }
    }
    return sb.toString();
}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:25,代码来源:HouseParser.java

示例6: processNodeList

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Walks the node tree depth-first and prints, to standard output, the plain
 * text of every childless node whose text contains {@code keyword}.
 *
 * @param list    the nodes to walk
 * @param keyword the text to look for
 */
private static void processNodeList(NodeList list, String keyword) {
	for (SimpleNodeIterator cursor = list.elements(); cursor.hasMoreNodes();) {
		Node current = cursor.nextNode();
		NodeList descendants = current.getChildren();
		if (descendants != null) {
			// Node has children: descend into them.
			processNodeList(descendants, keyword);
			continue;
		}
		// No children: treat the node as a value node and test its text.
		String text = current.toPlainTextString();
		if (text.contains(keyword)) {
			System.out.println(text);
		}
	}
}
 
开发者ID:YufangWoo,项目名称:news-crawler,代码行数:22,代码来源:HtmlParserTest.java

示例7: parseMessage

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
   * Parses the body of an HTML message and returns every node the parser
   * produces. See <a href="http://htmlparser.sourceforge.net/">the HTML
   * Parser project</a> for details.
   *
   * @param url the url that the message resulted from (not read by this method)
   * @param message the Message to parse
   * @return a NodeList containing the nodes making up the page, or
   *         {@code null} when the Content-Type header is missing or does not
   *         match {@code text/html.*}, when the content is null or empty, or
   *         when parsing fails (the failure is logged at severe level)
   */
  public Object parseMessage(HttpUrl url, Message message) {
      String declaredType = message.getHeader("Content-Type");
      boolean isHtml = declaredType != null && declaredType.matches("text/html.*");
      if (!isHtml) {
          return null;
      }
      byte[] body = message.getContent();
      boolean hasBody = body != null && body.length > 0;
      if (!hasBody) {
          return null;
      }
      // NOTE(review): new String(byte[]) decodes with the platform default
      // charset; confirm that is acceptable for the pages being parsed.
      Parser parser = Parser.createParser(new String(body), null);
      // Filter that keeps every node.
      NodeFilter acceptEverything = new NodeFilter() {
          public boolean accept(Node node) {
              return true;
          }
      };
      try {
          return parser.extractAllNodesThatMatch(acceptEverything);
      } catch (ParserException pe) {
          _logger.severe(pe.toString());
          return null;
      }
  }
 
开发者ID:Neraud,项目名称:PADListener,代码行数:30,代码来源:HTMLParser.java

示例8: analyse

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Inspects a response. For status "302" the Location header is resolved
 * against the request URL and registered with the model as an unseen link;
 * a missing or malformed header is logged as a warning. For any other status
 * the response is parsed and, when the result is a {@code NodeList}, handed
 * to {@code processHtml}.
 *
 * @param id       the conversation id (not read by this method)
 * @param request  the request whose URL serves as the base URL
 * @param response the response to analyse
 * @param origin   the origin of the conversation (not read by this method)
 */
public void analyse(ConversationID id, Request request, Response response, String origin) {
    HttpUrl base = request.getURL();
    if (!response.getStatus().equals("302")) {
        Object parsed = Parser.parse(base, response);
        // Only HTML results (a NodeList) are handled; anything else is ignored.
        if (parsed instanceof NodeList) {
            processHtml(base, (NodeList) parsed);
        }
        return;
    }
    String location = response.getHeader("Location");
    if (location == null) {
        _logger.warning("302 received, but no Location header!");
        return;
    }
    try {
        HttpUrl target = new HttpUrl(base, location);
        _model.addUnseenLink(target, base);
    } catch (MalformedURLException mue) {
        _logger.warning("Badly formed Location header : " + location);
    }
}
 
开发者ID:Neraud,项目名称:PADListener,代码行数:24,代码来源:Spider.java

示例9: getGangliaAttribute

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Fetches the Ganglia page for a cluster and returns the text of every option
 * inside each {@code <select id="metrics-picker">} element.
 *
 * <p>The page URL is built by replacing {@code clusterPattern} in
 * {@code gangliaMetricUrl} with {@code clusterName}. Children of the select
 * that are not {@code OptionTag}s (for example text nodes between options)
 * are skipped, and a select without children contributes nothing.
 *
 * @param clusterName the cluster name substituted into the URL
 * @return the option texts in document order; empty when none are found
 * @throws ParserException       if the page cannot be parsed
 * @throws MalformedURLException if the resulting URL is malformed
 * @throws IOException           if the connection cannot be opened
 */
public List<String> getGangliaAttribute(String clusterName)
		throws ParserException, MalformedURLException, IOException {
	// NOTE(review): replaceAll treats clusterPattern as a regex and interprets
	// '$' and '\' in clusterName as replacement syntax; confirm cluster names
	// never contain those characters.
	String url = gangliaMetricUrl.replaceAll(clusterPattern, clusterName);
	Parser parser = new Parser(new URL(url).openConnection());
	NodeFilter nodeFilter = new AndFilter(new TagNameFilter("select"),
			new HasAttributeFilter("id", "metrics-picker"));
	NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
	List<String> metricList = new ArrayList<String>();
	SimpleNodeIterator iterator = nodeList.elements();
	while (iterator.hasMoreNodes()) {
		// getChildren() is null for an element with no children.
		NodeList options = iterator.nextNode().getChildren();
		if (options == null) {
			continue;
		}
		SimpleNodeIterator childIterator = options.elements();
		while (childIterator.hasMoreNodes()) {
			Node child = childIterator.nextNode();
			// Casting a non-option child used to throw a ClassCastException.
			if (child instanceof OptionTag) {
				metricList.add(((OptionTag) child).getOptionText());
			}
		}
	}
	return metricList;
}
 
开发者ID:Ctrip-DI,项目名称:Hue-Ctrip-DI,代码行数:23,代码来源:GangliaHttpParser.java

示例10: main

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Prints the text of every option inside each
 * {@code <select id="metrics-picker">} element of a Ganglia page.
 *
 * <p>Children of the select that are not {@code OptionTag}s (for example text
 * nodes between options) are skipped, and a select without children prints
 * nothing.
 *
 * @param args optional; when {@code args[0]} is present it is used as the
 *             page URL, otherwise the built-in Zookeeper cluster URL is used
 * @throws Exception if the page cannot be fetched or parsed
 */
public static void main(String[] args) throws Exception {
	String pageUrl = (args != null && args.length > 0)
			? args[0]
			: "http://10.8.75.3/ganglia/?r=hour&cs=&ce=&s=by+name&c=Zookeeper_Cluster&tab=m&vn=&hide-hf=false";
	Parser parser = new Parser(new URL(pageUrl).openConnection());
	NodeFilter nodeFilter = new AndFilter(new TagNameFilter("select"),
			new HasAttributeFilter("id", "metrics-picker"));
	NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
	SimpleNodeIterator iterator = nodeList.elements();
	while (iterator.hasMoreNodes()) {
		// getChildren() is null for an element with no children.
		NodeList options = iterator.nextNode().getChildren();
		if (options == null) {
			continue;
		}
		SimpleNodeIterator childIterator = options.elements();
		while (childIterator.hasMoreNodes()) {
			Node child = childIterator.nextNode();
			// Casting a non-option child used to throw a ClassCastException.
			if (child instanceof OptionTag) {
				System.out.println(((OptionTag) child).getOptionText());
			}
		}
	}
}
 
开发者ID:Ctrip-DI,项目名称:Hue-Ctrip-DI,代码行数:18,代码来源:TestGangliaHttpParser.java

示例11: splitHtml

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Splits the {@code content} field into HTML fragments.
 *
 * <p>The parsed content is handed to {@code recusiveSplitHtml}, whose result
 * forms the leading fragments. A final fragment is then built from
 * {@code content.substring(startPosition)}, prefixed with the opening text of
 * every tag in {@code tagNodeList} that starts before {@code startPosition}
 * and whose end tag ends at or after it; that fragment is re-parsed and its
 * HTML appended to the result. A {@link ParserException} is printed and
 * whatever was collected so far is returned.
 *
 * @return the list of HTML fragments
 */
private List<String> splitHtml() {
	List<String> pages = new ArrayList<String>();
	try {
		NodeList parsedContent = Parser.createParser(content, "UTF-8").parse(null);
		// presumably recusiveSplitHtml updates tagNodeList and startPosition, which
		// are read below; verify against its definition
		pages = recusiveSplitHtml(parsedContent);

		StringBuilder tail = new StringBuilder();
		for (TagNode openTag : tagNodeList) {
			boolean startsBefore = openTag.getStartPosition() < startPosition;
			// NOTE(review): getEndTag() is dereferenced without a null check; confirm
			// every tag in tagNodeList has an end tag.
			if (startsBefore && openTag.getEndTag().getEndPosition() >= startPosition) {
				tail.append("<").append(openTag.getText()).append(">");
			}
		}
		tail.append(content.substring(startPosition));

		NodeList tailNodes = Parser.createParser(tail.toString(), "UTF-8").parse(null);
		pages.add(tailNodes.toHtml());
	} catch (ParserException e) {
		e.printStackTrace();
	}
	return pages;
}
 
开发者ID:wangko27,项目名称:SelfSoftShop,代码行数:24,代码来源:Article.java

示例12: getHtmlRoot

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Parses HTML into a node list after replacing the second capture group of
 * every {@code ptnScript} match with a generated placeholder.
 *
 * <p>For each match the shared {@code SCRIPT_ID} counter is incremented, a
 * placeholder of the form {@code ___SCRIPT_<id>_TPIRCS___} is written between
 * groups 1 and 3, and group 2 is stored in {@code scriptSegs} under that
 * placeholder. The rewritten HTML is then passed to the single-argument
 * {@code getHtmlRoot}.
 *
 * @param html
 *            the HTML text to parse
 * @param scriptSegs
 *            receives one entry per match, mapping placeholder to the
 *            original group-2 text
 * @return whatever the single-argument {@code getHtmlRoot} returns for the
 *         rewritten HTML
 */
public static NodeList getHtmlRoot(String html,
		Map<String, String> scriptSegs) {
	StringBuilder rewritten = new StringBuilder();
	Matcher scriptMatcher = ptnScript.matcher(html);
	int cursor = 0;
	while (scriptMatcher.find(cursor)) {
		++SCRIPT_ID;
		// Copy the text before the match, then the part of the match kept as is.
		rewritten.append(html, cursor, scriptMatcher.start())
				.append(scriptMatcher.group(1));
		String placeholder = String.format("___SCRIPT_%d_TPIRCS___", SCRIPT_ID);
		rewritten.append(placeholder);
		scriptSegs.put(placeholder, scriptMatcher.group(2));
		rewritten.append(scriptMatcher.group(3));
		cursor = scriptMatcher.end();
	}
	// Copy whatever follows the last match.
	rewritten.append(html, cursor, html.length());

	return getHtmlRoot(rewritten.toString());
}
 
开发者ID:javajoker,项目名称:infoecos,代码行数:31,代码来源:HtmlUtil.java

示例13: getValidHtml

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Re-serializes HTML from its parsed root nodes, leaving out any root-level
 * node that is an end tag. If anything goes wrong the input is returned
 * unchanged.
 *
 * @param html
 *            the HTML text to normalize
 * @return the HTML rebuilt from the parsed nodes, or {@code html} itself when
 *         an exception occurs
 */
public static String getValidHtml(String html) {
	try {
		StringBuilder rebuilt = new StringBuilder();
		for (Node node : getHtmlRoot(html).toNodeArray()) {
			boolean isEndTag = node instanceof TagNode && ((TagNode) node).isEndTag();
			if (!isEndTag) {
				rebuilt.append(node.toHtml());
			}
		}
		return rebuilt.toString();
	} catch (Exception e) {
		// do not throw exceptions, just return the input html
		return html;
	}
}
 
开发者ID:javajoker,项目名称:infoecos,代码行数:25,代码来源:HtmlUtil.java

示例14: run

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Parses {@code content}, takes the children of the second root node, drops
 * the first two of them, and passes every remaining node that has children
 * to {@code toObject}. Any exception is logged; {@code store} is always
 * closed at the end.
 */
@Override
public void run() {
	try {
		parser = new Parser(content);
		logger.info(currentThread().getName() + "开始解析Post请求响应的HTML!,并存储到HBASE中!");
		NodeIterator roots = parser.elements();
		// Skip the first root node; the records live under the second one.
		roots.nextNode();
		NodeList records = roots.nextNode().getChildren();
		// Four or fewer children is treated as "no real content" (an error, or all
		// data already read): raise endFlag, which per the original comment stops
		// new threads from being started and ends the current task.
		if (records.size() <= 4) {
			program.endFlag = true;
		}
		// Drop the two leading children, then process the remaining records.
		records.remove(0);
		records.remove(0);
		for (SimpleNodeIterator cursor = records.elements(); cursor.hasMoreNodes();) {
			Node record = cursor.nextNode();
			if (record.getChildren() != null) {
				toObject(record);
			}
		}
	} catch (Exception e) {
		logger.error(currentThread().getName() + "解析HTML文件出现异常!\n"+e.getMessage()+"\n");
	} finally {
		logger.info(currentThread().getName() + "HTML文件解析结束!");
		store.close();
	}
}
 
开发者ID:husky00,项目名称:worm,代码行数:36,代码来源:PostRequestHtmlParser.java

示例15: parsePageInfo

import org.htmlparser.util.NodeList; //导入依赖的package包/类
/**
 * Reads the total page count from the page at {@code url}.
 *
 * <p>Every {@code Div} carrying {@code class="pagenumber"} is inspected: the
 * text nodes under its second child are searched for a "/" and the text after
 * the first "/" is parsed as an integer (surrounding whitespace is ignored).
 * The first value that parses is returned. Divs with fewer than two children,
 * or whose second child has no children, are skipped.
 *
 * @param url the page to fetch
 * @return the parsed page count, or 0 when none is found
 * @throws IOException     if the connection cannot be opened
 * @throws ParserException if the page cannot be parsed
 */
private int parsePageInfo(final String url) throws IOException, ParserException {
    Parser parser = new Parser(CommonHttpURLConnection.getURLConnection(url));

    NodeFilter nodeFilter = new HasAttributeFilter("class", "pagenumber");
    NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
    for (Node node : nodeList.toNodeArray()) {
        if (!(node instanceof Div)) {
            continue;
        }
        // getChildren() is null for a node without children; the original chain
        // dereferenced it (and the second child) unconditionally.
        NodeList children = node.getChildren();
        if (children == null || children.size() < 2) {
            continue;
        }
        NodeList innerChildren = children.elementAt(1).getChildren();
        if (innerChildren == null) {
            continue;
        }
        for (Node innerNode : innerChildren.toNodeArray()) {
            if (!(innerNode instanceof TextNode)) {
                continue;
            }
            String pageStr = innerNode.toPlainTextString();
            int slash = pageStr.indexOf('/');
            if (slash < 0) {
                continue;
            }
            try {
                // Trim so that whitespace around the number does not defeat parseInt.
                return Integer.parseInt(pageStr.substring(slash + 1).trim());
            } catch (NumberFormatException ignored) {
                // Text after the slash is not a number; keep looking at later nodes.
            }
        }
    }
    return 0;
}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:36,代码来源:DepartmentParser.java


注:本文中的org.htmlparser.util.NodeList类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。