本文整理汇总了 Java 中 org.htmlparser.util.NodeList.elements 方法的典型用法代码示例。如果您正苦于以下问题:Java NodeList.elements 方法的具体用法?Java NodeList.elements 怎么用?Java NodeList.elements 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.htmlparser.util.NodeList 的用法示例。
在下文中一共展示了NodeList.elements方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: dealTag
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Processes a tag by handing each of its direct children to {@code parserNode}.
 *
 * @param tag the tag to process; a tag whose child list is {@code null} is ignored
 */
private void dealTag(Node tag) {
    NodeList children = tag.getChildren();
    if (children == null) {
        return;
    }
    NodeIterator childIt = children.elements();
    try {
        while (childIt.hasMoreNodes()) {
            // Recursive step: analyse the child node.
            parserNode(childIt.nextNode());
        }
    } catch (ParserException exc) {
        // Iteration stops at the first parser failure; only a marker line is printed.
        System.out.println("ParserException");
    }
}
示例2: processNodeList
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Walks a node list depth-first and prints the plain text of every node that
 * has no child list and whose text contains the keyword.
 *
 * @param list    the nodes to scan
 * @param keyword the text to search for
 */
private static void processNodeList(NodeList list, String keyword) {
    for (SimpleNodeIterator it = list.elements(); it.hasMoreNodes(); ) {
        Node current = it.nextNode();
        NodeList children = current.getChildren();
        if (children != null) {
            // The node has children: keep descending.
            processNodeList(children, keyword);
            continue;
        }
        // No child list means this is a value node: inspect its text.
        String text = current.toPlainTextString();
        if (text.contains(keyword)) {
            System.out.println(text);
        }
    }
}
示例3: getGangliaAttribute
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Downloads the Ganglia metrics page of a cluster and collects the text of the
 * options found inside every {@code <select id="metrics-picker">} element.
 *
 * <p>The page URL is {@code gangliaMetricUrl} with every match of the regular
 * expression {@code clusterPattern} replaced by {@code clusterName}.
 *
 * @param clusterName the cluster name substituted into the URL template
 * @return the option texts in document order; empty when no matching select exists
 * @throws ParserException       if the page cannot be parsed
 * @throws MalformedURLException if the resulting URL is not valid
 * @throws IOException           if the connection cannot be opened
 */
public List<String> getGangliaAttribute(String clusterName)
        throws ParserException, MalformedURLException, IOException {
    String url = gangliaMetricUrl.replaceAll(clusterPattern, clusterName);
    Parser parser = new Parser(new URL(url).openConnection());
    NodeFilter nodeFilter = new AndFilter(new TagNameFilter("select"),
            new HasAttributeFilter("id", "metrics-picker"));
    NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
    List<String> metricList = new ArrayList<String>();
    SimpleNodeIterator iterator = nodeList.elements();
    while (iterator.hasMoreNodes()) {
        NodeList children = iterator.nextNode().getChildren();
        if (children == null) {
            // An empty <select> has no child list; nothing to collect.
            continue;
        }
        SimpleNodeIterator childIterator = children.elements();
        while (childIterator.hasMoreNodes()) {
            Node child = childIterator.nextNode();
            // Children may also include non-option nodes (e.g. whitespace text
            // between tags); casting those blindly would fail.
            if (child instanceof OptionTag) {
                metricList.add(((OptionTag) child).getOptionText());
            }
        }
    }
    return metricList;
}
示例4: main
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Demo entry point: fetches a fixed Ganglia page and prints the text of every
 * option inside the {@code <select id="metrics-picker">} element(s).
 *
 * @param args unused
 * @throws Exception if the page cannot be fetched or parsed
 */
public static void main(String[] args) throws Exception {
    Parser parser = new Parser(new URL("http://10.8.75.3/ganglia/?r=hour&cs=&ce=&s=by+name&c=Zookeeper_Cluster&tab=m&vn=&hide-hf=false").openConnection());
    NodeFilter nodeFilter = new AndFilter(new TagNameFilter("select"),
            new HasAttributeFilter("id", "metrics-picker"));
    NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
    SimpleNodeIterator iterator = nodeList.elements();
    while (iterator.hasMoreNodes()) {
        NodeList children = iterator.nextNode().getChildren();
        if (children == null) {
            // An empty <select> has no child list; nothing to print.
            continue;
        }
        SimpleNodeIterator childIterator = children.elements();
        while (childIterator.hasMoreNodes()) {
            Node child = childIterator.nextNode();
            // Skip non-option children (e.g. whitespace text nodes) instead of
            // failing on the cast.
            if (child instanceof OptionTag) {
                System.out.println(((OptionTag) child).getOptionText());
            }
        }
    }
}
示例5: run
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Parses the HTML held in {@code content} and passes every record node that
 * has children to {@code toObject}.
 *
 * <p>The first root node is skipped and the children of the second root node
 * are used as the record list. When that list has four entries or fewer,
 * {@code program.endFlag} is set. Any exception is logged together with its
 * stack trace, and {@code store} is closed in all cases.
 */
@Override
public void run() {
    try {
        parser = new Parser(content);
        logger.info(currentThread().getName() + "开始解析Post请求响应的HTML!,并存储到HBASE中!");
        NodeIterator rootList = parser.elements();
        rootList.nextNode();
        NodeList nodeList = rootList.nextNode().getChildren();
        /*
         * Check whether the HTML response carries real content; this triggers
         * on an error page or once all data has been read. When it triggers,
         * raise endFlag so that no new threads are started and the current
         * task winds down.
         */
        if (nodeList.size() <= 4) {
            program.endFlag = true;
        }
        /*
         * Drop the two leading entries, then parse the remaining record tags.
         */
        nodeList.remove(0);
        nodeList.remove(0);
        SimpleNodeIterator childList = nodeList.elements();
        while (childList.hasMoreNodes()) {
            Node node = childList.nextNode();
            if (node.getChildren() != null) {
                toObject(node);
            }
        }
    } catch (Exception e) {
        // Pass the exception itself so the stack trace is not lost.
        logger.error(currentThread().getName() + "解析HTML文件出现异常!\n"+e.getMessage()+"\n", e);
    } finally {
        logger.info(currentThread().getName() + "HTML文件解析结束!");
        store.close();
    }
}
示例6: main
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Demo entry point: fetches the page configured as {@code mlink} through an
 * authenticated HTTP proxy, prints the number of {@code <a>} tags found, then
 * prints every URL-decoded href accepted by {@code CommonHelper.checkIsAlink}.
 *
 * <p>Proxy host/port, credentials and connect timeout are read from the
 * {@code pro} properties object. Any failure is printed to standard error.
 *
 * @param args unused
 */
public static void main(String[] args) {
    try {
        URL url = new URL(pro.getProperty("mlink"));
        SocketAddress address = new InetSocketAddress(pro.getProperty("host"), Integer.parseInt(pro.getProperty("port")));
        Proxy proxy = new Proxy(Proxy.Type.HTTP, address);
        URLConnection conn = url.openConnection(proxy);
        Authenticator.setDefault(new MyAuthenticator(pro.getProperty("username"), pro.getProperty("password")));
        conn.setConnectTimeout(Integer.parseInt(pro.getProperty("timeout")));
        Parser parser = new Parser(conn);
        NodeList nodeList = parser.parse(new TagNameFilter("A"));
        System.out.println(nodeList.size());
        for (SimpleNodeIterator it = nodeList.elements(); it.hasMoreNodes(); ) {
            TagNode node = (TagNode) it.nextNode();
            String href = node.getAttribute("href");
            if (href == null) {
                // Anchors without an href (e.g. named anchors) carry no link;
                // decoding null would abort the whole loop.
                continue;
            }
            String dhref = URLDecoder.decode(href, "UTF-8");
            if (CommonHelper.checkIsAlink(dhref)) {
                System.out.println(dhref);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
示例7: processResponse
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Extracts the feed entries from one HTTP response and appends them to
 * {@code root}.
 *
 * @param resp the HTTP response to process
 * @param doc  the document passed to {@code extractContent} for each entry
 * @param root the element that receives one child per extracted entry
 * @return {@code true} only when the status is OK, the page is at least 500
 *         characters long, every entry was extracted and exactly 15 entries
 *         were found; {@code false} otherwise
 */
private boolean processResponse(HttpResponse resp, Document doc, Element root) {
    if (resp.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
        return false;
    }
    System.out.println("[INFO] HTTP Status OK.");
    System.out.println("[INFO] Extracting html page...");

    String html = extractHtml(resp);
    if (html == null) {
        return false;
    }
    System.out.println("[INFO] " + html.length() + "B html page extracted.");

    // A very short page is treated as the end of the data.
    if (html.length() < 500) {
        System.out.println("[INFO] EOF reached, task completed.");
        return false;
    }

    System.out.println("[INFO] Parsing html page...");
    try {
        Parser parser = new Parser(html);
        NodeList entries = parser.extractAllNodesThatMatch(
                new HasAttributeFilter("action-type", "feed_list_item"));
        System.out.println("[INFO] " + entries.size() + " entries detected.");
        for (SimpleNodeIterator it = entries.elements(); it.hasMoreNodes(); ) {
            System.out.println("[INFO] processing entry #" + (++total) + "...");
            Element extracted = extractContent(it.nextNode(), doc);
            if (extracted == null) {
                System.out.println("[ERROR] Data extraction failed.");
                return false;
            }
            root.appendChild(extracted);
        }
        // Anything other than 15 entries makes the call report false.
        return entries.size() == 15;
    } catch (ParserException e) {
        System.out.println("[ERROR] Parser failed.");
        e.printStackTrace();
        return false;
    }
}
示例8: parserNode
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Performs lexical analysis on a node.
 *
 * <p>A text node seen at depth 1 triggers a re-parse of the node's page, and
 * every non-blank text node found is passed to {@code segment}. A tag node has
 * its link (when it is a {@code LinkTag}) recorded in {@code urlInfo} and its
 * children processed through {@code dealTag}. The {@code depth} counter is
 * incremented on entry and decremented on exit.
 *
 * @param node the node to analyse
 */
private void parserNode(Node node) {
    depth++;
    // Matches strings made only of blank characters (including the empty string).
    String regex = "[ \b\t\n\f\r]*";
    if (node instanceof TextNode) {
        // Text node: segment the page text, but only for a top-level call.
        if (depth == 1) {
            System.out.println("TextNode!");
            Lexer lexer = new Lexer(node.getPage());
            Parser parser = new Parser(lexer, Parser.STDOUT);
            // TODO filter script & style
            // NOTE(review): OR-ing two NotFilters over different tag names looks
            // like it accepts every node, and both names end in a space - confirm.
            OrFilter it = new OrFilter(new NotFilter(new TagNameFilter("script ")), new NotFilter(new TagNameFilter("style ")));
            try {
                NodeList matches = parser.extractAllNodesThatMatch(it);
                for (NodeIterator walker = matches.elements(); walker.hasMoreNodes(); ) {
                    Node candidate = walker.nextNode();
                    // Only non-blank text is segmented.
                    if (candidate instanceof TextNode && !candidate.getText().matches(regex)) {
                        segment(candidate.getText());
                    }
                }
            } catch (ParserException exc) {
                System.out.println("ParserException");
            }
        }
    } else if (node instanceof TagNode) {
        // Link tag: record the outgoing link before descending.
        if (node instanceof LinkTag) {
            LinkTag link = (LinkTag) node;
            if (!link.getLink().matches(regex)) {
                urlInfo.addExtendedURL(link.getLink()); // add the outgoing link to urlInfo
            }
        }
        dealTag(node);
    }
    depth--;
}
示例9: preencheMapaFeriadosEstaduais
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Fills {@code mapaFeriadosEstado} with the holidays listed per state.
 *
 * <p>The page returned by {@code recuperarDadosEstado} is cut at its first
 * {@code <h3} and passed to {@code filterTable}. Each heading node starts a
 * new state entry; each table node contributes holidays to the most recent
 * state, reading its columns three at a time (date, name, and a third column
 * that is skipped). Every holiday is also printed as a CSV-like line.
 *
 * @throws IOException     declared by the original signature
 * @throws ParserException declared by the original signature
 * @throws ParseException  declared by the original signature
 */
private void preencheMapaFeriadosEstaduais() throws IOException, ParserException,ParseException {
    String estadosPage = recuperarDadosEstado();
    // Discard everything before the first heading.
    String conteudo = estadosPage.substring(estadosPage.indexOf("<h3"));
    NodeList nodeEstadoList = filterTable(conteudo);

    // Month name -> two-digit month number.
    String todosMeses[] = {"janeiro", "fevereiro", "março", "abril", "maio", "junho", "julho", "agosto", "setembro", "outubro", "novembro", "dezembro"};
    Map<String,String> mapaMeses = new HashMap<String,String>();
    for (int idx = 0; idx < todosMeses.length; idx++) {
        int numero = idx + 1;
        mapaMeses.put(todosMeses[idx], (numero < 10 ? "0" : "") + numero);
    }

    String estado = null;
    for (Node node : nodeEstadoList.toNodeArray()) {
        if (node instanceof TableTag) {
            NodeList lista = ((TableTag) node).searchFor(TableColumn.class, true);
            for (SimpleNodeIterator colunas = lista.elements(); colunas.hasMoreNodes(); ) {
                Feriado feriado = new Feriado();
                String[] dataExtenso = colunas.nextNode().toPlainTextString().split(" de ");
                String mes = mapaMeses.get(dataExtenso[1]);
                feriado.setData(dataExtenso[0] + "/" + mes + "/2015");
                String nomeFeriado = colunas.nextNode().toPlainTextString();
                feriado.setNome(nomeFeriado);
                // The third column is consumed to keep the iterator aligned; its value is not used.
                colunas.nextNode();
                // The printed line (not the stored date) uses a zero-padded day.
                String dia = dataExtenso[0].length() == 1 ? "0" + dataExtenso[0] : dataExtenso[0];
                System.out.println(dia + "/" + mes + "/2015," + nomeFeriado + "," + mapaEstados.get(estado));
                mapaFeriadosEstado.get(estado).add(feriado);
            }
        }
        if (node instanceof HeadingTag) {
            NodeList filhos = node.getChildren();
            estado = filhos.toHtml().trim();
            // When the heading wraps its text in another tag, use that tag's content.
            NodeList netos = filhos.elementAt(0).getChildren();
            if (netos != null) {
                estado = netos.toHtml().trim();
            }
            mapaFeriadosEstado.put(estado, new ArrayList<Feriado>());
        }
    }
}
示例10: filter
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Builds an attribute-to-method translator from the table rows of an Android
 * reference page.
 *
 * <p>Rows are {@code <tr>} tags whose class attribute is either
 * {@code "alt-color api apilevel-"} or {@code " api apilevel-"}. Each row must
 * have exactly seven child nodes; child 1 supplies the attribute name (with
 * newlines removed) and child 3 supplies the method name.
 *
 * @param content the HTML to parse
 * @return the translator populated with one entry per matching row
 * @throws AndroidDocException if a row does not have seven children or the
 *         HTML cannot be parsed
 */
private AX2JClassTranslator filter(String content) {
    try {
        Parser parser = Parser.createParser(content, Config.ENCODE);
        AndFilter altRows =
                new AndFilter(new TagNameFilter("tr"), new HasAttributeFilter("class","alt-color api apilevel-"));
        // Note: the class value " api apilevel-" really does start with a space.
        AndFilter plainRows =
                new AndFilter(new TagNameFilter("tr"), new HasAttributeFilter("class"," api apilevel-"));
        NodeList rows = parser.parse(new OrFilter(altRows, plainRows));
        AX2JClassTranslator map = new AX2JClassTranslator(type);
        for (NodeIterator rowIt = rows.elements(); rowIt.hasMoreNodes(); ) {
            NodeList cells = rowIt.nextNode().getChildren();
            /*
             * ***** trNodeList example *****
             * Txt (268[6,37],269[7,0]): \nTag (269[7,0],292[7,23]): td class="jd-linkcol"
             * Tag (292[7,23],381[7,112]): a href="../../../reference/android/view/View.html...
             * Txt (381[7,112],412[7,143]): android:accessibilityLiveRegion
             * End (412[7,143],416[7,147]): /a
             * End (416[7,147],421[7,152]): /td
             * Txt (421[7,152],422[8,0]): \nTag (422[8,0],445[8,23]): td class="jd-linkcol"
             * Txt (445[8,23],446[9,0]): \n
             * Tag (446[9,0],530[9,84]): a href="../../../reference/android/view/View.html#s...
             * Txt (530[9,84],561[9,115]): setAccessibilityLiveRegion(int)
             * End (561[9,115],565[9,119]): /a
             * Txt (565[9,119],566[10,0]): \n
             * End (566[10,0],571[10,5]): /td
             * Txt (571[10,5],572[11,0]): \nTag (572[11,0],609[11,37]): td class="jd-descrcol" width="100%"
             * Txt (609[11,37],712[14,0]): \nIndicates to accessibility services whether the...
             * End (712[14,0],717[14,5]): /td
             * Txt (717[14,5],718[15,0]): \n
             * ***** trNodeList example *****
             */
            if (cells.size() != 7) {
                throw new AndroidDocException(AndroidDocException.ATM_FORMAT_ERROR);
            }
            String attr = cells.elementAt(1).toPlainTextString().replace("\n", "");
            String method = cells.elementAt(3).toPlainTextString();
            map.add(attr, method);
        }
        return map;
    } catch (ParserException e) {
        throw new AndroidDocException(AndroidDocException.AXML_FORMAT_ERROR);
    }
}
示例11: list
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Action {@code sdlist}: produces the news list for {@code domain},
 * {@code listid} and {@code page}, then emits it through {@code jsonp}.
 *
 * <p>The list is served from the {@code "News"} cache when present. Otherwise
 * the list page is fetched, every tag whose {@code href} matches
 * {@code .*}{@code /page\.htm} becomes a {@code News} item (id = href,
 * title = alt attribute, publication date = text of the first
 * {@code TableColumn} among the siblings following the tag's parent), and the
 * result is cached.
 *
 * @return {@code NONE}
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Action(value = "sdlist", results = { @Result(type = "json", params = {
        "root", "list" }) })
public String list() {
    Cache c = CacheManager.getInstance().getCache("News");
    String ckey =domain+listid + page;
    Element ele = c.get(ckey);
    if (!CommonUtil.isEmpty(ele)) {
        list = (List) ele.getObjectValue();
    } else {
        StringBuffer retstr = fetch(domain+"/"+listid+"/list"
                + page+".htm");
        Parser p = Parser.createParser(retstr.toString(), "utf-8");
        list = new ArrayList<News>();
        try {
            NodeList ls = p
                    .extractAllNodesThatMatch(new AttributeRegexFilter(
                            "href", ".*/page\\.htm"));
            SimpleNodeIterator i = ls.elements();
            while (i.hasMoreNodes()) {
                Node n = i.nextNode();
                if (n instanceof TagNode) {
                    TagNode tn = (TagNode) n;
                    News news = new News();
                    String href = tn.getAttribute("href");
                    news.setId(href);
                    news.setTitle(tn.getAttribute("alt"));
                    // A matching tag at the top level of the document has no
                    // parent; treat that as "no publication date available".
                    Node parent = tn.getParent();
                    Node tmp = (parent == null) ? null : parent.getNextSibling();
                    while (tmp != null && !(tmp instanceof TableColumn)) {
                        tmp = tmp.getNextSibling();
                    }
                    if (tmp != null) {
                        news.setPubdate(tmp.toPlainTextString());
                    }
                    list.add(news);
                }
            }
            c.put(new Element(ckey, list));
        } catch (ParserException e) {
            e.printStackTrace();
        }
    }
    jsonp(list);
    return NONE;
}
示例12: list
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Action {@code newslist}: loads the news list for {@code listid} and
 * {@code page} into {@code list}.
 *
 * <p>A cached list is used when available. Otherwise the page is fetched and
 * each tag with {@code class="date"} yields one {@code News} item whose
 * publication date is the tag text and whose id/title come from the first
 * {@code LinkTag} among the tag's following siblings (when there is one). The
 * freshly built list is then cached.
 *
 * @return {@code SUCCESS}
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Action(value = "newslist", results = { @Result(type = "json", params = {
        "root", "list" }) })
public String list() {
    Cache c = CacheManager.getInstance().getCache("News");
    String ckey = "newslist"+listid + page;
    Element cached = c.get(ckey);
    if (!CommonUtil.isEmpty(cached)) {
        list = (List) cached.getObjectValue();
        return SUCCESS;
    }

    StringBuffer retstr = fetch(RD+"/news/"+listid+"/"+page+".html");
    Parser p = Parser.createParser(retstr.toString(), "utf-8");
    list = new ArrayList<News>();
    try {
        NodeList dateNodes = p
                .extractAllNodesThatMatch(new HasAttributeFilter("class","date"));
        for (SimpleNodeIterator it = dateNodes.elements(); it.hasMoreNodes(); ) {
            Node n = it.nextNode();
            if (!(n instanceof TagNode)) {
                continue;
            }
            TagNode dateTag = (TagNode) n;
            News news = new News();
            news.setPubdate(dateTag.toPlainTextString());

            // Look for the first link that follows the date tag.
            Node sibling = dateTag.getNextSibling();
            while (sibling != null && !(sibling instanceof LinkTag)) {
                sibling = sibling.getNextSibling();
            }
            if (sibling != null) {
                LinkTag link = (LinkTag) sibling;
                news.setId(link.getAttribute("href"));
                news.setTitle(link.getAttribute("title"));
            }
            list.add(news);
        }
        c.put(new Element(ckey, list));
    } catch (ParserException e) {
        e.printStackTrace();
    }
    return SUCCESS;
}
示例13: list
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Action {@code eventlist}: fetches the recent-events page and writes a
 * rewritten HTML fragment to the servlet response.
 *
 * <p>When the fetched page contains exactly two tags with
 * {@code class="clear"}, the text between them is re-parsed; every link that
 * has a {@code title} attribute gets its children replaced by a text node
 * holding that title, and the attribute is removed. The resulting HTML is
 * printed to the response writer.
 *
 * <p>NOTE(review): a cache entry is read here but nothing in this method ever
 * stores one, and a cache hit writes nothing to the response - confirm intent.
 *
 * @return {@code NONE}
 * @throws IOException if the response writer cannot be obtained
 */
@SuppressWarnings("rawtypes")
@Action(value = "eventlist")
public String list() throws IOException {
    Cache c = CacheManager.getInstance().getCache("News");
    String ckey = "eventlist"+page ;
    Element ele = c.get(ckey);
    if (!CommonUtil.isEmpty(ele)) {
        list = (List) ele.getObjectValue();
    } else {
        StringBuffer retstr = fetch(RD+"/calendar/?a=list&&m=recent&range=30&_="+System.currentTimeMillis()+"&type=0&place=0&type="+page );
        Parser p = Parser.createParser(retstr.toString(), "utf-8");
        list = new ArrayList<News>();
        try {
            NodeList ls = p
                    .extractAllNodesThatMatch(new HasAttributeFilter("class","clear"));
            if (ls.size() == 2) {
                int tk1 = ls.elementAt(0).getEndPosition();
                int tk2 = ls.elementAt(1).getStartPosition();
                ServletActionContext.getResponse().setCharacterEncoding("utf-8");
                // The fragment starts 6 characters after the first marker tag ends.
                p = Parser.createParser(retstr.substring(tk1+6, tk2), "utf-8");
                NodeList nl = p.parse(null);
                NodeList links = nl.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class),true);
                SimpleNodeIterator i = links.elements();
                while (i.hasMoreNodes()) {
                    LinkTag lt = (LinkTag) i.nextNode();
                    String title = lt.getAttribute("title");
                    if (title == null) {
                        // No title to show: leave the link as it is rather than
                        // building a text node from null.
                        continue;
                    }
                    NodeList ll = new NodeList();
                    ll.add(new TextNode(title));
                    lt.setChildren(ll);
                    lt.removeAttribute("title");
                }
                ServletActionContext.getResponse().getWriter().print(nl.toHtml());
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
    }
    return NONE;
}
示例14: content
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Action {@code eventcontent}: loads one event into {@code en}.
 *
 * <p>A cached entry is used when available. Otherwise the event page is
 * fetched; the text of each {@code h1} tag becomes the title and the HTML of
 * each {@code table} tag becomes the content (the last one of each wins).
 * When the page text contains {@code imageurl} followed by a single-quoted
 * value, that value (prefixed with {@code RD}) is treated as an image URL: its
 * MD5 digest is stored in the {@code CrawlerImages} collection if not already
 * present, and the digest is set as the event's pubdate field. The event is
 * cached only when it has content.
 *
 * @return {@code SUCCESS}
 */
@Action(value = "eventcontent", results = { @Result(type = "json", params = {
        "root", "en" }) })
public String content() {
    Cache c = CacheManager.getInstance().getCache("News");
    String ckey = "eventcontent" + newsid;
    Element ele = c.get(ckey);
    if (!CommonUtil.isEmpty(ele)) {
        en = (News) ele.getObjectValue();
    } else {
        StringBuffer retstr = fetch(RD+"/calendar/?a=one&evid="
                + newsid+"&_="+System.currentTimeMillis());
        Parser p = Parser.createParser(retstr.toString(), "utf-8");
        try {
            NodeList nl = p.extractAllNodesThatMatch(new OrFilter(
                    new TagNameFilter("h1"), new TagNameFilter("table")));
            SimpleNodeIterator i = nl.elements();
            en = new News();
            en.setId(newsid);
            while (i.hasMoreNodes()) {
                Node n = i.nextNode();
                if (n instanceof TagNode) {
                    TagNode tn = (TagNode) n;
                    if (tn.getTagName().equalsIgnoreCase("h1")) {
                        en.setTitle(tn.toPlainTextString());
                    }
                    if (tn.getTagName().equalsIgnoreCase("table")) {
                        en.setContent(tn.toHtml());
                    }
                }
            }
            // All indices below are computed on, and applied to, the same
            // trimmed string; mixing trimmed and untrimmed text would shift
            // the extracted substring whenever the page has leading whitespace.
            String str = retstr.toString().trim();
            int tk = str.indexOf("imageurl");
            if (tk >= 0) {
                int open = str.indexOf("'", tk);
                int close = (open < 0) ? -1 : str.indexOf("'", open + 1);
                // Only proceed when a complete '...' value follows the marker.
                if (close > open) {
                    String imgurl = RD + str.substring(open + 1, close);
                    String imgid = EncodeHelper.digest(
                            imgurl, "MD5");
                    BasicDBObject obj = new BasicDBObject("id",
                            imgid);
                    DBCollection col = MongoUtil.getInstance().getDB()
                            .getCollection("CrawlerImages");
                    DBObject dbo = col.findOne(obj);
                    if (dbo == null) {
                        col.save(obj.append("url",imgurl));
                    }
                    en.setPubdate(imgid);
                }
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
        if (!CommonUtil.isEmpty(en) && !CommonUtil.isEmpty(en.getContent())) {
            c.put(new Element(ckey, en));
        }
    }
    return SUCCESS;
}
示例15: list
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Action {@code calist}: loads the announcement list for {@code page} into
 * {@code list}.
 *
 * <p>A cached list is used when available. Otherwise the announcement page is
 * fetched and every tag whose {@code href} matches
 * {@code announce/\?announceid=\d+} becomes a {@code News} item: the id is the
 * part of the href after the first {@code =}, the title is the tag's plain
 * text. The freshly built list is then cached.
 *
 * @return {@code SUCCESS}
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Action(value = "calist", results = { @Result(type = "json", params = {
        "root", "list" }) })
public String list() {
    Cache c = CacheManager.getInstance().getCache("News");
    String ckey = "calist" + page;
    Element cached = c.get(ckey);
    if (!CommonUtil.isEmpty(cached)) {
        list = (List) cached.getObjectValue();
        return SUCCESS;
    }

    StringBuffer retstr = fetch(RD+"/announce/announce_list.php?page="
            + page);
    Parser p = Parser.createParser(retstr.toString(), "utf-8");
    list = new ArrayList<News>();
    try {
        NodeList announceLinks = p
                .extractAllNodesThatMatch(new AttributeRegexFilter(
                        "href", "announce/\\?announceid=\\d+"));
        for (SimpleNodeIterator it = announceLinks.elements(); it.hasMoreNodes(); ) {
            Node n = it.nextNode();
            if (!(n instanceof TagNode)) {
                continue;
            }
            TagNode linkTag = (TagNode) n;
            News news = new News();
            String href = linkTag.getAttribute("href");
            // The announcement id is whatever follows the first '='.
            int eq = href.indexOf("=");
            if (eq > 0) {
                news.setId(href.substring(eq + 1));
            }
            news.setTitle(linkTag.toPlainTextString());
            list.add(news);
        }
        c.put(new Element(ckey, list));
    } catch (ParserException e) {
        e.printStackTrace();
    }
    return SUCCESS;
}