当前位置: 首页>>代码示例>>Java>>正文


Java NodeList.toNodeArray方法代码示例

本文整理汇总了Java中org.htmlparser.util.NodeList.toNodeArray方法的典型用法代码示例。如果您正苦于以下问题：Java NodeList.toNodeArray方法的具体用法？Java NodeList.toNodeArray怎么用？Java NodeList.toNodeArray使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.htmlparser.util.NodeList的用法示例。


在下文中一共展示了NodeList.toNodeArray方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: parseSpan

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Builds a string from the {@code class} attributes of the {@code Span} children of every
 * {@code Div} in the given list.
 *
 * <p>Each span's {@code class} attribute is looked up in {@code MappingSet.NUMBER_MAPPING}
 * and the looked-up values are appended in iteration order. If a {@code Div} whose plain
 * text equals "-" (ignoring case) is encountered, the method returns "0" immediately.
 *
 * <p>NOTE(review): whatever {@code NUMBER_MAPPING.get} returns is appended as-is; an
 * unmapped class presumably appends the text "null" - confirm against the mapping.
 *
 * @param nodeList nodes to scan; nodes that are not {@code Div} are ignored
 * @return the concatenated mapped values, "0" for a "-" placeholder, or an empty string
 *         when no span was found
 */
private String parseSpan(NodeList nodeList) {
    StringBuilder sb = new StringBuilder();
    for (Node node : nodeList.toNodeArray()) {
        if (!(node instanceof Div)) {
            continue;
        }
        if (StringUtils.equalsIgnoreCase("-", node.toPlainTextString())) {
            return "0";
        }
        NodeList spanNodeList = node.getChildren();
        if (spanNodeList == null) {
            // A Div without children has no child list; nothing to decode here.
            continue;
        }
        for (Node spanNode : spanNodeList.toNodeArray()) {
            if (spanNode instanceof Span) {
                String attribute = ((Span) spanNode).getAttribute("class");
                sb.append(MappingSet.NUMBER_MAPPING.get(attribute));
            }
        }
    }
    return sb.toString();
}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:25,代码来源:HouseParser.java

示例2: getValidHtml

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Re-serializes the given HTML from its parsed root nodes, dropping any root-level
 * node that is an end tag.
 *
 * <p>NOTE(review): the original description says this auto-closes tags where necessary;
 * that would come from the parser behind {@code getHtmlRoot} / {@code toHtml()} - confirm.
 *
 * @param html the HTML text to normalize
 * @return the rebuilt HTML, or the unchanged input if anything fails along the way
 */
public static String getValidHtml(String html) {
	try {
		StringBuilder rebuilt = new StringBuilder();
		for (Node root : getHtmlRoot(html).toNodeArray()) {
			boolean isEndTag = root instanceof TagNode && ((TagNode) root).isEndTag();
			if (!isEndTag) {
				rebuilt.append(root.toHtml());
			}
		}
		return rebuilt.toString();
	} catch (Exception e) {
		// Best effort: never throw from here, fall back to the input HTML.
		return html;
	}
}
 
开发者ID:javajoker,项目名称:infoecos,代码行数:25,代码来源:HtmlUtil.java

示例3: parsePageInfo

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Reads the page count from the pager element of the page at {@code url}.
 *
 * <p>Looks at every {@code Div} carrying {@code class="pagenumber"}, takes its second
 * child, and scans that child's text nodes for one containing "/". The text after the
 * first "/" is parsed as the page count.
 *
 * @param url address of the page to fetch and parse
 * @return the first page count that parses as an integer, or 0 when none is found
 * @throws IOException     declared for the connection helper
 * @throws ParserException if the HTML parser fails
 */
private int parsePageInfo(final String url) throws IOException, ParserException {
    Parser parser = new Parser(CommonHttpURLConnection.getURLConnection(url));

    NodeFilter nodeFilter = new HasAttributeFilter("class", "pagenumber");
    NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
    for (Node node : nodeList.toNodeArray()) {
        if (!(node instanceof Div)) {
            continue;
        }
        // Guard the positional lookup: the pager is expected at child index 1, but a
        // Div without (enough) children must not abort the whole scan.
        NodeList divChildren = node.getChildren();
        if (divChildren == null || divChildren.size() < 2) {
            continue;
        }
        NodeList pagerChildren = divChildren.elementAt(1).getChildren();
        if (pagerChildren == null) {
            continue;
        }
        for (Node innerNode : pagerChildren.toNodeArray()) {
            if (!(innerNode instanceof TextNode)) {
                continue;
            }
            String pageStr = innerNode.toPlainTextString();
            if (!pageStr.contains("/")) {
                continue;
            }
            // Surrounding whitespace would make parseInt reject an otherwise valid number.
            pageStr = pageStr.substring(pageStr.indexOf("/") + 1).trim();
            try {
                return Integer.parseInt(pageStr);
            } catch (NumberFormatException ignored) {
                // Not a number after the slash; keep looking at the remaining text nodes.
            }
        }
    }
    return 0;
}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:36,代码来源:DepartmentParser.java

示例4: parseSpan

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Parses a price: collects every {@code span} tag below {@code node}, maps each span's
 * {@code class} attribute through {@code MappingSet.NUMBER_MAPPING}, concatenates the
 * results and strips all whitespace from the outcome.
 *
 * @param node the node whose span descendants are collected
 * @return the concatenated mapped values with whitespace removed
 */
private String parseSpan(Node node) {
    NodeList collectedSpans = new NodeList();
    node.collectInto(collectedSpans, new TagNameFilter("span"));

    StringBuilder price = new StringBuilder();
    for (Node candidate : collectedSpans.toNodeArray()) {
        if (!(candidate instanceof Span)) {
            continue;
        }
        String cssClass = ((Span) candidate).getAttribute("class");
        price.append(MappingSet.NUMBER_MAPPING.get(cssClass));
    }
    String raw = price.toString();
    return CharMatcher.WHITESPACE.removeFrom(raw);
}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:20,代码来源:DepartmentParser.java

示例5: run

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Crawls the most recent presale-permit entries from the page at {@code url}.
 *
 * <p>Takes the first node with {@code class="sale1"} and walks its children starting at
 * index 2. Every {@code TableRow} is converted to a {@code SellCreditInfo}; the walk
 * stops at the first entry that is already known to {@code dataOP}. New entries are
 * inserted, and for every row except the one at index 2 the house info is parsed too.
 *
 * @param url address of the listing page
 * @throws InterruptedException declared for the helpers invoked here
 * @throws IOException          declared for the connection helper
 * @throws ParserException      if the HTML parser fails
 */
public void run(String url) throws InterruptedException, IOException, ParserException {
    URLConnection connection = CommonHttpURLConnection.getURLConnection(url);
    Parser parser = new Parser(connection);
    NodeFilter saleFilter = new HasAttributeFilter("class", "sale1");
    NodeList matches = parser.extractAllNodesThatMatch(saleFilter);

    if (matches.toNodeArray().length <= 0) {
        return;
    }

    final int firstRowIndex = 2;
    Node[] rows = matches.elementAt(0).getChildren().toNodeArray();
    for (int index = firstRowIndex; index < rows.length; index++) {
        Node row = rows[index];
        if (!(row instanceof TableRow)) {
            continue;
        }
        SellCreditInfo sellCreditInfo = parseSellParser(row);
        log.info("get sell credit info:{}", sellCreditInfo);

        // Has this presale permit been crawled before? If so, stop entirely.
        HouseInfo known = dataOP.getHouseInfoByDepartmentNameAndSellCredit(sellCreditInfo);
        if (known != null) {
            log.info("already parsing sell credit:{}",sellCreditInfo);
            break;
        }

        dataOP.insertSellCreditInfo(sellCreditInfo);
        // The row at the first index is stored but its house info is not parsed.
        if (index != firstRowIndex) {
            parseHouseInfo(sellCreditInfo);
        }
    }
}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:34,代码来源:SellCreditParser.java

示例6: parseSpan

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Collects every {@code span} tag below {@code node} and concatenates the values that
 * {@code MappingSet.NUMBER_MAPPING} holds for each span's {@code class} attribute.
 *
 * @param node the node whose span descendants are collected
 * @return the concatenated mapped values, in collection order
 */
private String parseSpan(Node node) {
    NodeList spans = new NodeList();
    node.collectInto(spans, new TagNameFilter("span"));

    StringBuilder decoded = new StringBuilder();
    Node[] spanArray = spans.toNodeArray();
    for (int idx = 0; idx < spanArray.length; idx++) {
        if (spanArray[idx] instanceof Span) {
            Span span = (Span) spanArray[idx];
            String cssClass = span.getAttribute("class");
            decoded.append(MappingSet.NUMBER_MAPPING.get(cssClass));
        }
    }
    return decoded.toString();
}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:14,代码来源:DailyDealParser.java

示例7: parseDailyBriefInfo

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Fetches http://www.tmsf.com/index.jsp and extracts the daily brief rows found under
 * the element with {@code id="myCont5"}.
 *
 * <p>Rows are read from child indexes 5, 7, 9, 11 and 13 of the nested container; in
 * each row the children at indexes 1, 3, 5 and 7 supply one text value followed by three
 * integers. Every parsed row is inserted through {@code dataOP} and written to ES.
 *
 * @return the parsed rows, or an empty list when the container element is absent
 * @throws IOException     declared for the connection helper
 * @throws ParserException if the HTML parser fails
 */
public List<DailyBriefInfo> parseDailyBriefInfo() throws IOException, ParserException {

    Parser parser = new Parser(CommonHttpURLConnection.getURLConnection("http://www.tmsf.com/index.jsp"));
    NodeFilter nodeFilter = new HasAttributeFilter("id", "myCont5");
    NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
    if (nodeList.toNodeArray().length == 0) {
        // Type-safe empty list instead of the raw-typed Collections.EMPTY_LIST.
        return Collections.emptyList();
    }

    List<DailyBriefInfo> dailyBriefInfoList = new ArrayList<>();

    // Whole hours elapsed since 1970-01-01 00:00:00 UTC.
    int parseHour = (int) (Clock.systemUTC().millis() / (1000 * 3600));

    // Whole days elapsed since 1970-01-01 00:00:00 UTC.
    int parseDay = parseHour / 24;

    NodeList infoNodeList = nodeList.elementAt(0).getChildren().elementAt(1)
            .getChildren().elementAt(1).getChildren();

    for (int i = 5; i <= 13; i = i + 2) {
        // Look the row's cells up once instead of once per column.
        NodeList cells = infoNodeList.elementAt(i).getChildren();
        DailyBriefInfo dailyBriefInfo = new DailyBriefInfo(
                CharMatcher.WHITESPACE.trimFrom(cells.elementAt(1).toPlainTextString()),
                Integer.parseInt(CharMatcher.WHITESPACE.trimFrom(cells.elementAt(3).toPlainTextString())),
                Integer.parseInt(CharMatcher.WHITESPACE.trimFrom(cells.elementAt(5).toPlainTextString())),
                Integer.parseInt(CharMatcher.WHITESPACE.trimFrom(cells.elementAt(7).toPlainTextString())),
                parseDay, parseHour);

        dailyBriefInfoList.add(dailyBriefInfo);
        dataOP.insertBriefDealInfo(dailyBriefInfo);

        ESOP.writeToES("log/daily_brief_info_es", JSONObject.toJSONString(dailyBriefInfo));
    }

    return dailyBriefInfoList;

}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:37,代码来源:DailyDealParser.java

示例8: parseLinkTag

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Parses the building number: returns the plain text of the first {@code LinkTag} found
 * in the given list.
 *
 * @param nodeList nodes to search, in order
 * @return the plain text of the first link tag, or {@code StringUtils.EMPTY} if there is none
 */
private String parseLinkTag(NodeList nodeList) {
    Node[] candidates = nodeList.toNodeArray();
    for (int idx = 0; idx < candidates.length; idx++) {
        Node candidate = candidates[idx];
        if (!(candidate instanceof LinkTag)) {
            continue;
        }
        return candidate.toPlainTextString();
    }
    return StringUtils.EMPTY;
}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:15,代码来源:HouseParser.java

示例9: parseDiv

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Parses the room number / status: returns the plain text of the first {@code Div} found
 * in the given list.
 *
 * @param nodeList nodes to search, in order
 * @return the plain text of the first div, or {@code StringUtils.EMPTY} if there is none
 */
private String parseDiv(NodeList nodeList) {
    Node[] candidates = nodeList.toNodeArray();
    for (int idx = 0; idx < candidates.length; idx++) {
        Node candidate = candidates[idx];
        if (!(candidate instanceof Div)) {
            continue;
        }
        return candidate.toPlainTextString();
    }
    return StringUtils.EMPTY;
}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:15,代码来源:HouseParser.java

示例10: readTextAndLinkAndTitle

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Prints the text nodes, link targets and title of a page, one per line.
 *
 * <p>The content is parsed with a filter that accepts text nodes, link tags and title
 * tags. For each accepted node the text, the link or the title is taken respectively,
 * and printed to standard output unless {@code isTrimEmpty} reports it as empty.
 *
 * @param result the content of the web page
 * @throws Exception if parsing fails
 */
public static void readTextAndLinkAndTitle(String result) throws Exception {
    Parser parser = Parser.createParser(result, "utf8");

    OrFilter textLinkOrTitle = new OrFilter();
    textLinkOrTitle.setPredicates(new NodeFilter[] {
            new NodeClassFilter(TextNode.class),
            new NodeClassFilter(LinkTag.class),
            new NodeClassFilter(TitleTag.class) });

    // Declared outside the loop on purpose: a node of none of the three kinds would
    // leave the previous value in place, exactly as before.
    String line = "";
    for (Node node : parser.parse(textLinkOrTitle).toNodeArray()) {
        if (node instanceof TextNode) {
            line = ((TextNode) node).getText();
        } else if (node instanceof LinkTag) {
            line = ((LinkTag) node).getLink();
        } else if (node instanceof TitleTag) {
            line = ((TitleTag) node).getTitle();
        }

        if (!isTrimEmpty(line)) {
            System.out.println(line);
        }
    }
}
 
开发者ID:YufangWoo,项目名称:news-crawler,代码行数:38,代码来源:HtmlParserTest.java

示例11: getNodes

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Recursively collects tags named {@code tagName} from {@code list} into {@code nodes}.
 *
 * <p>When {@code attribute} is null or empty, every tag with a matching name is added.
 * Otherwise a tag is added only if one of the whitespace-separated tokens of that
 * attribute equals {@code value} (ignoring case). The children of an added tag are not
 * searched; the children of every other node are.
 *
 * @param list      nodes to search; {@code null} is treated as empty
 * @param tagName   tag name to match, case-insensitively
 * @param attribute attribute to inspect, or null/empty to match on tag name alone
 * @param value     attribute token to look for
 * @param nodes     output list that receives the matching tags
 * @throws Exception declared by the original signature
 */
private static void getNodes(NodeList list, String tagName,
		String attribute, String value, List<Node> nodes) throws Exception {
	if (list == null)
		return;
	for (Node n : list.toNodeArray()) {
		boolean added = false;
		if (n instanceof TagNode) {
			TagNode node = (TagNode) n;
			if (node.getTagName().equalsIgnoreCase(tagName)) {
				if (attribute == null || "".equals(attribute)) {
					nodes.add(node);
					added = true;
				} else {
					String avs = node.getAttribute(attribute);
					// A tag lacking the attribute simply does not match. It must still
					// fall through to the recursion below, otherwise matching
					// descendants nested inside it would be missed.
					if (avs != null) {
						for (String v : avs.split("\\s+")) {
							if (v.equalsIgnoreCase(value)) {
								nodes.add(node);
								added = true;
								break;
							}
						}
					}
				}
			}
		}
		if (!added)
			getNodes(n.getChildren(), tagName, attribute, value, nodes);
	}
}
 
开发者ID:javajoker,项目名称:infoecos,代码行数:31,代码来源:HtmlUtil.java

示例12: getScorers

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Extracts football events from the first table of the page at {@code link}.
 *
 * <p>Walks the table's children from index 4 on, three levels deep. Text nodes update
 * the current match time (text containing "'"), score (text containing "[") or player
 * name (anything else); every tag carrying a {@code src} attribute emits an event from
 * the current values and then clears the score.
 *
 * @param link resource to parse
 * @return the events collected; on any exception the stack trace is printed and the
 *         events gathered so far are returned
 */
public ArrayList<FootballEvent> getScorers(String link) {
    Parser parser = new Parser();
    ArrayList<FootballEvent> events = new ArrayList<FootballEvent>();
    try {
        NodeFilter tableFilter = new TagNameFilter("table");
        parser.setResource(link);
        Node firstTable = parser.parse(tableFilter).toNodeArray()[0];
        Node[] rows = firstTable.getChildren().toNodeArray();

        String matchtime = "";
        String score = "";
        String playername = "";
        for (int rowIdx = 4; rowIdx < rows.length; rowIdx++) {
            NodeList cells = rows[rowIdx].getChildren();
            if (cells == null) {
                continue;
            }
            for (Node cell : cells.toNodeArray()) {
                NodeList parts = cell.getChildren();
                if (parts == null) {
                    continue;
                }
                for (Node part : parts.toNodeArray()) {
                    if (part instanceof TextNode) {
                        String text = part.getText();
                        if (text.contains("'")) {
                            matchtime = text;
                        } else if (text.contains("[")) {
                            score = text;
                        } else {
                            playername = text;
                        }
                    } else if (part instanceof Tag) {
                        String imglink = ((Tag) part).getAttribute("src");
                        if (imglink != null) {
                            events.add(new FootballEvent(matchtime, score, playername, imglink));
                            score = "";
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return events;
}
 
开发者ID:reggna,项目名称:silvertrout,代码行数:52,代码来源:LiveScoreParser.java

示例13: getGames

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Scrapes http://livescores.com/ and builds the list of games found in the score table.
 *
 * <p>The table is located by filtering all {@code table} tags on
 * {@code bgcolor="#666666"} and then {@code width="331"}; the first match is used and
 * its children are scanned. A child whose {@code bgcolor} contains "3333" updates the
 * current country and league; a child whose {@code bgcolor} contains "f" is read as a
 * game row and appended to the result together with its scorer events (when the result
 * cell carries a link).
 *
 * @return the games collected; on any exception the stack trace is printed and the
 *         games gathered so far are returned
 */
public ArrayList<FootballGame> getGames() {
    Parser parser = new Parser();
    ArrayList<FootballGame> games = new ArrayList<FootballGame>();
    try {
        NodeFilter tagNameFilter = new TagNameFilter("table");
        HasAttributeFilter attrFilter = new HasAttributeFilter("bgcolor", "#666666");
        parser.setResource("http://livescores.com/");
        NodeList nl = parser.parse(tagNameFilter);
        // Narrow the tables down in two steps: by background colour, then by width.
        nl = nl.extractAllNodesThatMatch(attrFilter);
        attrFilter = new HasAttributeFilter("width", "331");
        nl = nl.extractAllNodesThatMatch(attrFilter);
        // Only the first remaining table is used; its children are scanned below.
        Node node = nl.remove(0);
        nl = node.getChildren();
        Node[] nodes = nl.toNodeArray();
        Tag tag;
        // country/league persist across iterations: a header row sets them and the
        // following game rows reuse them.
        String country = "";
        String league = "";
        String hometeam = "";
        String awayteam = "";
        String gametime = "";
        String link = "";
        String result = "";
        for (int i = 0; i < nodes.length; i++) {
            if (nodes[i] instanceof Tag) {
                tag = (Tag) nodes[i];
                String str = tag.getAttribute("bgcolor");
                if (str != null) {
                    // Disabled check kept from the original: a bgcolor containing
                    // "11111" was noted as marking a new league.
                    //if(str.contains("11111"))
                    //    ;
                    if (str.contains("3333")) {
                        // Header row: when the first child's class contains "title",
                        // its children at index 2 and 4 hold country and league.
                        tag = (Tag) tag.getFirstChild();
                        str = tag.getAttribute("class");
                        if (str != null && str.contains("title")) {
                            country = tag.getChildren().toNodeArray()[2].getText();
                            league = tag.getChildren().toNodeArray()[4].getText();
                        }
                    } else if (str.contains("f")) {
                        // Game row: children 0..3 are read as time, home team,
                        // result and away team respectively.
                        Node[] tempnodes = tag.getChildren().toNodeArray();
                        // The time is the part after ';' in the first cell's text, if
                        // there is one; otherwise it is read from the node two
                        // siblings after the cell's first child.
                        String[] t = tempnodes[0].getFirstChild().getText().split(";");
                        if (t.length > 1)
                            gametime = t[1];
                        else
                            gametime = tempnodes[0].getFirstChild().getNextSibling().getNextSibling().getText();
                        hometeam = tempnodes[1].getFirstChild().getText();
                        awayteam = tempnodes[3].getFirstChild().getText();
                        // Result
                        if (tempnodes[2].getFirstChild().getFirstChild() != null) {
                            // With link: the result text is nested inside a link tag.
                            result = tempnodes[2].getFirstChild().getFirstChild().getText();
                            link = ((LinkTag) (tempnodes[2].getFirstChild())).extractLink();
                        } else {
                            // Without link: the result text is the cell's first child.
                            result = tempnodes[2].getFirstChild().getText();
                            link = null;
                        }
                        // Scorer events are only fetched when the result had a link.
                        ArrayList<FootballEvent> ev = new ArrayList<FootballEvent>();
                        if (link != null) {
                            ev = getScorers(link);
                        }
                        games.add(new FootballGame(country, league, hometeam, awayteam, gametime, ev, result));
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return games;
}
 
开发者ID:reggna,项目名称:silvertrout,代码行数:71,代码来源:LiveScoreParser.java

示例14: preencheMapaFeriadosEstaduais

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Fills {@code mapaFeriadosEstado} with the state holidays found in the states page.
 *
 * <p>Everything before the first "&lt;h3" of the page is discarded and the remainder is
 * passed to {@code filterTable}. Each heading node starts a new state entry; each table
 * node is read three cells at a time (date, name, and a third cell that is skipped),
 * and the resulting holiday is added to the entry of the most recent heading. Dates are
 * written as "day/month/2015", with the month taken from a Portuguese month-name lookup.
 * Each holiday is also printed to standard output.
 *
 * @throws IOException     declared for the helpers invoked here
 * @throws ParserException declared for the helpers invoked here
 * @throws ParseException  declared by the original signature
 */
private void preencheMapaFeriadosEstaduais() throws IOException, ParserException,ParseException {
    String estadosPage =  recuperarDadosEstado();
    StringBuilder conteudo = new StringBuilder(estadosPage);
    conteudo.delete(0, estadosPage.indexOf("<h3"));
    NodeList nodeEstadoList = filterTable(conteudo.toString());

    // Month name -> two-digit month number ("01".."12").
    String[] nomesMeses = {"janeiro", "fevereiro", "março", "abril", "maio", "junho", "julho", "agosto", "setembro", "outubro", "novembro", "dezembro"};
    Map<String,String> mapaMeses = new HashMap<String,String>();
    for (int indice = 0; indice < nomesMeses.length; indice++) {
        int numero = indice + 1;
        String doisDigitos = numero < 10 ? "0" + numero : String.valueOf(numero);
        mapaMeses.put(nomesMeses[indice], doisDigitos);
    }

    String estado = null;
    for (Node node : nodeEstadoList.toNodeArray()) {
        if (node instanceof TableTag) {
            SimpleNodeIterator celulas = ((TableTag) node).searchFor(TableColumn.class, true).elements();
            while (celulas.hasMoreNodes()) {
                Feriado feriado = new Feriado();

                String[] dataExtenso = celulas.nextNode().toPlainTextString().split(" de ");
                String mes = mapaMeses.get(dataExtenso[1]);
                // The stored date uses the day exactly as found on the page.
                feriado.setData(dataExtenso[0] + "/" + mes + "/2015");

                Node nome = celulas.nextNode();
                feriado.setNome(nome.toPlainTextString());

                // Third cell of the row is consumed but not used.
                celulas.nextNode();

                // Only the printed line gets a zero-padded day.
                if (dataExtenso[0].length() == 1) {
                    dataExtenso[0] = "0" + dataExtenso[0];
                }
                System.out.println(dataExtenso[0] + "/" + mes + "/2015,"+nome.toPlainTextString()+","+mapaEstados.get(estado));
                mapaFeriadosEstado.get(estado).add(feriado);
            }
        }
        if (node instanceof HeadingTag) {
            NodeList filhos = node.getChildren();
            estado = filhos.toHtml().trim();
            // When the heading's first child has children of its own, their HTML is
            // used as the state name instead.
            NodeList aninhados = filhos.elementAt(0).getChildren();
            if (aninhados != null) {
                estado = aninhados.toHtml().trim();
            }
            mapaFeriadosEstado.put(estado, new ArrayList<Feriado>());
        }
    }
}
 
开发者ID:emivaljr,项目名称:hojenaoapp,代码行数:48,代码来源:MyEndpoint.java

示例15: getInnerText

import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Returns the inner text of an HTML string.
 *
 * <p>The input is passed through {@code filterOutScriptSegment} and {@code getValidHtml},
 * wrapped in a single {@code <htm>} element and parsed; the inner text of every
 * resulting root node is concatenated.
 *
 * @param html
 *            the HTML text to process
 * @param filterNodeFormat
 *            rows of { tagname, attribute, attr value, inside replacement match,
 *            inside replacement}, handed unchanged to the per-node overload
 * @param leaveBlank
 *            keep blank characters
 * @return the concatenated inner text of all root nodes
 */
public static String getInnerText(String html, String[][] filterNodeFormat,
		boolean leaveBlank) {
	String withoutScripts = HtmlUtil.filterOutScriptSegment(html);
	String wrapped = String.format("<htm>%s</htm>",
			HtmlUtil.getValidHtml(withoutScripts));
	Node[] roots = HtmlUtil.getHtmlRoot(wrapped).toNodeArray();

	StringBuilder text = new StringBuilder();
	for (int idx = 0; idx < roots.length; idx++) {
		text.append(getInnerText(roots[idx], filterNodeFormat, leaveBlank));
	}
	return text.toString();
}
 
开发者ID:javajoker,项目名称:infoecos,代码行数:23,代码来源:HtmlUtil.java


注:本文中的org.htmlparser.util.NodeList.toNodeArray方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。