本文整理汇总了Java中org.htmlparser.util.NodeList.toNodeArray方法的典型用法代码示例。如果您正苦于以下问题：Java NodeList.toNodeArray方法的具体用法？Java NodeList.toNodeArray怎么用？Java NodeList.toNodeArray使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.htmlparser.util.NodeList的用法示例。
在下文中一共展示了NodeList.toNodeArray方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: parseSpan
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Decodes the value encoded in the {@code <span>} children of the {@code Div} nodes in a list.
 *
 * <p>For every {@code Div} in {@code nodeList}, each direct {@code Span} child has its
 * {@code class} attribute looked up in {@code MappingSet.NUMBER_MAPPING}; the looked-up
 * values are appended in iteration order. A {@code Div} whose plain text equals {@code "-"}
 * ends the scan immediately with {@code "0"}.
 *
 * @param nodeList nodes to scan; entries that are not {@code Div} are ignored
 * @return the concatenated mapped values, {@code "0"} when a {@code "-"} placeholder div is
 *         encountered, or an empty string when nothing was appended
 */
private String parseSpan(NodeList nodeList) {
    StringBuilder sb = new StringBuilder();
    for (Node node : nodeList.toNodeArray()) {
        if (!(node instanceof Div)) {
            continue;
        }
        if (StringUtils.equalsIgnoreCase("-", node.toPlainTextString())) {
            return "0";
        }
        NodeList spanNodeList = node.getChildren();
        if (spanNodeList == null) {
            // A div without children has no child list at all; there is nothing to decode.
            continue;
        }
        for (Node spanNode : spanNodeList.toNodeArray()) {
            if (spanNode instanceof Span) {
                String attribute = ((Span) spanNode).getAttribute("class");
                // NOTE(review): an unmapped class presumably yields null here and would be
                // appended as the text "null" - confirm NUMBER_MAPPING covers every class.
                sb.append(MappingSet.NUMBER_MAPPING.get(attribute));
            }
        }
    }
    return sb.toString();
}
示例2: getValidHtml
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Re-serializes an HTML fragment from its parsed top-level nodes.
 *
 * <p>The fragment is parsed with {@code getHtmlRoot}; every top-level node is written back
 * through {@code toHtml()}, except top-level end tags, which are dropped. The intent stated
 * by the original author is to obtain valid HTML with tags closed where necessary.
 *
 * @param html the HTML text to normalize
 * @return the re-serialized HTML, or {@code html} unchanged if anything fails along the way
 */
public static String getValidHtml(String html) {
    try {
        StringBuilder rebuilt = new StringBuilder();
        for (Node current : getHtmlRoot(html).toNodeArray()) {
            boolean topLevelEndTag = current instanceof TagNode && ((TagNode) current).isEndTag();
            if (!topLevelEndTag) {
                rebuilt.append(current.toHtml());
            }
        }
        return rebuilt.toString();
    } catch (Exception e) {
        // Best effort by design: never throw, hand the caller back the original input.
        return html;
    }
}
示例3: parsePageInfo
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Reads the number of result pages for a residential-community listing.
 *
 * <p>The page at {@code url} is fetched and searched for elements whose {@code class} is
 * {@code "pagenumber"}. For each such {@code Div}, the text nodes under its second child are
 * inspected; the first one containing {@code "/"} whose trailing part parses as an integer
 * supplies the result.
 *
 * @param url address of the listing page
 * @return the number found after the slash, or {@code 0} when no usable page indicator exists
 * @throws IOException declared for the connection/parsing calls made here
 * @throws ParserException declared for the HTML parser calls made here
 */
private int parsePageInfo(final String url) throws IOException, ParserException {
    Parser parser = new Parser(CommonHttpURLConnection.getURLConnection(url));
    NodeFilter nodeFilter = new HasAttributeFilter("class", "pagenumber");
    NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
    for (Node node : nodeList.toNodeArray()) {
        if (!(node instanceof Div)) {
            continue;
        }
        // The pager text lives under the div's second child; skip divs that are shaped
        // differently instead of failing on a missing child list.
        NodeList children = node.getChildren();
        if (children == null || children.size() < 2) {
            continue;
        }
        NodeList pagerNodes = children.elementAt(1).getChildren();
        if (pagerNodes == null) {
            continue;
        }
        for (Node innerNode : pagerNodes.toNodeArray()) {
            if (!(innerNode instanceof TextNode)) {
                continue;
            }
            String pageStr = innerNode.toPlainTextString();
            int slash = pageStr.indexOf('/');
            if (slash < 0) {
                continue;
            }
            try {
                // Trim so surrounding whitespace in the markup does not defeat parsing.
                return Integer.parseInt(pageStr.substring(slash + 1).trim());
            } catch (NumberFormatException ignored) {
                // Not a page indicator after all; keep looking at the remaining text nodes.
            }
        }
    }
    return 0;
}
示例4: parseSpan
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Decodes a price from the {@code <span>} tags found under a node.
 *
 * <p>All tags named {@code span} are collected from {@code node}; the {@code class} attribute
 * of each {@code Span} is looked up in {@code MappingSet.NUMBER_MAPPING} and the results are
 * concatenated. Whitespace is stripped from the final text.
 *
 * @param node the node whose span tags encode the price
 * @return the concatenated mapped values with all whitespace removed
 */
private String parseSpan(Node node) {
    NodeList spans = new NodeList();
    node.collectInto(spans, new TagNameFilter("span"));

    StringBuilder decoded = new StringBuilder();
    Node[] spanArray = spans.toNodeArray();
    for (int idx = 0; idx < spanArray.length; idx++) {
        if (!(spanArray[idx] instanceof Span)) {
            continue;
        }
        String cssClass = ((Span) spanArray[idx]).getAttribute("class");
        decoded.append(MappingSet.NUMBER_MAPPING.get(cssClass));
    }
    return CharMatcher.WHITESPACE.removeFrom(decoded.toString());
}
示例5: run
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Crawls the most recent pre-sale permit entries from the given page.
 *
 * <p>The first element with {@code class="sale1"} is located and its children from index 2
 * onward are walked. Every {@code TableRow} is parsed into a {@code SellCreditInfo}; the walk
 * stops at the first permit that already has a stored {@code HouseInfo}. New permits are
 * inserted, and house details are parsed for every new permit except the row at index 2.
 *
 * @param url page listing the pre-sale permits
 * @throws InterruptedException declared by the original signature
 * @throws IOException declared for the connection/parsing calls made here
 * @throws ParserException declared for the HTML parser calls made here
 */
public void run(String url) throws InterruptedException, IOException, ParserException {
    URLConnection urlConnection = CommonHttpURLConnection.getURLConnection(url);
    Parser parser = new Parser(urlConnection);
    NodeList nodeList = parser.extractAllNodesThatMatch(new HasAttributeFilter("class", "sale1"));
    if (nodeList.toNodeArray().length == 0) {
        return;
    }

    Node[] rows = nodeList.elementAt(0).getChildren().toNodeArray();
    for (int idx = 2; idx < rows.length; idx++) {
        Node row = rows[idx];
        if (!(row instanceof TableRow)) {
            continue;
        }
        SellCreditInfo credit = parseSellParser(row);
        log.info("get sell credit info:{}", credit);

        // Has this pre-sale permit been crawled before? If so, stop the walk.
        if (dataOP.getHouseInfoByDepartmentNameAndSellCredit(credit) != null) {
            log.info("already parsing sell credit:{}", credit);
            break;
        }
        dataOP.insertSellCreditInfo(credit);

        // NOTE(review): the row at index 2 is stored but its houses are never parsed -
        // confirm this exclusion is intentional.
        if (idx != 2) {
            parseHouseInfo(credit);
        }
    }
}
示例6: parseSpan
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Concatenates the values that the {@code <span>} tags under a node map to.
 *
 * <p>Tags named {@code span} are collected from {@code node}; for each {@code Span} the
 * {@code class} attribute is looked up in {@code MappingSet.NUMBER_MAPPING} and the result
 * is appended to the output.
 *
 * @param node the node to search for span tags
 * @return the mapped values joined in collection order (empty when no span was found)
 */
private String parseSpan(Node node) {
    NodeFilter spanOnly = new TagNameFilter("span");
    NodeList collected = new NodeList();
    node.collectInto(collected, spanOnly);

    StringBuilder out = new StringBuilder();
    for (Node candidate : collected.toNodeArray()) {
        if (!(candidate instanceof Span)) {
            continue;
        }
        Span span = (Span) candidate;
        out.append(MappingSet.NUMBER_MAPPING.get(span.getAttribute("class")));
    }
    return out.toString();
}
示例7: parseDailyBriefInfo
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Scrapes the daily summary table from the tmsf.com index page and persists each row.
 *
 * <p>The element with {@code id="myCont5"} is located; rows at child positions 5, 7, 9, 11
 * and 13 of its nested table body are converted to {@code DailyBriefInfo}, stored through
 * {@code dataOP}, and written to Elasticsearch through {@code ESOP}.
 *
 * @return the parsed rows, or an empty immutable list when the summary element is missing
 * @throws IOException declared for the connection/parsing calls made here
 * @throws ParserException declared for the HTML parser calls made here
 */
public List<DailyBriefInfo> parseDailyBriefInfo() throws IOException, ParserException {
    Parser parser = new Parser(CommonHttpURLConnection.getURLConnection("http://www.tmsf.com/index.jsp"));
    NodeFilter nodeFilter = new HasAttributeFilter("id", "myCont5");
    NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);
    if (nodeList.size() == 0) {
        // Type-safe replacement for the raw Collections.EMPTY_LIST constant.
        return Collections.emptyList();
    }
    List<DailyBriefInfo> dailyBriefInfoList = new ArrayList<>();
    // Whole hours elapsed since 1970-01-01 00:00:00 UTC.
    int parseHour = (int) (Clock.systemUTC().millis() / (1000 * 3600));
    // Whole days elapsed since 1970-01-01 00:00:00 UTC.
    int parseDay = parseHour / 24;
    NodeList infoNodeList = nodeList.elementAt(0).getChildren().elementAt(1)
            .getChildren().elementAt(1).getChildren();
    // NOTE(review): only odd child positions are read, presumably because whitespace text
    // nodes sit between the real rows/cells - confirm against the live markup.
    for (int i = 5; i <= 13; i = i + 2) {
        NodeList cells = infoNodeList.elementAt(i).getChildren();
        DailyBriefInfo dailyBriefInfo = new DailyBriefInfo(
                CharMatcher.WHITESPACE.trimFrom(cells.elementAt(1).toPlainTextString()),
                Integer.parseInt(CharMatcher.WHITESPACE.trimFrom(cells.elementAt(3).toPlainTextString())),
                Integer.parseInt(CharMatcher.WHITESPACE.trimFrom(cells.elementAt(5).toPlainTextString())),
                Integer.parseInt(CharMatcher.WHITESPACE.trimFrom(cells.elementAt(7).toPlainTextString())),
                parseDay, parseHour);
        dailyBriefInfoList.add(dailyBriefInfo);
        dataOP.insertBriefDealInfo(dailyBriefInfo);
        ESOP.writeToES("log/daily_brief_info_es", JSONObject.toJSONString(dailyBriefInfo));
    }
    return dailyBriefInfoList;
}
示例8: parseLinkTag
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Extracts the building count, taken as the text of the first link in the list.
 *
 * @param nodeList nodes to search
 * @return the plain text of the first {@code LinkTag}, or an empty string when there is none
 */
private String parseLinkTag(NodeList nodeList) {
    Node[] candidates = nodeList.toNodeArray();
    for (int idx = 0; idx < candidates.length; idx++) {
        if (candidates[idx] instanceof LinkTag) {
            return candidates[idx].toPlainTextString();
        }
    }
    return StringUtils.EMPTY;
}
示例9: parseDiv
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Extracts a room number or status, taken as the text of the first div in the list.
 *
 * @param nodeList nodes to search
 * @return the plain text of the first {@code Div}, or an empty string when there is none
 */
private String parseDiv(NodeList nodeList) {
    String text = StringUtils.EMPTY;
    for (Node candidate : nodeList.toNodeArray()) {
        if (candidate instanceof Div) {
            text = candidate.toPlainTextString();
            break;
        }
    }
    return text;
}
示例10: readTextAndLinkAndTitle
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Prints the plain text, link targets and titles found in a page, one per line.
 *
 * <p>The page is parsed with a filter accepting {@code TextNode}, {@code LinkTag} and
 * {@code TitleTag} nodes. For each match the text, the link, or the title respectively is
 * written to standard output, unless {@code isTrimEmpty} reports it as empty.
 *
 * @param result the content of the web page
 * @throws Exception declared by the original signature for any parsing failure
 */
public static void readTextAndLinkAndTitle(String result) throws Exception {
    Parser parser = Parser.createParser(result, "utf8");

    OrFilter lastFilter = new OrFilter();
    lastFilter.setPredicates(new NodeFilter[] {
        new NodeClassFilter(TextNode.class),
        new NodeClassFilter(LinkTag.class),
        new NodeClassFilter(TitleTag.class)
    });

    // Declared outside the loop on purpose: the previous value is retained for any node
    // that matches none of the branches below.
    String line = "";
    for (Node node : parser.parse(lastFilter).toNodeArray()) {
        if (node instanceof TextNode) {
            line = ((TextNode) node).getText();
        } else if (node instanceof LinkTag) {
            line = ((LinkTag) node).getLink();
        } else if (node instanceof TitleTag) {
            line = ((TitleTag) node).getTitle();
        }
        if (!isTrimEmpty(line)) {
            System.out.println(line);
        }
    }
}
示例11: getNodes
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Recursively collects the tags that match a tag name and, optionally, an attribute value.
 *
 * <p>A {@code TagNode} matches when its tag name equals {@code tagName} (ignoring case) and
 * either no attribute is requested, or one of the whitespace-separated tokens of the
 * requested attribute equals {@code value} (ignoring case). Matching nodes are appended to
 * {@code nodes} and their descendants are not searched; every non-matching node has its
 * children searched in turn.
 *
 * <p>A tag with the right name but without the requested attribute is treated like any
 * other non-match, so matches nested inside it are still found.
 *
 * @param list nodes to search; {@code null} is accepted and ignored
 * @param tagName tag name to look for
 * @param attribute attribute to test, or {@code null}/empty to match on tag name alone
 * @param value token the attribute must contain
 * @param nodes output list receiving the matches in document order
 * @throws Exception declared by the original signature
 */
private static void getNodes(NodeList list, String tagName,
        String attribute, String value, List<Node> nodes) throws Exception {
    if (list == null) {
        return;
    }
    for (Node n : list.toNodeArray()) {
        boolean matched = false;
        if (n instanceof TagNode) {
            TagNode node = (TagNode) n;
            if (node.getTagName().equalsIgnoreCase(tagName)) {
                if (attribute == null || attribute.isEmpty()) {
                    matched = true;
                } else {
                    String avs = node.getAttribute(attribute);
                    // A missing attribute is simply "no match"; fall through so the
                    // children are still searched.
                    if (avs != null) {
                        for (String v : avs.split("\\s+")) {
                            if (v.equalsIgnoreCase(value)) {
                                matched = true;
                                break;
                            }
                        }
                    }
                }
            }
        }
        if (matched) {
            nodes.add(n);
        } else {
            getNodes(n.getChildren(), tagName, attribute, value, nodes);
        }
    }
}
示例12: getScorers
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Scrapes scoring events from the first {@code <table>} of a match-detail page.
 *
 * <p>Starting at the table's fifth child, every grandchild node is visited. Text nodes
 * update the running state: text containing {@code '} becomes the match time, text
 * containing {@code [} becomes the score, anything else becomes the player name. A tag
 * carrying a {@code src} attribute emits a {@code FootballEvent} built from the running
 * state plus that {@code src}, after which the score is reset.
 *
 * @param link address of the match-detail page
 * @return the events collected; on any failure the stack trace is printed and the events
 *         gathered so far are returned
 */
public ArrayList<FootballEvent> getScorers(String link) {
    Parser parser = new Parser();
    ArrayList<FootballEvent> events = new ArrayList<FootballEvent>();
    try {
        NodeFilter tableFilter = new TagNameFilter("table");
        parser.setResource(link);
        Node firstTable = parser.parse(tableFilter).toNodeArray()[0];
        Node[] rows = firstTable.getChildren().toNodeArray();

        // Running state shared across rows: an event uses the latest values seen.
        String matchtime = "";
        String score = "";
        String playername = "";

        for (int rowIdx = 4; rowIdx < rows.length; rowIdx++) {
            NodeList cellList = rows[rowIdx].getChildren();
            if (cellList == null) {
                continue;
            }
            for (Node cell : cellList.toNodeArray()) {
                NodeList leafList = cell.getChildren();
                if (leafList == null) {
                    continue;
                }
                for (Node leaf : leafList.toNodeArray()) {
                    if (leaf instanceof TextNode) {
                        String text = leaf.getText();
                        if (text.contains("'")) {
                            matchtime = text;
                        } else if (text.contains("[")) {
                            score = text;
                        } else {
                            playername = text;
                        }
                    } else if (leaf instanceof Tag) {
                        String src = ((Tag) leaf).getAttribute("src");
                        if (src != null) {
                            events.add(new FootballEvent(matchtime, score, playername, src));
                            score = "";
                        }
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return events;
}
示例13: getGames
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Scrapes the list of football games from http://livescores.com/.
 *
 * <p>All {@code table} tags are parsed, narrowed to those with {@code bgcolor="#666666"} and
 * then to those with {@code width="331"}; the children of the first remaining table are
 * walked. Rows are told apart by their {@code bgcolor} attribute: a value containing
 * {@code "3333"} updates the current country/league, a value containing {@code "f"} is read
 * as a game row and produces one {@code FootballGame}.
 *
 * @return the games collected; on any exception the stack trace is printed and the games
 *         gathered so far are returned
 */
public ArrayList<FootballGame> getGames() {
Parser parser = new Parser();
ArrayList<FootballGame> games = new ArrayList<FootballGame>();
try {
NodeFilter tagNameFilter = new TagNameFilter("table");
HasAttributeFilter attrFilter = new HasAttributeFilter("bgcolor", "#666666");
parser.setResource("http://livescores.com/");
NodeList nl = parser.parse(tagNameFilter);
// Narrow the tables down in two passes: first by bgcolor, then by width.
nl = nl.extractAllNodesThatMatch(attrFilter);
attrFilter = new HasAttributeFilter("width", "331");
nl = nl.extractAllNodesThatMatch(attrFilter);
// Take the first surviving table; its children are the rows walked below.
Node node = nl.remove(0);
nl = node.getChildren();
Node[] nodes = nl.toNodeArray();
Tag tag;
// Running state: country/league persist from the latest header row to the game rows after it.
String country = "";
String league = "";
String hometeam = "";
String awayteam = "";
String gametime = "";
String link = "";
String result = "";
for (int i = 0; i < nodes.length; i++) {
if (nodes[i] instanceof Tag) {
tag = (Tag) nodes[i];
String str = tag.getAttribute("bgcolor");
if (str != null) {
// Disabled by the original author: a bgcolor containing "11111" was noted as
// marking a new league; no action is taken for it.
if (str.contains("3333")) {
// Header row: the first child must carry a class containing "title"; its children
// at positions 2 and 4 supply the country and league texts.
tag = (Tag) tag.getFirstChild();
str = tag.getAttribute("class");
if (str != null && str.contains("title")) {
country = tag.getChildren().toNodeArray()[2].getText();
league = tag.getChildren().toNodeArray()[4].getText();
}
} else if (str.contains("f")) {
// Game row: children 0..3 are read as time, home team, result, away team.
Node[] tempnodes = tag.getChildren().toNodeArray();
// The time is the part after ';' in the first cell's text; when there is no ';'
// it is read from the node two siblings further on.
String[] t = tempnodes[0].getFirstChild().getText().split(";");
if (t.length > 1)
gametime = t[1];
else
gametime = tempnodes[0].getFirstChild().getNextSibling().getNextSibling().getText();
hometeam = tempnodes[1].getFirstChild().getText();
awayteam = tempnodes[3].getFirstChild().getText();
// Result
if (tempnodes[2].getFirstChild().getFirstChild() != null) {
// With link: the result text is nested inside a link whose target is kept.
result = tempnodes[2].getFirstChild().getFirstChild().getText();
link = ((LinkTag) (tempnodes[2].getFirstChild())).extractLink();
} else {
// Without link: the result is the cell's own first child text.
result = tempnodes[2].getFirstChild().getText();
link = null;
}
// Events are only fetched (via getScorers) when the result carried a link.
ArrayList<FootballEvent> ev = new ArrayList<FootballEvent>();
if (link != null) {
ev = getScorers(link);
}
games.add(new FootballGame(country, league, hometeam, awayteam, gametime, ev, result));
}
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
return games;
}
示例14: preencheMapaFeriadosEstaduais
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Fills {@code mapaFeriadosEstado} with the state holidays scraped from the states page.
 *
 * <p>The page returned by {@code recuperarDadosEstado()} is cut at its first {@code <h3}
 * and passed to {@code filterTable}. Each heading starts a new state entry; each table's
 * columns are consumed three at a time (date, name, law) to build a {@code Feriado} for the
 * current state. Every holiday is also printed to standard output as a CSV-like line.
 *
 * @throws IOException declared by the original signature
 * @throws ParserException declared for the HTML parser calls made here
 * @throws ParseException declared by the original signature
 */
private void preencheMapaFeriadosEstaduais() throws IOException, ParserException,ParseException {
    String estadosPage = recuperarDadosEstado();
    // Discard everything that precedes the first <h3 heading.
    StringBuilder trimmedPage = new StringBuilder(estadosPage);
    trimmedPage.delete(0, estadosPage.indexOf("<h3"));
    NodeList nodeEstadoList = filterTable(trimmedPage.toString());

    // Portuguese month name -> two-digit month number ("01".."12").
    String[] monthNames = {"janeiro", "fevereiro", "março", "abril", "maio", "junho", "julho", "agosto", "setembro", "outubro", "novembro", "dezembro"};
    Map<String,String> monthNumbers = new HashMap<String,String>();
    for (int idx = 0; idx < monthNames.length; idx++) {
        int number = idx + 1;
        monthNumbers.put(monthNames[idx], (number < 10 ? "0" : "") + number);
    }

    String estado = null;
    for (Node node : nodeEstadoList.toNodeArray()) {
        if (node instanceof TableTag) {
            SimpleNodeIterator cells = ((TableTag) node).searchFor(TableColumn.class, true).elements();
            while (cells.hasMoreNodes()) {
                Feriado feriado = new Feriado();

                // Column 1: date written as "<day> de <month name>".
                String[] dateParts = cells.nextNode().toPlainTextString().split(" de ");
                String month = monthNumbers.get(dateParts[1]);
                // NOTE(review): the stored date keeps the unpadded day while the printed
                // line below pads it - confirm which format consumers expect.
                feriado.setData(dateParts[0] + "/" + month + "/2015");

                // Column 2: holiday name.
                String holidayName = cells.nextNode().toPlainTextString();
                feriado.setNome(holidayName);

                // Column 3: law reference; read only to advance past it.
                cells.nextNode();

                if (dateParts[0].length() == 1) {
                    dateParts[0] = "0" + dateParts[0];
                }
                System.out.println(dateParts[0] + "/" + month + "/2015," + holidayName + "," + mapaEstados.get(estado));
                mapaFeriadosEstado.get(estado).add(feriado);
            }
        } else if (node instanceof HeadingTag) {
            // The state name is the heading's inner HTML, or the inner HTML of its first
            // child when that child has children of its own.
            NodeList headingChildren = node.getChildren();
            estado = headingChildren.toHtml().trim();
            NodeList nested = headingChildren.elementAt(0).getChildren();
            if (nested != null) {
                estado = nested.toHtml().trim();
            }
            mapaFeriadosEstado.put(estado, new ArrayList<Feriado>());
        }
    }
}
示例15: getInnerText
import org.htmlparser.util.NodeList; //导入方法依赖的package包/类
/**
 * Extracts the inner text of an HTML string.
 *
 * <p>Script segments are filtered out, the remainder is normalized with
 * {@code HtmlUtil.getValidHtml}, wrapped in a synthetic {@code <htm>} root and parsed. The
 * node-level {@code getInnerText} overload is then applied to every top-level node and the
 * results are concatenated.
 *
 * @param html the HTML text to read
 * @param filterNodeFormat rows of { tagname, attribute, attr value, inside replacement
 *        match, inside replacement } handed through to the node-level overload
 * @param leaveBlank whether blank characters are kept
 * @return the concatenated inner text of all top-level nodes
 */
public static String getInnerText(String html, String[][] filterNodeFormat,
        boolean leaveBlank) {
    String withoutScripts = HtmlUtil.filterOutScriptSegment(html);
    String wrapped = String.format("<htm>%s</htm>", HtmlUtil.getValidHtml(withoutScripts));
    Node[] roots = HtmlUtil.getHtmlRoot(wrapped).toNodeArray();

    StringBuilder collected = new StringBuilder();
    for (int idx = 0; idx < roots.length; idx++) {
        collected.append(getInnerText(roots[idx], filterNodeFormat, leaveBlank));
    }
    return collected.toString();
}