本文整理汇总了Java中org.htmlparser.Node类的典型用法代码示例。如果您正苦于以下问题:Java Node类的具体用法?Java Node怎么用?Java Node使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
Node类属于org.htmlparser包,在下文中一共展示了Node类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: parserUrl
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Extracts every node whose raw text starts with {@code a href=}.
 *
 * @param parser the parser the nodes are extracted from
 * @return the matching nodes, or {@code null} when the extraction fails with a
 *         {@code ParserException} (its stack trace is printed in that case)
 */
@Override
public NodeList parserUrl(Parser parser) {
    NodeFilter anchorFilter = new NodeFilter() {
        @Override
        public boolean accept(Node node) {
            // Accept only nodes whose text begins with the literal anchor prefix.
            return node.getText().startsWith("a href=");
        }
    };
    NodeList matches = null;
    try {
        matches = parser.extractAllNodesThatMatch(anchorFilter);
    } catch (ParserException e) {
        e.printStackTrace();
    }
    return matches;
}
示例2: hasMetaTagName
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Tells whether the document contains a Meta tag whose name equals
 * {@code name}, compared without regard to case.
 * <p>
 * Example:
 * <pre>
 * HTMLParser hp = new HTMLParser("http://www.abc.org");
 * boolean containskeywords = hp.hasMetaTagName("keywords");
 * boolean containsxyz = hp.hasMetaTagName("xyz");
 * </pre>
 * The parser is reset after the tags have been inspected.
 *
 * @param name name of the Meta tag to look for
 * @return {@code true} if such a tag is present, {@code false} otherwise
 * @exception ParserException if visiting the document nodes fails
 */
public boolean hasMetaTagName(String name) throws ParserException {
    TagFindingVisitor metaVisitor = new TagFindingVisitor(new String[] {"META"});
    parser.visitAllNodesWith(metaVisitor);

    boolean found = false;
    for (Node tag : metaVisitor.getTags(0)) {
        MetaTag meta = (MetaTag) tag;
        if (name.equalsIgnoreCase(meta.getMetaTagName())) {
            found = true;
            break;
        }
    }

    parser.reset();
    return found;
}
示例3: getMetaTagContentByName
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Returns the content of the first Meta tag whose name equals {@code name},
 * compared without regard to case so that the lookup agrees with
 * {@code hasMetaTagName}. If no such tag exists, an empty string is returned.
 * <p>
 * Example:
 * <pre>
 * HTMLParser hp = new HTMLParser("http://www.abc.org");
 * if (hp.hasMetaTagName("organization"))
 * {
 *     System.out.println(hp.getMetaTagContentByName("organization"));
 * }
 * </pre>
 * The parser is reset after the tags have been inspected.
 *
 * @param name name of the Meta tag
 * @return the value reported by the matching tag's {@code getMetaContent()},
 *         or {@code ""} when no tag matches
 * @exception ParserException if visiting the document nodes fails
 */
public String getMetaTagContentByName(String name) throws ParserException {
    String content = "";
    TagFindingVisitor visitor = new TagFindingVisitor(new String[] {"META"});
    parser.visitAllNodesWith(visitor);

    for (Node tag : visitor.getTags(0)) {
        MetaTag metaTag = (MetaTag) tag;
        // Case-insensitive on purpose: a name accepted by hasMetaTagName must
        // also be found here.
        if (name.equalsIgnoreCase(metaTag.getMetaTagName())) {
            content = metaTag.getMetaContent();
            break;
        }
    }

    parser.reset();
    return content;
}
示例4: getLinkTitles
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Returns the text of the {@code TITLE} attribute of every link (A tag) in
 * the document. Each title is preceded by a single space, so a non-empty
 * result starts with a space. Links without a title are skipped.
 * <p>
 * The parser is reset after the tags have been inspected.
 *
 * @return the concatenated link titles, or an empty string if there are none
 * @exception ParserException if visiting the document nodes fails
 */
public String getLinkTitles() throws ParserException {
    TagFindingVisitor visitor = new TagFindingVisitor(new String[] {"A"});
    parser.visitAllNodesWith(visitor);

    // A builder avoids re-copying the accumulated text on every link.
    StringBuilder titles = new StringBuilder();
    for (Node tag : visitor.getTags(0)) {
        String titleText = ((LinkTag) tag).getAttribute("TITLE");
        if (titleText != null) {
            titles.append(' ').append(titleText);
        }
    }

    parser.reset();
    return titles.toString();
}
示例5: getImgAlts
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Returns the text of the {@code ALT} attribute of every IMG tag in the
 * document. Each value is preceded by a single space, so a non-empty result
 * starts with a space. Images without an alt attribute are skipped.
 * <p>
 * The parser is reset after the tags have been inspected.
 *
 * @return the concatenated alt texts, or an empty string if there are none
 * @exception ParserException if visiting the document nodes fails
 */
public String getImgAlts() throws ParserException {
    TagFindingVisitor visitor = new TagFindingVisitor(new String[] {"IMG"});
    parser.visitAllNodesWith(visitor);

    // A builder avoids re-copying the accumulated text on every image.
    StringBuilder alts = new StringBuilder();
    for (Node tag : visitor.getTags(0)) {
        String altText = ((ImageTag) tag).getAttribute("ALT");
        if (altText != null) {
            alts.append(' ').append(altText);
        }
    }

    parser.reset();
    return alts.toString();
}
示例6: toObject
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Builds a {@code Record} from the given node and passes it to the store.
 * <p>
 * The children at positions 1, 3, 5 and 7 provide the name, type name,
 * content and date. The id is cut out of the fifth whitespace-separated token
 * of the node's own text, skipping its first 23 and last 3 characters.
 * NOTE(review): these positions presumably mirror one specific page layout;
 * verify against the markup being scraped.
 *
 * @param node the node to convert
 */
private void toObject(Node node) {
    Record record = new Record();
    record.setName(childText(node, 1));
    record.setTypeName(childText(node, 3));
    record.setContent(childText(node, 5));
    record.setDate(childText(node, 7));

    String[] tokens = node.getText().split("\\s");
    String click = tokens[4];
    int idEnd = click.length() - 3;
    record.setId(click.substring(23, idEnd));

    try {
        store.store(record);
    } catch (Exception e) {
        // Storage failures are logged (message only) and otherwise ignored.
        logger.error(currentThread().getName() + "存储到hbase出现错误!\n"+e.getMessage()+"\n");
    }
}

/** Returns the trimmed plain text of the child of {@code node} at {@code index}. */
private String childText(Node node, int index) {
    return node.getChildren().elementAt(index).toPlainTextString().trim();
}
示例7: dealTag
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Processes a tag by passing each of its children to {@code parserNode}.
 * A tag without a child list is ignored. If iterating the children raises a
 * {@code ParserException}, the text "ParserException" is printed and
 * processing of the remaining children stops.
 *
 * @param tag the tag to process
 */
private void dealTag(Node tag) {
    NodeList children = tag.getChildren();
    if (children == null) {
        return;
    }

    NodeIterator cursor = children.elements();
    try {
        for (; cursor.hasMoreNodes();) {
            // Per the original note, parserNode analyses the node recursively.
            parserNode(cursor.nextNode());
        }
    } catch (ParserException exc) {
        System.out.println("ParserException");
    }
}
示例8: parseDetailInfo
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Collects the link tags found directly under the first node of
 * {@code nodeList} into a map.
 * <p>
 * Only {@code LinkTag} children are considered. Links whose {@code id}
 * attribute is blank or contains "all" are skipped. The map key is the part of
 * the id after its first underscore (the whole id when there is none); the
 * value is the link's plain text. A failure on one child is logged and does
 * not stop the remaining children from being processed.
 *
 * @param nodeList the nodes to inspect; only the first element is used
 * @return the id-to-text map; empty when {@code nodeList} is null or empty or
 *         when its first node has no children
 */
private Map<String, String> parseDetailInfo(NodeList nodeList) {
    Map<String, String> infoMap = Maps.newHashMap();
    if (nodeList == null || nodeList.size() == 0) {
        return infoMap;
    }

    // getChildren() may return null for a node without children; iterating it
    // unguarded would throw outside the per-child try/catch below.
    NodeList children = nodeList.elementAt(0).getChildren();
    if (children == null) {
        return infoMap;
    }

    for (Node pageNode : children.toNodeArray()) {
        try {
            if (!(pageNode instanceof LinkTag)) {
                continue;
            }
            String rawId = ((LinkTag) pageNode).getAttribute("id");
            if (StringUtils.isBlank(rawId) || rawId.contains("all")) {
                continue;
            }
            String id = rawId.substring(rawId.indexOf("_") + 1);
            infoMap.put(id, pageNode.toPlainTextString());
        } catch (Exception e) {
            log.error("parse parseDetailInfo catch Exception:", e);
        }
    }
    return infoMap;
}
示例9: parseSpan
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Parses the remaining ("other") values out of the given nodes.
 * <p>
 * Only {@code Div} nodes are inspected. As soon as a Div whose plain text
 * equals "-" (ignoring case) is met, "0" is returned. Otherwise, for every
 * {@code Span} directly under a Div, the value that
 * {@code MappingSet.NUMBER_MAPPING} holds for the span's {@code class}
 * attribute is appended to the result. Divs without children are skipped.
 * <p>
 * NOTE(review): a class with no entry in the mapping appends the text "null";
 * confirm whether that can happen for the pages being parsed.
 *
 * @param nodeList the nodes to inspect
 * @return "0" for a "-" Div, otherwise the concatenated mapped values
 */
private String parseSpan(NodeList nodeList) {
    StringBuilder sb = new StringBuilder();
    for (Node node : nodeList.toNodeArray()) {
        if (!(node instanceof Div)) {
            continue;
        }
        if (StringUtils.equalsIgnoreCase("-", node.toPlainTextString())) {
            return "0";
        }

        // getChildren() may return null for a Div without children.
        NodeList spanNodeList = node.getChildren();
        if (spanNodeList == null) {
            continue;
        }
        for (Node spanNode : spanNodeList.toNodeArray()) {
            if (spanNode instanceof Span) {
                String attribute = ((Span) spanNode).getAttribute("class");
                sb.append(MappingSet.NUMBER_MAPPING.get(attribute));
            }
        }
    }
    return sb.toString();
}
示例10: getColumnCount
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Returns the number of columns/cells in the given row, including cell
 * spacing. Each child matching {@code HTML_ROW_FILTER} contributes its
 * {@code colspan} value, or 1 when {@code MathUtils.parseInt} yields null for
 * that attribute. A row without any children has 0 columns.
 */
private static int getColumnCount( TableRow row )
{
    // getChildren() may return null for a row without children.
    if ( row.getChildren() == null )
    {
        return 0;
    }

    Node[] cells = row.getChildren().extractAllNodesThatMatch( HTML_ROW_FILTER ).toNodeArray();
    int cols = 0;
    for ( Node cell : cells )
    {
        Integer colSpan = MathUtils.parseInt( ((TagNode) cell).getAttribute( "colspan" ) );
        cols += colSpan != null ? colSpan : 1;
    }
    return cols;
}
示例11: getValue
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Retrieves the value of a table cell by appending the text of the cell's
 * child nodes. For composite tags like span or div the inner text is
 * appended; for any other node its own text is used. The result is trimmed
 * and every match of the pattern passed to {@code replaceAll} below is
 * replaced with {@code EMPTY}. A cell without children yields the result of
 * that processing applied to an empty string.
 */
public static String getValue( TagNode cell )
{
    StringBuilder builder = new StringBuilder();

    // getChildren() may return null for a cell without children, so guard
    // before iterating.
    if ( cell.getChildren() != null )
    {
        for ( Node child : cell.getChildren().toNodeArray() )
        {
            if ( child instanceof CompositeTag )
            {
                builder.append( ((CompositeTag) child).getStringText() );
            }
            else
            {
                builder.append( child.getText() );
            }
        }
    }
    return builder.toString().trim().replaceAll( " ", EMPTY );
}
示例12: processNodeList
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Walks the node list depth-first and prints the plain text of every node
 * that has no child list and whose text contains {@code keyword}.
 *
 * @param list    the nodes to walk
 * @param keyword the text to look for
 */
private static void processNodeList(NodeList list, String keyword) {
    for (SimpleNodeIterator cursor = list.elements(); cursor.hasMoreNodes();) {
        Node current = cursor.nextNode();
        NodeList children = current.getChildren();

        // A node with a child list is not a value node: descend into it.
        if (children != null) {
            processNodeList(children, keyword);
            continue;
        }

        // No child list: treat the node as a value node and test its text.
        String text = current.toPlainTextString();
        if (text.contains(keyword)) {
            System.out.println(text);
        }
    }
}
示例13: html2Text
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Concatenates the text nodes of the given HTML, stopping once more than
 * {@code len} characters have been collected. The result is not cut back to
 * {@code len}, so it may be longer than that.
 *
 * @param html the HTML source to scan
 * @param len  the length after which scanning stops
 * @return the collected text
 * @throws RuntimeException wrapping any {@code ParserException} raised while
 *         reading nodes
 */
public static String html2Text(String html, int len) {
    try {
        Lexer lexer = new Lexer(html);
        StringBuilder text = new StringBuilder(html.length());
        for (Node node = lexer.nextNode(); node != null; node = lexer.nextNode()) {
            if (node instanceof TextNode) {
                text.append(node.toHtml());
            }
            if (text.length() > len) {
                break;
            }
        }
        return text.toString();
    } catch (ParserException e) {
        throw new RuntimeException(e);
    }
}
示例14: getColumnCount
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Returns the number of columns/cells in the given row, including cell
 * spacing. Each child matching {@code HTML_ROW_FILTER} contributes its
 * {@code colspan} value, or 1 when {@code MathUtils.parseInt} yields null for
 * that attribute. A row without any children has 0 columns.
 */
private static int getColumnCount( TableRow row )
{
    // getChildren() may return null for a row without children.
    if ( row.getChildren() == null )
    {
        return 0;
    }

    Node[] cells = row.getChildren().extractAllNodesThatMatch( HTML_ROW_FILTER ).toNodeArray();
    int cols = 0;
    for ( Node cell : cells )
    {
        Integer colSpan = MathUtils.parseInt( ((TagNode) cell).getAttribute( "colspan" ) );
        cols += colSpan != null ? colSpan : 1;
    }
    return cols;
}
示例15: getValue
import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Retrieves the value of a table cell by appending the text of the cell's
 * child nodes. For composite tags like span or div the inner text is
 * appended; for any other node its own text is used. The result is trimmed
 * and every match of the pattern passed to {@code replaceAll} below is
 * replaced with {@code EMPTY}. A cell without children yields the result of
 * that processing applied to an empty string.
 */
public static String getValue( TagNode cell )
{
    StringBuilder builder = new StringBuilder();

    // getChildren() may return null for a cell without children, so guard
    // before iterating.
    if ( cell.getChildren() != null )
    {
        for ( Node child : cell.getChildren().toNodeArray() )
        {
            if ( child instanceof CompositeTag )
            {
                builder.append( ((CompositeTag) child).getStringText() );
            }
            else
            {
                builder.append( child.getText() );
            }
        }
    }
    return builder.toString().trim().replaceAll( " ", EMPTY );
}