当前位置: 首页>>代码示例>>Java>>正文


Java Node类代码示例

本文整理汇总了Java中org.htmlparser.Node的典型用法代码示例。如果您正苦于以下问题:Java Node类的具体用法?Java Node怎么用?Java Node使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


Node类属于org.htmlparser包,在下文中一共展示了Node类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: parserUrl

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Extracts every node whose raw text starts with {@code a href=}.
 *
 * @param parser the parser to pull matching nodes from
 * @return the matching nodes, or {@code null} when the parser throws a
 *         {@link ParserException} (the stack trace is printed in that case)
 */
@Override
public NodeList parserUrl(Parser parser) {
	// Accept a node only when its text begins with an anchor's href attribute.
	NodeFilter anchorFilter = new NodeFilter() {
		@Override
		public boolean accept(Node candidate) {
			return candidate.getText().startsWith("a href=");
		}
	};

	NodeList matches = null;
	try {
		matches = parser.extractAllNodesThatMatch(anchorFilter);
	} catch (ParserException ex) {
		ex.printStackTrace();
	}
	return matches;
}
 
开发者ID:PerkinsZhu,项目名称:WebSprider,代码行数:20,代码来源:HtmlParser01.java

示例2: hasMetaTagName

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Tells whether the document contains a META tag whose name equals the
 * given name, ignoring case.
 * <p>
 * Example:
 * <pre>
 *   HTMLParser hp = new HTMLParser("http://www.abc.org");
 *   boolean containsKeywords = hp.hasMetaTagName("keywords"); // true if such a tag exists
 *   boolean containsXyz = hp.hasMetaTagName("xyz");           // false if no such tag exists
 * </pre>
 * The parser is reset after the scan completes.
 *
 * @param  name                 name of the META tag to look for
 * @return                      true if a META tag with that name is present, false otherwise
 * @exception  ParserException  propagated from the parser while visiting the nodes
 */
public boolean hasMetaTagName(String name) throws ParserException {
	TagFindingVisitor metaFinder = new TagFindingVisitor(new String[] {"META"});
	parser.visitAllNodesWith(metaFinder);

	boolean found = false;
	for (Node candidate : metaFinder.getTags(0)) {
		MetaTag meta = (MetaTag) candidate;
		if (name.equalsIgnoreCase(meta.getMetaTagName())) {
			found = true;
			break;
		}
	}

	parser.reset();
	return found;
}
 
开发者ID:NCAR,项目名称:joai-project,代码行数:33,代码来源:HTMLParser.java

示例3: getMetaTagContentByName

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Returns the content of the first META tag whose name equals the given
 * name, ignoring case. If no such tag exists, an empty string is returned.
 * <p>
 * Example:
 * <pre>
 *   HTMLParser hp = new HTMLParser("http://www.abc.org");
 *   if (hp.hasMetaTagName("organization")) {
 *       System.out.println(hp.getMetaTagContentByName("organization"));
 *   }
 * </pre>
 * which prints, for instance, {@code ABC Program Center}.
 * <p>
 * The parser is reset after the scan completes.
 *
 * @param  name                 name of the META tag
 * @return                      the content of the matching META tag, or "" when none matches
 * @exception  ParserException  propagated from the parser while visiting the nodes
 */
public String getMetaTagContentByName(String name) throws ParserException {
	String metaTagContent = "";

	String[] tagToBeFound = {"META"};
	TagFindingVisitor visitor = new TagFindingVisitor(tagToBeFound);
	parser.visitAllNodesWith(visitor);
	Node[] allMTags = visitor.getTags(0);

	for (int i = 0; i < allMTags.length; i++) {
		MetaTag metatag = (MetaTag) allMTags[i];

		// Compare case-insensitively, exactly as hasMetaTagName does; otherwise a
		// tag reported as present (e.g. "Keywords" vs "keywords") would yield "".
		if (name.equalsIgnoreCase(metatag.getMetaTagName())) {
			metaTagContent = metatag.getMetaContent();
			break;
		}
	}

	parser.reset();
	return metaTagContent;
}
 
开发者ID:NCAR,项目名称:joai-project,代码行数:37,代码来源:HTMLParser.java

示例4: getLinkTitles

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Gathers the TITLE attribute of every anchor (A) tag in the document.
 * Each title found is appended preceded by a single space, so a non-empty
 * result begins with a space. The parser is reset after the scan completes.
 *
 * @return                      the concatenated link titles, or "" when no link has a title
 * @exception  ParserException  propagated from the parser while visiting the nodes
 */
public String getLinkTitles() throws ParserException {
	TagFindingVisitor linkFinder = new TagFindingVisitor(new String[] {"A"});
	parser.visitAllNodesWith(linkFinder);

	StringBuilder titles = new StringBuilder();
	for (Node anchor : linkFinder.getTags(0)) {
		String linkTitle = ((LinkTag) anchor).getAttribute("TITLE");
		if (linkTitle != null) {
			titles.append(" ").append(linkTitle);
		}
	}

	parser.reset();
	return titles.toString();
}
 
开发者ID:NCAR,项目名称:joai-project,代码行数:25,代码来源:HTMLParser.java

示例5: getImgAlts

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Gathers the ALT attribute of every IMG tag in the document.
 * Each alt text found is appended preceded by a single space, so a non-empty
 * result begins with a space. The parser is reset after the scan completes.
 *
 * @return                      the concatenated alt texts, or "" when no image has one
 * @exception  ParserException  propagated from the parser while visiting the nodes
 */
public String getImgAlts() throws ParserException {
	TagFindingVisitor imageFinder = new TagFindingVisitor(new String[] {"IMG"});
	parser.visitAllNodesWith(imageFinder);

	StringBuilder alts = new StringBuilder();
	for (Node image : imageFinder.getTags(0)) {
		String altText = ((ImageTag) image).getAttribute("ALT");
		if (altText != null) {
			alts.append(" ").append(altText);
		}
	}

	parser.reset();
	return alts.toString();
}
 
开发者ID:NCAR,项目名称:joai-project,代码行数:25,代码来源:HTMLParser.java

示例6: toObject

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Converts one node into a Record and passes it to the store.
 * <p>
 * The name, type name, content and date are taken from the trimmed plain
 * text of the node's children at positions 1, 3, 5 and 7. The id is cut out
 * of the fifth whitespace-separated token of the node's own text.
 * Storage failures are logged and not rethrown.
 *
 * @param node the node to convert; presumably a table row whose odd-indexed
 *             children are the data cells — TODO confirm against the caller
 */
private void toObject(Node node) {
	Record record = new Record();

	String name = node.getChildren().elementAt(1).toPlainTextString().trim();
	record.setName(name);
	String typeName = node.getChildren().elementAt(3).toPlainTextString().trim();
	record.setTypeName(typeName);
	String content = node.getChildren().elementAt(5).toPlainTextString().trim();
	record.setContent(content);
	String date = node.getChildren().elementAt(7).toPlainTextString().trim();
	record.setDate(date);

	// The id sits inside the fifth token: drop its first 23 and last 3 characters.
	String[] tokens = node.getText().split("\\s");
	String click = tokens[4];
	int idEnd = click.length() - 3;
	record.setId(click.substring(23, idEnd));

	try {
		store.store(record);
	} catch (Exception e) {
		String message = currentThread().getName() + "存储到hbase出现错误!\n" + e.getMessage() + "\n";
		logger.error(message);
	}
}
 
开发者ID:husky00,项目名称:worm,代码行数:20,代码来源:PostRequestHtmlParser.java

示例7: dealTag

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Processes a tag by analysing each of its child nodes in turn.
 * A tag without children is ignored. If a ParserException occurs while
 * iterating, a short notice is printed and the remaining children are skipped.
 *
 * @param tag the tag to process
 */
private void dealTag(Node tag) {
	NodeList children = tag.getChildren();
	if (children == null) {
		return;
	}

	NodeIterator it = children.elements();
	try {
		for (; it.hasMoreNodes(); ) {
			// Hand every child back to the node analyser (recursive descent).
			parserNode(it.nextNode());
		}
	} catch (ParserException exc) {
		System.out.println("ParserException");
	}
}
 
开发者ID:uraplutonium,项目名称:hadoop-distributed-crawler,代码行数:21,代码来源:URLAnalyzer.java

示例8: parseDetailInfo

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Builds a map from link id suffix to link text for the links directly under
 * the first node of the given list.
 * <p>
 * Only {@code LinkTag} children with a non-blank {@code id} attribute that
 * does not contain "all" are considered. The key is the part of the id after
 * its first underscore (the whole id when there is none); the value is the
 * link's plain text. A failure on one child is logged and does not stop the
 * remaining children from being processed.
 *
 * @param nodeList the nodes to inspect; only the first element is used
 * @return the id-to-text map; empty when the list is empty or the first node
 *         has no children
 */
private Map<String, String> parseDetailInfo(NodeList nodeList) {
    Map<String, String> infoMap = Maps.newHashMap();
    if (nodeList.size() == 0) {
        return infoMap;
    }

    // getChildren() is null for a node without children; without this guard the
    // dereference below would throw outside the per-node try/catch.
    NodeList children = nodeList.elementAt(0).getChildren();
    if (children == null) {
        return infoMap;
    }

    for (Node pageNode : children.toNodeArray()) {
        try {
            if (pageNode instanceof LinkTag) {
                String rawId = ((LinkTag) pageNode).getAttribute("id");
                if (StringUtils.isBlank(rawId)) {
                    continue;
                }
                if (rawId.contains("all")) {
                    continue;
                }
                // indexOf returns -1 when there is no underscore, so the whole id is kept.
                String id = rawId.substring(rawId.indexOf("_") + 1);

                infoMap.put(id, pageNode.toPlainTextString());
            }
        } catch (Exception e) {
            log.error("parse parseDetailInfo catch Exception:", e);
        }
    }
    return infoMap;
}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:26,代码来源:HouseParser.java

示例9: parseSpan

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Decodes a value from the {@code class} attributes of the Span children
 * found inside the Div nodes of the given list.
 * <p>
 * For every Div, each Span child's class attribute is looked up in
 * {@code MappingSet.NUMBER_MAPPING} and the result is appended to the output.
 * If a Div's plain text is "-", the method returns "0" immediately.
 * Divs without children contribute nothing.
 *
 * @param nodeList the nodes to scan
 * @return the concatenated mapped values, "0" when a Div's text is "-", or an
 *         empty string when nothing was appended
 */
private String parseSpan(NodeList nodeList) {
    StringBuilder sb = new StringBuilder();
    for (Node node : nodeList.toNodeArray()) {
        if (!(node instanceof Div)) {
            continue;
        }
        if (StringUtils.equalsIgnoreCase("-", node.toPlainTextString())) {
            return "0";
        }

        // getChildren() is null for a Div without children; skip it instead of
        // failing with a NullPointerException.
        NodeList spanNodeList = node.getChildren();
        if (spanNodeList == null) {
            continue;
        }

        for (Node spanNode : spanNodeList.toNodeArray()) {
            if (spanNode instanceof Span) {
                String attribute = ((Span) spanNode).getAttribute("class");
                // NOTE(review): an unmapped class appends whatever the lookup returns
                // (possibly the text "null") — confirm this is intended.
                sb.append(MappingSet.NUMBER_MAPPING.get(attribute));
            }
        }
    }
    return sb.toString();
}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:25,代码来源:HouseParser.java

示例10: getColumnCount

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Returns the number of columns in the given row. Each cell matched by
 * {@code HTML_ROW_FILTER} counts as the value of its {@code colspan}
 * attribute, or as one column when {@code MathUtils.parseInt} yields null
 * for that attribute. A row without child nodes has zero columns.
 *
 * @param row the table row to measure
 * @return the total column count of the row
 */
private static int getColumnCount( TableRow row )
{
    // getChildren() is null for a row without child nodes; report zero columns
    // instead of failing with a NullPointerException.
    if ( row.getChildren() == null )
    {
        return 0;
    }

    Node[] cells = row.getChildren().extractAllNodesThatMatch( HTML_ROW_FILTER ).toNodeArray();

    int cols = 0;

    for ( Node cell : cells )
    {
        Integer colSpan = MathUtils.parseInt( ((TagNode) cell).getAttribute( "colspan" ) );

        cols += colSpan != null ? colSpan : 1;
    }

    return cols;
}
 
开发者ID:dhis2,项目名称:dhis2-core,代码行数:19,代码来源:GridUtils.java

示例11: getValue

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Retrieves the value of a table cell by concatenating the text of its
 * child nodes. For composite tags such as span or div the inner text is
 * appended; for any other node its own text is appended. The result is
 * trimmed and every {@code &nbsp;} entity is replaced with {@code EMPTY}.
 * A cell without child nodes yields the result of that same post-processing
 * applied to an empty string.
 *
 * @param cell the table cell to read
 * @return the cell's text value
 */
public static String getValue( TagNode cell )
{
    StringBuilder builder = new StringBuilder();

    // getChildren() is null for a cell without child nodes (e.g. an empty cell);
    // treat it as having no text instead of failing with a NullPointerException.
    if ( cell.getChildren() != null )
    {
        for ( Node child : cell.getChildren().toNodeArray() )
        {
            if ( child instanceof CompositeTag )
            {
                builder.append( ((CompositeTag) child).getStringText() );
            }
            else
            {
                builder.append( child.getText() );
            }
        }
    }

    return builder.toString().trim().replaceAll( "&nbsp;", EMPTY );
}
 
开发者ID:dhis2,项目名称:dhis2-core,代码行数:24,代码来源:GridUtils.java

示例12: processNodeList

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Walks the node list depth-first and prints the plain text of every
 * childless node whose text contains the keyword.
 *
 * @param list    the nodes to walk
 * @param keyword the text to search for
 */
private static void processNodeList(NodeList list, String keyword) {
	for (SimpleNodeIterator it = list.elements(); it.hasMoreNodes(); ) {
		Node current = it.nextNode();
		NodeList children = current.getChildren();

		if (children != null) {
			// The node has children: keep descending into them.
			processNodeList(children, keyword);
			continue;
		}

		// No child list means this is a value node; print its text on a keyword hit.
		String text = current.toPlainTextString();
		if (text.contains(keyword)) {
			System.out.println(text);
		}
	}
}
 
开发者ID:YufangWoo,项目名称:news-crawler,代码行数:22,代码来源:HtmlParserTest.java

示例13: html2Text

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Extracts the text nodes of an HTML string, stopping once the collected
 * text is longer than {@code len}. The result is not cut to {@code len}:
 * the text node that crosses the limit is included whole.
 *
 * @param html the HTML source to scan
 * @param len  the length after which no further nodes are read
 * @return the concatenated HTML of the text nodes read so far
 * @throws RuntimeException wrapping any ParserException raised by the lexer
 */
public static String html2Text(String html, int len) {
	try {
		Lexer lexer = new Lexer(html);
		StringBuilder text = new StringBuilder(html.length());
		for (Node node = lexer.nextNode(); node != null; node = lexer.nextNode()) {
			if (node instanceof TextNode) {
				text.append(node.toHtml());
			}
			// Checked after every node, whatever its type, before fetching the next one.
			if (text.length() > len) {
				break;
			}
		}
		return text.toString();
	} catch (ParserException e) {
		throw new RuntimeException(e);
	}
}
 
开发者ID:huanzhou,项目名称:jeecms6,代码行数:19,代码来源:StrUtils.java

示例14: getColumnCount

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Returns the number of columns in the given row. Each cell matched by
 * {@code HTML_ROW_FILTER} contributes the value of its {@code colspan}
 * attribute, or one column when {@code MathUtils.parseInt} yields null
 * for that attribute. A row without child nodes has zero columns.
 *
 * @param row the table row to measure
 * @return the total column count of the row
 */
private static int getColumnCount( TableRow row )
{
    // A row without child nodes has a null child list; answer zero rather
    // than dereferencing it.
    if ( row.getChildren() == null )
    {
        return 0;
    }

    Node[] cells = row.getChildren().extractAllNodesThatMatch( HTML_ROW_FILTER ).toNodeArray();

    int cols = 0;

    for ( Node cell : cells )
    {
        Integer colSpan = MathUtils.parseInt( ((TagNode) cell).getAttribute( "colspan" ) );

        cols += colSpan != null ? colSpan : 1;
    }

    return cols;
}
 
开发者ID:ehatle,项目名称:AgileAlligators,代码行数:19,代码来源:GridUtils.java

示例15: getValue

import org.htmlparser.Node; //导入依赖的package包/类
/**
 * Retrieves the value of a table cell by appending the text of its child
 * nodes. For composite tags such as span or div the inner text is used;
 * for any other node its own text is used. The result is trimmed and every
 * {@code &nbsp;} entity is replaced with {@code EMPTY}. A cell without child
 * nodes yields the result of that same post-processing applied to an empty
 * string.
 *
 * @param cell the table cell to read
 * @return the cell's text value
 */
public static String getValue( TagNode cell )
{
    StringBuilder builder = new StringBuilder();

    // An empty cell has a null child list; skip the loop rather than
    // dereferencing it.
    if ( cell.getChildren() != null )
    {
        for ( Node child : cell.getChildren().toNodeArray() )
        {
            if ( child instanceof CompositeTag )
            {
                builder.append( ((CompositeTag) child).getStringText() );
            }
            else
            {
                builder.append( child.getText() );
            }
        }
    }

    return builder.toString().trim().replaceAll( "&nbsp;", EMPTY );
}
 
开发者ID:ehatle,项目名称:AgileAlligators,代码行数:24,代码来源:GridUtils.java


注:本文中的org.htmlparser.Node类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。