当前位置: 首页>>代码示例>>Java>>正文


Java ParserException类代码示例

本文整理汇总了 Java 中 org.htmlparser.util.ParserException 的典型用法代码示例。如果您正苦于以下问题:Java ParserException 类的具体用法?Java ParserException 怎么用?Java ParserException 使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


ParserException 类属于 org.htmlparser.util 包,在下文中一共展示了 ParserException 类的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Java 代码示例。

示例1: parserUrl

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Collects every node of the parsed document whose text starts with
 * {@code a href=}.
 *
 * @param parser the parser to extract nodes from
 * @return the matching nodes, or {@code null} if the parser raised a
 *         {@link ParserException} (its stack trace is printed)
 */
@Override
public NodeList parserUrl(Parser parser) {
	NodeFilter anchorFilter = new NodeFilter() {
		@Override
		public boolean accept(Node candidate) {
			return candidate.getText().startsWith("a href=");
		}
	};
	NodeList matches = null;
	try {
		matches = parser.extractAllNodesThatMatch(anchorFilter);
	} catch (ParserException e) {
		e.printStackTrace();
	}
	return matches;
}
 
开发者ID:PerkinsZhu,项目名称:WebSprider,代码行数:20,代码来源:HtmlParser01.java

示例2: dealTag

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Processes a tag by handing each of its child nodes to {@code parserNode}.
 * A tag without a child list is left untouched.
 *
 * @param tag the tag to process
 */
private void dealTag(Node tag) {
	NodeList children = tag.getChildren();
	if (children == null) {
		return;
	}
	try {
		for (NodeIterator it = children.elements(); it.hasMoreNodes(); ) {
			parserNode(it.nextNode());	// analyse the child node
		}
	} catch (ParserException exc) {
		// Only a marker is printed; the failure is not propagated.
		System.out.println("ParserException");
	}
}
 
开发者ID:uraplutonium,项目名称:hadoop-distributed-crawler,代码行数:21,代码来源:URLAnalyzer.java

示例3: html2Text

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Concatenates the text nodes of an HTML string, stopping once more than
 * {@code len} characters have been gathered.
 *
 * <p>The threshold is checked after each node, so the result may be longer
 * than {@code len}; it is not truncated.
 *
 * @param html the HTML markup to scan
 * @param len  length threshold after which scanning stops
 * @return the concatenated {@code toHtml()} output of the text nodes read
 * @throws RuntimeException wrapping the {@link ParserException} if lexing fails
 */
public static String html2Text(String html, int len) {
	Lexer lexer = new Lexer(html);
	StringBuilder text = new StringBuilder(html.length());
	try {
		for (Node current = lexer.nextNode(); current != null; current = lexer.nextNode()) {
			if (current instanceof TextNode) {
				text.append(current.toHtml());
			}
			if (text.length() > len) {
				break;
			}
		}
	} catch (ParserException e) {
		throw new RuntimeException(e);
	}
	return text.toString();
}
 
开发者ID:huanzhou,项目名称:jeecms6,代码行数:19,代码来源:StrUtils.java

示例4: getPlainText

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Converts HTML markup to plain text by visiting it with a {@code StringBean}.
 *
 * @param htmlStr the HTML markup to convert
 * @return the strings collected by the bean, or an empty string if a
 *         {@link ParserException} occurred (its stack trace is printed)
 */
public static String getPlainText(String htmlStr) {
    Parser htmlParser = new Parser();
    try {
        htmlParser.setInputHTML(htmlStr);

        StringBean collector = new StringBean();
        // leave out the links contained in the page
        collector.setLinks(false);
        // replace non-breaking spaces with regular spaces
        collector.setReplaceNonBreakingSpaces(true);
        // collapse each run of spaces into a single space
        collector.setCollapse(true);

        htmlParser.visitAllNodesWith(collector);
        return collector.getStrings();
    } catch (ParserException e) {
        e.printStackTrace();
        return "";
    }
}
 
开发者ID:sercxtyf,项目名称:onboard,代码行数:24,代码来源:HtmlTextParser.java

示例5: parseMessage

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Parses the body of an HTML message and returns a parsed representation.
 * See <a href="http://htmlparser.sourceforge.net/">the HTML Parser project</a> for details.
 *
 * @param url the url that the message resulted from (not read by this method)
 * @param message the Message to parse
 * @return a NodeList containing all nodes making up the page, or {@code null} when
 *         the Content-Type header is absent or does not match {@code text/html.*},
 *         when the content is null or empty, or when parsing fails
 */
public Object parseMessage(HttpUrl url, Message message) {
    String contentType = message.getHeader("Content-Type");
    boolean isHtml = contentType != null && contentType.matches("text/html.*");
    if (!isHtml) {
        return null;
    }

    byte[] body = message.getContent();
    boolean hasBody = body != null && body.length != 0;
    if (!hasBody) {
        return null;
    }

    // Bytes are decoded with the platform default charset, as in the original.
    Parser parser = Parser.createParser(new String(body), null);
    NodeFilter acceptEverything = new NodeFilter() {
        public boolean accept(Node node) {
            return true;
        }
    };
    try {
        return parser.extractAllNodesThatMatch(acceptEverything);
    } catch (ParserException pe) {
        _logger.severe(pe.toString());
        return null;
    }
}
 
开发者ID:Neraud,项目名称:PADListener,代码行数:30,代码来源:HTMLParser.java

示例6: getGangliaAttribute

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Downloads the Ganglia metrics page of a cluster and returns the option
 * texts of every {@code <select id="metrics-picker">} element on it.
 *
 * <p>The page URL is built by replacing {@code clusterPattern} in
 * {@code gangliaMetricUrl} with the cluster name.
 *
 * @param clusterName name of the cluster to substitute into the URL
 * @return the option texts in document order; empty if no picker or option is found
 * @throws ParserException if the page cannot be parsed
 * @throws MalformedURLException if the resulting URL is invalid
 * @throws IOException if the connection cannot be opened
 */
public List<String> getGangliaAttribute(String clusterName)
		throws ParserException, MalformedURLException, IOException {
	String url = gangliaMetricUrl.replaceAll(clusterPattern, clusterName);
	Parser parser = new Parser(new URL(url).openConnection());
	NodeFilter nodeFilter = new AndFilter(new TagNameFilter("select"),
			new HasAttributeFilter("id", "metrics-picker"));
	NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);

	List<String> metricList = new ArrayList<String>();
	SimpleNodeIterator iterator = nodeList.elements();
	while (iterator.hasMoreNodes()) {
		NodeList children = iterator.nextNode().getChildren();
		if (children == null) {
			// A select element without children has no child list at all.
			continue;
		}

		SimpleNodeIterator childIterator = children.elements();
		while (childIterator.hasMoreNodes()) {
			Node child = childIterator.nextNode();
			// Children may also be non-option nodes (e.g. whitespace text
			// between the options); casting those blindly would throw.
			if (child instanceof OptionTag) {
				metricList.add(((OptionTag) child).getOptionText());
			}
		}
	}

	return metricList;
}
 
开发者ID:Ctrip-DI,项目名称:Hue-Ctrip-DI,代码行数:23,代码来源:GangliaHttpParser.java

示例7: splitHtml

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Splits {@code content} into HTML pages.
 *
 * <p>The pages produced by {@code recusiveSplitHtml} are followed by one last
 * page built from the remainder of {@code content} starting at
 * {@code startPosition}. That remainder is prefixed with every tag from
 * {@code tagNodeList} that starts before {@code startPosition} and whose end
 * tag ends at or after it, and is then re-parsed and serialised.
 * NOTE(review): presumably {@code recusiveSplitHtml} maintains
 * {@code startPosition} and {@code tagNodeList} - confirm against that method.
 *
 * @return the list of pages; if parsing fails the stack trace is printed and
 *         whatever was collected so far is returned
 */
private List<String> splitHtml() {
	List<String> pages = new ArrayList<String>();
	try {
		NodeList parsed = Parser.createParser(content, "UTF-8").parse(null);
		pages = recusiveSplitHtml(parsed);

		StringBuffer tail = new StringBuffer();
		for (TagNode open : tagNodeList) {
			boolean startsBefore = open.getStartPosition() < startPosition;
			if (startsBefore && open.getEndTag().getEndPosition() >= startPosition) {
				// re-emit the opening tag so the last page is well nested
				tail.append("<").append(open.getText()).append(">");
			}
		}
		tail.append(content.substring(startPosition));

		NodeList tailNodes = Parser.createParser(tail.toString(), "UTF-8").parse(null);
		pages.add(tailNodes.toHtml());
	} catch (ParserException e) {
		e.printStackTrace();
	}
	return pages;
}
 
开发者ID:wangko27,项目名称:SelfSoftShop,代码行数:24,代码来源:Article.java

示例8: html2text

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Converts an HTML document into plain text.
 *
 * @param html HTML document
 * @return plain text (empty if the document yields no text) or
 *         <code>null</code> if the conversion failed
 */
public static synchronized String html2text(String html) {
	StringBean textCollector = new StringBean();
	textCollector.setLinks(false);                   // no links
	textCollector.setReplaceNonBreakingSpaces(true); // replace non-breaking spaces
	textCollector.setCollapse(true);                 // replace sequences of whitespaces

	Parser htmlParser = new Parser();
	try {
		htmlParser.setInputHTML(html);
		htmlParser.visitAllNodesWith(textCollector);
	} catch (ParserException e) {
		return null;
	}

	String text = textCollector.getStrings();
	return (text == null) ? "" : text;  // no content
}
 
开发者ID:claritylab,项目名称:lucida,代码行数:26,代码来源:HTMLConverter.java

示例9: file2text

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Reads an HTML document from a file and converts it into plain text.
 *
 * @param filename name of file containing HTML documents
 * @return plain text as reported by the string bean, or <code>null</code>
 *         if the reading or conversion failed
 */
public static synchronized String file2text(String filename) {
	StringBean textCollector = new StringBean();
	textCollector.setLinks(false);                   // no links
	textCollector.setReplaceNonBreakingSpaces(true); // replace non-breaking spaces
	textCollector.setCollapse(true);                 // replace sequences of whitespaces

	Parser fileParser = new Parser();
	try {
		fileParser.setResource(filename);
		fileParser.visitAllNodesWith(textCollector);
	} catch (ParserException e) {
		return null;
	}

	return textCollector.getStrings();
}
 
开发者ID:claritylab,项目名称:lucida,代码行数:24,代码来源:HTMLConverter.java

示例10: nextNode

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Returns the next node, delivering the CDATA content that follows an
 * opening SCRIPT or STYLE tag as a separate node on the following call.
 *
 * <p>When a cached CDATA node is returned, {@code inJS} is set to
 * {@code cachedJS} and {@code inCSS} to its negation; for every other node
 * both flags are false.
 *
 * @return the cached CDATA node if one is pending, otherwise the node
 *         produced by the superclass
 * @throws ParserException if the superclass lexer fails
 */
@Override
public Node nextNode() throws ParserException {
	if (cached != null) {
		// Hand out the CDATA node read ahead during the previous call.
		Node pending = cached;
		cached = null;
		inJS = cachedJS;
		inCSS = !cachedJS;
		return pending;
	}

	inJS = false;
	inCSS = false;
	Node node = super.nextNode();
	boolean opensScript = NodeUtils.isNonEmptyOpenTagNodeNamed(node, "SCRIPT");
	if (opensScript || NodeUtils.isNonEmptyOpenTagNodeNamed(node, "STYLE")) {
		// Read the element's content now and keep it for the next call.
		cached = super.parseCDATA(true);
		cachedJS = opensScript;
	}
	return node;
}
 
开发者ID:iipc,项目名称:webarchive-commons,代码行数:22,代码来源:CDATALexer.java

示例11: testInJS

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Walks through the three nodes of a script element and checks that only
 * the content node is reported as being inside JavaScript.
 */
public void testInJS() throws ParserException {
	l = makeLexer("<script>foo bar baz</script>");
	// before any node is read, neither flag is set
	assertFalse(l.inCSS());
	assertFalse(l.inJS());
	// node 1: the opening SCRIPT tag, not yet flagged as JS
	n = l.nextNode();
	assertFalse(l.inCSS());
	assertFalse(l.inJS());
	assertTrue(NodeUtils.isNonEmptyOpenTagNodeNamed(n, "SCRIPT"));
	// node 2: the script body as a text node, flagged as JS only
	n = l.nextNode();
	assertFalse(l.inCSS());
	assertTrue(l.inJS());
	assertTrue(NodeUtils.isTextNode(n));
	assertEquals("foo bar baz",((TextNode)n).getText());
	// node 3: the closing SCRIPT tag, flags cleared again
	n = l.nextNode();
	assertFalse(l.inCSS());
	assertFalse(l.inJS());
	assertTrue(NodeUtils.isCloseTagNodeNamed(n, "SCRIPT"));
}
 
开发者ID:iipc,项目名称:webarchive-commons,代码行数:19,代码来源:CDATALexerTest.java

示例12: testInCSS

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Walks through the three nodes of a style element and checks that only
 * the content node is reported as being inside CSS.
 */
public void testInCSS() throws ParserException {
	l = makeLexer("<style>foo bar baz</style>");
	// before any node is read, neither flag is set
	assertFalse(l.inCSS());
	assertFalse(l.inJS());
	// node 1: the opening STYLE tag, not yet flagged as CSS
	n = l.nextNode();
	assertFalse(l.inCSS());
	assertFalse(l.inJS());
	assertTrue(NodeUtils.isNonEmptyOpenTagNodeNamed(n, "STYLE"));
	// node 2: the style body as a text node, flagged as CSS only
	n = l.nextNode();
	assertTrue(l.inCSS());
	assertFalse(l.inJS());
	assertTrue(NodeUtils.isTextNode(n));
	assertEquals("foo bar baz",((TextNode)n).getText());
	// node 3: the closing STYLE tag, flags cleared again
	n = l.nextNode();
	assertFalse(l.inCSS());
	assertFalse(l.inJS());
	assertTrue(NodeUtils.isCloseTagNodeNamed(n, "STYLE"));
}
 
开发者ID:iipc,项目名称:webarchive-commons,代码行数:19,代码来源:CDATALexerTest.java

示例13: testInJSComment

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Runs {@code assertJSContentWorks} over script bodies containing HTML
 * comment markers, {@code <} operators, embedded markup in string literals
 * and a CDATA section.
 */
public void testInJSComment() throws ParserException {
	// The original kept commented-out dumpParse("<script>...</script>") calls
	// for most of these bodies as a debugging aid.
	// NOTE(review): dumpParse is not visible here - confirm it still exists.
	String[] scriptBodies = {
		"//<!--\n foo bar baz\n //-->",
		"<!-- foo bar baz -->",
		"//<!-- foo bar baz -->",
		"<!-- foo bar baz //-->",
		"\n//<!-- foo bar baz\n //-->",
		"if(1 < 2) { foo(); } ",
		"if(1 <n) { foo(); } ",
		"document.write(\"<b>bold</b>\"); ",
		"document.write(\"<script>bold</script>\"); ",
		"<![CDATA[\n if(i<n) { foo() } // a comment \n ]]> ",
	};
	for (String body : scriptBodies) {
		assertJSContentWorks(body);
	}
}
 
开发者ID:iipc,项目名称:webarchive-commons,代码行数:26,代码来源:CDATALexerTest.java

示例14: assertJSContentWorks

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Wraps the given text in a script element and asserts that the lexer
 * yields exactly an opening SCRIPT tag, one text node equal to the given
 * text (flagged as JS), and a closing SCRIPT tag.
 *
 * @param js the script body expected to come back unchanged as one text node
 * @throws ParserException if the lexer fails
 */
private void assertJSContentWorks(String js) throws ParserException {
	String html = String.format("<script>%s</script>",js);
	l = makeLexer(html);
	// before any node is read, neither flag is set
	assertFalse(l.inCSS());
	assertFalse(l.inJS());
	// node 1: the opening SCRIPT tag, not yet flagged as JS
	n = l.nextNode();
	assertFalse(l.inCSS());
	assertFalse(l.inJS());
	assertTrue(NodeUtils.isNonEmptyOpenTagNodeNamed(n, "SCRIPT"));
	// node 2: the whole script body as a single text node, flagged as JS
	n = l.nextNode();
	assertFalse(l.inCSS());
	assertTrue(l.inJS());
	assertTrue(NodeUtils.isTextNode(n));
	assertEquals(js,((TextNode)n).getText());
	// node 3: the closing SCRIPT tag, flags cleared again
	n = l.nextNode();
	assertFalse(l.inCSS());
	assertFalse(l.inJS());
	assertTrue(NodeUtils.isCloseTagNodeNamed(n, "SCRIPT"));
}
 
开发者ID:iipc,项目名称:webarchive-commons,代码行数:20,代码来源:CDATALexerTest.java

示例15: getDepartmentByDistrict

import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Collects the departments of every cell in the district table, pausing one
 * second after each cell.
 *
 * <p>An interrupt received during a pause does not abort the run: the
 * remaining cells are still processed. The thread's interrupt status is
 * restored before the method exits so that callers can observe it, instead
 * of being silently discarded.
 *
 * @param districtTable table whose cells are each passed to {@code runDistrict}
 * @return the departments returned by {@code runDistrict} for all cells, in
 *         iteration order of {@code cellSet()}
 * @throws IOException declared by the original signature; presumably raised
 *         by {@code runDistrict} - confirm against that method
 * @throws ParserException declared by the original signature; presumably
 *         raised by {@code runDistrict} - confirm against that method
 */
public List<DepartmentInfo> getDepartmentByDistrict(final Table<String, String, String> districtTable) throws IOException, ParserException {
    List<DepartmentInfo> departmentInfoList = new ArrayList<>();
    boolean interrupted = false;
    try {
        for (Table.Cell<String, String, String> cell : districtTable.cellSet()) {
            departmentInfoList.addAll(runDistrict(cell, true));
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // Remember the interrupt and keep going; re-asserting it right
                // here would make every later sleep return immediately.
                interrupted = true;
            }
        }
    } finally {
        if (interrupted) {
            Thread.currentThread().interrupt();
        }
    }
    return departmentInfoList;
}
 
开发者ID:deanjin,项目名称:houseHunter,代码行数:13,代码来源:DepartmentParser.java


注:本文中的 org.htmlparser.util.ParserException 类示例由纯净天空整理自 GitHub、MSDocs 等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的 License;未经允许,请勿转载。