本文整理汇总了Java中org.htmlparser.util.ParserException类的典型用法代码示例。如果您正苦于以下问题:Java ParserException类的具体用法?Java ParserException怎么用?Java ParserException使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
ParserException类属于org.htmlparser.util包,在下文中一共展示了ParserException类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: parserUrl
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Collects every node whose text begins with {@code a href=}.
 *
 * @param parser the parser whose nodes are filtered
 * @return the matching nodes, or {@code null} if the parser throws a
 *         {@link ParserException} (its stack trace is printed)
 */
@Override
public NodeList parserUrl(Parser parser) {
    // Accept only nodes whose text starts with an anchor href attribute.
    NodeFilter anchorFilter = new NodeFilter() {
        @Override
        public boolean accept(Node candidate) {
            return candidate.getText().startsWith("a href=");
        }
    };

    NodeList matches = null;
    try {
        matches = parser.extractAllNodesThatMatch(anchorFilter);
    } catch (ParserException e) {
        e.printStackTrace();
    }
    return matches;
}
示例2: dealTag
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Processes a tag by handing each of its child nodes to {@code parserNode}.
 * A {@link ParserException} raised while iterating is reported on standard
 * output and ends the iteration.
 *
 * @param tag the tag to process
 */
private void dealTag(Node tag) {
    NodeList children = tag.getChildren();
    if (children == null) {
        return; // the tag has no children, nothing to do
    }
    try {
        for (NodeIterator it = children.elements(); it.hasMoreNodes();) {
            parserNode(it.nextNode()); // recursive call that analyzes the node
        }
    } catch (ParserException exc) {
        // Only a marker line is printed; the stack trace is not.
        System.out.println("ParserException");
    }
}
示例3: html2Text
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Concatenates the text nodes of an HTML string, stopping as soon as more
 * than {@code len} characters have been collected. The result is not
 * truncated, so it may be longer than {@code len}.
 *
 * @param html the HTML source to scan
 * @param len  collected length beyond which scanning stops
 * @return the concatenated {@code toHtml()} output of the text nodes visited
 * @throws RuntimeException wrapping any {@link ParserException} from the lexer
 */
public static String html2Text(String html, int len) {
    try {
        Lexer lexer = new Lexer(html);
        StringBuilder text = new StringBuilder(html.length());
        for (Node current = lexer.nextNode(); current != null; current = lexer.nextNode()) {
            if (current instanceof TextNode) {
                text.append(current.toHtml());
            }
            // Checked after every node, text or not, exactly once per iteration.
            if (text.length() > len) {
                break;
            }
        }
        return text.toString();
    } catch (ParserException e) {
        throw new RuntimeException(e);
    }
}
示例4: getPlainText
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Extracts the plain text of an HTML string using a {@link StringBean}.
 *
 * @param htmlStr the HTML source
 * @return the text collected by the bean, or an empty string if a
 *         {@link ParserException} occurs (its stack trace is printed)
 */
public static String getPlainText(String htmlStr) {
    Parser parser = new Parser();
    try {
        parser.setInputHTML(htmlStr);

        StringBean extractor = new StringBean();
        // Do not include the links contained in the page.
        extractor.setLinks(false);
        // Replace non-breaking spaces with regular spaces.
        extractor.setReplaceNonBreakingSpaces(true);
        // Collapse runs of whitespace into a single space.
        extractor.setCollapse(true);

        parser.visitAllNodesWith(extractor);
        return extractor.getStrings();
    } catch (ParserException e) {
        e.printStackTrace();
        return "";
    }
}
示例5: parseMessage
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Parses the body of the message and returns a parsed representation.
 * See <a href="http://htmlparser.sourceforge.net/">htmlparser.sourceforge.net</a>
 * for details.
 *
 * @param url the url that the message resulted from (not used by this method)
 * @param message the Message to parse
 * @return a NodeList of the nodes extracted with an accept-everything filter,
 *         or {@code null} when the Content-Type header is absent or does not
 *         start with {@code text/html}, when the content is null or empty,
 *         or when parsing fails (the exception is logged at severe level)
 */
public Object parseMessage(HttpUrl url, Message message) {
    String contentType = message.getHeader("Content-Type");
    boolean isHtml = contentType != null && contentType.matches("text/html.*");
    if (!isHtml) {
        return null;
    }

    byte[] body = message.getContent();
    if (body == null || body.length == 0) {
        return null;
    }

    // Filter that lets every node through.
    NodeFilter acceptAll = new NodeFilter() {
        public boolean accept(Node node) {
            return true;
        }
    };

    // NOTE(review): the bytes are decoded with the platform default charset.
    Parser parser = Parser.createParser(new String(body), null);
    try {
        return parser.extractAllNodesThatMatch(acceptAll);
    } catch (ParserException pe) {
        _logger.severe(pe.toString());
        return null;
    }
}
示例6: getGangliaAttribute
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Downloads the Ganglia metric page for a cluster and returns the option
 * texts found inside every {@code <select id="metrics-picker">} element.
 *
 * <p>The page URL is built by replacing {@code clusterPattern} in
 * {@code gangliaMetricUrl} with the cluster name. Children of the select
 * element that are not {@link OptionTag}s (for example text nodes between
 * options) are skipped, and a select element without children contributes
 * nothing.
 *
 * @param clusterName name of the cluster substituted into the metric URL
 * @return the option texts in document order; empty if none were found
 * @throws ParserException if the page cannot be parsed
 * @throws MalformedURLException if the resulting URL is not valid
 * @throws IOException if the connection cannot be opened
 */
public List<String> getGangliaAttribute(String clusterName)
        throws ParserException, MalformedURLException, IOException {
    String url = gangliaMetricUrl.replaceAll(clusterPattern, clusterName);
    Parser parser = new Parser(new URL(url).openConnection());
    NodeFilter nodeFilter = new AndFilter(new TagNameFilter("select"),
            new HasAttributeFilter("id", "metrics-picker"));
    NodeList nodeList = parser.extractAllNodesThatMatch(nodeFilter);

    List<String> metricList = new ArrayList<String>();
    SimpleNodeIterator iterator = nodeList.elements();
    while (iterator.hasMoreNodes()) {
        NodeList children = iterator.nextNode().getChildren();
        if (children == null) {
            // getChildren() yields null for a node without children.
            continue;
        }
        SimpleNodeIterator childIterator = children.elements();
        while (childIterator.hasMoreNodes()) {
            Node child = childIterator.nextNode();
            // Only option tags carry metric names; a blind cast would fail
            // on any other child node.
            if (child instanceof OptionTag) {
                metricList.add(((OptionTag) child).getOptionText());
            }
        }
    }
    return metricList;
}
示例7: splitHtml
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Splits {@code content} into HTML pages.
 *
 * <p>The parsed document is first handed to {@code recusiveSplitHtml}; the
 * remainder of {@code content} from {@code startPosition} onwards is then
 * appended as a final page, prefixed with the opening tags from
 * {@code tagNodeList} that start before {@code startPosition} and whose end
 * tag finishes at or after it.
 * NOTE(review): presumably {@code recusiveSplitHtml} maintains
 * {@code tagNodeList} and {@code startPosition} — confirm against that method.
 *
 * @return the pages produced; if a {@link ParserException} occurs, whatever
 *         had been collected up to that point (its stack trace is printed)
 */
private List<String> splitHtml() {
    List<String> pages = new ArrayList<String>();
    try {
        NodeList parsed = Parser.createParser(content, "UTF-8").parse(null);
        pages = recusiveSplitHtml(parsed);

        // Re-open the tags that are still open at the split position.
        StringBuilder tail = new StringBuilder();
        for (TagNode open : tagNodeList) {
            boolean startsBefore = open.getStartPosition() < startPosition;
            if (startsBefore && open.getEndTag().getEndPosition() >= startPosition) {
                tail.append("<").append(open.getText()).append(">");
            }
        }
        tail.append(content.substring(startPosition));

        // Round-trip the tail through the parser and add it as the last page.
        NodeList tailNodes = Parser.createParser(tail.toString(), "UTF-8").parse(null);
        pages.add(tailNodes.toHtml());
    } catch (ParserException e) {
        e.printStackTrace();
    }
    return pages;
}
示例8: html2text
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Converts an HTML document into plain text.
 *
 * @param html HTML document
 * @return the plain text (an empty string if the document yields no text),
 *         or <code>null</code> if the conversion failed
 */
public static synchronized String html2text(String html) {
    // Text extractor: no links, regular spaces, collapsed whitespace.
    StringBean extractor = new StringBean();
    extractor.setLinks(false);
    extractor.setReplaceNonBreakingSpaces(true);
    extractor.setCollapse(true);

    Parser parser = new Parser();
    try {
        parser.setInputHTML(html);
        parser.visitAllNodesWith(extractor);
    } catch (ParserException e) {
        return null;
    }

    String text = extractor.getStrings();
    return text == null ? "" : text; // no content
}
示例9: file2text
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Reads an HTML document from a file and converts it into plain text.
 *
 * @param filename name of file containing HTML documents
 * @return whatever the string bean collected, or <code>null</code> if the
 *         reading or conversion failed
 */
public static synchronized String file2text(String filename) {
    // Text extractor: no links, regular spaces, collapsed whitespace.
    StringBean extractor = new StringBean();
    extractor.setLinks(false);
    extractor.setReplaceNonBreakingSpaces(true);
    extractor.setCollapse(true);

    Parser parser = new Parser();
    try {
        // Read from the file and walk the document with the extractor.
        parser.setResource(filename);
        parser.visitAllNodesWith(extractor);
    } catch (ParserException e) {
        return null;
    }
    return extractor.getStrings();
}
示例10: nextNode
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Returns the next node while tracking whether it is script or style content.
 *
 * <p>If a node was cached by the previous call, it is returned and
 * {@code inJS}/{@code inCSS} are set from {@code cachedJS}. Otherwise both
 * flags are cleared and the next node is read from the superclass; when that
 * node is a non-empty opening SCRIPT or STYLE tag, the following content is
 * read with {@code parseCDATA(true)} and cached for the next call.
 *
 * @return the next node, as returned by the superclass or from the cache
 * @throws ParserException if the underlying lexer fails
 */
@Override
public Node nextNode() throws ParserException {
    if (cached != null) {
        // Hand out the content buffered after the previous open tag.
        final Node pending = cached;
        cached = null;
        inJS = cachedJS;
        inCSS = !cachedJS;
        return pending;
    }

    inJS = false;
    inCSS = false;

    final Node current = super.nextNode();
    final boolean script = NodeUtils.isNonEmptyOpenTagNodeNamed(current, "SCRIPT");
    if (script || NodeUtils.isNonEmptyOpenTagNodeNamed(current, "STYLE")) {
        // Buffer the tag's content so the next call can flag it.
        cached = super.parseCDATA(true);
        cachedJS = script;
    }
    return current;
}
示例11: testInJS
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Checks that the lexer reports {@code inJS()} only while the text inside a
 * script element is the current node.
 */
public void testInJS() throws ParserException {
    l = makeLexer("<script>foo bar baz</script>");

    // Before reading anything: neither flag is set.
    assertFalse(l.inCSS());
    assertFalse(l.inJS());

    // Opening <script> tag: still outside script content.
    n = l.nextNode();
    assertFalse(l.inCSS());
    assertFalse(l.inJS());
    assertTrue(NodeUtils.isNonEmptyOpenTagNodeNamed(n, "SCRIPT"));

    // Script body: a text node flagged as JS.
    n = l.nextNode();
    assertFalse(l.inCSS());
    assertTrue(l.inJS());
    assertTrue(NodeUtils.isTextNode(n));
    assertEquals("foo bar baz", ((TextNode) n).getText());

    // Closing </script> tag: flags are cleared again.
    n = l.nextNode();
    assertFalse(l.inCSS());
    assertFalse(l.inJS());
    assertTrue(NodeUtils.isCloseTagNodeNamed(n, "SCRIPT"));
}
示例12: testInCSS
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Checks that the lexer reports {@code inCSS()} only while the text inside a
 * style element is the current node.
 */
public void testInCSS() throws ParserException {
    l = makeLexer("<style>foo bar baz</style>");

    // Before reading anything: neither flag is set.
    assertFalse(l.inCSS());
    assertFalse(l.inJS());

    // Opening <style> tag: still outside style content.
    n = l.nextNode();
    assertFalse(l.inCSS());
    assertFalse(l.inJS());
    assertTrue(NodeUtils.isNonEmptyOpenTagNodeNamed(n, "STYLE"));

    // Style body: a text node flagged as CSS.
    n = l.nextNode();
    assertTrue(l.inCSS());
    assertFalse(l.inJS());
    assertTrue(NodeUtils.isTextNode(n));
    assertEquals("foo bar baz", ((TextNode) n).getText());

    // Closing </style> tag: flags are cleared again.
    n = l.nextNode();
    assertFalse(l.inCSS());
    assertFalse(l.inJS());
    assertTrue(NodeUtils.isCloseTagNodeNamed(n, "STYLE"));
}
示例13: testInJSComment
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Runs {@code assertJSContentWorks} over script bodies that contain HTML
 * comment markers, {@code <} comparisons, embedded markup and CDATA sections.
 */
public void testInJSComment() throws ParserException {
    // Each entry is wrapped in <script>...</script> by assertJSContentWorks.
    String[] scriptBodies = {
        "//<!--\n foo bar baz\n //-->",
        "<!-- foo bar baz -->",
        "//<!-- foo bar baz -->",
        "<!-- foo bar baz //-->",
        "\n//<!-- foo bar baz\n //-->",
        "if(1 < 2) { foo(); } ",
        "if(1 <n) { foo(); } ",
        "document.write(\"<b>bold</b>\"); ",
        "document.write(\"<script>bold</script>\"); ",
        "<![CDATA[\n if(i<n) { foo() } // a comment \n ]]> "
    };
    for (String body : scriptBodies) {
        assertJSContentWorks(body);
    }
}
示例14: assertJSContentWorks
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Wraps {@code js} in a script element and asserts that the lexer yields
 * exactly an opening SCRIPT tag, one text node equal to {@code js} that is
 * flagged as JS, and a closing SCRIPT tag.
 *
 * @param js the script body to embed
 * @throws ParserException if the lexer fails
 */
private void assertJSContentWorks(String js) throws ParserException {
    l = makeLexer("<script>" + js + "</script>");

    // Before reading anything: neither flag is set.
    assertFalse(l.inCSS());
    assertFalse(l.inJS());

    // Opening <script> tag.
    n = l.nextNode();
    assertFalse(l.inCSS());
    assertFalse(l.inJS());
    assertTrue(NodeUtils.isNonEmptyOpenTagNodeNamed(n, "SCRIPT"));

    // The whole body must come back as a single JS text node.
    n = l.nextNode();
    assertFalse(l.inCSS());
    assertTrue(l.inJS());
    assertTrue(NodeUtils.isTextNode(n));
    assertEquals(js, ((TextNode) n).getText());

    // Closing </script> tag.
    n = l.nextNode();
    assertFalse(l.inCSS());
    assertFalse(l.inJS());
    assertTrue(NodeUtils.isCloseTagNodeNamed(n, "SCRIPT"));
}
示例15: getDepartmentByDistrict
import org.htmlparser.util.ParserException; //导入依赖的package包/类
/**
 * Collects the department information of every cell in the district table.
 *
 * <p>Each cell is passed to {@code runDistrict(cell, true)}, followed by a
 * one-second pause. If the thread is interrupted during a pause, the
 * interrupt status is restored so callers can observe it. The remaining
 * cells are still processed, but later pauses then end immediately.
 *
 * @param districtTable the table whose cells are processed
 * @return the departments gathered from all cells, in iteration order
 * @throws IOException propagated from {@code runDistrict}
 * @throws ParserException propagated from {@code runDistrict}
 */
public List<DepartmentInfo> getDepartmentByDistrict(final Table<String, String, String> districtTable) throws IOException, ParserException {
    List<DepartmentInfo> departmentInfoList = new ArrayList<>();
    for (Table.Cell<String, String, String> cell : districtTable.cellSet()) {
        departmentInfoList.addAll(runDistrict(cell, true));
        try {
            // Pause between districts.
            Thread.sleep(1000);
        } catch (InterruptedException e) {
            // Do not swallow the interrupt: restore the flag for the caller.
            Thread.currentThread().interrupt();
        }
    }
    return departmentInfoList;
}