当前位置: 首页>>代码示例>>Java>>正文


Java ParserException.printStackTrace方法代码示例

本文整理汇总了Java中org.htmlparser.util.ParserException.printStackTrace方法的典型用法代码示例。如果您正苦于以下问题:Java ParserException.printStackTrace方法的具体用法?Java ParserException.printStackTrace怎么用?Java ParserException.printStackTrace使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.htmlparser.util.ParserException的用法示例。


在下文中一共展示了ParserException.printStackTrace方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: parserUrl

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Collects every node whose tag text begins with {@code a href=}.
 *
 * @param parser parser to pull the matching nodes from
 * @return the matching nodes, or {@code null} when the parser throws a
 *         {@code ParserException} (the stack trace is printed)
 */
@Override
public NodeList parserUrl(Parser parser) {
	// Accept a node only when its text starts with the literal "a href=".
	NodeFilter anchorFilter = new NodeFilter() {
		@Override
		public boolean accept(Node candidate) {
			return candidate.getText().startsWith("a href=");
		}
	};
	NodeList matches = null;
	try {
		matches = parser.extractAllNodesThatMatch(anchorFilter);
	} catch (ParserException e) {
		e.printStackTrace();
	}
	return matches;
}
 
开发者ID:PerkinsZhu,项目名称:WebSprider,代码行数:20,代码来源:HtmlParser01.java

示例2: getPlainText

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Converts an HTML string to text by visiting all of its nodes with a
 * {@code StringBean}.
 *
 * @param htmlStr HTML source to convert
 * @return the strings gathered by the bean, or an empty string when parsing
 *         fails (the stack trace is printed)
 */
public static String getPlainText(String htmlStr) {
    Parser htmlParser = new Parser();
    try {
        htmlParser.setInputHTML(htmlStr);

        StringBean textCollector = new StringBean();
        // Leave the page's link information out of the result.
        textCollector.setLinks(false);
        // Replace non-breaking spaces with regular spaces.
        textCollector.setReplaceNonBreakingSpaces(true);
        // Collapse each run of spaces into a single space.
        textCollector.setCollapse(true);

        htmlParser.visitAllNodesWith(textCollector);
        return textCollector.getStrings();
    } catch (ParserException e) {
        e.printStackTrace();
        return "";
    }
}
 
开发者ID:sercxtyf,项目名称:onboard,代码行数:24,代码来源:HtmlTextParser.java

示例3: splitHtml

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Splits {@code content} into HTML fragments.
 *
 * <p>The parsed content is handed to {@code recusiveSplitHtml}; afterwards one
 * more fragment is built from the text of {@code content} starting at
 * {@code startPosition} and appended to the result.
 * NOTE(review): {@code tagNodeList} and {@code startPosition} are presumably
 * updated by {@code recusiveSplitHtml} - confirm against that method.
 *
 * @return the fragments; if parsing fails, whatever had been collected up to
 *         that point (the stack trace is printed)
 */
private List<String> splitHtml() {
	List<String> fragments = new ArrayList<String>();
	try {
		NodeList parsedContent = Parser.createParser(content, "UTF-8").parse(null);
		fragments = recusiveSplitHtml(parsedContent);

		// Re-emit the opening tag of every element that starts before
		// startPosition and whose end tag finishes at or after it.
		StringBuilder tail = new StringBuilder();
		for (TagNode openTag : tagNodeList) {
			boolean startsBefore = openTag.getStartPosition() < startPosition;
			if (startsBefore && openTag.getEndTag().getEndPosition() >= startPosition) {
				tail.append("<").append(openTag.getText()).append(">");
			}
		}
		tail.append(content.substring(startPosition));

		// Round-trip the tail through the parser and store the HTML it produces.
		NodeList tailNodes = Parser.createParser(tail.toString(), "UTF-8").parse(null);
		fragments.add(tailNodes.toHtml());
	} catch (ParserException e) {
		e.printStackTrace();
	}
	return fragments;
}
 
开发者ID:wangko27,项目名称:SelfSoftShop,代码行数:24,代码来源:Article.java

示例4: extractKeyWordText

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Parses the page identified by {@code url} and passes every node, together
 * with {@code keyword}, to {@code processNodeList}.
 *
 * <p>A {@code ParserException} is printed and otherwise ignored.
 *
 * @param url     location of the page, given to the {@code Parser} constructor
 * @param keyword value forwarded unchanged to {@code processNodeList}
 */
public static void extractKeyWordText(String url, String keyword) {
	try {
		// Build a parser with the page URL as its argument.
		Parser pageParser = new Parser(url);
		// Set the page encoding; the trailing note records gb2312 as an alternative.
		pageParser.setEncoding("utf-8");// gb2312
		// A null argument means no NodeFilter is applied.
		NodeList allNodes = pageParser.parse(null);
		// Walk all nodes starting from the initial node list.
		processNodeList(allNodes, keyword);
	} catch (ParserException e) {
		e.printStackTrace();
	}
}
 
开发者ID:YufangWoo,项目名称:news-crawler,代码行数:15,代码来源:HtmlParserTest.java

示例5: getContentText

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Returns the text extracted from {@code content} by a
 * {@code TextExtractingVisitor}.
 *
 * @return the extracted text, or {@code null} when parsing fails (the stack
 *         trace is printed)
 */
@Transient
public String getContentText() {
	String extracted = null;
	try {
		Parser contentParser = Parser.createParser(content, "UTF-8");
		TextExtractingVisitor visitor = new TextExtractingVisitor();
		contentParser.visitAllNodesWith(visitor);
		extracted = visitor.getExtractedText();
	} catch (ParserException e) {
		e.printStackTrace();
	}
	return extracted;
}
 
开发者ID:wangko27,项目名称:SelfSoftShop,代码行数:13,代码来源:Article.java

示例6: processResponse

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Extracts the feed entries from one HTTP response and appends them to
 * {@code root}.
 *
 * <p>Entries are the nodes carrying {@code action-type="feed_list_item"}. Each
 * one is converted by {@code extractContent} and increments the {@code total}
 * counter.
 *
 * @param resp response to process
 * @param doc  document passed to {@code extractContent}
 * @param root element that receives one child per extracted entry
 * @return {@code true} only when the status is OK, the page is at least 500
 *         characters long, every entry was extracted and exactly 15 entries
 *         were found; {@code false} in every other case
 *         (NOTE(review): 15 is presumably the size of a full page - confirm)
 */
private boolean processResponse(HttpResponse resp, Document doc, Element root) {
	if (resp.getStatusLine().getStatusCode() != HttpStatus.SC_OK) {
		return false;
	}
	System.out.println("[INFO] HTTP Status OK.");
	System.out.println("[INFO] Extracting html page...");
	String html = extractHtml(resp);
	if (html == null) {
		return false;
	}
	System.out.println("[INFO] " + html.length() + "B html page extracted.");
	// A page shorter than 500 characters is treated as the end of the data.
	if (html.length() < 500) {
		System.out.println("[INFO] EOF reached, task completed.");
		return false;
	}

	System.out.println("[INFO] Parsing html page...");
	try {
		Parser pageParser = new Parser(html);
		HasAttributeFilter entryFilter = new HasAttributeFilter("action-type", "feed_list_item");
		NodeList entries = pageParser.extractAllNodesThatMatch(entryFilter);
		System.out.println("[INFO] " + entries.size() + " entries detected.");
		for (SimpleNodeIterator it = entries.elements(); it.hasMoreNodes();) {
			total++;
			System.out.println("[INFO] processing entry #" + total + "...");
			Element entry = extractContent(it.nextNode(), doc);
			if (entry == null) {
				System.out.println("[ERROR] Data extraction failed.");
				return false;
			}
			root.appendChild(entry);
		}
		return entries.size() == 15;
	} catch (ParserException e) {
		System.out.println("[ERROR] Parser failed.");
		e.printStackTrace();
		return false;
	}
}
 
开发者ID:w1ndy,项目名称:weibo-fetcher,代码行数:40,代码来源:Spider.java

示例7: extracLinks

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Collects the link targets on the page at {@code url} that {@code filter}
 * accepts.
 *
 * <p>Two kinds of nodes are examined: {@code LinkTag} nodes, whose link is
 * taken from {@code getLink()}, and nodes whose text starts with
 * {@code frame src=}, whose quoted {@code src} value is cut out of the tag text.
 * The resulting set is also printed to standard output.
 *
 * @param url    location of the page, given to the {@code Parser} constructor
 * @param filter decides which URLs are kept
 * @return the accepted URLs; empty or partial when parsing fails (the stack
 *         trace is printed)
 */
public static Set<String> extracLinks(String url, LinkFilter filter) {
	Set<String> links = new HashSet<String>();
	try{
		Parser parser = new Parser(url);
		parser.setEncoding("gb2312");
		// Matches <frame> tags by the start of their text.
		@SuppressWarnings("serial")
		NodeFilter frameFilter = new NodeFilter(){
			public boolean accept(Node node){
				return node.getText().startsWith("frame src=");
			}
		};
		// Accept either <a> tags or the frame tags matched above.
		OrFilter linkFilter = new OrFilter(new NodeClassFilter(
				LinkTag.class), frameFilter);
		NodeList list = parser.extractAllNodesThatMatch(linkFilter);
		for (int i = 0; i < list.size(); i++) {
			Node tag = list.elementAt(i);
			if (tag instanceof LinkTag){// <a>
				LinkTag link = (LinkTag) tag;
				String linkUrl = link.getLink();
				if (filter.accept(linkUrl))
					links.add(linkUrl);
			}
			else{
				// The tag text looks like: frame src="test.html" ...
				String frame = tag.getText();
				int start = frame.indexOf("src=");
				frame = frame.substring(start);
				int end = frame.indexOf(" ");
				if (end == -1)
					end = frame.indexOf(">");
				if (end == -1) {
					// src is the last attribute and the text carries no closing
					// bracket: the value runs to the end of the text, minus the
					// "/" of a self-closing tag. Previously end stayed -1 and
					// substring() threw StringIndexOutOfBoundsException.
					end = frame.endsWith("/") ? frame.length() - 1 : frame.length();
				}
				if (end - 1 < 5) {
					// Too short to hold src="..."; skip instead of crashing.
					continue;
				}
				// Drop the leading src=" (5 chars) and the closing quote.
				String frameUrl = frame.substring(5, end - 1);
				if (filter.accept(frameUrl))
					links.add(frameUrl);
			}
		}
		System.out.println(links);
	} catch (ParserException e){
		e.printStackTrace();
	}
	return links;
}
 
开发者ID:MelissaChen15,项目名称:Crawler2015,代码行数:48,代码来源:HtmlParserTool.java

示例8: extracLinks

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Prints the links, images and frame sources found on the page at {@code url}.
 *
 * <p>{@code LinkTag} nodes print their link and link text, {@code ImageTag}
 * nodes print their image URL and tag text, and nodes whose text starts with
 * {@code frame src=} print the quoted {@code src} value cut out of the tag text.
 * A {@code ParserException} is printed and otherwise ignored.
 *
 * @param url location of the page, given to the {@code Parser} constructor
 */
public static void extracLinks(String url) {
	try {
		Parser parser = new Parser(url);
		parser.setEncoding("utf-8");// gb2312
		// Filter for <frame> tags, used to pull the link out of the src attribute.
		NodeFilter frameFilter = new NodeFilter() {
			public boolean accept(Node node) {
				return node.getText().startsWith("frame src=");
			}
		};
		// Combine <a>, <img> and <frame> matching; any one of the three is enough.
		OrFilter orFilter = new OrFilter(
				new NodeClassFilter(LinkTag.class), new NodeClassFilter(
						ImageTag.class));
		OrFilter linkFilter = new OrFilter(orFilter, frameFilter);
		// Fetch every tag that passes the filter.
		NodeList list = parser.extractAllNodesThatMatch(linkFilter);
		for (int i = 0; i < list.size(); i++) {
			Node tag = list.elementAt(i);
			if (tag instanceof LinkTag) {
				// <a> tag: print the URL and the link text.
				LinkTag link = (LinkTag) tag;
				String linkUrl = link.getLink();
				String text = link.getLinkText();
				System.out.println(linkUrl + "**********" + text);
			} else if (tag instanceof ImageTag) {
				// <img> tag: print the image address and the tag text.
				ImageTag image = (ImageTag) list.elementAt(i);
				System.out.print(image.getImageURL() + "********");
				System.out.println(image.getText());
			} else {
				// <frame> tag: extract the src link, e.g. <frame src="test.html"/>
				String frame = tag.getText();
				int start = frame.indexOf("src=");
				frame = frame.substring(start);
				int end = frame.indexOf(" ");
				if (end == -1)
					end = frame.indexOf(">");
				if (end == -1) {
					// src is the last attribute and the text carries no closing
					// bracket: the value runs to the end of the text, minus the
					// "/" of a self-closing tag. Previously end stayed -1 and
					// substring() threw StringIndexOutOfBoundsException.
					end = frame.endsWith("/") ? frame.length() - 1 : frame.length();
				}
				if (end - 1 < 5) {
					// Too short to hold src="..."; skip instead of crashing.
					continue;
				}
				// Drop the leading src=" (5 chars) and the closing quote.
				frame = frame.substring(5, end - 1);
				System.out.println(frame);
			}
		}
	} catch (ParserException e) {
		e.printStackTrace();
	}
}
 
开发者ID:YufangWoo,项目名称:news-crawler,代码行数:52,代码来源:HtmlParserTest.java

示例9: extracLinks

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Collects the links of a web page and adds them to the unvisited-URL queue.
 *
 * <p>{@code LinkTag} links starting with {@code http} are kept as they are;
 * links starting with {@code /} are prefixed with {@code Main.baseUrl}. Both
 * are checked with {@code filter.accept(url, Main.keyWord)}. Nodes whose text
 * starts with {@code frame src=} contribute their quoted {@code src} value,
 * checked with {@code filter.accept(url)}.
 *
 * @param content page to parse, given to the {@code Parser} constructor
 * @param filter used to filter the links
 * @return the accepted links; empty or partial when parsing fails (the stack
 *         trace is printed). The same set is passed to
 *         {@code LinkQueue.addUnvisitedUrl}.
 * @author cxn 2015-11-05
 */
public static Set<String> extracLinks(String content, LinkFilter filter) {
	Set<String> links = new HashSet<String>();
	try {
		Parser parser = new Parser(content);
		// parser.setEncoding("utf-8");
		// Filter for <frame> tags, used to pull the link out of the src attribute.
		NodeFilter frameFilter = new NodeFilter() {
			public boolean accept(Node node) {
				return node.getText().startsWith("frame src=");
			}
		};
		// Accept either <a> tags or the frame tags matched above.
		OrFilter linkFilter = new OrFilter(new NodeClassFilter(
				LinkTag.class), frameFilter);
		// Fetch every tag that passes the filter.
		NodeList list = parser.extractAllNodesThatMatch(linkFilter);
		for (int i = 0; i < list.size(); i++) {
			Node tag = list.elementAt(i);
			if (tag instanceof LinkTag){
				LinkTag link = (LinkTag) tag;
				String linkUrl = link.getLink();
				if(linkUrl.startsWith("http") && filter.accept(linkUrl, Main.keyWord)){
					links.add(linkUrl);
				}else if(linkUrl.startsWith("/") && filter.accept(Main.baseUrl+linkUrl, Main.keyWord)){
					links.add(Main.baseUrl+linkUrl);
				}
			}else{
				// Extract the src link of a frame, e.g. <frame src="test.html"/>
				String frame = tag.getText();
				int start = frame.indexOf("src=");
				frame = frame.substring(start);
				int end = frame.indexOf(" ");
				if (end == -1){
					end = frame.indexOf(">");
				}
				if (end == -1){
					// src is the last attribute and the text carries no closing
					// bracket: the value runs to the end of the text, minus the
					// "/" of a self-closing tag. Previously end stayed -1 and
					// substring() threw StringIndexOutOfBoundsException.
					end = frame.endsWith("/") ? frame.length() - 1 : frame.length();
				}
				if (end - 1 < 5){
					// Too short to hold src="..."; skip instead of crashing.
					continue;
				}
				// Drop the leading src=" (5 chars) and the closing quote.
				String frameUrl = frame.substring(5, end - 1);
				if (filter.accept(frameUrl)){
					links.add(frameUrl);
				}
			}
		}
	} catch (ParserException e) {
		e.printStackTrace();
	}
	LinkQueue.addUnvisitedUrl(links);
	return links;
}
 
开发者ID:cxn945,项目名称:my_crawler,代码行数:59,代码来源:PageParser.java

示例10: list

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Handles the {@code sdlist} action: fills {@code list} with news entries,
 * passes it to {@code jsonp} and returns {@code NONE}.
 *
 * <p>The entries come from the "News" cache when the key built from
 * {@code domain}, {@code listid} and {@code page} is present. Otherwise the
 * list page is fetched, every tag whose {@code href} matches
 * {@code .*}{@code /page\.htm} becomes one {@code News}, and the result is cached.
 * If parsing fails the stack trace is printed and nothing is cached.
 *
 * @return {@code NONE}
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Action(value = "sdlist", results = { @Result(type = "json", params = {
		"root", "list" }) })
public String list() {
	Cache newsCache = CacheManager.getInstance().getCache("News");
	String cacheKey = domain + listid + page;
	Element cached = newsCache.get(cacheKey);
	if (CommonUtil.isEmpty(cached)) {
		StringBuffer pageHtml = fetch(domain + "/" + listid + "/list" + page + ".htm");
		Parser parser = Parser.createParser(pageHtml.toString(), "utf-8");
		list = new ArrayList<News>();
		try {
			AttributeRegexFilter hrefFilter = new AttributeRegexFilter("href", ".*/page\\.htm");
			NodeList anchors = parser.extractAllNodesThatMatch(hrefFilter);
			for (SimpleNodeIterator it = anchors.elements(); it.hasMoreNodes();) {
				Node node = it.nextNode();
				if (!(node instanceof TagNode)) {
					continue;
				}
				TagNode anchor = (TagNode) node;
				News item = new News();
				item.setId(anchor.getAttribute("href"));
				item.setTitle(anchor.getAttribute("alt"));
				// Walk the siblings after the anchor's parent until a
				// TableColumn turns up; its plain text is the publication date.
				Node sibling = anchor.getParent().getNextSibling();
				for (; sibling != null && !(sibling instanceof TableColumn); sibling = sibling.getNextSibling()) {
					// advancing only
				}
				if (sibling != null) {
					item.setPubdate(sibling.toPlainTextString());
				}
				list.add(item);
			}
			newsCache.put(new Element(cacheKey, list));
		} catch (ParserException e) {
			e.printStackTrace();
		}
	} else {
		list = (List) cached.getObjectValue();
	}
	jsonp(list);
	return NONE;
}
 
开发者ID:BaixiangLiu,项目名称:fudanweixin,代码行数:46,代码来源:SudyPageAction.java

示例11: list

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Handles the {@code newslist} action: fills {@code list} with news entries
 * and returns {@code SUCCESS}.
 *
 * <p>The entries come from the "News" cache when the key built from
 * {@code listid} and {@code page} is present. Otherwise the list page is
 * fetched, every tag with {@code class="date"} becomes one {@code News} whose
 * id and title are read from the next sibling {@code LinkTag} (if any), and the
 * result is cached. If parsing fails the stack trace is printed and nothing is
 * cached.
 *
 * @return {@code SUCCESS}
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Action(value = "newslist", results = { @Result(type = "json", params = {
		"root", "list" }) })
public String list() {
	Cache newsCache = CacheManager.getInstance().getCache("News");
	String cacheKey = "newslist" + listid + page;
	Element cached = newsCache.get(cacheKey);
	if (CommonUtil.isEmpty(cached)) {
		StringBuffer pageHtml = fetch(RD + "/news/" + listid + "/" + page + ".html");
		Parser parser = Parser.createParser(pageHtml.toString(), "utf-8");
		list = new ArrayList<News>();
		try {
			NodeList dateNodes = parser.extractAllNodesThatMatch(new HasAttributeFilter("class", "date"));
			for (SimpleNodeIterator it = dateNodes.elements(); it.hasMoreNodes();) {
				Node node = it.nextNode();
				if (!(node instanceof TagNode)) {
					continue;
				}
				TagNode dateTag = (TagNode) node;
				News item = new News();
				item.setPubdate(dateTag.toPlainTextString());
				// The link describing the entry is the first LinkTag among the
				// following siblings.
				Node sibling = dateTag.getNextSibling();
				for (; sibling != null && !(sibling instanceof LinkTag); sibling = sibling.getNextSibling()) {
					// advancing only
				}
				if (sibling != null) {
					LinkTag entryLink = (LinkTag) sibling;
					item.setId(entryLink.getAttribute("href"));
					item.setTitle(entryLink.getAttribute("title"));
				}
				list.add(item);
			}
			newsCache.put(new Element(cacheKey, list));
		} catch (ParserException e) {
			e.printStackTrace();
		}
	} else {
		list = (List) cached.getObjectValue();
	}

	return SUCCESS;
}
 
开发者ID:BaixiangLiu,项目名称:fudanweixin,代码行数:46,代码来源:CampusNewsAction.java

示例12: list

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Handles the {@code eventlist} action and returns {@code NONE}.
 *
 * <p>When the "News" cache holds the key built from {@code page}, {@code list}
 * is taken from it and nothing is written to the response. Otherwise the
 * calendar page is fetched; if exactly two nodes with {@code class="clear"}
 * exist, the HTML between them is re-parsed, every link's children are replaced
 * by a text node holding its {@code title} attribute (which is then removed),
 * and the resulting HTML is written to the servlet response. This method never
 * stores anything in the cache.
 *
 * @return {@code NONE}
 * @throws IOException if the response writer cannot be obtained
 */
@SuppressWarnings("rawtypes")
@Action(value = "eventlist")
public String list() throws IOException {
	Cache newsCache = CacheManager.getInstance().getCache("News");
	String cacheKey = "eventlist" + page;
	Element cached = newsCache.get(cacheKey);
	if (!CommonUtil.isEmpty(cached)) {
		list = (List) cached.getObjectValue();
		return NONE;
	}

	StringBuffer pageHtml = fetch(RD+"/calendar/?a=list&&m=recent&range=30&_="+System.currentTimeMillis()+"&type=0&place=0&type="+page);
	Parser parser = Parser.createParser(pageHtml.toString(), "utf-8");
	list = new ArrayList<News>();
	try {
		NodeList markers = parser.extractAllNodesThatMatch(new HasAttributeFilter("class", "clear"));
		if (markers.size() == 2) {
			int sliceStart = markers.elementAt(0).getEndPosition();
			int sliceEnd = markers.elementAt(1).getStartPosition();
			ServletActionContext.getResponse().setCharacterEncoding("utf-8");
			// NOTE(review): the +6 presumably skips a closing tag right after
			// the first marker - confirm against the fetched markup.
			parser = Parser.createParser(pageHtml.substring(sliceStart + 6, sliceEnd), "utf-8");
			NodeList sliceNodes = parser.parse(null);
			NodeList anchors = sliceNodes.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class), true);
			for (SimpleNodeIterator it = anchors.elements(); it.hasMoreNodes();) {
				LinkTag anchor = (LinkTag) it.nextNode();
				// Use the title attribute as the link's only child, then drop it.
				NodeList titleOnly = new NodeList();
				titleOnly.add(new TextNode(anchor.getAttribute("title")));
				anchor.setChildren(titleOnly);
				anchor.removeAttribute("title");
			}

			ServletActionContext.getResponse().getWriter().print(sliceNodes.toHtml());
		}
	} catch (ParserException e) {
		e.printStackTrace();
	}

	return NONE;
}
 
开发者ID:BaixiangLiu,项目名称:fudanweixin,代码行数:45,代码来源:CampusEventAction.java

示例13: content

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Handles the {@code eventcontent} action: loads one event into {@code en} and
 * returns {@code SUCCESS}.
 *
 * <p>The event comes from the "News" cache when the key built from
 * {@code newsid} is present. Otherwise the event page is fetched; its
 * {@code h1} supplies the title and its {@code table} the content. If the page
 * contains {@code imageurl} followed by a single-quoted value, that value
 * (prefixed with {@code RD}) is hashed with MD5, recorded in the
 * {@code CrawlerImages} collection when not yet present, and the hash is stored
 * through {@code setPubdate}. The event is cached only when it has content.
 *
 * <p>The quote positions are searched in the same trimmed string that the URL
 * is cut from; a missing or unbalanced quote skips the image step instead of
 * throwing.
 *
 * @return {@code SUCCESS}
 */
@Action(value = "eventcontent", results = { @Result(type = "json", params = {
		"root", "en" }) })
public String content() {
	Cache c = CacheManager.getInstance().getCache("News");
	String ckey = "eventcontent" + newsid;
	Element ele = c.get(ckey);
	if (!CommonUtil.isEmpty(ele)) {
		en = (News) ele.getObjectValue();
	} else {
		StringBuffer retstr = fetch(RD+"/calendar/?a=one&evid="
				+ newsid+"&_="+System.currentTimeMillis());
		Parser p = Parser.createParser(retstr.toString(), "utf-8");
		try {
			NodeList nl = p.extractAllNodesThatMatch(new OrFilter(
					new TagNameFilter("h1"), new TagNameFilter("table")));
			SimpleNodeIterator i = nl.elements();
			en = new News();
			en.setId(newsid);
			while (i.hasMoreNodes()) {
				Node n = i.nextNode();
				if (n instanceof TagNode) {
					TagNode tn = (TagNode) n;
					if (tn.getTagName().equalsIgnoreCase("h1")) {
						en.setTitle(tn.toPlainTextString());
					}
					if (tn.getTagName().equalsIgnoreCase("table")) {
						en.setContent(tn.toHtml());
					}
				}
			}
			// Search and slice the SAME string: the indexes used to come from
			// the untrimmed buffer while the slice was taken from the trimmed
			// copy, which shifted the URL whenever the page began with whitespace.
			String str = retstr.toString().trim();
			int tk = str.indexOf("imageurl");
			// indexOf returns -1 when absent; index 0 is a valid match.
			if (tk >= 0) {
				int open = str.indexOf("'", tk);
				int close = open < 0 ? -1 : str.indexOf("'", open + 1);
				// Only continue with a complete '...' pair; otherwise
				// substring() would throw StringIndexOutOfBoundsException.
				if (close > open) {
					String imgurl = RD + str.substring(open + 1, close);
					String imgid = EncodeHelper.digest(imgurl, "MD5");
					BasicDBObject obj = new BasicDBObject("id", imgid);
					DBCollection col = MongoUtil.getInstance().getDB()
							.getCollection("CrawlerImages");
					DBObject dbo = col.findOne(obj);
					if (dbo == null) {
						col.save(obj.append("url", imgurl));
					}
					// The image id is stored in the pubdate field.
					en.setPubdate(imgid);
				}
			}
		} catch (ParserException e) {
			e.printStackTrace();
		}
		if (!CommonUtil.isEmpty(en) && !CommonUtil.isEmpty(en.getContent()))
			c.put(new Element(ckey, en));
	}
	return SUCCESS;
}
 
开发者ID:BaixiangLiu,项目名称:fudanweixin,代码行数:61,代码来源:CampusEventAction.java

示例14: list

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Handles the {@code calist} action: fills {@code list} with announcements and
 * returns {@code SUCCESS}.
 *
 * <p>The entries come from the "News" cache when the key built from
 * {@code page} is present. Otherwise the announcement list page is fetched and
 * every tag whose {@code href} matches {@code announce/\?announceid=\d+}
 * becomes one {@code News}: the id is the part of the href after the first
 * {@code =}, the title is the tag's plain text. The result is then cached. If
 * parsing fails the stack trace is printed and nothing is cached.
 *
 * @return {@code SUCCESS}
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
@Action(value = "calist", results = { @Result(type = "json", params = {
		"root", "list" }) })
public String list() {
	Cache newsCache = CacheManager.getInstance().getCache("News");
	String cacheKey = "calist" + page;
	Element cached = newsCache.get(cacheKey);
	if (CommonUtil.isEmpty(cached)) {
		StringBuffer pageHtml = fetch(RD + "/announce/announce_list.php?page=" + page);
		Parser parser = Parser.createParser(pageHtml.toString(), "utf-8");
		list = new ArrayList<News>();
		try {
			AttributeRegexFilter hrefFilter = new AttributeRegexFilter("href", "announce/\\?announceid=\\d+");
			NodeList anchors = parser.extractAllNodesThatMatch(hrefFilter);
			for (SimpleNodeIterator it = anchors.elements(); it.hasMoreNodes();) {
				Node node = it.nextNode();
				if (!(node instanceof TagNode)) {
					continue;
				}
				TagNode anchor = (TagNode) node;
				News item = new News();
				String href = anchor.getAttribute("href");
				int equalsAt = href.indexOf("=");
				if (equalsAt > 0) {
					item.setId(href.substring(equalsAt + 1));
				}
				item.setTitle(anchor.toPlainTextString());
				list.add(item);
			}
			newsCache.put(new Element(cacheKey, list));
		} catch (ParserException e) {
			e.printStackTrace();
		}
	} else {
		list = (List) cached.getObjectValue();
	}

	return SUCCESS;
}
 
开发者ID:BaixiangLiu,项目名称:fudanweixin,代码行数:43,代码来源:CampusAnnouceAction.java

示例15: list

import org.htmlparser.util.ParserException; //导入方法依赖的package包/类
/**
 * Handles the {@code liblist} action: fills {@code list} with library notices
 * and returns {@code SUCCESS}.
 *
 * <p>The entries come from the "News" cache when the key built from
 * {@code page} is present. Otherwise the list for {@code page - 1} is fetched
 * and every tag whose {@code href} matches {@code publishInfo\.shtml\?.+}
 * becomes one {@code News}: id is the href, title is the tag's plain text, and
 * the publication date is the text of a directly following {@code TextNode}
 * with every {@code &nbsp;} removed. The result is then cached. If parsing
 * fails the stack trace is printed and nothing is cached.
 *
 * @return {@code SUCCESS}
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
@Action(value = "liblist", results = { @Result(type = "json", params = {
		"root", "list" }) })
public String list() {
	Cache newsCache = CacheManager.getInstance().getCache("News");
	String cacheKey = "liblist" + page;
	Element cached = newsCache.get(cacheKey);
	if (CommonUtil.isEmpty(cached)) {
		StringBuffer pageHtml = fetch(RD
				+ "/ddlib/getPublishInfoList.shtml?tid=1012&k=&p="
				+ (page - 1));
		Parser parser = Parser.createParser(pageHtml.toString(), "utf-8");
		list = new ArrayList<News>();
		try {
			AttributeRegexFilter hrefFilter = new AttributeRegexFilter("href", "publishInfo\\.shtml\\?.+");
			NodeList anchors = parser.extractAllNodesThatMatch(hrefFilter);
			for (SimpleNodeIterator it = anchors.elements(); it.hasMoreNodes();) {
				Node node = it.nextNode();
				if (!(node instanceof TagNode)) {
					continue;
				}
				TagNode anchor = (TagNode) node;
				News item = new News();
				item.setId(anchor.getAttribute("href"));
				item.setTitle(anchor.toPlainTextString());
				// instanceof is already false for null, so no separate null check.
				Node following = anchor.getNextSibling();
				if (following instanceof TextNode && following.getText() != null) {
					item.setPubdate(following.getText().replaceAll("&nbsp;", ""));
				}
				list.add(item);
			}
			newsCache.put(new Element(cacheKey, list));
		} catch (ParserException e) {
			e.printStackTrace();
		}
	} else {
		list = (List) cached.getObjectValue();
	}

	return SUCCESS;
}
 
开发者ID:BaixiangLiu,项目名称:fudanweixin,代码行数:48,代码来源:LibAnnouceAction.java


注:本文中的org.htmlparser.util.ParserException.printStackTrace方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。