当前位置: 首页>>代码示例>>Java>>正文


Java Parser.parse方法代码示例

本文整理汇总了Java中org.htmlparser.Parser.parse方法的典型用法代码示例。如果您正苦于以下问题：Java Parser.parse方法的具体用法？Java Parser.parse怎么用？Java Parser.parse使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.htmlparser.Parser的用法示例。


在下文中一共展示了Parser.parse方法的9个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: splitHtml

import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Splits {@code content} into a list of HTML fragments.
 *
 * <p>The parsed content is first handed to {@code recusiveSplitHtml}. A final
 * fragment is then built from the text starting at {@code startPosition},
 * prefixed with the opening markup of every tag in {@code tagNodeList} that
 * starts before that position and whose end tag ends at or after it. That
 * fragment is re-parsed and its serialized HTML is appended to the result.
 *
 * <p>NOTE(review): {@code content}, {@code tagNodeList} and {@code startPosition}
 * are declared outside this method; presumably {@code recusiveSplitHtml}
 * updates the latter two - confirm against the enclosing class.
 *
 * @return the fragments collected so far; if parsing fails the stack trace is
 *         printed and whatever was collected before the failure is returned
 */
private List<String> splitHtml() {
	List<String> pages = new ArrayList<String>();
	try {
		NodeList parsedContent = Parser.createParser(content, "UTF-8").parse(null);
		pages = recusiveSplitHtml(parsedContent);

		// Re-open each tag that straddles the split point so the tail fragment starts inside them.
		StringBuffer tail = new StringBuffer();
		for (TagNode openTag : tagNodeList) {
			boolean startsBeforeSplit = openTag.getStartPosition() < startPosition;
			if (startsBeforeSplit && openTag.getEndTag().getEndPosition() >= startPosition) {
				tail.append("<").append(openTag.getText()).append(">");
			}
		}
		tail.append(content.substring(startPosition));

		// Round-trip the tail through the parser and store its serialized form.
		NodeList tailNodes = Parser.createParser(tail.toString(), "UTF-8").parse(null);
		pages.add(tailNodes.toHtml());
	} catch (ParserException e) {
		e.printStackTrace();
	}
	return pages;
}
 
开发者ID:wangko27,项目名称:SelfSoftShop,代码行数:24,代码来源:Article.java

示例2: extractKeyWordText

import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Parses the resource named by {@code url} and passes the resulting node list,
 * together with {@code keyword}, to {@code processNodeList}.
 *
 * <p>A {@code ParserException} is caught here and its stack trace printed.
 *
 * @param url     resource handed to the {@code Parser} constructor
 * @param keyword value forwarded unchanged to {@code processNodeList}
 */
public static void extractKeyWordText(String url, String keyword) {
	try {
		// Build the parser straight from the page URL.
		Parser pageParser = new Parser(url);
		// Decode the page as UTF-8; the original note mentions a gb2312 page.
		pageParser.setEncoding("utf-8");// gb2312
		// A null filter means no NodeFilter is applied.
		NodeList allNodes = pageParser.parse(null);
		// Walk the nodes starting from this initial list.
		processNodeList(allNodes, keyword);
	} catch (ParserException e) {
		e.printStackTrace();
	}
}
 
开发者ID:YufangWoo,项目名称:news-crawler,代码行数:15,代码来源:HtmlParserTest.java

示例3: readTextAndLinkAndTitle

import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Prints the text nodes, link targets and page titles found in a page, one per line.
 *
 * <p>For a text node the node text is printed, for a link tag its link, and
 * for a title tag its title. Values for which {@code isTrimEmpty} returns
 * true are skipped.
 *
 * @param result the content of the web page
 * @throws Exception if parsing fails
 */
public static void readTextAndLinkAndTitle(String result) throws Exception {
    Parser htmlParser = Parser.createParser(result, "utf8");

    // Accept a node when it is a text node, a link tag or a title tag.
    OrFilter anyOfThree = new OrFilter();
    anyOfThree.setPredicates(new NodeFilter[] {
        new NodeClassFilter(TextNode.class),
        new NodeClassFilter(LinkTag.class),
        new NodeClassFilter(TitleTag.class)
    });

    // Kept outside the loop on purpose: a node matching none of the branches
    // below would leave the previous value in place.
    String line = "";
    for (Node current : htmlParser.parse(anyOfThree).toNodeArray()) {
        if (current instanceof TextNode) {
            line = ((TextNode) current).getText();
        } else if (current instanceof LinkTag) {
            line = ((LinkTag) current).getLink();
        } else if (current instanceof TitleTag) {
            line = ((TitleTag) current).getTitle();
        }

        if (!isTrimEmpty(line)) {
            System.out.println(line);
        }
    }
}
 
开发者ID:YufangWoo,项目名称:news-crawler,代码行数:38,代码来源:HtmlParserTest.java

示例4: main

import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Downloads the page named by the {@code mlink} property through an HTTP proxy
 * and prints the anchors found in it.
 *
 * <p>The proxy address comes from the {@code host} and {@code port} properties,
 * the connect timeout from {@code timeout}, and a {@code MyAuthenticator} built
 * from {@code username} and {@code password} is installed as the default
 * authenticator. The number of {@code A} tags is printed first, followed by
 * every URL-decoded {@code href} accepted by {@code CommonHelper.checkIsAlink}.
 *
 * <p>Any exception is caught and its stack trace printed.
 *
 * @param args unused
 */
public static void main(String[] args) {
	try {
		URL url = new URL(pro.getProperty("mlink"));
		SocketAddress address = new InetSocketAddress(pro.getProperty("host"), Integer.parseInt(pro.getProperty("port")));
		Proxy proxy = new Proxy(Proxy.Type.HTTP, address);
		URLConnection conn = url.openConnection(proxy);
		Authenticator.setDefault(new MyAuthenticator(pro.getProperty("username"), pro.getProperty("password")));

		conn.setConnectTimeout(Integer.parseInt(pro.getProperty("timeout")));
		Parser parser = new Parser(conn);

		NodeList nodeList = parser.parse(new TagNameFilter("A"));
		System.out.println(nodeList.size());

		for (SimpleNodeIterator it = nodeList.elements(); it.hasMoreNodes(); ) {
			TagNode node = (TagNode) it.nextNode();
			String href = node.getAttribute("href");
			if (href == null) {
				// Anchors without an href (e.g. <a name="...">) yield null here.
				// URLDecoder.decode(null, ...) would throw and the outer catch
				// would abandon all remaining links, so skip this anchor instead.
				continue;
			}
			String dhref = URLDecoder.decode(href, "UTF-8");
			if (CommonHelper.checkIsAlink(dhref)) {
				System.out.println(dhref);
			}
		}
	} catch (Exception e) {
		e.printStackTrace();
	}
}
 
开发者ID:toulezu,项目名称:play,代码行数:29,代码来源:TestParser.java

示例5: processHTMLBodyWithBASE64Images

import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Runs an {@code HtmlImageNodeVisitor} over the HTML body and attaches the
 * base64 images it reports to the given multipart.
 *
 * <p>Nothing happens unless {@code body} is non-null and contains the text
 * {@code "base64"}. Otherwise the body is parsed, every node is visited,
 * {@code body} is replaced by the re-serialized node list, and the visitor's
 * {@code getBase64Images()} result is passed to
 * {@code addAllBase64ImagesToMimeMultipart}.
 *
 * @param multipart the multipart that receives the extracted images
 * @throws ParserException if the body cannot be parsed
 */
private void processHTMLBodyWithBASE64Images(MimeMultipart multipart) throws ParserException,
        MessagingException, NoSuchAlgorithmException, SMIMEException, java.security.NoSuchProviderException {
    if (null != body && body.contains("base64")) {
        // Parser(String) treats its argument as a URL or file name unless the
        // first non-blank character is '<'. A body that starts with plain text
        // would therefore be opened as a resource instead of being parsed.
        // createParser always parses the string as markup; a null charset
        // selects the library default, as Parser(String) does for markup input.
        Parser parser = Parser.createParser(body, null);
        NodeList nodeList = parser.parse(null);
        HtmlImageNodeVisitor htmlImageNodeVisitor = new HtmlImageNodeVisitor();
        nodeList.visitAllNodesWith(htmlImageNodeVisitor);
        // Serialize after the visit so any changes the visitor made to the nodes are kept.
        body = nodeList.toHtml();

        addAllBase64ImagesToMimeMultipart(multipart, htmlImageNodeVisitor.getBase64Images());
    }
}
 
开发者ID:CloudSlang,项目名称:cs-actions,代码行数:13,代码来源:SendMail.java

示例6: getHtmlRoot

import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Parses an HTML string and returns its top-level nodes.
 *
 * @param html the markup to parse
 * @return the node list produced by an unfiltered parse, or {@code null} when
 *         a {@code ParserException} occurs (its message is written to
 *         {@code System.err})
 */
public static NodeList getHtmlRoot(String html) {
	Parser htmlParser = new Parser(new Lexer(html));
	NodeList rootNodes = null;
	try {
		htmlParser.setEncoding("UTF-8");
		rootNodes = htmlParser.parse(null);
	} catch (ParserException e) {
		// Report the problem and fall through with the null result.
		System.err.println(e.getMessage());
	}
	return rootNodes;
}
 
开发者ID:javajoker,项目名称:infoecos,代码行数:17,代码来源:HtmlUtil.java

示例7: parseTheEmbeddedObject

import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Parses the embedded object, builds a Flash embedded object from it if
 * possible, then serializes that object to a string and returns it.
 * If the text cannot be parsed, or it turns out not to be a Flash embedded
 * object, an exception is thrown.
 *
 * <p>NOTE(review): the original documentation mentions both a "sameDomain" and
 * a "never" security level for the created Flash object; which one applies is
 * decided inside {@code FlashEmbeddedObject} - confirm there.
 *
 * @param textToParse the text to parse
 * @return the string with the filtered, verified and completed embedded
 *         Flash animation embedding code
 * @throws MessageException if the provided HTML code is broken or the animation was detected to be not a flash movie
 */
private String parseTheEmbeddedObject( final String textToParse ) throws MessageException {
	try {
		logger.debug("Trying to parse the found message-embedded object: " + textToParse );
		final NodeList parsedNodes = new Parser( new Lexer( textToParse ) ).parse( null );
		final NodeList objectTags = parsedNodes.extractAllNodesThatMatch( new TagNameFilter( FlashEmbeddedObject.OBJECT_TAG_NAME ) );
		/* The Flash object is restricted via the XCure domain pattern to prevent
		   Flash injection: a user may point to XCure itself, but not to an
		   external flash whose getURL executes malicious JavaScript that, e.g.,
		   reads the user's session cookies */
		final FlashEmbeddedObject flash = new FlashEmbeddedObject( xcureDomainPattern );

		final int objectCount = objectTags.size();
		if( ( objectCount > 0 ) && ( objectCount <= 2 ) ) {
			//One or two OBJECT tags: parse them
			parseFlashObjectTag( objectTags, flash );
		} else {
			//Otherwise fall back to the EMBED tags
			final NodeList embedTags = parsedNodes.extractAllNodesThatMatch( new TagNameFilter( FlashEmbeddedObject.EMBED_TAG_NAME ) );
			if( embedTags.size() > 2 ) {
				//More than two EMBED tags cannot be a single open/close pair
				logger.error("An improper number of the object (" + objectTags.size() +
							 ") and embed (" + embedTags.size() + ") tags in the string: " + textToParse);
				throw new MessageException( MessageException.IMPROPER_EMBEDDED_OBJECT );
			}
			parseFlashEmbedTag( embedTags, flash );
		}

		//Reject anything that did not turn out to be a valid flash object
		if( ! flash.isValidEmbedFlash() ) {
			logger.error( "The parsed embedded object '" + textToParse +
						  "' was not recognized as a valid flash animation, we got:" + flash.toString() );
			throw new MessageException( MessageException.IMPROPER_EMBEDDED_OBJECT );
		}

		//Complete the flash object and serialize it
		flash.completeEmbedFlash();
		return flash.toString();
	} catch( Exception e ) {
		//Also reached by the MessageExceptions thrown above; every failure is reported the same way
		logger.error("Unable to parse the embedded object from the user's message: " + textToParse, e);
		throw new MessageException( MessageException.IMPROPER_EMBEDDED_OBJECT );
	}
}
 
开发者ID:ivan-zapreev,项目名称:x-cure-chat,代码行数:57,代码来源:FlashEmbeddedParser.java

示例8: filter

import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Builds an {@code AX2JClassTranslator} from the matching table rows of a page.
 *
 * <p>Rows are {@code tr} tags whose {@code class} attribute is either
 * {@code "alt-color api apilevel-"} or {@code " api apilevel-"}. For each row,
 * the plain text of child 1 (with newlines removed) and of child 3 is added to
 * the translator as an attribute/method pair.
 *
 * @param content the HTML to scan
 * @return the populated translator
 * @throws AndroidDocException with {@code ATM_FORMAT_ERROR} when a row does not
 *         have exactly seven children, or with {@code AXML_FORMAT_ERROR} when
 *         the parser fails
 */
private AX2JClassTranslator filter(String content) {
    try {
        Parser docParser = Parser.createParser(content, Config.ENCODE);
        AndFilter altRowFilter = new AndFilter(
                new TagNameFilter("tr"), new HasAttributeFilter("class", "alt-color api apilevel-"));
        // The second class value really does start with a space.
        AndFilter plainRowFilter = new AndFilter(
                new TagNameFilter("tr"), new HasAttributeFilter("class", " api apilevel-"));
        NodeIterator rowIterator = docParser.parse(new OrFilter(altRowFilter, plainRowFilter)).elements();

        AX2JClassTranslator translator = new AX2JClassTranslator(type);
        while (rowIterator.hasMoreNodes()) {
            NodeList cells = rowIterator.nextNode().getChildren();
            /*
             * ***** row children example *****
             *    Txt (268[6,37],269[7,0]): \nTag (269[7,0],292[7,23]): td class="jd-linkcol"
             *      Tag (292[7,23],381[7,112]): a href="../../../reference/android/view/View.html...
             *        Txt (381[7,112],412[7,143]): android:accessibilityLiveRegion
             *        End (412[7,143],416[7,147]): /a
             *      End (416[7,147],421[7,152]): /td
             *    Txt (421[7,152],422[8,0]): \nTag (422[8,0],445[8,23]): td class="jd-linkcol"
             *      Txt (445[8,23],446[9,0]): \n
             *      Tag (446[9,0],530[9,84]): a href="../../../reference/android/view/View.html#s...
             *        Txt (530[9,84],561[9,115]): setAccessibilityLiveRegion(int)
             *        End (561[9,115],565[9,119]): /a
             *      Txt (565[9,119],566[10,0]): \n
             *      End (566[10,0],571[10,5]): /td
             *    Txt (571[10,5],572[11,0]): \nTag (572[11,0],609[11,37]): td class="jd-descrcol" width="100%"
             *      Txt (609[11,37],712[14,0]): \nIndicates to accessibility services whether the...
             *      End (712[14,0],717[14,5]): /td
             *    Txt (717[14,5],718[15,0]): \n
             * ***** row children example *****
             *
             * Seven children in total: text, td, text, td, text, td, text.
             */
            if (cells.size() != 7) {
                throw new AndroidDocException(AndroidDocException.ATM_FORMAT_ERROR);
            }

            // Child 1 holds the XML attribute name, child 3 the related method.
            String attr = cells.elementAt(1).toPlainTextString().replace("\n", "");
            String method = cells.elementAt(3).toPlainTextString();
            translator.add(attr, method);
        }
        return translator;
    } catch (ParserException e) {
        throw new AndroidDocException(AndroidDocException.AXML_FORMAT_ERROR);
    }
}
 
开发者ID:sickworm,项目名称:AndroidXMLToJava,代码行数:51,代码来源:Filter2014.java

示例9: list

import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Action mapped as "eventlist": serves the recent-events listing for the current {@code page}.
 *
 * <p>The "News" cache is consulted under the key {@code "eventlist" + page}.
 * On a hit the cached value is assigned to {@code list}. On a miss the calendar
 * listing is fetched and parsed; when exactly two nodes with
 * {@code class="clear"} are found, the markup between them is re-parsed, the
 * children of every link are replaced by a text node built from its
 * {@code title} attribute, and the resulting HTML is written to the servlet
 * response.
 *
 * <p>NOTE(review): on a miss {@code list} is set to an empty list and nothing
 * in this method adds to it or stores anything in the cache - confirm whether
 * that is intended.
 *
 * @return always {@code NONE}
 * @throws IOException declared for the fetch and the response writer
 */
// Raw types are suppressed because of the unparameterized List cast below.
@SuppressWarnings("rawtypes")
@Action(value = "eventlist")
public String list() throws IOException {
	Cache c = CacheManager.getInstance().getCache("News");
	String ckey = "eventlist"+page ;
	Element ele = c.get(ckey);
	if (!CommonUtil.isEmpty(ele)) {
		// Cache hit: reuse the stored value as-is.
		list = (List) ele.getObjectValue();

	} else {
		// NOTE(review): the query string contains "&&" and the "type" parameter twice - confirm the remote side expects this.
		StringBuffer retstr = fetch(RD+"/calendar/?a=list&&m=recent&range=30&_="+System.currentTimeMillis()+"&type=0&place=0&type="+page	);
		Parser p = Parser.createParser(retstr.toString(), "utf-8");
		list = new ArrayList<News>();
		try {
			// Nodes carrying class="clear" act as markers around the interesting fragment.
			NodeList ls = p
					.extractAllNodesThatMatch(new HasAttributeFilter("class","clear"));
			if(ls.size()==2)
			{
				// Fragment boundaries: end of the first marker and start of the second.
				int tk1=ls.elementAt(0).getEndPosition();
				int tk2=ls.elementAt(1).getStartPosition();
				// Set the response encoding before the writer is obtained further down.
				ServletActionContext.getResponse().setCharacterEncoding("utf-8");
				// NOTE(review): the +6 offset presumably skips the closing tag that follows the first marker - confirm.
				p=Parser.createParser(retstr.substring(tk1+6, tk2), "utf-8");
				NodeList nl=p.parse(null);
				// Recursive search (second argument true) for every link tag in the fragment.
				NodeList links=nl.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class),true);
				SimpleNodeIterator i=links.elements();
				while(i.hasMoreNodes())
				{
					// Replace the link's children with its title text, then drop the title attribute.
					LinkTag lt=(LinkTag)i.nextNode();
					NodeList ll=new NodeList();
					ll.add(new TextNode(lt.getAttribute("title")));
					lt.setChildren(ll);
					lt.removeAttribute("title");
				}
				
				
				// The link tags were modified in place, so serializing nl includes the changes.
				ServletActionContext.getResponse().getWriter().print(nl.toHtml());
			}
		} catch (ParserException e) {
			e.printStackTrace();
		}
	}

	return NONE;
}
 
开发者ID:BaixiangLiu,项目名称:fudanweixin,代码行数:45,代码来源:CampusEventAction.java


注:本文中的org.htmlparser.Parser.parse方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。