当前位置: 首页>>代码示例>>Java>>正文


Java Lexer类代码示例

本文整理汇总了Java中org.htmlparser.lexer.Lexer的典型用法代码示例。如果您正苦于以下问题:Java Lexer类的具体用法?Java Lexer怎么用?Java Lexer使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


Lexer类属于org.htmlparser.lexer包,在下文中一共展示了Lexer类的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: html2Text

import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Extracts the text content of an HTML fragment.
 *
 * The fragment is tokenized node by node and only text nodes contribute to
 * the result. Scanning stops as soon as the collected text is longer than
 * {@code len}; the collected text is returned as-is, without being cut back
 * to {@code len} characters.
 *
 * @param html the HTML fragment to scan
 * @param len  the length threshold after which scanning stops
 * @return the concatenated HTML of the text nodes visited
 * @throws RuntimeException wrapping any ParserException raised by the lexer
 */
public static String html2Text(String html, int len) {
	try {
		Lexer tokenizer = new Lexer(html);
		StringBuilder text = new StringBuilder(html.length());
		for (Node current = tokenizer.nextNode(); current != null; current = tokenizer.nextNode()) {
			if (current instanceof TextNode) {
				text.append(current.toHtml());
			}
			// Checked after every node, text or not, exactly once per iteration.
			if (text.length() > len) {
				break;
			}
		}
		return text.toString();
	} catch (ParserException e) {
		throw new RuntimeException(e);
	}
}
 
开发者ID:huanzhou,项目名称:jeecms6,代码行数:19,代码来源:StrUtils.java

示例2: ensureAllAttributesAreSafe

import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Given an input, analyze each HTML tag and remove unsafe attributes from
 * them.
 *
 * Every node produced by the lexer is written back to the output; tag nodes
 * are first passed to {@code checkAndValidateAttributes(tag, false)}.
 *
 * @param contents
 *            The content to verify
 * @return the content, secure.
 * @throws RuntimeException
 *             if anything goes wrong while lexing or validating
 */
public String ensureAllAttributesAreSafe(String contents) {
	// The buffer never leaves this method, so the unsynchronized
	// StringBuilder is the right choice over StringBuffer.
	StringBuilder sb = new StringBuilder(contents.length());

	try {
		Lexer lexer = new Lexer(contents);
		Node node;

		while ((node = lexer.nextNode()) != null) {
			if (node instanceof Tag) {
				// Validate the tag in place before it is serialized below.
				this.checkAndValidateAttributes((Tag) node, false);
			}

			sb.append(node.toHtml());
		}
	} catch (Exception e) {
		throw new RuntimeException("Problems while parsing HTML", e);
	}

	return sb.toString();
}
 
开发者ID:8090boy,项目名称:gomall.la,代码行数:33,代码来源:SafeHtml.java

示例3: attachKeyword

import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Rewrites the text nodes of an HTML fragment, substituting each configured
 * keyword name with that keyword's URL value. Non-text nodes are copied to
 * the output unchanged.
 *
 * @param siteId the site whose keyword list is looked up
 * @param txt    the HTML fragment to process
 * @return {@code txt} itself when it is blank or no keywords are found,
 *         otherwise the rewritten fragment
 * @throws RuntimeException wrapping any ParserException raised by the lexer
 */
@Transactional(readOnly = true)
public String attachKeyword(Integer siteId, String txt) {
	if (StringUtils.isBlank(txt)) {
		return txt;
	}
	List<CmsKeyword> keywords = getListBySiteId(siteId, true, true);
	int keywordCount = keywords.size();
	if (keywordCount <= 0) {
		return txt;
	}
	// Parallel arrays in the shape StringUtils.replaceEach expects.
	String[] names = new String[keywordCount];
	String[] urls = new String[keywordCount];
	int slot = 0;
	for (CmsKeyword keyword : keywords) {
		names[slot] = keyword.getName();
		urls[slot] = keyword.getUrl();
		slot++;
	}
	try {
		Lexer tokenizer = new Lexer(txt);
		StringBuilder out = new StringBuilder((int) (txt.length() * 1.2));
		for (Node current = tokenizer.nextNode(); current != null; current = tokenizer.nextNode()) {
			String markup = current.toHtml();
			// Only text nodes get keyword substitution.
			out.append(current instanceof TextNode
					? StringUtils.replaceEach(markup, names, urls)
					: markup);
		}
		return out.toString();
	} catch (ParserException e) {
		throw new RuntimeException(e);
	}
}
 
开发者ID:huanzhou,项目名称:jeecms6,代码行数:36,代码来源:CmsKeywordMngImpl.java

示例4: ensureAllAttributesAreSafe

import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Walks the HTML in {@code contents} node by node, handing every tag to
 * {@code checkAndValidateAttributes(tag, false)} before writing the node
 * back out.
 *
 * @param contents The content to verify
 * @return the re-serialized content after all tags have been checked
 * @throws ForumException if lexing or validation fails for any reason
 */
public String ensureAllAttributesAreSafe(String contents) {
	StringBuilder output = new StringBuilder(contents.length());

	try {
		Lexer tokenizer = new Lexer(contents);

		for (Node current = tokenizer.nextNode(); current != null; current = tokenizer.nextNode()) {
			if (current instanceof Tag) {
				this.checkAndValidateAttributes((Tag) current, false);
			}

			// Tags and non-tags alike are serialized through toHtml().
			output.append(current.toHtml());
		}
	}
	catch (Exception e) {
		throw new ForumException("Problems while parsing HTML: " + e, e);
	}

	return output.toString();
}
 
开发者ID:eclipse123,项目名称:JForum,代码行数:33,代码来源:SafeHtml.java

示例5: getHtmlRoot

import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Parses an HTML string and returns its top-level nodes.
 *
 * @param html the HTML text to parse
 * @return the node list produced by the parser, or {@code null} when a
 *         ParserException occurs (its message is printed to standard error)
 */
public static NodeList getHtmlRoot(String html) {
	Lexer lexer = new Lexer(html);
	Parser parser = new Parser(lexer);
	NodeList root;
	try {
		parser.setEncoding("UTF-8");
		root = parser.parse(null);
	} catch (ParserException e) {
		System.err.println(e.getMessage());
		root = null;
	}
	return root;
}
 
开发者ID:javajoker,项目名称:infoecos,代码行数:17,代码来源:HtmlUtil.java

示例6: parserNode

import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Analyzes a single node: text nodes trigger word segmentation of the
 * page's text, tag nodes are handed to {@code dealTag} (with links being
 * recorded first).
 *
 * The {@code depth} field is incremented on entry and decremented at the
 * end of the method; the text-node branch only does work when
 * {@code depth == 1} after the increment.
 *
 * @param node the node to analyze
 */
private void parserNode(Node node) {
	depth ++;
	// Matches strings made up solely of space, backspace, tab, newline,
	// form feed and carriage return characters (including the empty string).
	String regex = "[ \b\t\n\f\r]*";
	if(node instanceof TextNode) {	// text node: segment the page text
		if(depth == 1) {
			System.out.println("TextNode!");
			// Re-parse the whole page this node belongs to.
			Lexer lexer = new Lexer(node.getPage());
			Parser parser = new Parser(lexer, Parser.STDOUT);
			//TODO filter script & style
			// NOTE(review): as written this is NOT(script) OR NOT(style), which
			// looks like it accepts every node, and both tag names carry a
			// trailing space - confirm whether script/style are really excluded.
			OrFilter it = new OrFilter(new NotFilter(new TagNameFilter("script ")), new NotFilter(new TagNameFilter("style ")));

			try {
				NodeList nl = parser.extractAllNodesThatMatch(it);
				NodeIterator nit = nl.elements();
				while(nit.hasMoreNodes()) {
					Node n = nit.nextNode();
					if(n instanceof TextNode) {
						if(!(n.getText().matches(regex))) {	// skip text that matches the blank-only regex
							segment(n.getText());	// run word segmentation on the page text
						}
					}
				}
			}
			catch(ParserException exc) {
				// Parsing failures are only reported on stdout; processing continues.
				System.out.println("ParserException");
				//exc.printStackTrace();
			}
		}
	}
	else if(node instanceof TagNode) {	// tag node: record the link (if it is one), then process the tag
		if(node instanceof LinkTag) {
			LinkTag tag = (LinkTag)node;
			if(!(tag.getLink().matches(regex))) {	
				urlInfo.addExtendedURL(tag.getLink());	// add the extracted link to urlInfo
			}
		}			
		dealTag(node);
	}
	depth --;
}
 
开发者ID:uraplutonium,项目名称:hadoop-distributed-crawler,代码行数:45,代码来源:URLAnalyzer.java

示例7: parseTheEmbeddedObject

import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Parses the given text as HTML, fills a Flash embedded object from its
 * OBJECT tags (or, failing that, its EMBED tags), validates and completes
 * it, and returns its string form.
 *
 * Any failure along the way, including the improper-object conditions
 * detected here, is logged and reported as a MessageException with the
 * IMPROPER_EMBEDDED_OBJECT code.
 *
 * @param textToParse the text to parse
 * @return the serialized, validated and completed Flash embedding code
 * @throws MessageException if the text cannot be parsed, has an improper
 *         number of tags, or does not yield a valid Flash object
 */
private String parseTheEmbeddedObject( final String textToParse ) throws MessageException {
	try {
		logger.debug("Trying to parse the found message-embedded object: " + textToParse );
		final NodeList parsedNodes = new Parser( new Lexer( textToParse ) ).parse( null );
		final NodeList objectTags = parsedNodes.extractAllNodesThatMatch( new TagNameFilter( FlashEmbeddedObject.OBJECT_TAG_NAME ) );
		// The Flash object is built against xcureDomainPattern; per the original
		// author this is meant to guard against Flash injection from external URLs.
		final FlashEmbeddedObject flash = new FlashEmbeddedObject( xcureDomainPattern );
		final int objectCount = objectTags.size();
		if( objectCount > 0 && objectCount <= 2 ) {
			// One or two OBJECT tags present: take the data from them
			parseFlashObjectTag( objectTags, flash );
		} else {
			// Otherwise fall back to the EMBED tags
			final NodeList embedTags = parsedNodes.extractAllNodesThatMatch( new TagNameFilter( FlashEmbeddedObject.EMBED_TAG_NAME ) );
			final int embedCount = embedTags.size();
			if( embedCount > 2 ) {
				// More than an opening and a closing EMBED tag is not acceptable
				logger.error("An improper number of the object (" + objectCount +
							 ") and embed (" + embedCount + ") tags in the string: " + textToParse);
				throw new MessageException( MessageException.IMPROPER_EMBEDDED_OBJECT );
			}
			parseFlashEmbedTag( embedTags, flash );
		}
		// Reject anything that did not turn out to be a valid Flash embedding
		if( !flash.isValidEmbedFlash() ) {
			logger.error( "The parsed embedded object '" + textToParse +
						  "' was not recognized as a valid flash animation, we got:" + flash.toString() );
			throw new MessageException( MessageException.IMPROPER_EMBEDDED_OBJECT );
		}
		flash.completeEmbedFlash();
		return flash.toString();
	} catch( Exception e ) {
		// Also reached by the MessageExceptions thrown above
		logger.error("Unable to parse the embedded object from the user's message: " + textToParse, e);
		throw new MessageException( MessageException.IMPROPER_EMBEDDED_OBJECT );
	}
}
 
开发者ID:ivan-zapreev,项目名称:x-cure-chat,代码行数:57,代码来源:FlashEmbeddedParser.java

示例8: createParser

import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Builds a Parser over the given HTML string, using a
 * DefaultParserFeedback configured with {@code DefaultParserFeedback.QUIET}.
 *
 * @param inputHTML the HTML text the parser will read
 * @return a new Parser backed by a Lexer over {@code inputHTML}
 */
public static Parser createParser(String inputHTML) {
    Page sourcePage = new Page(inputHTML);
    Lexer pageLexer = new Lexer(sourcePage);
    ParserFeedback quietFeedback = new DefaultParserFeedback(DefaultParserFeedback.QUIET);
    return new Parser(pageLexer, quietFeedback);
}
 
开发者ID:toulezu,项目名称:play,代码行数:5,代码来源:CommonHelper.java


注:本文中的org.htmlparser.lexer.Lexer类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。