本文整理汇总了Java中org.htmlparser.lexer.Lexer类的典型用法代码示例。如果您正苦于以下问题：Java Lexer类的具体用法？Java Lexer怎么用？Java Lexer使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
Lexer类属于org.htmlparser.lexer包，在下文中一共展示了Lexer类的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: html2Text
import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Extracts the text of an HTML fragment by concatenating its text nodes and
 * skipping every other kind of node.
 *
 * <p>Lexing stops as soon as the collected text is longer than {@code len}.
 * The collected text is not cut back afterwards, so the returned string may
 * be longer than {@code len}.
 *
 * @param html the HTML markup to read
 * @param len  the length after which no further nodes are read
 * @return the concatenated text-node content gathered before lexing stopped
 * @throws RuntimeException wrapping the {@code ParserException} raised by the lexer
 */
public static String html2Text(String html, int len) {
    try {
        final Lexer lexer = new Lexer(html);
        final StringBuilder text = new StringBuilder(html.length());
        for (Node current = lexer.nextNode(); current != null; current = lexer.nextNode()) {
            if (current instanceof TextNode) {
                text.append(current.toHtml());
            }
            // Checked after every node, not only after an append.
            if (text.length() > len) {
                break;
            }
        }
        return text.toString();
    } catch (ParserException e) {
        throw new RuntimeException(e);
    }
}
示例2: ensureAllAttributesAreSafe
import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Rebuilds the given HTML node by node, handing every tag to
 * {@code checkAndValidateAttributes} before it is serialized.
 *
 * <p>Non-tag nodes are copied to the output as they are. The attribute
 * checker is defined elsewhere in this class; it is expected to strip
 * insecure attributes from the tag it receives.
 *
 * @param contents
 *            The content to verify
 * @return the content, re-serialized after every tag has been checked
 * @throws RuntimeException
 *             if lexing or attribute validation fails for any reason; the
 *             original exception is kept as the cause
 */
public String ensureAllAttributesAreSafe(String contents) {
    // The buffer never leaves this method, so the unsynchronized
    // StringBuilder is sufficient (StringBuffer locks on every append).
    StringBuilder sb = new StringBuilder(contents.length());
    try {
        Lexer lexer = new Lexer(contents);
        Node node;
        while ((node = lexer.nextNode()) != null) {
            if (node instanceof Tag) {
                this.checkAndValidateAttributes((Tag) node, false);
            }
            // Tags and non-tags are serialized the same way; only the
            // validation step above differs.
            sb.append(node.toHtml());
        }
    } catch (Exception e) {
        throw new RuntimeException("Problems while parsing HTML", e);
    }
    return sb.toString();
}
示例3: attachKeyword
import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Replaces keyword names inside the text nodes of an HTML fragment with the
 * value of the matching keyword's {@code getUrl()}.
 *
 * <p>Markup nodes are copied through unchanged, so keyword names that occur
 * inside tags are not touched. Blank input, or a site without keywords, is
 * returned as is.
 *
 * @param siteId the site whose keywords are looked up via {@code getListBySiteId}
 * @param txt    the HTML text to process
 * @return the processed HTML, or {@code txt} itself when there is nothing to do
 * @throws RuntimeException wrapping the {@code ParserException} raised by the lexer
 */
@Transactional(readOnly = true)
public String attachKeyword(Integer siteId, String txt) {
    if (StringUtils.isBlank(txt)) {
        return txt;
    }
    final List<CmsKeyword> keywords = getListBySiteId(siteId, true, true);
    final int count = keywords.size();
    if (count <= 0) {
        return txt;
    }

    // Parallel arrays: names[n] is replaced by urls[n].
    final String[] names = new String[count];
    final String[] urls = new String[count];
    int slot = 0;
    for (CmsKeyword keyword : keywords) {
        names[slot] = keyword.getName();
        urls[slot] = keyword.getUrl();
        slot++;
    }

    try {
        final Lexer lexer = new Lexer(txt);
        // Leave some headroom because replacements may lengthen the text.
        final StringBuilder out = new StringBuilder((int) (txt.length() * 1.2));
        for (Node current = lexer.nextNode(); current != null; current = lexer.nextNode()) {
            final String markup = current.toHtml();
            out.append(current instanceof TextNode
                    ? StringUtils.replaceEach(markup, names, urls)
                    : markup);
        }
        return out.toString();
    } catch (ParserException e) {
        throw new RuntimeException(e);
    }
}
示例4: ensureAllAttributesAreSafe
import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Walks the given HTML node by node and passes every tag through
 * {@code checkAndValidateAttributes} before writing it back out.
 *
 * <p>Nodes that are not tags are copied to the result unchanged. The
 * attribute checker lives elsewhere in this class and is expected to drop
 * insecure attributes from the tag it is given.
 *
 * @param contents The content to verify
 * @return the re-serialized content after every tag has been checked
 * @throws ForumException if lexing or validation fails for any reason; the
 *         original exception is attached as the cause
 */
public String ensureAllAttributesAreSafe(String contents) {
    final StringBuilder sanitized = new StringBuilder(contents.length());
    try {
        final Lexer lexer = new Lexer(contents);
        for (Node current = lexer.nextNode(); current != null; current = lexer.nextNode()) {
            if (current instanceof Tag) {
                this.checkAndValidateAttributes((Tag) current, false);
            }
            // Serialization is the same for tags and non-tags.
            sanitized.append(current.toHtml());
        }
    } catch (Exception e) {
        throw new ForumException("Problems while parsing HTML: " + e, e);
    }
    return sanitized.toString();
}
示例5: getHtmlRoot
import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Parses an HTML string with an unfiltered {@code Parser.parse(null)} call and
 * returns the resulting node list.
 *
 * @param html the HTML markup to parse
 * @return the node list produced by the parser, or {@code null} when a
 *         {@code ParserException} occurs (its message is printed to standard error)
 */
public static NodeList getHtmlRoot(String html) {
    final Lexer lexer = new Lexer(html);
    final Parser parser = new Parser(lexer);
    NodeList roots = null;
    try {
        parser.setEncoding("UTF-8");
        roots = parser.parse(null);
    } catch (ParserException e) {
        // Failure is reported on stderr only; callers receive null.
        System.err.println(e.getMessage());
    }
    return roots;
}
示例6: parserNode
import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Analyzes one node: segments text for a text node, records the link of a
 * link tag, and hands every tag node to {@code dealTag}.
 *
 * <p>The {@code depth} field is incremented on entry and decremented on exit.
 * Text is only processed while {@code depth == 1}. The decrement runs in a
 * {@code finally} block so that an exception thrown by {@code segment},
 * {@code dealTag} or {@code urlInfo.addExtendedURL} cannot leave
 * {@code depth} permanently raised, which would make later calls skip the
 * text branch.
 *
 * @param node the node to analyze
 */
private void parserNode(Node node) {
    depth++;
    try {
        // Matches strings made up only of space, backspace ("\b" is the Java
        // string escape for U+0008), tab, newline, form feed and carriage
        // return, i.e. text with nothing to segment.
        String regex = "[ \b\t\n\f\r]*";
        if (node instanceof TextNode) { // text node: run word segmentation
            if (depth == 1) {
                System.out.println("TextNode!");
                // A fresh lexer/parser is built over the node's whole page.
                Lexer lexer = new Lexer(node.getPage());
                Parser parser = new Parser(lexer, Parser.STDOUT);
                // TODO filter script & style
                // NOTE(review): "not script OR not style" holds for every node,
                // because no tag carries both names, so this filter excludes
                // nothing. The tag names also end in a space, which presumably
                // never matches a real tag name - confirm before relying on it.
                OrFilter it = new OrFilter(new NotFilter(new TagNameFilter("script ")), new NotFilter(new TagNameFilter("style ")));
                try {
                    NodeList nl = parser.extractAllNodesThatMatch(it);
                    NodeIterator nit = nl.elements();
                    while (nit.hasMoreNodes()) {
                        Node n = nit.nextNode();
                        if (n instanceof TextNode) {
                            // Only non-blank text is segmented.
                            if (!(n.getText().matches(regex))) {
                                segment(n.getText()); // segment the text found in the page
                            }
                        }
                    }
                } catch (ParserException exc) {
                    // Best effort: the failure is reported on stdout and the
                    // text of this page is skipped.
                    System.out.println("ParserException");
                }
            }
        } else if (node instanceof TagNode) {
            // Link tags additionally contribute their target to the outgoing links.
            if (node instanceof LinkTag) {
                LinkTag tag = (LinkTag) node;
                if (!(tag.getLink().matches(regex))) {
                    urlInfo.addExtendedURL(tag.getLink()); // add the outgoing link to urlInfo
                }
            }
            dealTag(node);
        }
    } finally {
        depth--;
    }
}
示例7: parseTheEmbeddedObject
import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Parses the given markup as an embedded Flash object, validates and completes
 * it, and returns its string form.
 *
 * <p>When the markup contains one or two OBJECT tags they are parsed;
 * otherwise the EMBED tags are parsed, provided there are at most two of
 * them. Any failure, including the validation failures raised inside this
 * method, is logged by the outer handler and reported as
 * {@code MessageException.IMPROPER_EMBEDDED_OBJECT} (this assumes
 * {@code MessageException} is an {@code Exception} subclass).
 *
 * <p>NOTE(review): the earlier documentation described the created Flash
 * object as using the "sameDomain" security level in one place and the
 * "never" level in another; which one {@code FlashEmbeddedObject} actually
 * applies cannot be determined from this method - confirm.
 *
 * @param textToParse the text to parse
 * @return the string form of the filtered, verified and completed embedded
 *         Flash object
 * @throws MessageException if the provided HTML code is broken or the object
 *         was not recognized as a valid Flash movie
 */
private String parseTheEmbeddedObject( final String textToParse ) throws MessageException {
    try {
        logger.debug("Trying to parse the found message-embedded object: " + textToParse );
        final NodeList parsedNodes = new Parser( new Lexer( textToParse ) ).parse( null );

        final NodeList objectTags = parsedNodes.extractAllNodesThatMatch(
                new TagNameFilter( FlashEmbeddedObject.OBJECT_TAG_NAME ) );
        /* The Flash object is created against xcureDomainPattern to prevent
           Flash injection: a URL pointing to XCure itself is acceptable, but
           not an external Flash whose getURL executes malicious JavaScript
           that, e.g., reads the user's session cookies. */
        final FlashEmbeddedObject flashObject = new FlashEmbeddedObject( xcureDomainPattern );

        final int objectCount = objectTags.size();
        if( objectCount > 0 && objectCount <= 2 ) {
            // One or two OBJECT tags are present: parse them.
            parseFlashObjectTag( objectTags, flashObject );
        } else {
            // Otherwise fall back to the EMBED tags.
            final NodeList embedTags = parsedNodes.extractAllNodesThatMatch(
                    new TagNameFilter( FlashEmbeddedObject.EMBED_TAG_NAME ) );
            // More than two EMBED tags is improper: one is the opening tag
            // and the other the closing tag.
            if( embedTags.size() > 2 ) {
                logger.error("An improper number of the object (" + objectCount +
                             ") and embed (" + embedTags.size() + ") tags in the string: " + textToParse);
                throw new MessageException( MessageException.IMPROPER_EMBEDDED_OBJECT );
            }
            parseFlashEmbedTag( embedTags, flashObject );
        }

        // Reject anything that did not turn out to be a valid Flash object.
        if( !flashObject.isValidEmbedFlash() ) {
            logger.error( "The parsed embedded object '" + textToParse +
                          "' was not recognized as a valid flash animation, we got:" + flashObject.toString() );
            throw new MessageException( MessageException.IMPROPER_EMBEDDED_OBJECT );
        }

        // Complete the Flash object, then serialize it.
        flashObject.completeEmbedFlash();
        return flashObject.toString();
    } catch( Exception e ) {
        logger.error("Unable to parse the embedded object from the user's message: " + textToParse, e);
        throw new MessageException( MessageException.IMPROPER_EMBEDDED_OBJECT );
    }
}
示例8: createParser
import org.htmlparser.lexer.Lexer; //导入依赖的package包/类
/**
 * Builds a {@code Parser} over the given HTML string, using a
 * {@code DefaultParserFeedback} constructed in {@code QUIET} mode.
 *
 * @param inputHTML the HTML markup the parser will read
 * @return a new parser positioned on a page built from {@code inputHTML}
 */
public static Parser createParser(String inputHTML) {
    final Page page = new Page(inputHTML);
    final ParserFeedback quietFeedback = new DefaultParserFeedback(DefaultParserFeedback.QUIET);
    return new Parser(new Lexer(page), quietFeedback);
}