本文整理汇总了Java中org.htmlparser.Parser.parse方法的典型用法代码示例。如果您正苦于以下问题:Java Parser.parse方法的具体用法?Java Parser.parse怎么用?Java Parser.parse使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.htmlparser.Parser的用法示例。
在下文中一共展示了Parser.parse方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: splitHtml
import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Parses {@code content} as HTML, splits it via {@code recusiveSplitHtml}, and
 * appends one final page built from the remainder of {@code content} starting
 * at {@code startPosition}.
 *
 * <p>For the final page, every tag in {@code tagNodeList} that starts before
 * {@code startPosition} and whose end tag finishes at or after it is re-opened
 * in front of the remaining text, and the result is re-parsed and serialized.
 * NOTE(review): {@code tagNodeList} and {@code startPosition} are presumably
 * populated by {@code recusiveSplitHtml}; confirm against that method.
 *
 * @return the list of HTML fragments; if parsing fails, whatever had been
 *         collected so far (possibly an empty list)
 */
private List<String> splitHtml() {
    List<String> resultList = new ArrayList<String>();
    try {
        Parser parser = Parser.createParser(content, "UTF-8");
        NodeList nodeList = parser.parse(null);
        resultList = recusiveSplitHtml(nodeList);

        StringBuilder lastPageContent = new StringBuilder();
        for (TagNode tagNode : tagNodeList) {
            // getEndTag() is null for a tag that has no end tag; such a tag cannot
            // span startPosition, so skip it instead of dereferencing null.
            boolean startsBefore = tagNode.getStartPosition() < startPosition;
            if (startsBefore
                    && tagNode.getEndTag() != null
                    && tagNode.getEndTag().getEndPosition() >= startPosition) {
                lastPageContent.append("<");
                lastPageContent.append(tagNode.getText());
                lastPageContent.append(">");
            }
        }
        lastPageContent.append(content.substring(startPosition));

        // Re-parse the assembled fragment and serialize it back to HTML.
        Parser lastPageContentParser = Parser.createParser(lastPageContent.toString(), "UTF-8");
        NodeList pageContentNodeList = lastPageContentParser.parse(null);
        resultList.add(pageContentNodeList.toHtml());
    } catch (ParserException e) {
        e.printStackTrace();
    }
    return resultList;
}
示例2: extractKeyWordText
import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Parses the page identified by {@code url} and hands the resulting node list,
 * together with {@code keyword}, to {@code processNodeList}.
 *
 * <p>A {@link ParserException} is not propagated; its stack trace is printed.
 *
 * @param url     the page URL used to construct the parser
 * @param keyword the keyword forwarded to {@code processNodeList}
 */
public static void extractKeyWordText(String url, String keyword) {
    try {
        // Build a parser object, using the page URL as its argument.
        Parser pageParser = new Parser(url);
        // Set the page encoding. The original note mentions a gb2312 page,
        // but the value actually applied here is utf-8.
        pageParser.setEncoding("utf-8");
        // Collect all nodes (null means no NodeFilter is used), then walk the
        // initial node list looking at every node.
        NodeList allNodes = pageParser.parse(null);
        processNodeList(allNodes, keyword);
    } catch (ParserException parseFailure) {
        parseFailure.printStackTrace();
    }
}
示例3: readTextAndLinkAndTitle
import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Reads plain text, links and the title out of a page and prints each one.
 *
 * <p>The page is parsed with a filter that accepts text nodes, link tags and
 * title tags. For each accepted node the text, the link target or the title is
 * taken respectively and printed to standard output, unless
 * {@code isTrimEmpty} reports it as empty.
 *
 * @param result the content of the web page
 * @throws Exception if parsing fails
 */
public static void readTextAndLinkAndTitle(String result) throws Exception {
    Parser parser = Parser.createParser(result, "utf8");

    // Accept a node when it is any one of: text node, link tag, title tag.
    OrFilter anyOfThree = new OrFilter();
    anyOfThree.setPredicates(new NodeFilter[] {
        new NodeClassFilter(TextNode.class),
        new NodeClassFilter(LinkTag.class),
        new NodeClassFilter(TitleTag.class)
    });

    // Declared outside the loop: a node matching none of the branches keeps
    // the previous value, exactly as before.
    String line = "";
    for (Node current : parser.parse(anyOfThree).toNodeArray()) {
        if (current instanceof TextNode) {
            line = ((TextNode) current).getText();
        } else if (current instanceof LinkTag) {
            line = ((LinkTag) current).getLink();
        } else if (current instanceof TitleTag) {
            line = ((TitleTag) current).getTitle();
        }
        if (!isTrimEmpty(line)) {
            System.out.println(line);
        }
    }
}
示例4: main
import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Fetches the page configured under the {@code mlink} property through an
 * authenticated HTTP proxy, prints the number of {@code A} tags found, and
 * prints every decoded link target that {@code CommonHelper.checkIsAlink}
 * accepts.
 *
 * <p>Proxy host, port, credentials and the connect timeout are read from
 * {@code pro}. Any exception is reported by printing its stack trace.
 *
 * @param args unused
 */
public static void main(String[] args) {
    try {
        URL url = new URL(pro.getProperty("mlink"));
        SocketAddress address = new InetSocketAddress(pro.getProperty("host"), Integer.parseInt(pro.getProperty("port")));
        Proxy proxy = new Proxy(Proxy.Type.HTTP, address);
        URLConnection conn = url.openConnection(proxy);
        Authenticator.setDefault(new MyAuthenticator(pro.getProperty("username"), pro.getProperty("password")));
        conn.setConnectTimeout(Integer.parseInt(pro.getProperty("timeout")));

        Parser parser = new Parser(conn);
        NodeList nodeList = parser.parse(new TagNameFilter("A"));
        System.out.println(nodeList.size());

        for (SimpleNodeIterator it = nodeList.elements(); it.hasMoreNodes(); ) {
            TagNode node = (TagNode) it.nextNode();
            String href = node.getAttribute("href");
            if (href == null) {
                // Anchors without an href (e.g. named anchors) have nothing to decode.
                // Skipping them keeps one such tag from aborting the whole loop
                // through the outer catch.
                continue;
            }
            String dhref = URLDecoder.decode(href, "UTF-8");
            if (CommonHelper.checkIsAlink(dhref)) {
                System.out.println(dhref);
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
示例5: processHTMLBodyWithBASE64Images
import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * When {@code body} is non-null and contains the text {@code "base64"}, parses
 * it, runs an {@code HtmlImageNodeVisitor} over all nodes, replaces
 * {@code body} with the re-serialized HTML, and passes the images collected by
 * the visitor to {@code addAllBase64ImagesToMimeMultipart}.
 *
 * <p>Otherwise the method does nothing.
 *
 * @param multipart the multipart forwarded to {@code addAllBase64ImagesToMimeMultipart}
 */
private void processHTMLBodyWithBASE64Images(MimeMultipart multipart) throws ParserException,
        MessagingException, NoSuchAlgorithmException, SMIMEException, java.security.NoSuchProviderException {
    // Nothing to do unless the body mentions base64 at all.
    if (body == null || !body.contains("base64")) {
        return;
    }

    NodeList parsedBody = new Parser(body).parse(null);
    HtmlImageNodeVisitor imageVisitor = new HtmlImageNodeVisitor();
    parsedBody.visitAllNodesWith(imageVisitor);

    // Serialize after the visit so the body reflects whatever the visitor did to the nodes.
    body = parsedBody.toHtml();
    addAllBase64ImagesToMimeMultipart(multipart, imageVisitor.getBase64Images());
}
示例6: getHtmlRoot
import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Parses an HTML string and returns its top-level nodes.
 *
 * <p>The parser encoding is set to UTF-8 before parsing, and no node filter is
 * applied.
 *
 * @param html the HTML text to parse
 * @return the parsed node list, or {@code null} when a {@link ParserException}
 *         occurs (its message is written to standard error)
 */
public static NodeList getHtmlRoot(String html) {
    Parser htmlParser = new Parser(new Lexer(html));
    NodeList root;
    try {
        htmlParser.setEncoding("UTF-8");
        root = htmlParser.parse(null);
    } catch (ParserException parseError) {
        System.err.println(parseError.getMessage());
        root = null;
    }
    return root;
}
示例7: parseTheEmbeddedObject
import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Parses the embedded-object markup, builds a {@code FlashEmbeddedObject} from
 * it, validates and completes that object, and returns its string form.
 *
 * <p>OBJECT tags are used when one or two of them are present; otherwise EMBED
 * tags are used, provided there are at most two of them.
 *
 * <p>Every failure, including the {@code MessageException}s raised inside the
 * method, passes through the outer catch, is logged there, and is rethrown as
 * a new {@code MessageException} with {@code IMPROPER_EMBEDDED_OBJECT}.
 *
 * @param textToParse the text to parse
 * @return the serialized, validated and completed Flash embedding code
 * @throws MessageException if the markup cannot be parsed, contains an
 *         improper number of tags, or is not recognized as a valid Flash
 *         animation
 */
private String parseTheEmbeddedObject( final String textToParse ) throws MessageException {
    try {
        logger.debug("Trying to parse the found message-embedded object: " + textToParse );
        final NodeList parsedNodes = new Parser( new Lexer( textToParse ) ).parse( null );

        // Pick the OBJECT tags out of the parse result.
        final NodeList objectTags = parsedNodes.extractAllNodesThatMatch( new TagNameFilter( FlashEmbeddedObject.OBJECT_TAG_NAME ) );

        // The Flash object is restricted by the XCure domain pattern so that an
        // external movie cannot run malicious JavaScript (for example, to read the
        // user's session cookies).
        // NOTE(review): earlier comments named the security level both "never" and
        // "sameDomain"; confirm which one FlashEmbeddedObject applies.
        final FlashEmbeddedObject flash = new FlashEmbeddedObject( xcureDomainPattern );

        final int objectCount = objectTags.size();
        if( objectCount > 0 && objectCount <= 2 ) {
            // One or two OBJECT tags: parse those.
            parseFlashObjectTag( objectTags, flash );
        } else {
            // Otherwise fall back to the EMBED tags.
            final NodeList embedTags = parsedNodes.extractAllNodesThatMatch( new TagNameFilter( FlashEmbeddedObject.EMBED_TAG_NAME ) );
            if( embedTags.size() > 2 ) {
                // More than two EMBED tags is improper: one is the open and another the close tag.
                logger.error("An improper number of the object (" + objectCount +
                        ") and embed (" + embedTags.size() + ") tags in the string: " + textToParse);
                throw new MessageException( MessageException.IMPROPER_EMBEDDED_OBJECT );
            }
            parseFlashEmbedTag( embedTags, flash );
        }

        // Reject anything that did not turn out to be a valid Flash embedding.
        if( !flash.isValidEmbedFlash() ) {
            logger.error( "The parsed embedded object '" + textToParse +
                    "' was not recognized as a valid flash animation, we got:" + flash.toString() );
            throw new MessageException( MessageException.IMPROPER_EMBEDDED_OBJECT );
        }

        // Complete the object, then serialize it.
        flash.completeEmbedFlash();
        return flash.toString();
    } catch( Exception e ) {
        logger.error("Unable to parse the embedded object from the user's message: " + textToParse, e);
        throw new MessageException( MessageException.IMPROPER_EMBEDDED_OBJECT );
    }
}
示例8: filter
import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Extracts attribute-to-method pairs from the table rows of an HTML document
 * and collects them into an {@code AX2JClassTranslator}.
 *
 * <p>Rows are {@code tr} tags whose {@code class} attribute is either
 * {@code "alt-color api apilevel-"} or {@code " api apilevel-"}. Each row must
 * have exactly seven child nodes; child 1 supplies the attribute name (with
 * newlines removed) and child 3 supplies the method.
 *
 * @param content the HTML text to parse
 * @return the translator filled with one entry per matching row
 * @throws AndroidDocException with {@code ATM_FORMAT_ERROR} if a row does not
 *         have exactly seven children (including a row with no children), or
 *         with {@code AXML_FORMAT_ERROR} if the HTML parser fails
 */
private AX2JClassTranslator filter(String content) {
    try {
        Parser parser = Parser.createParser(content, Config.ENCODE);
        AndFilter andFilter1 =
                new AndFilter(new TagNameFilter("tr"), new HasAttributeFilter("class","alt-color api apilevel-"));
        AndFilter andFilter2 =
                // Note: the " api apilevel-" value really does start with a space.
                new AndFilter(new TagNameFilter("tr"), new HasAttributeFilter("class"," api apilevel-"));
        OrFilter orFilter = new OrFilter(andFilter1, andFilter2);
        NodeList tableNodeList = parser.parse(orFilter);
        NodeIterator tableIt = tableNodeList.elements();
        AX2JClassTranslator map = new AX2JClassTranslator(type);
        while (tableIt.hasMoreNodes()) {
            Node trNode = tableIt.nextNode();
            NodeList trNodeList = trNode.getChildren();
            /*
             * ***** trNodeList example *****
             * Txt (268[6,37],269[7,0]): \nTag (269[7,0],292[7,23]): td class="jd-linkcol"
             * Tag (292[7,23],381[7,112]): a href="../../../reference/android/view/View.html...
             * Txt (381[7,112],412[7,143]): android:accessibilityLiveRegion
             * End (412[7,143],416[7,147]): /a
             * End (416[7,147],421[7,152]): /td
             * Txt (421[7,152],422[8,0]): \nTag (422[8,0],445[8,23]): td class="jd-linkcol"
             * Txt (445[8,23],446[9,0]): \n
             * Tag (446[9,0],530[9,84]): a href="../../../reference/android/view/View.html#s...
             * Txt (530[9,84],561[9,115]): setAccessibilityLiveRegion(int)
             * End (561[9,115],565[9,119]): /a
             * Txt (565[9,119],566[10,0]): \n
             * End (566[10,0],571[10,5]): /td
             * Txt (571[10,5],572[11,0]): \nTag (572[11,0],609[11,37]): td class="jd-descrcol" width="100%"
             * Txt (609[11,37],712[14,0]): \nIndicates to accessibility services whether the...
             * End (712[14,0],717[14,5]): /td
             * Txt (717[14,5],718[15,0]): \n
             * ***** trNodeList example *****
             */
            // getChildren() may return null for a row with no children; treat that
            // as the same format error instead of failing with a NullPointerException.
            if (trNodeList == null || trNodeList.size() != 7) {
                throw new AndroidDocException(AndroidDocException.ATM_FORMAT_ERROR);
            }
            String attr = trNodeList.elementAt(1).toPlainTextString();
            attr = attr.replace("\n", "");
            String method = trNodeList.elementAt(3).toPlainTextString();
            map.add(attr, method);
        }
        return map;
    } catch (ParserException e) {
        throw new AndroidDocException(AndroidDocException.AXML_FORMAT_ERROR);
    }
}
示例9: list
import org.htmlparser.Parser; //导入方法依赖的package包/类
/**
 * Handles the {@code eventlist} action.
 *
 * <p>Looks up the key {@code "eventlist" + page} in the "News" cache. On a hit
 * the cached value is assigned to {@code list}. On a miss the calendar page is
 * fetched and {@code list} is reset to an empty list. If the fetched markup
 * contains exactly two nodes whose {@code class} attribute is {@code "clear"},
 * the text between them is re-parsed, every link's children are replaced by a
 * text node holding its {@code title} attribute (which is then removed), and
 * the resulting HTML is written to the servlet response.
 *
 * <p>This method does not store anything into the cache and does not add
 * elements to {@code list}. A {@link ParserException} is reported by printing
 * its stack trace.
 *
 * @return always {@code NONE}
 * @throws IOException if fetching the page or writing the response fails
 */
@SuppressWarnings("rawtypes")
@Action(value = "eventlist")
public String list() throws IOException {
    Cache newsCache = CacheManager.getInstance().getCache("News");
    String cacheKey = "eventlist"+page ;
    Element cached = newsCache.get(cacheKey);

    // Cache hit: reuse the stored list and stop.
    if (!CommonUtil.isEmpty(cached)) {
        list = (List) cached.getObjectValue();
        return NONE;
    }

    StringBuffer fetched = fetch(RD+"/calendar/?a=list&&m=recent&range=30&_="+System.currentTimeMillis()+"&type=0&place=0&type="+page );
    Parser pageParser = Parser.createParser(fetched.toString(), "utf-8");
    list = new ArrayList<News>();
    try {
        NodeList clearMarkers = pageParser
                .extractAllNodesThatMatch(new HasAttributeFilter("class","clear"));
        if (clearMarkers.size() == 2) {
            int sliceStart = clearMarkers.elementAt(0).getEndPosition();
            int sliceEnd = clearMarkers.elementAt(1).getStartPosition();
            ServletActionContext.getResponse().setCharacterEncoding("utf-8");

            // NOTE(review): the +6 presumably skips a closing tag after the first
            // marker (e.g. "</div>"); confirm against the fetched markup.
            Parser fragmentParser = Parser.createParser(fetched.substring(sliceStart+6, sliceEnd), "utf-8");
            NodeList fragment = fragmentParser.parse(null);
            NodeList anchors = fragment.extractAllNodesThatMatch(new NodeClassFilter(LinkTag.class),true);

            // Replace each link's content with its title text and drop the title attribute.
            for (SimpleNodeIterator it = anchors.elements(); it.hasMoreNodes(); ) {
                LinkTag anchor = (LinkTag) it.nextNode();
                NodeList titleOnly = new NodeList();
                titleOnly.add(new TextNode(anchor.getAttribute("title")));
                anchor.setChildren(titleOnly);
                anchor.removeAttribute("title");
            }
            ServletActionContext.getResponse().getWriter().print(fragment.toHtml());
        }
    } catch (ParserException parseFailure) {
        parseFailure.printStackTrace();
    }
    return NONE;
}