本文整理汇总了Java中org.htmlcleaner.CleanerProperties.setBooleanAttributeValues方法的典型用法代码示例。如果您正苦于以下问题：Java CleanerProperties.setBooleanAttributeValues方法的具体用法？Java CleanerProperties.setBooleanAttributeValues怎么用？Java CleanerProperties.setBooleanAttributeValues使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.htmlcleaner.CleanerProperties的用法示例。
在下文中一共展示了CleanerProperties.setBooleanAttributeValues方法的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getSerialized
import org.htmlcleaner.CleanerProperties; //导入方法依赖的package包/类
/**
 * Serializes the parsed page (xml/xhtml convenience method).
 * <p>
 * When no document node is available an empty string is returned. Otherwise
 * a fresh set of cleaner properties is configured, the {@code NS_XML} entry
 * is removed from the map returned by {@code docNode.getAttributes()}, and
 * the node is handed to the serializer obtained from {@code inSerializer}.
 *
 * @param inSerializer
 *            {@link XmlSerializer} used to obtain the concrete serializer
 * @return String the cleaned and serialized html, or {@code ""} if
 *         {@code docNode} is {@code null}
 * @throws IOException
 *             declared by this method; presumably raised while writing the
 *             serialized output
 */
public String getSerialized(final XmlSerializer inSerializer)
        throws IOException {
    if (docNode != null) {
        // Properties come from a throw-away cleaner so that the settings
        // below are applied to a fresh CleanerProperties instance.
        final CleanerProperties serializerProps = new HtmlCleaner().getProperties();
        serializerProps.setUseCdataForScriptAndStyle(true);
        serializerProps.setRecognizeUnicodeChars(true);
        serializerProps.setUseEmptyElementTags(true);
        serializerProps.setAdvancedXmlEscape(true);
        serializerProps.setTranslateSpecialEntities(true);
        serializerProps.setBooleanAttributeValues("empty"); //$NON-NLS-1$
        serializerProps.setNamespacesAware(true);
        serializerProps.setOmitXmlDeclaration(false);
        serializerProps.setOmitDoctypeDeclaration(true);
        serializerProps.setOmitHtmlEnvelope(false);

        docNode.getAttributes().remove(NS_XML);
        return inSerializer.getSerializer(serializerProps).getXmlAsString(docNode);
    }
    return ""; //$NON-NLS-1$
}
示例2: ExtractInfoWithHtmlCleaner
import org.htmlcleaner.CleanerProperties; //导入方法依赖的package包/类
private ExtractInfoWithHtmlCleaner()
{
CleanerProperties props = cleaner.getProperties();
props.setUseCdataForScriptAndStyle(true);
props.setRecognizeUnicodeChars(true);
props.setUseEmptyElementTags(true);
props.setAdvancedXmlEscape(true);
props.setTranslateSpecialEntities(true);
props.setBooleanAttributeValues("empty");
}
示例3: main
import org.htmlcleaner.CleanerProperties; //导入方法依赖的package包/类
/**
 * Demo entry point: downloads a hard-coded page, selects nodes with an XPath
 * expression, concatenates their text, strips some characters, extracts a
 * date-like substring via {@code CommonUtil.getDateString} and prints it.
 *
 * @param args command-line arguments (unused)
 * @throws IOException      if fetching or parsing the page fails
 * @throws XPatherException if the XPath expression cannot be evaluated
 */
public static void main(String[] args) throws IOException, XPatherException {
    CleanerProperties props = cleaner.getProperties();
    props.setUseCdataForScriptAndStyle(true);
    props.setRecognizeUnicodeChars(true);
    props.setUseEmptyElementTags(true);
    props.setAdvancedXmlEscape(true);
    props.setTranslateSpecialEntities(true);
    props.setBooleanAttributeValues("empty");

    URL url = new URL("http://www.haodf.com/wenda/anzhentaohong_g_638200415.htm");
    // "gb2312" is passed as the charset used to read the page.
    TagNode node = cleaner.clean(url, "gb2312");
    Object[] ns = node.evaluateXPath("//*[@class=\"bb_d3 bl_d3 pb20\"]/div[3]/div[2]/p[2]");

    // Accumulate in a StringBuilder instead of re-allocating a String per node.
    StringBuilder collected = new StringBuilder();
    for (Object object : ns) {
        TagNode dd = (TagNode) object;
        collected.append(dd.getText()).append("\n");
    }

    String result = collected.toString().replace(" ", "").replace("\r", "").replace(";", "");
    // NOTE(review): the dots in the pattern are unescaped, so any single
    // character is accepted between the digit groups — confirm this is intended.
    result = CommonUtil.getDateString(result, ".*?([0-9]+.[0-9]+.[0-9]+).*");
    System.out.print(result);
}
示例4: getStandardCredit
import org.htmlcleaner.CleanerProperties; //导入方法依赖的package包/类
/**
 * Fetches the graduation-credit standard table and returns the credit values
 * of the first row whose first cell contains the given department name.
 * <p>
 * The response HTML is patched (closing {@code </td>} tags inserted before
 * every {@code <td} and {@code <tr>}), pretty-printed, and cleaned before
 * the first table with {@code border="1"} is scanned. The header row (index
 * 0) is skipped. Spaces and newlines are ignored when comparing department
 * names.
 *
 * @param year       value sent as the {@code year} request parameter
 * @param index      index into {@code matrics} selecting the {@code matric}
 *                   request parameter
 * @param department department name to look for in the first column
 * @return the cleaned text of columns 1 through 8 of the matching row
 * @throws Exception if the request, parsing, or lookup fails for any reason;
 *                   the original failure is attached as the cause
 */
public static ArrayList<String> getStandardCredit(String year, int index,
        String department) throws Exception {
    try {
        HashMap<String, String> params = new HashMap<>();
        params.put("format", "-3");
        params.put("year", year);
        params.put("matric", matrics.get(index));
        String result = Connector
                .getDataByPost(getStandardUri(lang), params, "big5");

        // Insert closing </td> tags before each cell/row start so the
        // cleaner sees terminated cells.
        result = result.replace("<td", "</td><td");
        result = result.replace("<tr>", "</td><tr>");

        HtmlCleaner cleaner = new HtmlCleaner();
        CleanerProperties props = cleaner.getProperties();
        props.setUseCdataForScriptAndStyle(true);
        props.setRecognizeUnicodeChars(true);
        props.setUseEmptyElementTags(true);
        props.setAdvancedXmlEscape(true);
        props.setTranslateSpecialEntities(true);
        props.setBooleanAttributeValues("empty");
        result = new PrettyHtmlSerializer(props).getAsString(result);

        TagNode tagNode = cleaner.clean(result);
        TagNode[] tables = tagNode.getElementsByAttValue("border", "1",
                true, false);
        if (tables.length == 0) {
            throw new Exception("no table with border=\"1\" found in response");
        }
        TagNode[] rows = tables[0].getElementsByName("tr", true);

        // Loop-invariant: normalise the requested department once.
        String wantedDepartment = department.replace(" ", "").replace("\n", "");

        // Row 0 is skipped (treated as the header row).
        for (int i = 1; i < rows.length; i++) {
            TagNode[] cols = rows[i].getElementsByName("td", true);
            String rowLabel = cols[0].getText().toString()
                    .replace(" ", "").replace("\n", "");
            if (rowLabel.contains(wantedDepartment)) {
                ArrayList<String> standard = new ArrayList<>();
                // Columns 1..8 hold the credit values that are returned.
                for (int j = 1; j < 9; j++) {
                    String credit = Utility.cleanString(cols[j].getText()
                            .toString());
                    standard.add(credit);
                }
                return standard;
            }
        }
        throw new Exception("no row matching department: " + department);
    } catch (Exception e) {
        // Stack trace output kept for parity with the existing diagnostics.
        e.printStackTrace();
        // Same message as before, but the root cause is now preserved.
        throw new Exception("畢業學分標準讀取時發生錯誤", e);
    }
}