本文整理汇总了Java中org.htmlcleaner.HtmlCleaner类的典型用法代码示例。如果您正苦于以下问题：Java HtmlCleaner类的具体用法？Java HtmlCleaner怎么用？Java HtmlCleaner使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
HtmlCleaner类属于org.htmlcleaner包，在下文中一共展示了HtmlCleaner类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: createHtmlCleaner
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Creates an {@link HtmlCleaner} and adjusts the properties object it exposes.
 *
 * <p>The following settings are applied, in this order: advanced XML escaping on,
 * XML declaration omitted, doctype declaration kept, special entities translated,
 * reserved characters translated to NCR, unicode characters recognized,
 * "quest and exclam" ignored, empty-element tags not used, and the
 * {@code script} and {@code title} tags pruned.
 *
 * @return the configured cleaner
 */
private static HtmlCleaner createHtmlCleaner() {
    final HtmlCleaner cleaner = new HtmlCleaner();
    // The cleaner hands out its own properties instance; mutating it configures the cleaner.
    final CleanerProperties props = cleaner.getProperties();

    props.setAdvancedXmlEscape(true);
    props.setOmitXmlDeclaration(true);
    props.setOmitDoctypeDeclaration(false);
    props.setTranslateSpecialEntities(true);
    props.setTransResCharsToNCR(true);
    props.setRecognizeUnicodeChars(true);
    props.setIgnoreQuestAndExclam(true);
    props.setUseEmptyElementTags(false);
    props.setPruneTags("script,title");

    return cleaner;
}
示例2: loginCredit
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Fetches {@code POST_CREDIT_URI}, extracts the {@code sessionId} and {@code userid}
 * form values from the returned page and posts them to {@code CREDITS_URI}.
 *
 * @return the response body of the POST to {@code CREDITS_URI}
 * @throws Exception if any step fails; the original failure is attached as the cause
 */
public static String loginCredit() throws Exception {
    try {
        // NOTE(review): the third argument looks like a referer URL — confirm against Connector.
        String page = Connector.getDataByGet(POST_CREDIT_URI, "big5", "http://nportal.ntut.edu.tw/aptreeList.do?apDn=ou=aa,ou=aproot,o=ldaproot");
        TagNode root = new HtmlCleaner().clean(page);

        // Recursive, case-insensitive lookup of the elements named "sessionId" / "userid";
        // the first match of each is used.
        TagNode[] sessionNodes = root.getElementsByAttValue("name", "sessionId", true, false);
        String sessionId = sessionNodes[0].getAttributeByName("value");
        TagNode[] userNodes = root.getElementsByAttValue("name", "userid", true, false);
        String userid = userNodes[0].getAttributeByName("value");

        HashMap<String, String> params = new HashMap<>();
        params.put("sessionId", sessionId);
        params.put("userid", userid);
        return Connector.getDataByPost(CREDITS_URI, params, "big5");
    } catch (Exception e) {
        // Chain the cause so callers can log or inspect the real failure
        // instead of only seeing the generic message.
        throw new Exception("登入學生查詢系統時發生錯誤", e);
    }
}
示例3: getYearList
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Posts {@code format=-1} to the standard URI for the current {@code lang} and
 * collects the text of every {@code <a>} element in the response.
 *
 * @return the anchor texts in document order
 * @throws Exception if the request or parsing fails; the original failure is attached as the cause
 */
public static ArrayList<String> getYearList() throws Exception {
    try {
        HashMap<String, String> params = new HashMap<>();
        params.put("format", "-1");
        String result = Connector.getDataByPost(getStandardUri(lang), params, "big5");

        TagNode root = new HtmlCleaner().clean(result);
        TagNode[] anchors = root.getElementsByName("a", true);

        ArrayList<String> year_list = new ArrayList<>(anchors.length);
        for (TagNode anchor : anchors) {
            year_list.add(anchor.getText().toString());
        }
        return year_list;
    } catch (Exception e) {
        // Chain the cause so the real failure is not lost behind the generic message.
        throw new Exception("入學年度清單讀取時發生錯誤", e);
    }
}
示例4: getDivisionList
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Posts {@code format=-2} and the given year to the standard URI for the current
 * {@code lang}, then reads every {@code <a>} element of the response.
 *
 * <p>For each anchor the link text is added to the returned list and the last
 * {@code '='}-separated segment of its {@code href} is appended to {@code matrics}.
 * {@code matrics} is cleared before the request is made.
 *
 * @param year value sent as the {@code year} request parameter
 * @return the anchor texts in document order
 * @throws Exception if the request or parsing fails; the original failure is attached as the cause
 */
public static ArrayList<String> getDivisionList(String year)
        throws Exception {
    try {
        matrics.clear();
        HashMap<String, String> params = new HashMap<>();
        params.put("format", "-2");
        params.put("year", year);
        String result = Connector.getDataByPost(getStandardUri(lang), params, "big5");

        TagNode root = new HtmlCleaner().clean(result);
        TagNode[] anchors = root.getElementsByName("a", true);

        ArrayList<String> division_list = new ArrayList<>(anchors.length);
        for (TagNode anchor : anchors) {
            String division = anchor.getText().toString();
            // The value after the final '=' in the link is kept alongside the division name.
            String[] hrefParts = anchor.getAttributeByName("href").split("=");
            matrics.add(hrefParts[hrefParts.length - 1]);
            division_list.add(division);
        }
        return division_list;
    } catch (Exception e) {
        // Chain the cause so the real failure is not lost behind the generic message.
        throw new Exception("學制清單讀取時發生錯誤", e);
    }
}
示例5: login_2_2
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Loads the page at {@code redirectUri}, copies the {@code value} attributes of its
 * first two {@code <input>} elements into {@code __VIEWSTATE} and
 * {@code __EVENTVALIDATION}, and posts them back together with the credentials.
 *
 * @param redirectUri URI that is fetched and then posted to
 * @param account     value sent as {@code TxtBox_loginName}
 * @param password    value sent as {@code TxtBox_password}
 * @return the response body of the POST
 * @throws Exception if any step fails; the original failure is attached as the cause
 */
public static String login_2_2(String redirectUri, String account,
        String password) throws Exception {
    try {
        String page = Connector.getDataByGet(redirectUri, "big5");
        TagNode root = new HtmlCleaner().clean(page);

        // The two hidden fields are taken purely by position among all <input> elements.
        TagNode[] inputs = root.getElementsByName("input", true);
        String viewState = inputs[0].getAttributeByName("value");
        String eventValidation = inputs[1].getAttributeByName("value");

        HashMap<String, String> params = new HashMap<>();
        params.put("__VIEWSTATE", viewState);
        params.put("__EVENTVALIDATION", eventValidation);
        params.put("__EVENTTARGET", "");
        params.put("__EVENTARGUMENT", "");
        params.put("TxtBox_loginName", account);
        params.put("TxtBox_password", password);
        params.put("btnSubmit_AD", "登入");
        return Connector.getDataByPost(redirectUri, params, "big5");
    } catch (Exception e) {
        // Previously the cause was discarded entirely, leaving no trace of what went wrong.
        throw new Exception("Ntutcc登入時發生錯誤", e);
    }
}
示例6: getCourseType
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Requests the page for the given course number and returns the text of the first
 * {@code <td>} in the eighth {@code <tr>} of the first element whose
 * {@code border} attribute equals {@code "1"}.
 *
 * <p>Calls {@code loginCourse()} first when {@code isLogin} is false.
 *
 * @param courseNo value sent as the {@code code} request parameter
 * @return the text content of that cell
 * @throws Exception if login, the request or parsing fails; the original failure is attached as the cause
 */
static String getCourseType(String courseNo) throws Exception {
    try {
        if (!isLogin) {
            loginCourse();
        }
        HashMap<String, String> params = new HashMap<>();
        params.put("format", "-1");
        params.put("code", courseNo);
        String result = Connector.getDataByPost(getCourseUri("zh"), params, "big5");

        TagNode root = new HtmlCleaner().clean(result);
        TagNode[] tables = root.getElementsByAttValue("border", "1", true, false);
        TagNode[] rows = tables[0].getElementsByName("tr", true);
        // Row index 7 / cell index 0 are fixed positions in the page layout.
        TagNode[] cells = rows[7].getElementsByName("td", true);
        return cells[0].getText().toString();
    } catch (Exception ex) {
        // Previously the cause was discarded entirely, leaving no trace of what went wrong.
        throw new Exception("課程類別讀取時發生錯誤", ex);
    }
}
示例7: toHTML
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Runs the given markup through a {@link PrettyXmlSerializer} (namespace-aware,
 * single-space indent), removes lines that contain only spaces or tabs, and
 * passes the result to {@code escapeXML}.
 *
 * @param htmlIn markup to serialize
 * @return the value returned by {@code escapeXML}, or {@code null} if any
 *         exception is raised (the stack trace is printed)
 */
public static String toHTML( String htmlIn ) {
    try {
        HtmlCleaner htmlCleaner = new HtmlCleaner();
        htmlCleaner.getProperties().setNamespacesAware( true );

        XmlSerializer serializer = new PrettyXmlSerializer( htmlCleaner.getProperties(), " " );
        String pretty = serializer.getAsString( htmlIn );

        // Drop whitespace-only lines left behind by pretty printing.
        String withoutBlankLines = pretty.replaceAll("(?m)^[ \t]*\r?\n", "");
        return escapeXML( withoutBlankLines );
    } catch( Exception e ) {
        e.printStackTrace();
        return null;
    }
}
示例8: toXML
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Converts an HTML string into an XML string using HtmlCleaner.
 *
 * <p>Comments are omitted, {@code script} and {@code style} tags are pruned,
 * namespaces are ignored, and special entities are translated.
 *
 * @param source HTML text to convert
 * @return the pretty-printed XML, or {@code source} unchanged if an
 *         {@link IOException} occurs (the failure is logged)
 */
private String toXML(String source){
    CleanerProperties props = new CleanerProperties();
    props.setTranslateSpecialEntities(true);
    props.setOmitComments(true);
    props.setPruneTags("script,style");
    // Ignore namespaces.
    props.setNamespacesAware(false);
    props.setAdvancedXmlEscape(true);

    try {
        TagNode root = new HtmlCleaner(props).clean(source);
        return new PrettyXmlSerializer(props).getXmlAsString(root);
    } catch (IOException e) {
        // Best effort: keep returning the input, but say what happened in the log.
        logger.error("Failed to convert HTML to XML; returning the input unchanged", e);
        return source;
    }
}
示例9: processFollow
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Parses a "following" page and extracts user URLs from it and from every
 * subsequent page.
 *
 * <p>The first page is always passed to {@code extractUserUrl}. The total page
 * count is read from the second-to-last pagination button inside
 * {@code #Profile-following}; pages 2..count are then fetched as
 * {@code followUrl + "?page=" + i} and processed the same way. If fewer than
 * two pagination buttons exist, or the button text is not a number, only the
 * first page is processed.
 *
 * @param followUrl URL of the first "following" page
 */
public static void processFollow(String followUrl) {
    String content = PageUtil.getContent(followUrl);
    HtmlCleaner htmlCleaner = new HtmlCleaner();
    TagNode tNode = htmlCleaner.clean(content);
    extractUserUrl(content);
    try {
        Object[] pageNumObj = tNode
                .evaluateXPath("//*[@id=\"Profile-following\"]//div[@class=\"Pagination\"]/button");
        // The page count lives in the second-to-last button, so at least two are
        // required; with exactly one match the old "length > 0" check indexed -1.
        if (pageNumObj == null || pageNumObj.length < 2) {
            return;
        }
        TagNode node = (TagNode) pageNumObj[pageNumObj.length - 2];
        // Trim: surrounding whitespace in the button text would break parseInt.
        int pagenum = Integer.parseInt(node.getText().toString().trim());
        for (int i = 2; i <= pagenum; i++) {
            String url = followUrl + "?page=" + i;
            content = PageUtil.getContent(url);
            extractUserUrl(content);
        }
    } catch (XPatherException e) {
        logger.error(e.getMessage(), e);
    } catch (NumberFormatException e) {
        logger.error("Pagination button text is not a page number for " + followUrl, e);
    }
}
示例10: htmlOutputStreamViaHtmlCleaner
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Cleans an HTML (hOCR) file with HtmlCleaner and writes the pretty-printed
 * result to another file.
 *
 * <p>Input and output both use {@code UTF_ENCODING}. Failures while writing the
 * output file are ignored; failures while opening or reading the input propagate.
 *
 * @param pathOfHOCRFile path of the file to read
 * @param outputFilePath path of the file to write
 * @throws IOException if the input file cannot be opened, read or closed
 */
public static void htmlOutputStreamViaHtmlCleaner(String pathOfHOCRFile, String outputFilePath) throws IOException {
    CleanerProperties cleanerProps = new CleanerProperties();
    // set some properties to non-default values
    cleanerProps.setTransResCharsToNCR(true);
    cleanerProps.setTranslateSpecialEntities(true);
    cleanerProps.setOmitComments(true);
    cleanerProps.setOmitDoctypeDeclaration(true);
    cleanerProps.setOmitXmlDeclaration(false);
    HtmlCleaner cleaner = new HtmlCleaner(cleanerProps);

    TagNode tagNode;
    FileInputStream hOCRFileInputStream = new FileInputStream(pathOfHOCRFile);
    try {
        tagNode = cleaner.clean(hOCRFileInputStream, UTF_ENCODING);
    } finally {
        // Close even when clean() throws; previously the stream leaked on that path.
        hOCRFileInputStream.close();
    }

    try {
        new PrettyHtmlSerializer(cleanerProps).writeToFile(tagNode, outputFilePath, UTF_ENCODING);
    } catch (Exception e) { // NOPMD.
        // NOTE(review): write failures are swallowed here (kept from the original
        // best-effort behavior), so callers cannot tell whether the output file was
        // produced. Consider logging or propagating.
    }
}
示例11: JoinedBefore
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Checks whether the join date shown on a player's reddit user page is before
 * the given calendar date.
 *
 * <p>The page {@code https://www.reddit.com/u/<UserName>} is downloaded, the
 * first element with class {@code age} is located, and the {@code datetime}
 * attribute of its first direct {@code <time>} child is read. Only the part
 * before {@code 'T'} (the date) is used. Both the parsed join date and the
 * cutoff are interpreted in UTC.
 *
 * @param mp    player whose {@code UserName()} identifies the reddit account
 * @param year  cutoff year
 * @param month cutoff month as understood by {@link Calendar.Builder#setDate},
 *              i.e. zero-based (0 = January)
 * @param day   cutoff day of month
 * @return {@code true} if the join date is strictly before the cutoff date
 * @throws Exception if the page cannot be fetched or the date cannot be found or parsed
 */
public static boolean JoinedBefore(ChatPlayer mp, int year, int month, int day) throws Exception {
    URL url = new URL("https://www.reddit.com/u/" + mp.UserName());
    URLConnection con = url.openConnection();
    con.setRequestProperty("User-Agent", "TheButtonAutoFlair");

    TagNode node;
    // Close the response stream once parsed; it was previously never closed.
    try (InputStream in = con.getInputStream()) {
        node = new HtmlCleaner().clean(in);
    }
    node = node.getElementsByAttValue("class", "age", true, true)[0];
    node = node.getElementsByName("time", false)[0];
    String joindate = node.getAttributeByName("datetime").split("T")[0];

    // Parse in the same zone the cutoff is built in. With the JVM default zone the
    // two midnights could differ by the local UTC offset and flip the result for
    // dates at the boundary.
    TimeZone utc = TimeZone.getTimeZone("UTC");
    SimpleDateFormat parserSDF = new SimpleDateFormat("yyyy-MM-dd");
    parserSDF.setTimeZone(utc);
    Date joined = parserSDF.parse(joindate);

    Date cutoff = new Calendar.Builder().setTimeZone(utc).setDate(year, month, day)
            .build().getTime();
    return joined.before(cutoff);
}
示例12: testProcessTimeformPage
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Exercises {@code RaceCardProcessor.processTimeformPage} with the bundled
 * {@code exampleDayPage.html} resource and a fixed day-page URL, and asserts
 * that the returned list equals {@code null}.
 */
@Test
public void testProcessTimeformPage() {
    System.out.println("processTimeformPage");

    URL dayPageUrl;
    try {
        dayPageUrl = new URL("http://form.horseracing.betfair.com/daypage?date=20120907");
    } catch (MalformedURLException badUrl) {
        throw new RuntimeException(badUrl);
    }

    TagNode parsedPage;
    try {
        // The page is parsed from a classpath resource rather than fetched from the URL.
        InputStream fixture = getClass().getResourceAsStream("exampleDayPage.html");
        parsedPage = new HtmlCleaner().clean(fixture);
    } catch (IOException readFailure) {
        throw new RuntimeException(readFailure);
    }

    RaceCardProcessor processor = new RaceCardProcessor();
    processor.getRootNode(null);

    ArrayList expected = null;
    ArrayList actual = processor.processTimeformPage(dayPageUrl, parsedPage);
    assertEquals(expected, actual);
}
示例13: cleanFile
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Cleans an HTML file with HtmlCleaner and writes the compact result to a
 * second file in the same directory.
 *
 * <p>Any failure is logged at {@code WARNING} level together with the exception;
 * nothing is thrown to the caller.
 *
 * @param props       cleaner properties used for both cleaning and serializing
 * @param path        directory that contains the input and receives the output
 * @param nameFile    name of the file to read (decoded as UTF-8)
 * @param newNameFile name of the file to write (encoded as UTF-8)
 */
public static void cleanFile(CleanerProperties props, String path, String nameFile, String newNameFile)
{
    String sourcePath = path + File.separator + nameFile;
    String targetPath = path + File.separator + newNameFile;
    try
    {
        // do parsing
        TagNode tagNode = new HtmlCleaner(props).clean(new File(sourcePath), "utf-8");
        // serialize to the output file
        new CompactHtmlSerializer(props).writeToFile(tagNode, targetPath, "UTF-8");
        LOG.info(sourcePath + " cleaned!");
    }
    catch(Exception ex)
    {
        // Not every failure is a missing file (it may be a parse or write error),
        // so report what actually happened and keep the stack trace.
        LOG.log(Level.WARNING, "Failed to clean " + sourcePath + ": " + ex.getMessage(), ex);
    }
}
示例14: getHTML
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Converts the workbook to HTML, runs it through HtmlCleaner so that it is
 * well formed, and returns the pretty-printed XML as bytes.
 *
 * <p>The page size passed to {@code convert} is 21.0 x 29.7 (presumably A4 in
 * centimetres — confirm against {@code convert}); width and height are swapped
 * when {@code isLandscape()} is true.
 *
 * @param book workbook to convert
 * @return the serialized, cleaned document
 * @throws IOException if cleaning or serialization fails
 */
public byte[] getHTML(HSSFWorkbook book) throws IOException {
    double width = 21.0;
    double height = 29.7;
    if (isLandscape()) {
        // Plain swap; the add/subtract trick can introduce rounding error on doubles.
        double swap = width;
        width = height;
        height = swap;
    }
    // Encode explicitly as UTF-8: the bytes are decoded as UTF-8 below, so the
    // platform default charset would corrupt non-ASCII text on other platforms.
    byte[] html = convert(book, width, height).getBytes("UTF-8");
    ByteArrayInputStream in = new ByteArrayInputStream(html);
    // Clean up the HTML to be well formed
    HtmlCleaner cleaner = new HtmlCleaner();
    CleanerProperties props = cleaner.getProperties();
    TagNode node = cleaner.clean(in, "UTF-8");
    // Write into an in-memory buffer and hand back its contents.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    new PrettyXmlSerializer(props).writeToStream(node, out);
    return out.toByteArray();
}
示例15: createDocument
import org.htmlcleaner.HtmlCleaner; //导入依赖的package包/类
/**
 * Convenience method (for xml/xhtml): creates a <code>Document</code> from
 * the specified URL.
 *
 * <p>The content is parsed as-is with a default {@link DocumentBuilder}; it is
 * not passed through HtmlCleaner, so it must already be well-formed XML.
 *
 * <p>NOTE(review): the parser uses the JDK's default settings. If URLs can point
 * at untrusted content, consider restricting DTD / external-entity processing.
 *
 * @param inUrl
 *            {@link URL} to read from
 * @return {@link Document} the parsed XML document
 * @throws ParserConfigurationException
 *             if a parser cannot be created
 * @throws IOException
 *             if the URL cannot be opened or read
 * @throws SAXException
 *             if the content is not well-formed XML
 */
public static Document createDocument(final URL inUrl)
        throws ParserConfigurationException, IOException, SAXException {
    // The earlier HtmlCleaner.clean(inUrl) call downloaded the whole resource and
    // threw the result away; it only doubled the network traffic, so it is gone.
    final DocumentBuilder lBuilder = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder();
    final URLConnection lConnection = inUrl.openConnection();
    try (BufferedInputStream lStream = new BufferedInputStream(
            lConnection.getInputStream())) {
        return lBuilder.parse(lStream);
    }
}