本文整理匯總了Java中org.w3c.tidy.Tidy.parseDOM方法的典型用法代碼示例。如果您正苦於以下問題:Java Tidy.parseDOM方法的具體用法?Java Tidy.parseDOM怎麼用?Java Tidy.parseDOM使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.w3c.tidy.Tidy的用法示例。
在下文中一共展示了Tidy.parseDOM方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點贊,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: getHtmlDocument
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Wraps an HTML fragment in a minimal page skeleton (zero page and body
 * margins) and parses the result into a DOM document with JTidy.
 *
 * @param htmlContent markup placed inside the generated {@code <body>} element
 * @return the document produced by {@code Tidy.parseDOM}
 */
private Document getHtmlDocument(String htmlContent) {
    String page = "<html>"
            + "<head><style language='text/css'>"
            + "@page{ margin: 0; }"
            + "body{ margin:0;}"
            + "</style></head>"
            + "<body>"
            + htmlContent
            + "</body>"
            + "</html>";

    Tidy parser = new Tidy();
    parser.setXHTML(true);
    parser.setQuiet(true);
    parser.setShowWarnings(false);

    // Note: the page is encoded with the platform default charset.
    byte[] pageBytes = page.getBytes();
    return parser.parseDOM(new ByteArrayInputStream(pageBytes), null);
}
示例2: tidyDoc
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Parses a stream with a Tidy parser obtained from {@code makeTidyParser}
 * and returns the normalized DOM document.
 *
 * @param stream the input to parse
 * @param quiet forwarded to {@code makeTidyParser}
 * @param showWarnings forwarded to {@code makeTidyParser}
 * @param report_errors when {@code true}, parse errors are logged at error
 *        level and a {@link TidyException} is thrown; otherwise they are
 *        only logged at warn level
 * @param isXML forwarded to {@code makeTidyParser}
 * @param out stream handed to {@code Tidy.parseDOM}; {@code null} if no
 *        output is required
 * @return the parsed, normalized document
 * @throws TidyException if Tidy reports at least one parse error and
 *         {@code report_errors} is {@code true}
 */
private static Document tidyDoc(InputStream stream, boolean quiet, boolean showWarnings, boolean report_errors,
        boolean isXML, OutputStream out) throws TidyException {
    // Writer handed to makeTidyParser; its content is what gets logged below.
    StringWriter messages = new StringWriter();
    Tidy parser = makeTidyParser(quiet, showWarnings, isXML, messages);

    Document parsed = parser.parseDOM(stream, out);
    parsed.normalize();

    if (parser.getParseErrors() <= 0) {
        return parsed;
    }
    if (!report_errors) {
        log.warn("Tidy errors: " + messages.toString());
        return parsed;
    }
    log.error("TidyException: " + messages.toString());
    throw new TidyException(parser.getParseErrors(), parser.getParseWarnings());
}
示例3: getDocument
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Returns a {@link Document} built from {@code sText}.
 *
 * <p>The text is written to a temporary file, which is then parsed by
 * JTidy. The temporary file and every stream opened here are released
 * before the method returns.
 *
 * @param sText
 *            The original text.
 * @param errout
 *            Stream on which HTML syntax errors are reported. When
 *            {@code null}, Tidy's own error output is left unchanged.
 * @param showWarnings
 *            Whether warnings should be reported on {@code errout}.
 * @return The parsed document.
 * @throws IOException
 *             If the temporary file cannot be created, written or read.
 */
private static Document getDocument(String sText, PrintWriter errout,
        boolean showWarnings) throws IOException {
    File temp = File.createTempFile("TwikiToHtml", ".tmp");
    // Safety net in case the explicit delete in the finally block fails.
    temp.deleteOnExit();
    try {
        // Close the writer even if printing fails so the file handle is released.
        try (PrintWriter msg = new PrintWriter(new FileWriter(temp))) {
            msg.print(sText);
        }

        Tidy tidy = new Tidy();
        tidy.setShowWarnings(showWarnings);
        tidy.setMakeClean(true);
        tidy.setXHTML(true);
        if (errout != null) {
            tidy.setErrout(errout);
        }

        // The DOM is built before parseDOM returns, so the stream can be closed right after.
        try (FileInputStream in = new FileInputStream(temp)) {
            return tidy.parseDOM(in, null);
        }
    } finally {
        // Best effort: remove the temp file now rather than waiting for JVM exit.
        temp.delete();
    }
}
示例4: cleanXMLData
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Runs the given markup through JTidy configured for XML tags and XML
 * output, and returns what Tidy wrote, decoded as UTF-8.
 *
 * @param data the markup to clean; encoded as UTF-8 before parsing
 * @return the bytes Tidy wrote during {@code parseDOM}, decoded as UTF-8
 * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
 */
public static String cleanXMLData(String data) throws UnsupportedEncodingException {
    final String charsetName = "UTF-8";

    Tidy cleaner = new Tidy();
    cleaner.setInputEncoding(charsetName);
    cleaner.setOutputEncoding(charsetName);
    cleaner.setWraplen(Integer.MAX_VALUE);
    cleaner.setXmlOut(true);
    cleaner.setXmlTags(true);
    cleaner.setSmartIndent(true);
    cleaner.setMakeClean(true);
    cleaner.setForceOutput(true);

    ByteArrayInputStream source = new ByteArrayInputStream(data.getBytes(charsetName));
    ByteArrayOutputStream cleaned = new ByteArrayOutputStream();
    // Only the serialized output is needed; the returned DOM is discarded.
    cleaner.parseDOM(source, cleaned);
    return cleaned.toString(charsetName);
}
示例5: tidyHtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Parses the given stream into a DOM document with a quiet JTidy instance
 * (warnings disabled).
 *
 * @param in the stream to parse
 * @return the document produced by {@code Tidy.parseDOM}
 */
protected Document tidyHtml(InputStream in) {
    Tidy parser = new Tidy();
    parser.setQuiet(true);
    parser.setShowWarnings(false);
    return parser.parseDOM(in, null);
}
示例6: beautyHTML
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Parses the given markup with JTidy (XML tags, XML output, smart indent)
 * and pretty-prints the resulting DOM back to a string.
 *
 * @param html the markup to format; encoded as UTF-8 before parsing
 * @return the pretty-printed document, decoded as UTF-8
 * @throws UnsupportedEncodingException if the "UTF-8" charset name is not supported
 */
private String beautyHTML(String html) throws UnsupportedEncodingException {
    final String charsetName = "UTF-8";

    Tidy formatter = new Tidy();
    formatter.setInputEncoding(charsetName);
    formatter.setOutputEncoding(charsetName);
    formatter.setWraplen(Integer.MAX_VALUE);
    formatter.setXmlOut(true);
    formatter.setXmlTags(true);
    formatter.setSmartIndent(true);

    ByteArrayInputStream source = new ByteArrayInputStream(html.getBytes(charsetName));
    ByteArrayOutputStream formatted = new ByteArrayOutputStream();
    // Parse without direct output, then serialize the DOM explicitly.
    Document parsed = formatter.parseDOM(source, null);
    formatter.pprint(parsed, formatted);
    return formatted.toString(charsetName);
}
示例7: HtmlDocument
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
* Instanciates an HtmlDocument after having built the DOM tree.
*
* @param strHtml
* The Html code to be parsed.
* @param strBaseUrl
* The Base url used to retrieve urls.
* @param useAbsoluteUrl
* Determine if we use absolute or relative url for HTML element's names
*/
public HtmlDocument( String strHtml, String strBaseUrl, boolean useAbsoluteUrl )
{
// use of tidy to retrieve the DOM tree
Tidy tidy = new Tidy( );
tidy.setQuiet( true );
tidy.setShowWarnings( false );
_content = tidy.parseDOM( new ByteArrayInputStream( strHtml.getBytes( ) ), null );
_strBaseUrl = ( strBaseUrl == null ) ? "" : strBaseUrl;
_useAbsoluteUrl = useAbsoluteUrl;
}
示例8: tidyDocument
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Cleans the given stream with JTidy (XML output, UTF-8 input) and returns
 * the cleaned bytes as a new in-memory stream.
 *
 * @param inputStream the document to clean
 * @return a stream over the bytes Tidy wrote during {@code parseDOM}
 * @throws FileNotFoundException declared by the signature; not raised by the visible code
 * @throws IOException if decoding the output for trace logging fails
 */
protected ByteArrayInputStream tidyDocument(InputStream inputStream) throws FileNotFoundException, IOException {
    Tidy cleaner = new Tidy();
    cleaner.setTrimEmptyElements(true);
    cleaner.setMakeClean(true);
    cleaner.setQuoteNbsp(true);
    cleaner.setXmlOut(true);
    cleaner.setInputEncoding("UTF-8");

    ByteArrayOutputStream cleaned = new ByteArrayOutputStream();
    // Only the serialized output is needed; the returned DOM is discarded.
    cleaner.parseDOM(inputStream, cleaned);

    String cleanedText = cleaned.toString("UTF-8");
    LOGGER.trace(cleanedText);

    byte[] cleanedBytes = cleaned.toByteArray();
    return new ByteArrayInputStream(cleanedBytes);
}
示例9: convertHtmlToXhtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Uses the Tidy parser to convert HTML into well-formed XHTML.
 *
 * <p>While the conversion runs, {@code System.err} is replaced by a stream
 * that discards everything, and it is restored afterwards even if parsing
 * throws. NOTE(review): {@code System.setErr} is JVM-wide, so output that
 * other threads write to {@code System.err} during the call is lost too.
 *
 * @param html the HTML code
 * @return the generated XHTML code
 */
// Author: Marco Dörfler
public static String convertHtmlToXhtml(String html) {
    // Suppress error output for the duration of the conversion.
    PrintStream errStream = System.err;
    System.setErr(new PrintStream(new OutputStream() {
        @Override
        public void write(int b) throws IOException {
            // Intentionally discard everything.
        }
    }));
    try {
        // Created after the swap, matching the original statement order.
        Tidy tidy = new Tidy();
        tidy.setXHTML(true);
        tidy.setCharEncoding(Configuration.UTF8);
        ByteArrayInputStream inputStream = new ByteArrayInputStream(html.getBytes
                (StandardCharsets.UTF_8));
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        tidy.parseDOM(inputStream, outputStream);
        // Decoding with a Charset cannot fail, so no fallback branch is needed.
        return new String(outputStream.toByteArray(), StandardCharsets.UTF_8);
    } finally {
        // Always re-enable error output, including when parsing throws.
        System.setErr(errStream);
    }
}
示例10: convert
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Converts an HTML string by parsing it with Tidy, rejecting input that
 * the error listener flags as bogus, and returning the filtered
 * serialization.
 *
 * @param type the requested target type (not used by this implementation)
 * @param value the HTML text; must be a {@code String} or {@code null}
 * @return the filtered output, or {@code null} for null or empty input
 * @throws ConversionException if the listener reports bogus input or the
 *         output encoding is not supported
 */
@Override
public Object convert(Class type, Object value) {
    final String htmlText = (String) value;
    if (htmlText == null || htmlText.isEmpty()) {
        return null;
    }

    Tidy parser = createTidyParser();
    TidyErrorsListener listener = new TidyErrorsListener();
    parser.setMessageListener(listener);

    byte[] htmlBytes = htmlText.getBytes(StandardCharsets.UTF_8);
    Document parsed = parser.parseDOM(new ByteArrayInputStream(htmlBytes), null);
    if (listener.isBogus()) {
        throw new ConversionException("renderers.converter.safe.invalid");
    }

    ByteArrayOutputStream serialized = new ByteArrayOutputStream();
    parseDocument(serialized, parser, parsed);
    try {
        String rendered = new String(serialized.toByteArray(), ENCODING);
        return filterOutput(rendered);
    } catch (UnsupportedEncodingException e) {
        logger.error(e.getMessage(), e);
        throw new ConversionException("tidy.converter.ending.notSupported.critical");
    }
}
示例11: parseInputStream
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Parses the given stream into a DOM document with a quiet JTidy instance
 * (warnings disabled).
 *
 * @param is the stream to parse
 * @return the document produced by {@code Tidy.parseDOM}
 */
public Document parseInputStream(InputStream is) {
    Tidy parser = new Tidy();
    parser.setQuiet(true);
    parser.setShowWarnings(false);
    return parser.parseDOM(is, null);
}
示例12: parseFileToDocument
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Parses a file into a DOM document with a freshly configured Tidy
 * instance, which is also stored in the {@code tidy} field.
 *
 * @param file the file to parse
 * @param setXML value passed to {@code Tidy.setXmlOut}
 * @param setXHTML value passed to {@code Tidy.setXHTML}
 * @param setParseMark value passed to {@code Tidy.setTidyMark} (controls the
 *        Tidy meta tag in the header)
 * @param docType value passed to {@code Tidy.setDocType}
 * @return the document produced by {@code Tidy.parseDOM}
 * @throws RuntimeException if the file cannot be opened, parsed or closed;
 *         the original exception is attached as the cause
 */
public Document parseFileToDocument(File file, Boolean setXML, Boolean setXHTML, Boolean setParseMark, String docType){
    tidy = new Tidy();
    //default format is XML
    tidy.setXmlOut(setXML);
    //set if extensible html
    tidy.setXHTML(setXHTML);
    //this removes the tidy meta tag in the header
    tidy.setTidyMark(setParseMark);
    tidy.setDocType(docType);
    // try-with-resources closes the stream even when parsing throws
    try (FileInputStream fis = new FileInputStream(file)) {
        //parse input stream and return a DOM Document
        return tidy.parseDOM(fis, null);
    } catch (Exception e) {
        throw new RuntimeException("Unable to parse the file :" + file.getAbsolutePath() + " throws", e);
    }
}
示例13: parse
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Runs the input through a Tidy instance from {@code createTidy} and
 * writes the result to {@code out}.
 *
 * @param in the markup to process
 * @param out destination for the processed markup
 * @param removeScripts when {@code true}, the DOM is built first, "script"
 *        and "style" are passed to {@code removeElement},
 *        {@code removeDuplicateAttributes} is applied, and the DOM is then
 *        pretty-printed; when {@code false}, Tidy parses straight to
 *        {@code out}
 */
public static void parse(InputStream in, OutputStream out, boolean removeScripts) {
    final Tidy parser = createTidy();

    if (!removeScripts) {
        // Nothing to strip: let Tidy stream the result directly.
        parser.parse(in, out);
        return;
    }

    final Document parsed = parser.parseDOM(in, null);
    removeElement(parsed.getDocumentElement(), "script");
    removeElement(parsed.getDocumentElement(), "style");
    removeDuplicateAttributes(parsed.getDocumentElement());
    parser.pprint(parsed, out);
}
示例14: showRenderXMLResponse
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Renders the response data of a sample as a DOM tree in the results
 * scroll pane.
 *
 * <p>Bytes in front of the first occurrence of {@code XML_PFX} are skipped
 * before parsing. If Tidy reports parse errors, they are shown in a warning
 * dialog, and the tree is displayed regardless.
 *
 * @param res the sample result whose response data is rendered
 */
private void showRenderXMLResponse(SampleResult res) {
    results.setContentType("text/xml"); // $NON-NLS-1$
    results.setCaretPosition(0);

    byte[] responseBytes = res.getResponseData();
    final ByteArrayInputStream xmlStream = new ByteArrayInputStream(responseBytes);

    // Find the first offset at which the XML prefix starts.
    final int searchLimit = responseBytes.length - XML_PFX.length;
    int offset = 0;
    while (offset < searchLimit) {
        if (JOrphanUtils.startsWith(responseBytes, XML_PFX, offset)) {
            xmlStream.skip(offset);// Skip the leading bytes (if any)
            break;
        }
        offset++;
    }

    StringWriter tidyMessages = new StringWriter();
    Tidy parser = XPathUtil.makeTidyParser(true, true, true, tidyMessages);

    // Fully qualified: there is also a javax.swing.text.Document class.
    org.w3c.dom.Document parsed = parser.parseDOM(xmlStream, null);
    parsed.normalize();

    if (parser.getParseErrors() > 0) {
        String title = "Tidy: " + parser.getParseErrors() + " errors, " + parser.getParseWarnings() + " warnings";
        showErrorMessageDialog(tidyMessages.toString(), title, JOptionPane.WARNING_MESSAGE);
    }

    JPanel treePanel = new DOMTreePanel(parsed);
    resultsScrollPane.setViewportView(treePanel);
}
示例15: valueOf
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Cleans an HTML fragment with Tidy and returns the inner content of the
 * resulting {@code body} element.
 *
 * <p>Blank input yields {@code null} when {@code removeBlankspaces} is set
 * and is returned unchanged otherwise. After parsing, {@code removeBadNodes}
 * is applied to the document and the first {@code body} element is
 * serialized with {@code XmlHelper.toString}. When {@code removeBlankspaces}
 * is set, leading and trailing {@code NBSP} characters are stripped before
 * the final trim.
 *
 * @param string the HTML fragment to clean
 * @return the trimmed body content, or {@code null} when there is no body
 *         element or nothing is left after trimming
 */
public String valueOf(final String string) {
    if (StringUtils.isBlank(string)) {
        return removeBlankspaces ? null : string;
    }
    final Tidy tidy = new Tidy(); // obtain a new Tidy instance
    tidy.setXHTML(false); // set desired config options using tidy setters
    tidy.setQuiet(true);
    tidy.setShowErrors(0);
    tidy.setShowWarnings(false);
    tidy.setIndentContent(false);
    tidy.setXmlOut(true);
    final Document document = tidy.parseDOM(new StringReader(string), null);
    removeBadNodes(document);
    final NodeList bodies = document.getElementsByTagName("body");
    if (bodies.getLength() == 0) {
        // No body element? return null
        return null;
    }

    // The serialized form contains the xml header plus the body element itself; only the body content is needed.
    final String serialized = XmlHelper.toString(bodies.item(0));
    // Find the end of the opening tag instead of the literal "<body>", so a tag with attributes is handled.
    final int openStart = serialized.indexOf("<body");
    final int openEnd = openStart < 0 ? -1 : serialized.indexOf('>', openStart);
    final int closeStart = openEnd < 0 ? -1 : serialized.indexOf("</body>", openEnd);
    if (closeStart < 0) {
        // Self-closed or otherwise unexpected serialization: there is no body content to return.
        return null;
    }
    final String result = serialized.substring(openEnd + 1, closeStart);

    if (!removeBlankspaces) {
        return StringUtils.trimToNull(result);
    }

    // Strip the NBSPs at both ends; the bounds checks keep an empty or all-NBSP body from running off the string.
    int begin = 0;
    int end = result.length();
    while (begin < end && result.charAt(begin) == NBSP) {
        begin++;
    }
    while (end > begin && result.charAt(end - 1) == NBSP) {
        end--;
    }
    // An empty remainder (empty or all-NBSP body) becomes null here.
    return StringUtils.trimToNull(result.substring(begin, end));
}