本文整理匯總了Java中org.w3c.tidy.Tidy.setXHTML方法的典型用法代碼示例。如果您正苦於以下問題:Java Tidy.setXHTML方法的具體用法?Java Tidy.setXHTML怎麼用?Java Tidy.setXHTML使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.w3c.tidy.Tidy的用法示例。
在下文中一共展示了Tidy.setXHTML方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: getHtmlDocument
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Wraps an HTML fragment in a minimal zero-margin page and parses it into a DOM tree with JTidy.
 *
 * <p>The page is encoded as UTF-8 and JTidy is told to decode it as UTF-8, so non-ASCII
 * characters in {@code htmlContent} no longer depend on the platform default charset used by
 * {@code String.getBytes()}.
 *
 * @param htmlContent markup placed inside the generated {@code <body>} element
 * @return the document JTidy built from the wrapped markup
 */
private Document getHtmlDocument(String htmlContent) {
    StringBuilder sb = new StringBuilder();
    sb.append("<html>");
    sb.append("<head><style language='text/css'>");
    sb.append("@page{ margin: 0; }");
    sb.append("body{ margin:0;}");
    sb.append("</style></head>");
    sb.append("<body>");
    sb.append(htmlContent);
    sb.append("</body>");
    sb.append("</html>");

    Tidy tidy = new Tidy();
    tidy.setXHTML(true);
    tidy.setQuiet(true);
    tidy.setShowWarnings(false);
    // Keep the byte encoding and the parser's decoding in agreement.
    tidy.setInputEncoding("UTF-8");

    byte[] pageBytes = sb.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8);
    // No output stream is supplied: only the DOM result is wanted.
    return tidy.parseDOM(new ByteArrayInputStream(pageBytes), null);
}
示例2: htmlOutputStreamForISOEncoding
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Runs the file at {@code pathOfHOCRFile} through JTidy (XHTML mode, doctype omitted) and writes
 * the result to {@code outputFilePath}.
 *
 * <p>The output writer is created with {@code ISO_ENCODING} explicitly, so the bytes on disk
 * match the output encoding configured on Tidy instead of the platform default charset.
 *
 * @param pathOfHOCRFile path of the input file
 * @param outputFilePath path of the output file
 * @throws IOException if either file cannot be opened, written or closed
 */
public static void htmlOutputStreamForISOEncoding(final String pathOfHOCRFile, final String outputFilePath) throws IOException {
    Tidy tidy = new Tidy();
    tidy.setXHTML(true);
    tidy.setDocType(DOC_TYPE_OMIT);
    tidy.setInputEncoding(ISO_ENCODING);
    tidy.setOutputEncoding(ISO_ENCODING);
    tidy.setHideEndTags(false);

    // try-with-resources closes every stream even when parsing or another close() fails;
    // closing the writer also flushes it.
    try (FileInputStream inputStream = new FileInputStream(pathOfHOCRFile);
            java.io.FileOutputStream fileOut = new java.io.FileOutputStream(outputFilePath);
            java.io.OutputStreamWriter outputStream = new java.io.OutputStreamWriter(fileOut, ISO_ENCODING)) {
        tidy.parse(inputStream, outputStream);
    }
}
示例3: getDocument
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Builds a {@code Document} from the given text.
 *
 * <p>The text is written to a temporary file, which JTidy then parses. The file is deleted
 * before this method returns.
 *
 * @param sText
 *            the original text
 * @param errout
 *            writer that receives the HTML syntax errors; when {@code null},
 *            {@code setErrout} is not called and Tidy keeps its own error output
 * @param showWarnings
 *            whether warnings should be reported
 * @return the parsed document
 * @throws IOException
 *             if the temporary file cannot be created, written or read
 */
private static Document getDocument(String sText, PrintWriter errout,
        boolean showWarnings) throws IOException {
    File temp = File.createTempFile("TwikiToHtml", ".tmp");
    try {
        try (PrintWriter msg = new PrintWriter(new FileWriter(temp))) {
            msg.print(sText);
        }

        Tidy tidy = new Tidy();
        tidy.setShowWarnings(showWarnings);
        tidy.setMakeClean(true);
        tidy.setXHTML(true);
        if (errout != null) {
            tidy.setErrout(errout);
        }

        // Close the input stream once the DOM has been built.
        try (FileInputStream in = new FileInputStream(temp)) {
            return tidy.parseDOM(in, null);
        }
    } finally {
        // Remove the temp file right away; fall back to deletion at JVM exit if that fails.
        if (!temp.delete()) {
            temp.deleteOnExit();
        }
    }
}
示例4: JTidyBookProcessor
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Creates the processor and configures its JTidy instance in code: indented XHTML output,
 * no line wrapping, and an extra list of element names registered as block-level tags.
 */
public JTidyBookProcessor()
{
    // Element names registered with Tidy through the "new-blocklevel-tags" option.
    Properties extraTagConfig = new Properties();
    extraTagConfig.put("new-blocklevel-tags", "svg image altGlyph altGlyphDef altGlyphItem animate animateColor animateMotion animateTransform circle clipPath color-profile cursor defs desc ellipse feBlend feColorMatrix feComponentTransfer feComposite feConvolveMatrix feDiffuseLighting feDisplacementMap feDistantLight feFlood feFuncA feFuncB feFuncG feFuncR feGaussianBlur feImage feMerge feMergeNode feMorphology feOffset fePointLight feSpecularLighting feSpotLight feTile feTurbulence filter font font-face font-face-format font-face-name font-face-src font-face-uri foreignObject g glyph glyphRef hkern image line linearGradient marker mask metadata missing-glyph mpath path pattern polygon polyline radialGradient rect script set stop style svg switch symbol text textPath title tref tspan use view vkern");

    tidy = new Tidy();
    // Alternative kept for reference (disabled):
    // tidy.setConfigurationFromFile(JTidyBookProcessor.class.getResource("/jtidy.properties").getFile());

    // Layout of the generated markup.
    tidy.setSpaces(2);
    tidy.setIndentContent(true);
    tidy.setSmartIndent(true);

    // Output dialect and escaping.
    tidy.setXHTML(true);
    tidy.setQuoteMarks(false);
    tidy.setQuoteAmpersand(true);

    // Clean-up behaviour.
    tidy.setDropEmptyParas(false);
    tidy.setTidyMark(false);
    tidy.setJoinClasses(true);
    tidy.setJoinStyles(true);
    tidy.setWraplen(0);
    tidy.setDropProprietaryAttributes(true);
    tidy.setEscapeCdata(true);

    tidy.getConfiguration().addProps(extraTagConfig);
}
示例5: getXHTML
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Runs the given HTML through JTidy in XHTML mode and returns what Tidy wrote.
 *
 * <p>Tidy is configured quietly (no warnings, no errors shown) and asked to print the body
 * only.
 *
 * @param html the markup to convert
 * @return the text Tidy produced
 */
public static String getXHTML(String html){
    final StringWriter xhtmlSink = new StringWriter();

    final Tidy cleaner = new Tidy();
    cleaner.setXHTML(true);
    cleaner.setMakeClean(true);
    cleaner.setShowWarnings(false);
    cleaner.setShowErrors(0);
    cleaner.setQuiet(true);
    cleaner.setPrintBodyOnly(true);
    cleaner.setOutputEncoding("ISO-8859-1");

    final StringReader htmlSource = new StringReader(html);
    cleaner.parse(htmlSource, xhtmlSink);

    return xhtmlSink.toString();
}
示例6: validate
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Validates the HTML content received after executing a partnership operation. The content is
 * passed as the input stream {@code ins}.
 *
 * <p>This operation is expensive: the whole HTML content is first transformed into XHTML by
 * JTidy and buffered in memory, and only then parsed by the SAX parser.
 *
 * @param ins the HTML content holding the result of the partnership operation
 * @throws NullPointerException
 *             if {@code ins} is {@code null}
 * @throws SAXException
 *             <ol>
 *             <li>when the verifier reports that the content did not verify without error</li>
 *             <li>when a {@code ConnectException} occurs (reported as a DTD download problem)</li>
 *             <li>when any other IO related problem occurs</li>
 *             </ol>
 * @throws ParserConfigurationException
 *             when the SAX parser cannot be configured
 */
public void validate(InputStream ins) throws SAXException, ParserConfigurationException
{
    if (ins == null) {
        throw new NullPointerException("Missing 'input stream' for validation");
    }
    try {
        // TODO: SLOW, It requires two full-scan transformation to find the result of the partnership operation.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();

        /* Transform to XHTML. */
        Tidy t = new Tidy();
        t.setXHTML(true);
        t.setQuiet(true);
        t.setShowWarnings(false);
        t.parse(ins, baos);

        /* Pipe the tidied bytes to another input stream. */
        ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());

        // Custom SAX handler that extracts the partnership operation result from the HTML.
        PageletContentVerifer verifer = new PageletContentVerifer();

        // Do not load the external DTD while parsing the tidied page.
        spf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        SAXParser parser = spf.newSAXParser();
        parser.parse(bais, verifer);

        if (!verifer.getIsVerifiedWithNoError()) {
            throw new SAXException("Fail to execute partnership operation as : " + verifer.getVerifiedMessage());
        }
    }
    catch (ConnectException cex) {
        // The cause is chained, so the stack trace is not printed separately here.
        throw new SAXException("Seems unable to download correct DTD from the web, behind proxy/firewall?", cex);
    }
    catch (IOException ioex) {
        throw new SAXException("IO Error during SAX parsing.", ioex);
    }
}
示例7: formatHtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Re-formats HTML as smart-indented XHTML using JTidy.
 *
 * <p>Tidy runs quietly with warnings hidden, without its generator mark, and with forced
 * output enabled.
 *
 * @param html the markup to format
 * @return the text Tidy produced
 */
public static String formatHtml(String html) {
    final StringReader source = new StringReader(html);
    final StringWriter formatted = new StringWriter();

    final Tidy formatter = new Tidy();
    formatter.setXHTML(true);
    formatter.setInputEncoding("UTF-8");
    formatter.setOutputEncoding("UTF-8");
    formatter.setTidyMark(false);
    formatter.setSmartIndent(true);
    formatter.setForceOutput(true);
    formatter.setShowWarnings(false);
    formatter.setQuiet(true);

    formatter.parse(source, formatted);
    return formatted.toString();
}
示例8: htmlOutputStreamViaTidy
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Runs the file at {@code pathOfHOCRFile} through JTidy (XHTML mode, doctype omitted, no line
 * wrapping, forced output) and writes the result to {@code outputFilePath} using
 * {@code UTF_ENCODING}.
 *
 * <p>Failures while flushing or closing the output are propagated to the caller rather than
 * swallowed, so an incompletely written file is reported.
 *
 * @param pathOfHOCRFile path of the input file
 * @param outputFilePath path of the output file
 * @throws IOException if either file cannot be opened, written or closed
 */
public static void htmlOutputStreamViaTidy(final String pathOfHOCRFile, final String outputFilePath) throws IOException {
    Tidy tidy = new Tidy();
    tidy.setXHTML(true);
    tidy.setDocType(DOC_TYPE_OMIT);
    tidy.setInputEncoding(UTF_ENCODING);
    tidy.setOutputEncoding(UTF_ENCODING);
    tidy.setForceOutput(true);
    tidy.setWraplen(0);

    /*
     * Fix for UTF-8 encoding to support special characters in turkish and czech language. UTF-8 encoding supports major
     * characters in all the languages
     */
    // Resources are closed in reverse order: input, writer (which flushes), buffer, file.
    try (OutputStream fout = new FileOutputStream(outputFilePath);
            OutputStream bout = new BufferedOutputStream(fout);
            OutputStreamWriter out = new OutputStreamWriter(bout, UTF_ENCODING);
            FileInputStream inputStream = new FileInputStream(pathOfHOCRFile)) {
        tidy.parse(inputStream, out);
    }
}
示例9: ConverterXhtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Converts the HTML file {@code fileHtml} to XHTML with JTidy and stores the result in
 * {@code fileXhtmlAux}.
 *
 * @param fileHtml input HTML file path
 * @param fileXhtmlAux output XHTML file path
 * @throws Exception if the files cannot be read or written
 */
public void ConverterXhtml(String fileHtml, String fileXhtmlAux) throws Exception {
    Tidy tidy = new Tidy();
    // try-with-resources closes both streams even when configuration or parsing fails.
    try (FileInputStream in = new FileInputStream(fileHtml);
            FileOutputStream out = new FileOutputStream(fileXhtmlAux)) {
        tidy.setTidyMark(false);
        tidy.setDocType("omit");
        tidy.setAltText("");
        tidy.setFixBackslash(true);
        tidy.setFixComments(true);
        tidy.setXmlPi(true);
        tidy.setQuoteAmpersand(true);
        tidy.setQuoteNbsp(true);
        tidy.setNumEntities(true);
        tidy.setXmlOut(true);
        tidy.setWraplen(999);
        tidy.setWriteback(true);
        tidy.setQuoteMarks(true);
        tidy.setLogicalEmphasis(true);
        tidy.setEncloseText(true);
        tidy.setHideEndTags(true);
        tidy.setShowWarnings(false);
        tidy.setQuiet(true);
        tidy.setXHTML(true);
        tidy.parse(in, out);
    }
}
示例10: ConverterXhtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Transforms the HTML file given as the first parameter to XHTML and stores the result in the
 * file given as the second parameter.
 *
 * @param fileHtml input html file path.
 * @param fileXhtmlAux output xhtml file path.
 * @throws Exception if the files can not be read or written.
 */
public void ConverterXhtml(String fileHtml, String fileXhtmlAux) throws Exception {
    Tidy tidy = new Tidy();
    // Both streams are closed automatically, including when parsing throws.
    try (FileInputStream in = new FileInputStream(fileHtml);
            FileOutputStream out = new FileOutputStream(fileXhtmlAux)) {
        tidy.setTidyMark(false);
        tidy.setDocType("omit");
        tidy.setAltText("");
        tidy.setFixBackslash(true);
        tidy.setFixComments(true);
        tidy.setXmlPi(true);
        tidy.setQuoteAmpersand(true);
        tidy.setQuoteNbsp(true);
        tidy.setNumEntities(true);
        tidy.setXmlOut(true);
        tidy.setWraplen(999);
        tidy.setWriteback(true);
        tidy.setQuoteMarks(true);
        tidy.setLogicalEmphasis(true);
        tidy.setEncloseText(true);
        tidy.setHideEndTags(true);
        tidy.setShowWarnings(false);
        tidy.setQuiet(true);
        tidy.setXHTML(true);
        tidy.parse(in, out);
    }
}
示例11: useJTidy
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Converts {@code file} to XHTML with JTidy, writing the result to {@code index.xml} in the
 * same directory, and then loads that file through {@code getDocument}.
 *
 * @param file the HTML file to convert
 * @return the document loaded from the generated {@code index.xml}
 * @throws IOException if the input cannot be read or the output cannot be written
 */
private Document useJTidy(File file) throws IOException {
    File xmlFile = new File(file.getParentFile(), "index.xml");
    Tidy tidy = new Tidy();
    tidy.setXHTML(true);
    // Close both streams before the generated file is read back.
    try (FileInputStream in = new FileInputStream(file);
            FileOutputStream out = new FileOutputStream(xmlFile)) {
        tidy.parse(in, out);
    }
    Document document = getDocument(xmlFile);
    xmlFile.deleteOnExit();
    return document;
}
示例12: getXHTML
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Runs the given HTML through JTidy in XHTML mode and returns what Tidy wrote.
 *
 * <p>Tidy runs quietly with warnings hidden, with numeric entities and make-clean enabled.
 *
 * @param _html the markup to convert
 * @return the text Tidy produced
 */
private String getXHTML( String _html ){
    final StringWriter xhtmlBuffer = new StringWriter();

    final Tidy converter = new Tidy();
    converter.setQuiet( true );
    converter.setNumEntities( true );
    converter.setShowWarnings( false );
    converter.setMakeClean( true );
    converter.setXHTML( true );

    final StringReader htmlInput = new StringReader( _html );
    converter.parse( htmlInput, xhtmlBuffer );

    return xhtmlBuffer.toString();
}
示例13: newTidy
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Builds a fresh, fully configured JTidy instance.
 *
 * <p>Every message Tidy reports is forwarded to the logger as a warning. The instance emits
 * XHTML without a doctype, reads and writes UTF-8, and additionally accepts {@code canvas}
 * as a block-level tag.
 *
 * @return the configured Tidy instance
 */
private static Tidy newTidy() {
    final Tidy configured = new Tidy();

    // Route Tidy's messages to the logger.
    final TidyMessageListener warnLogger = new TidyMessageListener() {
        @Override
        public void messageReceived(TidyMessage msg) {
            logger.warn(String.format("HTML warning at %s:%s: %s", msg.getLine(), msg.getColumn(), msg.getMessage()));
        }
    };
    configured.setMessageListener(warnLogger);

    // Keep the input's content: nothing is dropped or trimmed.
    configured.setDropEmptyParas(false);
    configured.setDropFontTags(false);
    configured.setDropProprietaryAttributes(false);
    configured.setTrimEmptyElements(false);

    configured.setXHTML(true);

    // No re-indentation of the output.
    configured.setIndentAttributes(false);
    configured.setIndentCdata(false);
    configured.setIndentContent(false);

    // Reporting: warnings follow the global quiet option.
    configured.setQuiet(true);
    configured.setShowWarnings(!Options.isQuietEnabled());
    configured.setShowErrors(0);

    configured.setEncloseBlockText(false);
    configured.setEscapeCdata(false);
    configured.setDocType("omit");

    // Encoding.
    configured.setInputEncoding("UTF-8");
    configured.setRawOut(true);
    configured.setOutputEncoding("UTF-8");

    configured.setFixUri(false);

    final Properties extraTags = new Properties();
    extraTags.put("new-blocklevel-tags", "canvas");
    configured.getConfiguration().addProps(extraTags);

    return configured;
}
示例14: convertHtmlToXhtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Uses the Tidy parser to convert HTML into correct XHTML.
 *
 * <p>While Tidy runs, {@code System.err} is replaced by a stream that discards everything,
 * which suppresses Tidy's diagnostics. The original stream is restored in a {@code finally}
 * block, so it is reinstated even if Tidy throws.
 * NOTE(review): swapping {@code System.err} affects the whole JVM, so output written to it
 * by other threads during the call is also discarded.
 *
 * @param html the HTML code
 * @return the generated XHTML code
 */
// Author: Marco Dörfler
public static String convertHtmlToXhtml(String html) {
    // Suppress error output.
    PrintStream errStream = System.err;
    System.setErr(new PrintStream(new OutputStream() {
        @Override
        public void write(int b) {
            // Intentionally discards the byte.
        }
    }));

    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    try {
        Tidy tidy = new Tidy();
        tidy.setXHTML(true);
        tidy.setCharEncoding(Configuration.UTF8);
        ByteArrayInputStream inputStream = new ByteArrayInputStream(html.getBytes
                (StandardCharsets.UTF_8));
        tidy.parseDOM(inputStream, outputStream);
    } finally {
        // Re-enable error output.
        System.setErr(errStream);
    }

    // Decoding with the Charset constant cannot raise UnsupportedEncodingException.
    return new String(outputStream.toByteArray(), StandardCharsets.UTF_8);
}
示例15: parseFileToDocument
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Parses the given file with a freshly created Tidy instance and returns the resulting DOM
 * document.
 *
 * @param file the file to parse
 * @param setXML value passed to {@code Tidy.setXmlOut}
 * @param setXHTML value passed to {@code Tidy.setXHTML}
 * @param setParseMark value passed to {@code Tidy.setTidyMark}; controls the Tidy meta tag
 *        in the header
 * @param docType value passed to {@code Tidy.setDocType}
 * @return the DOM document produced by Tidy
 * @throws RuntimeException if the file cannot be opened, parsed or closed; the original
 *         exception is kept as the cause
 */
public Document parseFileToDocument(File file, Boolean setXML, Boolean setXHTML, Boolean setParseMark, String docType){
    // NOTE(review): the Boolean arguments are presumably unboxed by the setters below, so a
    // null value would fail here, outside the try block - confirm against callers.
    tidy = new Tidy();
    // XML output
    tidy.setXmlOut(setXML);
    // set if extensible html
    tidy.setXHTML(setXHTML);
    // this controls the tidy meta tag in the header
    tidy.setTidyMark(setParseMark);
    tidy.setDocType(docType);

    // try-with-resources closes the stream even when parsing fails.
    try (FileInputStream fis = new FileInputStream(file)) {
        // parse input stream and return a DOM Document
        return tidy.parseDOM(fis, null);
    } catch (Exception e) {
        throw new RuntimeException("Unable to parse the file :" + file.getAbsolutePath() + " throws", e);
    }
}