本文整理匯總了 Java 中 org.w3c.tidy.Tidy.setXmlOut 方法的典型用法代碼示例。如果您正苦於以下問題:Java Tidy.setXmlOut 方法的具體用法?Java Tidy.setXmlOut 怎麼用?Java Tidy.setXmlOut 使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類 org.w3c.tidy.Tidy 的用法示例。
在下文中一共展示了 Tidy.setXmlOut 方法的 15 個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的 Java 代碼示例。
示例1: cleanNfo
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Runs the given NFO (XML) text through JTidy and returns the tidied markup.
 *
 * <p>This is a best-effort operation: whenever tidying raises an exception, the input is
 * handed back unchanged instead of failing.
 *
 * @param sourceNfoContent
 *          the XML content to be cleaned
 * @return the cleaned XML content, or {@code sourceNfoContent} itself if any exception occurs
 */
public static String cleanNfo(String sourceNfoContent) {
    String cleaned;
    try {
        Tidy cleaner = new Tidy();
        cleaner.setInputEncoding("UTF-8");
        cleaner.setOutputEncoding("UTF-8");
        cleaner.setWraplen(Integer.MAX_VALUE);
        cleaner.setXmlOut(true);
        cleaner.setSmartIndent(true);
        cleaner.setXmlTags(true);
        cleaner.setMakeClean(true);
        cleaner.setForceOutput(true);
        // keep JTidy silent: no progress chatter, no warnings
        cleaner.setQuiet(true);
        cleaner.setShowWarnings(false);

        StringWriter tidied = new StringWriter();
        cleaner.parse(new StringReader(sourceNfoContent), tidied);
        cleaned = tidied.toString();
    }
    catch (Exception ignored) {
        // deliberate fallback: any failure means the caller gets the untouched input
        cleaned = sourceNfoContent;
    }
    return cleaned;
}
示例2: run
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Starts the tidification: opens the document at {@code strUrl}, runs it through JTidy and
 * writes the result to {@code outFileName}. JTidy's error output is redirected to
 * {@code errOutFileName}.
 *
 * <p>An {@link IOException} raised while opening, processing or closing any of the three
 * handles is logged as a warning and not propagated.
 */
@Override
public void run() {
    Tidy tidy = new Tidy();
    tidy.setXmlOut(xmlOut);
    // try-with-resources guarantees that the error log, the URL stream and the output file
    // are all released, including when parsing or a later open() fails.
    try (PrintWriter errWriter = new PrintWriter(new FileWriter(errOutFileName), true);
            BufferedInputStream in = new BufferedInputStream(new URL(strUrl).openStream());
            FileOutputStream out = new FileOutputStream(outFileName)) {
        tidy.setErrout(errWriter);
        tidy.parse(in, out);
    }
    catch ( IOException e ) {
        log.warn( this.toString() + e.toString() );
    }
}
示例3: cleanupHtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Tidies an HTML fragment with JTidy and returns the result as a string.
 *
 * <p>JTidy is configured for XML output of the body content only, with numeric entities,
 * and with both input and output using {@code ENCODING}.
 *
 * @param story the HTML text to clean up
 * @return the markup JTidy produced for {@code story}
 */
private String cleanupHtml(String story) {
    Tidy cleaner = new Tidy();
    cleaner.setInputEncoding(ENCODING);
    cleaner.setOutputEncoding(ENCODING);
    cleaner.setPrintBodyOnly(true);
    cleaner.setXmlOut(true);
    cleaner.setSmartIndent(false);
    cleaner.setBreakBeforeBR(false);
    cleaner.setMakeBare(true);
    cleaner.setMakeClean(true);
    cleaner.setNumEntities(true);
    cleaner.setWraplen(0);

    StringWriter tidied = new StringWriter();
    cleaner.parse(new StringReader(story), tidied);
    return tidied.toString();
}
示例4: cleanXMLData
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Tidies the given XML text with JTidy, feeding it in and reading it back as UTF-8 bytes.
 *
 * @param data the XML text to clean
 * @return the output JTidy wrote, decoded as UTF-8
 * @throws UnsupportedEncodingException if the {@code "UTF-8"} charset name is not supported
 */
public static String cleanXMLData(String data) throws UnsupportedEncodingException {
    Tidy cleaner = new Tidy();
    cleaner.setInputEncoding("UTF-8");
    cleaner.setOutputEncoding("UTF-8");
    cleaner.setWraplen(Integer.MAX_VALUE);
    cleaner.setXmlOut(true);
    cleaner.setXmlTags(true);
    cleaner.setSmartIndent(true);
    cleaner.setMakeClean(true);
    cleaner.setForceOutput(true);

    ByteArrayInputStream source = new ByteArrayInputStream(data.getBytes("UTF-8"));
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    // Only the serialized output is used; the DOM returned by parseDOM is discarded.
    cleaner.parseDOM(source, sink);
    return sink.toString("UTF-8");
}
示例5: convert
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Opens the document at {@code url}, converts it to XML with JTidy and writes the result to
 * {@code outFileName}. JTidy's error output is redirected to {@code errOutFileName}.
 *
 * <p>An {@link IOException} raised while opening, processing or closing any of the handles is
 * printed to standard output and not propagated.
 */
public void convert() {
    Tidy tidy = new Tidy();
    tidy.setXmlOut(true);
    // try-with-resources releases the error log and both streams on every path, not only
    // when parse() returns normally.
    try (PrintWriter errWriter = new PrintWriter(new FileWriter(errOutFileName), true);
            BufferedInputStream in = new BufferedInputStream(new URL(url).openStream());
            FileOutputStream out = new FileOutputStream(outFileName)) {
        tidy.setErrout(errWriter);
        tidy.parse(in, out);
    } catch (IOException e) {
        System.out.println(this.toString() + e.toString());
    }
}
示例6: formatXml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Formats the given XML text by running it through JTidy with XML output enabled.
 *
 * <p>JTidy is kept quiet (no warnings, no tidy mark) and is told to produce output even when
 * it finds errors.
 *
 * @param xml the XML text to format
 * @return the text JTidy produced for {@code xml}
 * @throws TransformerException declared in the signature; no statement visible in this method
 *         raises it
 */
public static String formatXml(@NotNull String xml) throws TransformerException {
    Tidy formatter = new Tidy();
    formatter.setXmlOut(true);
    formatter.setInputEncoding("UTF-8");
    formatter.setOutputEncoding("UTF-8");
    formatter.setTidyMark(false);
    formatter.setForceOutput(true);
    formatter.setSmartIndent(true);
    formatter.setShowWarnings(false);
    formatter.setQuiet(true);

    StringWriter formatted = new StringWriter();
    formatter.parse(new StringReader(xml), formatted);
    return formatted.toString();
}
示例7: beautyHTML
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Pretty-prints markup with JTidy: the text is parsed into a DOM first and that DOM is then
 * printed back out.
 *
 * @param html the markup to beautify
 * @return the pretty-printed markup, decoded as UTF-8
 * @throws UnsupportedEncodingException if the {@code "UTF-8"} charset name is not supported
 */
private String beautyHTML(String html) throws UnsupportedEncodingException {
    Tidy printer = new Tidy();
    printer.setInputEncoding("UTF-8");
    printer.setOutputEncoding("UTF-8");
    printer.setWraplen(Integer.MAX_VALUE);
    printer.setXmlOut(true);
    printer.setXmlTags(true);
    printer.setSmartIndent(true);

    ByteArrayInputStream source = new ByteArrayInputStream(html.getBytes("UTF-8"));
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    // Parse without an output stream, then print the resulting DOM explicitly.
    Document parsed = printer.parseDOM(source, null);
    printer.pprint(parsed, sink);
    return sink.toString("UTF-8");
}
示例8: ConverterXhtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Converts the HTML file at {@code fileHtml} to XHTML with JTidy and stores the result in
 * {@code fileXhtmlAux}.
 *
 * @param fileHtml input html file path.
 * @param fileXhtmlAux output xhtml file path.
 * @throws Exception if the files can not be read or written.
 */
public void ConverterXhtml(String fileHtml, String fileXhtmlAux) throws Exception {
    Tidy tidy = new Tidy();
    tidy.setTidyMark(false);
    tidy.setDocType("omit");
    tidy.setAltText("");
    tidy.setFixBackslash(true);
    tidy.setFixComments(true);
    tidy.setXmlPi(true);
    tidy.setQuoteAmpersand(true);
    tidy.setQuoteNbsp(true);
    tidy.setNumEntities(true);
    tidy.setXmlOut(true);
    tidy.setWraplen(999);
    tidy.setWriteback(true);
    tidy.setQuoteMarks(true);
    tidy.setLogicalEmphasis(true);
    tidy.setEncloseText(true);
    tidy.setHideEndTags(true);
    tidy.setShowWarnings(false);
    tidy.setQuiet(true);
    tidy.setXHTML(true);
    // try-with-resources closes both files even when parse() or the second open() throws.
    try (FileInputStream in = new FileInputStream(fileHtml);
            FileOutputStream out = new FileOutputStream(fileXhtmlAux)) {
        tidy.parse(in, out);
    }
}
示例9: ConverterXhtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Transforms the data from the html file given as parameter to xhtml format and stores it in
 * the second file given.
 *
 * @param fileHtml input html file path.
 * @param fileXhtmlAux output xhtml file path.
 * @throws Exception if the files can not be read or written.
 */
public void ConverterXhtml(String fileHtml, String fileXhtmlAux) throws Exception {
    Tidy tidy = new Tidy();
    tidy.setTidyMark(false);
    tidy.setDocType("omit");
    tidy.setAltText("");
    tidy.setFixBackslash(true);
    tidy.setFixComments(true);
    tidy.setXmlPi(true);
    tidy.setQuoteAmpersand(true);
    tidy.setQuoteNbsp(true);
    tidy.setNumEntities(true);
    tidy.setXmlOut(true);
    tidy.setWraplen(999);
    tidy.setWriteback(true);
    tidy.setQuoteMarks(true);
    tidy.setLogicalEmphasis(true);
    tidy.setEncloseText(true);
    tidy.setHideEndTags(true);
    tidy.setShowWarnings(false);
    tidy.setQuiet(true);
    tidy.setXHTML(true);
    // try-with-resources closes both files even when parse() or the second open() throws.
    try (FileInputStream in = new FileInputStream(fileHtml);
            FileOutputStream out = new FileOutputStream(fileXhtmlAux)) {
        tidy.parse(in, out);
    }
}
示例10: initializeTidyBuilder
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Creates a fresh JTidy instance, stores it in {@code tidyBuilder} and configures it for
 * quiet UTF-8 XML processing with JTidy's clean-up options switched off.
 */
private void initializeTidyBuilder() {
    final Tidy builder = new Tidy();
    // publish the instance first, then configure it through the local alias
    tidyBuilder = builder;

    builder.setInputEncoding("UTF-8");
    builder.setOutputEncoding("UTF-8");
    builder.setXmlOut(true);

    builder.setShowWarnings(false);
    builder.setQuiet(true);

    builder.setDropEmptyParas(false);
    builder.setTidyMark(false);
    builder.setFixComments(false);
    builder.setTrimEmptyElements(false);
    builder.setJoinStyles(false);
    // important, otherwise jtidy manipulates the markup
    builder.setXmlTags(true);
}
示例11: tidyDocument
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Tidies the document read from {@code inputStream} with JTidy and returns the tidied bytes
 * as a new in-memory stream. The tidied text is also written to the trace log.
 *
 * @param inputStream the document to tidy, read as UTF-8
 * @return a stream over the bytes JTidy produced
 * @throws FileNotFoundException declared in the signature
 * @throws IOException declared in the signature; decoding the bytes for the trace log may
 *         raise its subclass {@code UnsupportedEncodingException}
 */
protected ByteArrayInputStream tidyDocument(InputStream inputStream) throws FileNotFoundException, IOException {
    ByteArrayOutputStream tidied = new ByteArrayOutputStream();

    Tidy cleaner = new Tidy();
    cleaner.setTrimEmptyElements(true);
    cleaner.setMakeClean(true);
    cleaner.setQuoteNbsp(true);
    cleaner.setXmlOut(true);
    cleaner.setInputEncoding("UTF-8");
    cleaner.parseDOM(inputStream, tidied);

    LOGGER.trace(tidied.toString("UTF-8"));
    byte[] tidiedBytes = tidied.toByteArray();
    return new ByteArrayInputStream(tidiedBytes);
}
示例12: getTidyHtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Cleans an HTML document with JTidy and returns the resulting XML as a byte array.
 *
 * <p>The document is read from the resource's local file path when one is set; otherwise it
 * is retrieved through the resource's URL.
 *
 * @param pandaSettings settings object whose resource map is searched for {@code resID}
 * @param resID unique ID of resource
 * @return clean XHTML document as {@code byte[]}
 * @throws IOException if the local file cannot be opened or the source stream cannot be
 *         closed; presumably also when fetching the online resource fails
 */
private byte[] getTidyHtml(PandaSettings pandaSettings, String resID) throws IOException {
    // Get local path to file, if null the URL field will be used to
    // retrieve resource
    ResourceInfo resInfo = pandaSettings.getResourceMap().getMap().get(resID);
    String filePath = resInfo.getFilePath();

    // properties for HTML cleaning
    Tidy tidy = new Tidy();
    // no output of warnings/errors
    tidy.setQuiet(true);
    tidy.setShowWarnings(false);
    tidy.setHideEndTags(true);
    tidy.setInputEncoding("UTF-8");
    tidy.setOutputEncoding("UTF-8");
    tidy.setWraplen(Integer.MAX_VALUE);
    // set output to XML
    tidy.setXmlOut(true);

    // get HTML document
    InputStream htmlDoc;
    if (filePath != null) {
        htmlDoc = new FileInputStream(filePath);
    } else {
        // Get online resource
        URL resURL = resInfo.getURL();
        htmlDoc = getOnlineResource(resURL);
    }

    // parse HTML; the source stream is owned by this method, so release it on every path
    // (try-with-resources skips close() when the resource is null)
    try (InputStream source = htmlDoc) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        tidy.parse(source, out);
        return out.toByteArray();
    }
}
示例13: parseFileToDocument
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Parses the given file with JTidy and returns the resulting DOM document.
 *
 * <p>A new {@code Tidy} instance is created and stored in the {@code tidy} field on every
 * call. The three {@code Boolean} arguments are auto-unboxed when handed to JTidy, so they
 * must not be {@code null}.
 *
 * @param file the file to parse
 * @param setXML value passed to {@code Tidy.setXmlOut}
 * @param setXHTML value passed to {@code Tidy.setXHTML}
 * @param setParseMark value passed to {@code Tidy.setTidyMark} (controls the tidy meta tag in
 *        the header)
 * @param docType value passed to {@code Tidy.setDocType}
 * @return the DOM document JTidy built from the file
 * @throws RuntimeException wrapping any exception raised while opening, parsing or closing
 *         the file
 */
public Document parseFileToDocument(File file, Boolean setXML, Boolean setXHTML, Boolean setParseMark, String docType){
    tidy = new Tidy();
    tidy.setXmlOut(setXML);
    tidy.setXHTML(setXHTML);
    tidy.setTidyMark(setParseMark);
    tidy.setDocType(docType);
    // try-with-resources closes the file even when parseDOM throws
    try (FileInputStream fis = new FileInputStream(file)) {
        // parse input stream and return a DOM Document
        return tidy.parseDOM(fis, null);
    } catch (Exception e) {
        throw new RuntimeException("Unable to parse the file :" + file.getAbsolutePath() + " throws", e);
    }
}
示例14: valueOf
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Cleans the given markup with JTidy and returns the inner content of the resulting
 * {@code body} element.
 *
 * <p>Blank input yields {@code null} when {@code removeBlankspaces} is set and the input
 * itself otherwise. When {@code removeBlankspaces} is set, leading and trailing {@code NBSP}
 * characters are stripped from the content. The final result goes through
 * {@code StringUtils.trimToNull}.
 *
 * @param string the markup to clean
 * @return the cleaned body content, or {@code null} when there is no body element or its
 *         content is empty
 */
public String valueOf(final String string) {
    if (StringUtils.isBlank(string)) {
        return removeBlankspaces ? null : string;
    }
    final Tidy tidy = new Tidy(); // obtain a new Tidy instance
    tidy.setXHTML(false); // set desired config options using tidy setters
    tidy.setQuiet(true);
    tidy.setShowErrors(0);
    tidy.setShowWarnings(false);
    tidy.setIndentContent(false);
    tidy.setXmlOut(true);
    final Document document = tidy.parseDOM(new StringReader(string), null);
    removeBadNodes(document);
    final NodeList bodies = document.getElementsByTagName("body");
    if (bodies.getLength() == 0) {
        // No body element? return null
        return null;
    }

    // The serialized form contains the xml header plus the body element itself; only the
    // body content is wanted. Locate the end of the opening tag rather than the literal
    // "<body>" so that a tag carrying attributes is handled as well.
    final String serialized = XmlHelper.toString(bodies.item(0));
    final int openTag = serialized.indexOf("<body");
    final int openTagEnd = openTag < 0 ? -1 : serialized.indexOf('>', openTag);
    final int closeTag = serialized.lastIndexOf("</body>");
    if (openTagEnd < 0 || closeTag <= openTagEnd) {
        // No separate closing tag (e.g. a self-closed <body/>): there is no content
        return null;
    }
    final String content = serialized.substring(openTagEnd + 1, closeTag);
    if (!removeBlankspaces) {
        return StringUtils.trimToNull(content);
    }

    // Remove the nbsps on both ends; the bounds checks keep an empty or all-NBSP content
    // from running past the string
    int begin = 0;
    int end = content.length();
    while (begin < end && content.charAt(begin) == NBSP) {
        begin++;
    }
    while (end > begin && content.charAt(end - 1) == NBSP) {
        end--;
    }
    return StringUtils.trimToNull(content.substring(begin, end));
}
示例15: convertToXMLOLD
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Runs the given text through JTidy in XML mode and returns what JTidy printed.
 *
 * <p>Note: an earlier, disabled variant of this method cut the content out of a
 * {@code <text>}, {@code <comment>} or {@code <description>} element, wrapped it in an HTML
 * skeleton before tidying and spliced the tidied body back in. The whole input is now
 * tidied as is.
 *
 * @param xml the text to convert
 * @return the output JTidy produced for {@code xml}
 */
public static String convertToXMLOLD(String xml){
    Tidy converter = new Tidy();
    converter.setInputEncoding("UTF-8");
    converter.setOutputEncoding("UTF-8");
    converter.setWraplen(Integer.MAX_VALUE);
    converter.setPrintBodyOnly(true);
    converter.setMakeClean(true);
    converter.setSmartIndent(true);
    converter.setXmlTags(true);
    converter.setXmlOut(true);

    StringWriter converted = new StringWriter();
    // Only the printed output is used; the DOM returned by parseDOM is discarded.
    converter.parseDOM(new StringReader(xml), converted);
    return converted.toString();
}