本文整理匯總了Java中org.w3c.tidy.Tidy.parse方法的典型用法代碼示例。如果您正苦於以下問題：Java Tidy.parse方法的具體用法？Java Tidy.parse怎麼用？Java Tidy.parse使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.w3c.tidy.Tidy的用法示例。
在下文中一共展示了Tidy.parse方法的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: cleanNfo
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Tries to clean the NFO (XML) content with JTidy.
 *
 * This is a best-effort operation: when the input is {@code null}, or when JTidy fails with any
 * exception, the source content is handed back unchanged instead of propagating the failure.
 *
 * @param sourceNfoContent
 *          the XML content to be cleaned (may be {@code null})
 * @return the cleaned XML content (or the source, if any Exceptions occur)
 */
public static String cleanNfo(String sourceNfoContent) {
    if (sourceNfoContent == null) {
        // nothing to clean; avoids relying on a swallowed NullPointerException from StringReader
        return sourceNfoContent;
    }
    try {
        Tidy tidy = new Tidy();
        tidy.setInputEncoding("UTF-8");
        tidy.setOutputEncoding("UTF-8");
        tidy.setWraplen(Integer.MAX_VALUE);
        tidy.setXmlOut(true);
        tidy.setSmartIndent(true);
        tidy.setXmlTags(true);
        tidy.setMakeClean(true);
        tidy.setForceOutput(true);
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        StringReader in = new StringReader(sourceNfoContent);
        StringWriter out = new StringWriter();
        tidy.parse(in, out);
        return out.toString();
    }
    catch (Exception ignored) {
        // deliberate best effort: fall through and return the untouched source below
    }
    return sourceNfoContent;
}
示例2: htmlOutputStreamForISOEncoding
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Runs the given hOCR file through JTidy (XHTML mode, ISO input/output encoding settings) and
 * writes the tidied markup to the given output file.
 *
 * @param pathOfHOCRFile path of the hOCR file to read
 * @param outputFilePath path of the file the tidied output is written to
 * @throws IOException if either file cannot be opened or a stream fails to close
 */
public static void htmlOutputStreamForISOEncoding(final String pathOfHOCRFile, final String outputFilePath) throws IOException {
    Tidy tidy = new Tidy();
    tidy.setXHTML(true);
    tidy.setDocType(DOC_TYPE_OMIT);
    tidy.setInputEncoding(ISO_ENCODING);
    tidy.setOutputEncoding(ISO_ENCODING);
    tidy.setHideEndTags(false);
    // NOTE(review): FileWriter encodes with the JVM default charset, which may differ from
    // ISO_ENCODING -- confirm whether an explicit charset is wanted here.
    // try-with-resources guarantees the writer is closed even when closing the input fails;
    // closing the writer also flushes it, so no separate flush() is needed.
    try (FileInputStream inputStream = new FileInputStream(pathOfHOCRFile);
            FileWriter outputStream = new FileWriter(outputFilePath)) {
        tidy.parse(inputStream, outputStream);
    }
}
示例3: run
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Starts the tidification: opens the document at {@code strUrl}, runs it through JTidy and
 * writes the result to {@code outFileName}. JTidy diagnostics are written to
 * {@code errOutFileName}.
 *
 * I/O failures are logged as a warning and not rethrown. All three streams are closed when the
 * method returns, whether or not parsing succeeded.
 */
@Override
public void run() {
    Tidy tidy = new Tidy();
    tidy.setXmlOut(xmlOut);
    // Resources are opened in the same order as before: error log, URL stream, output file.
    try (PrintWriter errOut = new PrintWriter(new FileWriter(errOutFileName), true);
            BufferedInputStream in = new BufferedInputStream(new URL(strUrl).openStream());
            FileOutputStream out = new FileOutputStream(outFileName)) {
        tidy.setErrout(errOut);
        tidy.parse(in, out);
    }
    catch ( IOException e ) {
        log.warn( this.toString() + e.toString() );
    }
}
示例4: cleanupHtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Runs the given story markup through JTidy and returns the serialized result.
 *
 * JTidy is configured for body-only XML output without wrapping, with numeric entities and with
 * the "make bare" / "make clean" options switched on.
 *
 * @param story the markup to tidy
 * @return the text JTidy wrote for the story
 */
private String cleanupHtml(String story) {
    final Tidy cleaner = new Tidy();

    // encoding settings
    cleaner.setInputEncoding(ENCODING);
    cleaner.setOutputEncoding(ENCODING);

    // output shape
    cleaner.setPrintBodyOnly(true);
    cleaner.setXmlOut(true);
    cleaner.setSmartIndent(false);
    cleaner.setBreakBeforeBR(false);

    // clean-up options
    cleaner.setMakeBare(true);
    cleaner.setMakeClean(true);
    cleaner.setNumEntities(true);
    cleaner.setWraplen(0);

    final StringWriter cleaned = new StringWriter();
    cleaner.parse(new StringReader(story), cleaned);
    return cleaned.toString();
}
示例5: getXHTML
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Converts an HTML fragment to XHTML with JTidy and returns only the body content.
 *
 * Warnings and errors are suppressed and JTidy runs in quiet mode.
 *
 * @param html the HTML text to convert
 * @return the text JTidy produced for the input
 */
public static String getXHTML(String html){
    final Tidy converter = new Tidy();
    converter.setXHTML(true);
    converter.setMakeClean(true);

    // keep JTidy silent
    converter.setShowWarnings(false);
    converter.setShowErrors(0);
    converter.setQuiet(true);

    converter.setPrintBodyOnly(true);
    converter.setOutputEncoding("ISO-8859-1");

    final StringReader source = new StringReader(html);
    final StringWriter xhtml = new StringWriter();
    converter.parse(source, xhtml);
    return xhtml.toString();
}
示例6: convert
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Opens the document at {@code url}, runs it through JTidy with XML output and writes the result
 * to {@code outFileName}. JTidy diagnostics are written to {@code errOutFileName}.
 *
 * I/O failures are printed to standard output and not rethrown. The URL stream, the output file
 * and the diagnostics writer are closed on every path, including when parsing fails.
 */
public void convert() {
    Tidy tidy = new Tidy();
    tidy.setXmlOut(true);
    // Resources are opened in the same order as before: error log, URL stream, output file.
    try (PrintWriter errOut = new PrintWriter(new FileWriter(errOutFileName), true);
            BufferedInputStream in = new BufferedInputStream(new URL(url).openStream());
            FileOutputStream out = new FileOutputStream(outFileName)) {
        tidy.setErrout(errOut);
        tidy.parse(in, out);
    } catch (IOException e) {
        System.out.println(this.toString() + e.toString());
    }
}
示例7: validate
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Validates the HTML content returned by a partnership operation.
 *
 * The stream is first converted to XHTML by JTidy into an in-memory buffer, and that buffer is
 * then parsed by a SAX parser with a {@code PageletContentVerifer} handler. Because the content
 * is scanned twice, this operation is comparatively expensive.
 *
 * @param ins the HTML content to validate; must not be {@code null}
 * @throws NullPointerException
 *          when {@code ins} is {@code null}
 * @throws SAXException
 *          when the handler reports that verification failed, when a
 *          {@code ConnectException} occurs (reported as a DTD download problem), or when
 *          any other I/O problem occurs
 * @throws ParserConfigurationException
 *          when the SAX parser cannot be configured or created
 */
public void validate(InputStream ins) throws SAXException, ParserConfigurationException
{
    if (ins == null) {
        throw new NullPointerException("Missing 'input stream' for validation");
    }
    try {
        // TODO: SLOW, It requires two full-scan transformation to find the result of the partnership operation.
        ByteArrayOutputStream xhtmlBuffer = new ByteArrayOutputStream();

        // Step 1: transform the incoming HTML to well-formed XHTML.
        Tidy tidy = new Tidy();
        tidy.setXHTML(true);
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        tidy.parse(ins, xhtmlBuffer);

        // Step 2: feed the buffered XHTML to the SAX parser through the custom handler.
        ByteArrayInputStream xhtmlInput = new ByteArrayInputStream(xhtmlBuffer.toByteArray());
        PageletContentVerifer handler = new PageletContentVerifer();
        spf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        SAXParser saxParser = spf.newSAXParser();
        saxParser.parse(xhtmlInput, handler);

        // Step 3: turn a negative verification result into an exception.
        if (!handler.getIsVerifiedWithNoError()) {
            throw new SAXException("Fail to execute partnership operation as : " + handler.getVerifiedMessage());
        }
    }
    catch (ConnectException cex) {
        cex.printStackTrace();
        throw new SAXException("Seems unable to download correct DTD from the web, behind proxy/firewall?", cex);
    }
    catch (IOException ioex) {
        throw new SAXException("IO Error during SAX parsing.", ioex);
    }
}
示例8: formatXml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Pretty-prints an XML string by passing it through JTidy in XML-output mode.
 *
 * JTidy runs quietly, without warnings and without its generator mark, and is told to force
 * output.
 *
 * @param xml the XML text to format
 * @return the text JTidy produced for the input
 * @throws TransformerException declared by the signature; nothing in this method throws it directly
 */
public static String formatXml(@NotNull String xml) throws TransformerException {
    final StringReader source = new StringReader(xml);

    final Tidy formatter = new Tidy();
    formatter.setXmlOut(true);
    formatter.setInputEncoding("UTF-8");
    formatter.setOutputEncoding("UTF-8");
    formatter.setTidyMark(false);
    formatter.setForceOutput(true);
    formatter.setSmartIndent(true);
    // silence diagnostics
    formatter.setShowWarnings(false);
    formatter.setQuiet(true);

    final StringWriter formatted = new StringWriter();
    formatter.parse(source, formatted);
    return formatted.toString();
}
示例9: formatHtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Pretty-prints an HTML string by passing it through JTidy in XHTML mode.
 *
 * JTidy runs quietly, without warnings and without its generator mark, and is told to force
 * output.
 *
 * @param html the HTML text to format
 * @return the text JTidy produced for the input
 */
public static String formatHtml(String html) {
    final StringReader source = new StringReader(html);

    final Tidy formatter = new Tidy();
    formatter.setXHTML(true);
    formatter.setInputEncoding("UTF-8");
    formatter.setOutputEncoding("UTF-8");
    formatter.setTidyMark(false);
    formatter.setSmartIndent(true);
    formatter.setForceOutput(true);
    // silence diagnostics
    formatter.setShowWarnings(false);
    formatter.setQuiet(true);

    final StringWriter formatted = new StringWriter();
    formatter.parse(source, formatted);
    return formatted.toString();
}
示例10: htmlOutputStreamViaTidy
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Runs the given hOCR file through JTidy (XHTML mode, forced output, no line wrapping) and writes
 * the result to the given output file through a writer created with {@code UTF_ENCODING}.
 *
 * @param pathOfHOCRFile path of the hOCR file to read
 * @param outputFilePath path of the file the tidied output is written to
 * @throws IOException if a file cannot be opened or the buffered output cannot be flushed
 */
public static void htmlOutputStreamViaTidy(final String pathOfHOCRFile, final String outputFilePath) throws IOException {
    Tidy tidy = new Tidy();
    tidy.setXHTML(true);
    tidy.setDocType(DOC_TYPE_OMIT);
    tidy.setInputEncoding(UTF_ENCODING);
    tidy.setOutputEncoding(UTF_ENCODING);
    tidy.setForceOutput(true);
    tidy.setWraplen(0);
    FileInputStream inputStream = null;
    OutputStream fout = null;
    OutputStream bout = null;
    OutputStreamWriter out = null;
    try {
        /*
         * Fix for UTF-8 encoding to support special characters in turkish and czech language. UTF-8 encoding supports major
         * characters in all the languages
         */
        fout = new FileOutputStream(outputFilePath);
        bout = new BufferedOutputStream(fout);
        out = new OutputStreamWriter(bout, UTF_ENCODING);
        inputStream = new FileInputStream(pathOfHOCRFile);
        tidy.parse(inputStream, out);
        // Flush explicitly: closeQuietly() below would swallow an IOException raised while
        // the buffered data is written out on close, silently leaving a truncated file.
        out.flush();
    } finally {
        IOUtils.closeQuietly(inputStream);
        IOUtils.closeQuietly(out);
        IOUtils.closeQuietly(bout);
        IOUtils.closeQuietly(fout);
    }
}
示例11: ConverterXhtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Converts the HTML file at {@code fileHtml} to XHTML with JTidy and writes the result to
 * {@code fileXhtmlAux}.
 *
 * Both file streams are closed on every path, including when opening the output file or
 * parsing fails.
 *
 * @param fileHtml input html file path.
 * @param fileXhtmlAux output xhtml file path.
 * @throws Exception if the files can not be read or written.
 */
public void ConverterXhtml(String fileHtml, String fileXhtmlAux) throws Exception {
    Tidy tidy = new Tidy();
    tidy.setTidyMark(false);
    tidy.setDocType("omit");
    tidy.setAltText("");
    tidy.setFixBackslash(true);
    tidy.setFixComments(true);
    tidy.setXmlPi(true);
    tidy.setQuoteAmpersand(true);
    tidy.setQuoteNbsp(true);
    tidy.setNumEntities(true);
    tidy.setXmlOut(true);
    tidy.setWraplen(999);
    tidy.setWriteback(true);
    tidy.setQuoteMarks(true);
    tidy.setLogicalEmphasis(true);
    tidy.setEncloseText(true);
    tidy.setHideEndTags(true);
    tidy.setShowWarnings(false);
    tidy.setQuiet(true);
    tidy.setXHTML(true);
    // The input is opened before the output, as before, so a missing input file still fails
    // before the output file is created.
    try (FileInputStream in = new FileInputStream(fileHtml);
            FileOutputStream out = new FileOutputStream(fileXhtmlAux)) {
        tidy.parse(in, out);
    }
}
示例12: ConverterXhtml
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Method used to transform the data from the html file given as parameter to
 * xhtml format file which will be stored in the second file given.
 *
 * Both file streams are closed on every path, including when opening the output file or
 * parsing fails.
 *
 * @param fileHtml input html file path.
 * @param fileXhtmlAux output xhtml file path.
 * @throws Exception if the files can not be read or written.
 */
public void ConverterXhtml(String fileHtml, String fileXhtmlAux) throws Exception {
    Tidy tidy = new Tidy();
    tidy.setTidyMark(false);
    tidy.setDocType("omit");
    tidy.setAltText("");
    tidy.setFixBackslash(true);
    tidy.setFixComments(true);
    tidy.setXmlPi(true);
    tidy.setQuoteAmpersand(true);
    tidy.setQuoteNbsp(true);
    tidy.setNumEntities(true);
    tidy.setXmlOut(true);
    tidy.setWraplen(999);
    tidy.setWriteback(true);
    tidy.setQuoteMarks(true);
    tidy.setLogicalEmphasis(true);
    tidy.setEncloseText(true);
    tidy.setHideEndTags(true);
    tidy.setShowWarnings(false);
    tidy.setQuiet(true);
    tidy.setXHTML(true);
    // The input is opened before the output, as before, so a missing input file still fails
    // before the output file is created.
    try (FileInputStream in = new FileInputStream(fileHtml);
            FileOutputStream out = new FileOutputStream(fileXhtmlAux)) {
        tidy.parse(in, out);
    }
}
示例13: useJTidy
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Converts the given file to XHTML with JTidy, writing the result to an {@code index.xml} file
 * in the same directory, and loads that file through {@code getDocument}.
 *
 * The intermediate file is registered for deletion on JVM exit once it has been loaded.
 *
 * @param file the file to convert
 * @return the document obtained from the intermediate XHTML file
 * @throws IOException if the input or intermediate file cannot be opened or closed
 */
private Document useJTidy(File file) throws IOException {
    File xmlFile = new File(file.getParentFile(), "index.xml");
    Tidy tidy = new Tidy();
    tidy.setXHTML(true);
    // Close both streams before the intermediate file is read back; the original left them
    // open for the lifetime of the JVM.
    try (FileInputStream in = new FileInputStream(file);
            FileOutputStream out = new FileOutputStream(xmlFile)) {
        tidy.parse(in, out);
    }
    Document document = getDocument(xmlFile);
    xmlFile.deleteOnExit();
    return document;
}
示例14: getXHTML
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Converts the given HTML text to XHTML with JTidy.
 *
 * JTidy runs quietly without warnings, emits numeric entities and has the "make clean" option
 * switched on.
 *
 * @param _html the HTML text to convert
 * @return the text JTidy produced for the input
 */
private String getXHTML( String _html ){
    final Tidy converter = new Tidy();
    converter.setQuiet( true );
    converter.setNumEntities( true );
    converter.setShowWarnings( false );
    converter.setMakeClean( true );
    converter.setXHTML( true );

    final StringReader source = new StringReader( _html );
    final StringWriter xhtml = new StringWriter();
    converter.parse( source, xhtml );
    return xhtml.toString();
}
示例15: testRunFromUrlJobAndConf
import org.w3c.tidy.Tidy; //導入方法依賴的package包/類
/**
 * Runs {@code Main} with a job and a configuration referenced by URL, then checks that the HTML
 * report file exists, that its second line is {@code <html>}, and that JTidy reports no
 * warnings or errors for it.
 *
 * The test returns early (after printing a notice) when the host eobjects.org cannot be
 * resolved.
 */
public void testRunFromUrlJobAndConf() throws Throwable {
    // first check if we have a connection
    try {
        InetAddress.getByName("eobjects.org");
    } catch (UnknownHostException e) {
        final String testId = getClass().getSimpleName() + "." + getName();
        System.err.println("Skipping test " + testId
                + " since we don't seem to be able to reach eobjects.org");
        e.printStackTrace();
        return;
    }

    // produce the report
    final String filename = "target/test_run_from_url_job_and_conf.html";
    final String commandLine = "-ot HTML -of " + filename + " -job http://eobjects.org/resources/example_repo/DC/jobs/random_number_generation.analysis.xml -conf http://eobjects.org/resources/example_repo/DC/conf.xml";
    Main.main(commandLine.split(" "));

    final File report = new File(filename);
    assertTrue(report.exists());

    // the second line of the report is expected to be the opening html tag
    final String[] reportLines = FileHelper.readFileAsString(report).split("\n");
    assertEquals("<html>", reportLines[1]);

    // let JTidy parse the report and capture its diagnostics
    final Tidy tidy = new Tidy();
    final StringWriter diagnostics = new StringWriter();
    tidy.setTrimEmptyElements(false);
    tidy.setErrout(new PrintWriter(diagnostics));
    tidy.parse(FileHelper.getReader(report), System.out);

    final String parserOutput = diagnostics.toString();
    assertTrue("Parser output was:\n" + parserOutput,
            parserOutput.contains("no warnings or errors were found"));
}