当前位置: 首页>>代码示例>>Java>>正文


Java HtmlCleaner.getProperties方法代码示例

本文整理汇总了Java中org.htmlcleaner.HtmlCleaner.getProperties方法的典型用法代码示例。如果您正苦于以下问题：Java HtmlCleaner.getProperties方法的具体用法是什么？Java HtmlCleaner.getProperties怎么用？有哪些Java HtmlCleaner.getProperties的使用例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.htmlcleaner.HtmlCleaner的用法示例。


在下文中一共展示了HtmlCleaner.getProperties方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: createHtmlCleaner

import org.htmlcleaner.HtmlCleaner; //导入方法依赖的package包/类
/**
 * Creates a new {@link HtmlCleaner} and tunes the properties object it exposes.
 *
 * <p>The settings applied are exactly the ones listed in the body; the pruned
 * tag list is {@code "script,title"}.
 *
 * @return the configured cleaner instance
 */
private static HtmlCleaner createHtmlCleaner() {
    final HtmlCleaner cleaner = new HtmlCleaner();
    final CleanerProperties props = cleaner.getProperties();

    // Escaping behaviour.
    props.setAdvancedXmlEscape(true);

    // XML declaration is omitted, the doctype declaration is kept.
    props.setOmitXmlDeclaration(true);
    props.setOmitDoctypeDeclaration(false);

    // Entity / character handling.
    props.setTranslateSpecialEntities(true);
    props.setTransResCharsToNCR(true);
    props.setRecognizeUnicodeChars(true);

    // Miscellaneous output options.
    props.setIgnoreQuestAndExclam(true);
    props.setUseEmptyElementTags(false);

    // Tags handed to the cleaner's prune list.
    props.setPruneTags("script,title");

    return cleaner;
}
 
开发者ID:SysdataSpA,项目名称:SDHtmlTextView,代码行数:21,代码来源:HtmlSpanner.java

示例2: toHTML

import org.htmlcleaner.HtmlCleaner; //导入方法依赖的package包/类
/**
 * Serializes the supplied markup with HtmlCleaner's {@code PrettyXmlSerializer}
 * (namespace-aware, two-space indent), removes lines that contain only spaces
 * or tabs, and passes the result through {@code escapeXML}.
 *
 * @param htmlIn the markup to process
 * @return the processed text, or {@code null} if any exception is raised
 *         (the stack trace is printed in that case)
 */
public static String toHTML( String htmlIn )
{
    try
    {
        HtmlCleaner htmlCleaner = new HtmlCleaner();
        htmlCleaner.getProperties().setNamespacesAware( true );

        XmlSerializer serializer = new PrettyXmlSerializer( htmlCleaner.getProperties(), "  " );
        String serialized = serializer.getAsString( htmlIn );

        // Strip whitespace-only lines before escaping.
        String withoutBlankLines = serialized.replaceAll("(?m)^[ \t]*\r?\n", "");

        return escapeXML( withoutBlankLines );
    }
    catch( Exception e )
    {
        e.printStackTrace();
        return null;
    }
}
 
开发者ID:xframium,项目名称:xframium-java,代码行数:28,代码来源:XMLEscape.java

示例3: getHTML

import org.htmlcleaner.HtmlCleaner; //导入方法依赖的package包/类
/**
 * Converts the workbook to HTML, runs it through HtmlCleaner and returns the
 * pretty-printed result as bytes.
 *
 * <p>The page size starts as 21.0 x 29.7 (presumably A4 in centimetres -
 * confirm against {@code convert}) and the two dimensions are exchanged when
 * {@code isLandscape()} is true.
 *
 * @param book the workbook to render
 * @return the cleaned, serialized document
 * @throws IOException if cleaning or serialization fails
 */
public byte[] getHTML(HSSFWorkbook book) throws IOException {
    double width = 21.0;
    double height = 29.7;
    if (isLandscape()) {
        // Plain swap: the add/subtract trick can introduce floating-point rounding.
        double swap = width;
        width = height;
        height = swap;
    }

    // Encode explicitly as UTF-8 because the cleaner below decodes the stream
    // as UTF-8; the platform default charset may differ.
    // NOTE(review): assumes convert(...) returns a String - confirm.
    byte[] html = convert(book, width, height).getBytes("UTF-8");
    ByteArrayInputStream in = new ByteArrayInputStream(html);

    // Clean up the HTML to be well formed
    HtmlCleaner cleaner = new HtmlCleaner();
    CleanerProperties props = cleaner.getProperties();
    TagNode node = cleaner.clean(in, "UTF-8");

    // Serialize into an in-memory buffer and hand back its contents.
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    new PrettyXmlSerializer(props).writeToStream(node, out);

    return out.toByteArray();
}
 
开发者ID:rmage,项目名称:gnvc-ims,代码行数:26,代码来源:ReportModel.java

示例4: toHTML

import org.htmlcleaner.HtmlCleaner; //导入方法依赖的package包/类
/**
 * Reads the whole stream, and if the text contains the substring
 * {@code "html"}, pretty-prints it with HtmlCleaner, removes whitespace-only
 * lines, escapes it via {@code escapeXML} and strips the HTML 2.0 doctype
 * declaration.
 *
 * <p>Text is decoded and re-encoded with the platform default charset. The
 * input stream is not closed here.
 *
 * @param htmlIn the stream to read the markup from
 * @return a stream over the processed markup, or {@code null} when the input
 *         does not contain {@code "html"} or any exception occurs
 */
public InputStream toHTML( InputStream htmlIn )
{
    try
    {
        // Collect the raw bytes first and decode once: decoding each 512-byte
        // chunk on its own corrupts multi-byte characters that straddle a
        // chunk boundary.
        java.io.ByteArrayOutputStream rawBytes = new java.io.ByteArrayOutputStream();
        byte[] buffer = new byte[ 512 ];
        int bytesRead;
        while ( (bytesRead = htmlIn.read( buffer ) ) != -1 )
        {
            rawBytes.write( buffer, 0, bytesRead );
        }
        String content = rawBytes.toString();

        if ( content.indexOf( "html" ) == -1 )
        {
            return null;
        }

        HtmlCleaner cleaner = new HtmlCleaner();
        cleaner.getProperties().setNamespacesAware( true );

        XmlSerializer xmlSerializer = new PrettyXmlSerializer( cleaner.getProperties(), "  " );
        String htmlData = xmlSerializer.getAsString( content );

        // Drop whitespace-only lines, then escape.
        htmlData = escapeXML( htmlData.replaceAll("(?m)^[ \t]*\r?\n", "") );

        htmlData = htmlData.replace( "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">", "" );
        return new ByteArrayInputStream( htmlData.getBytes() );
    }
    catch( Exception ignored )
    {
        // Best effort: callers receive null for any read or conversion failure.
        return null;
    }
}
 
开发者ID:xframium,项目名称:xframium-java,代码行数:45,代码来源:HTTPLinkCheck.java

示例5: htmlToWiki

import org.htmlcleaner.HtmlCleaner; //导入方法依赖的package包/类
/**
 * Converts an HTML fragment to wiki markup.
 *
 * <p>The HTML is cleaned with HtmlCleaner, turned into a DOM, and the
 * document's child nodes are handed to {@code processChildNodes}, which
 * appends the wiki text to a buffer.
 *
 * @param html        the HTML to convert
 * @param contextPath passed through to {@code processChildNodes}
 * @param projectId   passed through to {@code processChildNodes}
 * @return the trimmed wiki text terminated by {@code CRLF}, or an empty string
 *         when nothing was produced
 * @throws Exception if cleaning, DOM creation or node processing fails
 */
public static String htmlToWiki(String html, String contextPath, int projectId) throws Exception {

    // Replace &nbsp; up front; otherwise it is converted to a unicode character.
    html = StringUtils.replace(html, "&nbsp;", " ");

    // Build a DOM from the cleaned HTML.
    HtmlCleaner htmlCleaner = new HtmlCleaner();
    CleanerProperties cleanerProps = htmlCleaner.getProperties();
    TagNode root = htmlCleaner.clean(html);
    Document dom = new DomSerializer(cleanerProps, true).createDOM(root);
    if (LOG.isTraceEnabled()) {
        LOG.trace(html);
    }

    // Collect the document's direct children and emit their wiki equivalent.
    StringBuffer wiki = new StringBuffer();
    ArrayList<Node> topLevelNodes = new ArrayList<Node>();
    for (int idx = 0; idx < dom.getChildNodes().getLength(); idx++) {
        topLevelNodes.add(dom.getChildNodes().item(idx));
    }
    processChildNodes(topLevelNodes, wiki, 0, true, true, false, "", contextPath, projectId);

    if (wiki.length() == 0) {
        return "";
    }

    String content = wiki.toString().trim();
    if (content.contains("&apos;")) {
        // Determine if this is where the &apos; is being introduced
        content = StringUtils.replace(content, "&apos;", "'");
    }
    return content.endsWith(CRLF) ? content : content + CRLF;
}
 
开发者ID:Concursive,项目名称:concourseconnect-community,代码行数:38,代码来源:HTMLToWikiUtils.java

示例6: createHtmlCleaner

import org.htmlcleaner.HtmlCleaner; //导入方法依赖的package包/类
/**
 * Creates a new {@link HtmlCleaner} and applies this class's property
 * settings to the properties object the cleaner exposes.
 *
 * @return the configured cleaner instance
 */
private static HtmlCleaner createHtmlCleaner() {
    final HtmlCleaner cleaner = new HtmlCleaner();
    final CleanerProperties props = cleaner.getProperties();

    // XML declaration is omitted, the doctype declaration is kept.
    props.setOmitXmlDeclaration(true);
    props.setOmitDoctypeDeclaration(false);

    // Character / entity handling.
    props.setRecognizeUnicodeChars(true);
    props.setTranslateSpecialEntities(false);

    // Remaining output options.
    props.setIgnoreQuestAndExclam(true);
    props.setUseEmptyElementTags(false);

    return cleaner;
}
 
开发者ID:DASAR,项目名称:epublib-android,代码行数:12,代码来源:HtmlCleanerBookProcessor.java

示例7: parseHhc

import org.htmlcleaner.HtmlCleaner; //导入方法依赖的package包/类
/**
 * Parses an HHC table-of-contents stream into TOC references.
 *
 * <p>The stream is cleaned with HtmlCleaner and converted to a DOM; the node
 * selected by the XPath {@code body/ul} (relative to the document element) is
 * then handed to {@code processUlNode}.
 *
 * @param hhcFile   stream containing the HHC markup
 * @param resources passed through to {@code processUlNode}
 * @return whatever {@code processUlNode} produces for the selected node
 * @throws IOException                  if the stream cannot be read
 * @throws ParserConfigurationException if the DOM cannot be created
 * @throws XPathExpressionException     if the XPath evaluation fails
 */
public static List<TOCReference> parseHhc(InputStream hhcFile, Resources resources) throws IOException, ParserConfigurationException,	XPathExpressionException {
	HtmlCleaner cleaner = new HtmlCleaner();
	CleanerProperties cleanerProps = cleaner.getProperties();
	TagNode root = cleaner.clean(hhcFile);
	Document dom = new DomSerializer(cleanerProps).createDOM(root);

	// Locate the top-level <ul> under <body>.
	XPath xpathEvaluator = XPathFactory.newInstance().newXPath();
	Object topList = xpathEvaluator.evaluate("body/ul", dom.getDocumentElement(), XPathConstants.NODE);

	return processUlNode((Node) topList, resources);
}
 
开发者ID:DASAR,项目名称:epublib-android,代码行数:12,代码来源:HHCParser.java

示例8: getStandardCredit

import org.htmlcleaner.HtmlCleaner; //导入方法依赖的package包/类
/**
 * Fetches the credit-standard page for a year/matric and returns the eight
 * credit values of the row whose first column contains the given department.
 *
 * <p>The response is patched so that every {@code <td} and {@code <tr>} is
 * preceded by a closing {@code </td>}, normalised with HtmlCleaner, and the
 * first element with {@code border="1"} is scanned row by row (the first row
 * is skipped). Spaces and newlines are ignored when comparing department names.
 *
 * @param year       value sent as the {@code year} request parameter
 * @param index      index into {@code matrics} selecting the {@code matric} parameter
 * @param department department name to look for in the first column
 * @return the cleaned text of columns 1 to 8 of the matching row
 * @throws Exception with a fixed message, and the underlying failure as its
 *                   cause, if the page cannot be fetched or parsed or no row matches
 */
public static ArrayList<String> getStandardCredit(String year, int index,
                                                  String department) throws Exception {
    try {
        HashMap<String, String> params = new HashMap<>();
        params.put("format", "-3");
        params.put("year", year);
        params.put("matric", matrics.get(index));
        String result = Connector
                .getDataByPost(getStandardUri(lang), params, "big5");

        // Insert closing </td> tags in front of every cell and row start
        // before handing the markup to the cleaner.
        result = result.replace("<td", "</td><td");
        result = result.replace("<tr>", "</td><tr>");

        HtmlCleaner cleaner = new HtmlCleaner();
        CleanerProperties props = cleaner.getProperties();
        props.setUseCdataForScriptAndStyle(true);
        props.setRecognizeUnicodeChars(true);
        props.setUseEmptyElementTags(true);
        props.setAdvancedXmlEscape(true);
        props.setTranslateSpecialEntities(true);
        props.setBooleanAttributeValues("empty");
        result = new PrettyHtmlSerializer(props).getAsString(result);

        TagNode tagNode = cleaner.clean(result);
        TagNode[] tables = tagNode.getElementsByAttValue("border", "1",
                true, false);
        TagNode[] rows = tables[0].getElementsByName("tr", true);

        // Normalise the requested department once instead of on every row.
        String wantedDepartment = department.replace(" ", "").replace("\n", "");

        // Row 0 is skipped.
        for (int i = 1; i < rows.length; i++) {
            TagNode[] cols = rows[i].getElementsByName("td", true);
            String rowDepartment = cols[0].getText().toString()
                    .replace(" ", "").replace("\n", "");
            if (rowDepartment.contains(wantedDepartment)) {
                ArrayList<String> standard = new ArrayList<>();
                for (int j = 1; j < 9; j++) {
                    String credit = Utility.cleanString(cols[j].getText()
                            .toString());
                    standard.add(credit);
                }
                return standard;
            }
        }
        throw new Exception("No row matches department: " + department);
    } catch (Exception e) {
        e.printStackTrace();
        // Keep the user-facing message, but preserve the root cause for debugging.
        throw new Exception("畢業學分標準讀取時發生錯誤", e);
    }
}
 
开发者ID:kamisakihideyoshi,项目名称:TaipeiTechRefined,代码行数:44,代码来源:CreditConnector.java

示例9: stripSignatureForHtmlMessage

import org.htmlcleaner.HtmlCleaner; //导入方法依赖的package包/类
/**
 * Removes a dash-style signature from an HTML message body and then
 * normalises the HTML with HtmlCleaner.
 *
 * <p>If {@code DASH_SIGNATURE_HTML} matches anywhere in {@code content}, the
 * text is truncated at a signature match. When blockquote tags are present
 * (located with {@code BLOCKQUOTE_START} / {@code BLOCKQUOTE_END}), only the
 * spans outside them are searched: before the first opening tag, between the
 * i-th closing tag and the (i+1)-th opening tag, and after the last closing
 * tag. If the numbers of opening and closing tags differ, nothing is stripped.
 *
 * <p>Regardless of whether anything was stripped, the content is always parsed
 * and re-serialized by HtmlCleaner before being returned.
 *
 * @param content the HTML message body
 * @return the cleaned HTML, with the signature removed where one was found
 */
public static String stripSignatureForHtmlMessage(String content) {
    Matcher dashSignatureHtml = DASH_SIGNATURE_HTML.matcher(content);
    if (dashSignatureHtml.find()) {
        Matcher blockquoteStart = BLOCKQUOTE_START.matcher(content);
        Matcher blockquoteEnd = BLOCKQUOTE_END.matcher(content);
        // Offsets of every opening / closing blockquote match, in document order.
        List<Integer> start = new ArrayList<>();
        List<Integer> end = new ArrayList<>();

        while (blockquoteStart.find()) {
            start.add(blockquoteStart.start());
        }
        while (blockquoteEnd.find()) {
            end.add(blockquoteEnd.start());
        }
        if (start.size() != end.size()) {
            // Unbalanced tags: leave the content untouched rather than guess.
            Log.d(K9.LOG_TAG, "There are " + start.size() + " <blockquote> tags, but " +
                    end.size() + " </blockquote> tags. Refusing to strip.");
        } else if (start.size() > 0) {
            // Ignore quoted signatures in blockquotes.
            // region(...) resets the matcher, so each find() below searches
            // only the given span of the original content.
            dashSignatureHtml.region(0, start.get(0));
            if (dashSignatureHtml.find()) {
                // before first <blockquote>.
                content = content.substring(0, dashSignatureHtml.start());
            } else {
                for (int i = 0; i < start.size() - 1; i++) {
                    // within blockquotes.
                    // Only search the gap when the i-th closing tag precedes
                    // the next opening tag.
                    if (end.get(i) < start.get(i + 1)) {
                        dashSignatureHtml.region(end.get(i), start.get(i + 1));
                        if (dashSignatureHtml.find()) {
                            content = content.substring(0, dashSignatureHtml.start());
                            break;
                        }
                    }
                }
                if (end.get(end.size() - 1) < content.length()) {
                    // after last </blockquote>.
                    dashSignatureHtml.region(end.get(end.size() - 1), content.length());
                    if (dashSignatureHtml.find()) {
                        content = content.substring(0, dashSignatureHtml.start());
                    }
                }
            }
        } else {
            // No blockquotes found.
            // Cut at the match found by the initial find() above.
            content = content.substring(0, dashSignatureHtml.start());
        }
    }

    // Fix the stripping off of closing tags if a signature was stripped,
    // as well as clean up the HTML of the quoted message.
    HtmlCleaner cleaner = new HtmlCleaner();
    CleanerProperties properties = cleaner.getProperties();

    // see http://htmlcleaner.sourceforge.net/parameters.php for descriptions
    properties.setNamespacesAware(false);
    properties.setAdvancedXmlEscape(false);
    properties.setOmitXmlDeclaration(true);
    properties.setOmitDoctypeDeclaration(false);
    properties.setTranslateSpecialEntities(false);
    properties.setRecognizeUnicodeChars(false);

    // Parse and serialize back to a string; the charset name passed is "UTF8".
    TagNode node = cleaner.clean(content);
    SimpleHtmlSerializer htmlSerialized = new SimpleHtmlSerializer(properties);
    content = htmlSerialized.getAsString(node, "UTF8");
    return content;
}
 
开发者ID:scoute-dich,项目名称:K9-MailClient,代码行数:67,代码来源:QuotedMessageHelper.java

示例10: updateArtists

import org.htmlcleaner.HtmlCleaner; //导入方法依赖的package包/类
/**
 * Downloads the archive.org artist listing, extracts (artist, count) pairs
 * and stores them in the given data store.
 *
 * <p>Artist names come from the anchor elements and counts from the text
 * nodes following them inside {@code div.row div.col-sm-4}. Pairs are only
 * stored when both node lists have the same length. On success the
 * {@code artistUpdate} preference is set to today's date.
 *
 * <p>The HTTP client is always shut down, including when the request or the
 * parsing throws.
 *
 * @param db the store that receives the artists and the update timestamp
 * @return always {@code true}; failures are only logged
 */
public static Boolean updateArtists(StaticDataStore db){
	Logging.Log(LOG_TAG, "Fetching Artists");
	ArrayList<ArrayList<String>> artists = new ArrayList<ArrayList<String>>();

	HtmlCleaner pageParser = new HtmlCleaner();
	CleanerProperties props = pageParser.getProperties();
	props.setAllowHtmlInsideAttributes(true);
	props.setAllowMultiWordAttributes(true);
	props.setRecognizeUnicodeChars(true);
	props.setOmitComments(true);

	HttpClient client = null;
	try {
		String url = "http://www.archive.org/browse.php?field=/metadata/bandWithMP3s&collection=etree";

		// Same 15 second limit for connecting and for socket reads.
		HttpParams params = new BasicHttpParams();
		int timeout = (int) (15 * DateUtils.SECOND_IN_MILLIS);
		HttpConnectionParams.setConnectionTimeout(params, timeout);
		HttpConnectionParams.setSoTimeout(params, timeout);
		client = new DefaultHttpClient(params);

		HttpGet request = new HttpGet(url);
		HttpResponse response = client.execute(request);
		StatusLine status = response.getStatusLine();
		if (status.getStatusCode() == HttpStatus.SC_OK) {
			ResponseHandler<String> responseHandler = new BasicResponseHandler();
			TagNode node = pageParser.clean(responseHandler.handleResponse(response));

			// NOTE(review): the DOM serializer is given fresh default properties,
			// not the ones configured above - left unchanged on purpose.
			org.w3c.dom.Document doc = new DomSerializer(new CleanerProperties()).createDOM(node);
			XPath xpath = XPathFactory.newInstance().newXPath();
			NodeList artistNodes = (NodeList) xpath.evaluate("//div[@class='row']//div[@class='col-sm-4']/a", doc, XPathConstants.NODESET);
			NodeList numberNodes = (NodeList) xpath.evaluate("//div[@class='row']//div[@class='col-sm-4']/text()[preceding-sibling::a]", doc, XPathConstants.NODESET);
			Logging.Log(LOG_TAG, "artistNodes: " + artistNodes.getLength());
			Logging.Log(LOG_TAG, "numberNodes: " + numberNodes.getLength());

			// Pair names with counts only when the two lists line up one-to-one.
			if(artistNodes.getLength() == numberNodes.getLength()){
				for (int i = 0; i < artistNodes.getLength(); i++) {
					ArrayList<String> artistPair = new ArrayList<String>();
					// Undo the XML escaping left in the text content; &amp; is
					// handled last so other entities are not unescaped twice.
					artistPair.add(artistNodes.item(i).getTextContent().replace("&apos;", "'").replace("&gt;", ">").replace("&lt;", "<").replace("&quot;", "\"").replace("&amp;", "&"));
					artistPair.add(numberNodes.item(i).getTextContent());
					artists.add(artistPair);
				}
			}
			if (artists.size() > 0) {
				db.insertArtistBulk(artists);
				String s = DateFormat.format("yyyy-MM-dd", new GregorianCalendar().getTime()).toString();
				db.updatePref("artistUpdate", s);
				Logging.Log(LOG_TAG, "Finished Fetching Artists");
			}
			else {
				Logging.Log(LOG_TAG, "Error Fetching Artists");
			}
		}
	} catch(Exception e) {
		e.printStackTrace();
		Logging.Log(LOG_TAG, "Error Fetching Artists");
	} finally {
		// Release the connections on every path, including failures.
		if (client != null) {
			client.getConnectionManager().shutdown();
		}
	}
	return true;

}
 
开发者ID:sedenardi,项目名称:vibevault,代码行数:64,代码来源:Searching.java

示例11: HtmlCleanerParser

import org.htmlcleaner.HtmlCleaner; //导入方法依赖的package包/类
/**
 * Creates the parser, registers it under {@code HtmlParserEnum.HtmlCleanerParser}
 * and initialises the underlying cleaner with namespace awareness enabled.
 */
public HtmlCleanerParser() {
	super(HtmlParserEnum.HtmlCleanerParser);
	cleaner = new HtmlCleaner();
	// Switch on namespace awareness directly on the cleaner's own properties.
	cleaner.getProperties().setNamespacesAware(true);
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:7,代码来源:HtmlCleanerParser.java

示例12: cleanHTML

import org.htmlcleaner.HtmlCleaner; //导入方法依赖的package包/类
/**
 * Cleans an HTML file and writes the pretty-printed result to another file.
 *
 * <p>Two global attribute transformations are registered for attribute names
 * matching {@code class} and {@code id} (case-insensitive, optional leading
 * whitespace) with a {@code null} template - presumably this removes those
 * attributes; confirm against the HtmlCleaner version in use. Script, style,
 * img and form tags are pruned and comments are omitted.
 *
 * <p>NOTE(review): the output is always written as {@code "utf-8"};
 * {@code encoding} is applied to the input file only.
 *
 * @param path     path of the HTML file to read
 * @param out      path of the file to write
 * @param encoding charset name used to read {@code path}
 * @throws IOException if reading or writing fails
 */
public void cleanHTML(String path, String out, String encoding) throws IOException {

	HtmlCleaner cleaner = new HtmlCleaner();
	CleanerProperties props = cleaner.getProperties();

	CleanerTransformations transformations = new CleanerTransformations();

	AttributeTransformationPatternImpl classPattern = new AttributeTransformationPatternImpl(
			Pattern.compile("^\\s*class", Pattern.CASE_INSENSITIVE), null,
			null);
	transformations.addGlobalTransformation(classPattern);

	AttributeTransformationPatternImpl idPattern = new AttributeTransformationPatternImpl(
			Pattern.compile("^\\s*id", Pattern.CASE_INSENSITIVE), null,
			null);
	transformations.addGlobalTransformation(idPattern);

	props.setCleanerTransformations(transformations);

	// set some properties to non-default values
	props.setTranslateSpecialEntities(true);
	props.setTransResCharsToNCR(false);
	props.setOmitComments(true);
	props.setPruneTags("script,style,img,form");

	// do parsing - reuse the cleaner that owns props instead of building a
	// second HtmlCleaner around the same properties object
	TagNode tagNode = cleaner.clean(new File(path), encoding);

	tagNode.removeAttribute("class");

	// serialize to HTML file
	new PrettyHtmlSerializer(props).writeToFile(tagNode,
			out, "utf-8");

}
 
开发者ID:fauconnier,项目名称:LaToe,代码行数:40,代码来源:HTML_Service.java


注:本文中的org.htmlcleaner.HtmlCleaner.getProperties方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。