当前位置: 首页>>代码示例>>Java>>正文


Java DomSerializer类代码示例

本文整理汇总了Java中org.htmlcleaner.DomSerializer的典型用法代码示例。如果您正苦于以下问题：Java DomSerializer类的具体用法？Java DomSerializer怎么用？Java DomSerializer使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


DomSerializer类属于org.htmlcleaner包，在下文中一共展示了DomSerializer类的8个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: isHealthy

import org.htmlcleaner.DomSerializer; //导入依赖的package包/类
/**
 * Performs a health check by requesting {@code GET_REQUEST_URL} and verifying that the
 * returned page yields a non-blank value for the {@code XPATH_TO_SUBMIT_ID} expression.
 *
 * @return {@code true} only when the request returns HTTP 200 and a non-blank submit id
 *         is extracted; {@code false} on a non-200 status, a blank submit id, or any
 *         exception thrown while fetching or parsing the page
 */
@Override
public boolean isHealthy() {
    HttpGet getMethod = new HttpGet(GET_REQUEST_URL);

    CloseableHttpResponse response = null;
    CloseableHttpClient httpClient = null;
    try {
        httpClient = HttpClientBuilder.create().build();
        response = httpClient.execute(getMethod);
        int statusCode = response.getStatusLine().getStatusCode();

        if (statusCode != HttpStatus.SC_OK) {
            LOG.info("Health check failed, got response code: %d", statusCode);
            return false;
        }

        // Clean the (possibly malformed) HTML and convert it to a W3C DOM so XPath can be applied.
        String htmlContents = EntityUtils.toString(response.getEntity());
        TagNode tagNode = new HtmlCleaner().clean(htmlContents);
        Document doc = new DomSerializer(new CleanerProperties()).createDOM(tagNode);

        XPath xpath = XPathFactory.newInstance().newXPath();
        String submitId = (String) xpath.evaluate(XPATH_TO_SUBMIT_ID, doc, XPathConstants.STRING);

        if (StringUtils.isBlank(submitId)) {
            LOG.info("Health check failed, submitId token was null or empty.");
            return false;
        }

        return true;
    } catch (Throwable t) {
        // A failure to reach or parse the page means the service is NOT healthy; previously
        // execution fell through to a trailing "return true" after logging the failure.
        LOG.info("Health check failed, exception thrown: %s", t.getMessage());
        return false;
    } finally {
        // Runs on every exit path, including the early returns above.
        closeHttpObjects(response, httpClient);
    }
}
 
开发者ID:sgskinner,项目名称:StashThisBot,代码行数:37,代码来源:ArchiveIsServiceImpl.java

示例2: testXpathExtraction

import org.htmlcleaner.DomSerializer; //导入依赖的package包/类
/**
 * Checks that the value of the submit-url input can be extracted via XPath from the
 * saved page fixture after it has been cleaned and converted to a DOM.
 */
@Test
public void testXpathExtraction() throws IOException, ParserConfigurationException, XPathExpressionException {
    final String expectedToken = "YHuwL/nTgL370PMDM2G2vkuvMg3kmNqk/y/i7NRSaLyf2JSIU+/now+AYw+X0nX8";

    // Read the fixture page as UTF-8 text.
    String pageSource = new String(
            Files.readAllBytes(Paths.get("src/test/resources/raw_data/archive.is.html")),
            StandardCharsets.UTF_8);

    // Clean the HTML and turn it into a W3C DOM that XPath can query.
    TagNode cleanedRoot = new HtmlCleaner().clean(pageSource);
    DomSerializer serializer = new DomSerializer(new CleanerProperties());
    Document dom = serializer.createDOM(cleanedRoot);

    XPath evaluator = XPathFactory.newInstance().newXPath();
    String extracted = (String) evaluator.evaluate(
            "//*[@id=\"submiturl\"]/input/@value", dom, XPathConstants.STRING);

    Assert.assertTrue("Did not extract expected value!", extracted.equals(expectedToken));
}
 
开发者ID:sgskinner,项目名称:StashThisBot,代码行数:15,代码来源:ArchiveIsServiceTest.java

示例3: htmlToWiki

import org.htmlcleaner.DomSerializer; //导入依赖的package包/类
/**
 * Converts HTML into wiki text by cleaning it, building a DOM, and handing the
 * document's top-level nodes to {@code processChildNodes}.
 *
 * @param html        the HTML to convert
 * @param contextPath passed through unchanged to {@code processChildNodes}
 * @param projectId   passed through unchanged to {@code processChildNodes}
 * @return the trimmed wiki text with a trailing {@code CRLF} guaranteed, or an empty
 *         string when no output was produced
 * @throws Exception if cleaning, DOM creation, or node processing fails
 */
public static String htmlToWiki(String html, String contextPath, int projectId) throws Exception {

    // Strip the nbsp because it gets converted to unicode
    // NOTE(review): the search literal is presumably a non-breaking space (or "&nbsp;") -- confirm
    html = StringUtils.replace(html, " ", " ");

    // Clean the HTML and build a DOM from it
    HtmlCleaner cleaner = new HtmlCleaner();
    CleanerProperties props = cleaner.getProperties();
    TagNode rootTag = cleaner.clean(html);
    Document document = new DomSerializer(props, true).createDOM(rootTag);
    if (LOG.isTraceEnabled()) {
        LOG.trace(html);
    }

    // Collect the document's top-level nodes, then emit the wiki equivalent of each
    StringBuffer sb = new StringBuffer();
    ArrayList<Node> topLevelNodes = new ArrayList<Node>();
    int childCount = document.getChildNodes().getLength();
    for (int idx = 0; idx < childCount; idx++) {
        topLevelNodes.add(document.getChildNodes().item(idx));
    }
    processChildNodes(topLevelNodes, sb, 0, true, true, false, "", contextPath, projectId);

    if (sb.length() <= 0) {
        return "";
    }

    String wikiText = sb.toString().trim();
    if (wikiText.contains("&apos;")) {
        // Determine if this is where the &apos; is being introduced
        wikiText = StringUtils.replace(wikiText, "&apos;", "'");
    }
    return wikiText.endsWith(CRLF) ? wikiText : wikiText + CRLF;
}
 
开发者ID:Concursive,项目名称:concourseconnect-community,代码行数:38,代码来源:HTMLToWikiUtils.java

示例4: parseHhc

import org.htmlcleaner.DomSerializer; //导入依赖的package包/类
/**
 * Parses the given HHC input into a list of table-of-contents references.
 *
 * The stream is cleaned into a DOM, the node matching {@code body/ul} relative to the
 * document element is looked up, and that node is handed to {@code processUlNode}.
 *
 * @param hhcFile   stream containing the HHC markup; it is not closed here
 * @param resources passed through unchanged to {@code processUlNode}
 * @return whatever {@code processUlNode} produces for the located node
 */
public static List<TOCReference> parseHhc(InputStream hhcFile, Resources resources) throws IOException, ParserConfigurationException, XPathExpressionException {
	HtmlCleaner cleaner = new HtmlCleaner();
	TagNode cleanedRoot = cleaner.clean(hhcFile);
	Document dom = new DomSerializer(cleaner.getProperties()).createDOM(cleanedRoot);

	// The expression is evaluated relative to the document element, not the document itself.
	Node topLevelList = (Node) XPathFactory.newInstance().newXPath()
			.evaluate("body/ul", dom.getDocumentElement(), XPathConstants.NODE);

	return processUlNode(topLevelList, resources);
}
 
开发者ID:DASAR,项目名称:epublib-android,代码行数:12,代码来源:HHCParser.java

示例5: getHtmlDocumentModel

import org.htmlcleaner.DomSerializer; //导入依赖的package包/类
/**
 * Cleans the given HTML and converts it to a W3C DOM document.
 *
 * @param htmlContent the HTML text to parse
 * @return the resulting document, or {@code null} when a
 *         {@link ParserConfigurationException} or any {@link RuntimeException} occurs
 *         during cleaning or DOM creation (the failure is not reported to the caller)
 */
public static Document getHtmlDocumentModel(String htmlContent) {
    try {
        TagNode cleanedRoot = new HtmlCleaner().clean(htmlContent);
        return new DomSerializer(new CleanerProperties()).createDOM(cleanedRoot);
    } catch (ParserConfigurationException e) {
        // Same outcome as wrapping in a RuntimeException and catching it: report "no document".
        return null;
    } catch (RuntimeException e) {
        return null;
    }
}
 
开发者ID:trywildcard,项目名称:pair-java,代码行数:16,代码来源:HtmlParserUtil.java

示例6: HtmlXpathSelector

import org.htmlcleaner.DomSerializer; //导入依赖的package包/类
/**
 * Creates a selector for the given HTML: the content is cleaned, converted to a DOM
 * stored in {@code rootDocument}, and a fresh XPath evaluator is stored in {@code xPath}.
 *
 * @param content the HTML text to clean and convert
 */
public HtmlXpathSelector(String content) throws ParserConfigurationException, SAXException, IOException
{
	TagNode cleanedRoot = new HtmlCleaner().clean(content);
	DomSerializer serializer = new DomSerializer(new CleanerProperties());
	rootDocument = serializer.createDOM(cleanedRoot);
	xPath = XPathFactory.newInstance().newXPath();
}
 
开发者ID:hxt168,项目名称:webpasser,代码行数:10,代码来源:HtmlXpathSelector.java

示例7: getDomHtmlNode

import org.htmlcleaner.DomSerializer; //导入依赖的package包/类
/**
 * Builds a DOM from {@code rootTagNode} using the cleaner's properties and wraps it in
 * a {@code DomHtmlNode}.
 *
 * When the root tag has a {@code lang} attribute, its value is also set on the DOM's
 * document element.
 *
 * @return a new {@code DomHtmlNode} wrapping the created document
 * @throws ParserConfigurationException if the DOM cannot be created
 */
private DomHtmlNode getDomHtmlNode() throws ParserConfigurationException {
	DomSerializer serializer = new DomSerializer(cleaner.getProperties(), true);
	Document dom = serializer.createDOM(rootTagNode);

	String language = rootTagNode.getAttributeByName("lang");
	if (language != null) {
		dom.getDocumentElement().setAttribute("lang", language);
	}

	return new DomHtmlNode(dom);
}
 
开发者ID:jaeksoft,项目名称:opensearchserver,代码行数:8,代码来源:HtmlCleanerParser.java

示例8: updateArtists

import org.htmlcleaner.DomSerializer; //导入依赖的package包/类
/**
 * Downloads the artist browse page, extracts artist names and the text that follows each
 * artist link, and stores the pairs in the given data store.
 *
 * On success the {@code "artistUpdate"} preference is set to today's date formatted as
 * {@code yyyy-MM-dd}. Failures are logged rather than propagated.
 *
 * @param db the store that receives the artist list and the update-date preference
 * @return always {@code true}, including when fetching or parsing fails
 *         NOTE(review): callers cannot detect failure from the return value -- confirm
 *         whether that is intended before changing it
 */
public static Boolean updateArtists(StaticDataStore db){
	Logging.Log(LOG_TAG, "Fetching Artists");
	ArrayList<ArrayList<String>> artists = new ArrayList<ArrayList<String>>();

	HtmlCleaner pageParser = new HtmlCleaner();
	CleanerProperties props = pageParser.getProperties();
	props.setAllowHtmlInsideAttributes(true);
	props.setAllowMultiWordAttributes(true);
	props.setRecognizeUnicodeChars(true);
	props.setOmitComments(true);

	// Declared outside the try so the finally block can always release its connections.
	HttpClient client = null;
	try {
		String url = "http://www.archive.org/browse.php?field=/metadata/bandWithMP3s&collection=etree";

		// Use the same 15 second limit for connecting and for socket reads.
		HttpParams params = new BasicHttpParams();
		int timeout = (int) (15 * DateUtils.SECOND_IN_MILLIS);
		HttpConnectionParams.setConnectionTimeout(params, timeout);
		HttpConnectionParams.setSoTimeout(params, timeout);
		client = new DefaultHttpClient(params);

		HttpGet request = new HttpGet(url);
		HttpResponse response = client.execute(request);
		StatusLine status = response.getStatusLine();
		if (status.getStatusCode() == HttpStatus.SC_OK) {
			ResponseHandler<String> responseHandler = new BasicResponseHandler();
			TagNode node = pageParser.clean(responseHandler.handleResponse(response));

			org.w3c.dom.Document doc = new DomSerializer(new CleanerProperties()).createDOM(node);
			XPath xpath = XPathFactory.newInstance().newXPath();
			NodeList artistNodes = (NodeList) xpath.evaluate("//div[@class='row']//div[@class='col-sm-4']/a", doc, XPathConstants.NODESET);
			NodeList numberNodes = (NodeList) xpath.evaluate("//div[@class='row']//div[@class='col-sm-4']/text()[preceding-sibling::a]", doc, XPathConstants.NODESET);
			Logging.Log(LOG_TAG, "artistNodes: " + artistNodes.getLength());
			Logging.Log(LOG_TAG, "numberNodes: " + numberNodes.getLength());

			// Pair entries by index only when both lists line up; otherwise nothing is stored.
			if(artistNodes.getLength() == numberNodes.getLength()){
				for (int i = 0; i < artistNodes.getLength(); i++) {
					ArrayList<String> artistPair = new ArrayList<String>();
					// "&amp;" is unescaped last so already-unescaped text is not unescaped twice.
					artistPair.add(artistNodes.item(i).getTextContent().replace("&apos;", "'").replace("&gt;", ">").replace("&lt;", "<").replace("&quot;", "\"").replace("&amp;", "&"));
					artistPair.add(numberNodes.item(i).getTextContent());
					artists.add(artistPair);
				}
			}
			if (artists.size() > 0) {
				db.insertArtistBulk(artists);
				String s = DateFormat.format("yyyy-MM-dd", new GregorianCalendar().getTime()).toString();
				db.updatePref("artistUpdate", s);
				Logging.Log(LOG_TAG, "Finished Fetching Artists");
			}
			else {
				Logging.Log(LOG_TAG, "Error Fetching Artists");
			}
		}
	} catch(Exception e) {
		e.printStackTrace();
		Logging.Log(LOG_TAG, "Error Fetching Artists");
	} finally {
		// Previously the client was shut down only on the two non-exceptional paths, so an
		// exception from execute/handleResponse/clean left its connections open.
		if (client != null) {
			client.getConnectionManager().shutdown();
		}
	}
	return true;

}
 
开发者ID:sedenardi,项目名称:vibevault,代码行数:64,代码来源:Searching.java


注:本文中的org.htmlcleaner.DomSerializer类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。