本文整理汇总了 PHP 中 tidy::html 方法的典型用法代码示例。如果您正苦于以下问题：PHP tidy::html 方法的具体用法？PHP tidy::html 怎么用？PHP tidy::html 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 tidy 的用法示例。
在下文中一共展示了tidy::html方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: tidyHtml
/**
 * Repair an HTML fragment with Tidy and return the markup found inside the
 * second child of the resulting <html> node.
 *
 * The input is parsed as UTF-8 and repaired. The serialized text of the root
 * node's second child is then trimmed by seven bytes at each end.
 *
 * @param string $html Markup to repair.
 * @return string The serialized node text minus its first and last seven bytes.
 */
function tidyHtml($html)
{
    $options = [
        "indent" => 2,
        "clean" => false,
        "char-encoding" => "utf8",
    ];

    $document = new tidy();
    $document->parseString($html, $options, 'utf8');
    $document->cleanRepair();

    // NOTE(review): child[1] is presumably <body> (child[0] being <head>) — confirm.
    $serialized = $document->html()->child[1]->value;

    // Presumably strips a leading "<body>\n" and a trailing "</body>" (seven bytes each);
    // a <body> tag carrying attributes would not fit this fixed offset — confirm.
    return substr($serialized, 7, -7);
}
示例2: GetXML
/**
 * Repair markup with Tidy in XML output mode and wrap the result in a
 * SimpleXMLElement.
 *
 * Tidy is configured to emit XML, to use numeric entities and to hide
 * comments; the input is parsed as UTF-8.
 *
 * @param string $html Markup to convert.
 * @return SimpleXMLElement Element built from the repaired root <html> node.
 */
function GetXML($html)
{
    $parser = new tidy();
    $parser->parseString(
        $html,
        array(
            'output-xml' => true,
            'numeric-entities' => true,
            'hide-comments' => true,
        ),
        'utf8'
    );
    $parser->cleanRepair();

    // The root node is handed straight to SimpleXMLElement, which receives it as a string.
    return new SimpleXMLElement($parser->html());
}
示例3: rawToSimpleXML
/**
 * Convert raw markup into a SimpleXML tree by way of Tidy and DOMDocument.
 *
 * @param string $data Raw markup to clean and convert.
 * @return mixed Whatever simplexml_import_dom() yields for the loaded document.
 */
static private function rawToSimpleXML($data)
{
    // Tidy configuration: UTF-8 in and out, XML output with an XML declaration, comments hidden.
    $options = array(
        'input-encoding' => 'utf-8',
        'output-encoding' => 'utf8',
        'output-xml' => true,
        'add-xml-decl' => true,
        'hide-comments' => true,
    );

    // Load the data and repair markup errors.
    $cleaner = new tidy();
    $cleaner->parseString($data, $options, $options['output-encoding']);
    $cleaner->cleanRepair();
    $markup = $cleaner->html()->value;
    unset($cleaner);

    // Build the DOM; parser warnings are silenced on purpose.
    $document = new DOMDocument();
    $document->strictErrorChecking = false;
    @$document->loadHTML($markup);

    // Hand the DOM over to SimpleXML.
    $xml = simplexml_import_dom($document);
    unset($document);

    return $xml;
}
示例4: get
/**
 * Get URL info
 *
 * Looks the URL up in the cache server first. On a miss the page is fetched
 * with cURL, repaired with Tidy, loaded into a DOM, and the text of its first
 * <title> element is stored in the cache as JSON.
 *
 * @return array Info about the URL; the freshly built entry holds the key 'title'
 */
public function get()
{
    $cached = $this->server->get($this->url_key);
    if ($cached !== false) {
        return json_decode($cached, TRUE);
    }

    // Transfer settings, applied one by one in this order.
    $curlOptions = array(
        CURLOPT_URL => $this->url,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FILETIME => true,
        CURLOPT_AUTOREFERER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_MAXREDIRS => 6,
        CURLOPT_USERAGENT => "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; en-us) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10",
    );
    foreach ($curlOptions as $option => $setting) {
        curl_setopt($this->curl, $option, $setting);
    }
    $page = curl_exec($this->curl);
    curl_close($this->curl);

    // Normalise the markup before handing it to the DOM parser.
    $repairer = new tidy();
    $repairer->parseString(
        $page,
        array('clean' => true, 'output-html' => true, 'wrap' => 78, 'quiet' => 1)
    );
    $repairer->cleanRepair();
    $page = $repairer->html()->value;

    // Collect libxml errors internally instead of raising them as warnings.
    $previousErrorMode = libxml_use_internal_errors(true);

    $document = new DOMDocument();
    $document->loadHTML($page);
    $query = new DOMXPath($document);
    $title = $query->evaluate('//*[name()="title"]')->item(0)->nodeValue;

    // Drop the buffered errors, then restore the earlier error mode.
    libxml_clear_errors();
    libxml_use_internal_errors($previousErrorMode);

    $encoded = json_encode(array('title' => $title));
    $this->server->add($this->url_key, $encoded, MEMCACHE_COMPRESSED, self::CACHE_LIMIT);

    return json_decode($encoded, TRUE);
}
示例5: add_html
/**
 * Add an HTML document to the spine, optionally tidying and splitting it.
 *
 * When no title is supplied, the text of the document's <head><title> is
 * used; if that is missing or empty the title falls back to 'Untitled'.
 * <img> tags are passed through $this->img_callback before the markup is
 * added as one spine item, or as several when splitting is enabled.
 *
 * @param string $html   Markup to add.
 * @param string $title  Title for the TOC entry; a falsy value triggers detection.
 * @param array  $config Flags read here: 'tidy', 'split', 'toc'.
 * @return string The title that was supplied or detected.
 */
public function add_html($html, $title, $config)
{
    if (!empty($config['tidy'])) {
        $tidy = new tidy();
        // NOTE(review): the whole $config array (including the 'tidy', 'split'
        // and 'toc' flags) is passed to Tidy as its option set — confirm intended.
        $tidy->parseString($html, $config, 'utf8');
        $tidy->cleanRepair();
        $html = $tidy->html()->value;
    }

    $doc = new DOMDocument();
    @$doc->loadHTML($html);

    if (!$title) {
        $title = 'Untitled';
        // A DOMNodeList is always truthy, so test the node itself: item(0)
        // is null when the document has no <head> or the head has no <title>.
        $head = $doc->getElementsByTagName('head')->item(0);
        if ($head !== null) {
            $titleNode = $head->getElementsByTagName('title')->item(0);
            // An empty <title> is treated like a missing one, mirroring the
            // falsy check applied to the $title argument above.
            if ($titleNode !== null && trim($titleNode->nodeValue) !== '') {
                $title = $titleNode->nodeValue;
            }
        }
    }

    // Rewrite <img> tags through the image callback.
    $html = preg_replace_callback('~(<img [^>]*?)src=([\'"])(.+?)[\'"]~', array($this, 'img_callback'), $html);

    $wantToc = !empty($config['toc']);
    if (!empty($config['split'])) {
        $splits = $this->split($html);
        $first = TRUE;
        foreach ($splits as $split) {
            $this->add_spine_item($split[0], $split[1]);
            if ($wantToc) {
                // Only the first part carries the title; later parts get a NULL title.
                if ($first) {
                    $this->set_item_toc($title, TRUE, FALSE);
                } else {
                    $this->set_item_toc(NULL, TRUE, TRUE);
                }
                $first = FALSE;
            }
        }
    } else {
        $this->add_spine_item($html);
        if ($wantToc) {
            $this->set_item_toc($title, TRUE);
        }
    }

    return $title;
}
示例6: cy_html_repair
/**
 * Repair syntax errors in HTML markup.
 *
 * The markup is parsed by Tidy with clean-up enabled, XML and XHTML output
 * requested and a wrap width of 200, then repaired.
 *
 * @param string $html     Markup to repair.
 * @param string $encoding Encoding name handed to Tidy (default 'UTF8').
 * @return tidyNode|null Root <html> node of the repaired document.
 */
function cy_html_repair($html, $encoding = 'UTF8')
{
    $fixer = new tidy();
    $fixer->parseString(
        $html,
        array(
            'clean' => true,
            'output-xml' => true,
            'output-xhtml' => true,
            'wrap' => 200,
        ),
        $encoding
    );
    $fixer->cleanRepair();

    // The caller receives the node object itself, not a string.
    return $fixer->html();
}
示例7: getXhtml
/**
* Return array contains formated XHTML string
* created from the responded HTML of the given URL.
* array[code] => HTTP status code
* array[headers] => HTTP headers
* array[headers] => formated XHTML string made from the entity body
* Throw exception if error.
*
* @param string $url
* @param integer $cache_lifetime
* @param boolean $conditional_request
* @param array $headers
* @param array $post
* @return array
*/
public final function getXhtml($url, $cache_lifetime = 0, $conditional_request = false, $headers = array(), $post = array())
{
/*
* \x21\x23-\x3b\x3d\x3f-\x5a\x5c\x5f\x61-\x7a\x7c\x7e
*/
if (!preg_match('/^https?:\\/\\/\\w[\\w\\-\\.]+/i', $url)) {
throw new Exception("Not a valid or fully qualified HTTP URL.");
}
$data = false;
$cache_lifetime = (int) $cache_lifetime;
$use_cache = !empty($this->cacheDir) and $cache_lifetime > 0;
if ($use_cache) {
$cache = new Cache_Lite(array('cacheDir' => $this->cacheDir, 'lifeTime' => $cache_lifetime));
$params = array();
foreach ($headers as $key => $value) {
if (!empty($value)) {
$params[] = urlencode($key) . '=' . urlencode($value);
}
}
foreach ($post as $key => $value) {
$params[] = urlencode($key) . '=' . urlencode($value);
}
$cache_id = "{$url}?" . implode('&', $params);
if (false !== ($data = $cache->get($cache_id))) {
$data = unserialize($data);
}
}
/*
* Access to the URL if not cached
* or if the cache has either Last-Modified or Etag header
* and conditional request is specified.
*/
if ($conditional_request and (!isset($data['headers']['last-modified']) or !isset($data['headers']['etag']))) {
$conditional_request = false;
}
if (!$data or $conditional_request) {
if (isset($data['headers']['last-modified']) and (!isset($headers['last-modified']) or empty($headers['last-modified']))) {
$headers['last-modified'] = $data['headers']['last-modified'];
}
if (isset($data['headers']['etag']) and (!isset($headers['etag']) or empty($headers['etag']))) {
$headers['etag'] = $data['headers']['etag'];
}
try {
$response = $this->getHttpResponse($url, $headers, $post);
} catch (Exception $e) {
if (!$data) {
throw $e;
}
}
/*
* Use cache if the responded HTTP status code is 304.
* If 200, format the responded HTML of the given URL to XHTML.
*/
if (!$data or isset($response['code']) and $response['code'] != 304) {
$data =& $response;
/*
* If status code was 200 and Content-Type was not (X)HTML,
* the status code was forcibly altered to 204.
* @see HTTP_Request_Listener_Extended->update().
*/
if ($data['code'] != 200 and $data['code'] != 204) {
throw new Exception("Responded HTTP Status Code is {$data['code']}.");
} elseif (isset($data['headers']['content-type']) and !preg_match('/^(?:text|application)\\/x?html\\b/', $data['headers']['content-type'])) {
throw new Exception("Responded Content-Type is {$data['headers']['content-type']}");
} elseif (empty($data['body'])) {
throw new Exception("Responded entity body is empty.");
} elseif (!preg_match('/<\\w+[^>]*?>/', $data['body'], $matches)) {
throw new Exception("Responded entity body does not contain a markup symbol.");
} elseif (false !== strpos($matches[0], "")) {
throw new Exception("Responded entity body contains NULL.");
}
/*
* Remove BOM and NULLs.
*/
$data['body'] = preg_replace('/^\\xef\\xbb\\xbf/', '', $data['body']);
$data['body'] = str_replace("", '', $data['body']);
/*
* Initialize the backups.
*/
$this->backup = array();
$this->backup_count = 0;
/*
* Removing SCRIPT and STYLE is recommended.
* The following substitute code will capsulate the content of the tags in CDATA.
* If use it, be sure that some JavaScript method such as document.write
//.........这里部分代码省略.........