当前位置: 首页>>代码示例>>PHP>>正文


PHP tidy::html方法代码示例

本文整理汇总了PHP中tidy::html方法的典型用法代码示例。如果您正苦于以下问题:PHP tidy::html方法的具体用法?PHP tidy::html怎么用?PHP tidy::html使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类tidy的用法示例。


在下文中一共展示了tidy::html方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: tidyHtml

/**
 * Repair an HTML string with tidy and return part of the repaired markup.
 *
 * The string is parsed as UTF-8, cleaned/repaired, and the text of the second
 * child of the resulting <html> node is returned with its first and last
 * seven bytes removed.
 *
 * @param string $html  Markup to repair.
 * @return string  The trimmed markup of the <html> node's second child.
 */
function tidyHtml($html)
{
    $repairer = new tidy();
    $repairer->parseString(
        $html,
        ["indent" => 2, "clean" => false, "char-encoding" => "utf8"],
        'utf8'
    );
    $repairer->cleanRepair();

    // child[1] is presumably the <body> element; the 7-byte trim on each side
    // would then drop its opening and closing tags — confirm against tidy's
    // actual serialization before relying on it.
    $wrapped = $repairer->html()->child[1]->value;

    return substr($wrapped, 7, -7);
}
开发者ID:Bodigrim,项目名称:durmstrang,代码行数:10,代码来源:misc.php

示例2: GetXML

 /**
  * Convert an HTML string into a SimpleXMLElement by way of tidy's XML output.
  *
  * The markup is parsed as UTF-8 with XML output, numeric entities and hidden
  * comments, repaired, and the repaired <html> node is handed to the
  * SimpleXMLElement constructor.
  *
  * @param string $html  Markup to repair and parse.
  * @return SimpleXMLElement  Element built from the repaired <html> node.
  */
 function GetXML($html)
 {
     $repairer = new tidy();
     $repairer->parseString(
         $html,
         array('output-xml' => true, 'numeric-entities' => true, 'hide-comments' => true),
         'utf8'
     );
     $repairer->cleanRepair();

     // The tidyNode itself is passed on, unchanged from the original call;
     // the constructor receives it where it expects the XML text.
     $root = $repairer->html();

     return new SimpleXMLElement($root);
 }
开发者ID:sharathvignesh,项目名称:Tamil-Readers-Association,代码行数:11,代码来源:HttpUtils.php

示例3: rawToSimpleXML

    /**
     * Repair raw markup with tidy and expose it as a SimpleXML tree.
     *
     * The input is run through tidy (XML output, comments hidden), the text of
     * the repaired <html> node is loaded into a DOMDocument, and that document
     * is imported into SimpleXML.
     *
     * @param string $data  Raw markup; tidy is told to treat it as UTF-8.
     * @return mixed  Whatever simplexml_import_dom() returns for the parsed
     *                document (a SimpleXMLElement on success).
     */
    private static function rawToSimpleXML($data)
    {
        /*
         * Tidy configuration. Encoding names use tidy's own spelling ("utf8");
         * "utf-8" is not one of the encoding names tidy documents.
         */
        $tidy_config = array(
            'input-encoding' => 'utf8',
            'output-encoding' => 'utf8',
            'output-xml' => TRUE,
            'add-xml-decl' => TRUE,
            'hide-comments' => TRUE
        );

        /*
         * Load the data and repair markup errors.
         */
        $tidy = new tidy();
        $tidy->parseString($data, $tidy_config, $tidy_config['output-encoding']);
        $tidy->cleanRepair();
        // NOTE(review): only the <html> node's text is kept, so the XML
        // declaration requested via add-xml-decl is presumably not part of
        // $tidy_out — confirm if the declared encoding matters downstream.
        $tidy_out = $tidy->html()->value;
        unset($tidy);

        /*
         * Build the DOM document. Parse warnings are deliberately suppressed:
         * the input is arbitrary markup and parsing is best-effort.
         */
        $dom = new DOMDocument();
        $dom->strictErrorChecking = FALSE;
        @$dom->loadHTML($tidy_out);

        /*
         * Hand the DOM over to SimpleXML.
         */
        $simplexml = simplexml_import_dom($dom);
        unset($dom);

        return $simplexml;
    }
开发者ID:nekto,项目名称:vkweather,代码行数:38,代码来源:htmlparser.class.php

示例4: get

 /**
  * Get the page title for this object's URL, using the cache when possible.
  *
  * On a cache miss the URL is fetched with cURL, repaired with tidy, parsed
  * into a DOMDocument, and the text of the first <title> element is stored in
  * the cache as JSON before being returned.
  *
  * @return array  The decoded cache entry; entries written by this method
  *                have the shape array('title' => string|null).
  */
 public function get()
 {
     $res = $this->server->get($this->url_key);
     if ($res === false) {
         curl_setopt($this->curl, CURLOPT_URL, $this->url);
         curl_setopt($this->curl, CURLOPT_RETURNTRANSFER, true);
         curl_setopt($this->curl, CURLOPT_FILETIME, true);
         curl_setopt($this->curl, CURLOPT_AUTOREFERER, true);
         curl_setopt($this->curl, CURLOPT_FOLLOWLOCATION, true);
         curl_setopt($this->curl, CURLOPT_MAXREDIRS, 6);
         curl_setopt($this->curl, CURLOPT_USERAGENT, "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; en-us) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10");
         $html = curl_exec($this->curl);
         // NOTE(review): the handle is closed here, so a second cache miss on
         // the same instance would reuse a closed handle — confirm instances
         // are single-use.
         curl_close($this->curl);
         // curl_exec() returns false when the transfer fails; give tidy an
         // empty document rather than a boolean.
         if ($html === false) {
             $html = '';
         }
         $tidy_config = array('clean' => true, 'output-html' => true, 'wrap' => 78, 'quiet' => 1);
         $tidy = new tidy();
         $tidy->parseString($html, $tidy_config);
         $tidy->cleanRepair();
         $html = $tidy->html()->value;
         // Buffer DOM errors rather than emitting them as warnings
         $oldSetting = libxml_use_internal_errors(true);
         $dom = new DOMDocument();
         $dom->loadHTML($html);
         $xpath = new DOMXPath($dom);
         $titles = $xpath->evaluate('//*[name()="title"]');
         // item(0) is null when the document has no <title>; report a null
         // title instead of dereferencing null.
         $titleNode = $titles->item(0);
         $title = $titleNode !== null ? $titleNode->nodeValue : null;
         // Discard the errors buffered while parsing
         libxml_clear_errors();
         // Revert error buffering to its previous setting
         libxml_use_internal_errors($oldSetting);
         $res = json_encode(array('title' => $title));
         $this->server->add($this->url_key, $res, MEMCACHE_COMPRESSED, self::CACHE_LIMIT);
     }
     return json_decode($res, TRUE);
 }
开发者ID:jeremykendall,项目名称:spaz-api,代码行数:43,代码来源:Urltitle.php

示例5: add_html

 /**
  * Add an HTML document to the spine, optionally tidying and splitting it.
  *
  * @param string $html    Markup to add.
  * @param string $title   Title for the TOC entry. When empty, the document's
  *                        <head><title> text is used, falling back to
  *                        'Untitled' if the document has none.
  * @param array  $config  Options read here: 'tidy', 'split' and 'toc'. When
  *                        'tidy' is truthy the whole array is also passed to
  *                        tidy as its configuration.
  * @return string  The title that was used.
  */
 public function add_html($html, $title, $config)
 {
     if (!empty($config['tidy'])) {
         $tidy = new tidy();
         $tidy->parseString($html, $config, 'utf8');
         $tidy->cleanRepair();
         $html = $tidy->html()->value;
     }
     $doc = new DOMDocument();
     // Best-effort parse: warnings about malformed markup are suppressed.
     @$doc->loadHTML($html);
     if (!$title) {
         $title = 'Untitled';
         // getElementsByTagName() always returns a node list object (possibly
         // empty), so the first item is what must be tested, not the list.
         $head = $doc->getElementsByTagName('head')->item(0);
         if ($head !== null) {
             $titleNode = $head->getElementsByTagName('title')->item(0);
             if ($titleNode !== null) {
                 $title = $titleNode->nodeValue;
             }
         }
     }
     // Pass every <img ... src="..."> match through img_callback
     $html = preg_replace_callback('~(<img [^>]*?)src=([\'"])(.+?)[\'"]~', array($this, 'img_callback'), $html);
     if (!empty($config['split'])) {
         $splits = $this->split($html);
         $first = TRUE;
         foreach ($splits as $split) {
             $this->add_spine_item($split[0], $split[1]);
             if (!empty($config['toc'])) {
                 // Only the first split carries the title; later splits get a
                 // NULL title with the third flag set.
                 if ($first) {
                     $this->set_item_toc($title, TRUE, FALSE);
                 } else {
                     $this->set_item_toc(NULL, TRUE, TRUE);
                 }
                 $first = FALSE;
             }
         }
     } else {
         $this->add_spine_item($html);
         if (!empty($config['toc'])) {
             $this->set_item_toc($title, TRUE);
         }
     }
     return $title;
 }
开发者ID:ecampbell,项目名称:moodle-booktool_wordexport,代码行数:46,代码来源:LuciEPUB.php

示例6: cy_html_repair

/**
 * Repair syntax errors in an HTML string using tidy.
 *
 * @param string $html      Markup to repair.
 * @param string $encoding  Encoding name handed to tidy (default 'UTF8').
 * @return mixed  The value of tidy::html() for the repaired document, i.e.
 *                its <html> node (not a plain string).
 */
function cy_html_repair($html, $encoding = 'UTF8')
{
    $options = array();
    $options['clean'] = true;
    $options['output-xml'] = true;
    $options['output-xhtml'] = true;
    $options['wrap'] = 200;

    $repairer = new tidy();
    $repairer->parseString($html, $options, $encoding);
    $repairer->cleanRepair();

    // Return the root node of the repaired document.
    $root = $repairer->html();
    return $root;
}
开发者ID:xiaoyjy,项目名称:retry,代码行数:13,代码来源:string.php

示例7: getXhtml

 /**
  * Return array contains formated XHTML string
  * created from the responded HTML of the given URL.
  * array[code] => HTTP status code
  * array[headers] => HTTP headers
  * array[headers] => formated XHTML string made from the entity body
  * Throw exception if error.
  *
  * @param  string  $url
  * @param  integer $cache_lifetime
  * @param  boolean $conditional_request
  * @param  array   $headers
  * @param  array   $post
  * @return array
  */
 public final function getXhtml($url, $cache_lifetime = 0, $conditional_request = false, $headers = array(), $post = array())
 {
     /*
      * \x21\x23-\x3b\x3d\x3f-\x5a\x5c\x5f\x61-\x7a\x7c\x7e
      */
     if (!preg_match('/^https?:\\/\\/\\w[\\w\\-\\.]+/i', $url)) {
         throw new Exception("Not a valid or fully qualified HTTP URL.");
     }
     $data = false;
     $cache_lifetime = (int) $cache_lifetime;
     $use_cache = !empty($this->cacheDir) and $cache_lifetime > 0;
     if ($use_cache) {
         $cache = new Cache_Lite(array('cacheDir' => $this->cacheDir, 'lifeTime' => $cache_lifetime));
         $params = array();
         foreach ($headers as $key => $value) {
             if (!empty($value)) {
                 $params[] = urlencode($key) . '=' . urlencode($value);
             }
         }
         foreach ($post as $key => $value) {
             $params[] = urlencode($key) . '=' . urlencode($value);
         }
         $cache_id = "{$url}?" . implode('&', $params);
         if (false !== ($data = $cache->get($cache_id))) {
             $data = unserialize($data);
         }
     }
     /*
      * Access to the URL if not cached
      * or if the cache has either Last-Modified or Etag header
      * and conditional request is specified.
      */
     if ($conditional_request and (!isset($data['headers']['last-modified']) or !isset($data['headers']['etag']))) {
         $conditional_request = false;
     }
     if (!$data or $conditional_request) {
         if (isset($data['headers']['last-modified']) and (!isset($headers['last-modified']) or empty($headers['last-modified']))) {
             $headers['last-modified'] = $data['headers']['last-modified'];
         }
         if (isset($data['headers']['etag']) and (!isset($headers['etag']) or empty($headers['etag']))) {
             $headers['etag'] = $data['headers']['etag'];
         }
         try {
             $response = $this->getHttpResponse($url, $headers, $post);
         } catch (Exception $e) {
             if (!$data) {
                 throw $e;
             }
         }
         /*
          * Use cache if the responded HTTP status code is 304.
          * If 200, format the responded HTML of the given URL to XHTML.
          */
         if (!$data or isset($response['code']) and $response['code'] != 304) {
             $data =& $response;
             /*
              * If status code was 200 and Content-Type was not (X)HTML,
              * the status code was forcibly altered to 204.
              * @see HTTP_Request_Listener_Extended->update().
              */
             if ($data['code'] != 200 and $data['code'] != 204) {
                 throw new Exception("Responded HTTP Status Code is {$data['code']}.");
             } elseif (isset($data['headers']['content-type']) and !preg_match('/^(?:text|application)\\/x?html\\b/', $data['headers']['content-type'])) {
                 throw new Exception("Responded Content-Type is {$data['headers']['content-type']}");
             } elseif (empty($data['body'])) {
                 throw new Exception("Responded entity body is empty.");
             } elseif (!preg_match('/<\\w+[^>]*?>/', $data['body'], $matches)) {
                 throw new Exception("Responded entity body does not contain a markup symbol.");
             } elseif (false !== strpos($matches[0], "")) {
                 throw new Exception("Responded entity body contains NULL.");
             }
             /*
              * Remove BOM and NULLs.
              */
             $data['body'] = preg_replace('/^\\xef\\xbb\\xbf/', '', $data['body']);
             $data['body'] = str_replace("", '', $data['body']);
             /*
              * Initialize the backups.
              */
             $this->backup = array();
             $this->backup_count = 0;
             /*
              * Removing SCRIPT and STYLE is recommended.
              * The following substitute code will capsulate the content of the tags in CDATA.
              * If use it, be sure that some JavaScript method such as document.write
//.........这里部分代码省略.........
开发者ID:diggin-sandbox,项目名称:mirror-htmlscraping-20090114,代码行数:101,代码来源:HTMLScraping.class.php


注:本文中的tidy::html方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。