当前位置: 首页>>代码示例>>PHP>>正文


PHP SimplePie_Misc::change_encoding方法代码示例

本文整理汇总了PHP中SimplePie_Misc::change_encoding方法的典型用法代码示例。如果您正苦于以下问题:PHP SimplePie_Misc::change_encoding方法的具体用法?PHP SimplePie_Misc::change_encoding怎么用?PHP SimplePie_Misc::change_encoding使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在SimplePie_Misc的用法示例。


在下文中一共展示了SimplePie_Misc::change_encoding方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: xml_encoding

 /**
  * Detect XML encoding, as per XML 1.0 Appendix F.1
  *
  * The first bytes of the document are compared, in order, against the
  * Unicode byte order marks and then against the byte sequences that the
  * text "<?xml" produces in UTF-32, UTF-16 and ASCII-compatible encodings.
  * When an XML declaration is recognised, the text between "<?xml" and the
  * closing delimiter is handed to an XML_Declaration_Parser (obtained from
  * the registry); if it parses, the encoding it reports is listed first,
  * followed by the encoding implied by the byte pattern itself.
  *
  * All byte strings are written with \x escapes so the source file stays
  * pure ASCII and the literals survive re-encoding of the file.
  *
  * @todo Add support for EBCDIC
  * @param string $data XML data
  * @param SimplePie_Registry $registry Class registry
  * @return array Possible encodings, most likely first (never empty)
  */
 public static function xml_encoding($data, $registry)
 {
     $encoding = array();
     // UTF-32 Big Endian BOM
     if (substr($data, 0, 4) === "\x00\x00\xFE\xFF") {
         $encoding[] = 'UTF-32BE';
     // UTF-32 Little Endian BOM
     } elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00") {
         $encoding[] = 'UTF-32LE';
     // UTF-16 Big Endian BOM
     } elseif (substr($data, 0, 2) === "\xFE\xFF") {
         $encoding[] = 'UTF-16BE';
     // UTF-16 Little Endian BOM
     } elseif (substr($data, 0, 2) === "\xFF\xFE") {
         $encoding[] = 'UTF-16LE';
     // UTF-8 BOM
     } elseif (substr($data, 0, 3) === "\xEF\xBB\xBF") {
         $encoding[] = 'UTF-8';
     // UTF-32 Big Endian Without BOM ("<?xml" is 5 characters * 4 bytes = 20 bytes)
     } elseif (substr($data, 0, 20) === "\x00\x00\x00\x3C\x00\x00\x00\x3F\x00\x00\x00\x78\x00\x00\x00\x6D\x00\x00\x00\x6C") {
         if ($pos = strpos($data, "\x00\x00\x00\x3F\x00\x00\x00\x3E")) {
             $parser = $registry->create('XML_Declaration_Parser', array(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32BE', 'UTF-8')));
             if ($parser->parse()) {
                 $encoding[] = $parser->encoding;
             }
         }
         $encoding[] = 'UTF-32BE';
     // UTF-32 Little Endian Without BOM
     } elseif (substr($data, 0, 20) === "\x3C\x00\x00\x00\x3F\x00\x00\x00\x78\x00\x00\x00\x6D\x00\x00\x00\x6C\x00\x00\x00") {
         if ($pos = strpos($data, "\x3F\x00\x00\x00\x3E\x00\x00\x00")) {
             $parser = $registry->create('XML_Declaration_Parser', array(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32LE', 'UTF-8')));
             if ($parser->parse()) {
                 $encoding[] = $parser->encoding;
             }
         }
         $encoding[] = 'UTF-32LE';
     // UTF-16 Big Endian Without BOM ("<?xml" is 5 characters * 2 bytes = 10 bytes,
     // so the declaration body starts at offset 10, not 20)
     } elseif (substr($data, 0, 10) === "\x00\x3C\x00\x3F\x00\x78\x00\x6D\x00\x6C") {
         if ($pos = strpos($data, "\x00\x3F\x00\x3E")) {
             $parser = $registry->create('XML_Declaration_Parser', array(SimplePie_Misc::change_encoding(substr($data, 10, $pos - 10), 'UTF-16BE', 'UTF-8')));
             if ($parser->parse()) {
                 $encoding[] = $parser->encoding;
             }
         }
         $encoding[] = 'UTF-16BE';
     // UTF-16 Little Endian Without BOM
     } elseif (substr($data, 0, 10) === "\x3C\x00\x3F\x00\x78\x00\x6D\x00\x6C\x00") {
         if ($pos = strpos($data, "\x3F\x00\x3E\x00")) {
             $parser = $registry->create('XML_Declaration_Parser', array(SimplePie_Misc::change_encoding(substr($data, 10, $pos - 10), 'UTF-16LE', 'UTF-8')));
             if ($parser->parse()) {
                 $encoding[] = $parser->encoding;
             }
         }
         $encoding[] = 'UTF-16LE';
     // US-ASCII (or superset): the declaration can be read without conversion
     } elseif (substr($data, 0, 5) === "\x3C\x3F\x78\x6D\x6C") {
         if ($pos = strpos($data, "\x3F\x3E")) {
             $parser = $registry->create('XML_Declaration_Parser', array(substr($data, 5, $pos - 5)));
             if ($parser->parse()) {
                 $encoding[] = $parser->encoding;
             }
         }
         $encoding[] = 'UTF-8';
     // Fallback to UTF-8
     } else {
         $encoding[] = 'UTF-8';
     }
     return $encoding;
 }
开发者ID:kadrim1,项目名称:metsayhistu,代码行数:66,代码来源:Misc.php

示例2: convert_to_utf8


//.........这里部分代码省略.........
            debug('Could not find Content-Type header in HTTP response');
        } else {
            $match = end($match);
            // get last matched element (in case of redirects)
            if (isset($match[2])) {
                $encoding = trim($match[2], "\"' \r\n\v\t");
            }
        }
        // TODO: check to see if encoding is supported (can we convert it?)
        // If it's not, result will be empty string.
        // For now we'll check for invalid encoding types returned by some sites, e.g. 'none'
        // Problem URL: http://facta.co.jp/blog/archives/20111026001026.html
        if (!$encoding || $encoding == 'none') {
            // search for encoding in HTML - only look at the first 50000 characters
            // Why 50000? See, for example, http://www.lemonde.fr/festival-de-cannes/article/2012/05/23/deux-cretes-en-goguette-sur-la-croisette_1705732_766360.html
            // TODO: improve this so it looks at smaller chunks first
            $html_head = substr($html, 0, 50000);
            if (preg_match('/^<\\?xml\\s+version=(?:"[^"]*"|\'[^\']*\')\\s+encoding=("[^"]*"|\'[^\']*\')/s', $html_head, $match)) {
                $encoding = trim($match[1], '"\'');
            } elseif (preg_match('/<meta\\s+http-equiv=["\']?Content-Type["\']? content=["\'][^;]+;\\s*charset=["\']?([^;"\'>]+)/i', $html_head, $match)) {
                $encoding = trim($match[1]);
            } elseif (preg_match_all('/<meta\\s+([^>]+)>/i', $html_head, $match)) {
                foreach ($match[1] as $_test) {
                    if (preg_match('/charset=["\']?([^"\']+)/i', $_test, $_m)) {
                        $encoding = trim($_m[1]);
                        break;
                    }
                }
            }
        }
        if (isset($encoding)) {
            $encoding = trim($encoding);
        }
        // trim is important here!
        if (!$encoding || strtolower($encoding) == 'iso-8859-1') {
            // replace MS Word smart quotes
            $trans = array();
            $trans[chr(130)] = '&sbquo;';
            // Single Low-9 Quotation Mark
            $trans[chr(131)] = '&fnof;';
            // Latin Small Letter F With Hook
            $trans[chr(132)] = '&bdquo;';
            // Double Low-9 Quotation Mark
            $trans[chr(133)] = '&hellip;';
            // Horizontal Ellipsis
            $trans[chr(134)] = '&dagger;';
            // Dagger
            $trans[chr(135)] = '&Dagger;';
            // Double Dagger
            $trans[chr(136)] = '&circ;';
            // Modifier Letter Circumflex Accent
            $trans[chr(137)] = '&permil;';
            // Per Mille Sign
            $trans[chr(138)] = '&Scaron;';
            // Latin Capital Letter S With Caron
            $trans[chr(139)] = '&lsaquo;';
            // Single Left-Pointing Angle Quotation Mark
            $trans[chr(140)] = '&OElig;';
            // Latin Capital Ligature OE
            $trans[chr(145)] = '&lsquo;';
            // Left Single Quotation Mark
            $trans[chr(146)] = '&rsquo;';
            // Right Single Quotation Mark
            $trans[chr(147)] = '&ldquo;';
            // Left Double Quotation Mark
            $trans[chr(148)] = '&rdquo;';
            // Right Double Quotation Mark
            $trans[chr(149)] = '&bull;';
            // Bullet
            $trans[chr(150)] = '&ndash;';
            // En Dash
            $trans[chr(151)] = '&mdash;';
            // Em Dash
            $trans[chr(152)] = '&tilde;';
            // Small Tilde
            $trans[chr(153)] = '&trade;';
            // Trade Mark Sign
            $trans[chr(154)] = '&scaron;';
            // Latin Small Letter S With Caron
            $trans[chr(155)] = '&rsaquo;';
            // Single Right-Pointing Angle Quotation Mark
            $trans[chr(156)] = '&oelig;';
            // Latin Small Ligature OE
            $trans[chr(159)] = '&Yuml;';
            // Latin Capital Letter Y With Diaeresis
            $html = strtr($html, $trans);
        }
        if (!$encoding) {
            debug('No character encoding found, so treating as UTF-8');
            $encoding = 'utf-8';
        } else {
            debug('Character encoding: ' . $encoding);
            if (strtolower($encoding) != 'utf-8') {
                debug('Converting to UTF-8');
                $html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
            }
        }
    }
    return $html;
}
开发者ID:kktsvetkov,项目名称:Fivefilters_Libraries,代码行数:101,代码来源:makefulltextfeedHelpers.php

示例3: init


//.........这里部分代码省略.........
                         SimplePie_Misc::error($this->error, E_USER_NOTICE, __FILE__, __LINE__);
                         return false;
                     }
                 }
                 $locate = null;
             }
             $headers = $file->headers;
             $data = $file->body;
             $sniffer = new $this->content_type_sniffer_class($file);
             $sniffed = $sniffer->get_type();
         } else {
             $data = $this->raw_data;
         }
         // Set up array of possible encodings
         $encodings = array();
         // First check to see if input has been overridden.
         if ($this->input_encoding !== false) {
             $encodings[] = $this->input_encoding;
         }
         $application_types = array('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity');
         $text_types = array('text/xml', 'text/xml-external-parsed-entity');
         // RFC 3023 (only applies to sniffed content)
         if (isset($sniffed)) {
             if (in_array($sniffed, $application_types) || substr($sniffed, 0, 12) === 'application/' && substr($sniffed, -4) === '+xml') {
                 if (isset($headers['content-type']) && preg_match('/;\\x20?charset=([^;]*)/i', $headers['content-type'], $charset)) {
                     $encodings[] = strtoupper($charset[1]);
                 }
                 $encodings = array_merge($encodings, SimplePie_Misc::xml_encoding($data));
                 $encodings[] = 'UTF-8';
             } elseif (in_array($sniffed, $text_types) || substr($sniffed, 0, 5) === 'text/' && substr($sniffed, -4) === '+xml') {
                 if (isset($headers['content-type']) && preg_match('/;\\x20?charset=([^;]*)/i', $headers['content-type'], $charset)) {
                     $encodings[] = $charset[1];
                 }
                 $encodings[] = 'US-ASCII';
             } elseif (substr($sniffed, 0, 5) === 'text/') {
                 $encodings[] = 'US-ASCII';
             }
         }
         // Fallback to XML 1.0 Appendix F.1/UTF-8/ISO-8859-1
         $encodings = array_merge($encodings, SimplePie_Misc::xml_encoding($data));
         $encodings[] = 'UTF-8';
         $encodings[] = 'ISO-8859-1';
         // There's no point in trying an encoding twice
         $encodings = array_unique($encodings);
         // If we want the XML, just output that with the most likely encoding and quit
         if ($this->xml_dump) {
             header('Content-type: text/xml; charset=' . $encodings[0]);
             echo $data;
             exit;
         }
         // Loop through each possible encoding, till we return something, or run out of possibilities
         foreach ($encodings as $encoding) {
             // Change the encoding to UTF-8 (as we always use UTF-8 internally)
             if ($utf8_data = SimplePie_Misc::change_encoding($data, $encoding, 'UTF-8')) {
                 // Create new parser
                 $parser = new $this->parser_class();
                 // If it's parsed fine
                 if ($parser->parse($utf8_data, 'UTF-8')) {
                     $this->data = $parser->get_data();
                     if ($this->get_type() & ~SIMPLEPIE_TYPE_NONE) {
                         if (isset($headers)) {
                             $this->data['headers'] = $headers;
                         }
                         $this->data['build'] = SIMPLEPIE_BUILD;
                         // Cache the file if caching is enabled
                         if ($cache && !$cache->save($this)) {
                             trigger_error("{$this->cache_location} is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
                         }
                         return true;
                     } else {
                         $this->error = "A feed could not be found at {$this->feed_url}. This does not appear to be a valid RSS or Atom feed.";
                         SimplePie_Misc::error($this->error, E_USER_NOTICE, __FILE__, __LINE__);
                         return false;
                     }
                 }
             }
         }
         if (isset($parser)) {
             // We have an error, just set SimplePie_Misc::error to it and quit
             $this->error = sprintf('This XML document is invalid, likely due to invalid characters. XML error: %s at line %d, column %d', $parser->get_error_string(), $parser->get_current_line(), $parser->get_current_column());
         } else {
             $this->error = 'The data could not be converted to UTF-8. You MUST have either the iconv or mbstring extension installed. Upgrading to PHP 5.x (which includes iconv) is highly recommended.';
         }
         SimplePie_Misc::error($this->error, E_USER_NOTICE, __FILE__, __LINE__);
         return false;
     } elseif (!empty($this->multifeed_url)) {
         $i = 0;
         $success = 0;
         $this->multifeed_objects = array();
         foreach ($this->multifeed_url as $url) {
             $this->multifeed_objects[$i] = clone $this;
             $this->multifeed_objects[$i]->set_feed_url($url);
             $success |= $this->multifeed_objects[$i]->init();
             $i++;
         }
         return (bool) $success;
     } else {
         return false;
     }
 }
开发者ID:Thingee,项目名称:openstack-org,代码行数:101,代码来源:Core.php

示例4: sanitize

 /**
  * Sanitize a piece of feed data according to its construct type.
  *
  * $type is treated as a bitmask of SIMPLEPIE_CONSTRUCT_* flags; each flag
  * that is set enables one processing step below (base64 decoding, wrapper
  * <div> handling, tag/attribute stripping, URL rewriting, image caching,
  * IRI absolutization, HTML escaping). Finally the result is converted from
  * UTF-8 to $this->output_encoding when that differs from 'UTF-8'.
  *
  * Empty data is returned untouched unless the IRI flag is set.
  *
  * @param string $data Raw data to sanitize
  * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
  * @param string $base Base URL; stored in $this->base and passed to absolutize_url()
  * @return string Sanitized data
  */
 function sanitize($data, $type, $base = '')
 {
     $data = trim($data);
     if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI) {
         // "Maybe HTML": decide between HTML and plain text by looking for an
         // entity reference or a closing tag.
         if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML) {
             if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\\/[A-Za-z][^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x2F\\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data)) {
                 $type |= SIMPLEPIE_CONSTRUCT_HTML;
             } else {
                 $type |= SIMPLEPIE_CONSTRUCT_TEXT;
             }
         }
         if ($type & SIMPLEPIE_CONSTRUCT_BASE64) {
             $data = base64_decode($data);
         }
         // XHTML: either drop the wrapping <div>...</div> entirely, or keep the
         // opening <div> but strip its attributes.
         if ($type & SIMPLEPIE_CONSTRUCT_XHTML) {
             if ($this->remove_div) {
                 $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                 $data = preg_replace('/<\\/div>$/', '', $data);
             } else {
                 $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
             }
         }
         if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) {
             // Strip comments
             if ($this->strip_comments) {
                 $data = SimplePie_Misc::strip_comments($data);
             }
             // Strip out HTML tags and attributes that might cause various security problems.
             // Based on recommendations by Mark Pilgrim at:
             // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
             if ($this->strip_htmltags) {
                 foreach ($this->strip_htmltags as $tag) {
                     $pcre = "/<({$tag})" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . "(>(.*)<\\/{$tag}" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>|(\\/)?>)/siU';
                     // Repeat until no match is left, so nested occurrences are handled too
                     while (preg_match($pcre, $data)) {
                         $data = preg_replace_callback($pcre, array(&$this, 'do_strip_htmltags'), $data);
                     }
                 }
             }
             if ($this->strip_attributes) {
                 foreach ($this->strip_attributes as $attrib) {
                     $data = preg_replace('/(<[A-Za-z][^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x2F\\x3E]*)' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . trim($attrib) . '(?:\\s*=\\s*(?:"(?:[^"]*)"|\'(?:[^\']*)\'|(?:[^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x22\\x27\\x3E][^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x3E]*)?))?' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>/', '\\1\\2\\3>', $data);
                 }
             }
             // Replace relative URLs
             $this->base = $base;
             foreach ($this->replace_url_attributes as $element => $attributes) {
                 $data = $this->replace_urls($data, $element, $attributes);
             }
             // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
             if (isset($this->image_handler) && (string) $this->image_handler !== '' && $this->enable_cache) {
                 $images = SimplePie_Misc::get_element('img', $data);
                 foreach ($images as $img) {
                     if (isset($img['attribs']['src']['data'])) {
                         $image_url = call_user_func($this->cache_name_function, $img['attribs']['src']['data']);
                         $cache = call_user_func(array($this->cache_class, 'create'), $this->cache_location, $image_url, 'spi');
                         if ($cache->load()) {
                             // Already cached: point the tag at the image handler
                             $img['attribs']['src']['data'] = $this->image_handler . $image_url;
                             $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
                         } else {
                             // Plain "= new": the "=& new" form is a parse error on PHP 7 and later
                             $file = new $this->file_class($img['attribs']['src']['data'], $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen);
                             // Only cache successful fetches: status 200, or 207 through 299
                             if ($file->success && ($file->status_code == 200 || $file->status_code > 206 && $file->status_code < 300)) {
                                 if ($cache->save(array('headers' => $file->headers, 'body' => $file->body))) {
                                     $img['attribs']['src']['data'] = $this->image_handler . $image_url;
                                     $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
                                 } else {
                                     trigger_error("{$cache->name} is not writeable", E_USER_WARNING);
                                 }
                             }
                         }
                     }
                 }
             }
             // Having (possibly) taken stuff out, there may now be whitespace at the beginning/end of the data
             $data = trim($data);
         }
         if ($type & SIMPLEPIE_CONSTRUCT_IRI) {
             $data = SimplePie_Misc::absolutize_url($data, $base);
         }
         // Plain text and IRIs are escaped for safe inclusion in HTML
         if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI)) {
             $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
         }
         if ($this->output_encoding != 'UTF-8') {
             $data = SimplePie_Misc::change_encoding($data, 'UTF-8', $this->output_encoding);
         }
     }
     return $data;
 }
开发者ID:jojospaghettio,项目名称:wicketpixie,代码行数:88,代码来源:simplepie.php

示例5: sanitize

 /**
  * Sanitize a piece of feed data according to its construct type.
  *
  * $type is treated as a bitmask of SIMPLEPIE_CONSTRUCT_* flags; each flag
  * that is set enables one processing step below (base64 decoding, wrapper
  * <div> handling, tag/attribute stripping, URL rewriting, image caching,
  * IRI absolutization, HTML escaping). Finally the result is converted from
  * UTF-8 to $this->output_encoding when that is not 'UTF-8'.
  *
  * Empty data is returned untouched unless the IRI flag is set.
  *
  * @param string $data Raw data to sanitize
  * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
  * @param string $base Base URL; stored in $this->base and passed to absolutize_url()
  * @return string Sanitized data
  */
 public function sanitize($data, $type, $base = '')
 {
     $data = trim($data);
     if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI) {
         // "Maybe HTML": decide between HTML and plain text by looking for an
         // entity reference or a closing tag.
         if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML) {
             if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\\/[A-Za-z][^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x2F\\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data)) {
                 $type |= SIMPLEPIE_CONSTRUCT_HTML;
             } else {
                 $type |= SIMPLEPIE_CONSTRUCT_TEXT;
             }
         }
         if ($type & SIMPLEPIE_CONSTRUCT_BASE64) {
             $data = base64_decode($data);
         }
         // XHTML: either drop the wrapping <div>...</div> entirely, or keep the
         // opening <div> but strip its attributes.
         if ($type & SIMPLEPIE_CONSTRUCT_XHTML) {
             if ($this->remove_div) {
                 $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                 $data = preg_replace('/<\\/div>$/', '', $data);
             } else {
                 $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
             }
         }
         if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) {
             // Strip comments
             if ($this->strip_comments) {
                 $data = SimplePie_Misc::strip_comments($data);
             }
             // Strip the configured HTML tags
             if ($this->strip_htmltags) {
                 foreach ($this->strip_htmltags as $tag) {
                     $pcre = "/<({$tag})" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . "(>(.*)<\\/{$tag}" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>|(\\/)?>)/siU';
                     // Repeat until no match is left, so nested occurrences are handled too
                     while (preg_match($pcre, $data)) {
                         $data = preg_replace_callback($pcre, array(&$this, 'do_strip_htmltags'), $data);
                     }
                 }
             }
             // Strip the configured attributes from every tag
             if ($this->strip_attributes) {
                 foreach ($this->strip_attributes as $attrib) {
                     $data = preg_replace('/(<[A-Za-z][^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x2F\\x3E]*)' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . trim($attrib) . '(?:\\s*=\\s*(?:"(?:[^"]*)"|\'(?:[^\']*)\'|(?:[^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x22\\x27\\x3E][^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x3E]*)?))?' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>/', '\\1\\2\\3>', $data);
                 }
             }
             // Replace relative URLs
             $this->base = $base;
             foreach ($this->replace_url_attributes as $element => $attributes) {
                 $data = $this->replace_urls($data, $element, $attributes);
             }
             // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
             if (isset($this->image_handler) && (string) $this->image_handler !== '' && $this->enable_cache) {
                 $images = SimplePie_Misc::get_element('img', $data);
                 foreach ($images as $img) {
                     if (isset($img['attribs']['src']['data'])) {
                         $image_url = call_user_func($this->cache_name_function, $img['attribs']['src']['data']);
                         $cache = call_user_func(array($this->cache_class, 'create'), $this->cache_location, $image_url, 'spi');
                         if ($cache->load()) {
                             // Already cached: point the tag at the image handler
                             $img['attribs']['src']['data'] = $this->image_handler . $image_url;
                             $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
                         } else {
                             $file = new $this->file_class($img['attribs']['src']['data'], $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen);
                             // Cache when the source is not remote, or when the remote
                             // status is 200 or 207 through 299. The bitmask test must be
                             // parenthesized: "===" binds tighter than "&", so without
                             // the parentheses the non-remote check is always false.
                             if ($file->success && (($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) {
                                 if ($cache->save(array('headers' => $file->headers, 'body' => $file->body))) {
                                     $img['attribs']['src']['data'] = $this->image_handler . $image_url;
                                     $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
                                 } else {
                                     trigger_error("{$this->cache_location} is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
                                 }
                             }
                         }
                     }
                 }
             }
             // Having (possibly) taken stuff out, there may now be whitespace at the beginning/end of the data
             $data = trim($data);
         }
         if ($type & SIMPLEPIE_CONSTRUCT_IRI) {
             $data = SimplePie_Misc::absolutize_url($data, $base);
         }
         // Plain text and IRIs are escaped for safe inclusion in HTML
         if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI)) {
             $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
         }
         if ($this->output_encoding !== 'UTF-8') {
             $data = SimplePie_Misc::change_encoding($data, 'UTF-8', $this->output_encoding);
         }
     }
     return $data;
 }
开发者ID:himanshu12k,项目名称:ce-www,代码行数:81,代码来源:SimplePie.compiled.php

示例6: test_nonexistant

 /**
  * Converting from an encoding name that does not exist ('TESTENC')
  * is expected to make change_encoding() return false.
  */
 public function test_nonexistant()
 {
     $converted = SimplePie_Misc::change_encoding('', 'TESTENC', 'UTF-8');

     $this->assertFalse($converted);
 }
开发者ID:navitronic,项目名称:simplepie,代码行数:4,代码来源:EncodingTest.php

示例7: xml_encoding

	/**
	 * Detect XML encoding, as per XML 1.0 Appendix F.1
	 *
	 * @todo Add support for EBCDIC
	 * @param string $data XML data
	 * @return array Possible encodings
	 */
	public static function xml_encoding($data)
	{
		// UTF-32 Big Endian BOM
		if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
		{
			$encoding[] = 'UTF-32BE';
		}
		// UTF-32 Little Endian BOM
		elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
		{
			$encoding[] = 'UTF-32LE';
		}
		// UTF-16 Big Endian BOM
		elseif (substr($data, 0, 2) === "\xFE\xFF")
		{
			$encoding[] = 'UTF-16BE';
		}
		// UTF-16 Little Endian BOM
		elseif (substr($data, 0, 2) === "\xFF\xFE")
		{
			$encoding[] = 'UTF-16LE';
		}
		// UTF-8 BOM
		elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
		{
			$encoding[] = 'UTF-8';
		}
		// UTF-32 Big Endian Without BOM
		elseif (substr($data, 0, 20) === "\x00\x00\x00\x3C\x00\x00\x00\x3F\x00\x00\x00\x78\x00\x00\x00\x6D\x00\x00\x00\x6C")
		{
			if ($pos = strpos($data, "\x00\x00\x00\x3F\x00\x00\x00\x3E"))
			{
				$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32BE', 'UTF-8'));
				if ($parser->parse())
				{
					$encoding[] = $parser->encoding;
				}
			}
			$encoding[] = 'UTF-32BE';
		}
		// UTF-32 Little Endian Without BOM
		elseif (substr($data, 0, 20) === "\x3C\x00\x00\x00\x3F\x00\x00\x00\x78\x00\x00\x00\x6D\x00\x00\x00\x6C\x00\x00\x00")
		{
			if ($pos = strpos($data, "\x3F\x00\x00\x00\x3E\x00\x00\x00"))
			{
				$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32LE', 'UTF-8'));
				if ($parser->parse())
				{
					$encoding[] = $parser->encoding;
				}
			}
			$encoding[] = 'UTF-32LE';
		}
		// UTF-16 Big Endian Without BOM
		elseif (substr($data, 0, 10) === "\x00\x3C\x00\x3F\x00\x78\x00\x6D\x00\x6C")
		{
			if ($pos = strpos($data, "\x00\x3F\x00\x3E"))
			{
				$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 10), 'UTF-16BE', 'UTF-8'));
				if ($parser->parse())
				{
					$encoding[] = $parser->encoding;
				}
			}
			$encoding[] = 'UTF-16BE';
		}
		// UTF-16 Little Endian Without BOM
		elseif (substr($data, 0, 10) === "\x3C\x00\x3F\x00\x78\x00\x6D\x00\x6C\x00")
		{
			if ($pos = strpos($data, "\x3F\x00\x3E\x00"))
			{
				$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 10), 'UTF-16LE', 'UTF-8'));
				if ($parser->parse())
				{
					$encoding[] = $parser->encoding;
				}
			}
			$encoding[] = 'UTF-16LE';
		}
		// US-ASCII (or superset)
		elseif (substr($data, 0, 5) === "\x3C\x3F\x78\x6D\x6C")
		{
			if ($pos = strpos($data, "\x3F\x3E"))
			{
				$parser = new SimplePie_XML_Declaration_Parser(substr($data, 5, $pos - 5));
				if ($parser->parse())
				{
					$encoding[] = $parser->encoding;
				}
			}
			$encoding[] = 'UTF-8';
		}
		// Fallback to UTF-8
//.........这里部分代码省略.........
开发者ID:nicola-amatucci,项目名称:fuel-simplepie,代码行数:101,代码来源:misc.php

示例8: do_entites_decode

 /**
  * Decode a single matched entity into the input encoding, with caching.
  *
  * Only $data[0] is used (presumably the full match handed over by
  * preg_replace_callback - confirm against the caller). Named entities are
  * decoded first; if that leaves the text unchanged, numeric entities are
  * resolved through replace_num_entity() instead. Results are memoized in
  * $this->cached_entities, keyed by the original entity text.
  *
  * @param array $data Match array; element 0 is the entity text
  * @return string|false The entity converted to $this->input_encoding
  *                      (whatever change_encoding() returned)
  */
 function do_entites_decode($data)
 {
     $entity = $data[0];
     if (isset($this->cached_entities[$entity])) {
         return $this->cached_entities[$entity];
     }
     // The decoded text is converted *from* ISO-8859-1 below, so the decoder
     // must be told to emit ISO-8859-1: since PHP 5.4 its default output
     // charset is UTF-8, which would be double-encoded by the conversion.
     $return = SimplePie_Misc::change_encoding(html_entity_decode($entity, ENT_QUOTES, 'ISO-8859-1'), 'ISO-8859-1', $this->input_encoding);
     if ($return === $entity) {
         // Nothing was decoded: fall back to resolving numeric entities
         $return = SimplePie_Misc::change_encoding(preg_replace_callback('/&#([x]?[0-9a-f]+);/mi', array(&$this, 'replace_num_entity'), $entity), 'UTF-8', $this->input_encoding);
     }
     $this->cached_entities[$entity] = $return;
     return $return;
 }
开发者ID:jbogota,项目名称:blog-king,代码行数:13,代码来源:class-simplepie-rss.php

示例9: rpf_convert_to_utf8

function rpf_convert_to_utf8($html, $header = null)
{
    $accept = array('type' => array('application/rss+xml', 'application/xml', 'application/rdf+xml', 'text/xml', 'text/html'), 'charset' => array_diff(mb_list_encodings(), array('pass', 'auto', 'wchar', 'byte2be', 'byte2le', 'byte4be', 'byte4le', 'BASE64', 'UUENCODE', 'HTML-ENTITIES', 'Quoted-Printable', '7bit', '8bit')));
    $encoding = null;
    if ($html || $header) {
        if (is_array($header)) {
            $header = implode("\n", $header);
        }
        if (!$header || !preg_match_all('/^Content-Type:\\s+([^;]+)(?:;\\s*charset=([^;"\'\\n]*))?/im', $header, $match, PREG_SET_ORDER)) {
            // error parsing the response
        } else {
            $match = end($match);
            // get last matched element (in case of redirects)
            if (!in_array(strtolower($match[1]), $accept['type'])) {
                // type not accepted
                // TODO: avoid conversion
            }
            if (isset($match[2])) {
                $encoding = trim($match[2], '"\'');
            }
        }
        if (!$encoding) {
            if (preg_match('/^<\\?xml\\s+version=(?:"[^"]*"|\'[^\']*\')\\s+encoding=("[^"]*"|\'[^\']*\')/s', $html, $match)) {
                $encoding = trim($match[1], '"\'');
            } elseif (preg_match('/<meta\\s+http-equiv=["\']Content-Type["\'] content=["\'][^;]+;\\s*charset=([^;"\'>]+)/i', $html, $match)) {
                if (isset($match[1])) {
                    $encoding = trim($match[1]);
                }
            }
        }
        if (!$encoding) {
            $encoding = 'utf-8';
        } else {
            if (!in_array($encoding, array_map('strtolower', $accept['charset']))) {
                // encoding not accepted
                // TODO: avoid conversion
            }
            if (strtolower($encoding) != 'utf-8') {
                if (strtolower($encoding) == 'iso-8859-1') {
                    // replace MS Word smart quotes
                    $trans = array();
                    $trans[chr(130)] = '&sbquo;';
                    // Single Low-9 Quotation Mark
                    $trans[chr(131)] = '&fnof;';
                    // Latin Small Letter F With Hook
                    $trans[chr(132)] = '&bdquo;';
                    // Double Low-9 Quotation Mark
                    $trans[chr(133)] = '&hellip;';
                    // Horizontal Ellipsis
                    $trans[chr(134)] = '&dagger;';
                    // Dagger
                    $trans[chr(135)] = '&Dagger;';
                    // Double Dagger
                    $trans[chr(136)] = '&circ;';
                    // Modifier Letter Circumflex Accent
                    $trans[chr(137)] = '&permil;';
                    // Per Mille Sign
                    $trans[chr(138)] = '&Scaron;';
                    // Latin Capital Letter S With Caron
                    $trans[chr(139)] = '&lsaquo;';
                    // Single Left-Pointing Angle Quotation Mark
                    $trans[chr(140)] = '&OElig;';
                    // Latin Capital Ligature OE
                    $trans[chr(145)] = '&lsquo;';
                    // Left Single Quotation Mark
                    $trans[chr(146)] = '&rsquo;';
                    // Right Single Quotation Mark
                    $trans[chr(147)] = '&ldquo;';
                    // Left Double Quotation Mark
                    $trans[chr(148)] = '&rdquo;';
                    // Right Double Quotation Mark
                    $trans[chr(149)] = '&bull;';
                    // Bullet
                    $trans[chr(150)] = '&ndash;';
                    // En Dash
                    $trans[chr(151)] = '&mdash;';
                    // Em Dash
                    $trans[chr(152)] = '&tilde;';
                    // Small Tilde
                    $trans[chr(153)] = '&trade;';
                    // Trade Mark Sign
                    $trans[chr(154)] = '&scaron;';
                    // Latin Small Letter S With Caron
                    $trans[chr(155)] = '&rsaquo;';
                    // Single Right-Pointing Angle Quotation Mark
                    $trans[chr(156)] = '&oelig;';
                    // Latin Small Ligature OE
                    $trans[chr(159)] = '&Yuml;';
                    // Latin Capital Letter Y With Diaeresis
                    $html = strtr($html, $trans);
                }
                if (!class_exists('SimplePie_Misc')) {
                    require_once RPFINC . 'simplepie.class.php';
                }
                $html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
                /*
                if (function_exists('iconv')) {
                	// iconv appears to handle certain character encodings better than mb_convert_encoding
                	$html = iconv($encoding, 'utf-8', $html);
                } else {
//.........这里部分代码省略.........
开发者ID:ashray-velapanur,项目名称:grind-members,代码行数:101,代码来源:RSSPoster.php

示例10: convert_to_utf8

/**
 * Convert a fetched document to UTF-8.
 *
 * The source encoding is looked up in this order:
 *   1. the charset parameter of the last Content-Type line found in $header
 *      (the last one is used so that redirects do not mislead us),
 *   2. the encoding attribute of a leading XML declaration in $html,
 *   3. a <meta http-equiv="Content-Type"> charset in $html.
 * When no encoding is found, or the encoding is already UTF-8, the document
 * is returned untouched.
 *
 * Documents declared as ISO-8859-1 first have bytes 130-159 (where MS Word
 * "smart quotes" and similar characters end up) replaced by HTML entities.
 *
 * @param string            $html   Document body
 * @param string|array|null $header Raw response headers, as one string or an array of lines
 * @return string The converted document, or the original $html when no
 *                conversion is needed or the conversion fails
 */
function convert_to_utf8($html, $header = null)
{
    // Nothing to inspect and nothing to convert.
    if (!$html && !$header) {
        return $html;
    }

    if (is_array($header)) {
        $header = implode("\n", $header);
    }

    $encoding = null;

    // 1. HTTP headers.
    if ($header && preg_match_all('/^Content-Type:\\s+([^;]+)(?:;\\s*charset=["\']?([^;"\'\\n]*))?/im', $header, $match, PREG_SET_ORDER)) {
        // get last matched element (in case of redirects)
        $match = end($match);
        if (isset($match[2])) {
            $encoding = trim($match[2], '"\'');
        }
    }

    // 2. and 3. The document itself.
    if (!$encoding) {
        if (preg_match('/^<\\?xml\\s+version=(?:"[^"]*"|\'[^\']*\')\\s+encoding=("[^"]*"|\'[^\']*\')/s', $html, $match)) {
            $encoding = trim($match[1], '"\'');
        } elseif (preg_match('/<meta\\s+http-equiv=["\']Content-Type["\'] content=["\'][^;]+;\\s*charset=["\']?([^;"\'>]+)/i', $html, $match)) {
            if (isset($match[1])) {
                $encoding = trim($match[1]);
            }
        }
    }

    // Unknown encodings are assumed to be UTF-8 already.
    if (!$encoding || strtolower($encoding) == 'utf-8') {
        return $html;
    }

    $prepared = $html;
    if (strtolower($encoding) == 'iso-8859-1') {
        // replace MS Word smart quotes
        $trans = array(
            chr(130) => '&sbquo;',  // Single Low-9 Quotation Mark
            chr(131) => '&fnof;',   // Latin Small Letter F With Hook
            chr(132) => '&bdquo;',  // Double Low-9 Quotation Mark
            chr(133) => '&hellip;', // Horizontal Ellipsis
            chr(134) => '&dagger;', // Dagger
            chr(135) => '&Dagger;', // Double Dagger
            chr(136) => '&circ;',   // Modifier Letter Circumflex Accent
            chr(137) => '&permil;', // Per Mille Sign
            chr(138) => '&Scaron;', // Latin Capital Letter S With Caron
            chr(139) => '&lsaquo;', // Single Left-Pointing Angle Quotation Mark
            chr(140) => '&OElig;',  // Latin Capital Ligature OE
            chr(145) => '&lsquo;',  // Left Single Quotation Mark
            chr(146) => '&rsquo;',  // Right Single Quotation Mark
            chr(147) => '&ldquo;',  // Left Double Quotation Mark
            chr(148) => '&rdquo;',  // Right Double Quotation Mark
            chr(149) => '&bull;',   // Bullet
            chr(150) => '&ndash;',  // En Dash
            chr(151) => '&mdash;',  // Em Dash
            chr(152) => '&tilde;',  // Small Tilde
            chr(153) => '&trade;',  // Trade Mark Sign
            chr(154) => '&scaron;', // Latin Small Letter S With Caron
            chr(155) => '&rsaquo;', // Single Right-Pointing Angle Quotation Mark
            chr(156) => '&oelig;',  // Latin Small Ligature OE
            chr(159) => '&Yuml;',   // Latin Capital Letter Y With Diaeresis
        );
        $prepared = strtr($html, $trans);
    }

    $converted = SimplePie_Misc::change_encoding($prepared, $encoding, 'utf-8');

    // NOTE(review): SimplePie's change_encoding() is understood to return
    // false when it cannot perform the conversion (confirm against the
    // bundled SimplePie version). Keep the original markup in that case
    // instead of replacing the whole document with false.
    if ($converted === false) {
        return $html;
    }

    return $converted;
}
开发者ID:jaimejorge,项目名称:full-text-rss,代码行数:96,代码来源:makefulltextfeed.php

示例11: sanitize

 /**
  * Clean up a piece of feed data according to the construct flags in $type.
  *
  * Depending on which SIMPLEPIE_CONSTRUCT_* bits are set in $type, and on the
  * options configured on this object, the data is base64-decoded, has its
  * wrapping XHTML <div> removed or normalised, has comments, configured tags
  * and configured attributes stripped, has URLs in the configured attributes
  * rewritten against $base, has <img> sources routed through the image
  * handler (when one is set and caching is enabled), is absolutized as an
  * IRI and/or is HTML-escaped. Finally the result is converted from UTF-8 to
  * the configured output encoding when that is not UTF-8.
  *
  * @param string $data Raw data to sanitize
  * @param int    $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
  * @param string $base Base URL used when resolving relative URLs
  * @return string The sanitized data
  */
 function sanitize($data, $type, $base = '')
 {
     $data = trim($data);
     // Empty data is only processed for IRIs, where it is still resolved against $base.
     if ($data !== '' || ($type & SIMPLEPIE_CONSTRUCT_IRI)) {
         if ($type & SIMPLEPIE_CONSTRUCT_BASE64) {
             $data = base64_decode($data);
         }
         if ($type & SIMPLEPIE_CONSTRUCT_XHTML) {
             if ($this->remove_div) {
                 // Drop the wrapping <div> entirely.
                 $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                 $data = preg_replace('/<\\/div>$/', '', $data);
             } else {
                 // Keep the wrapper but discard its attributes.
                 $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
             }
         }
         if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) {
             // Strip comments
             if ($this->strip_comments) {
                 $data = SimplePie_Misc::strip_comments($data);
             }
             // Strip out HTML tags and attributes that might cause various security problems.
             // Based on recommendations by Mark Pilgrim at:
             // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
             if ($this->strip_htmltags) {
                 foreach ($this->strip_htmltags as $tag) {
                     $pcre = "/<({$tag})" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . "(>(.*)<\\/{$tag}" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>|(\\/)?>)/siU';
                     // Repeat until no occurrence is left: a replacement can uncover
                     // further matches (e.g. nested tags).
                     while (preg_match($pcre, $data)) {
                         $data = preg_replace_callback($pcre, array($this, 'do_strip_htmltags'), $data);
                     }
                 }
             }
             if ($this->strip_attributes) {
                 foreach ($this->strip_attributes as $attrib) {
                     // Double-quoted, single-quoted and unquoted attribute values, in that order.
                     $data = preg_replace('/ ' . trim($attrib) . '=("|&quot;)(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|\'|&apos;|<|>|\\+|{|})*("|&quot;)/i', '', $data);
                     $data = preg_replace('/ ' . trim($attrib) . '=(\'|&apos;)(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|"|&quot;|<|>|\\+|{|})*(\'|&apos;)/i', '', $data);
                     $data = preg_replace('/ ' . trim($attrib) . '=(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|\\+|{|})*/i', '', $data);
                 }
             }
             // Replace relative URLs
             $this->base = $base;
             foreach ($this->replace_url_attributes as $element => $attribute) {
                 // Skip element/attribute pairs that have just been stripped anyway.
                 if ((!is_array($this->strip_htmltags) || !in_array($element, $this->strip_htmltags)) && (!is_array($this->strip_attributes) || !in_array($attribute, $this->strip_attributes))) {
                     $data = $this->replace_urls($data, $element, $attribute);
                 }
             }
             // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
             if (isset($this->image_handler) && !empty($this->image_handler) && $this->enable_cache) {
                 $images = SimplePie_Misc::get_element('img', $data);
                 foreach ($images as $img) {
                     if (!empty($img['attribs']['src']['data'])) {
                         $image_url = $img['attribs']['src']['data'];
                         // Plain "= new": assigning the result of new by reference
                         // ("=& new") is a parse error on PHP 7.0 and later.
                         $cache = new $this->cache_class($this->cache_location, call_user_func($this->cache_name_function, $image_url), 'spi');
                         if ($cache->load()) {
                             // Already cached: just point the tag at the image handler.
                             $img['attribs']['src']['data'] = $this->image_handler . rawurlencode($img['attribs']['src']['data']);
                             $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
                         } else {
                             // Not cached yet: fetch the image and store it before rewriting the tag.
                             $file = new $this->file_class($image_url, $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen);
                             if ($file->success && ($file->status_code == 200 || ($file->status_code > 206 && $file->status_code < 300))) {
                                 if (!$cache->save(array('headers' => $file->headers, 'body' => $file->body))) {
                                     trigger_error("{$cache->name} is not writeable", E_USER_WARNING);
                                 }
                                 $img['attribs']['src']['data'] = $this->image_handler . rawurlencode($img['attribs']['src']['data']);
                                 $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
                             }
                         }
                     }
                 }
             }
             // Having (possibly) taken stuff out, there may now be whitespace at the beginning/end of the data
             $data = trim($data);
         }
         if ($type & SIMPLEPIE_CONSTRUCT_IRI) {
             $data = SimplePie_Misc::absolutize_url($data, $base);
         }
         if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI)) {
             $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
         }
         if ($this->output_encoding != 'UTF-8') {
             $data = SimplePie_Misc::change_encoding($data, 'UTF-8', $this->output_encoding);
         }
     }
     return $data;
 }
开发者ID:searchfirst,项目名称:Lenore,代码行数:84,代码来源:simplepie.php


注:本文中的SimplePie_Misc::change_encoding方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。