本文整理汇总了PHP中SimplePie_Misc::change_encoding方法的典型用法代码示例。如果您正苦于以下问题：PHP SimplePie_Misc::change_encoding方法的具体用法？PHP SimplePie_Misc::change_encoding怎么用？PHP SimplePie_Misc::change_encoding使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类SimplePie_Misc的用法示例。
在下文中一共展示了SimplePie_Misc::change_encoding方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: xml_encoding
/**
 * Detect XML encoding, as per XML 1.0 Appendix F.1
 *
 * The start of $data is compared against the byte order marks first. If no
 * BOM is present, it is compared against the byte patterns of an XML
 * declaration opener in UTF-32BE/LE, UTF-16BE/LE and ASCII-compatible
 * encodings. When an opener matches and the declaration terminator is found,
 * the declaration body (transcoded to UTF-8 where needed) is handed to the
 * registry's 'XML_Declaration_Parser'; a successfully parsed encoding is
 * listed ahead of the sniffed one.
 *
 * All signatures are written as hex escapes so the source file stays pure
 * ASCII and cannot be damaged by re-encoding.
 *
 * @todo Add support for EBCDIC
 * @param string $data XML data
 * @param SimplePie_Registry $registry Class registry
 * @return array Possible encodings, most likely first
 */
public static function xml_encoding($data, $registry)
{
    // Byte order marks. Order matters: the UTF-32LE mark begins with the
    // UTF-16LE mark, so the four-byte marks must be tested first.
    $boms = array(
        "\x00\x00\xFE\xFF" => 'UTF-32BE',
        "\xFF\xFE\x00\x00" => 'UTF-32LE',
        "\xFE\xFF"         => 'UTF-16BE',
        "\xFF\xFE"         => 'UTF-16LE',
        "\xEF\xBB\xBF"     => 'UTF-8',
    );
    foreach ($boms as $bom => $name) {
        if (substr($data, 0, strlen($bom)) === $bom) {
            return array($name);
        }
    }

    // Declaration opener, declaration terminator, encoding to transcode the
    // declaration body from (null = already ASCII-compatible), and the
    // encoding reported as fallback for that family.
    $declarations = array(
        // UTF-32 Big Endian Without BOM
        array(
            "\x00\x00\x00\x3C\x00\x00\x00\x3F\x00\x00\x00\x78\x00\x00\x00\x6D\x00\x00\x00\x6C",
            "\x00\x00\x00\x3F\x00\x00\x00\x3E",
            'UTF-32BE',
            'UTF-32BE',
        ),
        // UTF-32 Little Endian Without BOM
        array(
            "\x3C\x00\x00\x00\x3F\x00\x00\x00\x78\x00\x00\x00\x6D\x00\x00\x00\x6C\x00\x00\x00",
            "\x3F\x00\x00\x00\x3E\x00\x00\x00",
            'UTF-32LE',
            'UTF-32LE',
        ),
        // UTF-16 Big Endian Without BOM
        array(
            "\x00\x3C\x00\x3F\x00\x78\x00\x6D\x00\x6C",
            "\x00\x3F\x00\x3E",
            'UTF-16BE',
            'UTF-16BE',
        ),
        // UTF-16 Little Endian Without BOM
        array(
            "\x3C\x00\x3F\x00\x78\x00\x6D\x00\x6C\x00",
            "\x3F\x00\x3E\x00",
            'UTF-16LE',
            'UTF-16LE',
        ),
        // US-ASCII (or superset)
        array(
            "\x3C\x3F\x78\x6D\x6C",
            "\x3F\x3E",
            null,
            'UTF-8',
        ),
    );

    foreach ($declarations as $declaration) {
        list($opener, $terminator, $source, $fallback) = $declaration;
        $skip = strlen($opener);
        if (substr($data, 0, $skip) !== $opener) {
            continue;
        }

        $encoding = array();
        if ($pos = strpos($data, $terminator)) {
            // Skip exactly the opener that was matched; the body is
            // everything between it and the terminator.
            $body = substr($data, $skip, $pos - $skip);
            if ($source !== null) {
                $body = SimplePie_Misc::change_encoding($body, $source, 'UTF-8');
            }
            $parser = $registry->create('XML_Declaration_Parser', array($body));
            if ($parser->parse()) {
                $encoding[] = $parser->encoding;
            }
        }
        $encoding[] = $fallback;
        return $encoding;
    }

    // Fallback to UTF-8
    return array('UTF-8');
}
示例2: convert_to_utf8
//.........这里部分代码省略.........
debug('Could not find Content-Type header in HTTP response');
} else {
$match = end($match);
// get last matched element (in case of redirects)
if (isset($match[2])) {
$encoding = trim($match[2], "\"' \r\n\v\t");
}
}
// TODO: check to see if encoding is supported (can we convert it?)
// If it's not, result will be empty string.
// For now we'll check for invalid encoding types returned by some sites, e.g. 'none'
// Problem URL: http://facta.co.jp/blog/archives/20111026001026.html
if (!$encoding || $encoding == 'none') {
// search for encoding in HTML - only look at the first 50000 characters
// Why 50000? See, for example, http://www.lemonde.fr/festival-de-cannes/article/2012/05/23/deux-cretes-en-goguette-sur-la-croisette_1705732_766360.html
// TODO: improve this so it looks at smaller chunks first
$html_head = substr($html, 0, 50000);
if (preg_match('/^<\\?xml\\s+version=(?:"[^"]*"|\'[^\']*\')\\s+encoding=("[^"]*"|\'[^\']*\')/s', $html_head, $match)) {
$encoding = trim($match[1], '"\'');
} elseif (preg_match('/<meta\\s+http-equiv=["\']?Content-Type["\']? content=["\'][^;]+;\\s*charset=["\']?([^;"\'>]+)/i', $html_head, $match)) {
$encoding = trim($match[1]);
} elseif (preg_match_all('/<meta\\s+([^>]+)>/i', $html_head, $match)) {
foreach ($match[1] as $_test) {
if (preg_match('/charset=["\']?([^"\']+)/i', $_test, $_m)) {
$encoding = trim($_m[1]);
break;
}
}
}
}
if (isset($encoding)) {
$encoding = trim($encoding);
}
// trim is important here!
if (!$encoding || strtolower($encoding) == 'iso-8859-1') {
// replace MS Word smart quotes
$trans = array();
$trans[chr(130)] = '‚';
// Single Low-9 Quotation Mark
$trans[chr(131)] = 'ƒ';
// Latin Small Letter F With Hook
$trans[chr(132)] = '„';
// Double Low-9 Quotation Mark
$trans[chr(133)] = '…';
// Horizontal Ellipsis
$trans[chr(134)] = '†';
// Dagger
$trans[chr(135)] = '‡';
// Double Dagger
$trans[chr(136)] = 'ˆ';
// Modifier Letter Circumflex Accent
$trans[chr(137)] = '‰';
// Per Mille Sign
$trans[chr(138)] = 'Š';
// Latin Capital Letter S With Caron
$trans[chr(139)] = '‹';
// Single Left-Pointing Angle Quotation Mark
$trans[chr(140)] = 'Œ';
// Latin Capital Ligature OE
$trans[chr(145)] = '‘';
// Left Single Quotation Mark
$trans[chr(146)] = '’';
// Right Single Quotation Mark
$trans[chr(147)] = '“';
// Left Double Quotation Mark
$trans[chr(148)] = '”';
// Right Double Quotation Mark
$trans[chr(149)] = '•';
// Bullet
$trans[chr(150)] = '–';
// En Dash
$trans[chr(151)] = '—';
// Em Dash
$trans[chr(152)] = '˜';
// Small Tilde
$trans[chr(153)] = '™';
// Trade Mark Sign
$trans[chr(154)] = 'š';
// Latin Small Letter S With Caron
$trans[chr(155)] = '›';
// Single Right-Pointing Angle Quotation Mark
$trans[chr(156)] = 'œ';
// Latin Small Ligature OE
$trans[chr(159)] = 'Ÿ';
// Latin Capital Letter Y With Diaeresis
$html = strtr($html, $trans);
}
if (!$encoding) {
debug('No character encoding found, so treating as UTF-8');
$encoding = 'utf-8';
} else {
debug('Character encoding: ' . $encoding);
if (strtolower($encoding) != 'utf-8') {
debug('Converting to UTF-8');
$html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
}
}
}
return $html;
}
示例3: init
//.........这里部分代码省略.........
SimplePie_Misc::error($this->error, E_USER_NOTICE, __FILE__, __LINE__);
return false;
}
}
$locate = null;
}
$headers = $file->headers;
$data = $file->body;
$sniffer = new $this->content_type_sniffer_class($file);
$sniffed = $sniffer->get_type();
} else {
$data = $this->raw_data;
}
// Set up array of possible encodings
$encodings = array();
// First check to see if input has been overridden.
if ($this->input_encoding !== false) {
$encodings[] = $this->input_encoding;
}
$application_types = array('application/xml', 'application/xml-dtd', 'application/xml-external-parsed-entity');
$text_types = array('text/xml', 'text/xml-external-parsed-entity');
// RFC 3023 (only applies to sniffed content)
if (isset($sniffed)) {
if (in_array($sniffed, $application_types) || substr($sniffed, 0, 12) === 'application/' && substr($sniffed, -4) === '+xml') {
if (isset($headers['content-type']) && preg_match('/;\\x20?charset=([^;]*)/i', $headers['content-type'], $charset)) {
$encodings[] = strtoupper($charset[1]);
}
$encodings = array_merge($encodings, SimplePie_Misc::xml_encoding($data));
$encodings[] = 'UTF-8';
} elseif (in_array($sniffed, $text_types) || substr($sniffed, 0, 5) === 'text/' && substr($sniffed, -4) === '+xml') {
if (isset($headers['content-type']) && preg_match('/;\\x20?charset=([^;]*)/i', $headers['content-type'], $charset)) {
$encodings[] = $charset[1];
}
$encodings[] = 'US-ASCII';
} elseif (substr($sniffed, 0, 5) === 'text/') {
$encodings[] = 'US-ASCII';
}
}
// Fallback to XML 1.0 Appendix F.1/UTF-8/ISO-8859-1
$encodings = array_merge($encodings, SimplePie_Misc::xml_encoding($data));
$encodings[] = 'UTF-8';
$encodings[] = 'ISO-8859-1';
// There's no point in trying an encoding twice
$encodings = array_unique($encodings);
// If we want the XML, just output that with the most likely encoding and quit
if ($this->xml_dump) {
header('Content-type: text/xml; charset=' . $encodings[0]);
echo $data;
exit;
}
// Loop through each possible encoding, till we return something, or run out of possibilities
foreach ($encodings as $encoding) {
// Change the encoding to UTF-8 (as we always use UTF-8 internally)
if ($utf8_data = SimplePie_Misc::change_encoding($data, $encoding, 'UTF-8')) {
// Create new parser
$parser = new $this->parser_class();
// If it's parsed fine
if ($parser->parse($utf8_data, 'UTF-8')) {
$this->data = $parser->get_data();
if ($this->get_type() & ~SIMPLEPIE_TYPE_NONE) {
if (isset($headers)) {
$this->data['headers'] = $headers;
}
$this->data['build'] = SIMPLEPIE_BUILD;
// Cache the file if caching is enabled
if ($cache && !$cache->save($this)) {
trigger_error("{$this->cache_location} is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
}
return true;
} else {
$this->error = "A feed could not be found at {$this->feed_url}. This does not appear to be a valid RSS or Atom feed.";
SimplePie_Misc::error($this->error, E_USER_NOTICE, __FILE__, __LINE__);
return false;
}
}
}
}
if (isset($parser)) {
// We have an error, just set SimplePie_Misc::error to it and quit
$this->error = sprintf('This XML document is invalid, likely due to invalid characters. XML error: %s at line %d, column %d', $parser->get_error_string(), $parser->get_current_line(), $parser->get_current_column());
} else {
$this->error = 'The data could not be converted to UTF-8. You MUST have either the iconv or mbstring extension installed. Upgrading to PHP 5.x (which includes iconv) is highly recommended.';
}
SimplePie_Misc::error($this->error, E_USER_NOTICE, __FILE__, __LINE__);
return false;
} elseif (!empty($this->multifeed_url)) {
$i = 0;
$success = 0;
$this->multifeed_objects = array();
foreach ($this->multifeed_url as $url) {
$this->multifeed_objects[$i] = clone $this;
$this->multifeed_objects[$i]->set_feed_url($url);
$success |= $this->multifeed_objects[$i]->init();
$i++;
}
return (bool) $success;
} else {
return false;
}
}
示例4: sanitize
/**
 * Clean one piece of feed data according to the construct flags in $type.
 *
 * In order: trims the input, classifies MAYBE_HTML data as HTML or text,
 * base64-decodes, unwraps or normalises the leading XHTML <div>, strips
 * comments, configured tags and configured attributes, rewrites URL
 * attributes against $base, optionally caches images and points their src
 * at the image handler, absolutizes IRIs, escapes text/IRI output, and
 * finally converts to the configured output encoding.
 *
 * @param string $data Raw value to sanitize
 * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
 * @param string $base Base URL passed to the URL-resolving helpers
 * @return string Sanitized data; when the output encoding is not UTF-8 this
 *                is whatever SimplePie_Misc::change_encoding() returns
 */
function sanitize($data, $type, $base = '')
{
    $data = trim($data);
    if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI) {
        if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML) {
            // An entity reference or a closing tag is taken as evidence of HTML.
            if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\\/[A-Za-z][^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x2F\\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data)) {
                $type |= SIMPLEPIE_CONSTRUCT_HTML;
            } else {
                $type |= SIMPLEPIE_CONSTRUCT_TEXT;
            }
        }

        if ($type & SIMPLEPIE_CONSTRUCT_BASE64) {
            $data = base64_decode($data);
        }

        if ($type & SIMPLEPIE_CONSTRUCT_XHTML) {
            if ($this->remove_div) {
                $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                $data = preg_replace('/<\\/div>$/', '', $data);
            } else {
                $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
            }
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) {
            // Strip comments
            if ($this->strip_comments) {
                $data = SimplePie_Misc::strip_comments($data);
            }

            // Strip out HTML tags and attributes that might cause various security problems.
            // Based on recommendations by Mark Pilgrim at:
            // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
            if ($this->strip_htmltags) {
                foreach ($this->strip_htmltags as $tag) {
                    $pcre = "/<({$tag})" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . "(>(.*)<\\/{$tag}" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>|(\\/)?>)/siU';
                    // Repeat until no match is left so nested occurrences are handled too.
                    while (preg_match($pcre, $data)) {
                        $data = preg_replace_callback($pcre, array(&$this, 'do_strip_htmltags'), $data);
                    }
                }
            }

            if ($this->strip_attributes) {
                foreach ($this->strip_attributes as $attrib) {
                    $data = preg_replace('/(<[A-Za-z][^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x2F\\x3E]*)' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . trim($attrib) . '(?:\\s*=\\s*(?:"(?:[^"]*)"|\'(?:[^\']*)\'|(?:[^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x22\\x27\\x3E][^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x3E]*)?))?' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>/', '\\1\\2\\3>', $data);
                }
            }

            // Replace relative URLs
            $this->base = $base;
            foreach ($this->replace_url_attributes as $element => $attributes) {
                $data = $this->replace_urls($data, $element, $attributes);
            }

            // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
            if (isset($this->image_handler) && (string) $this->image_handler !== '' && $this->enable_cache) {
                $images = SimplePie_Misc::get_element('img', $data);
                foreach ($images as $img) {
                    if (isset($img['attribs']['src']['data'])) {
                        $image_url = call_user_func($this->cache_name_function, $img['attribs']['src']['data']);
                        $cache = call_user_func(array($this->cache_class, 'create'), $this->cache_location, $image_url, 'spi');
                        if ($cache->load()) {
                            $img['attribs']['src']['data'] = $this->image_handler . $image_url;
                            $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
                        } else {
                            // Plain assignment: "=& new" is a parse error on PHP 7+,
                            // and objects are handles anyway.
                            $file = new $this->file_class($img['attribs']['src']['data'], $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen);
                            if ($file->success && ($file->status_code == 200 || $file->status_code > 206 && $file->status_code < 300)) {
                                if ($cache->save(array('headers' => $file->headers, 'body' => $file->body))) {
                                    $img['attribs']['src']['data'] = $this->image_handler . $image_url;
                                    $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
                                } else {
                                    trigger_error("{$cache->name} is not writeable", E_USER_WARNING);
                                }
                            }
                        }
                    }
                }
            }

            // Having (possibly) taken stuff out, there may now be whitespace at the beginning/end of the data
            $data = trim($data);
        }

        if ($type & SIMPLEPIE_CONSTRUCT_IRI) {
            $data = SimplePie_Misc::absolutize_url($data, $base);
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI)) {
            $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
        }

        if ($this->output_encoding != 'UTF-8') {
            $data = SimplePie_Misc::change_encoding($data, 'UTF-8', $this->output_encoding);
        }
    }
    return $data;
}
示例5: sanitize
/**
 * Clean one piece of feed data according to the construct flags in $type.
 *
 * In order: trims the input, classifies MAYBE_HTML data as HTML or text,
 * base64-decodes, unwraps or normalises the leading XHTML <div>, strips
 * comments, configured tags and configured attributes, rewrites URL
 * attributes against $base, optionally caches images and points their src
 * at the image handler, absolutizes IRIs, escapes text/IRI output, and
 * finally converts to the configured output encoding.
 *
 * @param string $data Raw value to sanitize
 * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
 * @param string $base Base URL passed to the URL-resolving helpers
 * @return string Sanitized data; when the output encoding is not UTF-8 this
 *                is whatever SimplePie_Misc::change_encoding() returns
 */
public function sanitize($data, $type, $base = '')
{
    $data = trim($data);
    if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI) {
        if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML) {
            // An entity reference or a closing tag is taken as evidence of HTML.
            if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\\/[A-Za-z][^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x2F\\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data)) {
                $type |= SIMPLEPIE_CONSTRUCT_HTML;
            } else {
                $type |= SIMPLEPIE_CONSTRUCT_TEXT;
            }
        }

        if ($type & SIMPLEPIE_CONSTRUCT_BASE64) {
            $data = base64_decode($data);
        }

        if ($type & SIMPLEPIE_CONSTRUCT_XHTML) {
            if ($this->remove_div) {
                $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                $data = preg_replace('/<\\/div>$/', '', $data);
            } else {
                $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
            }
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) {
            // Strip comments
            if ($this->strip_comments) {
                $data = SimplePie_Misc::strip_comments($data);
            }

            // Strip configured tags; repeat until no match is left so nested
            // occurrences are handled too.
            if ($this->strip_htmltags) {
                foreach ($this->strip_htmltags as $tag) {
                    $pcre = "/<({$tag})" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . "(>(.*)<\\/{$tag}" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>|(\\/)?>)/siU';
                    while (preg_match($pcre, $data)) {
                        $data = preg_replace_callback($pcre, array(&$this, 'do_strip_htmltags'), $data);
                    }
                }
            }

            // Strip configured attributes
            if ($this->strip_attributes) {
                foreach ($this->strip_attributes as $attrib) {
                    $data = preg_replace('/(<[A-Za-z][^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x2F\\x3E]*)' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . trim($attrib) . '(?:\\s*=\\s*(?:"(?:[^"]*)"|\'(?:[^\']*)\'|(?:[^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x22\\x27\\x3E][^\\x09\\x0A\\x0B\\x0C\\x0D\\x20\\x3E]*)?))?' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>/', '\\1\\2\\3>', $data);
                }
            }

            // Replace relative URLs
            $this->base = $base;
            foreach ($this->replace_url_attributes as $element => $attributes) {
                $data = $this->replace_urls($data, $element, $attributes);
            }

            // If image handling is enabled, cache and rewrite all the image tags.
            if (isset($this->image_handler) && (string) $this->image_handler !== '' && $this->enable_cache) {
                $images = SimplePie_Misc::get_element('img', $data);
                foreach ($images as $img) {
                    if (isset($img['attribs']['src']['data'])) {
                        $image_url = call_user_func($this->cache_name_function, $img['attribs']['src']['data']);
                        $cache = call_user_func(array($this->cache_class, 'create'), $this->cache_location, $image_url, 'spi');
                        if ($cache->load()) {
                            $img['attribs']['src']['data'] = $this->image_handler . $image_url;
                            $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
                        } else {
                            $file = new $this->file_class($img['attribs']['src']['data'], $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen);
                            // The bitwise test must be parenthesised: "===" binds
                            // tighter than "&", so without the parentheses the
                            // non-remote check always evaluated to 0.
                            if ($file->success && (($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) {
                                if ($cache->save(array('headers' => $file->headers, 'body' => $file->body))) {
                                    $img['attribs']['src']['data'] = $this->image_handler . $image_url;
                                    $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
                                } else {
                                    trigger_error("{$this->cache_location} is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
                                }
                            }
                        }
                    }
                }
            }

            // Stripping may have left whitespace at either end.
            $data = trim($data);
        }

        if ($type & SIMPLEPIE_CONSTRUCT_IRI) {
            $data = SimplePie_Misc::absolutize_url($data, $base);
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI)) {
            $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
        }

        if ($this->output_encoding !== 'UTF-8') {
            $data = SimplePie_Misc::change_encoding($data, 'UTF-8', $this->output_encoding);
        }
    }
    return $data;
}
示例6: test_nonexistant
/**
 * Converting from an encoding name that does not exist ('TESTENC') is
 * expected to yield false.
 */
public function test_nonexistant()
{
    $converted = SimplePie_Misc::change_encoding('', 'TESTENC', 'UTF-8');

    $this->assertFalse($converted);
}
示例7: xml_encoding
/**
* Detect XML encoding, as per XML 1.0 Appendix F.1
*
* @todo Add support for EBCDIC
* @param string $data XML data
* @return array Possible encodings
*/
public static function xml_encoding($data)
{
// UTF-32 Big Endian BOM
if (substr($data, 0, 4) === "\x00\x00\xFE\xFF")
{
$encoding[] = 'UTF-32BE';
}
// UTF-32 Little Endian BOM
elseif (substr($data, 0, 4) === "\xFF\xFE\x00\x00")
{
$encoding[] = 'UTF-32LE';
}
// UTF-16 Big Endian BOM
elseif (substr($data, 0, 2) === "\xFE\xFF")
{
$encoding[] = 'UTF-16BE';
}
// UTF-16 Little Endian BOM
elseif (substr($data, 0, 2) === "\xFF\xFE")
{
$encoding[] = 'UTF-16LE';
}
// UTF-8 BOM
elseif (substr($data, 0, 3) === "\xEF\xBB\xBF")
{
$encoding[] = 'UTF-8';
}
// UTF-32 Big Endian Without BOM
elseif (substr($data, 0, 20) === "\x00\x00\x00\x3C\x00\x00\x00\x3F\x00\x00\x00\x78\x00\x00\x00\x6D\x00\x00\x00\x6C")
{
if ($pos = strpos($data, "\x00\x00\x00\x3F\x00\x00\x00\x3E"))
{
$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32BE', 'UTF-8'));
if ($parser->parse())
{
$encoding[] = $parser->encoding;
}
}
$encoding[] = 'UTF-32BE';
}
// UTF-32 Little Endian Without BOM
elseif (substr($data, 0, 20) === "\x3C\x00\x00\x00\x3F\x00\x00\x00\x78\x00\x00\x00\x6D\x00\x00\x00\x6C\x00\x00\x00")
{
if ($pos = strpos($data, "\x3F\x00\x00\x00\x3E\x00\x00\x00"))
{
$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 20), 'UTF-32LE', 'UTF-8'));
if ($parser->parse())
{
$encoding[] = $parser->encoding;
}
}
$encoding[] = 'UTF-32LE';
}
// UTF-16 Big Endian Without BOM
elseif (substr($data, 0, 10) === "\x00\x3C\x00\x3F\x00\x78\x00\x6D\x00\x6C")
{
if ($pos = strpos($data, "\x00\x3F\x00\x3E"))
{
$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 10), 'UTF-16BE', 'UTF-8'));
if ($parser->parse())
{
$encoding[] = $parser->encoding;
}
}
$encoding[] = 'UTF-16BE';
}
// UTF-16 Little Endian Without BOM
elseif (substr($data, 0, 10) === "\x3C\x00\x3F\x00\x78\x00\x6D\x00\x6C\x00")
{
if ($pos = strpos($data, "\x3F\x00\x3E\x00"))
{
$parser = new SimplePie_XML_Declaration_Parser(SimplePie_Misc::change_encoding(substr($data, 20, $pos - 10), 'UTF-16LE', 'UTF-8'));
if ($parser->parse())
{
$encoding[] = $parser->encoding;
}
}
$encoding[] = 'UTF-16LE';
}
// US-ASCII (or superset)
elseif (substr($data, 0, 5) === "\x3C\x3F\x78\x6D\x6C")
{
if ($pos = strpos($data, "\x3F\x3E"))
{
$parser = new SimplePie_XML_Declaration_Parser(substr($data, 5, $pos - 5));
if ($parser->parse())
{
$encoding[] = $parser->encoding;
}
}
$encoding[] = 'UTF-8';
}
// Fallback to UTF-8
//.........这里部分代码省略.........
示例8: do_entites_decode
/**
 * Decode the entity text in $data[0] into the input encoding, memoising the
 * result in $this->cached_entities.
 *
 * The text is first decoded with html_entity_decode() as ISO-8859-1 and
 * converted to the input encoding. If that leaves it unchanged, numeric
 * entities are expanded through replace_num_entity() and the result is
 * converted from UTF-8 instead.
 *
 * @param array $data Element 0 is the text to decode (presumably a regex
 *                    match array from a callback - confirm against caller)
 * @return mixed Result of SimplePie_Misc::change_encoding() for that text
 */
function do_entites_decode($data)
{
    $entity = $data[0];

    if (isset($this->cached_entities[$entity])) {
        return $this->cached_entities[$entity];
    }

    // The charset is passed explicitly: html_entity_decode() has defaulted to
    // UTF-8 since PHP 5.4, but the result is converted *from* ISO-8859-1
    // below, so relying on the default would double-encode decoded characters.
    $decoded = html_entity_decode($entity, ENT_QUOTES, 'ISO-8859-1');
    $return = SimplePie_Misc::change_encoding($decoded, 'ISO-8859-1', $this->input_encoding);

    if ($return == $entity) {
        // Nothing was decoded; fall back to expanding numeric entities.
        $expanded = preg_replace_callback('/&#([x]?[0-9a-f]+);/mi', array(&$this, 'replace_num_entity'), $entity);
        $return = SimplePie_Misc::change_encoding($expanded, 'UTF-8', $this->input_encoding);
    }

    $this->cached_entities[$entity] = $return;
    return $return;
}
示例9: rpf_convert_to_utf8
function rpf_convert_to_utf8($html, $header = null)
{
$accept = array('type' => array('application/rss+xml', 'application/xml', 'application/rdf+xml', 'text/xml', 'text/html'), 'charset' => array_diff(mb_list_encodings(), array('pass', 'auto', 'wchar', 'byte2be', 'byte2le', 'byte4be', 'byte4le', 'BASE64', 'UUENCODE', 'HTML-ENTITIES', 'Quoted-Printable', '7bit', '8bit')));
$encoding = null;
if ($html || $header) {
if (is_array($header)) {
$header = implode("\n", $header);
}
if (!$header || !preg_match_all('/^Content-Type:\\s+([^;]+)(?:;\\s*charset=([^;"\'\\n]*))?/im', $header, $match, PREG_SET_ORDER)) {
// error parsing the response
} else {
$match = end($match);
// get last matched element (in case of redirects)
if (!in_array(strtolower($match[1]), $accept['type'])) {
// type not accepted
// TODO: avoid conversion
}
if (isset($match[2])) {
$encoding = trim($match[2], '"\'');
}
}
if (!$encoding) {
if (preg_match('/^<\\?xml\\s+version=(?:"[^"]*"|\'[^\']*\')\\s+encoding=("[^"]*"|\'[^\']*\')/s', $html, $match)) {
$encoding = trim($match[1], '"\'');
} elseif (preg_match('/<meta\\s+http-equiv=["\']Content-Type["\'] content=["\'][^;]+;\\s*charset=([^;"\'>]+)/i', $html, $match)) {
if (isset($match[1])) {
$encoding = trim($match[1]);
}
}
}
if (!$encoding) {
$encoding = 'utf-8';
} else {
if (!in_array($encoding, array_map('strtolower', $accept['charset']))) {
// encoding not accepted
// TODO: avoid conversion
}
if (strtolower($encoding) != 'utf-8') {
if (strtolower($encoding) == 'iso-8859-1') {
// replace MS Word smart quotes
$trans = array();
$trans[chr(130)] = '‚';
// Single Low-9 Quotation Mark
$trans[chr(131)] = 'ƒ';
// Latin Small Letter F With Hook
$trans[chr(132)] = '„';
// Double Low-9 Quotation Mark
$trans[chr(133)] = '…';
// Horizontal Ellipsis
$trans[chr(134)] = '†';
// Dagger
$trans[chr(135)] = '‡';
// Double Dagger
$trans[chr(136)] = 'ˆ';
// Modifier Letter Circumflex Accent
$trans[chr(137)] = '‰';
// Per Mille Sign
$trans[chr(138)] = 'Š';
// Latin Capital Letter S With Caron
$trans[chr(139)] = '‹';
// Single Left-Pointing Angle Quotation Mark
$trans[chr(140)] = 'Œ';
// Latin Capital Ligature OE
$trans[chr(145)] = '‘';
// Left Single Quotation Mark
$trans[chr(146)] = '’';
// Right Single Quotation Mark
$trans[chr(147)] = '“';
// Left Double Quotation Mark
$trans[chr(148)] = '”';
// Right Double Quotation Mark
$trans[chr(149)] = '•';
// Bullet
$trans[chr(150)] = '–';
// En Dash
$trans[chr(151)] = '—';
// Em Dash
$trans[chr(152)] = '˜';
// Small Tilde
$trans[chr(153)] = '™';
// Trade Mark Sign
$trans[chr(154)] = 'š';
// Latin Small Letter S With Caron
$trans[chr(155)] = '›';
// Single Right-Pointing Angle Quotation Mark
$trans[chr(156)] = 'œ';
// Latin Small Ligature OE
$trans[chr(159)] = 'Ÿ';
// Latin Capital Letter Y With Diaeresis
$html = strtr($html, $trans);
}
if (!class_exists('SimplePie_Misc')) {
require_once RPFINC . 'simplepie.class.php';
}
$html = SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
/*
if (function_exists('iconv')) {
// iconv appears to handle certain character encodings better than mb_convert_encoding
$html = iconv($encoding, 'utf-8', $html);
} else {
//.........这里部分代码省略.........
示例10: convert_to_utf8
/**
 * Convert an HTML/XML document to UTF-8.
 *
 * The source encoding is taken from the last Content-Type header in $header
 * (the last one wins, in case of redirects), then from the XML declaration,
 * then from a <meta http-equiv="Content-Type"> tag. When none is found the
 * document is assumed to be UTF-8 and returned untouched.
 *
 * For documents labelled ISO-8859-1, bytes 130-159 (the punctuation that MS
 * Word emits) are first replaced with HTML entity references. The
 * replacements are plain ASCII so they pass through the subsequent
 * ISO-8859-1 to UTF-8 conversion unchanged.
 *
 * @param string $html Document body
 * @param string|array|null $header Raw response headers, as one string or a
 *                                  list of lines
 * @return mixed $html unchanged, or the result of
 *               SimplePie_Misc::change_encoding() when a conversion ran
 */
function convert_to_utf8($html, $header = null)
{
    $encoding = null;
    if (!$html && !$header) {
        return $html;
    }

    if (is_array($header)) {
        $header = implode("\n", $header);
    }

    // 1. HTTP header
    if ($header && preg_match_all('/^Content-Type:\\s+([^;]+)(?:;\\s*charset=["\']?([^;"\'\\n]*))?/im', $header, $match, PREG_SET_ORDER)) {
        // get last matched element (in case of redirects)
        $match = end($match);
        if (isset($match[2])) {
            $encoding = trim($match[2], '"\'');
        }
    }

    // 2. XML declaration, 3. meta tag
    if (!$encoding) {
        if (preg_match('/^<\\?xml\\s+version=(?:"[^"]*"|\'[^\']*\')\\s+encoding=("[^"]*"|\'[^\']*\')/s', $html, $match)) {
            $encoding = trim($match[1], '"\'');
        } elseif (preg_match('/<meta\\s+http-equiv=["\']Content-Type["\'] content=["\'][^;]+;\\s*charset=["\']?([^;"\'>]+)/i', $html, $match)) {
            if (isset($match[1])) {
                $encoding = trim($match[1]);
            }
        }
    }

    // Nothing declared, or already UTF-8: nothing to convert.
    if (!$encoding || strtolower($encoding) == 'utf-8') {
        return $html;
    }

    if (strtolower($encoding) == 'iso-8859-1') {
        // replace MS Word smart quotes
        $trans = array(
            chr(130) => '&sbquo;',  // Single Low-9 Quotation Mark
            chr(131) => '&fnof;',   // Latin Small Letter F With Hook
            chr(132) => '&bdquo;',  // Double Low-9 Quotation Mark
            chr(133) => '&hellip;', // Horizontal Ellipsis
            chr(134) => '&dagger;', // Dagger
            chr(135) => '&Dagger;', // Double Dagger
            chr(136) => '&circ;',   // Modifier Letter Circumflex Accent
            chr(137) => '&permil;', // Per Mille Sign
            chr(138) => '&Scaron;', // Latin Capital Letter S With Caron
            chr(139) => '&lsaquo;', // Single Left-Pointing Angle Quotation Mark
            chr(140) => '&OElig;',  // Latin Capital Ligature OE
            chr(145) => '&lsquo;',  // Left Single Quotation Mark
            chr(146) => '&rsquo;',  // Right Single Quotation Mark
            chr(147) => '&ldquo;',  // Left Double Quotation Mark
            chr(148) => '&rdquo;',  // Right Double Quotation Mark
            chr(149) => '&bull;',   // Bullet
            chr(150) => '&ndash;',  // En Dash
            chr(151) => '&mdash;',  // Em Dash
            chr(152) => '&tilde;',  // Small Tilde
            chr(153) => '&trade;',  // Trade Mark Sign
            chr(154) => '&scaron;', // Latin Small Letter S With Caron
            chr(155) => '&rsaquo;', // Single Right-Pointing Angle Quotation Mark
            chr(156) => '&oelig;',  // Latin Small Ligature OE
            chr(159) => '&Yuml;',   // Latin Capital Letter Y With Diaeresis
        );
        $html = strtr($html, $trans);
    }

    return SimplePie_Misc::change_encoding($html, $encoding, 'utf-8');
}
示例11: sanitize
/**
 * Clean one piece of feed data according to the construct flags in $type.
 *
 * In order: trims the input, base64-decodes, unwraps or normalises the
 * leading XHTML <div>, strips comments, configured tags and configured
 * attributes, rewrites URL attributes against $base, optionally caches
 * images and points their src at the image handler, absolutizes IRIs,
 * escapes text/IRI output, and finally converts to the configured output
 * encoding.
 *
 * @param string $data Raw value to sanitize
 * @param int $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
 * @param string $base Base URL passed to the URL-resolving helpers
 * @return string Sanitized data; when the output encoding is not UTF-8 this
 *                is whatever SimplePie_Misc::change_encoding() returns
 */
function sanitize($data, $type, $base = '')
{
    $data = trim($data);
    if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI) {
        if ($type & SIMPLEPIE_CONSTRUCT_BASE64) {
            $data = base64_decode($data);
        }

        if ($type & SIMPLEPIE_CONSTRUCT_XHTML) {
            if ($this->remove_div) {
                $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                $data = preg_replace('/<\\/div>$/', '', $data);
            } else {
                $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
            }
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) {
            // Strip comments
            if ($this->strip_comments) {
                $data = SimplePie_Misc::strip_comments($data);
            }

            // Strip out HTML tags and attributes that might cause various security problems.
            // Based on recommendations by Mark Pilgrim at:
            // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
            if ($this->strip_htmltags) {
                foreach ($this->strip_htmltags as $tag) {
                    $pcre = "/<({$tag})" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . "(>(.*)<\\/{$tag}" . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>|(\\/)?>)/siU';
                    // Repeat until no match is left so nested occurrences are handled too.
                    while (preg_match($pcre, $data)) {
                        $data = preg_replace_callback($pcre, array(&$this, 'do_strip_htmltags'), $data);
                    }
                }
            }

            if ($this->strip_attributes) {
                foreach ($this->strip_attributes as $attrib) {
                    $name = trim($attrib);
                    // Three passes: double-quoted, single-quoted, then unquoted
                    // values. The quote and angle-bracket alternatives are also
                    // matched in their entity-escaped spellings, which must stay
                    // written as entity references in these patterns.
                    $data = preg_replace('/ ' . $name . '=("|&quot;)(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|\'|&#039;|&lt;|&gt;|\\+|{|})*("|&quot;)/i', '', $data);
                    $data = preg_replace('/ ' . $name . '=(\'|&#039;)(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|"|&quot;|&lt;|&gt;|\\+|{|})*(\'|&#039;)/i', '', $data);
                    $data = preg_replace('/ ' . $name . '=(\\w|\\s|=|-|:|;|\\/|\\.|\\?|&|,|#|!|\\(|\\)|\\+|{|})*/i', '', $data);
                }
            }

            // Replace relative URLs, skipping elements/attributes that were just stripped
            $this->base = $base;
            foreach ($this->replace_url_attributes as $element => $attribute) {
                if ((!is_array($this->strip_htmltags) || !in_array($element, $this->strip_htmltags)) && (!is_array($this->strip_attributes) || !in_array($attribute, $this->strip_attributes))) {
                    $data = $this->replace_urls($data, $element, $attribute);
                }
            }

            // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
            if (isset($this->image_handler) && !empty($this->image_handler) && $this->enable_cache) {
                $images = SimplePie_Misc::get_element('img', $data);
                foreach ($images as $img) {
                    if (!empty($img['attribs']['src']['data'])) {
                        $image_url = $img['attribs']['src']['data'];
                        // Plain assignment: "=& new" is a parse error on PHP 7+.
                        $cache = new $this->cache_class($this->cache_location, call_user_func($this->cache_name_function, $image_url), 'spi');
                        if ($cache->load()) {
                            $img['attribs']['src']['data'] = $this->image_handler . rawurlencode($img['attribs']['src']['data']);
                            $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
                        } else {
                            $file = new $this->file_class($image_url, $this->timeout, 5, array('X-FORWARDED-FOR' => $_SERVER['REMOTE_ADDR']), $this->useragent, $this->force_fsockopen);
                            if ($file->success && ($file->status_code == 200 || $file->status_code > 206 && $file->status_code < 300)) {
                                // A failed save is reported, but the src is still rewritten.
                                if (!$cache->save(array('headers' => $file->headers, 'body' => $file->body))) {
                                    trigger_error("{$cache->name} is not writeable", E_USER_WARNING);
                                }
                                $img['attribs']['src']['data'] = $this->image_handler . rawurlencode($img['attribs']['src']['data']);
                                $data = str_replace($img['full'], SimplePie_Misc::element_implode($img), $data);
                            }
                        }
                    }
                }
            }

            // Having (possibly) taken stuff out, there may now be whitespace at the beginning/end of the data
            $data = trim($data);
        }

        if ($type & SIMPLEPIE_CONSTRUCT_IRI) {
            $data = SimplePie_Misc::absolutize_url($data, $base);
        }

        if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI)) {
            $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
        }

        if ($this->output_encoding != 'UTF-8') {
            $data = SimplePie_Misc::change_encoding($data, 'UTF-8', $this->output_encoding);
        }
    }
    return $data;
}