本文整理汇总了PHP中mb_substitute_character函数的典型用法代码示例。如果您正苦于以下问题:PHP mb_substitute_character函数的具体用法?PHP mb_substitute_character怎么用?PHP mb_substitute_character使用的例子?那么,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了mb_substitute_character函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: clean
/**
 * Recursively cleans a value against a character set.
 *
 * Arrays (and objects that are iterated and indexed like arrays) are walked
 * recursively, cleaning both keys and values. Non-empty strings are passed
 * through UTF8::strip_ascii_ctrl() and, when UTF8::is_ascii() reports
 * non-ASCII content, round-tripped through mb_convert_encoding() with
 * substitution disabled. Any other value is returned untouched.
 *
 * @param   mixed   $var      value to clean
 * @param   string  $charset  character set; defaults to JsonApiApplication::$charset
 * @return  mixed   the cleaned value
 */
public static function clean($var, $charset = NULL)
{
    if (!$charset) {
        // Use the application character set
        $charset = JsonApiApplication::$charset;
    }
    if (is_array($var) or is_object($var)) {
        foreach ($var as $key => $val) {
            // Recursion! The charset is handed down explicitly so nested keys
            // and values are cleaned against the caller's character set rather
            // than silently falling back to the application default.
            $var[UTF8::clean($key, $charset)] = UTF8::clean($val, $charset);
        }
    } elseif (is_string($var) and $var !== "") {
        // Remove control characters
        $var = UTF8::strip_ascii_ctrl($var);
        if (!UTF8::is_ascii($var)) {
            // Temporarily save the mb_substitute_character() value into a variable
            $mb_substitute_character = mb_substitute_character();
            // Disable substituting illegal characters with the default '?' character
            mb_substitute_character("none");
            // Converting a string to its own encoding is expensive, so it is only
            // done when $var is not pure ASCII
            $var = mb_convert_encoding($var, $charset, $charset);
            // Reset mb_substitute_character() value back to the original setting
            mb_substitute_character($mb_substitute_character);
        }
    }
    return $var;
}
示例2: initMbstring
/**
 * Puts the mbstring extension into a UTF-8 friendly configuration, or loads
 * the bundled replacement when the extension is missing.
 *
 * With mbstring loaded: warns (E_USER_WARNING) about php.ini settings that
 * cannot be corrected at runtime, then forces regex/internal encoding to
 * UTF-8, the substitute character to "none", HTTP output to "pass" and the
 * language to "uni". Without mbstring: makes sure iconv is initialised and
 * requires Bootup/mbstring.php, unless MB_OVERLOAD_MAIL is already defined.
 *
 * @return void
 */
static function initMbstring()
{
    if (extension_loaded('mbstring')) {
        $translation = ini_get('mbstring.encoding_translation');
        $translationOn = (int) $translation || in_array(strtolower($translation), array('on', 'yes', 'true'));
        if ($translationOn && !in_array(strtolower(ini_get('mbstring.http_input')), array('pass', '8bit', 'utf-8'))) {
            user_error('php.ini settings: Please disable mbstring.encoding_translation or set mbstring.http_input to "pass"', E_USER_WARNING);
        }
        // mbstring.func_overload and the MB_OVERLOAD_* constants were removed in
        // PHP 8.0; referencing the constant unguarded is a fatal Error there.
        if (defined('MB_OVERLOAD_STRING') && (MB_OVERLOAD_STRING & (int) ini_get('mbstring.func_overload'))) {
            user_error('php.ini settings: Please disable mbstring.func_overload', E_USER_WARNING);
        }
        mb_regex_encoding('UTF-8');
        ini_set('mbstring.script_encoding', 'pass');
        if ('utf-8' !== strtolower(mb_internal_encoding())) {
            mb_internal_encoding('UTF-8');
            ini_set('mbstring.internal_encoding', 'UTF-8');
        }
        // mb_substitute_character() returns an integer code point when a
        // character (not a mode keyword) is configured, hence the cast.
        if ('none' !== strtolower((string) mb_substitute_character())) {
            mb_substitute_character('none');
            ini_set('mbstring.substitute_character', 'none');
        }
        if (!in_array(strtolower(mb_http_output()), array('pass', '8bit'))) {
            mb_http_output('pass');
            ini_set('mbstring.http_output', 'pass');
        }
        if (!in_array(strtolower(mb_language()), array('uni', 'neutral'))) {
            mb_language('uni');
            ini_set('mbstring.language', 'uni');
        }
    } else {
        // MB_OVERLOAD_MAIL being undefined is used as the marker that the
        // replacement file has not been loaded yet.
        if (!defined('MB_OVERLOAD_MAIL')) {
            extension_loaded('iconv') or static::initIconv();
            require __DIR__ . '/Bootup/mbstring.php';
        }
    }
}
示例3: __construct
/**
 * Builds the list of known character sets and prunes it to the ones the
 * local mbstring and iconv installations can handle.
 *
 * Populates:
 *  - $this->charsets:   charset name => translated display label
 *  - $this->mb_sets:    subset whose name appears in the mbstring encoding list
 *  - $this->iconv_sets: subset iconv accepts as a conversion target from UTF-8
 *
 * Side effect: sets the global mbstring substitute character to "none".
 */
function __construct()
{
    $this->charsets = array(
        "ASMO-708" => gettext("Arabic"),
        "BIG5" => gettext("Chinese Traditional"),
        "CP1026" => gettext("IBM EBCDIC (Turkish Latin-5)"),
        "cp866" => gettext("Cyrillic (DOS)"),
        "CP870" => gettext("IBM EBCDIC (Multilingual Latin-2)"),
        "CISO2022JP" => gettext("Japanese (JIS-Allow 1 byte Kana)"),
        "DOS-720" => gettext("Arabic (DOS)"),
        "DOS-862" => gettext("Hebrew (DOS)"),
        "EBCDIC-CP-US" => gettext("IBM EBCDIC (US-Canada)"),
        "EUC-CN" => gettext("Chinese Simplified (EUC)"),
        "EUC-JP" => gettext("Japanese (EUC)"),
        "EUC-KR" => gettext("Korean (EUC)"),
        "GB2312" => gettext("Chinese Simplified (GB2312)"),
        "HZ-GB-2312" => gettext("Chinese Simplified (HZ)"),
        "IBM437" => gettext("OEM United States"),
        "IBM737" => gettext("Greek (DOS)"),
        "IBM775" => gettext("Baltic (DOS)"),
        "IBM850" => gettext("Western European (DOS)"),
        "IBM852" => gettext("Central European (DOS)"),
        "IBM857" => gettext("Turkish (DOS)"),
        "IBM861" => gettext("Icelandic (DOS)"),
        "IBM869" => gettext("Greek, Modern (DOS)"),
        // The list used to contain a second "ISO-2022-JP" entry labelled
        // "Japanese (JIS-Allow 1 byte Kana - SO/SI)". A PHP array literal keeps
        // only the last value for a repeated key, so that entry silently replaced
        // this label. The duplicate is dropped so the plain JIS label is shown.
        "ISO-2022-JP" => gettext("Japanese (JIS)"),
        "ISO-2022-KR" => gettext("Korean (ISO)"),
        "ISO-8859-1" => gettext("Western European (ISO)"),
        "ISO-8859-15" => gettext("Latin 9 (ISO)"),
        "ISO-8859-2" => gettext("Central European (ISO)"),
        "ISO-8859-3" => gettext("Latin 3 (ISO)"),
        "ISO-8859-4" => gettext("Baltic (ISO)"),
        "ISO-8859-5" => gettext("Cyrillic (ISO)"),
        "ISO-8859-6" => gettext("Arabic (ISO)"),
        "ISO-8859-7" => gettext("Greek (ISO)"),
        "ISO-8859-8" => gettext("Hebrew (ISO-Visual)"),
        "ISO-8859-8-i" => gettext("Hebrew (ISO-Logical)"),
        "ISO-8859-9" => gettext("Turkish (ISO)"),
        "JOHAB" => gettext("Korean (Johab)"),
        "KOi8-R" => gettext("Cyrillic (KOI8-R)"),
        "KOi8-U" => gettext("Cyrillic (KOI8-U)"),
        "KS_C_5601-1987" => gettext("Korean"),
        "MACINTOSH" => gettext("Western European (MAC)"),
        "SHIFT_JIS" => gettext("Japanese (Shift-JIS)"),
        "UNICODE" => gettext("Unicode"),
        "UNICODEFFFE" => gettext("Unicode (Big-Endian)"),
        "US-ASCII" => gettext("US-ASCII"),
        "UTF-7" => gettext("Unicode (UTF-7)"),
        "UTF-8" => gettext("Unicode (UTF-8)"),
        "WINDOWS-1250" => gettext("Central European (Windows)"),
        "WINDOWS-1251" => gettext("Cyrillic (Windows)"),
        "WINDOWS-1252" => gettext("Western European (Windows)"),
        "WINDOWS-1253" => gettext("Greek (Windows)"),
        "WINDOWS-1254" => gettext("Turkish (Windows)"),
        "WINDOWS-1255" => gettext("Hebrew (Windows)"),
        "WINDOWS-1256" => gettext("Arabic (Windows)"),
        "WINDOWS-1257" => gettext("Baltic (Windows)"),
        "WINDOWS-1258" => gettext("Vietnamese (Windows)"),
        "WINDOWS-874" => gettext("Thai (Windows)"),
    );
    // prune the list to supported character sets
    $this->iconv_sets = array();
    $this->mb_sets = array();
    if (function_exists('mb_convert_encoding')) {
        @mb_substitute_character('none');
        if (function_exists('mb_list_encodings')) {
            $list = mb_list_encodings();
        } else {
            // Static fallback used when mb_list_encodings() is unavailable.
            $list = array(
                "pass", "auto", "byte2be", "byte2le", "byte4be", "byte4le", "BASE64", "UUENCODE",
                "HTML-ENTITIES", "Quoted-Printable", "7bit", "8bit", "UCS-4", "UCS-4BE", "UCS-4LE",
                "UCS-2", "UCS-2BE", "UCS-2LE", "UTF-32", "UTF-32BE", "UTF-32LE", "UTF-16", "UTF-16BE",
                "UTF-16LE", "UTF-8", "UTF-7", "UTF7-IMAP", "ASCII", "EUC-JP", "SJIS", "eucJP-win",
                "SJIS-win", "CP51932", "JIS", "ISO-2022-JP", "ISO-2022-JP-MS", "Windows-1252",
                "ISO-8859-1", "ISO-8859-2", "ISO-8859-3", "ISO-8859-4", "ISO-8859-5", "ISO-8859-6",
                "ISO-8859-7", "ISO-8859-8", "ISO-8859-9", "ISO-8859-10", "ISO-8859-13", "ISO-8859-14",
                "ISO-8859-15", "ISO-8859-16", "EUC-CN", "CP936", "HZ", "EUC-TW", "BIG-5", "EUC-KR",
                "UHC", "ISO-2022-KR", "Windows-1251", "CP866", "KOI8-R", "ArmSCII-8",
            );
        }
        foreach ($this->charsets as $key => $encoding) {
            // Exact (case-sensitive) name match; both sides are always strings.
            if (in_array($key, $list, true)) {
                $this->mb_sets[$key] = $encoding;
            }
        }
    }
    if (function_exists('iconv')) {
        foreach ($this->charsets as $key => $encoding) {
            // Probe iconv with a throw-away conversion; the warning emitted for an
            // unknown charset is suppressed and only the false return is used.
            if (@iconv("UTF-8", $key, "UTF-8") !== false) {
                $this->iconv_sets[$key] = $encoding;
            }
        }
    }
}
示例4: smarty_modifier_xoops_html_purifier
/**
 * Smarty modifier: runs HTML through HTMLPurifier.
 *
 * When the working encoding is not UTF-8 and mbstring is available, the HTML
 * is converted to UTF-8 for purification and converted back afterwards, with
 * the mbstring substitute character temporarily set to "none".
 *
 * @param string      $html     HTML to purify
 * @param string|null $ecoding  encoding of $html (parameter name kept as-is for
 *                              compatibility); falls back to _CHARSET when empty
 * @param string|null $doctype  one of the supported HTML/XHTML doctypes; any
 *                              other value leaves HTMLPurifier's default
 * @return string purified HTML in the original encoding
 */
function smarty_modifier_xoops_html_purifier($html, $ecoding = null, $doctype = null)
{
    require_once XOOPS_LIBRARY_PATH . '/htmlpurifier/library/HTMLPurifier.auto.php';
    // The body previously read an undefined $encoding variable, so the caller's
    // argument was never honoured and _CHARSET was always used.
    $encoding = $ecoding ? $ecoding : _CHARSET;
    $doctypeArr = array("HTML 4.01 Strict", "HTML 4.01 Transitional", "XHTML 1.0 Strict", "XHTML 1.0 Transitional", "XHTML 1.1");
    $config = HTMLPurifier_Config::createDefault();
    // Strict comparison: a loose in_array() would accept e.g. integer 0 on PHP < 8.
    if (in_array($doctype, $doctypeArr, true)) {
        $config->set('HTML.Doctype', $doctype);
    }
    $_conv = ($encoding !== 'UTF-8' && function_exists('mb_convert_encoding'));
    if ($_conv) {
        $_substitute = mb_substitute_character();
        mb_substitute_character('none');
        $html = mb_convert_encoding($html, 'UTF-8', $encoding);
        $config->set('Core.Encoding', 'UTF-8');
    } else {
        $config->set('Core.Encoding', $encoding);
    }
    $purifier = new HTMLPurifier($config);
    $html = $purifier->purify($html);
    if ($_conv) {
        $html = mb_convert_encoding($html, $encoding, 'UTF-8');
        // Restore the substitute character saved before the first conversion.
        mb_substitute_character($_substitute);
    }
    return $html;
}
示例5: inputFilter
/**
 * Converts carrier-specific Shift_JIS input to trimmed UTF-8.
 *
 * Arrays are filtered element by element through this same method. Strings
 * are first normalised with mb_convert_kana() and then converted using one of
 * three paths chosen by $this->is_ezweb() / $this->is_softbank().
 *
 * @param string|array $str raw input
 * @return string|array filtered input
 */
function inputFilter($str)
{
    if (is_array($str)) {
        return array_map(array($this, "inputFilter"), $str);
    }

    // Entered emoji are stored as Unicode, hence SJIS-win.
    $str = mb_convert_kana($str, 'KVrns', 'SJIS-win');

    if ($this->is_ezweb()) {
        // Conversion maps in the (start, end, offset, mask) layout used by
        // mb_encode_numericentity() / mb_decode_numericentity().
        $sjisRanges = array(0xe234, 0xe272, 0xa0c, 0xffff, 0xe273, 0xe2ef, 0xa0d, 0xffff, 0xe2f0, 0xe32e, 0xa50, 0xffff, 0xe32f, 0xe342, 0xa51, 0xffff, 0xe468, 0xe4a6, 0xad8, 0xffff, 0xe4a7, 0xe523, 0xad9, 0xffff, 0xe524, 0xe562, 0xb1c, 0xffff, 0xe563, 0xe5df, 0xb1d, 0xffff);
        $utf8Ranges = array(0xec40, 0xecfc, 0x0, 0xffff, 0xed40, 0xed93, 0x0, 0xffff, 0xef40, 0xeffc, 0x0, 0xffff, 0xf040, 0xf0fc, 0x0, 0xffff);
        $entityEncoded = mb_encode_numericentity($str, $sjisRanges, 'SJIS-win');
        $asUtf8 = mb_convert_encoding($entityEncoded, "UTF-8", "SJIS-win");
        return trim(mb_decode_numericentity($asUtf8, $utf8Ranges, 'UTF-8'));
    }

    if ($this->is_softbank()) {
        // Switch to 'long' substitution only for this one conversion so that
        // unconvertible input leaves a marker the fallback callback can pick up.
        $savedSubstitute = mb_substitute_character();
        mb_substitute_character('long');
        $asUtf8 = mb_convert_encoding($str, 'UTF-8', 'SJIS');
        mb_substitute_character($savedSubstitute);
        return trim(preg_replace_callback('/BAD\\+([0-9A-F]{4})/', array($this, '_softbank_fallbackSjisToUtf8'), $asUtf8));
    }

    return trim(mb_convert_encoding($str, "UTF-8", "SJIS-win"));
}
示例6: u2b
/**
 * Converts a UTF-8 string to $charset, replacing characters the target
 * charset cannot represent with decimal HTML numeric entities (&#NNNN;).
 *
 * Only substitution markers with exactly four hex digits (U+XXXX) are turned
 * into entities, as before; markers of any other length are left in the output.
 *
 * @param string $str     UTF-8 input
 * @param string $charset target character set (default BIG5)
 * @return string converted string
 */
function u2b($str, $charset = 'BIG5')
{
    // Declare $charset as the multibyte regex encoding (kept for callers that
    // rely on this side effect).
    mb_regex_encoding($charset);
    // Mark characters missing from the target charset as "U+<hex>", and put the
    // caller's substitute character back once the conversion is done.
    $previousSubstitute = mb_substitute_character();
    mb_substitute_character('long');
    $str = mb_convert_encoding($str, $charset, 'UTF-8');
    mb_substitute_character($previousSubstitute);
    // Turn the U+<hex> markers into decimal HTML entities. The /e modifier used
    // here originally was removed in PHP 7, which made preg_replace() return NULL.
    return preg_replace_callback(
        '/U\\+([0-9A-F]{4})/',
        function ($match) {
            return '&#' . intval($match[1], 16) . ';';
        },
        $str
    );
}
示例7: __construct
/**
 * Constructor.
 *
 * Aligns the mbstring and default charset settings with Todo::ENCODING
 * before encoding and storing the category.
 *
 * @param string $cat category
 */
public function __construct($cat)
{
    $encoding = Todo::ENCODING;

    mb_internal_encoding($encoding);
    mb_regex_encoding($encoding);
    // Character encoding announced in the HTTP header.
    ini_set('default_charset', $encoding);
    ini_set('mbstring.strict_detection', true);
    // Characters that cannot be converted become "_" (0x5f).
    mb_substitute_character(0x5f);

    $this->cat = $this->_encode($cat);
}
示例8: __construct
/**
 * Parses $data as XML into $this->doc, retrying with progressively more
 * aggressive clean-up when libxml reports errors.
 *
 * Steps, in order:
 *  1. Load $data as-is.
 *  2. If the last libxml error has code 32, re-encode via
 *     $this->normalize_encoding() and load again.
 *  3. If an error remains and any collected error has code 9, normalise the
 *     encoding, drop invalid UTF-8 bytes, replace characters outside the
 *     allowed XML ranges with spaces, and load again.
 *  4. Record every remaining fatal libxml error in $this->libxml_errors and
 *     the first one in $this->error.
 *
 * Side effect: sets the global mbstring substitute character to "none".
 *
 * @param string $data raw feed/XML document
 */
function __construct($data)
{
// Collect libxml errors internally instead of emitting PHP warnings, and
// start from an empty error list.
libxml_use_internal_errors(true);
libxml_clear_errors();
$this->doc = new DOMDocument();
$this->doc->loadXML($data);
// Applies to the mb_convert_encoding() call further down.
mb_substitute_character("none");
$error = libxml_get_last_error();
// libxml compiled without iconv?
if ($error && $error->code == 32) {
$data = $this->normalize_encoding($data);
if ($data) {
libxml_clear_errors();
$this->doc = new DOMDocument();
$this->doc->loadXML($data);
$error = libxml_get_last_error();
}
}
// some terrible invalid unicode entity?
if ($error) {
foreach (libxml_get_errors() as $err) {
if ($err->code == 9) {
// if the source feed is not in utf8, next conversion will fail
$data = $this->normalize_encoding($data);
// remove dangling bytes
$data = mb_convert_encoding($data, 'UTF-8', 'UTF-8');
// apparently not all UTF-8 characters are valid for XML
$data = preg_replace('/[^\\x{0009}\\x{000a}\\x{000d}\\x{0020}-\\x{D7FF}\\x{E000}-\\x{FFFD}]+/u', ' ', $data);
if ($data) {
libxml_clear_errors();
$this->doc = new DOMDocument();
$this->doc->loadXML($data);
$error = libxml_get_last_error();
}
// One clean-up attempt is enough; stop at the first code-9 error.
break;
}
}
}
// Whatever is still failing is reported to the caller through
// $this->error / $this->libxml_errors (fatal-level errors only).
if ($error) {
foreach (libxml_get_errors() as $error) {
if ($error->level == LIBXML_ERR_FATAL) {
if (!isset($this->error)) {
//currently only the first error is reported
$this->error = $this->format_error($error);
}
$this->libxml_errors[] = $this->format_error($error);
}
}
}
libxml_clear_errors();
$this->items = array();
}
示例9: __construct
/**
 * Class constructor
 *
 * Applies the configured charset to PHP, mbstring and iconv, defines the
 * MB_ENABLED / ICONV_ENABLED constants and determines whether UTF-8 support
 * is to be enabled (UTF8_ENABLED).
 *
 * @return void
 */
public function __construct()
{
    $charset = strtoupper(Config::get('main')->charset);
    ini_set('default_charset', $charset);
    /*
     * Configure mbstring and/or iconv if they are enabled
     * and set MB_ENABLED and ICONV_ENABLED constants, so
     * that we don't repeatedly do extension_loaded() or
     * function_exists() calls.
     */
    if (extension_loaded('mbstring')) {
        define('MB_ENABLED', TRUE);
        // mbstring.internal_encoding is deprecated starting with PHP 5.6
        // and its usage triggers E_DEPRECATED messages.
        if (!Core::isPHP('5.6')) {
            @ini_set('mbstring.internal_encoding', $charset);
        } else {
            mb_internal_encoding($charset);
        }
        // This is required for mb_convert_encoding() to strip invalid characters.
        // That's utilized by Utf8, but it's also done for consistency with iconv.
        mb_substitute_character('none');
    } else {
        define('MB_ENABLED', FALSE);
    }
    // There's an ICONV_IMPL constant, but the PHP manual says that using
    // iconv's predefined constants is "strongly discouraged".
    if (extension_loaded('iconv')) {
        define('ICONV_ENABLED', TRUE);
        // iconv.internal_encoding is deprecated starting with PHP 5.6, so it is
        // only set on older versions. On 5.6+ iconv picks the charset up from
        // internal_encoding / default_charset, both set in this constructor.
        // (The previous ini_set('default_encoding', ...) call targeted a
        // directive that does not exist and therefore never had any effect.)
        if (!Core::isPHP('5.6')) {
            @ini_set('iconv.internal_encoding', $charset);
        }
    } else {
        define('ICONV_ENABLED', FALSE);
    }
    if (Core::isPHP('5.6')) {
        // The directive is named "internal_encoding"; "php.internal_encoding"
        // is not a valid ini key and was silently rejected by ini_set().
        ini_set('internal_encoding', $charset);
    }
    // PREG_BAD_UTF8_ERROR being defined is used as the indicator that PCRE
    // has UTF-8 support; $charset is already upper-cased above.
    if (defined('PREG_BAD_UTF8_ERROR') && (ICONV_ENABLED === TRUE or MB_ENABLED === TRUE) && $charset === 'UTF-8') {
        define('UTF8_ENABLED', TRUE);
        Logger::log('UTF-8 Support Enabled');
    } else {
        define('UTF8_ENABLED', FALSE);
        Logger::log('UTF-8 Support Disabled');
    }
}
示例10: __construct
/**
 * AbstractDiff constructor.
 *
 * Creates a default HtmlDiffConfig carrying the requested encoding, applies
 * the optional overrides and stores both texts for later diffing.
 *
 * @param string     $oldText         original text
 * @param string     $newText         changed text
 * @param string     $encoding        encoding passed to the config
 * @param null|array $specialCaseTags applied to the config only when not null
 * @param null|bool  $groupDiffs      applied to the config only when not null
 */
public function __construct($oldText, $newText, $encoding = 'UTF-8', $specialCaseTags = null, $groupDiffs = null)
{
    // Unconvertible characters are replaced by a space (0x20) in mbstring calls.
    mb_substitute_character(0x20);

    $defaultConfig = HtmlDiffConfig::create()->setEncoding($encoding);
    $this->setConfig($defaultConfig);

    if (null !== $specialCaseTags) {
        $this->config->setSpecialCaseTags($specialCaseTags);
    }

    if (null !== $groupDiffs) {
        $this->config->setGroupDiffs($groupDiffs);
    }

    $this->oldText = $oldText;
    $this->newText = $newText;
    $this->content = '';
}
示例11: format
/**
 * Writes every row of $response->data['rows'] to a temporary file, one
 * CRLF-terminated line per row, and hands that file to the response as its
 * stream (the response content is cleared).
 *
 * The input/output charsets come from $response->data when present, otherwise
 * from Yii::$app->charset.
 *
 * @param mixed $response response object exposing data, content and stream
 */
public function format($response)
{
    $this->inputCharset = @$response->data['inputCharset'] ?: Yii::$app->charset;
    $this->outputCharset = @$response->data['outputCharset'] ?: Yii::$app->charset;

    // Substitute character: remember the current one in a Resource guard
    // (presumably it runs the callback with the saved value on release —
    // confirm against Resource), then switch to U+3013.
    $restoreGuard = new Resource(mb_substitute_character(), function ($previous) {
        mb_substitute_character($previous);
    });
    mb_substitute_character(0x3013);

    $stream = tmpfile();
    foreach ($response->data['rows'] as $row) {
        $line = $this->formatRow($row) . "\r\n";
        fwrite($stream, $line);
    }
    // Rewind so the consumer of the stream starts reading at the first row.
    fseek($stream, 0, SEEK_SET);

    $response->content = null;
    $response->stream = $stream;
}
示例12: getSource
/**
 * Returns the file's source code as valid UTF-8.
 *
 * The encoding is detected with finfo; anything other than UTF-8 is converted
 * with iconv (//TRANSLIT). The result is then checked for leftover invalid
 * UTF-8 byte sequences.
 *
 * @return string
 *
 * @throws Backend\SourceFileException with code BadEncoding when the iconv
 *         conversion fails, or InvalidDataBytes when invalid UTF-8 bytes remain
 */
public function getSource()
{
    $code = file_get_contents($this->fileInfo->getPathname());
    $info = new \finfo();
    $encoding = $info->file($this->fileInfo, FILEINFO_MIME_ENCODING);
    if (strtolower($encoding) != 'utf-8') {
        try {
            $converted = iconv($encoding, 'UTF-8//TRANSLIT', $code);
        } catch (\ErrorException $e) {
            throw new SourceFileException('Encoding error - conversion to UTF-8 failed', SourceFileException::BadEncoding, $e);
        }
        // Without an error handler that turns notices into ErrorException,
        // iconv() signals failure by returning false. That used to slip through
        // the loose comparison below and produce an empty source string.
        if ($converted === false) {
            throw new SourceFileException('Encoding error - conversion to UTF-8 failed', SourceFileException::BadEncoding);
        }
        $code = $converted;
    }
    // This is a workaround to filter out leftover invalid UTF-8 byte sets
    // even if the source looks like it's UTF-8 already. The substitute
    // character is only switched to "none" for this one conversion and then
    // put back, so the global mbstring setting is not changed for other code.
    $previousSubstitute = mb_substitute_character();
    mb_substitute_character('none');
    $cleanCode = mb_convert_encoding($code, 'UTF-8', 'UTF-8');
    mb_substitute_character($previousSubstitute);
    if ($cleanCode != $code) {
        throw new SourceFileException('Encoding error - invalid UTF-8 bytes found', SourceFileException::InvalidDataBytes);
    }
    return $cleanCode;
}
示例13: init
/**
 * Perform initialization required for the string wrapper library.
 *
 * Defines ENABLE_MBSTRING (only when mbstring is available) and PCRE_UTF8,
 * and configures mbstring's internal encoding and substitute character.
 *
 * @return null
 */
static function init()
{
    $clientCharset = strtolower_codesafe(Config::getVar('i18n', 'client_charset'));
    // Check if mbstring is installed (requires PHP >= 4.3.0)
    if (String::hasMBString()) {
        // mbstring routines are available
        define('ENABLE_MBSTRING', true);
        // Set up required ini settings for mbstring
        // FIXME Do any other mbstring settings need to be set?
        mb_internal_encoding($clientCharset);
        // Question mark (code point 63). This must be an integer: since PHP 8 a
        // string argument other than "none", "long" or "entity" raises a
        // ValueError, so the former '63' string no longer works.
        mb_substitute_character(63);
    }
    // Define modifier to be used in regexp_* routines
    // FIXME Should non-UTF-8 encodings be supported with mbstring?
    if ($clientCharset == 'utf-8' && String::hasPCREUTF8()) {
        define('PCRE_UTF8', 'u');
    } else {
        define('PCRE_UTF8', '');
    }
}
示例14: safeUTF8
/**
 * Normalises $text into UTF-8 and strips characters that are unsafe or
 * discouraged in XML. The argument is modified in place and also returned,
 * because by-reference passing does not work for anonymous values.
 *
 * Side effect: sets the global mbstring substitute character to U+FFFD.
 *
 * @param string $text text to clean (by reference)
 * @return string the cleaned text
 */
function safeUTF8(&$text)
{
    // Use the Unicode replacement character instead of "?" for anything
    // mb_convert_encoding cannot map <stackoverflow.com/a/13695364>
    mb_substitute_character(0xfffd);

    // Round-trip through UTF-32 (adapted from <php.net/mb_check_encoding#89286>,
    // with thanks to Zegnat) so the result is well-formed UTF-8, though not yet
    // necessarily web-safe
    $asUtf32 = mb_convert_encoding($text, 'UTF-32', 'UTF-8');
    $text = mb_convert_encoding($asUtf32, 'UTF-8', 'UTF-32');

    // Everything outside the XML character ranges <www.w3.org/TR/REC-xml/#charsets>
    $outsideXmlRange = '/[^\\x{0009}\\x{000a}\\x{000d}\\x{0020}-\\x{D7FF}\\x{E000}-\\x{FFFD}\\x{10000}-\\x{10FFFF}]+/u';
    $text = preg_replace($outsideXmlRange, '', $text);

    // "Compatibility characters" and "permanently undefined Unicode characters",
    // see the note following <www.w3.org/TR/REC-xml/#charsets>
    $discouraged = implode('', array(
        '/[\\x{007f}-\\x{0084}\\x{0086}-\\x{009f}\\x{FDD0}-\\x{FDEF}',
        '\\x{200E}\\x{200F}\\x{202A}-\\x{202E}',
        '\\x{1FFFE}\\x{1FFFF}\\x{2FFFE}\\x{2FFFF}\\x{3FFFE}\\x{3FFFF}\\x{4FFFE}\\x{4FFFF}',
        '\\x{5FFFE}\\x{5FFFF}\\x{6FFFE}\\x{6FFFF}\\x{7FFFE}\\x{7FFFF}\\x{8FFFE}\\x{8FFFF}',
        '\\x{9FFFE}\\x{9FFFF}\\x{AFFFE}\\x{AFFFF}\\x{BFFFE}\\x{BFFFF}\\x{CFFFE}\\x{CFFFF}',
        '\\x{DFFFE}\\x{DFFFF}\\x{EFFFE}\\x{EFFFF}\\x{FFFFE}\\x{FFFFF}\\x{10FFFE}\\x{10FFFF}]+/u',
    ));
    $text = preg_replace($discouraged, '', $text);

    //TODO: strip invalid byte-sequences
    //see: http://stackoverflow.com/a/13695364
    //Some interesting references:
    //http://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805
    return $text;
}
示例15: init
/**
 * Configures mbstring for UTF-8 and loads the byte <-> UTF-8 translation
 * tables, building and caching them in MCached when not cached yet.
 *
 * self::$trans_table ends up with three maps:
 *  - 'iso_to_utf8': ISO-8859-1 byte (0x80-0xFF) => UTF-8 sequence
 *  - 'win_to_utf8': Windows-1252 byte (0x80-0xFF) => UTF-8 sequence
 *  - 'from_utf8':   UTF-8 sequence => single byte (Windows-1252 entries
 *                   overwrite ISO-8859-1 ones where both produce the same key)
 */
public static function init()
{
    self::$utf8validator = (bool) extension_loaded('utf8validator');
    mb_internal_encoding('UTF-8');
    mb_language('uni');
    mb_regex_encoding('UTF-8');
    mb_detect_order(array('UTF-8', 'ISO-8859-1'));
    // Unconvertible input becomes U+FFFD (the Unicode replacement character).
    mb_substitute_character(0xfffd);
    MCached::connect();
    $trans = MCached::get(self::TRANSKEY);
    if ($trans === MCached::NO_RESULT) {
        // The byte values are written as explicit escapes. Raw high-byte
        // literals do not survive the file being re-saved in another encoding:
        // range() and strlen() then operate on multi-byte sequences and the
        // tables come out wrong.
        //
        // Bytes 0x80-0xFF that Windows-1252 assigns a character to...
        $win = "\x80" . implode('', range("\x82", "\x8C")) . "\x8E" . implode('', range("\x91", "\x9C")) . implode('', range("\x9E", "\xFF"));
        // ...and the five it leaves undefined, which are mapped via ISO-8859-1.
        $win_iso = "\x81\x8D\x8F\x90\x9D";
        $iso = implode('', range("\x80", "\xFF"));
        $winlen = strlen($win);
        $winisolen = strlen($win_iso);
        $isolen = strlen($iso);
        $trans = array('iso_to_utf8' => array(), 'win_to_utf8' => array(), 'from_utf8' => array());
        for ($i = 0; $i < $isolen; $i++) {
            $utf8 = mb_convert_encoding($iso[$i], 'UTF-8', 'ISO-8859-1');
            $trans['iso_to_utf8'][$iso[$i]] = $utf8;
            $trans['from_utf8'][$utf8] = $iso[$i];
        }
        for ($i = 0; $i < $winlen; $i++) {
            $utf8 = mb_convert_encoding($win[$i], 'UTF-8', 'Windows-1252');
            $trans['win_to_utf8'][$win[$i]] = $utf8;
            $trans['from_utf8'][$utf8] = $win[$i];
        }
        for ($i = 0; $i < $winisolen; $i++) {
            $utf8 = mb_convert_encoding($win_iso[$i], 'UTF-8', 'ISO-8859-1');
            $trans['win_to_utf8'][$win_iso[$i]] = $utf8;
        }
        // Third argument is passed through to MCached::add unchanged (86400).
        MCached::add(self::TRANSKEY, $trans, 86400);
    }
    self::$trans_table = $trans;
}