当前位置: 首页>>代码示例>>PHP>>正文


PHP codepointToUtf8函数代码示例

本文整理汇总了PHP中codepointToUtf8函数的典型用法代码示例。如果您正苦于以下问题：PHP codepointToUtf8函数的具体用法？PHP codepointToUtf8怎么用？PHP codepointToUtf8使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了codepointToUtf8函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的PHP代码示例。

示例1: hexSequenceToUtf8

/**
 * Convert a space-separated list of hexadecimal code point numbers into
 * the UTF-8 string made of the corresponding characters, in order.
 *
 * Every token produced by splitting on a single space is converted, so an
 * empty token (e.g. from an empty input or a doubled space) is passed on
 * as code point 0.
 *
 * @param $sequence String space-separated hexadecimal code points
 * @return String concatenation of the encoded characters
 * @private
 */
function hexSequenceToUtf8($sequence)
{
    $tokens = explode(' ', $sequence);
    $codepoints = array_map('hexdec', $tokens);
    $encoded = array_map('codepointToUtf8', $codepoints);
    return implode('', $encoded);
}
开发者ID:arlendotcn,项目名称:ilias,代码行数:18,代码来源:UtfNormalUtil.php

示例2: onCategoryMultisortSortkeys_getRadical

 /**
  * Build the radical part of a sort key for one code point.
  *
  * Looks the code point up in $data->radicalStrokeCounts; each entry is
  * unpacked as a pair (radical id, rest). The radical id is resolved to a
  * code point through $data->radicals.
  *
  * @param $data object exposing radicalStrokeCounts and radicals arrays
  * @param $chcp code point used as the lookup key
  * @return string the radical character followed by the second pair
  *   element zero-padded to three digits, or '' when the code point has
  *   no entry
  */
 function onCategoryMultisortSortkeys_getRadical($data, $chcp)
 {
     // Unknown code point: contribute nothing to the sort key.
     if (!array_key_exists($chcp, $data->radicalStrokeCounts)) {
         return '';
     }
     $entry = $data->radicalStrokeCounts[$chcp];
     list($radicalId, $rest) = $entry;
     $radicalChar = codepointToUtf8($data->radicals[$radicalId]);
     return sprintf('%s%03d', $radicalChar, $rest);
 }
开发者ID:realsoc,项目名称:mediawiki-extensions,代码行数:10,代码来源:CategoryMultisortChinese.hooks.php

示例3: XtestAllChars

 /**
  * Run UtfNormal::cleanUp() over every code point from 0 up to (but not
  * including) UNICODE_MAX and check the result. This is *very* expensive.
  *
  * Expectations asserted per code point:
  *  - tab, LF, CR and the accepted ranges outside the surrogate block:
  *    the NFC form if the character appears in either canonical table,
  *    otherwise the character unchanged;
  *  - everything else: UTF8_REPLACEMENT.
  *
  * A progress line is printed every 0x1000 code points.
  */
 function XtestAllChars()
 {
     $replacementHex = bin2hex(UTF8_REPLACEMENT);
     $codepoint = 0x0;
     while ($codepoint < UNICODE_MAX) {
         $encoded = codepointToUtf8($codepoint);
         $cleaned = UtfNormal::cleanUp($encoded);
         $label = sprintf("%04X", $codepoint);
         if ($codepoint % 0x1000 == 0) {
             echo "U+{$label}\n";
         }

         // The four accepted ranges, spelled out individually.
         $isAllowedControl = $codepoint == 0x9 || $codepoint == 0xa || $codepoint == 0xd;
         $isBelowSurrogates = $codepoint > 0x1f && $codepoint < UNICODE_SURROGATE_FIRST;
         $isAboveSurrogates = $codepoint > UNICODE_SURROGATE_LAST && $codepoint < 0xfffe;
         $isSupplementary = $codepoint > 0xffff && $codepoint <= UNICODE_MAX;

         if (!($isAllowedControl || $isBelowSurrogates || $isAboveSurrogates || $isSupplementary)) {
             $this->assertEquals($replacementHex, bin2hex($cleaned), $label);
         } elseif (isset(UtfNormal::$utfCanonicalComp[$encoded]) || isset(UtfNormal::$utfCanonicalDecomp[$encoded])) {
             $normalized = UtfNormal::NFC($encoded);
             $this->assertEquals(bin2hex($normalized), bin2hex($cleaned), "U+{$label} should be decomposed");
         } else {
             $this->assertEquals(bin2hex($encoded), bin2hex($cleaned), "U+{$label} should be intact");
         }
         $codepoint++;
     }
 }
开发者ID:rploaiza,项目名称:dbpedia-latinoamerica,代码行数:26,代码来源:CleanUpTest.php

示例4: fclose

        print "{$total} ";
    }
}
// Done with the previous input file; release its handle.
fclose($in);
// Fold this phase's outcome into the overall status flag. reportResults()
// is called first, so it runs even when $ok is already false.
$ok = reportResults($total, $success, $failure) && $ok;
// Second phase reads UnicodeData.txt from the current working directory.
$in = fopen("UnicodeData.txt", "rt");
if (!$in) {
    // Cannot continue without the data file: tell the user where to get it
    // and terminate the script.
    print "Can't open UnicodeData.txt for reading.\n";
    print "If necessary, fetch this file from the internet:\n";
    print "http://www.unicode.org/Public/UNIDATA/UnicodeData.txt\n";
    exit(-1);
}
print "Now testing invariants...\n";
while (false !== ($line = fgets($in))) {
    $cols = explode(';', $line);
    $char = codepointToUtf8(hexdec($cols[0]));
    $desc = $cols[0] . ": " . $cols[1];
    if ($char < " " || $char >= UTF8_SURROGATE_FIRST && $char <= UTF8_SURROGATE_LAST) {
        # Can't check NULL with the ICU plugin, as null bytes fail in C land.
        # Skip other control characters, as we strip them for XML safety.
        # Surrogates are illegal on their own or in UTF-8, ignore.
        continue;
    }
    if (empty($testedChars[$char])) {
        $total++;
        if (testInvariant($normalizer, $char, $desc)) {
            $success++;
        } else {
            $failure++;
        }
        if ($total % 100 == 0) {
开发者ID:arlendotcn,项目名称:ilias,代码行数:31,代码来源:UtfNormalTest.php

示例5: generateFirstChars


//.........这里部分代码省略.........
         if (!isset($this->weights[$cp])) {
             // Non-printable, ignore
             continue;
         }
         foreach (StringUtils::explode('[', $allWeights) as $weightStr) {
             preg_match_all('/[*.]([0-9A-F]+)/', $weightStr, $m);
             if (!empty($m[1])) {
                 if ($m[1][0] !== '0000') {
                     $primary .= '.' . $m[1][0];
                 }
                 if ($m[1][2] !== '0000') {
                     $tertiary .= '.' . $m[1][2];
                 }
             }
         }
         $this->weights[$cp] = $primary;
         if ($tertiary === '.0008' || $tertiary === '.000E') {
             $goodTertiaryChars[$cp] = true;
         }
     }
     fclose($file);
     // Identify groups of characters with the same primary weight
     $this->groups = array();
     asort($this->weights, SORT_STRING);
     $prevWeight = reset($this->weights);
     $group = array();
     foreach ($this->weights as $cp => $weight) {
         if ($weight !== $prevWeight) {
             $this->groups[$prevWeight] = $group;
             $prevWeight = $weight;
             if (isset($this->groups[$weight])) {
                 $group = $this->groups[$weight];
             } else {
                 $group = array();
             }
         }
         $group[] = $cp;
     }
     if ($group) {
         $this->groups[$prevWeight] = $group;
     }
     // If one character has a given primary weight sequence, and a second
     // character has a longer primary weight sequence with an initial
     // portion equal to the first character, then remove the second
     // character. This avoids having characters like U+A732 (double A)
     // polluting the basic latin sort area.
     foreach ($this->groups as $weight => $group) {
         if (preg_match('/(\\.[0-9A-F]*)\\./', $weight, $m)) {
             if (isset($this->groups[$m[1]])) {
                 unset($this->groups[$weight]);
             }
         }
     }
     ksort($this->groups, SORT_STRING);
     // Identify the header character in each group
     $headerChars = array();
     $prevChar = "";
     $tertiaryCollator = new Collator('root');
     $primaryCollator = new Collator('root');
     $primaryCollator->setStrength(Collator::PRIMARY);
     $numOutOfOrder = 0;
     foreach ($this->groups as $weight => $group) {
         $uncomposedChars = array();
         $goodChars = array();
         foreach ($group as $cp) {
             if (isset($goodTertiaryChars[$cp])) {
                 $goodChars[] = $cp;
             }
             if (!isset($this->mappedChars[$cp])) {
                 $uncomposedChars[] = $cp;
             }
         }
         $x = array_intersect($goodChars, $uncomposedChars);
         if (!$x) {
             $x = $uncomposedChars;
             if (!$x) {
                 $x = $group;
             }
         }
         // Use ICU to pick the lowest sorting character in the selection
         $tertiaryCollator->sort($x);
         $cp = $x[0];
         $char = codepointToUtf8($cp);
         $headerChars[] = $char;
         if ($primaryCollator->compare($char, $prevChar) <= 0) {
             $numOutOfOrder++;
             /*
             				printf( "Out of order: U+%05X > U+%05X\n",
             					utf8ToCodepoint( $prevChar ),
             					utf8ToCodepoint( $char ) );
             */
         }
         $prevChar = $char;
         if ($this->debugOutFile) {
             fwrite($this->debugOutFile, sprintf("%05X %s %s (%s)\n", $cp, $weight, $char, implode(' ', array_map('codepointToUtf8', $group))));
         }
     }
     print "Out of order: {$numOutOfOrder} / " . count($headerChars) . "\n";
     fwrite($outFile, serialize($headerChars));
 }
开发者ID:yusufchang,项目名称:app,代码行数:101,代码来源:generateCollationData.php

示例6: prepareWindows1252

 /**
  * Build a byte-to-UTF-8 translation table for Windows Code Page 1252.
  *
  * The table has one entry for each of the 256 byte values. Bytes
  * 0x80-0x9f are mapped through the CP1252 table below; every other byte
  * is encoded as the code point with the same numeric value. No iconv
  * library is needed.
  *
  * @return array map of single-byte string => UTF-8 string
  * @access private
  */
 function prepareWindows1252()
 {
     # Mappings from:
     # http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
     static $highBytes = array(
         0x80 => 0x20ac, 0x81 => 0xfffd, 0x82 => 0x201a, 0x83 => 0x192,
         0x84 => 0x201e, 0x85 => 0x2026, 0x86 => 0x2020, 0x87 => 0x2021,
         0x88 => 0x2c6, 0x89 => 0x2030, 0x8a => 0x160, 0x8b => 0x2039,
         0x8c => 0x152, 0x8d => 0xfffd, 0x8e => 0x17d, 0x8f => 0xfffd,
         0x90 => 0xfffd, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201c,
         0x94 => 0x201d, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014,
         0x98 => 0x2dc, 0x99 => 0x2122, 0x9a => 0x161, 0x9b => 0x203a,
         0x9c => 0x153, 0x9d => 0xfffd, 0x9e => 0x17e, 0x9f => 0x178
     );
     $table = array();
     foreach (range(0, 0xff) as $byte) {
         // Default: the byte value is its own code point.
         $codepoint = $byte;
         if (isset($highBytes[$byte])) {
             $codepoint = $highBytes[$byte];
         }
         $table[chr($byte)] = codepointToUtf8($codepoint);
     }
     return $table;
 }
开发者ID:eFFemeer,项目名称:seizamcore,代码行数:21,代码来源:upgrade1_5.php

示例7: array

}
// Translation tables built below: UTF-8 source character => UTF-8 mapped
// character.
$wikiUpperChars = array();
$wikiLowerChars = array();
print "Reading character definitions...\n";
// $in is a file handle opened before this excerpt; each line is a
// semicolon-separated record.
// NOTE(review): presumably UnicodeData.txt, where fields 12 and 13 are the
// simple uppercase/lowercase mappings - confirm against the opening code.
while (false !== ($line = fgets($in))) {
    $columns = explode(';', $line);
    $codepoint = $columns[0];
    $name = $columns[1];
    $simpleUpper = $columns[12];
    $simpleLower = $columns[13];
    $source = codepointToUtf8(hexdec($codepoint));
    // Only record a mapping when the field is non-empty (truthy).
    if ($simpleUpper) {
        $wikiUpperChars[$source] = codepointToUtf8(hexdec($simpleUpper));
    }
    if ($simpleLower) {
        $wikiLowerChars[$source] = codepointToUtf8(hexdec($simpleLower));
    }
}
fclose($in);
// Emit the two tables as PHP source into Utf8Case.php.
$out = fopen("Utf8Case.php", "wt");
if ($out) {
    // escapeArray() is defined outside this excerpt; its result is
    // interpolated below as the right-hand side of each assignment.
    $outUpperChars = escapeArray($wikiUpperChars);
    $outLowerChars = escapeArray($wikiLowerChars);
    // The opening tag is split as "<" . "?php" in the generated header.
    $outdata = "<" . "?php\n/**\n * Simple 1:1 upper/lowercase switching arrays for utf-8 text.\n * Won't get context-sensitive things yet.\n *\n * Hack for bugs in ucfirst() and company\n *\n * These are pulled from memcached if possible, as this is faster than filling\n * up a big array manually.\n *\n * @file\n * @ingroup Language\n */\n\n/**\n * Translation array to get upper case character\n */\n\$wikiUpperChars = {$outUpperChars};\n\n/**\n * Translation array to get lower case character\n */\n\$wikiLowerChars = {$outLowerChars};\n";
    fputs($out, $outdata);
    fclose($out);
    print "Wrote out Utf8Case.php\n";
} else {
    // Output file could not be opened: report and terminate the script.
    print "Can't create file Utf8Case.php\n";
    exit(-1);
}
开发者ID:nischayn22,项目名称:mediawiki-core,代码行数:31,代码来源:Utf8CaseGenerate.php

示例8: getItemParsedCallback

 /**
  * Replacement callback for unicode notations.
  *
  * Drops the first two characters of the full match, reads the remainder
  * as a hexadecimal number and returns that code point encoded as UTF-8.
  *
  * @param	array	preg matches; only element 0 (the full match) is used
  * @return	string	replacement string
  */
 private function getItemParsedCallback($matches)
 {
     require_once "include/Unicode/UtfNormalUtil.php";
     $hexDigits = substr($matches[0], 2);
     $codepoint = hexdec($hexDigits);
     return codepointToUtf8($codepoint);
 }
开发者ID:arlendotcn,项目名称:ilias,代码行数:10,代码来源:ilCharSelectorConfig.php

示例9: execute

    public function execute()
    {
        $dir = __DIR__;
        $endl = "\n";
        $lines = file("{$dir}/equivset.in");
        if (!$lines) {
            $this->error("Unable to open equivset.in\n", 1);
        }
        $setsFile = fopen("{$dir}/equivset.txt", 'w');
        if (!$setsFile) {
            $this->error("Unable to open equivset.txt for writing\n", 1);
        }
        fwrite($setsFile, <<<EOT
# This file is generated by generateEquivset.php
# It shows sets of equivalent characters, one set per line, with characters
# separated by whitespace. This file is not used by MediaWiki, rather it is
# intended as a human-readable version of equivset.php, for debugging and
# review purposes.

EOT
);
        $outputFile = fopen("{$dir}/equivset.php", 'w');
        if (!$outputFile) {
            $this->error("Unable to open equivset.php for writing\n", 1);
        }
        fwrite($outputFile, "<?" . "php{$endl}" . <<<EOT
# This file is generated by generateEquivset.php
# It contains a map of characters, encoded in UTF-8, such that running strtr()
# on a string with this map will cause confusable characters to be reduced to
# a canonical representation. The same array is also available in serialized
# form, in equivset.ser.

EOT
);
        $serializedFile = fopen("{$dir}/equivset.ser", 'w');
        if (!$serializedFile) {
            $this->error("Unable to open equivset.ser for writing\n", 1);
        }
        # \s matches \xa0 in non-unicode mode, which is not what we want
        # So we need to make our own whitespace class
        $sp = '[\\ \\t]';
        $lineNum = 0;
        $setsByChar = array();
        $sets = array();
        $exitStatus = 0;
        foreach ($lines as $line) {
            ++$lineNum;
            $mapToEmpty = false;
            # Whether the line ends with a null character
            $mapToEmpty = strpos($line, "") === strlen($line) - 2;
            $line = trim($line);
            # Filter comments
            if (!$line || $line[0] == '#') {
                continue;
            }
            # Process line
            if (!preg_match("/^(?P<hexleft> [A-F0-9]+) {$sp}+ (?P<charleft> .+?) {$sp}+ => {$sp}+ (?:(?P<hexright> [A-F0-9]+) {$sp}+|) (?P<charright> .+?) {$sp}* (?: \\#.*|) \$ /x", $line, $m)) {
                $this->output("Error: invalid entry at line {$lineNum}: {$line}\n");
                $exitStatus = 1;
                continue;
            }
            $error = false;
            if ($mapToEmpty) {
                $m['charright'] = '';
            } else {
                if (codepointToUtf8(hexdec($m['hexleft'])) != $m['charleft']) {
                    $actual = utf8ToCodepoint($m['charleft']);
                    if ($actual === false) {
                        $this->output("Bytes: " . strlen($m['charleft']) . "\n");
                        $this->output(bin2hex($line) . "\n");
                        $hexForm = bin2hex($m['charleft']);
                        $this->output("Invalid UTF-8 character \"{$m['charleft']}\" ({$hexForm}) at line {$lineNum}: {$line}\n");
                    } else {
                        $this->output("Error: left number ({$m['hexleft']}) does not match left character ({$actual}) " . "at line {$lineNum}: {$line}\n");
                    }
                    $error = true;
                }
                if (!empty($m['hexright']) && codepointToUtf8(hexdec($m['hexright'])) != $m['charright']) {
                    $actual = utf8ToCodepoint($m['charright']);
                    if ($actual === false) {
                        $hexForm = bin2hex($m['charright']);
                        $this->output("Invalid UTF-8 character \"{$m['charleft']}\" ({$hexForm}) at line {$lineNum}: {$line}\n");
                    } else {
                        $this->output("Error: right number ({$m['hexright']}) does not match right character ({$actual}) " . "at line {$lineNum}: {$line}\n");
                    }
                    $error = true;
                }
                if ($error) {
                    $exitStatus = 1;
                    continue;
                }
            }
            # Find the set for the right character, add a new one if necessary
            if (isset($setsByChar[$m['charright']])) {
                $setName = $setsByChar[$m['charright']];
            } else {
                # New set
                $setName = $m['charright'];
                $sets[$setName] = array($m['charright']);
                $setsByChar[$setName] = $setName;
//.........这里部分代码省略.........
开发者ID:jasonthebomb,项目名称:mediawiki-extensions-AntiSpoof,代码行数:101,代码来源:generateEquivset.php

示例10: badCharErr

 /**
  * Build the error result for a disallowed character.
  *
  * @param $msgId message key for the outer error message
  * @param $point code point of the offending character
  * @return array two elements: the string "ERROR" and the message text
  */
 private static function badCharErr($msgId, $point)
 {
     $code = sprintf('U+%04X', $point);
     $symbol = codepointToUtf8($point);
     // A combining mark would attach to whatever precedes it in the
     // message, so give it a space to attach to instead.
     if (self::getScriptCode($point) == "SCRIPT_COMBINING_MARKS") {
         $symbol = ' ' . $symbol;
     }
     // A symbol that is a single character of Unicode category C is
     // described by its code only.
     $isNonPrintable = preg_match('/\\A\\p{C}\\z/u', $symbol);
     if ($isNonPrintable) {
         $description = wfMessage('antispoof-bad-char-non-printable', $code)->text();
     } else {
         $description = wfMessage('antispoof-bad-char', $symbol, $code)->text();
     }
     $errorText = wfMessage($msgId, $description)->text();
     return array("ERROR", $errorText);
 }
开发者ID:jasonthebomb,项目名称:mediawiki-extensions-AntiSpoof,代码行数:16,代码来源:AntiSpoof_body.php

示例11: js_unescape

/**
 * Decode a string produced by Javascript's escape() function.
 *
 * Two escape forms are recognised: "%uXXXX" (the four characters after the
 * "u" are read as a hexadecimal code point) and "%XX" (the two characters
 * after the "%" are read as a hexadecimal byte value). All other
 * characters are copied through unchanged. The result is assembled as
 * UTF-8 and converted with iconv() only when another charset is requested.
 * Modified function from http://pure-essence.net/stuff/code/utf8RawUrlDecode.phps
 *
 * @param $source String escaped with Javascript's escape() function
 * @param $iconv_to String destination character set, passed as the second
 * parameter to iconv(). Default is UTF-8.
 * @return string
 */
function js_unescape($source, $iconv_to = 'UTF-8')
{
    $result = '';
    $length = strlen($source);
    $i = 0;
    while ($i < $length) {
        $current = substr($source, $i, 1);
        if ($current != '%') {
            // Ordinary character: copy as-is.
            $result .= $current;
            $i++;
            continue;
        }

        // Step past the '%' and look at what kind of escape follows.
        $i++;
        $marker = substr($source, $i, 1);
        if ($marker == 'u') {
            // %uXXXX: four hex digits naming a code point.
            $i++;
            $codepoint = hexdec(substr($source, $i, 4));
            $result .= codepointToUtf8($codepoint);
            $i += 4;
            continue;
        }

        // %XX: two hex digits naming a single byte.
        $byteValue = hexdec(substr($source, $i, 2));
        $result .= chr($byteValue);
        $i += 2;
    }
    if ($iconv_to != "UTF-8") {
        $result = iconv("utf-8", $iconv_to, $result);
    }
    return $result;
}
开发者ID:realsoc,项目名称:mediawiki-extensions,代码行数:43,代码来源:FCKeditorSajax.body.php

示例12: listToString

 /**
  * Encode a list of code points as one UTF-8 string, preserving order.
  *
  * @param $list array code points
  * @return string concatenated UTF-8 characters; '' for an empty list
  */
 public static function listToString($list)
 {
     $pieces = array();
     foreach ($list as $codepoint) {
         $pieces[] = codepointToUtf8($codepoint);
     }
     return implode('', $pieces);
 }
开发者ID:eFFemeer,项目名称:seizamcore,代码行数:12,代码来源:AntiSpoof_body.php

示例13: hexUnicodeToUtf8

/**
 * Convert a hexadecimal code point string to its UTF-8 encoding.
 *
 * Diagnostics raised by either conversion step are suppressed with "@".
 *
 * @param $hexcp string hexadecimal code point
 * @return whatever codepointToUtf8() returns for the parsed number
 */
function hexUnicodeToUtf8($hexcp)
{
    $codepoint = @hexDec($hexcp);
    return @codepointToUtf8($codepoint);
}
开发者ID:rkania,项目名称:GS3,代码行数:4,代码来源:gs_utf_normal.php

示例14: decodeEntity

 /**
  * Decode a named character entity.
  *
  * The name is first mapped through $wgHtmlEntityAliases if it is listed
  * there. If the resulting name is a key of $wgHtmlEntities, the UTF-8
  * encoding of the stored code point is returned; otherwise the
  * pseudo-entity source (eg &foo;) is returned, built from the resolved
  * name.
  *
  * @param string $name
  * @return string
  */
 static function decodeEntity($name)
 {
     global $wgHtmlEntities, $wgHtmlEntityAliases;
     $resolved = isset($wgHtmlEntityAliases[$name]) ? $wgHtmlEntityAliases[$name] : $name;
     if (!isset($wgHtmlEntities[$resolved])) {
         // Unknown entity: hand back its source form.
         return "&{$resolved};";
     }
     return codepointToUtf8($wgHtmlEntities[$resolved]);
 }
开发者ID:mined-gatech,项目名称:hubzero-cms,代码行数:20,代码来源:sanitizer.php

示例15: decodeChar

 /**
  * Encode a code point as UTF-8 when SGString::validateCodepoint()
  * accepts it; otherwise return UTF8_REPLACEMENT.
  *
  * @param $codepoint Integer
  * @return String
  */
 static function decodeChar($codepoint)
 {
     // Rejected code points are replaced rather than encoded.
     if (!SGString::validateCodepoint($codepoint)) {
         return UTF8_REPLACEMENT;
     }
     return codepointToUtf8($codepoint);
 }
开发者ID:nichtich,项目名称:sgloss,代码行数:14,代码来源:SGString.php


注:本文中的codepointToUtf8函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。