本文整理汇总了PHP中UtfNormal::toNFKD方法的典型用法代码示例。如果您正苦于以下问题:PHP UtfNormal::toNFKD方法的具体用法?PHP UtfNormal::toNFKD怎么用?PHP UtfNormal::toNFKD使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类UtfNormal的用法示例。
在下文中一共展示了UtfNormal::toNFKD方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: checkUnicodeString
/**
 * Validate a name and reduce it to a canonical form for spoof comparison.
 *
 * The name is rejected when it contains a blacklisted character (checked both
 * before and after NFKD decomposition), starts with a combining mark, contains
 * a character whose script code is unassigned or deprecated, has no script left
 * once ASCII punctuation and digits are ignored, or mixes scripts in a way that
 * isAllowedScriptCombination() does not accept. Otherwise confusable characters
 * are replaced, the "VV" and "RN" pairs are merged, ASCII punctuation and
 * spaces are removed, and the result is returned with a "v2:" prefix.
 *
 * TODO: this routine still does too much and should be split up.
 *
 * @param $testName The name to check; a non-string is reported as an error
 * @return array array("OK", canonical name) on success, array("ERROR", message)
 *               for most failures, or whatever badCharErr() builds for a
 *               bad-character rejection
 * @throws MWException if $wgAntiSpoofBlacklist is not an array
 */
public static function checkUnicodeString($testName)
{
    global $wgAntiSpoofBlacklist;

    # Sanity checks on configuration and input come first
    if (!is_array($wgAntiSpoofBlacklist)) {
        throw new MWException('$wgAntiSpoofBlacklist should be an array!');
    }
    if (!is_string($testName)) {
        return array('ERROR', wfMessage('antispoof-badtype')->text());
    }
    if ($testName === '') {
        return array('ERROR', wfMessage('antispoof-empty')->text());
    }

    # First blacklist pass, on the name exactly as supplied
    $rawChars = self::stringToList($testName);
    foreach ($rawChars as $rawChar) {
        if (in_array($rawChar, $wgAntiSpoofBlacklist)) {
            return self::badCharErr('antispoof-blacklisted', $rawChar);
        }
    }

    # Unicode _compatibility_ decomposition
    $decomposed = UtfNormal::toNFKD($testName);
    $chars = self::stringToList($decomposed);

    # Second blacklist pass: paranoia in case the normalization code changes
    foreach ($chars as $char) {
        if (in_array($char, $wgAntiSpoofBlacklist)) {
            return self::badCharErr('antispoof-blacklisted', $char);
        }
    }

    # A leading combining mark should not occur in any valid Unicode string
    if (self::getScriptCode($chars[0]) == 'SCRIPT_COMBINING_MARKS') {
        return self::badCharErr('antispoof-combining', $chars[0]);
    }

    # Crude accent stripping: NFKD has already split accented characters,
    # so dropping the combining marks leaves the base letters
    $chars = self::stripScript($chars, 'SCRIPT_COMBINING_MARKS');
    $scripts = array_map(array('AntiSpoof', 'getScriptCode'), $chars);

    # Script codes that disqualify the name, checked in this order
    $rejectedScripts = array(
        'SCRIPT_UNASSIGNED' => 'antispoof-unassigned',
        'SCRIPT_DEPRECATED' => 'antispoof-deprecated',
    );
    foreach ($rejectedScripts as $scriptCode => $messageKey) {
        $position = array_search($scriptCode, $scripts);
        if ($position !== false) {
            return self::badCharErr($messageKey, $chars[$position]);
        }
    }

    # ASCII punctuation and digits are tolerated alongside any script
    $letterScripts = array_diff(
        array_unique($scripts),
        array('SCRIPT_ASCII_PUNCTUATION', 'SCRIPT_ASCII_DIGITS')
    );
    if (empty($letterScripts)) {
        return array('ERROR', wfMessage('antispoof-noletters')->text());
    }
    if (count($letterScripts) > 1 && !self::isAllowedScriptCombination($letterScripts)) {
        return array('ERROR', wfMessage('antispoof-mixedscripts')->text());
    }

    # At this point, we should probably check for BiDi violations if they
    # aren't caught above...

    # Map characters in the confusables set to their equivalence characters
    $chars = self::equivString($chars);

    # Very simple sequence processing: "vv" -> "w", "rn" -> "m"...
    # Not exhaustive, but ups the ante. Done _after_ canonicalization:
    # looks weird, but needed for consistency
    $pairReplacements = array('VV' => 'W', 'RN' => 'M');
    foreach ($pairReplacements as $pair => $single) {
        $chars = self::mergePairs(
            $chars,
            self::equivString(self::stringToList($pair)),
            self::equivString(self::stringToList($single))
        );
    }

    # Squeeze out all punctuation chars
    # TODO: almost the same code occurs twice, refactor into own routine
    $chars = self::stripScript($chars, 'SCRIPT_ASCII_PUNCTUATION');

    # Rebuild the string and drop any spaces that have snuck through
    $canonical = self::hardjoin(explode(' ', self::listToString($chars)));

    # Reduce repeated char sequences to single character
    # BUG: TODO: implement this
    if (strlen($canonical) < 1) {
        return array('ERROR', wfMessage('antispoof-tooshort')->text());
    }

    # Don't ASCIIfy: we assume we are UTF-8 capable on output.
    # The version prefix futureproofs stored values against algorithm changes.
    return array('OK', 'v2:' . $canonical);
}
示例2: normalize_form_kd_php
/**
 * Run a value through UtfNormal's NFKD (compatibility decomposition) routine.
 *
 * @param $c The value handed to UtfNormal::toNFKD()
 * @return mixed Whatever UtfNormal::toNFKD() returns for $c
 */
function normalize_form_kd_php($c)
{
    // "php" is forwarded as a second argument exactly as before.
    // NOTE(review): confirm whether UtfNormal::toNFKD() actually reads it.
    $decomposed = UtfNormal::toNFKD($c, "php");

    return $decomposed;
}
示例3: toAscii
/**
 * Replaces any non-ASCII character by its linguistically most logical substitution
 *
 * The string is first normalized to NFKD, then a German-style "e" is appended
 * in place of a combining diaeresis on a/o/u, a fixed set of letters is
 * transliterated (ae ligature, thorn, eth, d with stroke, sharp s), and finally
 * every byte that is not an ASCII letter or a hyphen is removed.
 *
 * @access private
 * @param string $string A Shibboleth attribute or other string
 * @return string ascii-version of attribute
 */
function toAscii($string)
{
    require_once 'include/Unicode/UtfNormal.php';

    // Normalize to NFKD.
    // This separates letters from combining marks.
    // See http://unicode.org/reports/tr15
    $string = UtfNormal::toNFKD($string);

    // Replace german usages of diaeresis by appending an e
    // (CC 88 is the UTF-8 encoding of the combining diaeresis U+0308)
    $string = preg_replace('/([aouAOU])\\xcc\\x88/', '\\1e', $string);

    // Transliterate the remaining letters, keyed by their UTF-8 byte
    // sequences. All keys are distinct two-byte sequences and all
    // replacements are plain ASCII, so a single strtr() pass gives the same
    // result as replacing them one after another.
    $string = strtr($string, array(
        // Combined ae character becomes separated a and e
        "\xc3\x86" => 'AE',
        "\xc3\xa6" => 'ae',
        // Thorn becomes th
        "\xc3\x9e" => 'TH',
        "\xc3\xbe" => 'th',
        // Eth becomes d. The lowercase form (C3 B0) used to be missing here
        // and was therefore deleted outright by the final strip below.
        "\xc3\x90" => 'D',
        "\xc3\xb0" => 'd',
        // D with stroke becomes d
        "\xc4\x90" => 'D',
        "\xc4\x91" => 'd',
        // Sharp s becomes ss
        "\xc3\x9f" => 'ss',
    ));

    // Get rid of everything except the characters a to z and the hyphen
    // (both cases are listed explicitly, so no case-insensitive flag is needed)
    $string = preg_replace('/[^a-zA-Z\\-]/', '', $string);

    return $string;
}