本文整理汇总了PHP中HTMLPurifier_Encoder::cleanUTF8方法的典型用法代码示例。如果您正苦于以下问题：PHP HTMLPurifier_Encoder::cleanUTF8方法的具体用法？PHP HTMLPurifier_Encoder::cleanUTF8怎么用？PHP HTMLPurifier_Encoder::cleanUTF8使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类HTMLPurifier_Encoder的用法示例。
在下文中一共展示了HTMLPurifier_Encoder::cleanUTF8方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: assertCleanUTF8
/**
 * Asserts that HTMLPurifier_Encoder::cleanUTF8() turns $string into $expect,
 * once with the default call and once with the second argument set to true.
 *
 * @param string      $string Input handed to cleanUTF8().
 * @param string|null $expect Expected result; null means "unchanged input".
 */
public function assertCleanUTF8($string, $expect = null)
{
    // A missing expectation means the input must survive untouched.
    $expected = ($expect === null) ? $string : $expect;

    $defaultResult = HTMLPurifier_Encoder::cleanUTF8($string);
    $this->assertIdentical($defaultResult, $expected, 'iconv: %s');

    $forcedResult = HTMLPurifier_Encoder::cleanUTF8($string, true);
    $this->assertIdentical($forcedResult, $expected, 'PHP: %s');
}
示例2: normalize
/**
 * Prepares raw HTML for lexing: optional newline normalisation, CDATA
 * escaping, IE-conditional removal, optional body extraction, entity
 * expansion, UTF-8 cleaning and optional processing-instruction removal.
 * @param string $html HTML.
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return string
 * @todo Consider making protected
 */
public function normalize($html, $config, $context)
{
    // Collapse \r\n and lone \r into \n (the array is applied in order).
    if ($config->get('Core.NormalizeNewlines')) {
        $html = str_replace(array("\r\n", "\r"), "\n", $html);
    }

    // Commented CDATA is only escaped when HTML.Trusted is set.
    if ($config->get('HTML.Trusted')) {
        $html = $this->escapeCommentedCDATA($html);
    }

    // Plain CDATA first, then IE conditional comments.
    $html = $this->removeIEConditional($this->escapeCDATA($html));

    // Reduce a full document to its body when configured to do so.
    if ($config->get('Core.ConvertDocumentToFragment')) {
        $collector = false;
        if ($config->get('Core.CollectErrors')) {
            $collector =& $context->get('ErrorCollector');
        }
        $body = $this->extractBody($html);
        if ($collector && $body != $html) {
            $collector->send(E_WARNING, 'Lexer: Extracted body');
        }
        $html = $body;
    }

    // Entities other than the big five are expanded before the UTF-8
    // clean-up, because an entity may stand for a non-SGML character.
    $expanded = $this->_entity_parser->substituteNonSpecialEntities($html);
    $html = HTMLPurifier_Encoder::cleanUTF8($expanded);

    // Strip <? ... ?> processing instructions if requested.
    if ($config->get('Core.RemoveProcessingInstructions')) {
        $html = preg_replace('#<\\?.+?\\?>#s', '', $html);
    }

    return $html;
}
示例3: normalize
/**
 * Normalizes a piece of HTML: optional body extraction, newline
 * normalisation, CDATA escaping, entity expansion and UTF-8 cleaning.
 */
function normalize($html, $config, &$context)
{
    // Full documents are reduced to their body when the option is on.
    if ($config->get('Core', 'AcceptFullDocuments')) {
        $html = $this->extractBody($html);
    }

    // \r\n and lone \r both become \n (the array is applied in order).
    $html = str_replace(array("\r\n", "\r"), "\n", $html);

    // Commented CDATA is only escaped for trusted HTML.
    if ($config->get('HTML', 'Trusted')) {
        $html = $this->escapeCommentedCDATA($html);
    }

    $escaped = $this->escapeCDATA($html);

    // Expand everything except the big five entities, then clean the
    // result: expanded entities may stand for non-SGML characters.
    $expanded = $this->_entity_parser->substituteNonSpecialEntities($escaped);

    return HTMLPurifier_Encoder::cleanUTF8($expanded);
}
示例4: validate
/**
 * Validates a comma-separated font-family list and rebuilds it in a
 * normalised form.
 *
 * Each entry is trimmed; empty entries are skipped. Generic family names
 * are emitted as-is. Quoted names have their quotes removed and their CSS
 * escapes expanded. Names consisting only of alphanumerics are emitted
 * unquoted; anything else is emitted inside single quotes with backslashes
 * and single quotes escaped.
 *
 * @param string $string Raw font-family value.
 * @param mixed $config Not read by this method.
 * @param mixed $context Not read by this method.
 * @return string|false Rebuilt list joined with ', ', or false when no
 *                      entry survives.
 */
public function validate($string, $config, $context)
{
// lookup table of generic family names that pass through unquoted
static $generic_names = array('serif' => true, 'sans-serif' => true, 'monospace' => true, 'fantasy' => true, 'cursive' => true);
// assume that no font names contain commas in them
$fonts = explode(',', $string);
$final = '';
foreach ($fonts as $font) {
$font = trim($font);
if ($font === '') {
continue;
}
// match a generic name
if (isset($generic_names[$font])) {
$final .= $font . ', ';
continue;
}
// match a quoted name
if ($font[0] === '"' || $font[0] === "'") {
$length = strlen($font);
// two characters or fewer cannot hold both quotes and a name
if ($length <= 2) {
continue;
}
$quote = $font[0];
// the closing quote must be the same character as the opening one
if ($font[$length - 1] !== $quote) {
continue;
}
// strip the surrounding quotes
$font = substr($font, 1, $length - 2);
$new_font = '';
// expand CSS escapes byte by byte; the "continue"s below apply to
// this for loop, not to the surrounding foreach
for ($i = 0, $c = strlen($font); $i < $c; $i++) {
if ($font[$i] === '\\') {
$i++;
// a trailing backslash is kept literally
if ($i >= $c) {
$new_font .= '\\';
break;
}
if (ctype_xdigit($font[$i])) {
// collect up to six hex digits in total
$code = $font[$i];
for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) {
if (!ctype_xdigit($font[$i])) {
break;
}
$code .= $font[$i];
}
// We have to be extremely careful when adding
// new characters, to make sure we're not breaking
// the encoding.
$char = HTMLPurifier_Encoder::unichr(hexdec($code));
// drop the escape if cleanUTF8 rejects the resulting character
if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
continue;
}
$new_font .= $char;
// step back so the loop increment lands on a non-whitespace
// follower; a whitespace follower is consumed by the increment
if ($i < $c && trim($font[$i]) !== '') {
$i--;
}
continue;
}
// backslash followed by a newline: drop both
if ($font[$i] === "\n") {
continue;
}
}
// ordinary byte, or the byte that followed a non-hex backslash
$new_font .= $font[$i];
}
$font = $new_font;
}
// $font is a pure representation of the font name
if (ctype_alnum($font) && $font !== '') {
// very simple font, allow it in unharmed
$final .= $font . ', ';
continue;
}
// complicated font, requires quoting
// armor backslashes and single quotes (newlines are not altered here)
$font = str_replace("\\", "\\\\", $font);
$font = str_replace("'", "\\'", $font);
$final .= "'{$font}', ";
}
// remove the trailing separator characters left by the last entry
$final = rtrim($final, ', ');
if ($final === '') {
return false;
}
return $final;
}
示例5: escapeHTML
/**
 * Cleans a string with HTMLPurifier_Encoder::cleanUTF8() and then escapes
 * it with htmlspecialchars() using ENT_COMPAT and the UTF-8 charset.
 *
 * @param string $string Text to escape.
 * @return string Escaped text.
 */
function escapeHTML($string)
{
    $cleaned = HTMLPurifier_Encoder::cleanUTF8($string);

    return htmlspecialchars($cleaned, ENT_COMPAT, 'UTF-8');
}
示例6: expandCSSEscape
/**
 * Expands the backslash escapes of a CSS string and returns the
 * resulting "pure" text.
 *
 * A backslash followed by one to six hex digits becomes the character
 * built by HTMLPurifier_Encoder::unichr(); a backslash followed by a
 * newline disappears; a backslash followed by any other byte yields that
 * byte; a trailing backslash is kept.
 *
 * @param string $string Possibly escaped CSS string.
 * @return string Unescaped string.
 */
protected function expandCSSEscape($string)
{
    $ret = '';
    $length = strlen($string);
    $pos = 0;

    while ($pos < $length) {
        // Ordinary byte: copy it and move on.
        if ($string[$pos] !== '\\') {
            $ret .= $string[$pos];
            $pos++;
            continue;
        }

        // Step onto the byte following the backslash.
        $pos++;
        if ($pos >= $length) {
            // A lone trailing backslash is preserved.
            $ret .= '\\';
            break;
        }

        if (!ctype_xdigit($string[$pos])) {
            // Escaped newline vanishes; any other escaped byte is kept.
            if ($string[$pos] !== "\n") {
                $ret .= $string[$pos];
            }
            $pos++;
            continue;
        }

        // Hex escape: gather at most six hex digits.
        $code = $string[$pos];
        $pos++;
        $digits = 1;
        while ($pos < $length && $digits < 6 && ctype_xdigit($string[$pos])) {
            $code .= $string[$pos];
            $pos++;
            $digits++;
        }

        // We have to be extremely careful when adding new characters,
        // to make sure we're not breaking the encoding.
        $char = HTMLPurifier_Encoder::unichr(hexdec($code));
        if (HTMLPurifier_Encoder::cleanUTF8($char) === '') {
            // Rejected character: the byte after the escape is skipped too.
            $pos++;
            continue;
        }
        $ret .= $char;

        // A whitespace byte right after the escape is consumed with it;
        // a non-whitespace byte is left for the next iteration.
        $followedByText = $pos < $length && trim($string[$pos]) !== '';
        if (!$followedByText) {
            $pos++;
        }
    }

    return $ret;
}
示例7: convertToUTF8
/**
 * Attempts to convert a string to UTF-8 and clean any non-valid UTF-8 characters.
 *
 * Strings that already pass isUTF8() are only cleaned. Other strings are
 * first transcoded from the encoding reported by getEncoding() and cleaned
 * afterwards. Cleaning has to come last: cleanUTF8() parses its input as
 * UTF-8, so running it on bytes in another encoding would drop the very
 * bytes the conversion needs.
 *
 * @param $string
 *
 * @return bool|string The cleaned UTF-8 string, or the conversion's
 *                     non-string result when the conversion fails.
 */
public static function convertToUTF8($string)
{
    // Don't wrap in a class_exists in case the server already has it's own version of HTMLPurifier and they have
    // open_basedir restrictions
    require_once Craft::getPathOfAlias('system.vendors.htmlpurifier') . '/HTMLPurifier.standalone.php';

    // If it's already a UTF8 string, just clean and return it
    if (static::isUTF8($string)) {
        return \HTMLPurifier_Encoder::cleanUTF8($string);
    }

    // Detect the source encoding once, from the untouched bytes, so both
    // conversion paths work from the same answer.
    $encoding = static::getEncoding($string);

    // Otherwise set HTMLPurifier to the actual string encoding
    $config = \HTMLPurifier_Config::createDefault();
    $config->set('Core.Encoding', $encoding);

    // Convert it to UTF8 if possible
    if (static::checkForIconv()) {
        $converted = \HTMLPurifier_Encoder::convertToUTF8($string, $config, null);
    } else {
        $converted = mb_convert_encoding($string, 'utf-8', $encoding);
    }

    // A failed conversion is passed through rather than handed to the cleaner.
    if (!is_string($converted)) {
        return $converted;
    }

    // Clean it, now that it really is UTF-8
    return \HTMLPurifier_Encoder::cleanUTF8($converted);
}
示例8: normalize
/**
 * Runs the pre-lexing clean-up pipeline over a piece of HTML.
 *
 * Steps, in order: newline normalisation (optional), commented-CDATA
 * escaping (trusted HTML only), CDATA escaping, IE conditional removal,
 * body extraction (optional), entity expansion, UTF-8 cleaning and
 * processing-instruction removal (optional).
 *
 * @param string $html
 * @param HTMLPurifier_Config $config
 * @param HTMLPurifier_Context $context
 * @return string
 */
public function normalize($html, $config, $context)
{
    if ($config->get('Core.NormalizeNewlines')) {
        // Applied in order: Windows line endings first, then stray \r.
        $html = str_replace(array("\r\n", "\r"), "\n", $html);
    }

    if ($config->get('HTML.Trusted')) {
        $html = $this->escapeCommentedCDATA($html);
    }

    $html = $this->escapeCDATA($html);
    $html = $this->removeIEConditional($html);

    if ($config->get('Core.ConvertDocumentToFragment')) {
        // The collector stays false unless error collection is enabled.
        $errors = false;
        if ($config->get('Core.CollectErrors')) {
            $errors =& $context->get('ErrorCollector');
        }

        $fragment = $this->extractBody($html);
        $changed = $fragment != $html;
        if ($errors && $changed) {
            $errors->send(E_WARNING, 'Lexer: Extracted body');
        }
        $html = $fragment;
    }

    // Entity expansion precedes the UTF-8 clean-up.
    $html = $this->_entity_parser->substituteNonSpecialEntities($html);
    $html = HTMLPurifier_Encoder::cleanUTF8($html);

    if (!$config->get('Core.RemoveProcessingInstructions')) {
        return $html;
    }

    return preg_replace('#<\\?.+?\\?>#s', '', $html);
}
示例9: search_index
/**
 * Builds a word-frequency index from a piece of text.
 *
 * The text is brought to UTF-8 where possible, cleaned with HTML Purifier
 * when that library can be loaded, stripped of numeric HTML entities,
 * lower-cased, and split on whitespace after punctuation, separators and
 * symbols have been turned into spaces. Only tokens made solely of
 * letters and digits are counted.
 *
 * @param string $data Text to index.
 * @return array Map of word => number of occurrences (returned by
 *               reference); an empty array when no word is found.
 */
function &search_index($data)
{
    // Always hand back an array, even when nothing is indexable.
    $words = array();

    // Be sure we will parse UTF-8 data: only input that is NOT already
    // valid UTF-8 needs converting.
    if (function_exists('mb_check_encoding') && function_exists('iconv') && function_exists('mb_detect_encoding') && !mb_check_encoding($data, 'UTF-8')) {
        $detected = mb_detect_encoding($data);
        if ($detected !== false) {
            $converted = iconv($detected, 'UTF-8//TRANSLIT', $data);
            if ($converted !== false) {
                $data = $converted;
            }
        }
    }

    // Clean the UTF-8 string using HTML Purifier. The includes are
    // best-effort so that the class_exists() guard can actually take
    // effect when the library is not installed.
    @include_once 'lib/HTMLPurifier.auto.php';
    @include_once 'HTMLPurifier/Encoder.php';
    if (class_exists('HTMLPurifier_Encoder')) {
        // cleanUTF8 is a static method; no instance is needed.
        $data = HTMLPurifier_Encoder::cleanUTF8($data);
    }

    // Remove remaining HTML numeric entities
    if (function_exists('mb_decode_numericentity')) {
        if (!function_exists('utf8_entity_decode')) {
            function utf8_entity_decode($entity)
            {
                $convmap = array(0x0, 0x10000, 0, 0xfffff);
                return mb_decode_numericentity($entity, $convmap, 'UTF-8');
            }
        }
        // Callbacks replace the /e modifier, which PHP 7 removed and
        // which evaluated matched input as code.
        $data = preg_replace_callback(
            '/&#\\d{2,5};/u',
            function ($match) {
                return utf8_entity_decode($match[0]);
            },
            $data
        );
        if (is_string($data)) {
            // Hex entities: rewrite as decimal, then decode.
            $data = preg_replace_callback(
                '/&#x([a-fA-F0-9]{2,8});/u',
                function ($match) {
                    return utf8_entity_decode('&#' . hexdec($match[1]) . ';');
                },
                $data
            );
        }
    }

    // The /u patterns yield null on malformed UTF-8; nothing to index then.
    if (!is_string($data) || $data === '') {
        return $words;
    }

    // Lowerize
    $data = function_exists('mb_convert_case') ? mb_convert_case($data, MB_CASE_LOWER, 'UTF-8') : strtolower($data);

    // Convert punctuations to spaces
    $data = preg_replace('/[\\pP\\pZ\\pS]/u', ' ', $data);
    if (!is_string($data) || $data === '') {
        return $words;
    }

    // Split into words (do NOT use the split function that doesn't correctly handle some characters !)
    $tokens = preg_split('/\\s+/u', $data, -1, PREG_SPLIT_NO_EMPTY);
    if ($tokens === false) {
        return $words;
    }

    foreach ($tokens as $token) {
        // Keep only alpha-num words
        if (!preg_match('/^[\\pL\\pN]+$/u', $token)) {
            continue;
        }
        // count words
        if (isset($words[$token])) {
            $words[$token]++;
        } else {
            $words[$token] = 1;
        }
    }

    return $words;
}