本文整理汇总了PHP中UtfNormal::cleanUp方法的典型用法代码示例。如果您正苦于以下问题:PHP UtfNormal::cleanUp方法的具体用法?PHP UtfNormal::cleanUp怎么用?PHP UtfNormal::cleanUp使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类UtfNormal的用法示例。
在下文中一共展示了UtfNormal::cleanUp方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的PHP代码示例。
示例1: processPage
/**
 * Check one image row and repair its name when it is not in canonical form.
 *
 * The stored name is percent-decoded, run through the content language's
 * checkTitleEncoding() and UtfNormal::cleanUp(), and then compared with the
 * DB key that Title::makeTitleSafe() derives from the cleaned text.
 *
 * @param $row Object: row exposing an img_name property
 */
function processPage($row)
{
    global $wgContLang;
    $original = $row->img_name;
    if ($original == '') {
        // Rows without a name cannot be repaired; drop them.
        $this->killRow($original);
        return $this->progress(1);
    }
    // Percent-codes first, then old encodings (e.g. latin-1), then Unicode normalization.
    $candidate = UtfNormal::cleanUp($wgContLang->checkTitleEncoding(rawurldecode($original)));
    $title = Title::makeTitleSafe(NS_IMAGE, $candidate);
    if ($title === null) {
        $this->log("page {$original} ({$candidate}) is illegal.");
        $this->pokeFile($original, $this->buildSafeTitle($candidate));
        return $this->progress(1);
    }
    $dbKey = $title->getDbKey();
    if ($dbKey !== $original) {
        $this->log("page {$original} ({$dbKey}) doesn't match self.");
        $this->pokeFile($original, $dbKey);
        return $this->progress(1);
    }
    // Name is already canonical.
    $this->progress(0);
}
示例2: elementClean
/**
 * Build an XML element via self::element() after passing the attribute values
 * and the contents through UtfNormal::cleanUp(), so that no invalid UTF-8 is
 * handed on.
 *
 * @param $element String:
 * @param $attribs Array: Name=>value pairs. Values will be escaped.
 * @param $contents String: NULL to make an open tag only; '' for a contentless closed tag (default)
 * @return string
 */
public static function elementClean($element, $attribs = array(), $contents = '')
{
    // Falsy inputs (empty array, '', NULL) are forwarded untouched.
    $safeAttribs = $attribs ? array_map(array('UtfNormal', 'cleanUp'), $attribs) : $attribs;
    $safeContents = $contents ? UtfNormal::cleanUp($contents) : $contents;
    return self::element($element, $safeAttribs, $safeContents);
}
示例3: elementClean
/**
 * Build an XML element via self::element() after passing the attribute values
 * and the contents through UtfNormal::cleanUp(), so that no invalid UTF-8 is
 * handed on. Normalization of the contents is wrapped in a profiling section.
 *
 * @param $element String:
 * @param $attribs Array: Name=>value pairs. Values will be escaped.
 * @param $contents String: NULL to make an open tag only; '' for a contentless closed tag (default)
 * @return string
 */
public static function elementClean($element, $attribs = array(), $contents = '')
{
    // Falsy inputs (empty array, '', NULL) are forwarded untouched.
    $safeAttribs = $attribs ? array_map(array('UtfNormal', 'cleanUp'), $attribs) : $attribs;
    $safeContents = $contents;
    if ($safeContents) {
        $section = __METHOD__ . '-norm';
        wfProfileIn($section);
        $safeContents = UtfNormal::cleanUp($safeContents);
        wfProfileOut($section);
    }
    return self::element($element, $safeAttribs, $safeContents);
}
示例4: processPage
/**
 * Validate one watchlist row and remove it when it is not usable.
 *
 * A row is kept only if it has a non-zero wl_user and its title survives a
 * round trip through UtfNormal::cleanUp() and Title::newFromText() unchanged.
 *
 * @param $row Object: row exposing wl_user, wl_namespace and wl_title
 */
function processPage($row)
{
    $stored = Title::makeTitle($row->wl_namespace, $row->wl_title);
    $roundTrip = Title::newFromText(UtfNormal::cleanUp($stored->getPrefixedText()));
    $isValid = $row->wl_user != 0 && !is_null($roundTrip) && $roundTrip->equals($stored);
    if (!$isValid) {
        $this->log("invalid watch by {$row->wl_user} for ({$row->wl_namespace}, \"{$row->wl_title}\")");
        $this->removeWatch($row);
        return $this->progress(1);
    }
    $this->progress(0);
}
示例5: formatDiffRow
/**
 * Format the HTML description of one change for a feed item.
 *
 * The result always starts with a paragraph holding the action text and the
 * formatted comment. For pages in a non-negative namespace that an anonymous
 * user may read, it is followed by the diff (or a link to it when the diff is
 * too large or diffs are disabled), or by the new page text when there is no
 * old revision.
 *
 * @param $title Title object
 * @param $oldid Integer: old revision's id
 * @param $newid Integer: new revision's id
 * @param $timestamp Integer: new revision's timestamp
 * @param $comment String: new revision's comment
 * @param $actiontext String: text of the action; in case of log event
 * @return String
 */
public static function formatDiffRow($title, $oldid, $newid, $timestamp, $comment, $actiontext = '')
{
    global $wgFeedDiffCutoff, $wgContLang, $wgUser;
    wfProfileIn(__FUNCTION__);
    $skin = $wgUser->getSkin();
    # log entries
    $completeText = '<p>' . implode(' ', array_filter(array($actiontext, $skin->formatComment($comment)))) . "</p>\n";
    // NOTE: Check permissions for anonymous users, not current user.
    // No "privileged" version should end up in the cache.
    // Most feed readers will not log in anyway.
    $anon = new User();
    $accErrors = $title->getUserPermissionsErrors('read', $anon, true);
    if ($title->getNamespace() >= 0 && !$accErrors) {
        if ($oldid) {
            wfProfileIn(__FUNCTION__ . "-dodiff");
            // Must be initialised: when diffs are disabled ($wgFeedDiffCutoff <= 0)
            // no diff is generated, and the size check below would otherwise
            // read an undefined variable.
            $diffText = '';
            // Don't bother generating the diff if we won't be able to show it
            if ($wgFeedDiffCutoff > 0) {
                $de = new DifferenceEngine($title, $oldid, $newid);
                $diffText = $de->getDiff(wfMsg('previousrevision'), wfMsg('revisionasof', $wgContLang->timeanddate($timestamp)));
            }
            // The cutoff test comes first so strlen() is skipped when diffs are disabled.
            if ($wgFeedDiffCutoff <= 0 || strlen($diffText) > $wgFeedDiffCutoff) {
                // Omit large diffs
                $diffLink = $title->escapeFullUrl('diff=' . $newid . '&oldid=' . $oldid);
                $diffText = '<a href="' . $diffLink . '">' . htmlspecialchars(wfMsgForContent('showdiff')) . '</a>';
            } elseif ($diffText === false) {
                // Error in diff engine, probably a missing revision
                $diffText = "<p>Can't load revision {$newid}</p>";
            } else {
                // Diff output fine, clean up any illegal UTF-8
                $diffText = UtfNormal::cleanUp($diffText);
                $diffText = self::applyDiffStyle($diffText);
            }
            wfProfileOut(__FUNCTION__ . "-dodiff");
        } else {
            // No old revision: show the escaped text of the new page instead of a diff.
            $rev = Revision::newFromId($newid);
            if (is_null($rev)) {
                $newtext = '';
            } else {
                $newtext = $rev->getText();
            }
            $diffText = '<p><b>' . wfMsg('newpage') . '</b></p>' . '<div>' . nl2br(htmlspecialchars($newtext)) . '</div>';
        }
        $completeText .= $diffText;
    }
    wfProfileOut(__FUNCTION__);
    return $completeText;
}
示例6: normalizePageName
/**
 * Returns the normalized form of the given page title, using the normalization rules of the given site.
 * If the given title is a redirect, the redirect will be resolved and the redirect target is returned.
 *
 * @note : This actually makes an API request to the remote site, so beware that this function is slow and depends
 *         on an external service.
 *
 * @note : If MW_PHPUNIT_TEST is defined, the call to the external site is skipped, and the title
 *         is normalized using the local normalization rules as implemented by the Title class.
 *
 * @see Site::normalizePageName
 *
 * @since 1.21
 *
 * @param string $pageName
 *
 * @return string|false The normalized title, or false when the title is invalid or missing,
 *         the remote call fails, or the response cannot be used.
 * @throws MWException If $pageName is not a string
 */
public function normalizePageName($pageName)
{
    // Check if we have strings as arguments.
    if (!is_string($pageName)) {
        throw new MWException('$pageName must be a string');
    }
    if (defined('MW_PHPUNIT_TEST')) {
        // If the code is under test, don't call out to other sites, just normalize locally.
        // Note: this may cause results to be inconsistent with the actual normalization used by the respective remote site!
        $t = Title::newFromText($pageName);
        if ($t === null) {
            // No Title could be built; report failure the same way the remote path
            // does instead of dereferencing null.
            return false;
        }
        return $t->getPrefixedText();
    }
    // Make sure the string is normalized into NFC (due to the bug 40017)
    // but do nothing to the whitespaces, that should work appropriately.
    // @see https://bugzilla.wikimedia.org/show_bug.cgi?id=40017
    $pageName = UtfNormal::cleanUp($pageName);
    // Build the args for the specific call
    $args = array('action' => 'query', 'prop' => 'info', 'redirects' => true, 'converttitles' => true, 'format' => 'json', 'titles' => $pageName);
    $url = $this->getFileUrl('api.php') . '?' . wfArrayToCgi($args);
    // Go on call the external site
    //@todo: we need a good way to specify a timeout here.
    $ret = Http::get($url);
    if ($ret === false) {
        wfDebugLog("MediaWikiSite", "call to external site failed: {$url}");
        return false;
    }
    $data = FormatJson::decode($ret, true);
    if (!is_array($data)) {
        wfDebugLog("MediaWikiSite", "call to <{$url}> returned bad json: " . $ret);
        return false;
    }
    $page = static::extractPageRecord($data, $pageName);
    if (isset($page['missing'])) {
        wfDebugLog("MediaWikiSite", "call to <{$url}> returned a marker for a missing page title! " . $ret);
        return false;
    }
    if (isset($page['invalid'])) {
        wfDebugLog("MediaWikiSite", "call to <{$url}> returned a marker for an invalid page title! " . $ret);
        return false;
    }
    if (!isset($page['title'])) {
        wfDebugLog("MediaWikiSite", "call to <{$url}> did not return a page title! " . $ret);
        return false;
    }
    return $page['title'];
}
示例7: processPage
/**
 * Check one page row and move the page when its title is not canonical.
 *
 * The title is rebuilt from the row, its prefixed text is passed through
 * UtfNormal::cleanUp() and parsed again; the page is moved if parsing fails
 * or if the parsed title differs from the stored one.
 *
 * @param $row Object: row exposing page_id, page_namespace and page_title
 */
function processPage($row)
{
    $stored = Title::makeTitle($row->page_namespace, $row->page_title);
    $display = $stored->getPrefixedText();
    $title = Title::newFromText(UtfNormal::cleanUp($display));
    if ($title === null) {
        // The cleaned text cannot be parsed as a title at all.
        $this->log("page {$row->page_id} ({$display}) is illegal.");
        $this->moveIllegalPage($row);
        return $this->progress(1);
    }
    if (!$title->equals($stored)) {
        // Parsing succeeded but produced a different title.
        $this->log("page {$row->page_id} ({$display}) doesn't match self.");
        $this->moveInconsistentPage($row, $title);
        return $this->progress(1);
    }
    $this->progress(0);
}
示例8: makeInputText
/**
 * Build a random input string by concatenating randomly chosen entries of
 * $this->hairs, then pass it through UtfNormal::cleanUp().
 *
 * @param $max Integer|false: upper bound for the number of pieces; false
 *   means use $this->maxLength
 * @return string
 */
function makeInputText($max = false)
{
    $upper = $max === false ? $this->maxLength : $max;
    $remaining = mt_rand($this->minLength, $upper);
    $pieces = array();
    while ($remaining-- > 0) {
        $pieces[] = $this->hairs[mt_rand(0, count($this->hairs) - 1)];
    }
    // Run the text through the UTF-8 normaliser: it evens out a few differences
    // between the old preprocessor and the XML-based one, which dislikes illegal
    // sequences and converts line endings. The MW UI does the same.
    return UtfNormal::cleanUp(implode('', $pieces));
}
示例9: processPage
/**
 * Check one page row and move the page when its title is not canonical.
 *
 * Rows whose title round-trips through UtfNormal::cleanUp() and
 * Title::newFromText() unchanged, and whose title can exist, are left alone.
 * Broken rows in NS_FILE that have a file are only reported; everything else
 * is moved.
 *
 * @param $row Object: row exposing page_id, page_namespace and page_title
 */
function processPage($row)
{
    $stored = Title::makeTitle($row->page_namespace, $row->page_title);
    $display = $stored->getPrefixedText();
    $title = Title::newFromText(UtfNormal::cleanUp($display));
    $unparsable = is_null($title);
    if (!$unparsable && $title->equals($stored) && $title->canExist()) {
        // Nothing to fix.
        return $this->progress(0);
    }
    if ($row->page_namespace == NS_FILE && $this->fileExists($row->page_title)) {
        // Left for the image cleanup script.
        $this->log("file {$row->page_title} needs cleanup, please run cleanupImages.php.");
        return $this->progress(0);
    }
    if ($unparsable) {
        $this->log("page {$row->page_id} ({$display}) is illegal.");
        $this->moveIllegalPage($row);
        return $this->progress(1);
    }
    $this->log("page {$row->page_id} ({$display}) doesn't match self.");
    $this->moveInconsistentPage($row, $title);
    return $this->progress(1);
}
示例10: parserFunctionHook
/**
 * Handler for the {{#infoboxbuilder:}} parser function.
 * Loads InfoboxBuilder.lua from disk, wraps it in a Scribunto Lua module,
 * invokes its 'builder' function and returns the UTF-8-cleaned result.
 * A ScribuntoException is turned into an error element instead of propagating.
 *
 * @param Parser $parser Parser object
 * @param PPFrame $frame PPFrame object
 * @param array $args Array of arguments passed from $frame object
 * @return string A string returned by InfoboxBuilder.lua, or the error markup
 */
public static function parserFunctionHook(\Parser $parser, $frame, $args)
{
    wfProfileIn(__METHOD__);
    try {
        // Attach the registered style module to the output.
        $parser->getOutput()->addModuleStyles('ext.wikia.InfoboxBuilder');
        $engine = \Scribunto::getParserEngine($parser);
        // Drop the first argument before building the child frame.
        unset($args[0]);
        $childFrame = $frame->newChild($args, $parser->getTitle(), 1);
        $luaSource = file_get_contents(__DIR__ . '/includes/lua/InfoboxBuilder.lua');
        $module = new \Scribunto_LuaModule($engine, $luaSource, 'InfoboxBuilder');
        $output = \UtfNormal::cleanUp(strval($module->invoke('builder', $childFrame)));
        wfProfileOut(__METHOD__);
        return $output;
    } catch (\ScribuntoException $e) {
        $trace = $e->getScriptTraceHtml(array('msgOptions' => array('content')));
        $html = \Html::element('p', array(), $e->getMessage());
        if ($trace !== false) {
            $backtraceLabel = wfMessage('scribunto-common-backtrace')->inContentLanguage()->text();
            $html .= \Html::element('p', array(), $backtraceLabel) . $trace;
        }
        $out = $parser->getOutput();
        if (!isset($out->scribunto_errors)) {
            // First error on this output: set up the hook, the list and the tracking category.
            $out->addOutputHook('ScribuntoError');
            $out->scribunto_errors = array();
            $parser->addTrackingCategory('scribunto-common-error-category');
        }
        $out->scribunto_errors[] = $html;
        $errorIndex = count($out->scribunto_errors) - 1;
        $id = 'mw-scribunto-error-' . $errorIndex;
        $hiddenMessage = $parser->insertStripItem('<!--' . htmlspecialchars($e->getMessage()) . '-->');
        $parserError = wfMessage('scribunto-parser-error')->inContentLanguage()->text() . $hiddenMessage;
        wfProfileOut(__METHOD__);
        // #iferror-compatible error element
        return "<strong class=\"error\"><span class=\"scribunto-error\" id=\"{$id}\">" . $parserError . "</span></strong>";
    }
}
示例11: invokeHook
/**
 * Hook function for {{#invoke:module|func}}
 *
 * Expands the module and function names from the first two arguments, loads
 * the module through the parser's Scribunto engine, invokes the function in a
 * child frame built from the remaining arguments and returns the
 * UTF-8-cleaned result. A ScribuntoException raised inside the try block is
 * converted into an #iferror-compatible error element.
 *
 * @param $parser Parser
 * @param $frame PPFrame
 * @param $args array
 * @throws MWException If the frame class does not provide a truthy SUPPORTS_INDEX_OFFSET constant
 * @throws ScribuntoException
 * @return string
 */
public static function invokeHook(&$parser, $frame, $args)
{
    // Check with defined() before reading the constant: on PHP 8 constant()
    // throws an Error for an undefined constant, which "@" cannot suppress,
    // so the MWException below would never be raised.
    $offsetFlag = get_class($frame) . '::SUPPORTS_INDEX_OFFSET';
    if (!defined($offsetFlag) || !constant($offsetFlag)) {
        throw new MWException('Scribunto needs MediaWiki 1.20 or later (Preprocessor::SUPPORTS_INDEX_OFFSET)');
    }
    wfProfileIn(__METHOD__);
    try {
        if (count($args) < 2) {
            throw new ScribuntoException('scribunto-common-nofunction');
        }
        $moduleName = trim($frame->expand($args[0]));
        $engine = Scribunto::getParserEngine($parser);
        $title = Title::makeTitleSafe(NS_MODULE, $moduleName);
        if (!$title) {
            throw new ScribuntoException('scribunto-common-nosuchmodule');
        }
        $module = $engine->fetchModuleFromParser($title);
        if (!$module) {
            throw new ScribuntoException('scribunto-common-nosuchmodule');
        }
        $functionName = trim($frame->expand($args[1]));
        // The module and function names are removed before the child frame is built.
        unset($args[0]);
        unset($args[1]);
        $childFrame = $frame->newChild($args, $title, 1);
        $result = $module->invoke($functionName, $childFrame);
        $result = UtfNormal::cleanUp(strval($result));
        wfProfileOut(__METHOD__);
        return $result;
    } catch (ScribuntoException $e) {
        $trace = $e->getScriptTraceHtml(array('msgOptions' => array('content')));
        $html = Html::element('p', array(), $e->getMessage());
        if ($trace !== false) {
            $html .= Html::element('p', array(), wfMessage('scribunto-common-backtrace')->inContentLanguage()->text()) . $trace;
        }
        $out = $parser->getOutput();
        if (!isset($out->scribunto_errors)) {
            // First error on this output: set up the hook, the list and the tracking category.
            $out->addOutputHook('ScribuntoError');
            $out->scribunto_errors = array();
            $parser->addTrackingCategory('scribunto-common-error-category');
        }
        $out->scribunto_errors[] = $html;
        // The element id carries the index of the error just appended.
        $id = 'mw-scribunto-error-' . (count($out->scribunto_errors) - 1);
        $parserError = wfMessage('scribunto-parser-error')->inContentLanguage()->text() . $parser->insertStripItem('<!--' . htmlspecialchars($e->getMessage()) . '-->');
        wfProfileOut(__METHOD__);
        // #iferror-compatible error element
        return "<strong class=\"error\"><span class=\"scribunto-error\" id=\"{$id}\">" . $parserError . "</span></strong>";
    }
}
示例12: normalize
/**
 * Normalize a UTF-8 string to normal form C by delegating to
 * UtfNormal::cleanUp(). In Malayalam and Arabic, certain
 * backwards-compatible sequences are additionally converted to their modern
 * Unicode equivalent.
 *
 * This is language-specific for performance reasons only.
 *
 * @param $s String: text to normalize
 * @return string
 */
function normalize($s)
{
    $normalized = UtfNormal::cleanUp($s);
    return $normalized;
}
示例13: Diff
$diffs = new Diff($ota, $nta);
$formatter = new TableDiffFormatter();
$funky = $formatter->format($diffs);
preg_match_all('/<span class="diffchange">(.*?)<\\/span>/', $funky, $matches);
foreach ($matches[1] as $bit) {
$hex = bin2hex($bit);
echo "\t{$hex}\n";
}
}
// Endless comparison loop: generate a random string, normalize it with
// UtfNormal::cleanUp() and with donorm(), and stop at the first mismatch.
// NOTE(review): judging by the "php:" / "icu:" labels below, donorm() is
// presumably an ICU-backed reference implementation — confirm against its definition.
$size = 16;
$n = 0;
while (true) {
    $n++;
    echo "{$n}\n";
    $str = randomString($size, true);
    $clean = UtfNormal::cleanUp($str);
    $norm = donorm($str);
    echo strlen($clean) . ", " . strlen($norm);
    // Strict comparison: the outputs must be byte-identical. A loose "==" would
    // compare numeric-looking strings as numbers and would treat a false/null
    // result as equal to an empty string.
    if ($clean === $norm) {
        echo " (match)\n";
    } else {
        echo " (FAIL)\n";
        echo "\traw: " . bin2hex($str) . "\n" . "\tphp: " . bin2hex($clean) . "\n" . "\ticu: " . bin2hex($norm) . "\n";
        echo "\n\tdiffs:\n";
        showDiffs($clean, $norm);
        die;
    }
    // Release the strings before the next iteration.
    $str = '';
    $clean = '';
    $norm = '';
}
示例14: preprocessToObj
/**
 * Preprocess some wikitext and return the document tree.
 * This is the ghost of Parser::replace_variables().
 *
 * Texts longer than $wgPreprocessorCacheThreshold have their preprocessor XML
 * read from and written to memcached, keyed on the md5 of the text and the
 * flags and prefixed with an 8-digit cache version. If the XML fails to load,
 * it is passed through UtfNormal::cleanUp() and loaded a second time.
 *
 * @param string $text the text to parse
 * @param $flags Integer: bitwise combination of:
 *   Parser::PTD_FOR_INCLUSION  Handle "<noinclude>" and "<includeonly>" as if the text is being
 *                              included. Default is to assume a direct page view.
 *
 * The generated DOM tree must depend only on the input text and the flags.
 * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a regression of bug 4899.
 *
 * Any flag added to the $flags parameter here, or any other parameter liable to cause a
 * change in the DOM tree for a given text, must be passed through the section identifier
 * in the section edit link and thus back to extractSections().
 *
 * The output of this function is currently only cached in process memory, but a persistent
 * cache may be implemented at a later date which takes further advantage of these strict
 * dependency requirements.
 *
 * @throws MWException
 * @return PPNode_DOM
 */
function preprocessToObj($text, $flags = 0)
{
    wfProfileIn(__METHOD__);
    global $wgMemc, $wgPreprocessorCacheThreshold;
    $useCache = $wgPreprocessorCacheThreshold !== false && strlen($text) > $wgPreprocessorCacheThreshold;
    if (!$useCache) {
        $xml = $this->preprocessToXml($text, $flags);
    } else {
        // This profiling section is closed only at the end of the method (or before the throw below).
        wfProfileIn(__METHOD__ . '-cacheable');
        $xml = false;
        $cacheKey = wfMemcKey('preprocess-xml', md5($text), $flags);
        $stored = $wgMemc->get($cacheKey);
        // A stored value is usable only when its 8-character version prefix matches.
        if ($stored && intval(substr($stored, 0, 8)) == self::CACHE_VERSION) {
            $xml = substr($stored, 8);
            wfDebugLog("Preprocessor", "Loaded preprocessor XML from memcached (key {$cacheKey})");
        }
        if ($xml === false) {
            wfProfileIn(__METHOD__ . '-cache-miss');
            $xml = $this->preprocessToXml($text, $flags);
            $wgMemc->set($cacheKey, sprintf("%08d", self::CACHE_VERSION) . $xml, 86400);
            wfProfileOut(__METHOD__ . '-cache-miss');
            wfDebugLog("Preprocessor", "Saved preprocessor XML to memcached (key {$cacheKey})");
        }
    }
    // Enforce the generated-node limit before attempting to build the DOM.
    $this->parser->mGeneratedPPNodeCount += substr_count($xml, '<');
    $limit = $this->parser->mOptions->getMaxGeneratedPPNodeCount();
    if ($this->parser->mGeneratedPPNodeCount > $limit) {
        if ($useCache) {
            wfProfileOut(__METHOD__ . '-cacheable');
        }
        wfProfileOut(__METHOD__);
        throw new MWException(__METHOD__ . ': generated node count limit exceeded');
    }
    wfProfileIn(__METHOD__ . '-loadXML');
    $dom = new DOMDocument();
    wfSuppressWarnings();
    $loaded = $dom->loadXML($xml);
    wfRestoreWarnings();
    if (!$loaded) {
        // Retry after UtfNormal has removed invalid characters.
        $xml = UtfNormal::cleanUp($xml);
        // 1 << 19 == XML_PARSE_HUGE, needed so newer versions of libxml2 don't barf when the XML is >256 levels deep
        $loaded = $dom->loadXML($xml, 1 << 19);
    }
    if ($loaded) {
        $root = new PPNode_DOM($dom->documentElement);
    }
    wfProfileOut(__METHOD__ . '-loadXML');
    if ($useCache) {
        wfProfileOut(__METHOD__ . '-cacheable');
    }
    wfProfileOut(__METHOD__);
    if (!$loaded) {
        throw new MWException(__METHOD__ . ' generated invalid XML');
    }
    return $root;
}
示例15: formatDiffRow
/**
 * Really format a diff for the newsfeed
 *
 * The result always starts with a paragraph holding the action text and the
 * formatted comment; for diffable pages it is followed by the diff, the new
 * page text, or a link to the diff.
 *
 * @param $title Title object
 * @param $oldid Integer: old revision's id
 * @param $newid Integer: new revision's id
 * @param $timestamp Integer: new revision's timestamp
 * @param $comment String: new revision's comment
 * @param $actiontext String: text of the action; in case of log event
 * @return String
 */
public static function formatDiffRow($title, $oldid, $newid, $timestamp, $comment, $actiontext = '')
{
    global $wgFeedDiffCutoff, $wgLang;
    wfProfileIn(__METHOD__);
    # log entries
    $headerParts = array_filter(array($actiontext, Linker::formatComment($comment)));
    $completeText = '<p>' . implode(' ', $headerParts) . "</p>\n";
    // NOTE: Permissions are checked for an anonymous user, not the current one,
    // so that no "privileged" version ends up in the cache.
    // Most feed readers will not log in anyway.
    $anon = new User();
    $accErrors = $title->getUserPermissionsErrors('read', $anon, true);
    // Special pages, unreadable pages and pages with no new revision cannot be
    // diffed: return just the header text.
    if ($title->getNamespace() < 0 || $accErrors || !$newid) {
        wfProfileOut(__METHOD__);
        return $completeText;
    }
    if ($oldid) {
        $diffSection = __METHOD__ . "-dodiff";
        wfProfileIn($diffSection);
        $diffText = '';
        // Only generate the diff when it could actually be shown.
        if ($wgFeedDiffCutoff > 0) {
            $oldRev = Revision::newFromId($oldid);
            if (!$oldRev) {
                $diffText = false;
            } else {
                $context = clone RequestContext::getMain();
                $context->setTitle($title);
                $de = $oldRev->getContentHandler()->createDifferenceEngine($context, $oldid, $newid);
                $previousLabel = wfMessage('previousrevision')->text();
                $asOfLabel = wfMessage('revisionasof', $wgLang->timeanddate($timestamp), $wgLang->date($timestamp), $wgLang->time($timestamp))->text();
                $diffText = $de->getDiff($previousLabel, $asOfLabel);
            }
        }
        if ($wgFeedDiffCutoff <= 0 || strlen($diffText) > $wgFeedDiffCutoff) {
            // Diffs disabled or diff too large: link to it instead.
            $diffText = self::getDiffLink($title, $newid, $oldid);
        } elseif ($diffText === false) {
            // Error in diff engine, probably a missing revision
            $diffText = "<p>Can't load revision {$newid}</p>";
        } else {
            // Diff output fine, clean up any illegal UTF-8
            $diffText = self::applyDiffStyle(UtfNormal::cleanUp($diffText));
        }
        wfProfileOut($diffSection);
    } else {
        $newRev = Revision::newFromId($newid);
        if ($wgFeedDiffCutoff <= 0 || is_null($newRev)) {
            $newContent = ContentHandler::getForTitle($title)->makeEmptyContent();
        } else {
            $newContent = $newRev->getContent();
        }
        // $html stays null for non-textual content and for text that is too
        // large (or when diffs are disabled).
        // XXX: an HTML representation could come from getParserOutput, but that may
        // contain JS magic and generally may not be suitable for inclusion in a feed.
        // Perhaps Content should have a getDescriptiveHtml method and/or a getSourceText method.
        // Compare also ApiFeedContributions::feedItemDesc
        $html = null;
        if ($newContent instanceof TextContent) {
            // only textual content has a "source view".
            $text = $newContent->getNativeData();
            if ($wgFeedDiffCutoff > 0 && strlen($text) <= $wgFeedDiffCutoff) {
                $html = nl2br(htmlspecialchars($text));
            }
        }
        if ($html !== null) {
            $diffText = '<p><b>' . wfMessage('newpage')->text() . '</b></p>' . '<div>' . $html . '</div>';
        } else {
            // Omit large new page diffs, bug 29110
            // Also use diff link for non-textual content
            $diffText = self::getDiffLink($title, $newid);
        }
    }
    $completeText .= $diffText;
    wfProfileOut(__METHOD__);
    return $completeText;
}