本文整理汇总了C++中UnicodeSet::contains方法的典型用法代码示例。如果您正苦于以下问题：C++ UnicodeSet::contains方法的具体用法？C++ UnicodeSet::contains怎么用？C++ UnicodeSet::contains使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类UnicodeSet的用法示例。
在下文中一共展示了UnicodeSet::contains方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: next
/*
 * Advance to the next reported break and return its offset with the spaces
 * counted so far subtracted.
 *
 * Boundaries produced by the wrapped iterator are skipped while the characters
 * on both sides of the boundary are members of fComplexContext. When the
 * reported boundary is immediately followed by U+0020, the wrapped iterator is
 * advanced once more and the distance it moved is added to fSpaceCount.
 * fWordCount is incremented for every break returned.
 *
 * Returns BreakIterator::DONE once the wrapped iterator is exhausted (and on
 * every later call, via fDone).
 */
int32_t SpaceBreakIterator::next()
{
    if (fDone) {
        return BreakIterator::DONE;
    }

    int32_t boundary;
    for (;;) {
        boundary = fBreakIter->next();
        if (boundary == BreakIterator::DONE) {
            fDone = TRUE;
            return BreakIterator::DONE;
        }

        // Keep scanning only while the boundary sits between two characters
        // that both belong to the complex-context set.
        const bool insideComplexRun = boundary > 0
            && fComplexContext.contains(fText[boundary - 1])
            && fComplexContext.contains(fText[boundary]);
        if (!insideComplexRun) {
            break;
        }
    }

    // The result is computed before fSpaceCount is updated for this break.
    const int32_t result = boundary - fSpaceCount;

    if (boundary < fTextCount && fText[boundary] == 0x0020 /*Unicode::isSpaceChar(fText[boundary])*/) {
        fSpaceCount += fBreakIter->next() - boundary;
    }

    ++fWordCount;
    return result;
}
示例2: initPinyinBounds
void AlphabeticIndex::initPinyinBounds(const Collator *col, UErrorCode &status) {
{
Mutex m;
if (PINYIN_LOWER_BOUNDS != NULL) {
return;
}
}
UnicodeSet *colSet = col->getTailoredSet(status);
if (U_FAILURE(status) || colSet == NULL) {
delete colSet;
if (U_SUCCESS(status)) {
status = U_MEMORY_ALLOCATION_ERROR;
}
return;
}
UBool useLongTables = colSet->contains(probeCharInLong);
delete colSet;
{
Mutex m;
if (useLongTables) {
PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_LONG;
HACK_PINYIN_LOOKUP = &HACK_PINYIN_LOOKUP_LONG;
} else {
PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_SHORT;
HACK_PINYIN_LOOKUP = &HACK_PINYIN_LOOKUP_SHORT;
}
}
}
示例3: assertInSet
void StaticUnicodeSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
const UnicodeSet &set, UChar32 cp) {
// If this test case fails, add the specified code point to the corresponding set in
// UnicodeSetStaticCache.java and numparse_unisets.cpp
assertTrue(
localeName + UnicodeString(u" ") + UnicodeString(cp) + UnicodeString(u" is missing in ") +
setName, set.contains(cp));
}
示例4: fontContainsCharacter
// Reports whether the font described by |fontData| covers |character|.
//
// Coverage is read once per |family| via GetFontUnicodeRanges and stored in a
// process-lifetime cache of frozen UnicodeSets keyed by the |family| pointer.
// Returns true (i.e. "assume covered") when the answer cannot be determined:
// for non-BMP characters and when the unicode ranges cannot be retrieved.
// In those failure cases nothing is cached, so a later call will retry.
static bool fontContainsCharacter(const FontPlatformData* fontData,
                                  const wchar_t* family, UChar32 character)
{
    // FIXME: For non-BMP characters, GetFontUnicodeRanges is of
    // no use. We have to read directly from the cmap table of a font.
    // Return true for now.
    if (character > 0xFFFF)
        return true;

    // This cache is just leaked on shutdown.
    static FontCmapCache* fontCmapCache = 0;
    if (!fontCmapCache)
        fontCmapCache = new FontCmapCache;

    HashMap<const wchar_t*, UnicodeSet*>::iterator it = fontCmapCache->find(family);
    if (it != fontCmapCache->end())
        return it->second->contains(character);

    HFONT hfont = fontData->hfont();
    HDC hdc = GetDC(0);
    HGDIOBJ oldFont = static_cast<HFONT>(SelectObject(hdc, hfont));

    // With a null GLYPHSET the call reports the buffer size that is needed;
    // zero means failure. Retry once after asking the bridge to load the font.
    int count = GetFontUnicodeRanges(hdc, 0);
    if (count == 0 && ChromiumBridge::ensureFontLoaded(hfont))
        count = GetFontUnicodeRanges(hdc, 0);
    if (count == 0) {
        ASSERT_NOT_REACHED();
        SelectObject(hdc, oldFont);
        ReleaseDC(0, hdc);
        return true;
    }

    // |count| already holds the required size, so there is no need to query
    // the API a third time just to size the buffer.
    static Vector<char, 512> glyphsetBuffer;
    glyphsetBuffer.resize(count);
    GLYPHSET* glyphset = reinterpret_cast<GLYPHSET*>(glyphsetBuffer.data());
    // In addition, refering to the OS/2 table and converting the codepage list
    // to the coverage map might be faster.
    count = GetFontUnicodeRanges(hdc, glyphset);
    ASSERT(count > 0);
    SelectObject(hdc, oldFont);
    ReleaseDC(0, hdc);

    // ASSERT compiles away in release builds. If the fill call failed, the
    // static buffer holds stale or unfilled data; do not parse it and do not
    // cache a bogus coverage map for this family.
    if (count == 0)
        return true;

    // FIXME: consider doing either of the following two:
    // 1) port back ICU 4.0's faster look-up code for UnicodeSet
    // 2) port Mozilla's CompressedCharMap or gfxSparseBitset
    UnicodeSet* cmap = new UnicodeSet;
    for (unsigned i = 0; i < glyphset->cRanges; ++i) {
        WCHAR start = glyphset->ranges[i].wcLow;
        cmap->add(start, start + glyphset->ranges[i].cGlyphs - 1);
    }
    cmap->freeze();

    // We don't lowercase |family| because all of them are under our control
    // and they're already lowercased.
    fontCmapCache->set(family, cmap);
    return cmap->contains(character);
}
示例5: setProps
void
NamesPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return; }
if(!newValues.contains(UCHAR_NAME) && !newValues.contains(PPUCD_NAME_ALIAS)) {
return;
}
U_ASSERT(props.start==props.end);
const char *names[4]={ NULL, NULL, NULL, NULL };
int16_t lengths[4]={ 0, 0, 0, 0 };
/* get the character name */
if(props.name!=NULL) {
names[0]=props.name;
lengths[0]=(int16_t)uprv_strlen(props.name);
parseName(names[0], lengths[0]);
}
CharString buffer;
if(props.nameAlias!=NULL) {
/*
* Only use "correction" aliases for now, from Unicode 6.1 NameAliases.txt with 3 fields per line.
* TODO: Work on ticket #8963 to deal with multiple type:alias pairs per character.
*/
const char *corr=uprv_strstr(props.nameAlias, "correction=");
if(corr!=NULL) {
corr+=11; // skip "correction="
const char *limit=uprv_strchr(corr, ',');
if(limit!=NULL) {
buffer.append(corr, limit-corr, errorCode);
names[3]=buffer.data();
lengths[3]=(int16_t)(limit-corr);
} else {
names[3]=corr;
lengths[3]=(int16_t)uprv_strlen(corr);
}
parseName(names[3], lengths[3]);
}
}
addLine(props.start, names, lengths, LENGTHOF(names));
}
示例6: span
/**
 * Return the length of the initial part of s[0..length) in which every code
 * point's membership in set equals tf.
 *
 * The string is read code point by code point with U16_NEXT. The result is
 * the index where the first code point with set.contains(c) != tf starts, or
 * the index reached after the last code point if there is none.
 */
static int32_t span(const UnicodeSet &set, const UChar *s, int32_t length, UBool tf) {
    int32_t pos=0;
    while(pos<length) {
        int32_t next=pos;
        UChar32 c;
        U16_NEXT(s, next, length, c);
        if(tf!=set.contains(c)) {
            return pos;  // stop in front of the first mismatching code point
        }
        pos=next;
    }
    return pos;
}
示例7: parseScriptExtensions
void
PreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return; }
scx.clear();
CharString scString;
for(;;) {
const char *scs;
const char *scLimit=strchr(s, ' ');
if(scLimit!=NULL) {
scs=scString.clear().append(s, (int32_t)(scLimit-s), errorCode).data();
if(U_FAILURE(errorCode)) { return; }
} else {
scs=s;
}
int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs);
if(script==UCHAR_INVALID_CODE) {
fprintf(stderr,
"error in preparsed UCD: '%s' is not a valid script code on line %ld\n",
scs, (long)lineNumber);
errorCode=U_PARSE_ERROR;
return;
} else if(scx.contains(script)) {
fprintf(stderr,
"error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n",
scs, (long)lineNumber);
errorCode=U_PARSE_ERROR;
return;
} else {
scx.add(script);
}
if(scLimit!=NULL) {
s=scLimit+1;
} else {
break;
}
}
if(scx.isEmpty()) {
fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber);
errorCode=U_PARSE_ERROR;
}
}
示例8: main
//.........这里部分代码省略.........
};
status = U_ZERO_ERROR;
UChar *wordSourceU = new UChar[destCap+1];
ucnv_toUChars(conv,
wordSourceU, // dest,
destCap+1,
wordSourceC,
wordFileSize,
&status);
if (U_FAILURE(status)) {
fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
exit(status);
};
ucnv_close(conv);
// Get rid of the original file buffer
delete[] wordBufferC;
// Create a MutableTrieDictionary, and loop through all the lines, inserting
// words.
// First, pick a median character.
UChar *current = wordSourceU + (destCap/2);
UChar uc = *current++;
UnicodeSet breaks;
breaks.add(0x000A); // Line Feed
breaks.add(0x000D); // Carriage Return
breaks.add(0x2028); // Line Separator
breaks.add(0x2029); // Paragraph Separator
do {
// Look for line break
while (uc && !breaks.contains(uc)) {
uc = *current++;
}
// Now skip to first non-line-break
while (uc && breaks.contains(uc)) {
uc = *current++;
}
}
while (uc && (breaks.contains(uc) || u_isspace(uc)));
mtd = new MutableTrieDictionary(uc, status);
if (U_FAILURE(status)) {
fprintf(stderr, "new MutableTrieDictionary: ICU Error \"%s\"\n", u_errorName(status));
exit(status);
}
// Now add the words. Words are non-space characters at the beginning of
// lines, and must be at least one UChar. If a word has an associated value,
// the value should follow the word on the same line after a tab character.
current = wordSourceU;
UChar *candidate = current;
uc = *current++;
int32_t length = 0;
int count = 0;
while (uc) {
while (uc && !u_isspace(uc)) {
++length;
uc = *current++;
}
UnicodeString valueString;
示例9: build
//.........这里部分代码省略.........
if (U_FAILURE(*fStatus)) {
return;
}
}
// The current rlRange is now entirely within the UnicodeSet range.
// Add this unicode set to the list of sets for this rlRange
if (rlRange->fIncludesSets->indexOf(usetNode) == -1) {
rlRange->fIncludesSets->addElement(usetNode, *fStatus);
if (U_FAILURE(*fStatus)) {
return;
}
}
// Advance over ranges that we are finished with.
if (inputSetRangeEnd == rlRange->fEndChar) {
inputSetRangeIndex++;
}
rlRange = rlRange->fNext;
}
}
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();}
//
// Group the above ranges, with each group consisting of one or more
// ranges that are in exactly the same set of original UnicodeSets.
// The groups are numbered, and these group numbers are the set of
// input symbols recognized by the run-time state machine.
//
// Numbering: # 0 (state table column 0) is unused.
// # 1 is reserved - table column 1 is for end-of-input
// # 2 is reserved - table column 2 is for beginning-in-input
// # 3 is the first range list.
//
RangeDescriptor *rlSearchRange;
for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) {
if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) {
rlRange->fNum = rlSearchRange->fNum;
break;
}
}
if (rlRange->fNum == 0) {
fGroupCount ++;
rlRange->fNum = fGroupCount+2;
rlRange->setDictionaryFlag();
addValToSets(rlRange->fIncludesSets, fGroupCount+2);
}
}
// Handle input sets that contain the special string {eof}.
// Column 1 of the state table is reserved for EOF on input.
// Column 2 is reserved for before-the-start-input.
// (This column can be optimized away later if there are no rule
// references to {bof}.)
// Add this column value (1 or 2) to the equivalent expression
// subtree for each UnicodeSet that contains the string {eof}
// Because {bof} and {eof} are not a characters in the normal sense,
// they doesn't affect the computation of ranges or TRIE.
static const UChar eofUString[] = {0x65, 0x6f, 0x66, 0};
static const UChar bofUString[] = {0x62, 0x6f, 0x66, 0};
UnicodeString eofString(eofUString);
UnicodeString bofString(bofUString);
for (ni=0; ; ni++) { // Loop over each of the UnicodeSets encountered in the input rules
usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
if (usetNode==NULL) {
break;
}
UnicodeSet *inputSet = usetNode->fInputSet;
if (inputSet->contains(eofString)) {
addValToSet(usetNode, 1);
}
if (inputSet->contains(bofString)) {
addValToSet(usetNode, 2);
fSawBOF = TRUE;
}
}
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}
//
// Build the Trie table for mapping UChar32 values to the corresponding
// range group number
//
fTrie = utrie_open(NULL, // Pre-existing trie to be filled in
NULL, // Data array (utrie will allocate one)
100000, // Max Data Length
0, // Initial value for all code points
0, // Lead surrogate unit value
TRUE); // Keep Latin 1 in separately
for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE);
}
}
示例10: setProps
void
CasePropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode) || newValues.containsNone(relevantProps)) { return; }
UChar32 start=props.start;
UChar32 end=props.end;
/* default: map to self */
int32_t delta=0;
uint32_t type;
if(props.binProps[UCHAR_LOWERCASE]) {
type=UCASE_LOWER;
} else if(props.binProps[UCHAR_UPPERCASE]) {
type=UCASE_UPPER;
} else if(props.getIntProp(UCHAR_GENERAL_CATEGORY)==U_TITLECASE_LETTER) {
type=UCASE_TITLE;
} else {
type=UCASE_NONE;
}
uint32_t value=type;
UBool hasMapping=FALSE;
if(props.suc>=0) {
/* uppercase mapping as delta if the character is lowercase */
hasMapping=TRUE;
if(type==UCASE_LOWER) {
delta=props.suc-start;
} else {
value|=UCASE_EXCEPTION;
}
}
if(props.slc>=0) {
/* lowercase mapping as delta if the character is uppercase or titlecase */
hasMapping=TRUE;
if(type>=UCASE_UPPER) {
delta=props.slc-start;
} else {
value|=UCASE_EXCEPTION;
}
}
if(props.stc>=0) {
hasMapping=TRUE;
}
if(props.suc!=props.stc) {
value|=UCASE_EXCEPTION;
}
if(!props.lc.isEmpty() || !props.uc.isEmpty() || !props.tc.isEmpty() ||
newValues.contains(PPUCD_CONDITIONAL_CASE_MAPPINGS)
) {
hasMapping=TRUE;
value|=UCASE_EXCEPTION;
}
if( (props.scf>=0 && props.scf!=props.slc) ||
(!props.cf.isEmpty() && props.cf!=UnicodeString(props.scf)) ||
newValues.contains(PPUCD_TURKIC_CASE_FOLDING)
) {
hasMapping=TRUE;
value|=UCASE_EXCEPTION;
}
// Simple case folding falls back to simple lowercasing.
// If there is no case folding but there is a lowercase mapping,
// then add a case folding mapping to the code point.
// For example: Cherokee uppercase syllables since Unicode 8.
// (Full case folding falls back to simple case folding,
// not to full lowercasing, so we need not also handle it specially
// for such cases.)
UChar32 scf=props.scf;
if(scf<0 && props.slc>=0) {
scf=start;
hasMapping=TRUE;
value|=UCASE_EXCEPTION;
}
if(delta<UCASE_MIN_DELTA || UCASE_MAX_DELTA<delta) {
value|=UCASE_EXCEPTION;
}
if(props.binProps[UCHAR_SOFT_DOTTED]) {
value|=UCASE_SOFT_DOTTED;
}
int32_t cc=props.getIntProp(UCHAR_CANONICAL_COMBINING_CLASS);
if(cc!=0) {
if(props.binProps[UCHAR_SOFT_DOTTED]) {
fprintf(stderr, "genprops error: a soft-dotted character has ccc!=0\n");
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if(cc==230) {
value|=UCASE_ABOVE;
} else {
value|=UCASE_OTHER_ACCENT;
}
}
if(props.binProps[UCHAR_CASE_IGNORABLE]) {
value|=UCASE_IGNORABLE;
}
//.........这里部分代码省略.........
示例11: uspoof_check
U_CAPI int32_t U_EXPORT2
uspoof_check(const USpoofChecker *sc,
const UChar *text, int32_t length,
int32_t *position,
UErrorCode *status) {
const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
if (This == NULL) {
return 0;
}
if (length < -1) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
if (length == -1) {
// It's not worth the bother to handle nul terminated strings everywhere.
// Just get the length and be done with it.
length = u_strlen(text);
}
int32_t result = 0;
int32_t failPos = 0x7fffffff; // TODO: do we have a #define for max int32?
// A count of the number of non-Common or inherited scripts.
// Needed for both the SINGLE_SCRIPT and the WHOLE/MIXED_SCIRPT_CONFUSABLE tests.
// Share the computation when possible. scriptCount == -1 means that we haven't
// done it yet.
int32_t scriptCount = -1;
if ((This->fChecks) & USPOOF_SINGLE_SCRIPT) {
scriptCount = This->scriptScan(text, length, failPos, *status);
// printf("scriptCount (clipped to 2) = %d\n", scriptCount);
if ( scriptCount >= 2) {
// Note: scriptCount == 2 covers all cases of the number of scripts >= 2
result |= USPOOF_SINGLE_SCRIPT;
}
}
if (This->fChecks & USPOOF_CHAR_LIMIT) {
int32_t i;
UChar32 c;
for (i=0; i<length ;) {
U16_NEXT(text, i, length, c);
if (!This->fAllowedCharsSet->contains(c)) {
result |= USPOOF_CHAR_LIMIT;
if (i < failPos) {
failPos = i;
}
break;
}
}
}
if (This->fChecks &
(USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) {
// These are the checks that need to be done on NFD input
NFDBuffer normalizedInput(text, length, *status);
const UChar *nfdText = normalizedInput.getBuffer();
int32_t nfdLength = normalizedInput.getLength();
if (This->fChecks & USPOOF_INVISIBLE) {
// scan for more than one occurence of the same non-spacing mark
// in a sequence of non-spacing marks.
int32_t i;
UChar32 c;
UChar32 firstNonspacingMark = 0;
UBool haveMultipleMarks = FALSE;
UnicodeSet marksSeenSoFar; // Set of combining marks in a single combining sequence.
for (i=0; i<nfdLength ;) {
U16_NEXT(nfdText, i, nfdLength, c);
if (u_charType(c) != U_NON_SPACING_MARK) {
firstNonspacingMark = 0;
if (haveMultipleMarks) {
marksSeenSoFar.clear();
haveMultipleMarks = FALSE;
}
continue;
}
if (firstNonspacingMark == 0) {
firstNonspacingMark = c;
continue;
}
if (!haveMultipleMarks) {
marksSeenSoFar.add(firstNonspacingMark);
haveMultipleMarks = TRUE;
}
if (marksSeenSoFar.contains(c)) {
// report the error, and stop scanning.
// No need to find more than the first failure.
result |= USPOOF_INVISIBLE;
failPos = i;
// TODO: Bug 8655: failPos is the position in the NFD buffer, but what we want
// to give back to our caller is a position in the original input string.
if (failPos > length) {
failPos = length;
}
break;
}
//.........这里部分代码省略.........
示例12: buildIndex
void AlphabeticIndex::buildIndex(UErrorCode &status) {
if (U_FAILURE(status)) {
return;
}
if (!indexBuildRequired_) {
return;
}
// Discard any already-built data.
// This is important when the user builds and uses an index, then subsequently modifies it,
// necessitating a rebuild.
bucketList_->removeAllElements();
labels_->removeAllElements();
uhash_removeAll(alreadyIn_);
noDistinctSorting_->clear();
notAlphabetic_->clear();
// first sort the incoming Labels, with a "best" ordering among items
// that are the same according to the collator
UVector preferenceSorting(status); // Vector of UnicodeStrings; owned by the vector.
preferenceSorting.setDeleter(uprv_deleteUObject);
appendUnicodeSetToUVector(preferenceSorting, *initialLabels_, status);
preferenceSorting.sortWithUComparator(PreferenceComparator, &status, status);
// We now make a set of Labels.
// Some of the input may, however, be redundant.
// That is, we might have c, ch, d, where "ch" sorts just like "c", "h"
// So we make a pass through, filtering out those cases.
// TODO: filtering these out would seem to be at odds with the eventual goal
// of being able to split buckets that contain too many items.
UnicodeSet labelSet;
for (int32_t psIndex=0; psIndex<preferenceSorting.size(); psIndex++) {
UnicodeString item = *static_cast<const UnicodeString *>(preferenceSorting.elementAt(psIndex));
// TODO: Since preferenceSorting was originally populated from the contents of a UnicodeSet,
// is it even possible for duplicates to show up in this check?
if (labelSet.contains(item)) {
UnicodeSetIterator itemAlreadyInIter(labelSet);
while (itemAlreadyInIter.next()) {
const UnicodeString &itemAlreadyIn = itemAlreadyInIter.getString();
if (collatorPrimaryOnly_->compare(item, itemAlreadyIn) == 0) {
UnicodeSet *targets = static_cast<UnicodeSet *>(uhash_get(alreadyIn_, &itemAlreadyIn));
if (targets == NULL) {
// alreadyIn.put(itemAlreadyIn, targets = new LinkedHashSet<String>());
targets = new UnicodeSet();
uhash_put(alreadyIn_, itemAlreadyIn.clone(), targets, &status);
}
targets->add(item);
break;
}
}
} else if (item.moveIndex32(0, 1) < item.length() && // Label contains more than one code point.
collatorPrimaryOnly_->compare(item, separated(item)) == 0) {
noDistinctSorting_->add(item);
} else if (!ALPHABETIC->containsSome(item)) {
notAlphabetic_->add(item);
} else {
labelSet.add(item);
}
}
// If we have no labels, hard-code a fallback default set of [A-Z]
// This case can occur with locales that don't have exemplar character data, including root.
// A no-labels situation will cause other problems; it needs to be avoided.
if (labelSet.isEmpty()) {
labelSet.add((UChar32)0x41, (UChar32)0x5A);
}
// Move the set of Labels from the set into a vector, and sort
// according to the collator.
appendUnicodeSetToUVector(*labels_, labelSet, status);
labels_->sortWithUComparator(sortCollateComparator, collatorPrimaryOnly_, status);
// if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
// Implemented by copying the elements to be retained to a new UVector.
const int32_t size = labelSet.size() - 1;
if (size > maxLabelCount_) {
UVector *newLabels = new UVector(status);
newLabels->setDeleter(uprv_deleteUObject);
int32_t count = 0;
int32_t old = -1;
for (int32_t srcIndex=0; srcIndex<labels_->size(); srcIndex++) {
const UnicodeString *str = static_cast<const UnicodeString *>(labels_->elementAt(srcIndex));
++count;
const int32_t bump = count * maxLabelCount_ / size;
if (bump == old) {
// it.remove();
} else {
newLabels->addElement(str->clone(), status);
old = bump;
}
}
delete labels_;
labels_ = newLabels;
}
//.........这里部分代码省略.........
示例13: uspoof_checkUnicodeString
U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker *sc,
const icu::UnicodeString &id,
int32_t *position,
UErrorCode *status) {
const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
if (This == NULL) {
return 0;
}
int32_t result = 0;
IdentifierInfo *identifierInfo = NULL;
if ((This->fChecks) & (USPOOF_RESTRICTION_LEVEL | USPOOF_MIXED_NUMBERS)) {
identifierInfo = This->getIdentifierInfo(*status);
if (U_FAILURE(*status)) {
goto cleanupAndReturn;
}
identifierInfo->setIdentifier(id, *status);
identifierInfo->setIdentifierProfile(*This->fAllowedCharsSet);
}
if ((This->fChecks) & USPOOF_RESTRICTION_LEVEL) {
URestrictionLevel idRestrictionLevel = identifierInfo->getRestrictionLevel(*status);
if (idRestrictionLevel > This->fRestrictionLevel) {
result |= USPOOF_RESTRICTION_LEVEL;
}
if (This->fChecks & USPOOF_AUX_INFO) {
result |= idRestrictionLevel;
}
}
if ((This->fChecks) & USPOOF_MIXED_NUMBERS) {
const UnicodeSet *numerics = identifierInfo->getNumerics();
if (numerics->size() > 1) {
result |= USPOOF_MIXED_NUMBERS;
}
// TODO: ICU4J returns the UnicodeSet of the numerics found in the identifier.
// We have no easy way to do the same in C.
// if (checkResult != null) {
// checkResult.numerics = numerics;
// }
}
if (This->fChecks & (USPOOF_CHAR_LIMIT)) {
int32_t i;
UChar32 c;
int32_t length = id.length();
for (i=0; i<length ;) {
c = id.char32At(i);
i += U16_LENGTH(c);
if (!This->fAllowedCharsSet->contains(c)) {
result |= USPOOF_CHAR_LIMIT;
break;
}
}
}
if (This->fChecks &
(USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) {
// These are the checks that need to be done on NFD input
UnicodeString nfdText;
gNfdNormalizer->normalize(id, nfdText, *status);
int32_t nfdLength = nfdText.length();
if (This->fChecks & USPOOF_INVISIBLE) {
// scan for more than one occurence of the same non-spacing mark
// in a sequence of non-spacing marks.
int32_t i;
UChar32 c;
UChar32 firstNonspacingMark = 0;
UBool haveMultipleMarks = FALSE;
UnicodeSet marksSeenSoFar; // Set of combining marks in a single combining sequence.
for (i=0; i<nfdLength ;) {
c = nfdText.char32At(i);
i += U16_LENGTH(c);
if (u_charType(c) != U_NON_SPACING_MARK) {
firstNonspacingMark = 0;
if (haveMultipleMarks) {
marksSeenSoFar.clear();
haveMultipleMarks = FALSE;
}
continue;
}
if (firstNonspacingMark == 0) {
firstNonspacingMark = c;
continue;
}
if (!haveMultipleMarks) {
marksSeenSoFar.add(firstNonspacingMark);
haveMultipleMarks = TRUE;
}
if (marksSeenSoFar.contains(c)) {
// report the error, and stop scanning.
// No need to find more than the first failure.
result |= USPOOF_INVISIBLE;
//.........这里部分代码省略.........