本文整理汇总了C++中UnicodeSet::addAll方法的典型用法代码示例。如果您正苦于以下问题：C++ UnicodeSet::addAll方法的具体用法？C++ UnicodeSet::addAll怎么用？C++ UnicodeSet::addAll使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类UnicodeSet的用法示例。
在下文中一共展示了UnicodeSet::addAll方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: iter
/*
* Find missing case mapping relationships and add mappings for case closure.
* This function starts from an "original" code point and recursively
* finds its case mappings and the case mappings of where it maps to.
*
* The recursion depth is capped at 3 nested calls of this function.
* In each call, the current code point is c, and the function enumerates
* all of c's simple (single-code point) case mappings.
* prev is the code point that case-mapped to c.
* prev2 is the code point that case-mapped to prev.
*
* The initial function call has prev2<0, prev<0, and c==orig
* (marking no code points).
* It enumerates c's case mappings and recurses without further action.
*
* The second-level function call has prev2<0, prev==orig, and c is
* the destination code point of one of prev's case mappings.
* The function checks if any of c's case mappings go back to orig
* and adds a closure mapping if not.
* In other words, it turns a case mapping relationship of
* orig->c
* into
* orig<->c
*
* The third-level function call has prev2==orig, prev>=0, and c is
* the destination code point of one of prev's case mappings.
* (And prev is the destination of one of prev2's case mappings.)
* The function checks if any of c's case mappings go back to orig
* and adds a closure mapping if not.
* In other words, it turns case mapping relationships of
* orig->prev->c or orig->prev<->c
* into
* orig->prev->c->orig or orig->prev<->c->orig
* etc.
* (Graphically, this closes a triangle.)
*
* With repeated application on all code points until no more closure mappings
* are added, all case equivalence groups get complete mappings.
* That is, in each group of code points with case relationships
* each code point will in the end have some mapping to each other
* code point in the group.
*
* @return TRUE if a closure mapping was added
*/
UBool
CasePropsBuilder::addClosure(UChar32 orig, UChar32 prev2, UChar32 prev, UChar32 c, uint32_t value,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return FALSE; }
UChar32 next;
UBool someMappingsAdded=FALSE;
if(c!=orig) {
/* get the properties for c */
value=utrie2_get32(pTrie, c);
}
/* else if c==orig then c's value was passed in */
if(value&UCASE_EXCEPTION) {
UnicodeSet set;
ExcProps &ep=*excProps[value>>UGENCASE_EXC_SHIFT];
UniProps &p=ep.props;
/*
* marker for whether any of c's mappings goes to orig
* c==orig: prevent adding a closure mapping when getting orig's own, direct mappings
*/
UBool mapsToOrig=(UBool)(c==orig);
/* collect c's case mapping destinations in set[] */
if((next=p.suc)>=0 && next!=c) {
set.add(next);
}
if((next=p.slc)>=0 && next!=c) {
set.add(next);
}
if(p.suc!=(next=p.stc) && next!=c) {
set.add(next);
}
if((next=p.scf)>=0 && next!=c) {
set.add(next);
}
/* add c's current closure mappings to set */
set.addAll(ep.closure);
/* process all code points to which c case-maps */
UnicodeSetIterator iter(set);
while(iter.next()) {
next=iter.getCodepoint(); /* next!=c */
if(next==orig) {
mapsToOrig=TRUE; /* remember that we map to orig */
} else if(prev2<0 && next!=prev) {
/*
* recurse unless
* we have reached maximum depth (prev2>=0) or
* this is a mapping to one of the previous code points (orig, prev, c)
*/
//.........这里部分代码省略.........
示例2: setAllowedLocales
void SpoofImpl::setAllowedLocales(const char *localesList, UErrorCode &status) {
UnicodeSet allowedChars;
UnicodeSet *tmpSet = NULL;
const char *locStart = localesList;
const char *locEnd = NULL;
const char *localesListEnd = localesList + uprv_strlen(localesList);
int32_t localeListCount = 0; // Number of locales provided by caller.
// Loop runs once per locale from the localesList, a comma separated list of locales.
do {
locEnd = uprv_strchr(locStart, ',');
if (locEnd == NULL) {
locEnd = localesListEnd;
}
while (*locStart == ' ') {
locStart++;
}
const char *trimmedEnd = locEnd-1;
while (trimmedEnd > locStart && *trimmedEnd == ' ') {
trimmedEnd--;
}
if (trimmedEnd <= locStart) {
break;
}
const char *locale = uprv_strndup(locStart, (int32_t)(trimmedEnd + 1 - locStart));
localeListCount++;
// We have one locale from the locales list.
// Add the script chars for this locale to the accumulating set of allowed chars.
// If the locale is no good, we will be notified back via status.
addScriptChars(locale, &allowedChars, status);
uprv_free((void *)locale);
if (U_FAILURE(status)) {
break;
}
locStart = locEnd + 1;
} while (locStart < localesListEnd);
// If our caller provided an empty list of locales, we disable the allowed characters checking
if (localeListCount == 0) {
uprv_free((void *)fAllowedLocales);
fAllowedLocales = uprv_strdup("");
tmpSet = new UnicodeSet(0, 0x10ffff);
if (fAllowedLocales == NULL || tmpSet == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
tmpSet->freeze();
delete fAllowedCharsSet;
fAllowedCharsSet = tmpSet;
fChecks &= ~USPOOF_CHAR_LIMIT;
return;
}
// Add all common and inherited characters to the set of allowed chars.
UnicodeSet tempSet;
tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_COMMON, status);
allowedChars.addAll(tempSet);
tempSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_INHERITED, status);
allowedChars.addAll(tempSet);
// If anything went wrong, we bail out without changing
// the state of the spoof checker.
if (U_FAILURE(status)) {
return;
}
// Store the updated spoof checker state.
tmpSet = static_cast<UnicodeSet *>(allowedChars.clone());
const char *tmpLocalesList = uprv_strdup(localesList);
if (tmpSet == NULL || tmpLocalesList == NULL) {
status = U_MEMORY_ALLOCATION_ERROR;
return;
}
uprv_free((void *)fAllowedLocales);
fAllowedLocales = tmpLocalesList;
tmpSet->freeze();
delete fAllowedCharsSet;
fAllowedCharsSet = tmpSet;
fChecks |= USPOOF_CHAR_LIMIT;
}
示例3: if
//.........这里部分代码省略.........
// not to full lowercasing, so we need not also handle it specially
// for such cases.)
UChar32 scf=props.scf;
if(scf<0 && props.slc>=0) {
scf=start;
hasMapping=TRUE;
value|=UCASE_EXCEPTION;
}
if(delta<UCASE_MIN_DELTA || UCASE_MAX_DELTA<delta) {
value|=UCASE_EXCEPTION;
}
if(props.binProps[UCHAR_SOFT_DOTTED]) {
value|=UCASE_SOFT_DOTTED;
}
int32_t cc=props.getIntProp(UCHAR_CANONICAL_COMBINING_CLASS);
if(cc!=0) {
if(props.binProps[UCHAR_SOFT_DOTTED]) {
fprintf(stderr, "genprops error: a soft-dotted character has ccc!=0\n");
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
if(cc==230) {
value|=UCASE_ABOVE;
} else {
value|=UCASE_OTHER_ACCENT;
}
}
if(props.binProps[UCHAR_CASE_IGNORABLE]) {
value|=UCASE_IGNORABLE;
}
if((hasMapping || (value&UCASE_EXCEPTION)) && start!=end) {
fprintf(stderr,
"genprops error: range %04lX..%04lX has case mappings "
"or reasons for data structure exceptions\n",
(long)start, (long)end);
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
/* handle exceptions */
if(value&UCASE_EXCEPTION) {
/* simply store exceptions for later processing and encoding */
if(excPropsCount==MAX_EXC_COUNT) {
fprintf(stderr, "genprops error: casepropsbuilder: too many exceptions\n");
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return;
}
ExcProps *newExcProps=new ExcProps(props);
if(newExcProps==NULL) {
fprintf(stderr,
"genprops error: casepropsbuilder out of memory allocating "
"exceptions properties\n");
errorCode=U_MEMORY_ALLOCATION_ERROR;
return;
}
newExcProps->props.scf=scf;
newExcProps->hasConditionalCaseMappings=newValues.contains(PPUCD_CONDITIONAL_CASE_MAPPINGS);
newExcProps->hasTurkicCaseFolding=newValues.contains(PPUCD_TURKIC_CASE_FOLDING);
value|=(uint32_t)excPropsCount<<UGENCASE_EXC_SHIFT;
excProps[excPropsCount++]=newExcProps;
} else {
/* store the simple case mapping delta */
value|=((uint32_t)delta<<UCASE_DELTA_SHIFT)&UCASE_DELTA_MASK;
}
utrie2_setRange32(pTrie, start, end, value, TRUE, &errorCode);
if(U_FAILURE(errorCode)) {
fprintf(stderr, "genprops error: unable to set case mapping values: %s\n",
u_errorName(errorCode));
return;
}
if(hasMapping) {
/* update the case-sensitive set */
caseSensitive.add(start);
if(scf>=0) { caseSensitive.add(scf); }
if(props.slc>=0) { caseSensitive.add(props.slc); }
if(props.suc>=0) { caseSensitive.add(props.suc); }
if(props.stc>=0) { caseSensitive.add(props.stc); }
caseSensitive.addAll(props.cf);
caseSensitive.addAll(props.lc);
caseSensitive.addAll(props.uc);
caseSensitive.addAll(props.tc);
/* update maxFullLength */
if(props.cf.length()>maxFullLength) { maxFullLength=props.cf.length(); }
if(props.lc.length()>maxFullLength) { maxFullLength=props.lc.length(); }
if(props.uc.length()>maxFullLength) { maxFullLength=props.uc.length(); }
if(props.tc.length()>maxFullLength) { maxFullLength=props.tc.length(); }
}
/* add the multi-character case folding to the "unfold" data */
if(props.cf.hasMoreChar32Than(0, 0x7fffffff, 1)) {
addUnfolding(start, props.cf, errorCode);
}
}
示例4: siter
//
// First characters in scripts.
// Create a UVector whose contents are pointers to UnicodeStrings for the First Characters in each script.
// The vector is sorted according to this index's collation.
//
// This code is too slow to use, so for now hard code the data.
// Hard coded implementation is follows.
//
UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErrorCode &status) {
if (U_FAILURE(status)) {
return NULL;
}
UnicodeString results[USCRIPT_CODE_LIMIT];
UnicodeString LOWER_A = UNICODE_STRING_SIMPLE("a");
UnicodeSetIterator siter(*TO_TRY);
while (siter.next()) {
const UnicodeString ¤t = siter.getString();
Collator::EComparisonResult r = ruleBasedCollator->compare(current, LOWER_A);
if (r < 0) { // TODO fix; we only want "real" script characters, not
// symbols.
continue;
}
int script = uscript_getScript(current.char32At(0), &status);
if (results[script].length() == 0) {
results[script] = current;
}
else if (ruleBasedCollator->compare(current, results[script]) < 0) {
results[script] = current;
}
}
UnicodeSet extras;
UnicodeSet expansions;
RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(ruleBasedCollator);
const UCollator *uRuleBasedCollator = rbc->getUCollator();
ucol_getContractionsAndExpansions(uRuleBasedCollator, extras.toUSet(), expansions.toUSet(), true, &status);
extras.addAll(expansions).removeAll(*TO_TRY);
if (extras.size() != 0) {
const Normalizer2 *normalizer = Normalizer2::getNFKCInstance(status);
UnicodeSetIterator extrasIter(extras);
while (extrasIter.next()) {
const UnicodeString ¤t = extrasIter.next();
if (!TO_TRY->containsAll(current))
continue;
if (!normalizer->isNormalized(current, status) ||
ruleBasedCollator->compare(current, LOWER_A) < 0) {
continue;
}
int script = uscript_getScript(current.char32At(0), &status);
if (results[script].length() == 0) {
results[script] = current;
} else if (ruleBasedCollator->compare(current, results[script]) < 0) {
results[script] = current;
}
}
}
UVector *dest = new UVector(status);
dest->setDeleter(uprv_deleteUObject);
for (uint32_t i = 0; i < sizeof(results) / sizeof(results[0]); ++i) {
if (results[i].length() > 0) {
dest->addElement(results[i].clone(), status);
}
}
dest->sortWithUComparator(sortCollateComparator, ruleBasedCollator, status);
return dest;
}
示例5: getIndexExemplars
/**
 * Add the index exemplar characters for a locale to dest.
 *
 * The locale's explicit index exemplar set (ULOCDATA_ES_INDEX) is used when
 * it can be fetched. Otherwise a set is synthesized from the standard
 * exemplars (ULOCDATA_ES_STANDARD): entries are upper-cased, and the result
 * is adjusted for the CORE_LATIN, HANGUL and ETHIOPIC sets.
 *
 * @param dest    set that receives the exemplars; existing contents are kept.
 * @param locale  locale whose exemplar data is consulted.
 * @param status  in/out error code; nothing is added to dest on failure.
 */
void AlphabeticIndex::getIndexExemplars(UnicodeSet &dest, const Locale &locale, UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }
    LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
    if (U_FAILURE(status)) {
        // Without this check the error would be wiped by the reset below and
        // the handle from the failed open would be used for the fallback query.
        return;
    }

    UnicodeSet exemplars;
    ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
    if (U_SUCCESS(status)) {
        dest.addAll(exemplars);
        return;
    }
    status = U_ZERO_ERROR;  // Clear out U_MISSING_RESOURCE_ERROR

    // Locale data did not include explicit Index characters.
    // Synthesize a set of them from the locale's standard exemplar characters.
    ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
    if (U_FAILURE(status)) {
        return;
    }

    // Upper-case any that aren't already so.
    // (We only do this for synthesized index characters.)
    // Changes are collected first and applied after the loop, so the set is
    // not modified while it is being iterated.
    UnicodeSetIterator it(exemplars);
    UnicodeString upperC;
    UnicodeSet lowersToRemove;
    UnicodeSet uppersToAdd;
    while (it.next()) {
        const UnicodeString &exemplarC = it.getString();
        upperC = exemplarC;
        upperC.toUpper(locale);
        if (exemplarC != upperC) {
            lowersToRemove.add(exemplarC);
            uppersToAdd.add(upperC);
        }
    }
    exemplars.removeAll(lowersToRemove);
    exemplars.addAll(uppersToAdd);

    // get the exemplars, and handle special cases
    // question: should we add auxiliary exemplars?
    if (exemplars.containsSome(*CORE_LATIN)) {
        exemplars.addAll(*CORE_LATIN);
    }
    if (exemplars.containsSome(*HANGUL)) {
        // cut down to small list
        UnicodeSet BLOCK_HANGUL_SYLLABLES(UNICODE_STRING_SIMPLE("[:block=hangul_syllables:]"), status);
        exemplars.removeAll(BLOCK_HANGUL_SYLLABLES);
        exemplars.addAll(*HANGUL);
    }
    if (exemplars.containsSome(*ETHIOPIC)) {
        // cut down to small list
        // make use of the fact that Ethiopic is allocated in 8's, where
        // the base is 0 mod 8.
        UnicodeSetIterator ethiopicIt(*ETHIOPIC);
        while (ethiopicIt.next() && !ethiopicIt.isString()) {
            if ((ethiopicIt.getCodepoint() & 0x7) != 0) {
                exemplars.remove(ethiopicIt.getCodepoint());
            }
        }
    }
    dest.addAll(exemplars);
}
示例6: addReplacementSetTo
/**
 * UnicodeReplacer implementation: unions into toUnionTo the set returned by
 * translit->getTargetSet().
 *
 * @param toUnionTo  set that receives the additional elements.
 */
void FunctionReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {
    // Scratch set handed to getTargetSet(); whatever it returns is unioned in.
    UnicodeSet scratch;
    const UnicodeSet& targets = translit->getTargetSet(scratch);
    toUnionTo.addAll(targets);
}
示例7: codesAndRanges
int32_t
CollationDataWriter::write(UBool isBase, const UVersionInfo dataVersion,
const CollationData &data, const CollationSettings &settings,
const void *rootElements, int32_t rootElementsLength,
int32_t indexes[], uint8_t *dest, int32_t capacity,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return 0; }
if(capacity < 0 || (capacity > 0 && dest == NULL)) {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
// Figure out which data items to write before settling on
// the indexes length and writing offsets.
// For any data item, we need to write the start and limit offsets,
// so the indexes length must be at least index-of-start-offset + 2.
int32_t indexesLength;
UBool hasMappings;
UnicodeSet unsafeBackwardSet;
const CollationData *baseData = data.base;
int32_t fastLatinVersion;
if(data.fastLatinTable != NULL) {
fastLatinVersion = (int32_t)CollationFastLatin::VERSION << 16;
} else {
fastLatinVersion = 0;
}
int32_t fastLatinTableLength = 0;
if(isBase) {
// For the root collator, we write an even number of indexes
// so that we start with an 8-aligned offset.
indexesLength = CollationDataReader::IX_TOTAL_SIZE + 1;
U_ASSERT(settings.reorderCodesLength == 0);
hasMappings = TRUE;
unsafeBackwardSet = *data.unsafeBackwardSet;
fastLatinTableLength = data.fastLatinTableLength;
} else if(baseData == NULL) {
hasMappings = FALSE;
if(settings.reorderCodesLength == 0) {
// only options
indexesLength = CollationDataReader::IX_OPTIONS + 1; // no limit offset here
} else {
// only options, reorder codes, and the reorder table
indexesLength = CollationDataReader::IX_REORDER_TABLE_OFFSET + 2;
}
} else {
hasMappings = TRUE;
// Tailored mappings, and what else?
// Check in ascending order of optional tailoring data items.
indexesLength = CollationDataReader::IX_CE32S_OFFSET + 2;
if(data.contextsLength != 0) {
indexesLength = CollationDataReader::IX_CONTEXTS_OFFSET + 2;
}
unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->unsafeBackwardSet);
if(!unsafeBackwardSet.isEmpty()) {
indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2;
}
if(data.fastLatinTable != baseData->fastLatinTable) {
fastLatinTableLength = data.fastLatinTableLength;
indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2;
}
}
UVector32 codesAndRanges(errorCode);
const int32_t *reorderCodes = settings.reorderCodes;
int32_t reorderCodesLength = settings.reorderCodesLength;
if(settings.hasReordering() &&
CollationSettings::reorderTableHasSplitBytes(settings.reorderTable)) {
// Rebuild the full list of reorder ranges.
// The list in the settings is truncated for efficiency.
data.makeReorderRanges(reorderCodes, reorderCodesLength, codesAndRanges, errorCode);
// Write the codes, then the ranges.
for(int32_t i = 0; i < reorderCodesLength; ++i) {
codesAndRanges.insertElementAt(reorderCodes[i], i, errorCode);
}
if(U_FAILURE(errorCode)) { return 0; }
reorderCodes = codesAndRanges.getBuffer();
reorderCodesLength = codesAndRanges.size();
}
int32_t headerSize;
if(isBase) {
headerSize = 0; // udata_create() writes the header
} else {
DataHeader header;
header.dataHeader.magic1 = 0xda;
header.dataHeader.magic2 = 0x27;
uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo));
uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo));
headerSize = (int32_t)sizeof(header);
U_ASSERT((headerSize & 3) == 0); // multiple of 4 bytes
if(hasMappings && data.cesLength != 0) {
// Sum of the sizes of the data items which are
// not automatically multiples of 8 bytes and which are placed before the CEs.
int32_t sum = headerSize + (indexesLength + reorderCodesLength) * 4;
if((sum & 7) != 0) {
// We need to add padding somewhere so that the 64-bit CEs are 8-aligned.
// We add to the header size here.
// Alternatively, we could increment the indexesLength
//.........这里部分代码省略.........
示例8: buildWSConfusableData
//.........这里部分代码省略.........
innerSet->index = outeri;
innerSet->rindex = outerSet->rindex;
duplicateCount++;
}
// But this doesn't get all. We need to fix the TRIE.
}
}
// printf("Number of distinct script sets: %d\n", rtScriptSetsCount);
}
// Update the Trie values to be reflect the run time script indexes (after duplicate merging).
// (Trie Values 0 and 1 are reserved, and the corresponding slots in scriptSets
// are unused, which is why the loop index starts at 2.)
{
for (int32_t i=2; i<scriptSets->size(); i++) {
BuilderScriptSet *bSet = static_cast<BuilderScriptSet *>(scriptSets->elementAt(i));
if (bSet->rindex != (uint32_t)i) {
utrie2_set32(bSet->trie, bSet->codePoint, bSet->rindex, &status);
}
}
}
// For code points with script==Common or script==Inherited,
// Set the reserved value of 1 into both Tries. These characters do not participate
// in Whole Script Confusable detection; this reserved value is the means
// by which they are detected.
{
UnicodeSet ignoreSet;
ignoreSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_COMMON, status);
UnicodeSet inheritedSet;
inheritedSet.applyIntPropertyValue(UCHAR_SCRIPT, USCRIPT_INHERITED, status);
ignoreSet.addAll(inheritedSet);
for (int32_t rn=0; rn<ignoreSet.getRangeCount(); rn++) {
UChar32 rangeStart = ignoreSet.getRangeStart(rn);
UChar32 rangeEnd = ignoreSet.getRangeEnd(rn);
utrie2_setRange32(anyCaseTrie, rangeStart, rangeEnd, 1, TRUE, &status);
utrie2_setRange32(lowerCaseTrie, rangeStart, rangeEnd, 1, TRUE, &status);
}
}
// Serialize the data to the Spoof Detector
{
utrie2_freeze(anyCaseTrie, UTRIE2_16_VALUE_BITS, &status);
int32_t size = utrie2_serialize(anyCaseTrie, NULL, 0, &status);
// printf("Any case Trie size: %d\n", size);
if (status != U_BUFFER_OVERFLOW_ERROR) {
goto cleanup;
}
status = U_ZERO_ERROR;
spImpl->fSpoofData->fRawData->fAnyCaseTrie = spImpl->fSpoofData->fMemLimit;
spImpl->fSpoofData->fRawData->fAnyCaseTrieLength = size;
spImpl->fSpoofData->fAnyCaseTrie = anyCaseTrie;
void *where = spImpl->fSpoofData->reserveSpace(size, status);
utrie2_serialize(anyCaseTrie, where, size, &status);
utrie2_freeze(lowerCaseTrie, UTRIE2_16_VALUE_BITS, &status);
size = utrie2_serialize(lowerCaseTrie, NULL, 0, &status);
// printf("Lower case Trie size: %d\n", size);
if (status != U_BUFFER_OVERFLOW_ERROR) {
goto cleanup;
}
status = U_ZERO_ERROR;
spImpl->fSpoofData->fRawData->fLowerCaseTrie = spImpl->fSpoofData->fMemLimit;
spImpl->fSpoofData->fRawData->fLowerCaseTrieLength = size;