本文整理汇总了C++中UnicodeSet类的典型用法代码示例。如果您正苦于以下问题：C++ UnicodeSet类的具体用法？C++ UnicodeSet怎么用？C++ UnicodeSet使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了UnicodeSet类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: uscript_getCode
void SpoofImpl::addScriptChars(const char *locale, UnicodeSet *allowedChars, UErrorCode &status) {
UScriptCode scripts[30];
int32_t numScripts = uscript_getCode(locale, scripts, sizeof(scripts)/sizeof(UScriptCode), &status);
if (U_FAILURE(status)) {
return;
}
if (status == U_USING_DEFAULT_WARNING) {
status = U_ILLEGAL_ARGUMENT_ERROR;
return;
}
UnicodeSet tmpSet;
int32_t i;
for (i=0; i<numScripts; i++) {
tmpSet.applyIntPropertyValue(UCHAR_SCRIPT, scripts[i], status);
allowedChars->addAll(tmpSet);
}
}
示例2: parseName
void
NamesPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return; }
if(!newValues.contains(UCHAR_NAME) && !newValues.contains(PPUCD_NAME_ALIAS)) {
return;
}
U_ASSERT(props.start==props.end);
const char *names[4]={ NULL, NULL, NULL, NULL };
int16_t lengths[4]={ 0, 0, 0, 0 };
/* get the character name */
if(props.name!=NULL) {
names[0]=props.name;
lengths[0]=(int16_t)uprv_strlen(props.name);
parseName(names[0], lengths[0]);
}
CharString buffer;
if(props.nameAlias!=NULL) {
/*
* Only use "correction" aliases for now, from Unicode 6.1 NameAliases.txt with 3 fields per line.
* TODO: Work on ticket #8963 to deal with multiple type:alias pairs per character.
*/
const char *corr=uprv_strstr(props.nameAlias, "correction=");
if(corr!=NULL) {
corr+=11; // skip "correction="
const char *limit=uprv_strchr(corr, ',');
if(limit!=NULL) {
buffer.append(corr, limit-corr, errorCode);
names[3]=buffer.data();
lengths[3]=(int16_t)(limit-corr);
} else {
names[3]=corr;
lengths[3]=(int16_t)uprv_strlen(corr);
}
parseName(names[3], lengths[3]);
}
}
addLine(props.start, names, lengths, LENGTHOF(names));
}
示例3: errln
/**
 * Checks UnicodeSet error handling: a default-constructed set must be empty,
 * and both applyPattern() and the pattern constructor must report a failure
 * status for a pattern the test expects to be rejected.
 */
void TransliteratorErrorTest::TestUnicodeSetErrors() {
    UnicodeString badPattern="[[:L:]-[0x0300-0x0400]";
    UnicodeSet set;
    UErrorCode status = U_ZERO_ERROR;

    if (!set.isEmpty()) {
        errln("FAIL: The default ctor of UnicodeSet created a non-empty object.");
    }

    set.applyPattern(badPattern, status);
    if (U_SUCCESS(status)) {
        errln("FAIL: Applied a bad pattern to the UnicodeSet object okay.");
    }

    // Start from a clean status so the constructor's own result is observed.
    status = U_ZERO_ERROR;
    UnicodeSet *set1 = new UnicodeSet(badPattern, status);
    if (U_SUCCESS(status)) {
        errln("FAIL: Created a UnicodeSet based on bad patterns.");
    }
    delete set1;
}
示例4: uspoof_setAllowedUnicodeSet
/**
 * Installs a frozen clone of `chars` as the checker's allowed-character set
 * and turns on the USPOOF_CHAR_LIMIT check.
 *
 * @param sc      spoof checker; validated through SpoofImpl::validateThis().
 * @param chars   set to copy. A NULL or bogus set is rejected with
 *                U_ILLEGAL_ARGUMENT_ERROR and the checker is left unchanged.
 * @param status  ICU error code. Set to U_MEMORY_ALLOCATION_ERROR when the
 *                set cannot be cloned.
 */
U_CAPI void U_EXPORT2
uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) {
    SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == NULL) {
        return;
    }
    // Reject a NULL set up front instead of dereferencing it below.
    if (chars == NULL || chars->isBogus()) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    UnicodeSet *clonedSet = static_cast<UnicodeSet *>(chars->clone());
    if (clonedSet == NULL || clonedSet->isBogus()) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    clonedSet->freeze();
    // Replace the previous set only after the new one is known to be good.
    delete This->fAllowedCharsSet;
    This->fAllowedCharsSet = clonedSet;
    This->fChecks |= USPOOF_CHAR_LIMIT;
}
示例5: span
/**
 * Scans s[0..length) one code point at a time and returns the index of the
 * first code point whose set membership differs from tf, or the index reached
 * once the whole string has been consumed.
 */
static int32_t span(const UnicodeSet &set, const UChar *s, int32_t length, UBool tf) {
    int32_t pos=0;
    while(pos<length) {
        const int32_t cpStart=pos;  // where the code point about to be read begins
        UChar32 cp;
        U16_NEXT(s, pos, length, cp);
        if(set.contains(cp)!=tf) {
            return cpStart;
        }
    }
    return pos;
}
示例6: fprintf
/**
 * Stores the bidi and shaping properties for the range props.start..props.end.
 *
 * Packs the mirroring delta, the mirrored / bidi-control / join-control flags,
 * the paired-bracket type, the joining type and the bidi class into one 32-bit
 * value that is written to pTrie for the whole range. The Joining_Group value
 * is stored separately in jgArray / jgArray2.
 *
 * @param props      properties of the range being set.
 * @param newValues  set of properties that changed; nothing is done when it
 *                   contains none of relevantProps.
 * @param errorCode  ICU error code. Set to U_ILLEGAL_ARGUMENT_ERROR when a
 *                   code point outside both Joining_Group ranges has a
 *                   joining group.
 */
void
BiDiPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
                           UErrorCode &errorCode) {
    if(U_FAILURE(errorCode) || newValues.containsNone(relevantProps)) { return; }

    UChar32 start=props.start;
    UChar32 end=props.end;

    // The runtime code relies on this invariant for returning both bmg and bpb
    // from the same data.
    int32_t bpt=props.getIntProp(UCHAR_BIDI_PAIRED_BRACKET_TYPE);
    if(!(bpt==0 ? props.bpb==U_SENTINEL : props.bpb==props.bmg)) {
        // NOTE(review): this path prints an error but leaves errorCode untouched,
        // unlike the other error paths below - confirm that this is intended.
        fprintf(stderr,
                "genprops error: invariant not true: "
                "if(bpt==None) then bpb=<none> else bpb=bmg\n");
        return;
    }

    int32_t delta=encodeBidiMirroringGlyph(start, end, props.bmg, errorCode);
    uint32_t value=(uint32_t)delta<<UBIDI_MIRROR_DELTA_SHIFT;
    if(props.binProps[UCHAR_BIDI_MIRRORED]) {
        value|=U_MASK(UBIDI_IS_MIRRORED_SHIFT);
    }
    if(props.binProps[UCHAR_BIDI_CONTROL]) {
        value|=U_MASK(UBIDI_BIDI_CONTROL_SHIFT);
    }
    if(props.binProps[UCHAR_JOIN_CONTROL]) {
        value|=U_MASK(UBIDI_JOIN_CONTROL_SHIFT);
    }
    value|=(uint32_t)bpt<<UBIDI_BPT_SHIFT;
    value|=(uint32_t)props.getIntProp(UCHAR_JOINING_TYPE)<<UBIDI_JT_SHIFT;
    value|=(uint32_t)props.getIntProp(UCHAR_BIDI_CLASS);

    utrie2_setRange32(pTrie, start, end, value, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: BiDiPropsBuilder utrie2_setRange32() failed - %s\n",
                u_errorName(errorCode));
        return;
    }

    // Store Joining_Group values from vector column 1 in simple byte arrays.
    int32_t jg=props.getIntProp(UCHAR_JOINING_GROUP);
    for(UChar32 c=start; c<=end; ++c) {
        if(MIN_JG_START<=c && c<MAX_JG_LIMIT) {
            jgArray[c-MIN_JG_START]=(uint8_t)jg;
        } else if(MIN_JG_START2<=c && c<MAX_JG_LIMIT2) {
            jgArray2[c-MIN_JG_START2]=(uint8_t)jg;
        } else if(jg!=U_JG_NO_JOINING_GROUP) {
            // A joining group outside both array ranges cannot be stored.
            fprintf(stderr, "genprops error: Joining_Group for out-of-range code points U+%04lx..U+%04lx\n",
                    (long)start, (long)end);
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
    }
}
示例7: fprintf
void
PreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) {
if(U_FAILURE(errorCode)) { return; }
scx.clear();
CharString scString;
for(;;) {
const char *scs;
const char *scLimit=strchr(s, ' ');
if(scLimit!=NULL) {
scs=scString.clear().append(s, (int32_t)(scLimit-s), errorCode).data();
if(U_FAILURE(errorCode)) { return; }
} else {
scs=s;
}
int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs);
if(script==UCHAR_INVALID_CODE) {
fprintf(stderr,
"error in preparsed UCD: '%s' is not a valid script code on line %ld\n",
scs, (long)lineNumber);
errorCode=U_PARSE_ERROR;
return;
} else if(scx.contains(script)) {
fprintf(stderr,
"error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n",
scs, (long)lineNumber);
errorCode=U_PARSE_ERROR;
return;
} else {
scx.add(script);
}
if(scLimit!=NULL) {
s=scLimit+1;
} else {
break;
}
}
if(scx.isEmpty()) {
fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber);
errorCode=U_PARSE_ERROR;
}
}
示例8: getUnderflowLabel
/**
 * Fills bucketList_ from labels_: first an underflow bucket, then a NORMAL
 * bucket per label, with an INFLOW bucket inserted before a label whose
 * script set shares nothing with the previous label's and which compares
 * greater than the previous label's overflow comparison string, and finally
 * an overflow bucket.
 *
 * @param status ICU error code; it is passed to every helper called here.
 *
 * NOTE(review): none of the `new Bucket(...)` results is checked for NULL and
 * status is never examined between steps - confirm that addElement() and the
 * callers cope with that.
 * NOTE(review): labels_->elementAt(0) is read unconditionally; presumably
 * labels_ is never empty at this point - verify against the caller.
 */
void AlphabeticIndex::buildBucketList(UErrorCode &status) {
UnicodeString labelStr = getUnderflowLabel();
Bucket *b = new Bucket(labelStr, *EMPTY_STRING, U_ALPHAINDEX_UNDERFLOW, status);
bucketList_->addElement(b, status);
// Build up the list, adding underflow, additions, overflow
// insert infix labels as needed, using \uFFFF.
const UnicodeString *last = static_cast<UnicodeString *>(labels_->elementAt(0));
b = new Bucket(*last, *last, U_ALPHAINDEX_NORMAL, status);
bucketList_->addElement(b, status);
// lastSet / set hold the script sets of the previous and the current label,
// each with IGNORE_SCRIPTS removed.
UnicodeSet lastSet;
UnicodeSet set;
AlphabeticIndex::getScriptSet(lastSet, *last, status);
lastSet.removeAll(*IGNORE_SCRIPTS);
for (int i = 1; i < labels_->size(); ++i) {
UnicodeString *current = static_cast<UnicodeString *>(labels_->elementAt(i));
getScriptSet(set, *current, status);
set.removeAll(*IGNORE_SCRIPTS);
if (lastSet.containsNone(set)) {
// check for adjacent
const UnicodeString &overflowComparisonString = getOverflowComparisonString(*last, status);
if (collatorPrimaryOnly_->compare(overflowComparisonString, *current) < 0) {
labelStr = getInflowLabel();
b = new Bucket(labelStr, overflowComparisonString, U_ALPHAINDEX_INFLOW, status);
bucketList_->addElement(b, status);
// NOTE(review): this increment comes on top of the loop's own ++i, so the
// label that follows `current` is never fetched with elementAt() - confirm
// that skipping it is intended.
i++;
// NOTE(review): the same assignment is repeated at the end of the loop body.
lastSet = set;
}
}
b = new Bucket(*current, *current, U_ALPHAINDEX_NORMAL, status);
bucketList_->addElement(b, status);
last = current;
lastSet = set;
}
const UnicodeString &limitString = getOverflowComparisonString(*last, status);
b = new Bucket(getOverflowLabel(), limitString, U_ALPHAINDEX_OVERFLOW, status);
bucketList_->addElement(b, status);
// final overflow bucket
}
示例9: SpanBackUTF8
SpanBackUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
// Verify that the frozen set is equal to the unfrozen one.
UnicodeSet set;
char utf8[4];
UChar32 c;
int32_t length;
for(c=0; c<=0x10ffff; ++c) {
if(c==0xd800) {
c=0xe000;
}
length=0;
U8_APPEND_UNSAFE(utf8, length, c);
if(testcase.set.spanBackUTF8(utf8, length, USET_SPAN_CONTAINED)==0) {
set.add(c);
}
}
if(set!=testcase.set) {
fprintf(stderr, "error: frozen set != original!\n");
}
}
示例10: addReplacementSetTo
/**
 * Adds to toUnionTo every character this replacer may produce: each code
 * point of `output` is added directly unless data->lookupReplacer() returns a
 * replacer for it, in which case that replacer contributes its own set.
 */
void StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {
    int32_t pos = 0;
    while (pos < output.length()) {
        const UChar32 cp = output.char32At(pos);
        pos += UTF_CHAR_LENGTH(cp);  // step over the code point just read

        UnicodeReplacer* nested = data->lookupReplacer(cp);
        if (nested != NULL) {
            nested->addReplacementSetTo(toUnionTo);
        } else {
            toUnionTo.add(cp);
        }
    }
}
示例11: addMatchSetTo
/**
 * Implement UnicodeMatcher.
 *
 * Adds to toUnionTo every character this matcher may match: each code point
 * of `pattern` is added directly unless data->lookupMatcher() returns a
 * matcher for it, in which case that matcher contributes its own set.
 */
void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {
    int32_t pos = 0;
    while (pos < pattern.length()) {
        const UChar32 cp = pattern.char32At(pos);
        pos += UTF_CHAR_LENGTH(cp);  // step over the code point just read

        const UnicodeMatcher* nested = data->lookupMatcher(cp);
        if (nested != NULL) {
            nested->addMatchSetTo(toUnionTo);
        } else {
            toUnionTo.add(cp);
        }
    }
}
示例12: dest
/**
 * Builds a vector of the script-first-primary boundary strings taken from
 * the contractions that collatorPrimaryOnly_ reports for U+FDD1.
 *
 * Only boundaries whose second code point is a Letter or unassigned (Cn) are
 * kept. The vector owns its UnicodeString elements through
 * uprv_deleteUObject.
 *
 * @param status ICU error code. Set to U_UNSUPPORTED_ERROR when there are no
 *               such contractions and to U_MEMORY_ALLOCATION_ERROR when a
 *               string cannot be allocated.
 * @return a new vector owned by the caller, or NULL whenever status indicates
 *         a failure.
 */
UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return NULL;
    }
    LocalPointer<UVector> dest(new UVector(status), status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    dest->setDeleter(uprv_deleteUObject);
    // Fetch the script-first-primary contractions which are defined in the root collator.
    // They all start with U+FDD1.
    UnicodeSet set;
    collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    if (set.isEmpty()) {
        status = U_UNSUPPORTED_ERROR;
        return NULL;
    }
    UnicodeSetIterator iter(set);
    while (iter.next()) {
        const UnicodeString &boundary = iter.getString();
        uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
        if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
            // Ignore boundaries for the special reordering groups.
            // Take only those for "real scripts" (where the sample character is a Letter,
            // and the one for unassigned implicit weights (Cn).
            continue;
        }
        UnicodeString *s = new UnicodeString(boundary);
        if (s == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        dest->addElement(s, status);
        if (U_FAILURE(status)) {
            // Assumes UVector::addElement() leaves ownership of s with the
            // caller when it fails - confirm against the UVector in this tree.
            // Free it here and fail like the other error paths instead of
            // returning a partially filled vector.
            delete s;
            return NULL;
        }
    }
    return dest.orphan();
}
示例13: DictionaryBreakEngine
/**
 * Sets up the word sets used for CJK dictionary-based breaking and selects
 * the characters this engine handles according to `type`.
 *
 * @param adoptDictionary dictionary stored in fDictionary.
 * @param type            kKorean selects the Hangul set; any other value
 *                        selects the combined Han/Katakana/Hiragana set.
 * @param status          ICU error code; setCharacters() is not called when
 *                        building any of the sets failed.
 */
CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
: DictionaryBreakEngine(1 << UBRK_WORD), fDictionary(adoptDictionary) {
    // Korean dictionary only includes Hangul syllables
    fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
    fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
    fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
    fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);
    if (U_FAILURE(status)) {
        return;
    }

    // Korean and Japanese/Chinese are handled with different dictionaries.
    if (type == kKorean) {
        setCharacters(fHangulWordSet);
        return;
    }

    // Chinese and Japanese: the three word sets plus U+FF70 and U+30FC.
    UnicodeSet cjSet;
    cjSet.addAll(fHanWordSet)
         .addAll(fKatakanaWordSet)
         .addAll(fHiraganaWordSet)
         .add(0xFF70)
         .add(0x30FC);
    setCharacters(cjSet);
}
示例14: assertTrue
/**
 * Checks that the static parsing sets cover the symbols of every available
 * locale: for each locale, the decimal separator, grouping separator, plus
 * sign, minus sign, percent, per-mille and infinity symbols reported by
 * DecimalFormatSymbols must be contained in the corresponding set.
 */
void StaticUnicodeSetsTest::testSetCoverage() {
    UErrorCode status = U_ZERO_ERROR;

    // Lenient comma/period should be supersets of strict comma/period;
    // it also makes the coverage logic cheaper.
    assertTrue(
            "COMMA should be superset of STRICT_COMMA",
            get(unisets::COMMA)->containsAll(*get(unisets::STRICT_COMMA)));
    assertTrue(
            "PERIOD should be superset of STRICT_PERIOD",
            get(unisets::PERIOD)->containsAll(*get(unisets::STRICT_PERIOD)));

    UnicodeSet decimals;
    decimals.addAll(*get(unisets::STRICT_COMMA));
    decimals.addAll(*get(unisets::STRICT_PERIOD));
    decimals.freeze();

    UnicodeSet grouping;
    grouping.addAll(decimals);
    grouping.addAll(*get(unisets::OTHER_GROUPING_SEPARATORS));
    // Freeze the set that was just built; the original froze `decimals` a second time.
    grouping.freeze();

    const UnicodeSet &plusSign = *get(unisets::PLUS_SIGN);
    const UnicodeSet &minusSign = *get(unisets::MINUS_SIGN);
    const UnicodeSet &percent = *get(unisets::PERCENT_SIGN);
    const UnicodeSet &permille = *get(unisets::PERMILLE_SIGN);
    const UnicodeSet &infinity = *get(unisets::INFINITY_KEY);

    int32_t localeCount;
    const Locale* allAvailableLocales = Locale::getAvailableLocales(localeCount);
    for (int32_t i = 0; i < localeCount; i++) {
        Locale locale = allAvailableLocales[i];
        DecimalFormatSymbols dfs(locale, status);
        UnicodeString localeName;
        locale.getDisplayName(localeName);
        assertSuccess(UnicodeString("Making DFS for ") + localeName, status);

        // Passes the set's variable name along as text so assertInSet() receives it.
#define ASSERT_IN_SET(name, foo) assertInSet(localeName, UnicodeString("" #name ""), name, foo)
        ASSERT_IN_SET(decimals, dfs.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol));
        ASSERT_IN_SET(grouping, dfs.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol));
        ASSERT_IN_SET(plusSign, dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol));
        ASSERT_IN_SET(minusSign, dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol));
        ASSERT_IN_SET(percent, dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol));
        ASSERT_IN_SET(permille, dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol));
        ASSERT_IN_SET(infinity, dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol));
    }
}
示例15: clear
/**
* Parse the pattern from the given RuleCharacterIterator. The
* iterator is advanced over the parsed pattern.
* @param chars iterator over the pattern characters. Upon return
* it will be advanced to the first character after the parsed
* pattern, or the end of the iteration if all characters are
* parsed.
* @param symbols symbol table to use to parse and dereference
* variables, or null if none.
* @param rebuiltPat the pattern that was parsed, rebuilt or
* copied from the input pattern, as appropriate.
* @param options a bit mask of zero or more of the following:
* IGNORE_SPACE, CASE.
*/
void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
const SymbolTable* symbols,
UnicodeString& rebuiltPat,
uint32_t options,
UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
UErrorCode& ec) {
if (U_FAILURE(ec)) return;
// Syntax characters: [ ] ^ - & { }
// Recognized special forms for chars, sets: c-c s-s s&s
int32_t opts = RuleCharacterIterator::PARSE_VARIABLES |
RuleCharacterIterator::PARSE_ESCAPES;
if ((options & USET_IGNORE_SPACE) != 0) {
opts |= RuleCharacterIterator::SKIP_WHITESPACE;
}
UnicodeString patLocal, buf;
UBool usePat = FALSE;
UnicodeSetPointer scratch;
RuleCharacterIterator::Pos backup;
// mode: 0=before [, 1=between [...], 2=after ]
// lastItem: 0=none, 1=char, 2=set
int8_t lastItem = 0, mode = 0;
UChar32 lastChar = 0;
UChar op = 0;
UBool invert = FALSE;
clear();
while (mode != 2 && !chars.atEnd()) {
U_ASSERT((lastItem == 0 && op == 0) ||
(lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
(lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
op == INTERSECTION /*'&'*/)));
UChar32 c = 0;
UBool literal = FALSE;
UnicodeSet* nested = 0; // alias - do not delete
// -------- Check for property pattern
// setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
int8_t setMode = 0;
if (resemblesPropertyPattern(chars, opts)) {
setMode = 2;
}
// -------- Parse '[' of opening delimiter OR nested set.
// If there is a nested set, use `setMode' to define how
// the set should be parsed. If the '[' is part of the
// opening delimiter for this pattern, parse special
// strings "[", "[^", "[-", and "[^-". Check for stand-in
// characters representing a nested set in the symbol
// table.
else {
// Prepare to backup if necessary
chars.getPos(backup);
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
if (c == 0x5B /*'['*/ && !literal) {
if (mode == 1) {
chars.setPos(backup); // backup
setMode = 1;
} else {
// Handle opening '[' delimiter
mode = 1;
patLocal.append((UChar) 0x5B /*'['*/);
chars.getPos(backup); // prepare to backup
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
if (c == 0x5E /*'^'*/ && !literal) {
invert = TRUE;
patLocal.append((UChar) 0x5E /*'^'*/);
chars.getPos(backup); // prepare to backup
c = chars.next(opts, literal, ec);
if (U_FAILURE(ec)) return;
}
// Fall through to handle special leading '-';
// otherwise restart loop for nested [], \p{}, etc.
if (c == HYPHEN /*'-'*/) {
//.........这里部分代码省略.........