C++ UnicodeSet类代码示例

本文整理汇总了C++中UnicodeSet类的典型用法代码示例。如果您正苦于以下问题：C++ UnicodeSet类的具体用法？C++ UnicodeSet怎么用？C++ UnicodeSet使用的例子？那么, 这里精选的类代码示例或许可以为您提供帮助。

在下文中一共展示了UnicodeSet类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: uscript_getCode

void SpoofImpl::addScriptChars(const char *locale, UnicodeSet *allowedChars, UErrorCode &status) {
    UScriptCode scripts[30];

    int32_t numScripts = uscript_getCode(locale, scripts, sizeof(scripts)/sizeof(UScriptCode), &status);
    if (U_FAILURE(status)) {
        return;
    }
    if (status == U_USING_DEFAULT_WARNING) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    UnicodeSet tmpSet;
    int32_t    i;
    for (i=0; i<numScripts; i++) {
        tmpSet.applyIntPropertyValue(UCHAR_SCRIPT, scripts[i], status);
        allowedChars->addAll(tmpSet);
    }
}

开发者ID:0x4d52，项目名称:JavaScriptCore-X，代码行数:18，代码来源:uspoof_impl.cpp

示例2: parseName

void
NamesPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
                            UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    if(!newValues.contains(UCHAR_NAME) && !newValues.contains(PPUCD_NAME_ALIAS)) {
        return;
    }

    U_ASSERT(props.start==props.end);

    const char *names[4]={ NULL, NULL, NULL, NULL };
    int16_t lengths[4]={ 0, 0, 0, 0 };

    /* get the character name */
    if(props.name!=NULL) {
        names[0]=props.name;
        lengths[0]=(int16_t)uprv_strlen(props.name);
        parseName(names[0], lengths[0]);
    }

    CharString buffer;
    if(props.nameAlias!=NULL) {
        /*
         * Only use "correction" aliases for now, from Unicode 6.1 NameAliases.txt with 3 fields per line.
         * TODO: Work on ticket #8963 to deal with multiple type:alias pairs per character.
         */
        const char *corr=uprv_strstr(props.nameAlias, "correction=");
        if(corr!=NULL) {
            corr+=11;  // skip "correction="
            const char *limit=uprv_strchr(corr, ',');
            if(limit!=NULL) {
                buffer.append(corr, limit-corr, errorCode);
                names[3]=buffer.data();
                lengths[3]=(int16_t)(limit-corr);
            } else {
                names[3]=corr;
                lengths[3]=(int16_t)uprv_strlen(corr);
            }
            parseName(names[3], lengths[3]);
        }
    }

    addLine(props.start, names, lengths, LENGTHOF(names));
}

开发者ID:icu-project，项目名称:icu-tools，代码行数:44，代码来源:namespropsbuilder.cpp

示例3: errln

void TransliteratorErrorTest::TestUnicodeSetErrors() {
    UnicodeString badPattern="[[:L:]-[0x0300-0x0400]";
    UnicodeSet set;
    UErrorCode status = U_ZERO_ERROR;
    UnicodeString result;

    if (!set.isEmpty()) {
        errln("FAIL: The default ctor of UnicodeSet created a non-empty object.");
    }
    set.applyPattern(badPattern, status);
    if (U_SUCCESS(status)) {
        errln("FAIL: Applied a bad pattern to the UnicodeSet object okay.");
    }
    status = U_ZERO_ERROR;
    UnicodeSet *set1 = new UnicodeSet(badPattern, status);
    if (U_SUCCESS(status)) {
        errln("FAIL: Created a UnicodeSet based on bad patterns.");
    }
    delete set1;
}

开发者ID:winlibs，项目名称:icu4c，代码行数:20，代码来源:trnserr.cpp

示例4: uspoof_setAllowedUnicodeSet

U_CAPI void U_EXPORT2
uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const UnicodeSet *chars, UErrorCode *status) {
    SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == NULL) {
        return;
    }
    if (chars->isBogus()) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    UnicodeSet *clonedSet = static_cast<UnicodeSet *>(chars->clone());
    if (clonedSet == NULL || clonedSet->isBogus()) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    clonedSet->freeze();
    delete This->fAllowedCharsSet;
    This->fAllowedCharsSet = clonedSet;
    This->fChecks |= USPOOF_CHAR_LIMIT;
}

开发者ID:BrunoReX，项目名称:palemoon，代码行数:20，代码来源:uspoof.cpp

示例5: span

 static int32_t span(const UnicodeSet &set, const UChar *s, int32_t length, UBool tf) {
     UChar32 c;
     int32_t start=0, prev;
     while((prev=start)<length) {
         U16_NEXT(s, start, length, c);
         if(tf!=set.contains(c)) {
             break;
         }
     }
     return prev;
 }

开发者ID:LittoCats，项目名称:OT_4010D，代码行数:11，代码来源:unisetperf.cpp

示例6: fprintf

void
BiDiPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
                           UErrorCode &errorCode) {
    if(U_FAILURE(errorCode) || newValues.containsNone(relevantProps)) { return; }

    UChar32 start=props.start;
    UChar32 end=props.end;

    // The runtime code relies on this invariant for returning both bmg and bpb
    // from the same data.
    int32_t bpt=props.getIntProp(UCHAR_BIDI_PAIRED_BRACKET_TYPE);
    if(!(bpt==0 ? props.bpb==U_SENTINEL : props.bpb==props.bmg)) {
        fprintf(stderr,
                "genprops error: invariant not true: "
                "if(bpt==None) then bpb=<none> else bpb=bmg\n");
        return;
    }
    int32_t delta=encodeBidiMirroringGlyph(start, end, props.bmg, errorCode);
    uint32_t value=(uint32_t)delta<<UBIDI_MIRROR_DELTA_SHIFT;
    if(props.binProps[UCHAR_BIDI_MIRRORED]) {
        value|=U_MASK(UBIDI_IS_MIRRORED_SHIFT);
    }
    if(props.binProps[UCHAR_BIDI_CONTROL]) {
        value|=U_MASK(UBIDI_BIDI_CONTROL_SHIFT);
    }
    if(props.binProps[UCHAR_JOIN_CONTROL]) {
        value|=U_MASK(UBIDI_JOIN_CONTROL_SHIFT);
    }
    value|=(uint32_t)bpt<<UBIDI_BPT_SHIFT;
    value|=(uint32_t)props.getIntProp(UCHAR_JOINING_TYPE)<<UBIDI_JT_SHIFT;
    value|=(uint32_t)props.getIntProp(UCHAR_BIDI_CLASS);
    utrie2_setRange32(pTrie, start, end, value, TRUE, &errorCode);
    if(U_FAILURE(errorCode)) {
        fprintf(stderr, "genprops error: BiDiPropsBuilder utrie2_setRange32() failed - %s\n",
                u_errorName(errorCode));
        return;
    }

    // Store Joining_Group values from vector column 1 in simple byte arrays.
    int32_t jg=props.getIntProp(UCHAR_JOINING_GROUP);
    for(UChar32 c=start; c<=end; ++c) {
        int32_t jgStart;
        if(MIN_JG_START<=c && c<MAX_JG_LIMIT) {
            jgArray[c-MIN_JG_START]=(uint8_t)jg;
        } else if(MIN_JG_START2<=c && c<MAX_JG_LIMIT2) {
            jgArray2[c-MIN_JG_START2]=(uint8_t)jg;
        } else if(jg!=U_JG_NO_JOINING_GROUP) {
            fprintf(stderr, "genprops error: Joining_Group for out-of-range code points U+%04lx..U+%04lx\n",
                    (long)start, (long)end);
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
    }
}

开发者ID:icu-project，项目名称:icu-tools，代码行数:54，代码来源:bidipropsbuilder.cpp

示例7: fprintf

void
PreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    scx.clear();
    CharString scString;
    for(;;) {
        const char *scs;
        const char *scLimit=strchr(s, ' ');
        if(scLimit!=NULL) {
            scs=scString.clear().append(s, (int32_t)(scLimit-s), errorCode).data();
            if(U_FAILURE(errorCode)) { return; }
        } else {
            scs=s;
        }
        int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs);
        if(script==UCHAR_INVALID_CODE) {
            fprintf(stderr,
                    "error in preparsed UCD: '%s' is not a valid script code on line %ld\n",
                    scs, (long)lineNumber);
            errorCode=U_PARSE_ERROR;
            return;
        } else if(scx.contains(script)) {
            fprintf(stderr,
                    "error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n",
                    scs, (long)lineNumber);
            errorCode=U_PARSE_ERROR;
            return;
        } else {
            scx.add(script);
        }
        if(scLimit!=NULL) {
            s=scLimit+1;
        } else {
            break;
        }
    }
    if(scx.isEmpty()) {
        fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber);
        errorCode=U_PARSE_ERROR;
    }
}

开发者ID:icu-project，项目名称:icu4c，代码行数:41，代码来源:ppucd.cpp

示例8: getUnderflowLabel

void AlphabeticIndex::buildBucketList(UErrorCode &status) {
    UnicodeString labelStr = getUnderflowLabel();
    Bucket *b = new Bucket(labelStr, *EMPTY_STRING, U_ALPHAINDEX_UNDERFLOW, status);
    bucketList_->addElement(b, status);

    // Build up the list, adding underflow, additions, overflow
    // insert infix labels as needed, using \uFFFF.
    const UnicodeString *last = static_cast<UnicodeString *>(labels_->elementAt(0));
    b = new Bucket(*last, *last, U_ALPHAINDEX_NORMAL, status);
    bucketList_->addElement(b, status);

    UnicodeSet lastSet;
    UnicodeSet set;
    AlphabeticIndex::getScriptSet(lastSet, *last, status);
    lastSet.removeAll(*IGNORE_SCRIPTS);

    for (int i = 1; i < labels_->size(); ++i) {
        UnicodeString *current = static_cast<UnicodeString *>(labels_->elementAt(i));
        getScriptSet(set, *current, status);
        set.removeAll(*IGNORE_SCRIPTS);
        if (lastSet.containsNone(set)) {
            // check for adjacent
            const UnicodeString &overflowComparisonString = getOverflowComparisonString(*last, status);
            if (collatorPrimaryOnly_->compare(overflowComparisonString, *current) < 0) {
                labelStr = getInflowLabel();
                b = new Bucket(labelStr, overflowComparisonString, U_ALPHAINDEX_INFLOW, status);
                bucketList_->addElement(b, status);
                i++;
                lastSet = set;
            }
        }
        b = new Bucket(*current, *current, U_ALPHAINDEX_NORMAL, status);
        bucketList_->addElement(b, status);
        last = current;
        lastSet = set;
    }
    const UnicodeString &limitString = getOverflowComparisonString(*last, status);
    b = new Bucket(getOverflowLabel(), limitString, U_ALPHAINDEX_OVERFLOW, status);
    bucketList_->addElement(b, status);
    // final overflow bucket
}

开发者ID:0omega，项目名称:platform_external_icu4c，代码行数:41，代码来源:alphaindex.cpp

示例9: SpanBackUTF8

    SpanBackUTF8(const UnicodeSetPerformanceTest &testcase) : Command(testcase) {
        // Verify that the frozen set is equal to the unfrozen one.
        UnicodeSet set;
        char utf8[4];
        UChar32 c;
        int32_t length;

        for(c=0; c<=0x10ffff; ++c) {
            if(c==0xd800) {
                c=0xe000;
            }
            length=0;
            U8_APPEND_UNSAFE(utf8, length, c);
            if(testcase.set.spanBackUTF8(utf8, length, USET_SPAN_CONTAINED)==0) {
                set.add(c);
            }
        }
        if(set!=testcase.set) {
            fprintf(stderr, "error: frozen set != original!\n");
        }
    }

开发者ID:LittoCats，项目名称:OT_4010D，代码行数:21，代码来源:unisetperf.cpp

示例10: addReplacementSetTo

void StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {
    UChar32 ch;
    for (int32_t i=0; i<output.length(); i+=UTF_CHAR_LENGTH(ch)) {
    ch = output.char32At(i);
    UnicodeReplacer* r = data->lookupReplacer(ch);
    if (r == NULL) {
        toUnionTo.add(ch);
    } else {
        r->addReplacementSetTo(toUnionTo);
    }
    }
}

开发者ID:LittoCats，项目名称:OT_4010D，代码行数:12，代码来源:strrepl.cpp

示例11: addMatchSetTo

/**
 * Implement UnicodeMatcher
 */
void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {
    UChar32 ch;
    for (int32_t i=0; i<pattern.length(); i+=UTF_CHAR_LENGTH(ch)) {
        ch = pattern.char32At(i);
        const UnicodeMatcher* matcher = data->lookupMatcher(ch);
        if (matcher == NULL) {
            toUnionTo.add(ch);
        } else {
            matcher->addMatchSetTo(toUnionTo);
        }
    }
}

开发者ID:0x4d52，项目名称:JavaScriptCore-X，代码行数:15，代码来源:strmatch.cpp

示例12: dest

UVector *AlphabeticIndex::firstStringsInScript(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return NULL;
    }
    LocalPointer<UVector> dest(new UVector(status), status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    dest->setDeleter(uprv_deleteUObject);
    // Fetch the script-first-primary contractions which are defined in the root collator.
    // They all start with U+FDD1.
    UnicodeSet set;
    collatorPrimaryOnly_->internalAddContractions(0xFDD1, set, status);
    if (U_FAILURE(status)) {
        return NULL;
    }
    if (set.isEmpty()) {
        status = U_UNSUPPORTED_ERROR;
        return NULL;
    }
    UnicodeSetIterator iter(set);
    while (iter.next()) {
        const UnicodeString &boundary = iter.getString();
        uint32_t gcMask = U_GET_GC_MASK(boundary.char32At(1));
        if ((gcMask & (U_GC_L_MASK | U_GC_CN_MASK)) == 0) {
            // Ignore boundaries for the special reordering groups.
            // Take only those for "real scripts" (where the sample character is a Letter,
            // and the one for unassigned implicit weights (Cn).
            continue;
        }
        UnicodeString *s = new UnicodeString(boundary);
        if (s == NULL) {
            status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        dest->addElement(s, status);
    }
    return dest.orphan();
}

开发者ID:DavidCai1993，项目名称:node，代码行数:39，代码来源:alphaindex.cpp

示例13: DictionaryBreakEngine

CjkBreakEngine::CjkBreakEngine(DictionaryMatcher *adoptDictionary, LanguageType type, UErrorCode &status)
: DictionaryBreakEngine(1 << UBRK_WORD), fDictionary(adoptDictionary) {
    // Korean dictionary only includes Hangul syllables
    fHangulWordSet.applyPattern(UNICODE_STRING_SIMPLE("[\\uac00-\\ud7a3]"), status);
    fHanWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Han:]"), status);
    fKatakanaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[[:Katakana:]\\uff9e\\uff9f]"), status);
    fHiraganaWordSet.applyPattern(UNICODE_STRING_SIMPLE("[:Hiragana:]"), status);

    if (U_SUCCESS(status)) {
        // handle Korean and Japanese/Chinese using different dictionaries
        if (type == kKorean) {
            setCharacters(fHangulWordSet);
        } else { //Chinese and Japanese
            UnicodeSet cjSet;
            cjSet.addAll(fHanWordSet);
            cjSet.addAll(fKatakanaWordSet);
            cjSet.addAll(fHiraganaWordSet);
            cjSet.add(0xFF70);
            cjSet.add(0x30FC);
            setCharacters(cjSet);
        }
    }
}

开发者ID:alfintatorkace，项目名称:osx-10.9-opensource，代码行数:23，代码来源:dictbe.cpp

示例14: assertTrue

void StaticUnicodeSetsTest::testSetCoverage() {
    UErrorCode status = U_ZERO_ERROR;

    // Lenient comma/period should be supersets of strict comma/period;
    // it also makes the coverage logic cheaper.
    assertTrue(
            "COMMA should be superset of STRICT_COMMA",
            get(unisets::COMMA)->containsAll(*get(unisets::STRICT_COMMA)));
    assertTrue(
            "PERIOD should be superset of STRICT_PERIOD",
            get(unisets::PERIOD)->containsAll(*get(unisets::STRICT_PERIOD)));

    UnicodeSet decimals;
    decimals.addAll(*get(unisets::STRICT_COMMA));
    decimals.addAll(*get(unisets::STRICT_PERIOD));
    decimals.freeze();
    UnicodeSet grouping;
    grouping.addAll(decimals);
    grouping.addAll(*get(unisets::OTHER_GROUPING_SEPARATORS));
    decimals.freeze();

    const UnicodeSet &plusSign = *get(unisets::PLUS_SIGN);
    const UnicodeSet &minusSign = *get(unisets::MINUS_SIGN);
    const UnicodeSet &percent = *get(unisets::PERCENT_SIGN);
    const UnicodeSet &permille = *get(unisets::PERMILLE_SIGN);
    const UnicodeSet &infinity = *get(unisets::INFINITY_KEY);

    int32_t localeCount;
    const Locale* allAvailableLocales = Locale::getAvailableLocales(localeCount);
    for (int32_t i = 0; i < localeCount; i++) {
        Locale locale = allAvailableLocales[i];
        DecimalFormatSymbols dfs(locale, status);
        UnicodeString localeName;
        locale.getDisplayName(localeName);
        assertSuccess(UnicodeString("Making DFS for ") + localeName, status);

#define ASSERT_IN_SET(name, foo) assertInSet(localeName, UnicodeString("" #name ""), name, foo)
        ASSERT_IN_SET(decimals, dfs.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol));
        ASSERT_IN_SET(grouping, dfs.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol));
        ASSERT_IN_SET(plusSign, dfs.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol));
        ASSERT_IN_SET(minusSign, dfs.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol));
        ASSERT_IN_SET(percent, dfs.getConstSymbol(DecimalFormatSymbols::kPercentSymbol));
        ASSERT_IN_SET(permille, dfs.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol));
        ASSERT_IN_SET(infinity, dfs.getConstSymbol(DecimalFormatSymbols::kInfinitySymbol));
    }
}

开发者ID:winlibs，项目名称:icu4c，代码行数:46，代码来源:static_unisets_test.cpp

示例15: clear

/**
 * Parse the pattern from the given RuleCharacterIterator.  The
 * iterator is advanced over the parsed pattern.
 * @param chars iterator over the pattern characters.  Upon return
 * it will be advanced to the first character after the parsed
 * pattern, or the end of the iteration if all characters are
 * parsed.
 * @param symbols symbol table to use to parse and dereference
 * variables, or null if none.
 * @param rebuiltPat the pattern that was parsed, rebuilt or
 * copied from the input pattern, as appropriate.
 * @param options a bit mask of zero or more of the following:
 * IGNORE_SPACE, CASE.
 */
void UnicodeSet::applyPattern(RuleCharacterIterator& chars,
                              const SymbolTable* symbols,
                              UnicodeString& rebuiltPat,
                              uint32_t options,
                              UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
                              UErrorCode& ec) {
    if (U_FAILURE(ec)) return;

    // Syntax characters: [ ] ^ - & { }

    // Recognized special forms for chars, sets: c-c s-s s&s

    int32_t opts = RuleCharacterIterator::PARSE_VARIABLES |
                   RuleCharacterIterator::PARSE_ESCAPES;
    if ((options & USET_IGNORE_SPACE) != 0) {
        opts |= RuleCharacterIterator::SKIP_WHITESPACE;
    }

    UnicodeString patLocal, buf;
    UBool usePat = FALSE;
    UnicodeSetPointer scratch;
    RuleCharacterIterator::Pos backup;

    // mode: 0=before [, 1=between [...], 2=after ]
    // lastItem: 0=none, 1=char, 2=set
    int8_t lastItem = 0, mode = 0;
    UChar32 lastChar = 0;
    UChar op = 0;

    UBool invert = FALSE;

    clear();

    while (mode != 2 && !chars.atEnd()) {
        U_ASSERT((lastItem == 0 && op == 0) ||
                 (lastItem == 1 && (op == 0 || op == HYPHEN /*'-'*/)) ||
                 (lastItem == 2 && (op == 0 || op == HYPHEN /*'-'*/ ||
                                    op == INTERSECTION /*'&'*/)));

        UChar32 c = 0;
        UBool literal = FALSE;
        UnicodeSet* nested = 0; // alias - do not delete

        // -------- Check for property pattern

        // setMode: 0=none, 1=unicodeset, 2=propertypat, 3=preparsed
        int8_t setMode = 0;
        if (resemblesPropertyPattern(chars, opts)) {
            setMode = 2;
        }

        // -------- Parse '[' of opening delimiter OR nested set.
        // If there is a nested set, use `setMode' to define how
        // the set should be parsed.  If the '[' is part of the
        // opening delimiter for this pattern, parse special
        // strings "[", "[^", "[-", and "[^-".  Check for stand-in
        // characters representing a nested set in the symbol
        // table.

        else {
            // Prepare to backup if necessary
            chars.getPos(backup);
            c = chars.next(opts, literal, ec);
            if (U_FAILURE(ec)) return;

            if (c == 0x5B /*'['*/ && !literal) {
                if (mode == 1) {
                    chars.setPos(backup); // backup
                    setMode = 1;
                } else {
                    // Handle opening '[' delimiter
                    mode = 1;
                    patLocal.append((UChar) 0x5B /*'['*/);
                    chars.getPos(backup); // prepare to backup
                    c = chars.next(opts, literal, ec); 
                    if (U_FAILURE(ec)) return;
                    if (c == 0x5E /*'^'*/ && !literal) {
                        invert = TRUE;
                        patLocal.append((UChar) 0x5E /*'^'*/);
                        chars.getPos(backup); // prepare to backup
                        c = chars.next(opts, literal, ec);
                        if (U_FAILURE(ec)) return;
                    }
                    // Fall through to handle special leading '-';
                    // otherwise restart loop for nested [], \p{}, etc.
                    if (c == HYPHEN /*'-'*/) {
//.........这里部分代码省略.........

开发者ID:ThomasWo，项目名称:proto-quic，代码行数:101，代码来源:uniset_props.cpp

注：本文中的UnicodeSet类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。