当前位置: 首页>>代码示例>>C++>>正文


C++ UnicodeSet::contains方法代码示例

本文整理汇总了C++中UnicodeSet::contains方法的典型用法代码示例。如果您正苦于以下问题:C++ UnicodeSet::contains方法的具体用法?C++ UnicodeSet::contains怎么用?C++ UnicodeSet::contains使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类UnicodeSet的用法示例。


在下文中一共展示了UnicodeSet::contains方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: next

/*
 * Return the next break, counting words and spaces.
 *
 * Boundaries reported by the underlying iterator (fBreakIter) are skipped
 * while the characters on both sides of the boundary belong to
 * fComplexContext. The value returned is the accepted boundary minus the
 * number of space positions consumed so far (fSpaceCount). When the accepted
 * boundary sits on U+0020, the underlying iterator is advanced once more and
 * the distance it covers is added to fSpaceCount.
 *
 * Returns BreakIterator::DONE once the underlying iterator is exhausted, and
 * on every call after that.
 */
int32_t SpaceBreakIterator::next()
{
    if (fDone) {
        return BreakIterator::DONE;
    }

    // A boundary at the end of the text has no character to its right, so the
    // loop condition checks nextBreak < fTextCount before reading
    // fText[nextBreak] (the same guard the space check below uses).
    int32_t nextBreak;
    do {
        nextBreak = fBreakIter->next();

        if (nextBreak == BreakIterator::DONE) {
            fDone = TRUE;
            return BreakIterator::DONE;
        }
    }
    while(nextBreak > 0 && nextBreak < fTextCount
            && fComplexContext.contains(fText[nextBreak-1])
            && fComplexContext.contains(fText[nextBreak]));

    // Report the position with previously consumed spaces taken out.
    int32_t result = nextBreak - fSpaceCount;

    if (nextBreak < fTextCount) {
        if (fText[nextBreak] == 0x0020 /*Unicode::isSpaceChar(fText[nextBreak])*/) {
            // Step over the space run and remember how much was skipped.
            fSpaceCount += fBreakIter->next() - nextBreak;
        }
    }

    fWordCount += 1;

    return result;
}
开发者ID:MIPS,项目名称:external-icu,代码行数:33,代码来源:thaitest.cpp

示例2: initPinyinBounds

void AlphabeticIndex::initPinyinBounds(const Collator *col, UErrorCode &status) {
    {
        Mutex m;
        if (PINYIN_LOWER_BOUNDS != NULL) {
            return;
        }
    }
    UnicodeSet *colSet = col->getTailoredSet(status);
    if (U_FAILURE(status) || colSet == NULL) {
        delete colSet;
        if (U_SUCCESS(status))  {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
        return;
    }
    UBool useLongTables = colSet->contains(probeCharInLong);
    delete colSet;
    {
        Mutex m;
        if (useLongTables) {
            PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_LONG;
            HACK_PINYIN_LOOKUP  = &HACK_PINYIN_LOOKUP_LONG;
        } else {
            PINYIN_LOWER_BOUNDS = PINYIN_LOWER_BOUNDS_SHORT;
            HACK_PINYIN_LOOKUP  = &HACK_PINYIN_LOOKUP_SHORT;
        }
    }
}
开发者ID:0omega,项目名称:platform_external_icu4c,代码行数:28,代码来源:alphaindex.cpp

示例3: assertInSet

void StaticUnicodeSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
                              const UnicodeSet &set, UChar32 cp) {
    // If this test case fails, add the specified code point to the corresponding set in
    // UnicodeSetStaticCache.java and numparse_unisets.cpp
    assertTrue(
            localeName + UnicodeString(u" ") + UnicodeString(cp) + UnicodeString(u" is missing in ") +
            setName, set.contains(cp));
}
开发者ID:winlibs,项目名称:icu4c,代码行数:8,代码来源:static_unisets_test.cpp

示例4: fontContainsCharacter

static bool fontContainsCharacter(const FontPlatformData* fontData,
                                  const wchar_t* family, UChar32 character)
{
    // FIXME: For non-BMP characters, GetFontUnicodeRanges is of
    // no use. We have to read directly from the cmap table of a font.
    // Return true for now.
    if (character > 0xFFFF)
        return true;

    // This cache is just leaked on shutdown.
    static FontCmapCache* fontCmapCache = 0;
    if (!fontCmapCache)
        fontCmapCache = new FontCmapCache;

    HashMap<const wchar_t*, UnicodeSet*>::iterator it = fontCmapCache->find(family);
    if (it != fontCmapCache->end()) 
        return it->second->contains(character);
    
    HFONT hfont = fontData->hfont(); 
    HDC hdc = GetDC(0);
    HGDIOBJ oldFont = static_cast<HFONT>(SelectObject(hdc, hfont));
    int count = GetFontUnicodeRanges(hdc, 0);
    if (count == 0 && ChromiumBridge::ensureFontLoaded(hfont))
        count = GetFontUnicodeRanges(hdc, 0);
    if (count == 0) {
        ASSERT_NOT_REACHED();
        SelectObject(hdc, oldFont);
        ReleaseDC(0, hdc);
        return true;
    }

    static Vector<char, 512> glyphsetBuffer;
    glyphsetBuffer.resize(GetFontUnicodeRanges(hdc, 0));
    GLYPHSET* glyphset = reinterpret_cast<GLYPHSET*>(glyphsetBuffer.data());
    // In addition, refering to the OS/2 table and converting the codepage list
    // to the coverage map might be faster. 
    count = GetFontUnicodeRanges(hdc, glyphset);
    ASSERT(count > 0);
    SelectObject(hdc, oldFont);
    ReleaseDC(0, hdc);

    // FIXME: consider doing either of the following two:
    // 1) port back ICU 4.0's faster look-up code for UnicodeSet
    // 2) port Mozilla's CompressedCharMap or gfxSparseBitset
    unsigned i = 0;
    UnicodeSet* cmap = new UnicodeSet;
    while (i < glyphset->cRanges) {
        WCHAR start = glyphset->ranges[i].wcLow; 
        cmap->add(start, start + glyphset->ranges[i].cGlyphs - 1);
        i++;
    }
    cmap->freeze();
    // We don't lowercase |family| because all of them are under our control
    // and they're already lowercased. 
    fontCmapCache->set(family, cmap); 
    return cmap->contains(character);
}
开发者ID:jackiekaon,项目名称:owb-mirror,代码行数:57,代码来源:FontCacheChromiumWin.cpp

示例5: setProps

/**
 * Collects the name strings of one code point and hands them to addLine().
 *
 * Nothing is done unless newValues contains UCHAR_NAME or PPUCD_NAME_ALIAS.
 * Slot 0 of the names array receives props.name; slot 3 receives the text
 * following "correction=" in props.nameAlias (up to the next ',' or the end
 * of the string). Slots 1 and 2 are left NULL with length 0. Each name that
 * is set is also passed to parseName().
 *
 * @param props      Properties of a single code point (start must equal end).
 * @param newValues  Set of properties that changed for this code point.
 * @param errorCode  ICU error code; the function returns immediately if it
 *                   already indicates failure, and returns early if copying
 *                   the alias text fails.
 */
void
NamesPropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
                            UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    if(!newValues.contains(UCHAR_NAME) && !newValues.contains(PPUCD_NAME_ALIAS)) {
        return;
    }

    U_ASSERT(props.start==props.end);

    const char *names[4]={ NULL, NULL, NULL, NULL };
    int16_t lengths[4]={ 0, 0, 0, 0 };

    /* get the character name */
    if(props.name!=NULL) {
        names[0]=props.name;
        lengths[0]=(int16_t)uprv_strlen(props.name);
        parseName(names[0], lengths[0]);
    }

    // Declared outside the if-block so the copied alias outlives addLine().
    CharString buffer;
    if(props.nameAlias!=NULL) {
        /*
         * Only use "correction" aliases for now, from Unicode 6.1 NameAliases.txt with 3 fields per line.
         * TODO: Work on ticket #8963 to deal with multiple type:alias pairs per character.
         */
        const char *corr=uprv_strstr(props.nameAlias, "correction=");
        if(corr!=NULL) {
            corr+=11;  // skip "correction="
            const char *limit=uprv_strchr(corr, ',');
            if(limit!=NULL) {
                // More fields follow: copy just the alias so it is NUL-terminated.
                buffer.append(corr, (int32_t)(limit-corr), errorCode);
                // If the copy failed, buffer does not hold limit-corr bytes;
                // stop instead of passing a length the data cannot back.
                if(U_FAILURE(errorCode)) { return; }
                names[3]=buffer.data();
                lengths[3]=(int16_t)(limit-corr);
            } else {
                // The alias runs to the end of the string; use it in place.
                names[3]=corr;
                lengths[3]=(int16_t)uprv_strlen(corr);
            }
            parseName(names[3], lengths[3]);
        }
    }

    addLine(props.start, names, lengths, LENGTHOF(names));
}
开发者ID:icu-project,项目名称:icu-tools,代码行数:44,代码来源:namespropsbuilder.cpp

示例6: span

 /**
  * Returns the length of the initial run of s whose code points all have
  * the containment value tf in set.
  *
  * Code points are decoded from s with U16_NEXT. The result is the offset of
  * the first code point for which set.contains() differs from tf, or the
  * offset reached when no such code point is found before length.
  */
 static int32_t span(const UnicodeSet &set, const UChar *s, int32_t length, UBool tf) {
     int32_t pos=0;
     for(;;) {
         // Remember where the next code point starts before decoding it.
         const int32_t cpStart=pos;
         if(cpStart>=length) {
             return cpStart;
         }
         UChar32 c;
         U16_NEXT(s, pos, length, c);
         if(tf!=set.contains(c)) {
             return cpStart;
         }
     }
 }
开发者ID:LittoCats,项目名称:OT_4010D,代码行数:11,代码来源:unisetperf.cpp

示例7: parseScriptExtensions

void
PreparsedUCD::parseScriptExtensions(const char *s, UnicodeSet &scx, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return; }
    scx.clear();
    CharString scString;
    for(;;) {
        const char *scs;
        const char *scLimit=strchr(s, ' ');
        if(scLimit!=NULL) {
            scs=scString.clear().append(s, (int32_t)(scLimit-s), errorCode).data();
            if(U_FAILURE(errorCode)) { return; }
        } else {
            scs=s;
        }
        int32_t script=pnames->getPropertyValueEnum(UCHAR_SCRIPT, scs);
        if(script==UCHAR_INVALID_CODE) {
            fprintf(stderr,
                    "error in preparsed UCD: '%s' is not a valid script code on line %ld\n",
                    scs, (long)lineNumber);
            errorCode=U_PARSE_ERROR;
            return;
        } else if(scx.contains(script)) {
            fprintf(stderr,
                    "error in preparsed UCD: scx has duplicate '%s' codes on line %ld\n",
                    scs, (long)lineNumber);
            errorCode=U_PARSE_ERROR;
            return;
        } else {
            scx.add(script);
        }
        if(scLimit!=NULL) {
            s=scLimit+1;
        } else {
            break;
        }
    }
    if(scx.isEmpty()) {
        fprintf(stderr, "error in preparsed UCD: empty scx= on line %ld\n", (long)lineNumber);
        errorCode=U_PARSE_ERROR;
    }
}
开发者ID:icu-project,项目名称:icu4c,代码行数:41,代码来源:ppucd.cpp

示例8: main


//.........这里部分代码省略.........
        };
    
        status = U_ZERO_ERROR;
        UChar *wordSourceU = new UChar[destCap+1];
        ucnv_toUChars(conv,
                      wordSourceU,     //  dest,
                      destCap+1,
                      wordSourceC,
                      wordFileSize,
                      &status);
        if (U_FAILURE(status)) {
            fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
            exit(status);
        };
        ucnv_close(conv);
    
        // Get rid of the original file buffer
        delete[] wordBufferC;
    
        // Create a MutableTrieDictionary, and loop through all the lines, inserting
        // words.
    
        // First, pick a median character.
        UChar *current = wordSourceU + (destCap/2);
        UChar uc = *current++;
        UnicodeSet breaks;
        breaks.add(0x000A);     // Line Feed
        breaks.add(0x000D);     // Carriage Return
        breaks.add(0x2028);     // Line Separator
        breaks.add(0x2029);     // Paragraph Separator
    
        do { 
            // Look for line break
            while (uc && !breaks.contains(uc)) {
                uc = *current++;
            }
            // Now skip to first non-line-break
            while (uc && breaks.contains(uc)) {
                uc = *current++;
            }
        }
        while (uc && (breaks.contains(uc) || u_isspace(uc)));
    
        mtd = new MutableTrieDictionary(uc, status);
        
        if (U_FAILURE(status)) {
            fprintf(stderr, "new MutableTrieDictionary: ICU Error \"%s\"\n", u_errorName(status));
            exit(status);
        }
        
        // Now add the words. Words are non-space characters at the beginning of
        // lines, and must be at least one UChar. If a word has an associated value,
        // the value should follow the word on the same line after a tab character.
        current = wordSourceU;
        UChar *candidate = current;
        uc = *current++;
        int32_t length = 0;
        int count = 0;
                
        while (uc) {
            while (uc && !u_isspace(uc)) {
                ++length;
                uc = *current++;
            }
            
            UnicodeString valueString;
开发者ID:AutomationConsultant,项目名称:perch-webrtc,代码行数:67,代码来源:genctd.cpp

示例9: build


//.........这里部分代码省略.........
                if (U_FAILURE(*fStatus)) {
                    return;
                }
            }

            // The current rlRange is now entirely within the UnicodeSet range.
            // Add this unicode set to the list of sets for this rlRange
            if (rlRange->fIncludesSets->indexOf(usetNode) == -1) {
                rlRange->fIncludesSets->addElement(usetNode, *fStatus);
                if (U_FAILURE(*fStatus)) {
                    return;
                }
            }

            // Advance over ranges that we are finished with.
            if (inputSetRangeEnd == rlRange->fEndChar) {
                inputSetRangeIndex++;
            }
            rlRange = rlRange->fNext;
        }
    }

    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "range")) { printRanges();}

    //
    //  Group the above ranges, with each group consisting of one or more
    //    ranges that are in exactly the same set of original UnicodeSets.
    //    The groups are numbered, and these group numbers are the set of
    //    input symbols recognized by the run-time state machine.
    //
    //    Numbering: # 0  (state table column 0) is unused.
    //               # 1  is reserved - table column 1 is for end-of-input
    //               # 2  is reserved - table column 2 is for beginning-in-input
    //               # 3  is the first range list.
    //
    RangeDescriptor *rlSearchRange;
    for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
        for (rlSearchRange=fRangeList; rlSearchRange != rlRange; rlSearchRange=rlSearchRange->fNext) {
            if (rlRange->fIncludesSets->equals(*rlSearchRange->fIncludesSets)) {
                rlRange->fNum = rlSearchRange->fNum;
                break;
            }
        }
        if (rlRange->fNum == 0) {
            fGroupCount ++;
            rlRange->fNum = fGroupCount+2; 
            rlRange->setDictionaryFlag();
            addValToSets(rlRange->fIncludesSets, fGroupCount+2);
        }
    }

    // Handle input sets that contain the special string {eof}.
    //   Column 1 of the state table is reserved for EOF on input.
    //   Column 2 is reserved for before-the-start-input.
    //            (This column can be optimized away later if there are no rule
    //             references to {bof}.)
    //   Add this column value (1 or 2) to the equivalent expression
    //     subtree for each UnicodeSet that contains the string {eof}
    //   Because {bof} and {eof} are not characters in the normal sense,
    //   they don't affect the computation of ranges or TRIE.
    static const UChar eofUString[] = {0x65, 0x6f, 0x66, 0};
    static const UChar bofUString[] = {0x62, 0x6f, 0x66, 0};

    UnicodeString eofString(eofUString);
    UnicodeString bofString(bofUString);
    for (ni=0; ; ni++) {        // Loop over each of the UnicodeSets encountered in the input rules
        usetNode = (RBBINode *)this->fRB->fUSetNodes->elementAt(ni);
        if (usetNode==NULL) {
            break;
        }
        UnicodeSet      *inputSet = usetNode->fInputSet;
        if (inputSet->contains(eofString)) {
            addValToSet(usetNode, 1);
        }
        if (inputSet->contains(bofString)) {
            addValToSet(usetNode, 2);
            fSawBOF = TRUE;
        }
    }


    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "rgroup")) {printRangeGroups();}
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "esets")) {printSets();}

    //
    // Build the Trie table for mapping UChar32 values to the corresponding
    //   range group number
    //
    fTrie = utrie_open(NULL,    //  Pre-existing trie to be filled in
                      NULL,    //  Data array  (utrie will allocate one)
                      100000,  //  Max Data Length
                      0,       //  Initial value for all code points
                      0,       //  Lead surrogate unit value
                      TRUE);   //  Keep Latin 1 in separately


    for (rlRange = fRangeList; rlRange!=0; rlRange=rlRange->fNext) {
        utrie_setRange32(fTrie, rlRange->fStartChar, rlRange->fEndChar+1, rlRange->fNum, TRUE);
    }
}
开发者ID:Katarzynasrom,项目名称:patch-hosting-for-android-x86-support,代码行数:101,代码来源:rbbisetb.cpp

示例10: setProps

void
CasePropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
                           UErrorCode &errorCode) {
    if(U_FAILURE(errorCode) || newValues.containsNone(relevantProps)) { return; }

    UChar32 start=props.start;
    UChar32 end=props.end;

    /* default: map to self */
    int32_t delta=0;

    uint32_t type;
    if(props.binProps[UCHAR_LOWERCASE]) {
        type=UCASE_LOWER;
    } else if(props.binProps[UCHAR_UPPERCASE]) {
        type=UCASE_UPPER;
    } else if(props.getIntProp(UCHAR_GENERAL_CATEGORY)==U_TITLECASE_LETTER) {
        type=UCASE_TITLE;
    } else {
        type=UCASE_NONE;
    }
    uint32_t value=type;

    UBool hasMapping=FALSE;
    if(props.suc>=0) {
        /* uppercase mapping as delta if the character is lowercase */
        hasMapping=TRUE;
        if(type==UCASE_LOWER) {
            delta=props.suc-start;
        } else {
            value|=UCASE_EXCEPTION;
        }
    }
    if(props.slc>=0) {
        /* lowercase mapping as delta if the character is uppercase or titlecase */
        hasMapping=TRUE;
        if(type>=UCASE_UPPER) {
            delta=props.slc-start;
        } else {
            value|=UCASE_EXCEPTION;
        }
    }
    if(props.stc>=0) {
        hasMapping=TRUE;
    }
    if(props.suc!=props.stc) {
        value|=UCASE_EXCEPTION;
    }
    if(!props.lc.isEmpty() || !props.uc.isEmpty() || !props.tc.isEmpty() ||
        newValues.contains(PPUCD_CONDITIONAL_CASE_MAPPINGS)
    ) {
        hasMapping=TRUE;
        value|=UCASE_EXCEPTION;
    }
    if( (props.scf>=0 && props.scf!=props.slc) ||
        (!props.cf.isEmpty() && props.cf!=UnicodeString(props.scf)) ||
        newValues.contains(PPUCD_TURKIC_CASE_FOLDING)
    ) {
        hasMapping=TRUE;
        value|=UCASE_EXCEPTION;
    }

    // Simple case folding falls back to simple lowercasing.
    // If there is no case folding but there is a lowercase mapping,
    // then add a case folding mapping to the code point.
    // For example: Cherokee uppercase syllables since Unicode 8.
    // (Full case folding falls back to simple case folding,
    // not to full lowercasing, so we need not also handle it specially
    // for such cases.)
    UChar32 scf=props.scf;
    if(scf<0 && props.slc>=0) {
        scf=start;
        hasMapping=TRUE;
        value|=UCASE_EXCEPTION;
    }

    if(delta<UCASE_MIN_DELTA || UCASE_MAX_DELTA<delta) {
        value|=UCASE_EXCEPTION;
    }

    if(props.binProps[UCHAR_SOFT_DOTTED]) {
        value|=UCASE_SOFT_DOTTED;
    }
    int32_t cc=props.getIntProp(UCHAR_CANONICAL_COMBINING_CLASS);
    if(cc!=0) {
        if(props.binProps[UCHAR_SOFT_DOTTED]) {
            fprintf(stderr, "genprops error: a soft-dotted character has ccc!=0\n");
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
            return;
        }
        if(cc==230) {
            value|=UCASE_ABOVE;
        } else {
            value|=UCASE_OTHER_ACCENT;
        }
    }

    if(props.binProps[UCHAR_CASE_IGNORABLE]) {
        value|=UCASE_IGNORABLE;
    }
//.........这里部分代码省略.........
开发者ID:icu-project,项目名称:icu-tools,代码行数:101,代码来源:casepropsbuilder.cpp

示例11: uspoof_check

U_CAPI int32_t U_EXPORT2
uspoof_check(const USpoofChecker *sc,
             const UChar *text, int32_t length,
             int32_t *position,
             UErrorCode *status) {
             
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == NULL) {
        return 0;
    }
    if (length < -1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if (length == -1) {
        // It's not worth the bother to handle nul terminated strings everywhere.
        //   Just get the length and be done with it.
        length = u_strlen(text);
    }

    int32_t result = 0;
    int32_t failPos = 0x7fffffff;   // TODO: do we have a #define for max int32?

    // A count of the number of non-Common or inherited scripts.
    // Needed for both the SINGLE_SCRIPT and the WHOLE/MIXED_SCRIPT_CONFUSABLE tests.
    // Share the computation when possible.  scriptCount == -1 means that we haven't
    // done it yet.
    int32_t scriptCount = -1;

    if ((This->fChecks) & USPOOF_SINGLE_SCRIPT) {
        scriptCount = This->scriptScan(text, length, failPos, *status);
        // printf("scriptCount (clipped to 2) = %d\n", scriptCount);
        if ( scriptCount >= 2) {
            // Note: scriptCount == 2 covers all cases of the number of scripts >= 2
            result |= USPOOF_SINGLE_SCRIPT;
        }
    }

    if (This->fChecks & USPOOF_CHAR_LIMIT) {
        int32_t i;
        UChar32 c;
        for (i=0; i<length ;) {
            U16_NEXT(text, i, length, c);
            if (!This->fAllowedCharsSet->contains(c)) {
                result |= USPOOF_CHAR_LIMIT;
                if (i < failPos) {
                    failPos = i;
                }
                break;
            }
        }
    }

    if (This->fChecks & 
        (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) {
        // These are the checks that need to be done on NFD input
        NFDBuffer   normalizedInput(text, length, *status);
        const UChar  *nfdText = normalizedInput.getBuffer();
        int32_t      nfdLength = normalizedInput.getLength();

        if (This->fChecks & USPOOF_INVISIBLE) {
           
            // scan for more than one occurrence of the same non-spacing mark
            // in a sequence of non-spacing marks.
            int32_t     i;
            UChar32     c;
            UChar32     firstNonspacingMark = 0;
            UBool       haveMultipleMarks = FALSE;  
            UnicodeSet  marksSeenSoFar;   // Set of combining marks in a single combining sequence.
            
            for (i=0; i<nfdLength ;) {
                U16_NEXT(nfdText, i, nfdLength, c);
                if (u_charType(c) != U_NON_SPACING_MARK) {
                    firstNonspacingMark = 0;
                    if (haveMultipleMarks) {
                        marksSeenSoFar.clear();
                        haveMultipleMarks = FALSE;
                    }
                    continue;
                }
                if (firstNonspacingMark == 0) {
                    firstNonspacingMark = c;
                    continue;
                }
                if (!haveMultipleMarks) {
                    marksSeenSoFar.add(firstNonspacingMark);
                    haveMultipleMarks = TRUE;
                }
                if (marksSeenSoFar.contains(c)) {
                    // report the error, and stop scanning.
                    // No need to find more than the first failure.
                    result |= USPOOF_INVISIBLE;
                    failPos = i;
                    // TODO: Bug 8655: failPos is the position in the NFD buffer, but what we want
                    //       to give back to our caller is a position in the original input string.
                    if (failPos > length) {
                        failPos = length;
                    }
                    break;
                }
//.........这里部分代码省略.........
开发者ID:BrunoReX,项目名称:palemoon,代码行数:101,代码来源:uspoof.cpp

示例12: buildIndex

void AlphabeticIndex::buildIndex(UErrorCode &status) {
    if (U_FAILURE(status)) {
        return;
    }
    if (!indexBuildRequired_) {
        return;
    }

    // Discard any already-built data.
    // This is important when the user builds and uses an index, then subsequently modifies it,
    // necessitating a rebuild.

    bucketList_->removeAllElements();
    labels_->removeAllElements();
    uhash_removeAll(alreadyIn_);
    noDistinctSorting_->clear();
    notAlphabetic_->clear();

    // first sort the incoming Labels, with a "best" ordering among items
    // that are the same according to the collator

    UVector preferenceSorting(status);   // Vector of UnicodeStrings; owned by the vector.
    preferenceSorting.setDeleter(uprv_deleteUObject);
    appendUnicodeSetToUVector(preferenceSorting, *initialLabels_, status);
    preferenceSorting.sortWithUComparator(PreferenceComparator, &status, status);

    // We now make a set of Labels.
    // Some of the input may, however, be redundant.
    // That is, we might have c, ch, d, where "ch" sorts just like "c", "h"
    // So we make a pass through, filtering out those cases.
    // TODO: filtering these out would seem to be at odds with the eventual goal
    //       of being able to split buckets that contain too many items.

    UnicodeSet labelSet;
    for (int32_t psIndex=0; psIndex<preferenceSorting.size(); psIndex++) {
        UnicodeString item = *static_cast<const UnicodeString *>(preferenceSorting.elementAt(psIndex));
        // TODO:  Since preferenceSorting was originally populated from the contents of a UnicodeSet,
        //        is it even possible for duplicates to show up in this check?
        if (labelSet.contains(item)) {
            UnicodeSetIterator itemAlreadyInIter(labelSet);
            while (itemAlreadyInIter.next()) {
                const UnicodeString &itemAlreadyIn = itemAlreadyInIter.getString();
                if (collatorPrimaryOnly_->compare(item, itemAlreadyIn) == 0) {
                    UnicodeSet *targets = static_cast<UnicodeSet *>(uhash_get(alreadyIn_, &itemAlreadyIn));
                    if (targets == NULL) {
                        // alreadyIn.put(itemAlreadyIn, targets = new LinkedHashSet<String>());
                        targets = new UnicodeSet();
                        uhash_put(alreadyIn_, itemAlreadyIn.clone(), targets, &status);
                    }
                    targets->add(item);
                    break;
                }
            }
        } else if (item.moveIndex32(0, 1) < item.length() &&  // Label contains more than one code point.
                   collatorPrimaryOnly_->compare(item, separated(item)) == 0) {
            noDistinctSorting_->add(item);
        } else if (!ALPHABETIC->containsSome(item)) {
            notAlphabetic_->add(item);
        } else {
            labelSet.add(item);
        }
    }

    // If we have no labels, hard-code a fallback default set of [A-Z]
    // This case can occur with locales that don't have exemplar character data, including root.
    // A no-labels situation will cause other problems; it needs to be avoided.
    if (labelSet.isEmpty()) {
        labelSet.add((UChar32)0x41, (UChar32)0x5A);
    }

    // Move the set of Labels from the set into a vector, and sort
    // according to the collator.

    appendUnicodeSetToUVector(*labels_, labelSet, status);
    labels_->sortWithUComparator(sortCollateComparator, collatorPrimaryOnly_, status);

    // if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
    //    Implemented by copying the elements to be retained to a new UVector.

    const int32_t size = labelSet.size() - 1;
    if (size > maxLabelCount_) {
        UVector *newLabels = new UVector(status);
        newLabels->setDeleter(uprv_deleteUObject);
        int32_t count = 0;
        int32_t old = -1;
        for (int32_t srcIndex=0; srcIndex<labels_->size(); srcIndex++) {
            const UnicodeString *str = static_cast<const UnicodeString *>(labels_->elementAt(srcIndex));
            ++count;
            const int32_t bump = count * maxLabelCount_ / size;
            if (bump == old) {
                // it.remove();
            } else {
                newLabels->addElement(str->clone(), status);
                old = bump;
            }
        }
        delete labels_;
        labels_ = newLabels;
    }

//.........这里部分代码省略.........
开发者ID:0omega,项目名称:platform_external_icu4c,代码行数:101,代码来源:alphaindex.cpp

示例13: uspoof_checkUnicodeString

U_CAPI int32_t U_EXPORT2
uspoof_checkUnicodeString(const USpoofChecker *sc,
                          const icu::UnicodeString &id, 
                          int32_t *position,
                          UErrorCode *status) {
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (This == NULL) {
        return 0;
    }
    int32_t result = 0;

    IdentifierInfo *identifierInfo = NULL;
    if ((This->fChecks) & (USPOOF_RESTRICTION_LEVEL | USPOOF_MIXED_NUMBERS)) {
        identifierInfo = This->getIdentifierInfo(*status);
        if (U_FAILURE(*status)) {
            goto cleanupAndReturn;
        }
        identifierInfo->setIdentifier(id, *status);
        identifierInfo->setIdentifierProfile(*This->fAllowedCharsSet);
    }


    if ((This->fChecks) & USPOOF_RESTRICTION_LEVEL) {
        URestrictionLevel idRestrictionLevel = identifierInfo->getRestrictionLevel(*status);
        if (idRestrictionLevel > This->fRestrictionLevel) {
            result |= USPOOF_RESTRICTION_LEVEL;
        }
        if (This->fChecks & USPOOF_AUX_INFO) {
            result |= idRestrictionLevel;
        }
    }

    if ((This->fChecks) & USPOOF_MIXED_NUMBERS) {
        const UnicodeSet *numerics = identifierInfo->getNumerics();
        if (numerics->size() > 1) {
            result |= USPOOF_MIXED_NUMBERS;
        }

        // TODO: ICU4J returns the UnicodeSet of the numerics found in the identifier.
        //       We have no easy way to do the same in C.
        // if (checkResult != null) {
        //     checkResult.numerics = numerics;
        // }
    }


    if (This->fChecks & (USPOOF_CHAR_LIMIT)) {
        int32_t i;
        UChar32 c;
        int32_t length = id.length();
        for (i=0; i<length ;) {
            c = id.char32At(i);
            i += U16_LENGTH(c);
            if (!This->fAllowedCharsSet->contains(c)) {
                result |= USPOOF_CHAR_LIMIT;
                break;
            }
        }
    }

    if (This->fChecks & 
        (USPOOF_WHOLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_INVISIBLE)) {
        // These are the checks that need to be done on NFD input
        UnicodeString nfdText;
        gNfdNormalizer->normalize(id, nfdText, *status);
        int32_t nfdLength = nfdText.length();

        if (This->fChecks & USPOOF_INVISIBLE) {
           
            // scan for more than one occurrence of the same non-spacing mark
            // in a sequence of non-spacing marks.
            int32_t     i;
            UChar32     c;
            UChar32     firstNonspacingMark = 0;
            UBool       haveMultipleMarks = FALSE;  
            UnicodeSet  marksSeenSoFar;   // Set of combining marks in a single combining sequence.
            
            for (i=0; i<nfdLength ;) {
                c = nfdText.char32At(i);
                i += U16_LENGTH(c);
                if (u_charType(c) != U_NON_SPACING_MARK) {
                    firstNonspacingMark = 0;
                    if (haveMultipleMarks) {
                        marksSeenSoFar.clear();
                        haveMultipleMarks = FALSE;
                    }
                    continue;
                }
                if (firstNonspacingMark == 0) {
                    firstNonspacingMark = c;
                    continue;
                }
                if (!haveMultipleMarks) {
                    marksSeenSoFar.add(firstNonspacingMark);
                    haveMultipleMarks = TRUE;
                }
                if (marksSeenSoFar.contains(c)) {
                    // report the error, and stop scanning.
                    // No need to find more than the first failure.
                    result |= USPOOF_INVISIBLE;
//.........这里部分代码省略.........
开发者ID:eyoung-father,项目名称:libicu_full,代码行数:101,代码来源:uspoof.cpp


注:本文中的UnicodeSet::contains方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。