本文整理汇总了C++中UnicodeSet::size方法的典型用法代码示例。如果您正苦于以下问题:C++ UnicodeSet::size方法的具体用法?C++ UnicodeSet::size怎么用?C++ UnicodeSet::size使用的例子?那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类UnicodeSet的用法示例。
在下文中一共展示了UnicodeSet::size方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: TestGetCount
/**
 * Tests CompoundTransliterator::getCount() by comparing the counts reported by
 * several compound transliterators built from different ID strings, then does a
 * quick smoke test of getTargetSet() and getSourceSet().
 *
 * All transliterators created here are released on every exit path, including
 * the early returns taken when construction fails.
 */
void CompoundTransliteratorTest::TestGetCount(){
    logln("Testing the getCount() API of CompoundTransliterator");
    UErrorCode status = U_ZERO_ERROR;
    UParseError parseError;
    CompoundTransliterator *ct1=new CompoundTransliterator("Halfwidth-Fullwidth;Fullwidth-Halfwidth", parseError, status);
    CompoundTransliterator *ct2=new CompoundTransliterator("Any-Hex;Hex-Any;Cyrillic-Latin;Latin-Cyrillic", parseError, status);
    // ct3 is only an alias of ct1 (same object); it is never deleted separately.
    CompoundTransliterator *ct3=ct1;
    if (U_FAILURE(status)) {
        dataerrln("FAILED: CompoundTransliterator constructor failed - %s", u_errorName(status));
        delete ct1;
        delete ct2;
        return;
    }

    CompoundTransliterator *ct4=new CompoundTransliterator("Latin-Devanagari", parseError, status);
    // Check the construction status before copying ct4, so that a failed
    // object is never used as the source of the copy constructor.
    if (U_FAILURE(status)) {
        errln("FAILED: CompoundTransliterator constructor failed");
        delete ct1;
        delete ct2;
        delete ct4;
        return;
    }
    CompoundTransliterator *ct5=new CompoundTransliterator(*ct4);

    // Expected relations: ct1 and its alias ct3 agree, ct4 and its copy ct5 agree,
    // and every other pairing compared below reports a different count.
    if(ct1->getCount() == ct2->getCount() || ct1->getCount() != ct3->getCount() ||
        ct2->getCount() == ct3->getCount() ||
        ct4->getCount() != ct5->getCount() || ct4->getCount() == ct1->getCount() ||
        ct4->getCount() == ct2->getCount() || ct4->getCount() == ct3->getCount() ||
        ct5->getCount() == ct2->getCount() || ct5->getCount() == ct3->getCount() ) {
        errln("Error: getCount() failed");
    }

    /* Quick test getTargetSet(), only test that it doesn't die. TODO: a better test. */
    UnicodeSet ts;
    UnicodeSet *retUS = &ct1->getTargetSet(ts);
    if (retUS != &ts || ts.size() == 0) {
        errln("CompoundTransliterator::getTargetSet() failed.\n");
    }

    /* Quick test getSourceSet(), only test that it doesn't die. TODO: a better test. */
    UnicodeSet ss;
    retUS = &ct1->getSourceSet(ss);
    if (retUS != &ss || ss.size() == 0) {
        errln("CompoundTransliterator::getSourceSet() failed.\n");
    }

    delete ct1;
    delete ct2;
    delete ct4;
    delete ct5;
}
示例2: addIndexExemplars
void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
if (U_FAILURE(status)) {
return;
}
UnicodeSet exemplars;
ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
if (U_SUCCESS(status)) {
initialLabels_->addAll(exemplars);
return;
}
status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR
// The locale data did not include explicit Index characters.
// Synthesize a set of them from the locale's standard exemplar characters.
ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
if (U_FAILURE(status)) {
return;
}
// question: should we add auxiliary exemplars?
if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
exemplars.add(0x61, 0x7A);
}
if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
// cut down to small list
exemplars.remove(0xAC00, 0xD7A3).
add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
add(0xD30C).add(0xD558);
}
if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block
// cut down to small list
// make use of the fact that Ethiopic is allocated in 8's, where
// the base is 0 mod 8.
UnicodeSet ethiopic(
UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
UnicodeSetIterator it(ethiopic);
while (it.next() && !it.isString()) {
if ((it.getCodepoint() & 0x7) != 0) {
exemplars.remove(it.getCodepoint());
}
}
}
// Upper-case any that aren't already so.
// (We only do this for synthesized index characters.)
UnicodeSetIterator it(exemplars);
UnicodeString upperC;
while (it.next()) {
const UnicodeString &exemplarC = it.getString();
upperC = exemplarC;
upperC.toUpper(locale);
initialLabels_->add(upperC);
}
}
示例3: addIndexExemplars
void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
if (U_FAILURE(status)) { return; }
// Chinese index characters, which are specific to each of the several Chinese tailorings,
// take precedence over the single locale data exemplar set per language.
const char *language = locale.getLanguage();
if (uprv_strcmp(language, "zh") == 0 || uprv_strcmp(language, "ja") == 0 ||
uprv_strcmp(language, "ko") == 0) {
// TODO: This should be done regardless of the language, but it's expensive.
// We should add a Collator function (can be @internal)
// to enumerate just the contractions that start with a given code point or string.
if (addChineseIndexCharacters(status) || U_FAILURE(status)) {
return;
}
}
LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
if (U_FAILURE(status)) {
return;
}
UnicodeSet exemplars;
ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
if (U_SUCCESS(status)) {
initialLabels_->addAll(exemplars);
return;
}
status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR
// The locale data did not include explicit Index characters.
// Synthesize a set of them from the locale's standard exemplar characters.
ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
if (U_FAILURE(status)) {
return;
}
// question: should we add auxiliary exemplars?
if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
exemplars.add(0x61, 0x7A);
}
if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
// cut down to small list
exemplars.remove(0xAC00, 0xD7A3).
add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
add(0xD30C).add(0xD558);
}
if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block
// cut down to small list
// make use of the fact that Ethiopic is allocated in 8's, where
// the base is 0 mod 8.
UnicodeSet ethiopic(
UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
UnicodeSetIterator it(ethiopic);
while (it.next() && !it.isString()) {
if ((it.getCodepoint() & 0x7) != 0) {
exemplars.remove(it.getCodepoint());
}
}
}
// Upper-case any that aren't already so.
// (We only do this for synthesized index characters.)
UnicodeSetIterator it(exemplars);
UnicodeString upperC;
while (it.next()) {
const UnicodeString &exemplarC = it.getString();
upperC = exemplarC;
upperC.toUpper(locale);
initialLabels_->add(upperC);
}
}
示例4: firstStringsInScript
//
// First characters in scripts.
// Create a UVector whose contents are pointers to UnicodeStrings for the First Characters in each script.
// The vector is sorted according to this index's collation.
//
// This code is too slow to use, so for now hard code the data.
// Hard coded implementation is follows.
//
UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErrorCode &status) {
if (U_FAILURE(status)) {
return NULL;
}
UnicodeString results[USCRIPT_CODE_LIMIT];
UnicodeString LOWER_A = UNICODE_STRING_SIMPLE("a");
UnicodeSetIterator siter(*TO_TRY);
while (siter.next()) {
const UnicodeString ¤t = siter.getString();
Collator::EComparisonResult r = ruleBasedCollator->compare(current, LOWER_A);
if (r < 0) { // TODO fix; we only want "real" script characters, not
// symbols.
continue;
}
int script = uscript_getScript(current.char32At(0), &status);
if (results[script].length() == 0) {
results[script] = current;
}
else if (ruleBasedCollator->compare(current, results[script]) < 0) {
results[script] = current;
}
}
UnicodeSet extras;
UnicodeSet expansions;
RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(ruleBasedCollator);
const UCollator *uRuleBasedCollator = rbc->getUCollator();
ucol_getContractionsAndExpansions(uRuleBasedCollator, extras.toUSet(), expansions.toUSet(), true, &status);
extras.addAll(expansions).removeAll(*TO_TRY);
if (extras.size() != 0) {
const Normalizer2 *normalizer = Normalizer2::getNFKCInstance(status);
UnicodeSetIterator extrasIter(extras);
while (extrasIter.next()) {
const UnicodeString ¤t = extrasIter.next();
if (!TO_TRY->containsAll(current))
continue;
if (!normalizer->isNormalized(current, status) ||
ruleBasedCollator->compare(current, LOWER_A) < 0) {
continue;
}
int script = uscript_getScript(current.char32At(0), &status);
if (results[script].length() == 0) {
results[script] = current;
} else if (ruleBasedCollator->compare(current, results[script]) < 0) {
results[script] = current;
}
}
}
UVector *dest = new UVector(status);
dest->setDeleter(uprv_deleteUObject);
for (uint32_t i = 0; i < sizeof(results) / sizeof(results[0]); ++i) {
if (results[i].length() > 0) {
dest->addElement(results[i].clone(), status);
}
}
dest->sortWithUComparator(sortCollateComparator, ruleBasedCollator, status);
return dest;
}
示例5: buildIndex
void AlphabeticIndex::buildIndex(UErrorCode &status) {
if (U_FAILURE(status)) {
return;
}
if (!indexBuildRequired_) {
return;
}
// Discard any already-built data.
// This is important when the user builds and uses an index, then subsequently modifies it,
// necessitating a rebuild.
bucketList_->removeAllElements();
labels_->removeAllElements();
uhash_removeAll(alreadyIn_);
noDistinctSorting_->clear();
notAlphabetic_->clear();
// first sort the incoming Labels, with a "best" ordering among items
// that are the same according to the collator
UVector preferenceSorting(status); // Vector of UnicodeStrings; owned by the vector.
preferenceSorting.setDeleter(uprv_deleteUObject);
appendUnicodeSetToUVector(preferenceSorting, *initialLabels_, status);
preferenceSorting.sortWithUComparator(PreferenceComparator, &status, status);
// We now make a set of Labels.
// Some of the input may, however, be redundant.
// That is, we might have c, ch, d, where "ch" sorts just like "c", "h"
// So we make a pass through, filtering out those cases.
// TODO: filtering these out would seem to be at odds with the eventual goal
// of being able to split buckets that contain too many items.
UnicodeSet labelSet;
for (int32_t psIndex=0; psIndex<preferenceSorting.size(); psIndex++) {
UnicodeString item = *static_cast<const UnicodeString *>(preferenceSorting.elementAt(psIndex));
// TODO: Since preferenceSorting was originally populated from the contents of a UnicodeSet,
// is it even possible for duplicates to show up in this check?
if (labelSet.contains(item)) {
UnicodeSetIterator itemAlreadyInIter(labelSet);
while (itemAlreadyInIter.next()) {
const UnicodeString &itemAlreadyIn = itemAlreadyInIter.getString();
if (collatorPrimaryOnly_->compare(item, itemAlreadyIn) == 0) {
UnicodeSet *targets = static_cast<UnicodeSet *>(uhash_get(alreadyIn_, &itemAlreadyIn));
if (targets == NULL) {
// alreadyIn.put(itemAlreadyIn, targets = new LinkedHashSet<String>());
targets = new UnicodeSet();
uhash_put(alreadyIn_, itemAlreadyIn.clone(), targets, &status);
}
targets->add(item);
break;
}
}
} else if (item.moveIndex32(0, 1) < item.length() && // Label contains more than one code point.
collatorPrimaryOnly_->compare(item, separated(item)) == 0) {
noDistinctSorting_->add(item);
} else if (!ALPHABETIC->containsSome(item)) {
notAlphabetic_->add(item);
} else {
labelSet.add(item);
}
}
// If we have no labels, hard-code a fallback default set of [A-Z]
// This case can occur with locales that don't have exemplar character data, including root.
// A no-labels situation will cause other problems; it needs to be avoided.
if (labelSet.isEmpty()) {
labelSet.add((UChar32)0x41, (UChar32)0x5A);
}
// Move the set of Labels from the set into a vector, and sort
// according to the collator.
appendUnicodeSetToUVector(*labels_, labelSet, status);
labels_->sortWithUComparator(sortCollateComparator, collatorPrimaryOnly_, status);
// if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
// Implemented by copying the elements to be retained to a new UVector.
const int32_t size = labelSet.size() - 1;
if (size > maxLabelCount_) {
UVector *newLabels = new UVector(status);
newLabels->setDeleter(uprv_deleteUObject);
int32_t count = 0;
int32_t old = -1;
for (int32_t srcIndex=0; srcIndex<labels_->size(); srcIndex++) {
const UnicodeString *str = static_cast<const UnicodeString *>(labels_->elementAt(srcIndex));
++count;
const int32_t bump = count * maxLabelCount_ / size;
if (bump == old) {
// it.remove();
} else {
newLabels->addElement(str->clone(), status);
old = bump;
}
}
delete labels_;
labels_ = newLabels;
}
//.........这里部分代码省略.........