本文整理汇总了C++中UnicodeSet::toUSet方法的典型用法代码示例。如果您正苦于以下问题:C++ UnicodeSet::toUSet方法的具体用法?C++ UnicodeSet::toUSet怎么用?C++ UnicodeSet::toUSet使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类UnicodeSet
的用法示例。
在下文中一共展示了UnicodeSet::toUSet方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: addIndexExemplars
void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
if (U_FAILURE(status)) {
return;
}
UnicodeSet exemplars;
ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
if (U_SUCCESS(status)) {
initialLabels_->addAll(exemplars);
return;
}
status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR
// The locale data did not include explicit Index characters.
// Synthesize a set of them from the locale's standard exemplar characters.
ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
if (U_FAILURE(status)) {
return;
}
// question: should we add auxiliary exemplars?
if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
exemplars.add(0x61, 0x7A);
}
if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
// cut down to small list
exemplars.remove(0xAC00, 0xD7A3).
add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
add(0xD30C).add(0xD558);
}
if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block
// cut down to small list
// make use of the fact that Ethiopic is allocated in 8's, where
// the base is 0 mod 8.
UnicodeSet ethiopic(
UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
UnicodeSetIterator it(ethiopic);
while (it.next() && !it.isString()) {
if ((it.getCodepoint() & 0x7) != 0) {
exemplars.remove(it.getCodepoint());
}
}
}
// Upper-case any that aren't already so.
// (We only do this for synthesized index characters.)
UnicodeSetIterator it(exemplars);
UnicodeString upperC;
while (it.next()) {
const UnicodeString &exemplarC = it.getString();
upperC = exemplarC;
upperC.toUpper(locale);
initialLabels_->add(upperC);
}
}
示例2: TestLocalXyzPointerMoveSwap
void LocalPointerTest::TestLocalXyzPointerMoveSwap() {
#if !UCONFIG_NO_NORMALIZATION
IcuTestErrorCode errorCode(*this, "TestLocalXyzPointerMoveSwap");
const UNormalizer2 *nfc=unorm2_getNFCInstance(errorCode);
const UNormalizer2 *nfd=unorm2_getNFDInstance(errorCode);
if(errorCode.logIfFailureAndReset("unorm2_getNF[CD]Instance()")) {
return;
}
UnicodeSet emptySet;
UNormalizer2 *p1 = unorm2_openFiltered(nfc, emptySet.toUSet(), errorCode);
UNormalizer2 *p2 = unorm2_openFiltered(nfd, emptySet.toUSet(), errorCode);
LocalUNormalizer2Pointer f1(p1);
LocalUNormalizer2Pointer f2(p2);
if(errorCode.logIfFailureAndReset("unorm2_openFiltered()")) {
return;
}
if(f1.isNull() || f2.isNull()) {
errln("LocalUNormalizer2Pointer failure");
return;
}
f1.swap(f2);
if(f1.getAlias() != p2 || f2.getAlias() != p1) {
errln("LocalUNormalizer2Pointer.swap() did not swap");
}
swap(f1, f2);
if(f1.getAlias() != p1 || f2.getAlias() != p2) {
errln("swap(LocalUNormalizer2Pointer) did not swap back");
}
LocalUNormalizer2Pointer f3;
f3.moveFrom(f1);
if(f3.getAlias() != p1 || f1.isValid()) {
errln("LocalUNormalizer2Pointer.moveFrom() did not move");
}
#if U_HAVE_RVALUE_REFERENCES
infoln("TestLocalXyzPointerMoveSwap() with rvalue references");
f1 = static_cast<LocalUNormalizer2Pointer &&>(f3);
if(f1.getAlias() != p1 || f3.isValid()) {
errln("LocalUNormalizer2Pointer move assignment operator did not move");
}
LocalUNormalizer2Pointer f4(static_cast<LocalUNormalizer2Pointer &&>(f2));
if(f4.getAlias() != p2 || f2.isValid()) {
errln("LocalUNormalizer2Pointer move constructor did not move");
}
#else
infoln("TestLocalXyzPointerMoveSwap() without rvalue references");
#endif
// Move self assignment leaves the object valid but in an undefined state.
// Do it to make sure there is no crash,
// but do not check for any particular resulting value.
f1.moveFrom(f1);
f3.moveFrom(f3);
#endif /* !UCONFIG_NO_NORMALIZATION */
}
示例3: m
U_CAPI const USet * U_EXPORT2
u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
if (U_FAILURE(*pErrorCode)) { return nullptr; }
if (property < 0 || UCHAR_BINARY_LIMIT <= property) {
*pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
return nullptr;
}
Mutex m(&cpMutex);
UnicodeSet *set = sets[property];
if (set == nullptr) {
sets[property] = set = makeSet(property, *pErrorCode);
}
if (U_FAILURE(*pErrorCode)) { return nullptr; }
return set->toUSet();
}
示例4: addChineseIndexCharacters
UBool AlphabeticIndex::addChineseIndexCharacters(UErrorCode &errorCode) {
UnicodeSet contractions;
ucol_getContractionsAndExpansions(collatorPrimaryOnly_->getUCollator(),
contractions.toUSet(), NULL, FALSE, &errorCode);
if (U_FAILURE(errorCode)) { return FALSE; }
UnicodeString firstHanBoundary;
UBool hasPinyin = FALSE;
UnicodeSetIterator iter(contractions);
while (iter.next()) {
const UnicodeString &s = iter.getString();
if (s.startsWith(BASE, BASE_LENGTH)) {
initialLabels_->add(s);
if (firstHanBoundary.isEmpty() ||
collatorPrimaryOnly_->compare(s, firstHanBoundary, errorCode) < 0) {
firstHanBoundary = s;
}
UChar c = s.charAt(s.length() - 1);
if (0x41 <= c && c <= 0x5A) { // A-Z
hasPinyin = TRUE;
}
}
}
if (hasPinyin) {
initialLabels_->add(0x41, 0x5A); // A-Z
}
if (!firstHanBoundary.isEmpty()) {
// The hardcoded list of script boundaries includes U+4E00
// which is tailored to not be the first primary
// in all Chinese tailorings except "unihan".
// Replace U+4E00 with the first boundary string from the tailoring.
// TODO: This becomes obsolete when the root collator gets
// reliable script-first-primary mappings.
int32_t hanIndex = binarySearch(
*firstCharsInScripts_, UnicodeString((UChar)0x4E00), *collatorPrimaryOnly_);
if (hanIndex >= 0) {
UnicodeString *fh = new UnicodeString(firstHanBoundary);
if (fh == NULL) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return FALSE;
}
firstCharsInScripts_->setElementAt(fh, hanIndex);
}
return TRUE;
} else {
return FALSE;
}
}
示例5: TestLocalXyzPointer
// Use LocalXyzPointer types that are not covered elsewhere in the intltest suite.
void LocalPointerTest::TestLocalXyzPointer() {
IcuTestErrorCode errorCode(*this, "TestLocalXyzPointer");
static const char *const encoding="ISO-8859-1";
LocalUConverterSelectorPointer sel(
ucnvsel_open(&encoding, 1, NULL, UCNV_ROUNDTRIP_SET, errorCode));
if(errorCode.logIfFailureAndReset("ucnvsel_open()")) {
return;
}
if(sel.isNull()) {
errln("LocalUConverterSelectorPointer failure");
return;
}
#if !UCONFIG_NO_FORMATTING
LocalUCalendarPointer cal(ucal_open(NULL, 0, "root", UCAL_GREGORIAN, errorCode));
if(errorCode.logDataIfFailureAndReset("ucal_open()")) {
return;
}
if(cal.isNull()) {
errln("LocalUCalendarPointer failure");
return;
}
LocalUDateTimePatternGeneratorPointer patgen(udatpg_open("root", errorCode));
if(errorCode.logDataIfFailureAndReset("udatpg_open()")) {
return;
}
if(patgen.isNull()) {
errln("LocalUDateTimePatternGeneratorPointer failure");
return;
}
LocalULocaleDisplayNamesPointer ldn(uldn_open("de-CH", ULDN_STANDARD_NAMES, errorCode));
if(errorCode.logIfFailureAndReset("uldn_open()")) {
return;
}
if(ldn.isNull()) {
errln("LocalULocaleDisplayNamesPointer failure");
return;
}
UnicodeString hello=UNICODE_STRING_SIMPLE("Hello {0}!");
LocalUMessageFormatPointer msg(
umsg_open(hello.getBuffer(), hello.length(), "root", NULL, errorCode));
if(errorCode.logIfFailureAndReset("umsg_open()")) {
return;
}
if(msg.isNull()) {
errln("LocalUMessageFormatPointer failure");
return;
}
#endif /* UCONFIG_NO_FORMATTING */
#if !UCONFIG_NO_NORMALIZATION
const UNormalizer2 *nfc=unorm2_getNFCInstance(errorCode);
UnicodeSet emptySet;
LocalUNormalizer2Pointer fn2(unorm2_openFiltered(nfc, emptySet.toUSet(), errorCode));
if(errorCode.logIfFailureAndReset("unorm2_openFiltered()")) {
return;
}
if(fn2.isNull()) {
errln("LocalUNormalizer2Pointer failure");
return;
}
#endif /* !UCONFIG_NO_NORMALIZATION */
#if !UCONFIG_NO_IDNA
LocalUIDNAPointer idna(uidna_openUTS46(0, errorCode));
if(errorCode.logIfFailureAndReset("uidna_openUTS46()")) {
return;
}
if(idna.isNull()) {
errln("LocalUIDNAPointer failure");
return;
}
#endif /* !UCONFIG_NO_IDNA */
#if !UCONFIG_NO_REGULAR_EXPRESSIONS
UnicodeString pattern=UNICODE_STRING_SIMPLE("abc|xy+z");
LocalURegularExpressionPointer regex(
uregex_open(pattern.getBuffer(), pattern.length(), 0, NULL, errorCode));
if(errorCode.logIfFailureAndReset("uregex_open()")) {
return;
}
if(regex.isNull()) {
errln("LocalURegularExpressionPointer failure");
return;
}
#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
#if !UCONFIG_NO_TRANSLITERATION
UnicodeString id=UNICODE_STRING_SIMPLE("Grek-Latn");
LocalUTransliteratorPointer trans(
utrans_openU(id.getBuffer(), id.length(), UTRANS_FORWARD, NULL, 0, NULL, errorCode));
if(errorCode.logIfFailureAndReset("utrans_open()")) {
return;
}
if(trans.isNull()) {
//.........这里部分代码省略.........
示例6: addIndexExemplars
void AlphabeticIndex::addIndexExemplars(const Locale &locale, UErrorCode &status) {
if (U_FAILURE(status)) { return; }
// Chinese index characters, which are specific to each of the several Chinese tailorings,
// take precedence over the single locale data exemplar set per language.
const char *language = locale.getLanguage();
if (uprv_strcmp(language, "zh") == 0 || uprv_strcmp(language, "ja") == 0 ||
uprv_strcmp(language, "ko") == 0) {
// TODO: This should be done regardless of the language, but it's expensive.
// We should add a Collator function (can be @internal)
// to enumerate just the contractions that start with a given code point or string.
if (addChineseIndexCharacters(status) || U_FAILURE(status)) {
return;
}
}
LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
if (U_FAILURE(status)) {
return;
}
UnicodeSet exemplars;
ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
if (U_SUCCESS(status)) {
initialLabels_->addAll(exemplars);
return;
}
status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR
// The locale data did not include explicit Index characters.
// Synthesize a set of them from the locale's standard exemplar characters.
ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
if (U_FAILURE(status)) {
return;
}
// question: should we add auxiliary exemplars?
if (exemplars.containsSome(0x61, 0x7A) /* a-z */ || exemplars.size() == 0) {
exemplars.add(0x61, 0x7A);
}
if (exemplars.containsSome(0xAC00, 0xD7A3)) { // Hangul syllables
// cut down to small list
exemplars.remove(0xAC00, 0xD7A3).
add(0xAC00).add(0xB098).add(0xB2E4).add(0xB77C).
add(0xB9C8).add(0xBC14).add(0xC0AC).add(0xC544).
add(0xC790).add(0xCC28).add(0xCE74).add(0xD0C0).
add(0xD30C).add(0xD558);
}
if (exemplars.containsSome(0x1200, 0x137F)) { // Ethiopic block
// cut down to small list
// make use of the fact that Ethiopic is allocated in 8's, where
// the base is 0 mod 8.
UnicodeSet ethiopic(
UNICODE_STRING_SIMPLE("[[:Block=Ethiopic:]&[:Script=Ethiopic:]]"), status);
UnicodeSetIterator it(ethiopic);
while (it.next() && !it.isString()) {
if ((it.getCodepoint() & 0x7) != 0) {
exemplars.remove(it.getCodepoint());
}
}
}
// Upper-case any that aren't already so.
// (We only do this for synthesized index characters.)
UnicodeSetIterator it(exemplars);
UnicodeString upperC;
while (it.next()) {
const UnicodeString &exemplarC = it.getString();
upperC = exemplarC;
upperC.toUpper(locale);
initialLabels_->add(upperC);
}
}
示例7: siter
//
// First characters in scripts.
// Create a UVector whose contents are pointers to UnicodeStrings for the First Characters in each script.
// The vector is sorted according to this index's collation.
//
// This code is too slow to use, so for now hard code the data.
// Hard coded implementation is follows.
//
UVector *AlphabeticIndex::firstStringsInScript(Collator *ruleBasedCollator, UErrorCode &status) {
if (U_FAILURE(status)) {
return NULL;
}
UnicodeString results[USCRIPT_CODE_LIMIT];
UnicodeString LOWER_A = UNICODE_STRING_SIMPLE("a");
UnicodeSetIterator siter(*TO_TRY);
while (siter.next()) {
const UnicodeString ¤t = siter.getString();
Collator::EComparisonResult r = ruleBasedCollator->compare(current, LOWER_A);
if (r < 0) { // TODO fix; we only want "real" script characters, not
// symbols.
continue;
}
int script = uscript_getScript(current.char32At(0), &status);
if (results[script].length() == 0) {
results[script] = current;
}
else if (ruleBasedCollator->compare(current, results[script]) < 0) {
results[script] = current;
}
}
UnicodeSet extras;
UnicodeSet expansions;
RuleBasedCollator *rbc = dynamic_cast<RuleBasedCollator *>(ruleBasedCollator);
const UCollator *uRuleBasedCollator = rbc->getUCollator();
ucol_getContractionsAndExpansions(uRuleBasedCollator, extras.toUSet(), expansions.toUSet(), true, &status);
extras.addAll(expansions).removeAll(*TO_TRY);
if (extras.size() != 0) {
const Normalizer2 *normalizer = Normalizer2::getNFKCInstance(status);
UnicodeSetIterator extrasIter(extras);
while (extrasIter.next()) {
const UnicodeString ¤t = extrasIter.next();
if (!TO_TRY->containsAll(current))
continue;
if (!normalizer->isNormalized(current, status) ||
ruleBasedCollator->compare(current, LOWER_A) < 0) {
continue;
}
int script = uscript_getScript(current.char32At(0), &status);
if (results[script].length() == 0) {
results[script] = current;
} else if (ruleBasedCollator->compare(current, results[script]) < 0) {
results[script] = current;
}
}
}
UVector *dest = new UVector(status);
dest->setDeleter(uprv_deleteUObject);
for (uint32_t i = 0; i < sizeof(results) / sizeof(results[0]); ++i) {
if (results[i].length() > 0) {
dest->addElement(results[i].clone(), status);
}
}
dest->sortWithUComparator(sortCollateComparator, ruleBasedCollator, status);
return dest;
}
示例8: getIndexExemplars
void AlphabeticIndex::getIndexExemplars(UnicodeSet &dest, const Locale &locale, UErrorCode &status) {
if (U_FAILURE(status)) {
return;
}
LocalULocaleDataPointer uld(ulocdata_open(locale.getName(), &status));
UnicodeSet exemplars;
ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_INDEX, &status);
if (U_SUCCESS(status)) {
dest.addAll(exemplars);
return;
}
status = U_ZERO_ERROR; // Clear out U_MISSING_RESOURCE_ERROR
// Locale data did not include explicit Index characters.
// Synthesize a set of them from the locale's standard exemplar characters.
ulocdata_getExemplarSet(uld.getAlias(), exemplars.toUSet(), 0, ULOCDATA_ES_STANDARD, &status);
if (U_FAILURE(status)) {
return;
}
// Upper-case any that aren't already so.
// (We only do this for synthesized index characters.)
UnicodeSetIterator it(exemplars);
UnicodeString upperC;
UnicodeSet lowersToRemove;
UnicodeSet uppersToAdd;
while (it.next()) {
const UnicodeString &exemplarC = it.getString();
upperC = exemplarC;
upperC.toUpper(locale);
if (exemplarC != upperC) {
lowersToRemove.add(exemplarC);
uppersToAdd.add(upperC);
}
}
exemplars.removeAll(lowersToRemove);
exemplars.addAll(uppersToAdd);
// get the exemplars, and handle special cases
// question: should we add auxiliary exemplars?
if (exemplars.containsSome(*CORE_LATIN)) {
exemplars.addAll(*CORE_LATIN);
}
if (exemplars.containsSome(*HANGUL)) {
// cut down to small list
UnicodeSet BLOCK_HANGUL_SYLLABLES(UNICODE_STRING_SIMPLE("[:block=hangul_syllables:]"), status);
exemplars.removeAll(BLOCK_HANGUL_SYLLABLES);
exemplars.addAll(*HANGUL);
}
if (exemplars.containsSome(*ETHIOPIC)) {
// cut down to small list
// make use of the fact that Ethiopic is allocated in 8's, where
// the base is 0 mod 8.
UnicodeSetIterator it(*ETHIOPIC);
while (it.next() && !it.isString()) {
if ((it.getCodepoint() & 0x7) != 0) {
exemplars.remove(it.getCodepoint());
}
}
}
dest.addAll(exemplars);
}