本文整理汇总了 C++ 中 UnicodeString::moveIndex32 方法的典型用法代码示例。如果您正苦于以下问题：C++ UnicodeString::moveIndex32 方法的具体用法是什么？C++ UnicodeString::moveIndex32 怎么用？有哪些 C++ UnicodeString::moveIndex32 的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 UnicodeString 的用法示例。
在下文中一共展示了UnicodeString::moveIndex32方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: CStr
U_NAMESPACE_BEGIN
CStr::CStr(const UnicodeString &in) {
UErrorCode status = U_ZERO_ERROR;
#if !UCONFIG_NO_CONVERSION || U_CHARSET_IS_UTF8
int32_t length = in.extract(0, in.length(), static_cast<char *>(NULL), static_cast<uint32_t>(0));
int32_t resultCapacity = 0;
char *buf = s.getAppendBuffer(length, length, resultCapacity, status);
if (U_SUCCESS(status)) {
in.extract(0, in.length(), buf, resultCapacity);
s.append(buf, length, status);
}
#else
// No conversion available. Convert any invariant characters; substitute '?' for the rest.
// Note: can't just call u_UCharsToChars() or CharString.appendInvariantChars() on the
// whole string because they require that the entire input be invariant.
char buf[2];
for (int i=0; i<in.length(); i = in.moveIndex32(i, 1)) {
if (uprv_isInvariantUString(in.getBuffer()+i, 1)) {
u_UCharsToChars(in.getBuffer()+i, buf, 1);
} else {
buf[0] = '?';
}
s.append(buf, 1, status);
}
#endif
}
示例2: operator+=
void AccumulativeWordCounter::operator+=(const UnicodeString& ustr)
{
for(int32_t i=0; i<ustr.length(); i=ustr.moveIndex32(i,1))
{
this->operator+=(ustr.char32At(i));
}
}
示例3: isIllegalCombiningDotLeadCharacter
bool SpoofImpl::isIllegalCombiningDotLeadCharacter(UChar32 cp) const {
if (isIllegalCombiningDotLeadCharacterNoLookup(cp)) {
return true;
}
UnicodeString skelStr;
fSpoofData->confusableLookup(cp, skelStr);
UChar32 finalCp = skelStr.char32At(skelStr.moveIndex32(skelStr.length(), -1));
if (finalCp != cp && isIllegalCombiningDotLeadCharacterNoLookup(finalCp)) {
return true;
}
return false;
}
示例4: stripRules
//------------------------------------------------------------------------------
//
// stripRules Return a rules string without extra spaces.
// (Comments are removed separately, during rule parsing.)
//
//------------------------------------------------------------------------------
UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) {
UnicodeString strippedRules;
int32_t rulesLength = rules.length();
bool skippingSpaces = false;
for (int32_t idx=0; idx<rulesLength; idx = rules.moveIndex32(idx, 1)) {
UChar32 cp = rules.char32At(idx);
bool whiteSpace = u_hasBinaryProperty(cp, UCHAR_PATTERN_WHITE_SPACE);
if (skippingSpaces && whiteSpace) {
continue;
}
strippedRules.append(cp);
skippingSpaces = whiteSpace;
}
return strippedRules;
}
示例5: separated
UnicodeString AlphabeticIndex::separated(const UnicodeString &item) {
UnicodeString result;
if (item.length() == 0) {
return result;
}
int32_t i = 0;
for (;;) {
UChar32 cp = item.char32At(i);
result.append(cp);
i = item.moveIndex32(i, 1);
if (i >= item.length()) {
break;
}
result.append(CGJ);
}
return result;
}
示例6: testConfData
// testConfData - Check each data item from the Unicode confusables.txt file,
// verify that it transforms correctly in a skeleton.
//
void IntlTestSpoof::testConfData() {
UErrorCode status = U_ZERO_ERROR;
const char *testDataDir = IntlTest::getSourceTestData(status);
TEST_ASSERT_SUCCESS(status);
char buffer[2000];
uprv_strcpy(buffer, testDataDir);
uprv_strcat(buffer, "confusables.txt");
LocalStdioFilePointer f(fopen(buffer, "rb"));
if (f.isNull()) {
errln("Skipping test spoof/testConfData. File confusables.txt not accessible.");
return;
}
fseek(f.getAlias(), 0, SEEK_END);
int32_t fileSize = ftell(f.getAlias());
LocalArray<char> fileBuf(new char[fileSize]);
fseek(f.getAlias(), 0, SEEK_SET);
int32_t amt_read = fread(fileBuf.getAlias(), 1, fileSize, f.getAlias());
TEST_ASSERT_EQ(amt_read, fileSize);
TEST_ASSERT(fileSize>0);
if (amt_read != fileSize || fileSize <=0) {
return;
}
UnicodeString confusablesTxt = UnicodeString::fromUTF8(StringPiece(fileBuf.getAlias(), fileSize));
LocalUSpoofCheckerPointer sc(uspoof_open(&status));
TEST_ASSERT_SUCCESS(status);
// Parse lines from the confusables.txt file. Example Line:
// FF44 ; 0064 ; SL # ( d -> d ) FULLWIDTH ....
// Three fields. The hex fields can contain more than one character,
// and each character may be more than 4 digits (for supplemntals)
// This regular expression matches lines and splits the fields into capture groups.
RegexMatcher parseLine("(?m)^([0-9A-F]{4}[^#;]*?);([^#;]*?);([^#]*)", confusablesTxt, 0, status);
TEST_ASSERT_SUCCESS(status);
while (parseLine.find()) {
UnicodeString from = parseHex(parseLine.group(1, status));
if (!Normalizer::isNormalized(from, UNORM_NFD, status)) {
// The source character was not NFD.
// Skip this case; the first step in obtaining a skeleton is to NFD the input,
// so the mapping in this line of confusables.txt will never be applied.
continue;
}
UnicodeString rawExpected = parseHex(parseLine.group(2, status));
UnicodeString expected;
Normalizer::decompose(rawExpected, FALSE /*NFD*/, 0, expected, status);
TEST_ASSERT_SUCCESS(status);
int32_t skeletonType = 0;
UnicodeString tableType = parseLine.group(3, status);
TEST_ASSERT_SUCCESS(status);
if (tableType.indexOf("SL") >= 0) {
skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE;
} else if (tableType.indexOf("SA") >= 0) {
skeletonType = USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_ANY_CASE;
} else if (tableType.indexOf("ML") >= 0) {
skeletonType = 0;
} else if (tableType.indexOf("MA") >= 0) {
skeletonType = USPOOF_ANY_CASE;
}
UnicodeString actual;
uspoof_getSkeletonUnicodeString(sc.getAlias(), skeletonType, from, actual, &status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(actual == expected);
if (actual != expected) {
errln(parseLine.group(0, status));
UnicodeString line = "Actual: ";
int i = 0;
while (i < actual.length()) {
appendHexUChar(line, actual.char32At(i));
i = actual.moveIndex32(i, 1);
}
errln(line);
}
if (U_FAILURE(status)) {
break;
}
}
}
示例7: buildIndex
void AlphabeticIndex::buildIndex(UErrorCode &status) {
if (U_FAILURE(status)) {
return;
}
if (!indexBuildRequired_) {
return;
}
// Discard any already-built data.
// This is important when the user builds and uses an index, then subsequently modifies it,
// necessitating a rebuild.
bucketList_->removeAllElements();
labels_->removeAllElements();
uhash_removeAll(alreadyIn_);
noDistinctSorting_->clear();
notAlphabetic_->clear();
// first sort the incoming Labels, with a "best" ordering among items
// that are the same according to the collator
UVector preferenceSorting(status); // Vector of UnicodeStrings; owned by the vector.
preferenceSorting.setDeleter(uprv_deleteUObject);
appendUnicodeSetToUVector(preferenceSorting, *initialLabels_, status);
preferenceSorting.sortWithUComparator(PreferenceComparator, &status, status);
// We now make a set of Labels.
// Some of the input may, however, be redundant.
// That is, we might have c, ch, d, where "ch" sorts just like "c", "h"
// So we make a pass through, filtering out those cases.
// TODO: filtering these out would seem to be at odds with the eventual goal
// of being able to split buckets that contain too many items.
UnicodeSet labelSet;
for (int32_t psIndex=0; psIndex<preferenceSorting.size(); psIndex++) {
UnicodeString item = *static_cast<const UnicodeString *>(preferenceSorting.elementAt(psIndex));
// TODO: Since preferenceSorting was originally populated from the contents of a UnicodeSet,
// is it even possible for duplicates to show up in this check?
if (labelSet.contains(item)) {
UnicodeSetIterator itemAlreadyInIter(labelSet);
while (itemAlreadyInIter.next()) {
const UnicodeString &itemAlreadyIn = itemAlreadyInIter.getString();
if (collatorPrimaryOnly_->compare(item, itemAlreadyIn) == 0) {
UnicodeSet *targets = static_cast<UnicodeSet *>(uhash_get(alreadyIn_, &itemAlreadyIn));
if (targets == NULL) {
// alreadyIn.put(itemAlreadyIn, targets = new LinkedHashSet<String>());
targets = new UnicodeSet();
uhash_put(alreadyIn_, itemAlreadyIn.clone(), targets, &status);
}
targets->add(item);
break;
}
}
} else if (item.moveIndex32(0, 1) < item.length() && // Label contains more than one code point.
collatorPrimaryOnly_->compare(item, separated(item)) == 0) {
noDistinctSorting_->add(item);
} else if (!ALPHABETIC->containsSome(item)) {
notAlphabetic_->add(item);
} else {
labelSet.add(item);
}
}
// If we have no labels, hard-code a fallback default set of [A-Z]
// This case can occur with locales that don't have exemplar character data, including root.
// A no-labels situation will cause other problems; it needs to be avoided.
if (labelSet.isEmpty()) {
labelSet.add((UChar32)0x41, (UChar32)0x5A);
}
// Move the set of Labels from the set into a vector, and sort
// according to the collator.
appendUnicodeSetToUVector(*labels_, labelSet, status);
labels_->sortWithUComparator(sortCollateComparator, collatorPrimaryOnly_, status);
// if the result is still too large, cut down to maxLabelCount_ elements, by removing every nth element
// Implemented by copying the elements to be retained to a new UVector.
const int32_t size = labelSet.size() - 1;
if (size > maxLabelCount_) {
UVector *newLabels = new UVector(status);
newLabels->setDeleter(uprv_deleteUObject);
int32_t count = 0;
int32_t old = -1;
for (int32_t srcIndex=0; srcIndex<labels_->size(); srcIndex++) {
const UnicodeString *str = static_cast<const UnicodeString *>(labels_->elementAt(srcIndex));
++count;
const int32_t bump = count * maxLabelCount_ / size;
if (bump == old) {
// it.remove();
} else {
newLabels->addElement(str->clone(), status);
old = bump;
}
}
delete labels_;
labels_ = newLabels;
}
//.........这里部分代码省略.........