本文整理汇总了 C++ 中 UnicodeString::char32At 方法的典型用法代码示例。如果您正苦于以下问题:C++ UnicodeString::char32At 方法的具体用法?C++ UnicodeString::char32At 怎么用?C++ UnicodeString::char32At 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 UnicodeString 的用法示例。
在下文中一共展示了UnicodeString::char32At方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: findSetFor
//----------------------------------------------------------------------------------------
//
// findSetFor given a UnicodeString,
// - find the corresponding Unicode Set (uset node)
// (create one if necessary)
// - Set fLeftChild of the caller's node (should be a setRef node)
// to the uset node
// Maintain a hash table of uset nodes, so the same one is always used
// for the same string.
// If a "to adopt" set is provided and we haven't seen this key before,
// add the provided set to the hash table.
// If the string is one (32 bit) char in length, the set contains
// just one element which is the char in question.
// If the string is "any", return a set containing all chars.
//
//----------------------------------------------------------------------------------------
void RBBIRuleScanner::findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt) {
RBBISetTableEl *el;
// First check whether we've already cached a set for this string.
// If so, just use the cached set in the new node.
// delete any set provided by the caller, since we own it.
el = (RBBISetTableEl *)uhash_get(fSetTable, &s);
if (el != NULL) {
delete setToAdopt;
node->fLeftChild = el->val;
U_ASSERT(node->fLeftChild->fType == RBBINode::uset);
return;
}
// Haven't seen this set before.
// If the caller didn't provide us with a prebuilt set,
// create a new UnicodeSet now.
if (setToAdopt == NULL) {
if (s.compare(kAny, -1) == 0) {
setToAdopt = new UnicodeSet(0x000000, 0x10ffff);
} else {
UChar32 c;
c = s.char32At(0);
setToAdopt = new UnicodeSet(c, c);
}
}
//
// Make a new uset node to refer to this UnicodeSet
// This new uset node becomes the child of the caller's setReference node.
//
RBBINode *usetNode = new RBBINode(RBBINode::uset);
usetNode->fInputSet = setToAdopt;
usetNode->fParent = node;
node->fLeftChild = usetNode;
usetNode->fText = s;
//
// Add the new uset node to the list of all uset nodes.
//
fRB->fUSetNodes->addElement(usetNode, *fRB->fStatus);
//
// Add the new set to the set hash table.
//
el = (RBBISetTableEl *)uprv_malloc(sizeof(RBBISetTableEl));
UnicodeString *tkey = new UnicodeString(s);
if (tkey == NULL || el == NULL || setToAdopt == NULL) {
error(U_MEMORY_ALLOCATION_ERROR);
return;
}
el->key = tkey;
el->val = usetNode;
uhash_put(fSetTable, el->key, el, fRB->fStatus);
return;
}
示例2: hackName
// Builds in `dest` the string to use for `name`.
//
// When langType_ is kSimplified and the first code point of `name` is in
// UNIHAN, `dest` becomes one character taken from PINYIN_LOWER_BOUNDS
// followed by `name`. In every other case `dest` is a plain copy of `name`:
// wrong language type, non-Unihan first character, failure of
// initPinyinBounds(), or no usable table slot.
//
// @param dest  receives the result.
// @param name  the original name.
// @param col   collator used to compare `name` against the lookup table.
void AlphabeticIndex::hackName(UnicodeString &dest, const UnicodeString &name, const Collator *col) {
    if (langType_ != kSimplified || !UNIHAN->contains(name.char32At(0))) {
        dest = name;
        return;
    }

    UErrorCode status = U_ZERO_ERROR;
    initPinyinBounds(col, status);
    if (U_FAILURE(status)) {
        dest = name;
        return;
    }

    // Linear scan for the last lookup entry that does not compare greater
    // than `name`. The table ends at an entry whose first unit is 0xffff.
    // NOTE(review): presumably the table is in collation order - confirm.
    // TODO: use binary search
    int index = 0;
    for (;; index++) {
        if ((*HACK_PINYIN_LOOKUP)[index][0] == (UChar)0xffff) {
            index--;        // ran off the end: use the last real entry
            break;
        }
        int32_t compareResult = col->compare(name, UnicodeString(TRUE, (*HACK_PINYIN_LOOKUP)[index], -1));
        if (compareResult < 0) {
            index--;        // name sorts before this entry: use the previous one
        }
        if (compareResult <= 0) {
            break;
        }
    }

    // index is -1 when `name` sorts before the first entry (or the table is
    // empty). Indexing PINYIN_LOWER_BOUNDS with it would read out of bounds,
    // so fall back to the unmodified name, as the other early exits do.
    if (index < 0) {
        dest = name;
        return;
    }

    UChar c = PINYIN_LOWER_BOUNDS[index];
    dest.setTo(c);
    dest.append(name);
}
示例3: operator+=
// Feeds every code point of `ustr`, in order, to the single-code-point
// overload of operator+=.
void AccumulativeWordCounter::operator+=(const UnicodeString& ustr)
{
    int32_t pos = 0;
    while (pos < ustr.length()) {
        // char32At() yields a whole code point; moveIndex32() then steps past
        // it, whether it occupies one or two UTF-16 code units.
        this->operator+=(ustr.char32At(pos));
        pos = ustr.moveIndex32(pos, 1);
    }
}
示例4: uspoof_getSkeletonUnicodeString
// Computes the skeleton of `id` into `dest`.
//
// Steps: normalize `id` with gNfdNormalizer, run confusableLookup() on each
// code point of the normalized text to accumulate a skeleton string, then
// normalize that skeleton into `dest`.
//
// @param sc      spoof checker handle, validated through SpoofImpl::validateThis.
// @param id      input identifier.
// @param dest    receives the skeleton; also the return value.
// @param status  in/out error code; on failure after validation `dest` is
//                returned untouched.
// The unnamed uint32_t `type` parameter is ignored.
U_I18N_API UnicodeString & U_EXPORT2
uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
                                uint32_t /*type*/,
                                const UnicodeString &id,
                                UnicodeString &dest,
                                UErrorCode *status) {
    const SpoofImpl *This = SpoofImpl::validateThis(sc, *status);
    if (U_FAILURE(*status)) {
        return dest;
    }

    UnicodeString nfdId;
    gNfdNormalizer->normalize(id, nfdId, *status);

    // Apply the skeleton mapping to the normalized input, one code point at
    // a time. The accumulated skeleton may itself be unnormalized.
    UnicodeString skelStr;
    const int32_t normalizedLen = nfdId.length();
    int32_t inputIndex = 0;
    while (inputIndex < normalizedLen) {
        UChar32 c = nfdId.char32At(inputIndex);
        inputIndex += U16_LENGTH(c);
        This->fSpoofData->confusableLookup(c, skelStr);
    }

    gNfdNormalizer->normalize(skelStr, dest, *status);
    return dest;
}
示例5: parseUnicodeIdentifier
// Parses a Unicode identifier from `str` starting at `pos`.
//
// The first code point must satisfy u_isIDStart(); following code points are
// consumed while they satisfy u_isIDPart().
//
// @param str  text to parse.
// @param pos  in: index at which to start; out: index just past the
//             identifier. Left unchanged when no identifier is found.
// @return     the identifier, or an empty string when the code point at
//             `pos` cannot start one (or `pos` is at or past the end).
UnicodeString ICU_Utility::parseUnicodeIdentifier(const UnicodeString& str, int32_t& pos) {
    // assert(pos < str.length());
    // assert(!uprv_isRuleWhiteSpace(str.char32At(pos)));
    UnicodeString buf;
    int32_t p = pos;
    while (p < str.length()) {
        UChar32 ch = str.char32At(p);
        if (buf.length() == 0) {
            if (!u_isIDStart(ch)) {
                // Not an identifier: return the (still empty) buffer and
                // leave pos where it was.
                return buf;
            }
            buf.append(ch);
        } else {
            if (!u_isIDPart(ch)) {
                break;
            }
            buf.append(ch);
        }
        // U16_LENGTH replaces the obsolete UTF_CHAR_LENGTH macro: it is the
        // number of UTF-16 code units (1 or 2) used by ch.
        p += U16_LENGTH(ch);
    }
    pos = p;
    return buf;
}
示例6: parseNumber
// Parses a non-negative integer in the given radix from `text` at `pos`.
//
// Digits are recognised with u_digit(ch, radix), and parsing stops at the
// first code point that is not a digit in that radix.
//
// @param text   text to parse.
// @param pos    in: index of the first digit; out: index just past the last
//               digit consumed. Unchanged when -1 is returned.
// @param radix  numeric base; the original contract expects 2..36.
// @return       the parsed value, or -1 when no digit was found or the value
//               does not fit in a non-negative int32_t.
int32_t ICU_Utility::parseNumber(const UnicodeString& text,
                                 int32_t& pos, int8_t radix) {
    // assert(pos[0] >= 0);
    // assert(radix >= 2);
    // assert(radix <= 36);
    const int32_t kInt32Max = 0x7fffffff;
    int32_t n = 0;
    int32_t p = pos;
    while (p < text.length()) {
        UChar32 ch = text.char32At(p);
        int32_t d = u_digit(ch, radix);
        if (d < 0) {
            break;
        }
        // Detect overflow before it happens: signed overflow is undefined
        // behaviour, and a wrapped value is not guaranteed to be negative.
        // radix*n + d <= kInt32Max  <=>  n <= (kInt32Max - d) / radix.
        if (n > (kInt32Max - d) / radix) {
            return -1;
        }
        n = radix*n + d;
        // Step over the whole code point. A supplementary-plane digit takes
        // two code units, and advancing by one would make char32At() return
        // the same digit again from its trail surrogate.
        p = text.moveIndex32(p, 1);
    }
    if (p == pos) {
        return -1;
    }
    pos = p;
    return n;
}
示例7: wholeScriptCheck
//---------------------------------------------------------------------------------------
//
// wholeScriptCheck()
//
// Input text is already normalized to NFD
// Return the set of scripts, each of which can represent something that is
// confusable with the input text. The script of the input text
// is included; input consisting of characters from a single script will
// always produce a result consisting of a set containing that script.
//
//---------------------------------------------------------------------------------------
void SpoofImpl::wholeScriptCheck(
const UnicodeString &text, ScriptSet *result, UErrorCode &status) const {
UTrie2 *table =
(fChecks & USPOOF_ANY_CASE) ? fSpoofData->fAnyCaseTrie : fSpoofData->fLowerCaseTrie;
result->setAll();
int32_t length = text.length();
for (int32_t inputIdx=0; inputIdx < length;) {
UChar32 c = text.char32At(inputIdx);
inputIdx += U16_LENGTH(c);
uint32_t index = utrie2_get32(table, c);
if (index == 0) {
// No confusables in another script for this char.
// TODO: we should change the data to have sets with just the single script
// bit for the script of this char. Gets rid of this special case.
// Until then, grab the script from the char and intersect it with the set.
UScriptCode cpScript = uscript_getScript(c, &status);
U_ASSERT(cpScript > USCRIPT_INHERITED);
result->intersect(cpScript, status);
} else if (index == 1) {
// Script == Common or Inherited. Nothing to do.
} else {
result->intersect(fSpoofData->fScriptSets[index]);
}
}
}
示例8: search
// Recursive trie walk over `text`.
//
// At each step: report the current node to `handler` if it carries values,
// then descend to the child for the code point at `index` and recurse. With
// fIgnoreCase set, the code point is case-folded first and the walk descends
// once per code point of the folded form.
//
// @param node     trie node reached so far.
// @param text     text being matched.
// @param start    index in `text` where the match attempt began.
// @param index    index in `text` of the next character to consume.
// @param handler  receives (index - start, node) for every node with values;
//                 a false return stops the search.
// @param status   in/out error code; a failure stops the search.
void
TextTrieMap::search(CharacterNode *node, const UnicodeString &text, int32_t start,
                    int32_t index, TextTrieMapSearchResultHandler *handler, UErrorCode &status) const {
    if (U_FAILURE(status)) {
        return;
    }

    if (node->hasValues()) {
        // Stop when the handler declines to continue or reports an error.
        if (!handler->handleMatch(index - start, node, status) || U_FAILURE(status)) {
            return;
        }
    }

    UChar32 c = text.char32At(index);
    CharacterNode *next = node;
    if (!fIgnoreCase) {
        next = getChildNode(next, c);
    } else {
        // The character may grow to several code points after folding.
        UnicodeString folded(c);
        folded.foldCase();
        for (int32_t i = 0; i < folded.length(); i = folded.moveIndex32(i, 1)) {
            c = folded.char32At(i);
            next = getChildNode(next, c);
            if (next == NULL) {
                break;
            }
        }
    }

    if (next != NULL) {
        // NOTE(review): this advances by one code unit although the character
        // was read with char32At(); confirm this is intended for
        // supplementary-plane text.
        search(next, text, start, index + 1, handler, status);
    }
}
示例9: assertInSet
void StaticUnicodeSetsTest::assertInSet(const UnicodeString &localeName, const UnicodeString &setName,
const UnicodeSet &set, const UnicodeString &str) {
if (str.countChar32(0, str.length()) != 1) {
// Ignore locale strings with more than one code point (usually a bidi mark)
return;
}
assertInSet(localeName, setName, set, str.char32At(0));
}
示例10: transform
void transform(const UnicodeString &word, CharString &buf, UErrorCode &errorCode) {
UChar32 c = 0;
int32_t len = word.length();
for (int32_t i = 0; i < len; i += U16_LENGTH(c)) {
c = word.char32At(i);
buf.append(transform(c, errorCode), errorCode);
}
}
示例11: toUChar32
// Extracts the first code point of `u` into `*c`.
//
// ICU older than 4.2: reads it with char32At(0) when the string is non-empty
// and returns u.length(); `status` is untouched.
// ICU 4.2 and later: delegates to u.toUTF32(c, 1, status) and returns its result.
int32_t toUChar32(UnicodeString& u, UChar32 *c, UErrorCode& status)
{
#if U_ICU_VERSION_HEX < 0x04020000
    const int32_t len = u.length();
    if (len < 1) {
        return len;   // empty string: *c is left as it was
    }
    *c = u.char32At(0);
    return len;
#else
    return u.toUTF32(c, 1, status);
#endif
}
示例12: isIllegalCombiningDotLeadCharacter
// Returns true when `cp` itself passes isIllegalCombiningDotLeadCharacterNoLookup(),
// or when the last code point of its confusableLookup() result differs from
// `cp` and passes that same check.
bool SpoofImpl::isIllegalCombiningDotLeadCharacter(UChar32 cp) const {
    if (isIllegalCombiningDotLeadCharacterNoLookup(cp)) {
        return true;
    }

    UnicodeString skelStr;
    fSpoofData->confusableLookup(cp, skelStr);

    // Step back one code point from the end to find where the final code
    // point of the lookup result starts.
    const int32_t lastStart = skelStr.moveIndex32(skelStr.length(), -1);
    const UChar32 finalCp = skelStr.char32At(lastStart);

    return finalCp != cp && isIllegalCombiningDotLeadCharacterNoLookup(finalCp);
}
示例13: getNumerics
// Computes the set of numerics for a string, according to UTS 39 section 5.3.
void SpoofImpl::getNumerics(const UnicodeString& input, UnicodeSet& result, UErrorCode& /*status*/) const {
result.clear();
UChar32 codePoint;
for (int32_t i = 0; i < input.length(); i += U16_LENGTH(codePoint)) {
codePoint = input.char32At(i);
// Store a representative character for each kind of decimal digit
if (u_charType(codePoint) == U_DECIMAL_DIGIT_NUMBER) {
// Store the zero character as a representative for comparison.
// Unicode guarantees it is codePoint - value
result.add(codePoint - (UChar32)u_getNumericValue(codePoint));
}
}
}
示例14: HackPinyinTest
void AlphabeticIndexTest::HackPinyinTest() {
UErrorCode status = U_ZERO_ERROR;
AlphabeticIndex aindex(Locale::createFromName("zh"), status);
TEST_CHECK_STATUS;
UnicodeString names[sizeof(pinyinTestData) / sizeof(pinyinTestData[0])];
int32_t nameCount;
for (nameCount=0; pinyinTestData[nameCount] != NULL; nameCount++) {
names[nameCount] = UnicodeString(pinyinTestData[nameCount], -1, UnicodeString::kInvariant).unescape();
aindex.addRecord(names[nameCount], &names[nameCount], status);
TEST_CHECK_STATUS;
if (U_FAILURE(status)) {
return;
}
}
TEST_ASSERT(nameCount == aindex.getRecordCount(status));
// Weak checking: make sure that none of the Chinese names landed in the overflow bucket
// of the index, and that the names are distributed among several buckets.
// (Exact expected data would be subject to change with evolution of the collation rules.)
int32_t bucketCount = 0;
int32_t filledBucketCount = 0;
while (aindex.nextBucket(status)) {
bucketCount++;
UnicodeString label = aindex.getBucketLabel();
// std::string s;
// std::cout << label.toUTF8String(s) << ": ";
UBool bucketHasContents = FALSE;
while (aindex.nextRecord(status)) {
bucketHasContents = TRUE;
UnicodeString name = aindex.getRecordName();
if (aindex.getBucketLabelType() != U_ALPHAINDEX_NORMAL) {
errln("File %s, Line %d, Name \"\\u%x\" is in an under or overflow bucket.",
__FILE__, __LINE__, name.char32At(0));
}
// s.clear();
// std::cout << aindex.getRecordName().toUTF8String(s) << " ";
}
if (bucketHasContents) {
filledBucketCount++;
}
// std::cout << std::endl;
}
TEST_ASSERT(bucketCount > 25);
TEST_ASSERT(filledBucketCount > 15);
}
示例15: stripRules
//------------------------------------------------------------------------------
//
//  stripRules    Return a copy of the rules string without extra spaces:
//                each run of consecutive Pattern_White_Space code points is
//                reduced to its first code point.
//                (Comments are removed separately, during rule parsing.)
//
//------------------------------------------------------------------------------
UnicodeString RBBIRuleScanner::stripRules(const UnicodeString &rules) {
    UnicodeString strippedRules;
    const int32_t rulesLength = rules.length();

    bool previousWasSpace = false;
    int32_t idx = 0;
    while (idx < rulesLength) {
        const UChar32 cp = rules.char32At(idx);
        idx = rules.moveIndex32(idx, 1);

        const bool whiteSpace = u_hasBinaryProperty(cp, UCHAR_PATTERN_WHITE_SPACE);
        if (!(previousWasSpace && whiteSpace)) {
            // Keep everything except white space that directly follows
            // white space.
            strippedRules.append(cp);
            previousWasSpace = whiteSpace;
        }
    }
    return strippedRules;
}