本文整理汇总了C++中Transliterator::transliterate方法的典型用法代码示例。如果您正苦于以下问题:C++ Transliterator::transliterate方法的具体用法?C++ Transliterator::transliterate怎么用?C++ Transliterator::transliterate使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Transliterator的用法示例。
在下文中一共展示了Transliterator::transliterate方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: TestTransliterate1
/**
 * Runs each transliterator listed in the table below over its input string
 * and hands the result, together with the expected string, to doTest().
 *
 * The table is a flat array read in groups of three entries:
 * transliterator ID, input string, expected transliterated string.
 * A transliterator that cannot be created is reported through dataerrln()
 * and skipped; the remaining rows are still processed.
 */
void TransliteratorAPITest::TestTransliterate1(){
    UnicodeString Data[]={
        //ID, input string, transliterated string
        "Any-Hex", "hello", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", "") ,
        "Hex-Any", UnicodeString("\\u0068\\u0065\\u006C\\u006C\\u006F", ""), "hello" ,
        "Latin-Devanagari",CharsToUnicodeString("bha\\u0304rata"), CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924") ,
        "Latin-Devanagari",UnicodeString("kra ksha khra gra cra dya dhya",""), CharsToUnicodeString("\\u0915\\u094D\\u0930 \\u0915\\u094D\\u0936 \\u0916\\u094D\\u0930 \\u0917\\u094D\\u0930 \\u091a\\u094D\\u0930 \\u0926\\u094D\\u092F \\u0927\\u094D\\u092F") ,
        "Devanagari-Latin", CharsToUnicodeString("\\u092D\\u093E\\u0930\\u0924"), CharsToUnicodeString("bh\\u0101rata"),
        // "Contracted-Expanded", CharsToUnicodeString("\\u00C0\\u00C1\\u0042"), CharsToUnicodeString("\\u0041\\u0300\\u0041\\u0301\\u0042") ,
        // "Expanded-Contracted", CharsToUnicodeString("\\u0041\\u0300\\u0041\\u0301\\u0042"), CharsToUnicodeString("\\u00C0\\u00C1\\u0042") ,
        //"Latin-Arabic", "aap", CharsToUnicodeString("\\u0627\\u06A4") ,
        //"Arabic-Latin", CharsToUnicodeString("\\u0627\\u06A4"), "aap"
    };

    UnicodeString gotResult;
    UnicodeString temp;
    UnicodeString message;
    Transliterator* t;

    logln("Testing transliterate");
    UErrorCode status = U_ZERO_ERROR;
    UParseError parseError;

    // Step by three: each row is (ID, input, expected).
    for(uint32_t i=0;i<sizeof(Data)/sizeof(Data[0]); i=i+3){
        t=Transliterator::createInstance(Data[i+0], UTRANS_FORWARD, parseError, status);
        if(t==0){
            dataerrln("FAIL: construction: " + Data[i+0] + " Error: " + u_errorName(status));
            dataerrln("PreContext: " + prettify(parseError.preContext) + " PostContext: " + prettify( parseError.postContext) );
            // Reset so the failure does not leak into the next row.
            status = U_ZERO_ERROR;
            continue;
        }

        // First pass: transliterate a copy of the input in place.
        gotResult = Data[i+1];
        t->transliterate(gotResult);
        message=t->getID() + "->transliterate(UnicodeString, UnicodeString) for\n\t Source:" + prettify(Data[i+1]);
        doTest(message, gotResult, Data[i+2]);

        // Second pass on a fresh copy.
        // NOTE(review): this issues the same single-argument call as the
        // first pass; only the reported message differs.
        temp=Data[i+1];
        t->transliterate(temp);
        message.remove();
        message.append(t->getID());
        message.append("->transliterate(Replaceable) for \n\tSource:");
        // Report the source string of this row. The previous Data[i][1]
        // indexed into the ID string and appended a single code unit.
        message.append(Data[i+1]);
        doTest(message, temp, Data[i+2]);

        callEverything(t, __LINE__);
        delete t;
    }
}
示例2: main
// ///////////// M A I N ///////////////
int main() {
// UTF8 string
UnicodeString lString ("À côté de chez vous");
UnicodeString lNormalisedString (lString);
// Create a Normalizer
UErrorCode status = U_ZERO_ERROR;
const char* lNormaliserID = "NFD; [:M:] Remove; NFC;";
Transliterator* lNormaliser =
Transliterator::createInstance (lNormaliserID, UTRANS_FORWARD, status);
if (lNormaliser == NULL || U_FAILURE (status)) {
std::cerr << "ERROR: Transliterator::createInstance() failed for "
<< lNormaliserID << std::endl;
return 0;
}
assert (lNormaliser != NULL);
// Register the Transliterator
Transliterator::registerInstance (lNormaliser);
// Normalise the string
lNormaliser->transliterate (lNormalisedString);
std::cout << "Original version (UTF8): '"
<< uprintf (lString)
<< "'; normalized version without accents: '"
<< uprintf (lNormalisedString) << "'" << std::endl;
return 0;
}
示例3: removeAccents
std::string kiwix::removeAccents(const std::string &text) {
ucnv_setDefaultName("UTF-8");
UErrorCode status = U_ZERO_ERROR;
Transliterator *removeAccentsTrans = Transliterator::createInstance("Lower; NFD; [:M:] remove; NFC", UTRANS_FORWARD, status);
UnicodeString ustring = UnicodeString(text.c_str());
removeAccentsTrans->transliterate(ustring);
delete removeAccentsTrans;
std::string unaccentedText;
ustring.toUTF8String(unaccentedText);
return unaccentedText;
}
示例4: Transliterator_transliterate
/**
 * JNI entry point: applies the native transliterator behind `peer` to
 * `javaString` and returns the result as a new Java string.
 *
 * Returns NULL when the Java string could not be wrapped
 * (ScopedJavaUnicodeString::valid() is false).
 */
static jstring Transliterator_transliterate(JNIEnv* env, jclass, jlong peer, jstring javaString) {
    Transliterator* const transliterator = fromPeer(peer);
    ScopedJavaUnicodeString javaText(env, javaString);
    if (javaText.valid()) {
        // Transliterate in place inside the wrapper's UnicodeString.
        UnicodeString& text = javaText.unicodeString();
        transliterator->transliterate(text);
        return env->NewString(text.getBuffer(), text.length());
    }
    return NULL;
}
示例5: check
void ReplaceableTest::check(const UnicodeString& transliteratorName,
const UnicodeString& test,
const UnicodeString& shouldProduceStyles)
{
UErrorCode status = U_ZERO_ERROR;
TestReplaceable *tr = new TestReplaceable(test, "");
UnicodeString expectedStyles = shouldProduceStyles;
UnicodeString original = tr->toString();
Transliterator* t;
if (transliteratorName.charAt(0) == 0x2A /*'*'*/) {
UnicodeString rules(transliteratorName);
rules.remove(0,1);
UParseError pe;
t = Transliterator::createFromRules("test", rules, UTRANS_FORWARD,
pe, status);
// test clone()
TestReplaceable *tr2 = (TestReplaceable *)tr->clone();
if(tr2 != NULL) {
delete tr;
tr = tr2;
}
} else {
t = Transliterator::createInstance(transliteratorName, UTRANS_FORWARD, status);
}
if (U_FAILURE(status)) {
log("FAIL: failed to create the ");
log(transliteratorName);
errln(" transliterator.");
delete tr;
return;
}
t->transliterate(*tr);
UnicodeString newStyles = tr->getStyles();
if (newStyles != expectedStyles) {
errln("FAIL Styles: " + transliteratorName + "{" + original + "} => "
+ tr->toString() + "; should be {" + expectedStyles + "}!");
} else {
log("OK: ");
log(transliteratorName);
log("(");
log(original);
log(") => ");
logln(tr->toString());
}
delete tr;
delete t;
}
示例6: removeAccents
/**
 * Strips combining marks from a UTF-8 string using the ICU transliterator
 * "NFD; [:M:] Remove; NFC".
 *
 * @param str UTF-8 encoded input.
 * @return The transformed text as UTF-8. If the transliterator cannot be
 *         created, the input is returned unchanged.
 */
string removeAccents(const string& str)
{
    UErrorCode status = U_ZERO_ERROR;
    Transliterator* accentsConverter = Transliterator::createInstance("NFD; [:M:] Remove; NFC", UTRANS_FORWARD, status);

    // Creation can fail; do not dereference a null pointer in that case.
    if (U_FAILURE(status) || !accentsConverter) {
        delete accentsConverter;
        return str;
    }

    UnicodeString source = UnicodeString::fromUTF8(str.c_str());
    accentsConverter->transliterate(source);
    delete accentsConverter;

    string result;
    source.toUTF8String(result);
    return result;
}
示例7: icuTransform
/**
 * Applies the ICU transliterator identified by `_id` to `_text`.
 *
 * @param _id   Transliterator ID, UTF-8 encoded.
 * @param _text Text to transform, UTF-8 encoded.
 * @param c     Rendering context; its info() receives the error message
 *              when the transliterator cannot be created.
 * @return The transformed text as UTF-8, or `_text` unchanged on failure.
 */
static QByteArray
icuTransform( const QByteArray& _id, const QByteArray& _text, const RenderingContext& c )
{
    UnicodeString id = UnicodeString::fromUTF8( StringPiece( _id ));
    UnicodeString text = UnicodeString::fromUTF8( StringPiece( _text ));

    UErrorCode status = U_ZERO_ERROR;
    Transliterator *t = Transliterator::createInstance( id, UTRANS_FORWARD, status );

    // Treat a null result as a failure too, so it is never dereferenced.
    if( U_FAILURE( status ) || !t ) {
        c.info( QString( "icu_transform: Error %1 (%2)" )
                .arg( status )
                .arg( u_errorName( status )));
        delete t;
        return _text;
    }

    t->transliterate( text );
    // createInstance() hands ownership to the caller; release it here so
    // the transliterator is not leaked on every call.
    delete t;

    QByteArray result;
    text.toUTF8String(result);
    return result;
}
示例8: main
/**
 * Sample program: formats the 4th of every month of 1999 in the default
 * locale and in the Greek locale, then runs the Greek text through three
 * transliterators (Greek-Latin, a rule-based unaccent transliterator and
 * UnaccentTransliterator) and prints each intermediate result.
 *
 * Exits with status 1 when a required ICU object cannot be created.
 * check() is defined elsewhere in the file; presumably it aborts on a
 * failing status - confirm against its definition.
 *
 * @return 0 on success.
 */
int main(int argc, char **argv) {

    Calendar *cal;
    DateFormat *fmt;
    DateFormat *defFmt;
    Transliterator *greek_latin;
    Transliterator *rbtUnaccent;
    Transliterator *unaccent;
    UParseError pError;
    UErrorCode status = U_ZERO_ERROR;
    Locale greece("el", "GR");
    UnicodeString str, str2;

    // Create a calendar in the Greek locale
    cal = Calendar::createInstance(greece, status);
    check(status, "Calendar::createInstance");

    // Create a formatter
    fmt = DateFormat::createDateInstance(DateFormat::kFull, greece);
    if (fmt == 0) {
        // This factory takes no status argument, so a null return is the
        // only failure signal; check it before dereferencing.
        printf("ERROR: DateFormat::createDateInstance() failed\n");
        exit(1);
    }
    fmt->setCalendar(*cal);

    // Create a default formatter
    defFmt = DateFormat::createDateInstance(DateFormat::kFull);
    if (defFmt == 0) {
        printf("ERROR: DateFormat::createDateInstance() failed\n");
        exit(1);
    }
    defFmt->setCalendar(*cal);

    // Create a Greek-Latin Transliterator
    greek_latin = Transliterator::createInstance("Greek-Latin", UTRANS_FORWARD, status);
    if (greek_latin == 0) {
        printf("ERROR: Transliterator::createInstance() failed\n");
        exit(1);
    }

    // Create a custom Transliterator
    rbtUnaccent = Transliterator::createFromRules("RBTUnaccent",
                                                  UNACCENT_RULES,
                                                  UTRANS_FORWARD,
                                                  pError,
                                                  status);
    check(status, "Transliterator::createFromRules");

    // Create a custom Transliterator
    unaccent = new UnaccentTransliterator();

    // Loop over various months
    for (int32_t month = Calendar::JANUARY;
         month <= Calendar::DECEMBER;
         ++month) {

        // Set the calendar to a date
        cal->clear();
        cal->set(1999, month, 4);

        // Format the date in default locale
        str.remove();
        defFmt->format(cal->getTime(status), str, status);
        check(status, "DateFormat::format");
        printf("Date: ");
        uprintf(escape(str));
        printf("\n");

        // Format the date for Greece
        str.remove();
        fmt->format(cal->getTime(status), str, status);
        check(status, "DateFormat::format");
        printf("Greek formatted date: ");
        uprintf(escape(str));
        printf("\n");

        // Transliterate result
        greek_latin->transliterate(str);
        printf("Transliterated via Greek-Latin: ");
        uprintf(escape(str));
        printf("\n");

        // Transliterate result: keep a copy of the Greek-Latin output so
        // both unaccent transliterators start from the same text.
        str2 = str;
        rbtUnaccent->transliterate(str);
        printf("Transliterated via RBT unaccent: ");
        uprintf(escape(str));
        printf("\n");

        unaccent->transliterate(str2);
        printf("Transliterated via normalizer unaccent: ");
        uprintf(escape(str2));
        printf("\n\n");
    }

    // Clean up
    delete fmt;
    delete cal;
    delete greek_latin;
    delete unaccent;
    delete rbtUnaccent;

    printf("Exiting successfully\n");
    return 0;
}
示例9: rt
//.........这里部分代码省略.........
"\\ube44\\uc601\\ub9ac", "\\uc870\\uc9c1\\uc73c\\ub85c\\uc11c", "\\ud604\\ub300",
"\\uc18c\\ud504\\ud2b8\\uc6e8\\uc5b4", "\\uc81c\\ud488\\uacfc",
"\\ud45c\\uc900\\uc5d0\\uc11c", "\\ud14d\\uc2a4\\ud2b8\\uc758", "\\ud45c\\ud604\\uc744",
"\\uc9c0\\uc815\\ud558\\ub294", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc758",
"\\uc0ac\\uc6a9\\uc744", "\\uac1c\\ubc1c\\ud558\\uace0", "\\ud655\\uc7a5\\ud558\\uba70",
"\\uc7a5\\ub824\\ud558\\uae30", "\\uc704\\ud574",
"\\uc138\\uc6cc\\uc84c\\uc2b5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4",
"\\uba64\\ubc84\\uc27d\\uc740", "\\ucef4\\ud4e8\\ud130\\uc640", "\\uc815\\ubcf4",
"\\ucc98\\ub9ac", "\\uc0b0\\uc5c5\\uc5d0", "\\uc885\\uc0ac\\ud558\\uace0", "\\uc788\\ub294",
"\\uad11\\ubc94\\uc704\\ud55c", "\\ud68c\\uc0ac", "\\ubc0f", "\\uc870\\uc9c1\\uc758",
"\\ubc94\\uc704\\ub97c",
"\\ub098\\ud0c0\\ub0c5\\ub2c8\\ub2e4.", "\\ucf58\\uc18c\\uc2dc\\uc5c4\\uc758",
"\\uc7ac\\uc815\\uc740", "\\uc804\\uc801\\uc73c\\ub85c", "\\ud68c\\ube44\\uc5d0",
"\\uc758\\ud574", "\\ucda9\\ub2f9\\ub429\\ub2c8\\ub2e4.", "\\uc720\\ub2c8\\ucf54\\ub4dc",
"\\ucee8\\uc18c\\uc2dc\\uc5c4\\uc5d0\\uc11c\\uc758", "\\uba64\\ubc84\\uc27d\\uc740",
"\\uc804", "\\uc138\\uacc4", "\\uc5b4\\ub290", "\\uacf3\\uc5d0\\uc11c\\ub098",
"\\uc720\\ub2c8\\ucf54\\ub4dc", "\\ud45c\\uc900\\uc744", "\\uc9c0\\uc6d0\\ud558\\uace0",
"\\uadf8", "\\ud655\\uc7a5\\uacfc", "\\uad6c\\ud604\\uc744",
"\\uc9c0\\uc6d0\\ud558\\uace0\\uc790\\ud558\\ub294", "\\uc870\\uc9c1\\uacfc",
"\\uac1c\\uc778\\uc5d0\\uac8c", "\\uac1c\\ubc29\\ub418\\uc5b4",
"\\uc788\\uc2b5\\ub2c8\\ub2e4.",
"\\ub354", "\\uc790\\uc138\\ud55c", "\\ub0b4\\uc6a9\\uc740", "\\uc6a9\\uc5b4\\uc9d1,",
"\\uc608\\uc81c", "\\uc720\\ub2c8\\ucf54\\ub4dc", "\\uc0ac\\uc6a9", "\\uac00\\ub2a5",
"\\uc81c\\ud488,", "\\uae30\\uc220", "\\uc815\\ubcf4", "\\ubc0f", "\\uae30\\ud0c0",
"\\uc720\\uc6a9\\ud55c", "\\uc815\\ubcf4\\ub97c",
"\\ucc38\\uc870\\ud558\\uc2ed\\uc2dc\\uc624."
};
enum { WHAT_IS_UNICODE_length = sizeof(WHAT_IS_UNICODE) / sizeof(WHAT_IS_UNICODE[0]) };
UParseError parseError;
UErrorCode status = U_ZERO_ERROR;
Transliterator* latinJamo = Transliterator::createInstance("Latin-Jamo", UTRANS_FORWARD, parseError, status);
Transliterator* jamoHangul = Transliterator::createInstance("NFC(NFD)", UTRANS_FORWARD, parseError, status);
if (latinJamo == 0 || jamoHangul == 0 || U_FAILURE(status)) {
delete latinJamo;
delete jamoHangul;
dataerrln("FAIL: createInstance returned NULL - %s", u_errorName(status));
return;
}
Transliterator* jamoLatin = latinJamo->createInverse(status);
Transliterator* hangulJamo = jamoHangul->createInverse(status);
if (jamoLatin == 0 || hangulJamo == 0) {
errln("FAIL: createInverse returned NULL");
delete latinJamo;
delete jamoLatin;
delete jamoHangul;
delete hangulJamo;
return;
}
Transliterator* tarray[4] =
{ hangulJamo, jamoLatin, latinJamo, jamoHangul };
CompoundTransliterator rt(tarray, 4);
UnicodeString buf;
int32_t total = 0;
int32_t errors = 0;
int32_t i;
for (i=0; i < WHAT_IS_UNICODE_length; ++i) {
++total;
UnicodeString hangul = UnicodeString(WHAT_IS_UNICODE[i], -1, US_INV);
hangul = hangul.unescape(); // Parse backslash-u escapes
UnicodeString hangulX = hangul;
rt.transliterate(hangulX);
if (hangul != hangulX) {
++errors;
UnicodeString jamo = hangul; hangulJamo->transliterate(jamo);
UnicodeString latin = jamo; jamoLatin->transliterate(latin);
UnicodeString jamo2 = latin; latinJamo->transliterate(jamo2);
UnicodeString hangul2 = jamo2; jamoHangul->transliterate(hangul2);
buf.remove(0);
buf.append("FAIL: ");
if (hangul2 != hangulX) {
buf.append((UnicodeString)"(Weird: " + hangulX + " != " + hangul2 + ")");
}
// The Hangul-Jamo conversion is not usually the
// bug here, so we hide it from display.
// Uncomment lines to see the Hangul.
buf.append(//hangul + " => " +
jamoToName(jamo) + " => " +
latin + " => " + jamoToName(jamo2)
//+ " => " + hangul2
);
errln(prettify(buf));
}
}
if (errors != 0) {
errln((UnicodeString)"Test word failures: " + errors + " out of " + total);
} else {
logln((UnicodeString)"All " + total + " test words passed");
}
delete latinJamo;
delete jamoLatin;
delete jamoHangul;
delete hangulJamo;
}
示例10: convertFile
//.........这里部分代码省略.........
flush = rd != bufsz;
ucnv_toUnicode(convfrom, &unibufp, unibufp + bufsz, &cbufp,
cbufp + rd, fromoffsets, flush, &err);
infoffset += (uint32_t)(cbufp - buf);
if (U_FAILURE(err)) {
char pos[32];
sprintf(pos, "%u", infoffset - 1);
UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
initMsg(pname);
u_wmsg(stderr, "problemCvtToU", str.getBuffer(), u_wmsg_errorName(err));
willexit = 1;
err = U_ZERO_ERROR; /* reset the error for the rest of the conversion. */
}
// At the last conversion, the converted characters should be
// equal to number of chars read.
if (flush && !willexit && cbufp != (buf + rd)) {
char pos[32];
sprintf(pos, "%u", infoffset);
UnicodeString str(pos, (int32_t)(uprv_strlen(pos) + 1));
initMsg(pname);
u_wmsg(stderr, "premEndInput", str.getBuffer());
willexit = 1;
}
// Prepare to transliterate and convert. Transliterate if needed.
#if !UCONFIG_NO_TRANSLITERATION
if (t) {
u.setTo(unibuf, (int32_t)(unibufp - unibuf)); // Copy into string.
t->transliterate(u);
} else
#endif
{
u.setTo(unibuf, (int32_t)(unibufp - unibuf), (int32_t)(bufsz)); // Share the buffer.
}
int32_t ulen = u.length();
// Convert the Unicode buffer into the destination codepage
// Again 'bufp' will be placed on the last converted character
// And 'unibufbp' will be placed on the last converted unicode character
// At the last conversion flush should be set to true to ensure that
// all characters left get converted
const UChar *unibufu = unibufbp = u.getBuffer();
do {
int32_t len = ulen > (int32_t)bufsz ? (int32_t)bufsz : ulen;
bufp = buf;
unibufp = (UChar *) (unibufbp + len);
ucnv_fromUnicode(convto, &bufp, bufp + tobufsz,
&unibufbp,
unibufp,
tooffsets, flush, &err);
if (U_FAILURE(err)) {
const char *errtag;
char pos[32];
uint32_t erroffset =