本文整理汇总了C++中RuleBasedBreakIterator类的典型用法代码示例。如果您正苦于以下问题：C++ RuleBasedBreakIterator类的具体用法？C++ RuleBasedBreakIterator怎么用？C++ RuleBasedBreakIterator使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了RuleBasedBreakIterator类的9个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: data
void RBBIAPITest::RoundtripRule(const char *dataFile) {
UErrorCode status = U_ZERO_ERROR;
UParseError parseError;
parseError.line = 0;
parseError.offset = 0;
LocalUDataMemoryPointer data(udata_open(U_ICUDATA_BRKITR, "brk", dataFile, &status));
uint32_t length;
const UChar *builtSource;
const uint8_t *rbbiRules;
const uint8_t *builtRules;
if (U_FAILURE(status)) {
errcheckln(status, "Can't open \"%s\" - %s", dataFile, u_errorName(status));
return;
}
builtRules = (const uint8_t *)udata_getMemory(data.getAlias());
builtSource = (const UChar *)(builtRules + ((RBBIDataHeader*)builtRules)->fRuleSource);
RuleBasedBreakIterator *brkItr = new RuleBasedBreakIterator(builtSource, parseError, status);
if (U_FAILURE(status)) {
errln("createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
u_errorName(status), parseError.line, parseError.offset);
return;
};
rbbiRules = brkItr->getBinaryRules(length);
logln("Comparing \"%s\" len=%d", dataFile, length);
if (memcmp(builtRules, rbbiRules, (int32_t)length) != 0) {
errln("Built rules and rebuilt rules are different %s", dataFile);
return;
}
delete brkItr;
}
示例2: RuleBasedBreakIterator
//
// Bug 2190 Regression test. Builder crash on rule consisting of only a
// $variable reference
void RBBIAPITest::TestBug2190() {
UnicodeString rulesString1 = "$aaa = abcd;\n"
"$bbb = $aaa;\n"
"$bbb;\n";
UnicodeString testString1 = "abcdabcd";
// 01234567890
int32_t bounds1[] = {0, 4, 8};
UErrorCode status=U_ZERO_ERROR;
UParseError parseError;
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString1, parseError, status);
if(U_FAILURE(status)) {
dataerrln("Fail : in construction - %s", u_errorName(status));
} else {
bi->setText(testString1);
doBoundaryTest(*bi, testString1, bounds1);
}
delete bi;
}
示例3: TEST_ASSERT_SUCCESS
void RBBIAPITest::TestRefreshInputText() {
/*
* RefreshInput changes out the input of a Break Iterator without
* changing anything else in the iterator's state. Used with Java JNI,
* when Java moves the underlying string storage. This test
* runs BreakIterator::next() repeatedly, moving the text in the middle of the sequence.
* The right set of boundaries should still be found.
*/
UChar testStr[] = {0x20, 0x41, 0x20, 0x42, 0x20, 0x43, 0x20, 0x44, 0x0}; /* = " A B C D" */
UChar movedStr[] = {0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0};
UErrorCode status = U_ZERO_ERROR;
UText ut1 = UTEXT_INITIALIZER;
UText ut2 = UTEXT_INITIALIZER;
RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getEnglish(), status);
TEST_ASSERT_SUCCESS(status);
utext_openUChars(&ut1, testStr, -1, &status);
TEST_ASSERT_SUCCESS(status);
if (U_SUCCESS(status)) {
bi->setText(&ut1, status);
TEST_ASSERT_SUCCESS(status);
/* Line boundaries will occur before each letter in the original string */
TEST_ASSERT(1 == bi->next());
TEST_ASSERT(3 == bi->next());
/* Move the string, kill the original string. */
u_strcpy(movedStr, testStr);
u_memset(testStr, 0x20, u_strlen(testStr));
utext_openUChars(&ut2, movedStr, -1, &status);
TEST_ASSERT_SUCCESS(status);
RuleBasedBreakIterator *returnedBI = &bi->refreshInputText(&ut2, status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(bi == returnedBI);
/* Find the following matches, now working in the moved string. */
TEST_ASSERT(5 == bi->next());
TEST_ASSERT(7 == bi->next());
TEST_ASSERT(8 == bi->next());
TEST_ASSERT(UBRK_DONE == bi->next());
utext_close(&ut1);
utext_close(&ut2);
}
delete bi;
}
示例4: doBoundaryTest
// Checks bi.isBoundary() at every offset in [0, text.length()) against the
// ascending list of expected boundary offsets in `boundaries`, reporting an
// error for each offset where the answer disagrees with the list.
//
// @param bi          Iterator under test.
// @param text        Text whose offsets are probed; only its length is read here.
// @param boundaries  Expected boundary offsets; the cursor into this array
//                    advances each time the probed offset matches its entry.
void RBBIAPITest::doBoundaryTest(RuleBasedBreakIterator& bi, UnicodeString& text, int32_t *boundaries){
    logln((UnicodeString)"testIsBoundary():");

    int32_t nextExpected = 0;   // index of the next not-yet-matched entry in boundaries
    int32_t offset = 0;
    while (offset < text.length()) {
        UBool isB = bi.isBoundary(offset);
        logln((UnicodeString)"bi.isBoundary(" + offset + ") -> " + isB);

        const bool shouldBeBoundary = (offset == boundaries[nextExpected]);
        if (shouldBeBoundary) {
            if (!isB) {
                errln((UnicodeString)"Wrong result from isBoundary() for " + offset + (UnicodeString)": expected true, got false");
            }
            ++nextExpected;
        } else if (isB) {
            errln((UnicodeString)"Wrong result from isBoundary() for " + offset + (UnicodeString)": expected false, got true");
        }
        ++offset;
    }
}
示例5: ures_initStackObject
U_NAMESPACE_BEGIN
// -------------------------------------
// Creates a RuleBasedBreakIterator for a locale from the break-iterator
// resource data.
//
// The "boundaries" table of the locale's bundle is consulted (with fallback)
// for the entry named by `type`; that entry is a string of the form
// "name.ext" naming the compiled rules data item, which is then opened with
// udata_open() and handed to a new RuleBasedBreakIterator.
//
// @param loc     Locale whose bundle is opened.
// @param type    Key looked up inside the "boundaries" table.
// @param kind    Value passed to setBreakType() on the new iterator.
// @param status  In/out ICU error code. Nothing is done if it already holds a
//                failure. Set to U_BUFFER_OVERFLOW_ERROR if the file name does
//                not fit the local buffer, and to U_MEMORY_ALLOCATION_ERROR if
//                the iterator could not be allocated.
// @return        The new iterator, or NULL on failure.
BreakIterator*
BreakIterator::buildInstance(const Locale& loc, const char *type, int32_t kind, UErrorCode &status)
{
    char fnbuff[256];
    char ext[4]={'\0'};
    char actualLocale[ULOC_FULLNAME_CAPACITY];
    int32_t size;
    const UChar* brkfname = NULL;
    UResourceBundle brkRulesStack;
    UResourceBundle brkNameStack;
    UResourceBundle *brkRules   = &brkRulesStack;
    UResourceBundle *brkName    = &brkNameStack;
    RuleBasedBreakIterator *result = NULL;

    if (U_FAILURE(status))
        return NULL;

    // Start from empty strings so that no uninitialized buffer is ever handed
    // to udata_open() or setLocaleIDs() on a path that skips the lookup below.
    fnbuff[0] = 0;
    actualLocale[0] = 0;

    ures_initStackObject(brkRules);
    ures_initStackObject(brkName);

    // Get the locale
    UResourceBundle *b = ures_open(U_ICUDATA_BRKITR, loc.getName(), &status);
    /* this is a hack for now. Should be fixed when the data is fetched from
        brk_index.txt */
    if(status==U_USING_DEFAULT_WARNING){
        status=U_ZERO_ERROR;
        ures_openFillIn(b, U_ICUDATA_BRKITR, "", &status);
    }

    // Get the "boundaries" array.
    if (U_SUCCESS(status)) {
        brkRules = ures_getByKeyWithFallback(b, "boundaries", brkRules, &status);
        // Get the string object naming the rules file
        brkName = ures_getByKeyWithFallback(brkRules, type, brkName, &status);
        // Get the actual string
        brkfname = ures_getString(brkName, &size, &status);
        U_ASSERT((size_t)size<sizeof(fnbuff));
        if ((size_t)size>=sizeof(fnbuff)) {
            size=0;
            if (U_SUCCESS(status)) {
                status = U_BUFFER_OVERFLOW_ERROR;
            }
        }

        // Use the string if we found it
        if (U_SUCCESS(status) && brkfname) {
            uprv_strncpy(actualLocale,
                ures_getLocale(brkName, &status),
                sizeof(actualLocale)/sizeof(actualLocale[0]));
            // strncpy does not terminate when the source fills the buffer.
            actualLocale[sizeof(actualLocale)/sizeof(actualLocale[0]) - 1] = 0;

            const UChar* extStart=u_strchr(brkfname, 0x002e);
            int len = 0;
            if(extStart!=NULL){
                len = (int)(extStart-brkfname);
                // Copy only the characters the extension really has, capped so
                // that the terminator always fits. Copying a fixed sizeof(ext)
                // characters read past the end of short extensions and left
                // ext unterminated for extensions of four or more characters.
                int32_t extLen = u_strlen(extStart+1);
                if (extLen > (int32_t)sizeof(ext)-1) {
                    extLen = (int32_t)sizeof(ext)-1;
                }
                u_UCharsToChars(extStart+1, ext, extLen);
                ext[extLen]=0; // nul terminate
                u_UCharsToChars(brkfname, fnbuff, len);
            }
            fnbuff[len]=0; // nul terminate
        }
    }

    ures_close(brkRules);
    ures_close(brkName);

    UDataMemory* file = udata_open(U_ICUDATA_BRKITR, ext, fnbuff, &status);
    if (U_FAILURE(status)) {
        ures_close(b);
        return NULL;
    }

    // Create a RuleBasedBreakIterator
    result = new RuleBasedBreakIterator(file, status);

    // If there is a result, set the valid locale and actual locale, and the kind
    if (U_SUCCESS(status) && result != NULL) {
        U_LOCALE_BASED(locBased, *(BreakIterator*)result);
        locBased.setLocaleIDs(ures_getLocaleByType(b, ULOC_VALID_LOCALE, &status), actualLocale);
        result->setBreakType(kind);
    }

    ures_close(b);

    if (U_FAILURE(status) && result != NULL) {  // Sometimes redundant check, but simple
        delete result;
        return NULL;
    }

    if (result == NULL) {
        // The iterator was never created, so the data file is closed here.
        udata_close(file);
        if (U_SUCCESS(status)) {
            status = U_MEMORY_ALLOCATION_ERROR;
        }
    }

    return result;
}
示例6: rulesString
//
// TestRuleStatusVec
// Test the vector form of break rule status.
//
void RBBIAPITest::TestRuleStatusVec() {
UnicodeString rulesString( "[A-N]{100}; \n"
"[a-w]{200}; \n"
"[\\p{L}]{300}; \n"
"[\\p{N}]{400}; \n"
"[0-5]{500}; \n"
"!.*;\n", -1, US_INV);
UnicodeString testString1 = "Aapz5?";
int32_t statusVals[10];
int32_t numStatuses;
int32_t pos;
UErrorCode status=U_ZERO_ERROR;
UParseError parseError;
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(rulesString, parseError, status);
if (U_FAILURE(status)) {
dataerrln("Failure at file %s, line %d, error = %s", __FILE__, __LINE__, u_errorName(status));
} else {
bi->setText(testString1);
// A
pos = bi->next();
TEST_ASSERT(pos==1);
numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(numStatuses == 2);
TEST_ASSERT(statusVals[0] == 100);
TEST_ASSERT(statusVals[1] == 300);
// a
pos = bi->next();
TEST_ASSERT(pos==2);
numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(numStatuses == 2);
TEST_ASSERT(statusVals[0] == 200);
TEST_ASSERT(statusVals[1] == 300);
// p
pos = bi->next();
TEST_ASSERT(pos==3);
numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(numStatuses == 2);
TEST_ASSERT(statusVals[0] == 200);
TEST_ASSERT(statusVals[1] == 300);
// z
pos = bi->next();
TEST_ASSERT(pos==4);
numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(numStatuses == 1);
TEST_ASSERT(statusVals[0] == 300);
// 5
pos = bi->next();
TEST_ASSERT(pos==5);
numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(numStatuses == 2);
TEST_ASSERT(statusVals[0] == 400);
TEST_ASSERT(statusVals[1] == 500);
// ?
pos = bi->next();
TEST_ASSERT(pos==6);
numStatuses = bi->getRuleStatusVec(statusVals, 10, status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(numStatuses == 1);
TEST_ASSERT(statusVals[0] == 0);
//
// Check buffer overflow error handling. Char == A
//
bi->first();
pos = bi->next();
TEST_ASSERT(pos==1);
memset(statusVals, -1, sizeof(statusVals));
numStatuses = bi->getRuleStatusVec(statusVals, 0, status);
TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
TEST_ASSERT(numStatuses == 2);
TEST_ASSERT(statusVals[0] == -1);
status = U_ZERO_ERROR;
memset(statusVals, -1, sizeof(statusVals));
numStatuses = bi->getRuleStatusVec(statusVals, 1, status);
TEST_ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
TEST_ASSERT(numStatuses == 2);
TEST_ASSERT(statusVals[0] == 100);
TEST_ASSERT(statusVals[1] == -1);
status = U_ZERO_ERROR;
memset(statusVals, -1, sizeof(statusVals));
numStatuses = bi->getRuleStatusVec(statusVals, 2, status);
//.........这里部分代码省略.........
示例7: errcheckln
void RBBIAPITest::TestCloneEquals()
{
UErrorCode status=U_ZERO_ERROR;
RuleBasedBreakIterator* bi1 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
RuleBasedBreakIterator* biequal = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
RuleBasedBreakIterator* bi3 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createCharacterInstance(Locale::getDefault(), status);
RuleBasedBreakIterator* bi2 = (RuleBasedBreakIterator*)RuleBasedBreakIterator::createWordInstance(Locale::getDefault(), status);
if(U_FAILURE(status)){
errcheckln(status, "Fail : in construction - %s", u_errorName(status));
return;
}
UnicodeString testString="Testing word break iterators's clone() and equals()";
bi1->setText(testString);
bi2->setText(testString);
biequal->setText(testString);
bi3->setText("hello");
logln((UnicodeString)"Testing equals()");
logln((UnicodeString)"Testing == and !=");
UBool b = (*bi1 != *biequal);
b |= *bi1 == *bi2;
b |= *bi1 == *bi3;
if (b) {
errln((UnicodeString)"ERROR:1 RBBI's == and != operator failed.");
}
if(*bi2 == *biequal || *bi2 == *bi1 || *biequal == *bi3)
errln((UnicodeString)"ERROR:2 RBBI's == and != operator failed.");
// Quick test of RulesBasedBreakIterator assignment -
// Check that
// two different iterators are !=
// they are == after assignment
// source and dest iterator produce the same next() after assignment.
// deleting one doesn't disable the other.
logln("Testing assignment");
RuleBasedBreakIterator *bix = (RuleBasedBreakIterator *)BreakIterator::createLineInstance(Locale::getDefault(), status);
if(U_FAILURE(status)){
errcheckln(status, "Fail : in construction - %s", u_errorName(status));
return;
}
RuleBasedBreakIterator biDefault, biDefault2;
if(U_FAILURE(status)){
errln((UnicodeString)"FAIL : in construction of default iterator");
return;
}
if (biDefault == *bix) {
errln((UnicodeString)"ERROR: iterators should not compare ==");
return;
}
if (biDefault != biDefault2) {
errln((UnicodeString)"ERROR: iterators should compare ==");
return;
}
UnicodeString HelloString("Hello Kitty");
bix->setText(HelloString);
if (*bix == *bi2) {
errln(UnicodeString("ERROR: strings should not be equal before assignment."));
}
*bix = *bi2;
if (*bix != *bi2) {
errln(UnicodeString("ERROR: strings should be equal before assignment."));
}
int bixnext = bix->next();
int bi2next = bi2->next();
if (! (bixnext == bi2next && bixnext == 7)) {
errln(UnicodeString("ERROR: iterators behaved differently after assignment."));
}
delete bix;
if (bi2->next() != 8) {
errln(UnicodeString("ERROR: iterator.next() failed after deleting copy."));
}
logln((UnicodeString)"Testing clone()");
RuleBasedBreakIterator* bi1clone=(RuleBasedBreakIterator*)bi1->clone();
RuleBasedBreakIterator* bi2clone=(RuleBasedBreakIterator*)bi2->clone();
if(*bi1clone != *bi1 || *bi1clone != *biequal ||
*bi1clone == *bi3 || *bi1clone == *bi2)
errln((UnicodeString)"ERROR:1 RBBI's clone() method failed");
if(*bi2clone == *bi1 || *bi2clone == *biequal ||
*bi2clone == *bi3 || *bi2clone != *bi2)
errln((UnicodeString)"ERROR:2 RBBI's clone() method failed");
if(bi1->getText() != bi1clone->getText() ||
bi2clone->getText() != bi2->getText() ||
*bi2clone == *bi1clone )
//.........这里部分代码省略.........
示例8: main
//.........这里部分代码省略.........
}
//
// Convert the rules to UChar.
// Preflight first to determine required buffer size.
//
uint32_t destCap = ucnv_toUChars(conv,
NULL, // dest,
0, // destCapacity,
ruleSourceC,
ruleFileSize,
&status);
if (status != U_BUFFER_OVERFLOW_ERROR) {
fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
exit(status);
};
status = U_ZERO_ERROR;
UChar *ruleSourceU = new UChar[destCap+1];
ucnv_toUChars(conv,
ruleSourceU, // dest,
destCap+1,
ruleSourceC,
ruleFileSize,
&status);
if (U_FAILURE(status)) {
fprintf(stderr, "ucnv_toUChars: ICU Error \"%s\"\n", u_errorName(status));
exit(status);
};
ucnv_close(conv);
//
// Put the source rules into a UnicodeString
//
UnicodeString ruleSourceS(FALSE, ruleSourceU, destCap);
//
// Create the break iterator from the rules
// This will compile the rules.
//
UParseError parseError;
parseError.line = 0;
parseError.offset = 0;
RuleBasedBreakIterator *bi = new RuleBasedBreakIterator(ruleSourceS, parseError, status);
if (U_FAILURE(status)) {
fprintf(stderr, "createRuleBasedBreakIterator: ICU Error \"%s\" at line %d, column %d\n",
u_errorName(status), (int)parseError.line, (int)parseError.offset);
exit(status);
};
//
// Get the compiled rule data from the break iterator.
//
uint32_t outDataSize;
const uint8_t *outData;
outData = bi->getBinaryRules(outDataSize);
// Copy the data format version numbers from the RBBI data header into the UDataMemory header.
uprv_memcpy(dh.info.formatVersion, ((RBBIDataHeader *)outData)->fFormatVersion, sizeof(dh.info.formatVersion));
//
// Create the output file
//
size_t bytesWritten;
UNewDataMemory *pData;
pData = udata_create(outDir, NULL, outFileName, &(dh.info), copyright, &status);
if(U_FAILURE(status)) {
fprintf(stderr, "genbrk: Could not open output file \"%s\", \"%s\"\n",
outFileName, u_errorName(status));
exit(status);
}
// Write the data itself.
udata_writeBlock(pData, outData, outDataSize);
// finish up
bytesWritten = udata_finish(pData, &status);
if(U_FAILURE(status)) {
fprintf(stderr, "genbrk: error %d writing the output file\n", status);
exit(status);
}
if (bytesWritten != outDataSize) {
fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName);
exit(-1);
}
delete bi;
delete[] ruleSourceU;
delete[] ruleBufferC;
u_cleanup();
printf("genbrk: tool completed successfully.\n");
return 0;
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
}
示例9: u_unescape
//
// TestRuleStatus
// Test word break rule status constants.
//
void RBBIAPITest::TestRuleStatus() {
UChar str[30];
u_unescape("plain word 123.45 \\u9160\\u9161 \\u30a1\\u30a2 \\u3041\\u3094",
// 012345678901234567 8 9 0 1 2 3 4 5 6
// Ideographic Katakana Hiragana
str, 30);
UnicodeString testString1(str);
int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 19, 20, 21, 23, 24, 25, 26};
int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
UBRK_WORD_IDEO, UBRK_WORD_IDEO, UBRK_WORD_NONE,
UBRK_WORD_KANA, UBRK_WORD_NONE, UBRK_WORD_KANA, UBRK_WORD_KANA};
int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
UBRK_WORD_IDEO_LIMIT, UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT,
UBRK_WORD_KANA_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_KANA_LIMIT, UBRK_WORD_KANA_LIMIT};
UErrorCode status=U_ZERO_ERROR;
RuleBasedBreakIterator *bi = (RuleBasedBreakIterator *)BreakIterator::createWordInstance(Locale::getEnglish(), status);
if(U_FAILURE(status)) {
errcheckln(status, "Fail : in construction - %s", u_errorName(status));
} else {
bi->setText(testString1);
// First test that the breaks are in the right spots.
doBoundaryTest(*bi, testString1, bounds1);
// Then go back and check tag values
int32_t i = 0;
int32_t pos, tag;
for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
if (pos != bounds1[i]) {
errln("FAIL: unexpected word break at postion %d", pos);
break;
}
tag = bi->getRuleStatus();
if (tag < tag_lo[i] || tag >= tag_hi[i]) {
errln("FAIL: incorrect tag value %d at position %d", tag, pos);
break;
}
// Check that we get the same tag values from getRuleStatusVec()
int32_t vec[10];
int t = bi->getRuleStatusVec(vec, 10, status);
TEST_ASSERT_SUCCESS(status);
TEST_ASSERT(t==1);
TEST_ASSERT(vec[0] == tag);
}
}
delete bi;
// Now test line break status. This test mostly is to confirm that the status constants
// are correctly declared in the header.
testString1 = "test line. \n";
// break type s s h
bi = (RuleBasedBreakIterator *)
BreakIterator::createLineInstance(Locale::getEnglish(), status);
if(U_FAILURE(status)) {
errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
} else {
int32_t i = 0;
int32_t pos, tag;
UBool success;
bi->setText(testString1);
pos = bi->current();
tag = bi->getRuleStatus();
for (i=0; i<3; i++) {
switch (i) {
case 0:
success = pos==0 && tag==UBRK_LINE_SOFT; break;
case 1:
success = pos==5 && tag==UBRK_LINE_SOFT; break;
case 2:
success = pos==12 && tag==UBRK_LINE_HARD; break;
default:
success = FALSE; break;
}
if (success == FALSE) {
errln("Fail: incorrect word break status or position. i=%d, pos=%d, tag=%d",
i, pos, tag);
break;
}
pos = bi->next();
tag = bi->getRuleStatus();
}
if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
(UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
errln("UBRK_LINE_* constants from header are inconsistent.");
}
}
delete bi;
//.........这里部分代码省略.........