本文整理汇总了C++中BreakIterator::setText方法的典型用法代码示例。如果您正苦于以下问题：C++ BreakIterator::setText方法的具体用法？C++ BreakIterator::setText怎么用？C++ BreakIterator::setText使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类BreakIterator的用法示例。
在下文中一共展示了BreakIterator::setText方法的10个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。
示例1: main
/*
 * Creating and using text boundaries.
 *
 * Runs a sentence break iterator (forward and backward) and a word break
 * iterator (forward, first, last, and around offset 10) over a fixed sample
 * string, printing through the print* helpers, then returns the result of
 * c_main().
 */
int main( void )
{
    puts("ICU Break Iterator Sample Program\n");
    puts("C++ Break Iteration\n");

    BreakIterator* boundary;
    UnicodeString stringToExamine("Aaa bbb ccc. Ddd eee fff.");
    printf("Examining: ");
    printUnicodeString(stringToExamine);
    puts("");

    //print each sentence in forward and reverse order
    UErrorCode status = U_ZERO_ERROR;
    boundary = BreakIterator::createSentenceInstance(
        Locale::getUS(), status );
    if (U_FAILURE(status)) {
        printf("failed to create sentence break iterator. status = %s",
            u_errorName(status));
        exit(1);
    }
    boundary->setText(stringToExamine);
    puts("\n Sentence Boundaries... ");
    puts("----- forward: -----------");
    printEachForward(*boundary);
    puts("----- backward: ----------");
    printEachBackward(*boundary);
    delete boundary;

    //print each word in order
    printf("\n Word Boundaries... \n");
    boundary = BreakIterator::createWordInstance(
        Locale::getUS(), status);
    // The factory reports failure through status; without this check the
    // setText() call below would run on an iterator that was never created.
    if (U_FAILURE(status)) {
        printf("failed to create word break iterator. status = %s",
            u_errorName(status));
        exit(1);
    }
    boundary->setText(stringToExamine);
    puts("----- forward: -----------");
    printEachForward(*boundary);
    //print first element
    puts("----- first: -------------");
    printFirst(*boundary);
    //print last element
    puts("----- last: --------------");
    printLast(*boundary);
    //print word at charpos 10
    puts("----- at pos 10: ---------");
    printAt(*boundary, 10 );
    delete boundary;

    puts("\nEnd C++ Break Iteration");

    // Call the C version
    return c_main();
}
示例2: u_strToTitle
U_NAMESPACE_USE
/* functions available in the common library (for unistr_case.cpp) */
/* public API functions */
// Title-cases src into dest.
//
// If the caller passes titleIter, it is used as the break iterator and is
// not owned here. Otherwise a word break iterator for `locale` is created
// and released automatically when the function returns.
// Returns 0 when *pErrorCode indicates failure before the mapping starts;
// otherwise returns whatever ustrcase_mapWithOverlap() returns.
U_CAPI int32_t U_EXPORT2
u_strToTitle(UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             UBreakIterator *titleIter,
             const char *locale,
             UErrorCode *pErrorCode) {
    // Holds the iterator only when it is created below.
    LocalPointer<BreakIterator> ownedIter;
    BreakIterator *iter=reinterpret_cast<BreakIterator *>(titleIter);
    if(iter==NULL) {
        iter=BreakIterator::createWordInstance(Locale(locale), *pErrorCode);
        ownedIter.adoptInstead(iter);
    }
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    // A negative srcLength marks src as NUL-terminated.
    UnicodeString srcText(srcLength<0, src, srcLength);
    iter->setText(srcText);

    const int32_t caseLocale=ustrcase_getCaseLocale(locale);
    return ustrcase_mapWithOverlap(
        caseLocale, 0, iter,
        dest, destCapacity,
        src, srcLength,
        ustrcase_internalToTitle, *pErrorCode);
}
示例3: ubrk_setUText
// C API wrapper: forwards the UText to BreakIterator::setText() on the C++
// object behind the UBreakIterator handle. Errors are reported via *status.
U_DRAFT void U_EXPORT2
ubrk_setUText(UBreakIterator *bi,
              UText          *text,
              UErrorCode     *status)
{
    reinterpret_cast<BreakIterator *>(bi)->setText(text, *status);
}
示例4: ubrk_setText
// C API wrapper: wraps the UChar buffer in a stack-allocated UText and passes
// it to BreakIterator::setText() on the C++ object behind the handle.
// Errors from either step are reported via *status.
U_CAPI void U_EXPORT2
ubrk_setText(UBreakIterator* bi,
             const UChar*    text,
             int32_t         textLength,
             UErrorCode*     status)
{
    UText wrapper = UTEXT_INITIALIZER;
    utext_openUChars(&wrapper, text, textLength, status);
    reinterpret_cast<BreakIterator *>(bi)->setText(&wrapper, *status);
    // A stack allocated UText wrapping a UChar * string
    // can be dumped without explicitly closing it.
}
示例5: GetWordBoundaryPositions
void GetWordBoundaryPositions(const FunctionCallbackInfo<Value>& args) {
Isolate* isolate = Isolate::GetCurrent();
HandleScope scope(isolate);
if (args.Length() != 2) {
isolate->ThrowException(Exception::TypeError(String::NewFromUtf8(isolate, "must supply locale and text")));
return;
}
if (!args[0]->IsString()) {
isolate->ThrowException(Exception::TypeError(String::NewFromUtf8(isolate, "text is not specified")));
return;
}
if (!args[1]->IsString()) {
isolate->ThrowException(Exception::TypeError(String::NewFromUtf8(isolate, "locale is not specified")));
return;
}
// convert v8 locale to ICU
String::Utf8Value locale(args[1]->ToString());
const char* country = strtok(*locale, "_"), *language = strtok(NULL, "_");
Locale icuLocale(language, country);
// create the BreakIterator instance
UErrorCode err = U_ZERO_ERROR;
BreakIterator *iterator = BreakIterator::createWordInstance(icuLocale, err);
if (U_FAILURE(err)) {
ErrorCode errCode;
errCode.set(err);
isolate->ThrowException(Exception::TypeError(String::NewFromUtf8(isolate, errCode.errorName())));
return;
}
// Convert v8 text to ICU Unicode value
Local<String> textStr = args[0]->ToString();
String::Utf8Value textValue(textStr);
UnicodeString uTextValue(*textValue, "UTF-8");
if (uTextValue.isBogus()) {
isolate->ThrowException(Exception::TypeError(String::NewFromUtf8(isolate, "unable to create unicode string")));
return;
}
iterator->setText(uTextValue);
// populate boundaries
Local<Array> results = Array::New(isolate);
int32_t arrayPosition = 0;
int32_t currentBoundary = iterator->first();
int32_t previousBoundary = 0;
while (currentBoundary != BreakIterator::DONE) {
if (currentBoundary > 0) {
Local<Object> boundaryResult = Object::New(isolate);
boundaryResult->Set(String::NewFromUtf8(isolate, "start"), Number::New(isolate, previousBoundary));
boundaryResult->Set(String::NewFromUtf8(isolate, "end"), Number::New(isolate, currentBoundary));
results->Set(arrayPosition++, boundaryResult);
}
previousBoundary = currentBoundary;
currentBoundary = iterator->next();
}
// cleanup
delete iterator;
args.GetReturnValue().Set(results);
}
示例6: slotHyphenate
void Hyphenator::slotHyphenate(PageItem* it)
{
if (!(it->asTextFrame()) || (it->itemText.length() == 0))
return;
m_doc->DoDrawing = false;
QString text = "";
int startC = 0;
if (it->itemText.selectionLength() > 0)
{
startC = it->itemText.startOfSelection();
text = it->itemText.text(startC, it->itemText.selectionLength());
}
else {
text = it->itemText.text(0, it->itemText.length());
}
rememberedWords.clear();
qApp->setOverrideCursor(QCursor(Qt::WaitCursor));
BreakIterator* bi = StoryText::getWordIterator();
bi->setText((const UChar*) text.utf16());
int pos = bi->first();
while (pos != BreakIterator::DONE)
{
int firstC = pos;
pos = bi->next();
int lastC = pos;
int countC = lastC - firstC;
const CharStyle& style = it->itemText.charStyle(firstC);
if (countC > 0 && countC > style.hyphenWordMin() - 1)
{
QString word = text.mid(firstC, countC);
QString wordLower = QLocale(style.language()).toLower(word);
if (wordLower.contains(SpecialChars::SHYPHEN))
break;
bool ok = loadDict(style.language());
if (!ok)
continue;
QByteArray te = m_codec->fromUnicode(wordLower);
char *buffer = static_cast<char*>(malloc(te.length() + 5));
if (buffer == nullptr)
break;
char **rep = nullptr;
int *pos = nullptr;
int *cut = nullptr;
// TODO: support non-standard hyphenation, see hnj_hyphen_hyphenate2 docs
if (!hnj_hyphen_hyphenate2(m_hdict, te.data(), te.length(), buffer, nullptr, &rep, &pos, &cut))
{
int i = 0;
buffer[te.length()] = '\0';
bool hasHyphen = false;
for (i = 1; i < wordLower.length()-1; ++i)
{
if(buffer[i] & 1)
{
hasHyphen = true;
break;
}
}
QString outs = "";
QString input = "";
outs += word[0];
for (i = 1; i < wordLower.length()-1; ++i)
{
outs += word[i];
if(buffer[i] & 1)
outs += "-";
}
outs += word.rightRef(1);
input = outs;
if (!ignoredWords.contains(word))
{
if (!hasHyphen)
it->itemText.hyphenateWord(startC + firstC, wordLower.length(), nullptr);
else if (m_automatic)
{
if (specialWords.contains(word))
{
outs = specialWords.value(word);
uint ii = 1;
for (i = 1; i < outs.length()-1; ++i)
{
QChar cht = outs[i];
if (cht == '-')
buffer[ii-1] = 1;
else
{
buffer[ii] = 0;
++ii;
}
}
}
it->itemText.hyphenateWord(startC + firstC, wordLower.length(), buffer);
}
//.........这里部分代码省略.........
示例7: getWords
TRI_vector_string_t* Utf8Helper::getWords (const char* const text,
const size_t textLength,
const size_t minimalLength,
const size_t maximalLength,
bool lowerCase) {
TRI_vector_string_t* words;
UErrorCode status = U_ZERO_ERROR;
UnicodeString word;
if (textLength == 0) {
// input text is empty
return NULL;
}
if (textLength < minimalLength) {
// input text is shorter than required minimum length
return NULL;
}
size_t textUtf16Length = 0;
UChar* textUtf16 = NULL;
if (lowerCase) {
// lower case string
int32_t lowerLength = 0;
char* lower = tolower(TRI_UNKNOWN_MEM_ZONE, text, (int32_t) textLength, lowerLength);
if (lower == NULL) {
// out of memory
return NULL;
}
if (lowerLength == 0) {
TRI_Free(TRI_UNKNOWN_MEM_ZONE, lower);
return NULL;
}
textUtf16 = TRI_Utf8ToUChar(TRI_UNKNOWN_MEM_ZONE, lower, lowerLength, &textUtf16Length);
TRI_Free(TRI_UNKNOWN_MEM_ZONE, lower);
}
else {
textUtf16 = TRI_Utf8ToUChar(TRI_UNKNOWN_MEM_ZONE, text, (int32_t) textLength, &textUtf16Length);
}
if (textUtf16 == NULL) {
return NULL;
}
ULocDataLocaleType type = ULOC_VALID_LOCALE;
const Locale& locale = _coll->getLocale(type, status);
if (U_FAILURE(status)) {
TRI_Free(TRI_UNKNOWN_MEM_ZONE, textUtf16);
LOG_ERROR("error in Collator::getLocale(...): %s", u_errorName(status));
return NULL;
}
UChar* tempUtf16 = (UChar *) TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, (textUtf16Length + 1) * sizeof(UChar), false);
if (tempUtf16 == NULL) {
TRI_Free(TRI_UNKNOWN_MEM_ZONE, textUtf16);
return NULL;
}
words = (TRI_vector_string_t*) TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_vector_string_t), false);
if (words == NULL) {
TRI_Free(TRI_UNKNOWN_MEM_ZONE, textUtf16);
TRI_Free(TRI_UNKNOWN_MEM_ZONE, tempUtf16);
return NULL;
}
// estimate an initial vector size. this is not accurate, but setting the initial size to some
// value in the correct order of magnitude will save a lot of vector reallocations later
size_t initialWordCount = textLength / (2 * (minimalLength + 1));
if (initialWordCount < 32) {
// alloc at least 32 pointers (= 256b)
initialWordCount = 32;
}
else if (initialWordCount > 8192) {
// alloc at most 8192 pointers (= 64kb)
initialWordCount = 8192;
}
TRI_InitVectorString2(words, TRI_UNKNOWN_MEM_ZONE, initialWordCount);
BreakIterator* wordIterator = BreakIterator::createWordInstance(locale, status);
UnicodeString utext(textUtf16);
wordIterator->setText(utext);
int32_t start = wordIterator->first();
for(int32_t end = wordIterator->next(); end != BreakIterator::DONE;
start = end, end = wordIterator->next()) {
size_t tempUtf16Length = (size_t) (end - start);
// end - start = word length
if (tempUtf16Length >= minimalLength) {
size_t chunkLength = tempUtf16Length;
if (chunkLength > maximalLength) {
chunkLength = maximalLength;
//.........这里部分代码省略.........
示例8: TestRuleStatus
//
// TestRuleStatus
// Test word break and line break rule status constants.
//
// Part 1 checks, for a fixed string, that every word boundary falls where
// expected and that getRuleStatus() lies in the expected [lo, limit) range
// and agrees with getRuleStatusVec().
// Part 2 checks the first three line-break positions and their status, plus
// the relative ordering of the UBRK_LINE_* constants.
//
void RBBIAPITest::TestRuleStatus() {
    UChar str[30];
    //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
    // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
    u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
            // 012345678901234567  8      9    0
            //                     Katakana
            str, 30);
    UnicodeString testString1(str);
    int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
    int32_t tag_lo[] = {UBRK_WORD_NONE, UBRK_WORD_LETTER, UBRK_WORD_NONE, UBRK_WORD_LETTER,
                        UBRK_WORD_NONE, UBRK_WORD_NUMBER, UBRK_WORD_NONE,
                        UBRK_WORD_IDEO, UBRK_WORD_NONE};
    int32_t tag_hi[] = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
                        UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
                        UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};

    UErrorCode status=U_ZERO_ERROR;
    BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
    if(U_FAILURE(status)) {
        errcheckln(status, "Fail : in construction - %s", u_errorName(status));
    } else {
        bi->setText(testString1);
        // First test that the breaks are in the right spots.
        doBoundaryTest(*bi, testString1, bounds1);

        // Then go back and check tag values
        int32_t i = 0;
        int32_t pos, tag;
        for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
            if (pos != bounds1[i]) {
                errln("FAIL: unexpected word break at position %d", pos);
                break;
            }
            tag = bi->getRuleStatus();
            if (tag < tag_lo[i] || tag >= tag_hi[i]) {
                errln("FAIL: incorrect tag value %d at position %d", tag, pos);
                break;
            }

            // Check that we get the same tag values from getRuleStatusVec()
            int32_t vec[10];
            int t = bi->getRuleStatusVec(vec, 10, status);
            TEST_ASSERT_SUCCESS(status);
            TEST_ASSERT(t==1);
            TEST_ASSERT(vec[0] == tag);
        }
    }
    delete bi;

    // Now test line break status. This test mostly is to confirm that the status constants
    // are correctly declared in the header.
    testString1 = "test line. \n";
    // break type s s h
    bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
    if(U_FAILURE(status)) {
        // This section builds a line iterator, so the message says so.
        errcheckln(status, "failed to create line break iterator. - %s", u_errorName(status));
    } else {
        int32_t i = 0;
        int32_t pos, tag;
        UBool success;
        bi->setText(testString1);
        pos = bi->current();
        tag = bi->getRuleStatus();
        for (i=0; i<3; i++) {
            switch (i) {
            case 0:
                success = pos==0 && tag==UBRK_LINE_SOFT; break;
            case 1:
                success = pos==5 && tag==UBRK_LINE_SOFT; break;
            case 2:
                success = pos==12 && tag==UBRK_LINE_HARD; break;
            default:
                success = FALSE; break;
            }
            if (success == FALSE) {
                errln("Fail: incorrect line break status or position. i=%d, pos=%d, tag=%d",
                    i, pos, tag);
                break;
            }
            pos = bi->next();
            tag = bi->getRuleStatus();
        }
        // Sanity-check the constants: each value must be below its own limit,
        // and HARD must not fall inside the SOFT range.
        if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
            UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
            (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
            errln("UBRK_LINE_* constants from header are inconsistent.");
        }
    }
    delete bi;
}
示例9: break_line
// In the Unicode string characters are always stored in logical order.
// This makes line breaking easy. One word is added to the current line at a time. Once the line is too long
// we either go back one step or insert the line break at the current position (depending on "wrap_before" setting).
// At the end everything that is left over is added as the final line.
//
// line        the shaped source line to split
// wrap_width  maximum line width; 0 disables wrapping
// text_ratio  when non-zero, wrap_width is replaced by a width derived from
//             the line's width/height and this ratio
// wrap_before break at the boundary before the overflowing position instead
//             of the one after it
void text_layout::break_line(text_line & line, double wrap_width, unsigned text_ratio, bool wrap_before)
{
    shape_text(line);
    if (!wrap_width || line.width() < wrap_width)
    {
        add_line(line);
        return;
    }
    if (text_ratio)
    {
        double wrap_at;
        double string_width = line.width();
        double string_height = line.line_height();
        // Empty-bodied loop: the condition itself assigns wrap_at on every
        // evaluation, so it is always set before being read below.
        for (double i = 1.0; ((wrap_at = string_width/i)/(string_height*i)) > text_ratio && (string_width/i) > wrap_width; i += 1.0) ;
        wrap_width = wrap_at;
    }

    mapnik::value_unicode_string const& text = itemizer_.text();
    Locale locale; // TODO: Is the default constructor correct?
    UErrorCode status = U_ZERO_ERROR;
    // The factory hands ownership of the iterator to the caller, so every
    // exit path below must delete it.
    BreakIterator *breakitr = BreakIterator::createLineInstance(locale, status);

    // Not breaking the text if an error occurs is probably the best thing we can do.
    // https://github.com/mapnik/mapnik/issues/2072
    if (!U_SUCCESS(status))
    {
        delete breakitr; // deleting a null pointer is a no-op
        add_line(line);
        MAPNIK_LOG_ERROR(text_layout) << " could not create BreakIterator: " << u_errorName(status);
        return;
    }

    breakitr->setText(text);
    double current_line_length = 0;
    int last_break_position = static_cast<int>(line.first_char());
    for (unsigned i=line.first_char(); i < line.last_char(); ++i)
    {
        // TODO: character_spacing
        std::map<unsigned, double>::const_iterator width_itr = width_map_.find(i);
        if (width_itr != width_map_.end())
        {
            current_line_length += width_itr->second;
        }
        if (current_line_length <= wrap_width) continue;

        int break_position = wrap_before ? breakitr->preceding(i) : breakitr->following(i);
        // following() returns a break position after the last word. So DONE should only be returned
        // when calling preceding.
        if (break_position <= last_break_position || break_position == static_cast<int>(BreakIterator::DONE))
        {
            // A single word is longer than the maximum line width.
            // Violate line width requirement and choose next break position
            break_position = breakitr->following(i);
            if (break_position == static_cast<int>(BreakIterator::DONE))
            {
                break_position = line.last_char();
                MAPNIK_LOG_ERROR(text_layout) << "Unexpected result in break_line. Trying to recover...\n";
            }
        }
        // Break iterator operates on the whole string, while we only look at one line. So we need to
        // clamp break values.
        if (break_position < static_cast<int>(line.first_char()))
        {
            break_position = line.first_char();
        }
        if (break_position > static_cast<int>(line.last_char()))
        {
            break_position = line.last_char();
        }

        text_line new_line(last_break_position, break_position);
        clear_cluster_widths(last_break_position, break_position);
        shape_text(new_line);
        add_line(new_line);
        last_break_position = break_position;
        // Resume scanning at the break; the loop's ++i moves i back onto it.
        i = break_position - 1;
        current_line_length = 0;
    }

    if (last_break_position == static_cast<int>(line.first_char()))
    {
        // No line breaks => no reshaping required
        add_line(line);
    }
    else if (last_break_position != static_cast<int>(line.last_char()))
    {
        text_line new_line(last_break_position, line.last_char());
        clear_cluster_widths(last_break_position, line.last_char());
        shape_text(new_line);
        add_line(new_line);
    }

    // Release the iterator created above; it was previously leaked.
    delete breakitr;
}
示例10: stri_wrap
//.........这里部分代码省略.........
// nth line, nth para (i> 0, u> 0): prefix +exdent
StriWrapLineStart ii(initial_cont.get(0), indent_val);
StriWrapLineStart pi(prefix_cont.get(0), indent_val);
StriWrapLineStart pe(prefix_cont.get(0), exdent_val);
status = U_ZERO_ERROR;
//Unicode Newline Guidelines - Unicode Technical Report #13
UnicodeSet uset_linebreaks(UnicodeString::fromUTF8("[\\u000A-\\u000D\\u0085\\u2028\\u2029]"), status);
STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
uset_linebreaks.freeze();
status = U_ZERO_ERROR;
UnicodeSet uset_whitespaces(UnicodeString::fromUTF8("\\p{White_space}"), status);
STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
uset_whitespaces.freeze();
SEXP ret;
STRI__PROTECT(ret = Rf_allocVector(VECSXP, str_length));
for (R_len_t i = 0; i < str_length; ++i)
{
if (str_cont.isNA(i) || prefix_cont.isNA(0) || initial_cont.isNA(0)) {
SET_VECTOR_ELT(ret, i, stri__vector_NA_strings(1));
continue;
}
status = U_ZERO_ERROR;
const char* str_cur_s = str_cont.get(i).c_str();
R_len_t str_cur_n = str_cont.get(i).length();
str_text = utext_openUTF8(str_text, str_cur_s, str_cont.get(i).length(), &status);
STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
status = U_ZERO_ERROR;
briter->setText(str_text, status);
STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
// all right, first let's generate a list of places at which we may do line breaks
deque< R_len_t > occurrences_list; // this could be an R_len_t queue
R_len_t match = briter->first();
while (match != BreakIterator::DONE) {
if (!whitespace_only_val)
occurrences_list.push_back(match);
else {
if (match > 0 && match < str_cur_n) {
UChar32 c;
U8_GET((const uint8_t*)str_cur_s, 0, match-1, str_cur_n, c);
if (uset_whitespaces.contains(c))
occurrences_list.push_back(match);
}
else
occurrences_list.push_back(match);
}
match = briter->next();
}
R_len_t noccurrences = (R_len_t)occurrences_list.size(); // number of boundaries
if (noccurrences <= 1) { // no match (1 boundary == 0)
SET_VECTOR_ELT(ret, i, Rf_ScalarString(str_cont.toR(i)));
continue;
}
// the number of "words" is:
R_len_t nwords = noccurrences - 1;