当前位置: 首页>>代码示例>>C++>>正文


C++ BreakIterator::first方法代码示例

本文整理汇总了C++中BreakIterator::first方法的典型用法代码示例。如果您正苦于以下问题:C++ BreakIterator::first方法的具体用法?C++ BreakIterator::first怎么用?C++ BreakIterator::first使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在BreakIterator的用法示例。


在下文中一共展示了BreakIterator::first方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: _breakiterator_rewind

static void _breakiterator_rewind(zend_object_iterator *iter)
{
	BreakIterator *biter = _breakiter_prolog(iter);
	zoi_with_current *zoi_iter = (zoi_with_current*)iter;

	int32_t pos = biter->first();
	ZVAL_LONG(&zoi_iter->current, (zend_long)pos);
}
开发者ID:3F,项目名称:php-src,代码行数:8,代码来源:breakiterator_iterators.cpp

示例2: printEachForward

/* Print each element in order: */
void printEachForward( BreakIterator& boundary)
{
    int32_t start = boundary.first();
    for (int32_t end = boundary.next();
         end != BreakIterator::DONE;
         start = end, end = boundary.next())
    {
        printTextRange( boundary, start, end );
    }
}
开发者ID:winlibs,项目名称:icu4c,代码行数:11,代码来源:break.cpp

示例3: GetWordBoundaryPositions

void GetWordBoundaryPositions(const FunctionCallbackInfo<Value>& args) {
    Isolate* isolate = Isolate::GetCurrent();
    HandleScope scope(isolate);

    if (args.Length() != 2) {
        isolate->ThrowException(Exception::TypeError(String::NewFromUtf8(isolate, "must supply locale and text")));
        return;
    }

    if (!args[0]->IsString()) {
        isolate->ThrowException(Exception::TypeError(String::NewFromUtf8(isolate, "text is not specified")));
        return;
    }

    if (!args[1]->IsString()) {
        isolate->ThrowException(Exception::TypeError(String::NewFromUtf8(isolate, "locale is not specified")));
        return;
    }

    // convert v8 locale to ICU
    String::Utf8Value locale(args[1]->ToString());
    const char* country = strtok(*locale, "_"), *language = strtok(NULL, "_");
    Locale icuLocale(language, country);

    // create the BreakIterator instance
    UErrorCode err = U_ZERO_ERROR;
    BreakIterator *iterator = BreakIterator::createWordInstance(icuLocale, err);
    if (U_FAILURE(err)) {
        ErrorCode errCode;
        errCode.set(err);
        isolate->ThrowException(Exception::TypeError(String::NewFromUtf8(isolate, errCode.errorName())));
        return;
    }

    // Convert v8 text to ICU Unicode value
    Local<String> textStr = args[0]->ToString();
    String::Utf8Value textValue(textStr);
    UnicodeString uTextValue(*textValue, "UTF-8");
    if (uTextValue.isBogus()) {
        isolate->ThrowException(Exception::TypeError(String::NewFromUtf8(isolate, "unable to create unicode string")));
        return;
    }
    iterator->setText(uTextValue);

    // populate boundaries
    Local<Array> results = Array::New(isolate);
    int32_t arrayPosition = 0;
    int32_t currentBoundary = iterator->first();
    int32_t previousBoundary = 0;

    while (currentBoundary != BreakIterator::DONE) {
        if (currentBoundary > 0) {
            Local<Object> boundaryResult = Object::New(isolate);
            boundaryResult->Set(String::NewFromUtf8(isolate, "start"), Number::New(isolate, previousBoundary));
            boundaryResult->Set(String::NewFromUtf8(isolate, "end"), Number::New(isolate, currentBoundary));

            results->Set(arrayPosition++, boundaryResult);
        }

        previousBoundary = currentBoundary;
        currentBoundary = iterator->next();
    }

    // cleanup
    delete iterator;

    args.GetReturnValue().Set(results);
}
开发者ID:imojiengineering,项目名称:node-icu-tokenizer,代码行数:68,代码来源:node-icu-tokenizer.cpp

示例4: slotHyphenate

void Hyphenator::slotHyphenate(PageItem* it)
{
	if (!(it->asTextFrame()) || (it->itemText.length() == 0))
		return;
	m_doc->DoDrawing = false;

	QString text = "";

	int startC = 0;
	if (it->itemText.selectionLength() > 0)
	{
		startC = it->itemText.startOfSelection();
		text = it->itemText.text(startC, it->itemText.selectionLength());
	}
	else {
		text = it->itemText.text(0, it->itemText.length());
	}

	rememberedWords.clear();
	qApp->setOverrideCursor(QCursor(Qt::WaitCursor));

	BreakIterator* bi = StoryText::getWordIterator();
	bi->setText((const UChar*) text.utf16());
	int pos = bi->first();
	while (pos != BreakIterator::DONE)
	{
		int firstC = pos;
		pos = bi->next();
		int lastC = pos;
		int countC = lastC - firstC;

		const CharStyle& style = it->itemText.charStyle(firstC);
		if (countC > 0 && countC > style.hyphenWordMin() - 1)
		{
			QString word = text.mid(firstC, countC);
			QString wordLower = QLocale(style.language()).toLower(word);
			if (wordLower.contains(SpecialChars::SHYPHEN))
				break;

			bool ok = loadDict(style.language());
			if (!ok)
				continue;

			QByteArray te = m_codec->fromUnicode(wordLower);
			char *buffer = static_cast<char*>(malloc(te.length() + 5));
			if (buffer == nullptr)
				break;

			char **rep = nullptr;
			int *pos = nullptr;
			int *cut = nullptr;
			// TODO: support non-standard hyphenation, see hnj_hyphen_hyphenate2 docs
			if (!hnj_hyphen_hyphenate2(m_hdict, te.data(), te.length(), buffer, nullptr, &rep, &pos, &cut))
			{
	  			int i = 0;
				buffer[te.length()] = '\0';
				bool hasHyphen = false;
				for (i = 1; i < wordLower.length()-1; ++i)
				{
					if(buffer[i] & 1)
					{
						hasHyphen = true;
						break;
					}
				}
				QString outs = "";
				QString input = "";
				outs += word[0];
				for (i = 1; i < wordLower.length()-1; ++i)
				{
					outs += word[i];
					if(buffer[i] & 1)
						outs += "-";
				}
				outs += word.rightRef(1);
				input = outs;
				if (!ignoredWords.contains(word))
				{
					if (!hasHyphen)
						it->itemText.hyphenateWord(startC + firstC, wordLower.length(), nullptr);
					else if (m_automatic)
					{
						if (specialWords.contains(word))
						{
							outs = specialWords.value(word);
							uint ii = 1;
							for (i = 1; i < outs.length()-1; ++i)
							{
								QChar cht = outs[i];
								if (cht == '-')
									buffer[ii-1] = 1;
								else
								{
									buffer[ii] = 0;
									++ii;
								}
							}
						}
						it->itemText.hyphenateWord(startC + firstC, wordLower.length(), buffer);
					}
//.........这里部分代码省略.........
开发者ID:luzpaz,项目名称:scribus,代码行数:101,代码来源:hyphenator.cpp

示例5: if

TRI_vector_string_t* Utf8Helper::getWords (const char* const text,
                                           const size_t textLength,
                                           const size_t minimalLength,
                                           const size_t maximalLength,
                                           bool lowerCase) {
  TRI_vector_string_t* words;
  UErrorCode status = U_ZERO_ERROR;
  UnicodeString word;

  if (textLength == 0) {
    // input text is empty
    return NULL;
  }

  if (textLength < minimalLength) {
    // input text is shorter than required minimum length
    return NULL;
  }

  size_t textUtf16Length = 0;
  UChar* textUtf16 = NULL;

  if (lowerCase) {
    // lower case string
    int32_t lowerLength = 0;
    char* lower = tolower(TRI_UNKNOWN_MEM_ZONE, text, (int32_t) textLength, lowerLength);

    if (lower == NULL) {
      // out of memory
      return NULL;
    }

    if (lowerLength == 0) {
      TRI_Free(TRI_UNKNOWN_MEM_ZONE, lower);
      return NULL;
    }

    textUtf16 = TRI_Utf8ToUChar(TRI_UNKNOWN_MEM_ZONE, lower, lowerLength, &textUtf16Length);
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, lower);
  }
  else {
    textUtf16 = TRI_Utf8ToUChar(TRI_UNKNOWN_MEM_ZONE, text, (int32_t) textLength, &textUtf16Length);
  }

  if (textUtf16 == NULL) {
    return NULL;
  }

  ULocDataLocaleType type = ULOC_VALID_LOCALE;
  const Locale& locale = _coll->getLocale(type, status);

  if (U_FAILURE(status)) {
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, textUtf16);
    LOG_ERROR("error in Collator::getLocale(...): %s", u_errorName(status));
    return NULL;
  }

  UChar* tempUtf16 = (UChar *) TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, (textUtf16Length + 1) * sizeof(UChar), false);

  if (tempUtf16 == NULL) {
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, textUtf16);
    return NULL;
  }

  words = (TRI_vector_string_t*) TRI_Allocate(TRI_UNKNOWN_MEM_ZONE, sizeof(TRI_vector_string_t), false);

  if (words == NULL) {
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, textUtf16);
    TRI_Free(TRI_UNKNOWN_MEM_ZONE, tempUtf16);
    return NULL;
  }

  // estimate an initial vector size. this is not accurate, but setting the initial size to some
  // value in the correct order of magnitude will save a lot of vector reallocations later
  size_t initialWordCount = textLength / (2 * (minimalLength + 1));
  if (initialWordCount < 32) {
    // alloc at least 32 pointers (= 256b)
    initialWordCount = 32;
  }
  else if (initialWordCount > 8192) {
    // alloc at most 8192 pointers (= 64kb)
    initialWordCount = 8192;
  }

  TRI_InitVectorString2(words, TRI_UNKNOWN_MEM_ZONE, initialWordCount);

  BreakIterator* wordIterator = BreakIterator::createWordInstance(locale, status);
  UnicodeString utext(textUtf16);

  wordIterator->setText(utext);
  int32_t start = wordIterator->first();
  for(int32_t end = wordIterator->next(); end != BreakIterator::DONE;
    start = end, end = wordIterator->next()) {

    size_t tempUtf16Length = (size_t) (end - start);
    // end - start = word length
    if (tempUtf16Length >= minimalLength) {
      size_t chunkLength = tempUtf16Length;
      if (chunkLength > maximalLength) {
        chunkLength = maximalLength;
//.........这里部分代码省略.........
开发者ID:bdacode,项目名称:ArangoDB,代码行数:101,代码来源:Utf8Helper.cpp

示例6: TestRuleStatus

//
//  TestRuleStatus
//      Test word break rule status constants.
//
void RBBIAPITest::TestRuleStatus() {
     UChar str[30];
     //no longer test Han or hiragana breaking here: ruleStatusVec would return nothing
     // changed UBRK_WORD_KANA to UBRK_WORD_IDEO
     u_unescape("plain word 123.45 \\u30a1\\u30a2 ",
              // 012345678901234567  8      9    0      
              //                     Katakana      
                str, 30);
     UnicodeString testString1(str);
     int32_t bounds1[] = {0, 5, 6, 10, 11, 17, 18, 20, 21};
     int32_t tag_lo[]  = {UBRK_WORD_NONE,     UBRK_WORD_LETTER, UBRK_WORD_NONE,    UBRK_WORD_LETTER,
                          UBRK_WORD_NONE,     UBRK_WORD_NUMBER, UBRK_WORD_NONE,
                          UBRK_WORD_IDEO,     UBRK_WORD_NONE};

     int32_t tag_hi[]  = {UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT, UBRK_WORD_NONE_LIMIT, UBRK_WORD_LETTER_LIMIT,
                          UBRK_WORD_NONE_LIMIT, UBRK_WORD_NUMBER_LIMIT, UBRK_WORD_NONE_LIMIT,
                          UBRK_WORD_IDEO_LIMIT, UBRK_WORD_NONE_LIMIT};

     UErrorCode status=U_ZERO_ERROR;

     BreakIterator *bi = BreakIterator::createWordInstance(Locale::getEnglish(), status);
     if(U_FAILURE(status)) {
         errcheckln(status, "Fail : in construction - %s", u_errorName(status));
     } else {
         bi->setText(testString1);
         // First test that the breaks are in the right spots.
         doBoundaryTest(*bi, testString1, bounds1);

         // Then go back and check tag values
         int32_t i = 0;
         int32_t pos, tag;
         for (pos = bi->first(); pos != BreakIterator::DONE; pos = bi->next(), i++) {
             if (pos != bounds1[i]) {
                 errln("FAIL: unexpected word break at postion %d", pos);
                 break;
             }
             tag = bi->getRuleStatus();
             if (tag < tag_lo[i] || tag >= tag_hi[i]) {
                 errln("FAIL: incorrect tag value %d at position %d", tag, pos);
                 break;
             }

             // Check that we get the same tag values from getRuleStatusVec()
             int32_t vec[10];
             int t = bi->getRuleStatusVec(vec, 10, status);
             TEST_ASSERT_SUCCESS(status);
             TEST_ASSERT(t==1);
             TEST_ASSERT(vec[0] == tag);
         }
     }
     delete bi;

     // Now test line break status.  This test mostly is to confirm that the status constants
     //                              are correctly declared in the header.
     testString1 =   "test line. \n";
     // break type    s    s     h

     bi = BreakIterator::createLineInstance(Locale::getEnglish(), status);
     if(U_FAILURE(status)) {
         errcheckln(status, "failed to create word break iterator. - %s", u_errorName(status));
     } else {
         int32_t i = 0;
         int32_t pos, tag;
         UBool   success;

         bi->setText(testString1);
         pos = bi->current();
         tag = bi->getRuleStatus();
         for (i=0; i<3; i++) {
             switch (i) {
             case 0:
                 success = pos==0  && tag==UBRK_LINE_SOFT; break;
             case 1:
                 success = pos==5  && tag==UBRK_LINE_SOFT; break;
             case 2:
                 success = pos==12 && tag==UBRK_LINE_HARD; break;
             default:
                 success = FALSE; break;
             }
             if (success == FALSE) {
                 errln("Fail: incorrect word break status or position.  i=%d, pos=%d, tag=%d",
                     i, pos, tag);
                 break;
             }
             pos = bi->next();
             tag = bi->getRuleStatus();
         }
         if (UBRK_LINE_SOFT >= UBRK_LINE_SOFT_LIMIT ||
             UBRK_LINE_HARD >= UBRK_LINE_HARD_LIMIT ||
             (UBRK_LINE_HARD > UBRK_LINE_SOFT && UBRK_LINE_HARD < UBRK_LINE_SOFT_LIMIT)) {
             errln("UBRK_LINE_* constants from header are inconsistent.");
         }
     }
     delete bi;

}
开发者ID:Distrotech,项目名称:icu,代码行数:100,代码来源:rbbiapts.cpp

示例7: printFirst

/* Print the first element */
void printFirst(BreakIterator& boundary)
{
    int32_t start = boundary.first();
    int32_t end = boundary.next();
    printTextRange( boundary, start, end );
}
开发者ID:winlibs,项目名称:icu4c,代码行数:7,代码来源:break.cpp

示例8: stri_wrap


//.........这里部分代码省略.........

   status = U_ZERO_ERROR;
   //Unicode Newline Guidelines - Unicode Technical Report #13
   UnicodeSet uset_linebreaks(UnicodeString::fromUTF8("[\\u000A-\\u000D\\u0085\\u2028\\u2029]"), status);
   STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
   uset_linebreaks.freeze();

   status = U_ZERO_ERROR;
   UnicodeSet uset_whitespaces(UnicodeString::fromUTF8("\\p{White_space}"), status);
   STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})
   uset_whitespaces.freeze();

   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(VECSXP, str_length));
   for (R_len_t i = 0; i < str_length; ++i)
   {
      if (str_cont.isNA(i) || prefix_cont.isNA(0) || initial_cont.isNA(0)) {
         SET_VECTOR_ELT(ret, i, stri__vector_NA_strings(1));
         continue;
      }

      status = U_ZERO_ERROR;
      const char* str_cur_s = str_cont.get(i).c_str();
      R_len_t str_cur_n = str_cont.get(i).length();
      str_text = utext_openUTF8(str_text, str_cur_s, str_cont.get(i).length(), &status);
      STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})

      status = U_ZERO_ERROR;
      briter->setText(str_text, status);
      STRI__CHECKICUSTATUS_THROW(status, {/* do nothing special on err */})

      // all right, first let's generate a list of places at which we may do line breaks
      deque< R_len_t > occurrences_list; // this could be an R_len_t queue
      R_len_t match = briter->first();
      while (match != BreakIterator::DONE) {

         if (!whitespace_only_val)
            occurrences_list.push_back(match);
         else {
            if (match > 0 && match < str_cur_n) {
               UChar32 c;
               U8_GET((const uint8_t*)str_cur_s, 0, match-1, str_cur_n, c);
               if (uset_whitespaces.contains(c))
                  occurrences_list.push_back(match);
            }
            else
               occurrences_list.push_back(match);
         }

         match = briter->next();
      }

      R_len_t noccurrences = (R_len_t)occurrences_list.size(); // number of boundaries
      if (noccurrences <= 1) { // no match (1 boundary == 0)
         SET_VECTOR_ELT(ret, i, Rf_ScalarString(str_cont.toR(i)));
         continue;
      }

      // the number of "words" is:
      R_len_t nwords = noccurrences - 1;

      // convert occurrences_list to a vector
      // in order to obtain end positions (in a string) of each "words",
      // noting that occurrences_list.at(0) == 0
#ifndef NDEBUG
      if (occurrences_list.at(0) != 0)
开发者ID:mmcnulty0531,项目名称:stringi,代码行数:67,代码来源:stri_wrap.cpp


注:本文中的BreakIterator::first方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。