当前位置: 首页>>代码示例>>C++>>正文


C++ U8_NEXT函数代码示例

本文整理汇总了C++中U8_NEXT函数的典型用法代码示例。如果您正苦于以下问题:C++ U8_NEXT函数的具体用法?C++ U8_NEXT怎么用?C++ U8_NEXT使用的例子?那么, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了U8_NEXT函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的C++代码示例。

示例1: checkStringEnd

	int ICUUnicodeSupport::_compareNoCase<1>(ConstStringHolder<1> _first, ConstStringHolder<1> _second)
	{
		int32_t len1 = _first.length();
		int32_t len2 = _second.length();
		int32_t ofs1 = 0;
		int32_t ofs2 = 0;

		int r = checkStringEnd(ofs1, len1, ofs2, len2);
		if(r != 2) return r;

		const uint8_t* buf1 = _first.c_str();
		const uint8_t* buf2 = _second.c_str();
		while(true)
		{
			UChar32 c1, c2;
			U8_NEXT(buf1, ofs1, len1, c1);
			U8_NEXT(buf2, ofs2, len2, c2);
			
			c1 = u_tolower(c1);
			c2 = u_tolower(c2);
			if(c1 != c2)
				return (c1 < c2) ? -1 : 1;

			r = checkStringEnd(ofs1, len1, ofs2, len2);
			if(r != 2) return r;
		}
	}
开发者ID:raduetsya,项目名称:GothOgre,代码行数:27,代码来源:ICUUnicodeSupport.cpp

示例2: utf8_comparison

static int utf8_comparison(const unsigned char *buf1, size_t buf1size,
        const unsigned char *buf2, size_t buf2size,
        int caseinsensitive, int genericwhitespace) {
    UChar32 c1, c2;
    int i1 = 0;
    int i2 = 0;
    while (i1 < (int)buf1size && i2 < (int)buf2size) {
        U8_NEXT(buf1, i1, buf1size, c1);
        assert(c1 >= 0);
        U8_NEXT(buf2, i2, buf2size, c2);
        assert(c2 >= 0);
        if (c1 != c2) {
            if (caseinsensitive) {
                // turn both lowercase and compare again:
                if (c1 < 127 && c2 < 127) {  // manual ascii case insensitive
                    char bytec1 = (char)c1;
                    char bytec2 = (char)c2;
                    if (((bytec1 >= 'a' && bytec1 <= 'z') ||
                            (bytec1 >= 'A' && bytec1 <= 'Z')) &&
                            ((bytec2 >= 'a' && bytec2 <= 'z') ||
                            (bytec2 >= 'A' && bytec2 <= 'Z'))) {
                        // turn them into uppercase
                        if (bytec1 >= 'a' && bytec1 <= 'z') {
                            bytec1 = bytec1 - ('a' - 'A');
                        }
                        if (bytec2 >= 'a' && bytec2 <= 'z') {
                            bytec2 = bytec2 - ('a' - 'A');
                        }
                        // compare with case insensitive:
                        if (bytec1 == bytec2) {
                            continue;
                        } else {
                            return ((int)bytec1) - ((int)bytec2);
                        }
                    }
                }
            }
            if (genericwhitespace) {
                // see if both are whitespace and try again:
                if (utf8_codepoint_is_whitespace(c1) &&
                        utf8_codepoint_is_whitespace(c2)) {
                    continue;
                }
            }
            // not equal. return difference:
            return ((int)c1) - ((int)c2);
        }
    }
    if (i1 < (int)buf1size) {
        U8_NEXT(buf1, i1, buf1size, c1);
        return c1;
    }
    if (i2 < (int)buf2size) {
        U8_NEXT(buf2, i2, buf2size, c2);
        return -((int)c2);
    }
    return 0;
}
开发者ID:JonasT,项目名称:TaskWarlord,代码行数:58,代码来源:utf8.c

示例3: _caseMap

/*
 * Case-maps [srcStart..srcLimit[ but takes
 * context [0..srcLength[ into account.
 */
static void
_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
         const uint8_t *src, UCaseContext *csc,
         int32_t srcStart, int32_t srcLimit,
         icu::ByteSink &sink, icu::Edits *edits,
         UErrorCode &errorCode) {
    /* case mapping loop */
    int32_t srcIndex=srcStart;
    while (U_SUCCESS(errorCode) && srcIndex<srcLimit) {
        int32_t cpStart;
        csc->cpStart=cpStart=srcIndex;
        UChar32 c;
        U8_NEXT(src, srcIndex, srcLimit, c);
        csc->cpLimit=srcIndex;
        if(c<0) {
            // Malformed UTF-8.
            ByteSinkUtil::appendUnchanged(src+cpStart, srcIndex-cpStart,
                                          sink, options, edits, errorCode);
        } else {
            const UChar *s;
            c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
            appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
        }
    }
}
开发者ID:MIPS,项目名称:external-icu,代码行数:29,代码来源:ucasemap.cpp

示例4: convert_cp

static inline int convert_cp(UChar32* pcp, zval *zcp) {
	zend_long cp = -1;

	if (Z_TYPE_P(zcp) == IS_LONG) {
		cp = Z_LVAL_P(zcp);
	} else if (Z_TYPE_P(zcp) == IS_STRING) {
		int32_t i = 0;
		size_t zcp_len = Z_STRLEN_P(zcp);

		if (ZEND_SIZE_T_INT_OVFL(zcp_len)) {
			intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
			intl_error_set_custom_msg(NULL, "Input string is too long.", 0);
			return FAILURE;
		}

		U8_NEXT(Z_STRVAL_P(zcp), i, zcp_len, cp);
		if ((size_t)i != zcp_len) {
			intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
			intl_error_set_custom_msg(NULL, "Passing a UTF-8 character for codepoint requires a string which is exactly one UTF-8 codepoint long.", 0);
			return FAILURE;
		}
	} else {
		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
		intl_error_set_custom_msg(NULL, "Invalid parameter for unicode point.  Must be either integer or UTF-8 sequence.", 0);
		return FAILURE;
	}
	if ((cp < UCHAR_MIN_VALUE) || (cp > UCHAR_MAX_VALUE)) {
		intl_error_set_code(NULL, U_ILLEGAL_ARGUMENT_ERROR);
		intl_error_set_custom_msg(NULL, "Codepoint out of range", 0);
		return FAILURE;
	}
	*pcp = (UChar32)cp;
	return SUCCESS;
}
开发者ID:AllenJB,项目名称:php-src,代码行数:34,代码来源:uchar.c

示例5: TestNextPrevNonCharacters

static void TestNextPrevNonCharacters() {
    /* test non-characters */
    static const uint8_t nonChars[]={
        0xef, 0xb7, 0x90,       /* U+fdd0 */
        0xef, 0xbf, 0xbf,       /* U+feff */
        0xf0, 0x9f, 0xbf, 0xbe, /* U+1fffe */
        0xf0, 0xbf, 0xbf, 0xbf, /* U+3ffff */
        0xf4, 0x8f, 0xbf, 0xbe  /* U+10fffe */
    };

    UChar32 ch;
    int32_t idx;

    for(idx=0; idx<(int32_t)sizeof(nonChars);) {
        U8_NEXT(nonChars, idx, sizeof(nonChars), ch);
        if(!U_IS_UNICODE_NONCHAR(ch)) {
            log_err("U8_NEXT(before %d) failed to read a non-character\n", idx);
        }
    }
    for(idx=(int32_t)sizeof(nonChars); idx>0;) {
        U8_PREV(nonChars, 0, idx, ch);
        if(!U_IS_UNICODE_NONCHAR(ch)) {
            log_err("U8_PREV(at %d) failed to read a non-character\n", idx);
        }
    }
}
开发者ID:00zhengfu00,项目名称:third_party,代码行数:26,代码来源:utf8tst.c

示例6: _caseMap

/*
 * Case-maps [srcStart..srcLimit[ but takes
 * context [0..srcLength[ into account.
 */
static int32_t
_caseMap(const UCaseMap *csm, UCaseMapFull *map,
         uint8_t *dest, int32_t destCapacity,
         const uint8_t *src, UCaseContext *csc,
         int32_t srcStart, int32_t srcLimit,
         UErrorCode *pErrorCode) {
    const UChar *s;
    UChar32 c;
    int32_t srcIndex, destIndex;
    int32_t locCache;

    locCache=csm->locCache;

    /* case mapping loop */
    srcIndex=srcStart;
    destIndex=0;
    while(srcIndex<srcLimit) {
        csc->cpStart=srcIndex;
        U8_NEXT(src, srcIndex, srcLimit, c);
        csc->cpLimit=srcIndex;
        c=map(csm->csp, c, utf8_caseContextIterator, csc, &s, csm->locale, &locCache);
        destIndex=appendResult(dest, destIndex, destCapacity, c, s);
    }

    if(destIndex>destCapacity) {
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }
    return destIndex;
}
开发者ID:mason105,项目名称:red5cpp,代码行数:33,代码来源:ucasemap.c

示例7: utf8_caseContextIterator

static UChar32 U_CALLCONV
utf8_caseContextIterator(void *context, int8_t dir) {
    UCaseContext *csc=(UCaseContext *)context;
    UChar32 c;

    if(dir<0) {
        /* reset for backward iteration */
        csc->index=csc->cpStart;
        csc->dir=dir;
    } else if(dir>0) {
        /* reset for forward iteration */
        csc->index=csc->cpLimit;
        csc->dir=dir;
    } else {
        /* continue current iteration direction */
        dir=csc->dir;
    }

    if(dir<0) {
        if(csc->start<csc->index) {
            U8_PREV((const uint8_t *)csc->p, csc->start, csc->index, c);
            return c;
        }
    } else {
        if(csc->index<csc->limit) {
            U8_NEXT((const uint8_t *)csc->p, csc->index, csc->limit, c);
            return c;
        }
    }
    return U_SENTINEL;
}
开发者ID:CucasLoon,项目名称:in-the-box,代码行数:31,代码来源:ucasemap.c

示例8: stri_subset_charclass

/**
 * Detect if a character class occurs in a string
 *
 * @param str character vector
 * @param pattern character vector
 * @param omit_na single logical value
 * @return logical vector
 *
 * @version 0.3-1 (Bartek Tartanus, 2014-07-25)
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-10-17)
 *                using std::vector<int> to avoid mem-leaks
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-04)
 *    Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 *
 * @version 0.4-1 (Marek Gagolewski, 2014-12-04)
 *    FR #122: omit_na arg added
 *
 * @version 1.0-3 (Marek Gagolewski, 2016-02-03)
 *    FR #216: `negate` arg added
 */
SEXP stri_subset_charclass(SEXP str, SEXP pattern, SEXP omit_na, SEXP negate)
{
   bool negate_1 = stri__prepare_arg_logical_1_notNA(negate, "negate");
   bool omit_na1 = stri__prepare_arg_logical_1_notNA(omit_na, "omit_na");
   PROTECT(str = stri_prepare_arg_string(str, "str"));
   PROTECT(pattern = stri_prepare_arg_string(pattern, "pattern"));
   R_len_t vectorize_length =
      stri__recycling_rule(true, 2, LENGTH(str), LENGTH(pattern));

   STRI__ERROR_HANDLER_BEGIN(2)
   StriContainerUTF8 str_cont(str, vectorize_length);
   StriContainerCharClass pattern_cont(pattern, vectorize_length);

   // BT: this cannot be done with deque, because pattern is reused so i does not
   // go like 0,1,2...n but 0,pat_len,2*pat_len,1,pat_len+1 and so on
   // MG: agreed
   std::vector<int> which(vectorize_length);
   int result_counter = 0;

   for (R_len_t i = pattern_cont.vectorize_init();
         i != pattern_cont.vectorize_end();
         i = pattern_cont.vectorize_next(i))
   {
      if (str_cont.isNA(i) || pattern_cont.isNA(i)) {
         if (omit_na1) which[i] = FALSE;
         else {
            which[i] = NA_LOGICAL;
            result_counter++;
         }
         continue;
      }

      const UnicodeSet* pattern_cur = &pattern_cont.get(i);
      R_len_t     str_cur_n = str_cont.get(i).length();
      const char* str_cur_s = str_cont.get(i).c_str();

      UChar32 chr = 0;
      which[i] = FALSE;
      for (R_len_t j=0; j<str_cur_n; ) {
         U8_NEXT(str_cur_s, j, str_cur_n, chr);
         if (chr < 0) // invalid utf-8 sequence
            throw StriException(MSG__INVALID_UTF8);
         if (pattern_cur->contains(chr)) {
            which[i] = TRUE;
            break;
         }
      }
      if (negate_1) which[i] = !which[i];
      if (which[i]) result_counter++;
   }

   SEXP ret;
   STRI__PROTECT(ret = stri__subset_by_logical(str_cont, which, result_counter));
   STRI__UNPROTECT_ALL
   return ret;
   STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */)
}
开发者ID:GuillaumeSmaha,项目名称:stringi,代码行数:79,代码来源:stri_search_class_subset.cpp

示例9: stri_enc_toutf32

/** Convert character vector to UTF-32
 *
 * @param str character vector
 * @return list with integer vectors
 *
 * @version 0.1-?? (Marek Gagolewski)
 *
 * @version 0.1-?? (Marek Gagolewski, 2013-06-16)
 *          make StriException-friendly
 *
 * @version 0.2-1 (Marek Gagolewski, 2014-03-26)
 *          use vector<UChar32> buf instead of R_alloc;
 *          warn and set NULL on improper UTF-8 byte sequences
 *
 * @version 0.2-3 (Marek Gagolewski, 2014-05-12)
 *          Use UChar32* instead of vector<UChar32> as ::data is C++11
 *
 * @version 0.3-1 (Marek Gagolewski, 2014-11-04)
 *    Issue #112: str_prepare_arg* retvals were not PROTECTed from gc
 */
SEXP stri_enc_toutf32(SEXP str)
{
   PROTECT(str = stri_prepare_arg_string(str, "str"));
   R_len_t n = LENGTH(str);

   STRI__ERROR_HANDLER_BEGIN(1)
   StriContainerUTF8 str_cont(str, n);

   R_len_t bufsize = 1; // to avoid allocating an empty buffer
   for (R_len_t i=0; i<n; ++i) {
      if (str_cont.isNA(i)) continue;
      R_len_t ni = str_cont.get(i).length();
      if (ni > bufsize) bufsize = ni;
   }

   UChar32* buf = (UChar32*)R_alloc((size_t)bufsize, (int)sizeof(UChar32)); // at most bufsize UChars32 (bufsize/4 min.)
   if (!buf) throw StriException(MSG__MEM_ALLOC_ERROR);
   // deque<UChar32> was slower than using a common, over-sized buf

   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(VECSXP, n)); // all

   for (R_len_t i=0; i<n; ++i) {

      if (str_cont.isNA(i)) {
         SET_VECTOR_ELT(ret, i, R_NilValue);
         continue;
      }

      UChar32 c = (UChar32)0;
      const char* s = str_cont.get(i).c_str();
      R_len_t sn = str_cont.get(i).length();
      R_len_t j = 0;
      R_len_t k = 0;
      while (c >= 0 && j < sn) {
         U8_NEXT(s, j, sn, c);
         buf[k++] = (int)c;
      }

      if (c < 0) {
         Rf_warning(MSG__INVALID_UTF8);
         SET_VECTOR_ELT(ret, i, R_NilValue);
         continue;
      }
      else {
         SEXP conv;
         STRI__PROTECT(conv = Rf_allocVector(INTSXP, k));
         memcpy(INTEGER(conv), buf, (size_t)sizeof(int)*k);
         SET_VECTOR_ELT(ret, i, conv);
         STRI__UNPROTECT(1);
      }
   }

   STRI__UNPROTECT_ALL
   return ret;
   STRI__ERROR_HANDLER_END({ /* do nothing on error */ })
}
开发者ID:GuillaumeSmaha,项目名称:stringi,代码行数:77,代码来源:stri_encoding_conversion.cpp

示例10: stri_subset_charclass_replacement

/**
 * Substitutes vector elements if a pattern occurs in a string
 *
 * @param str character vector
 * @param pattern character vector
 * @param value character vector
 * @return character vector
 *
 * @version 1.0-3 (Marek Gagolewski, 2016-02-03)
 *   FR#124
 *
 * @version 1.0-3 (Marek Gagolewski, 2016-02-03)
 *    FR #216: `negate` arg added
 */
SEXP stri_subset_charclass_replacement(SEXP str, SEXP pattern, SEXP negate, SEXP value)
{
   bool negate_1 = stri__prepare_arg_logical_1_notNA(negate, "negate");
   PROTECT(str = stri_prepare_arg_string(str, "str"));
   PROTECT(pattern = stri_prepare_arg_string_1(pattern, "pattern"));
   PROTECT(value = stri_prepare_arg_string(value, "value"));

   int vectorize_length = LENGTH(str);
   int value_length = LENGTH(value);
   if (value_length == 0)
      Rf_error(MSG__REPLACEMENT_ZERO);

   STRI__ERROR_HANDLER_BEGIN(3)
   StriContainerUTF8 str_cont(str, vectorize_length);
   StriContainerUTF8 value_cont(value, value_length);
   StriContainerCharClass pattern_cont(pattern, vectorize_length);

   SEXP ret;
   STRI__PROTECT(ret = Rf_allocVector(STRSXP, vectorize_length));

   R_len_t k = 0;
   for (R_len_t i = str_cont.vectorize_init();
         i != str_cont.vectorize_end();
         i = str_cont.vectorize_next(i))
   {
      if (str_cont.isNA(i) || pattern_cont.isNA(i)) {
         SET_STRING_ELT(ret, i, NA_STRING);
         continue;
      }

      const UnicodeSet* pattern_cur = &pattern_cont.get(i);
      R_len_t     str_cur_n = str_cont.get(i).length();
      const char* str_cur_s = str_cont.get(i).c_str();

      UChar32 chr = 0;
      bool found = false;
      for (R_len_t j=0; j<str_cur_n; ) {
         U8_NEXT(str_cur_s, j, str_cur_n, chr);
         if (chr < 0) // invalid utf-8 sequence
            throw StriException(MSG__INVALID_UTF8);
         if (pattern_cur->contains(chr)) {
            found = true;
            break;
         }
      }

      if ((found && !negate_1) || (!found && negate_1))
         SET_STRING_ELT(ret, i, value_cont.toR((k++)%value_length));
      else
         SET_STRING_ELT(ret, i, str_cont.toR(i));
   }

   STRI__UNPROTECT_ALL
   return ret;
   STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */)
}
开发者ID:GuillaumeSmaha,项目名称:stringi,代码行数:70,代码来源:stri_search_class_subset.cpp

示例11: getResultsManually

static UBool *
getResultsManually(const char** encodings, int32_t num_encodings,
                   const char *utf8, int32_t length,
                   const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
  UBool* resultsManually;
  int32_t i;

  resultsManually = (UBool*) uprv_malloc(gCountAvailable);
  uprv_memset(resultsManually, 0, gCountAvailable);

  for(i = 0 ; i < num_encodings ; i++) {
    UErrorCode status = U_ZERO_ERROR;
    /* get unicode set for that converter */
    USet* set;
    UConverter* test_converter;
    UChar32 cp;
    int32_t encIndex, offset;

    set = uset_openEmpty();
    test_converter = ucnv_open(encodings[i], &status);
    ucnv_getUnicodeSet(test_converter, set,
                       whichSet, &status);
    if (excludedCodePoints != NULL) {
      uset_addAll(set, excludedCodePoints);
    }
    uset_freeze(set);
    offset = 0;
    cp = 0;

    encIndex = findIndex(encodings[i]);
    /*
     * The following is almost, but not entirely, the same as
     * resultsManually[encIndex] =
     *   (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
     * They might be different if the set contains strings,
     * or if the utf8 string contains an illegal sequence.
     *
     * The UConverterSelector does not currently handle strings that can be
     * converted, and it treats an illegal sequence as convertible
     * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
     */
    resultsManually[encIndex] = TRUE;
    while(offset<length) {
      U8_NEXT(utf8, offset, length, cp);
      if (cp >= 0 && !uset_contains(set, cp)) {
        resultsManually[encIndex] = FALSE;
        break;
      }
    }
    uset_close(set);
    ucnv_close(test_converter);
  }
  return resultsManually;
}
开发者ID:winlibs,项目名称:icu4c,代码行数:54,代码来源:ucnvseltst.c

示例12: utf8_length

static int utf8_length(const unsigned char *buf, size_t bufsize) {
    UChar32 c;
    size_t length = 0;
    int i = 0;
    while (i < (int)bufsize) {
        U8_NEXT(buf, i, bufsize, c);
        assert(c >= 0);
        length++;
    }
    return length;
}
开发者ID:JonasT,项目名称:TaskWarlord,代码行数:11,代码来源:utf8.c

示例13: stri__extract_firstlast_charclass

/**
 * Extract first or last occurences of a character class in each string
 *
 * @param str character vector
 * @param pattern character vector
 * @return character vector
 *
 * @version 0.1 (Marek Gagolewski, 2013-06-08)
 * @version 0.2 (Marek Gagolewski, 2013-06-15) Use StrContainerCharClass
 * @version 0.3 (Marek Gagolewski, 2013-06-16) make StriException-friendly
 */
SEXP stri__extract_firstlast_charclass(SEXP str, SEXP pattern, bool first)
{
   str = stri_prepare_arg_string(str, "str");
   pattern = stri_prepare_arg_string(pattern, "pattern");
   R_len_t vectorize_length = stri__recycling_rule(true, 2, LENGTH(str), LENGTH(pattern));

   STRI__ERROR_HANDLER_BEGIN
   StriContainerUTF8 str_cont(str, vectorize_length);
   StriContainerCharClass pattern_cont(pattern, vectorize_length);

   SEXP ret;
   PROTECT(ret = Rf_allocVector(STRSXP, vectorize_length));

   for (R_len_t i = pattern_cont.vectorize_init();
         i != pattern_cont.vectorize_end();
         i = pattern_cont.vectorize_next(i))
   {
      SET_STRING_ELT(ret, i, NA_STRING);

      if (str_cont.isNA(i) || pattern_cont.isNA(i))
         continue;

      CharClass pattern_cur = pattern_cont.get(i);
      R_len_t     str_cur_n = str_cont.get(i).length();
      const char* str_cur_s = str_cont.get(i).c_str();
      R_len_t j, jlast;
      UChar32 chr;

      if (first) {
         for (jlast=j=0; j<str_cur_n; ) {
            U8_NEXT(str_cur_s, j, str_cur_n, chr);
            if (pattern_cur.test(chr)) {
               SET_STRING_ELT(ret, i, Rf_mkCharLenCE(str_cur_s+jlast, j-jlast, CE_UTF8));
               break; // that's enough for first
            }
            jlast = j;
         }
      }
      else {
         for (jlast=j=str_cur_n; j>0; ) {
            U8_PREV(str_cur_s, 0, j, chr); // go backwards
            if (pattern_cur.test(chr)) {
               SET_STRING_ELT(ret, i, Rf_mkCharLenCE(str_cur_s+j, jlast-j, CE_UTF8));
               break; // that's enough for last
            }
            jlast = j;
         }
      }
   }

   UNPROTECT(1);
   return ret;
   STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */)
}
开发者ID:rforge,项目名称:stringi,代码行数:65,代码来源:stri_search_class_extract.cpp

示例14: stri_enc_toascii

/** Convert character vector to ASCII
 *
 * All charcodes > 127 are replaced with subst chars (0x1A)
 *
 * @param str character vector
 * @return character vector
 *
 * @version 0.1 (Marek Gagolewski)
 * @version 0.2 (Marek Gagolewski, 2013-06-16) make StriException-friendly
 */
SEXP stri_enc_toascii(SEXP str)
{
   str = stri_prepare_arg_string(str, "str");
   R_len_t n = LENGTH(str);

   STRI__ERROR_HANDLER_BEGIN
   SEXP ret;
   PROTECT(ret = Rf_allocVector(STRSXP, n));
   for (R_len_t i=0; i<n; ++i) {
      SEXP curs = STRING_ELT(str, i);
      if (curs == NA_STRING) {
         SET_STRING_ELT(ret, i, NA_STRING);
         continue;
      }
      else if (IS_ASCII(curs)) {
         SET_STRING_ELT(ret, i, curs);
      }
      else if (IS_UTF8(curs)) {
         R_len_t curn = LENGTH(curs);
         const char* curs_tab = CHAR(curs);
         // TODO: buffer reuse....
         String8 buf(curn+1); // this may be 4 times too much
         R_len_t k = 0;
         UChar32 c;
         for (int j=0; j<curn; ) {
            U8_NEXT(curs_tab, j, curn, c);
            if (c > ASCII_MAXCHARCODE)
               buf.data()[k++] = ASCII_SUBSTITUTE;
            else
               buf.data()[k++] = (char)c;
         }
         SET_STRING_ELT(ret, i, Rf_mkCharLenCE(buf.data(), k, CE_UTF8)); // will be marked as ASCII anyway by mkCharLenCE
      }
      else { // some 8-bit encoding
         R_len_t curn = LENGTH(curs);
         const char* curs_tab = CHAR(curs);
         // TODO: buffer reuse....
         String8 buf(curn+1);
         R_len_t k = 0;
         for (R_len_t j=0; j<curn; ++j) {
            if (U8_IS_SINGLE(curs_tab[j]))
               buf.data()[k++] = curs_tab[j];
            else {
               buf.data()[k++] = (char)ASCII_SUBSTITUTE; // subst char in ascii
            }
         }
         SET_STRING_ELT(ret, i, Rf_mkCharLenCE(buf.data(), k, CE_UTF8)); // will be marked as ASCII anyway by mkCharLenCE
      }
   }
   UNPROTECT(1);
   return ret;
   STRI__ERROR_HANDLER_END(;/* nothing special to be done on error */)
}
开发者ID:rforge,项目名称:stringi,代码行数:63,代码来源:stri_encoding_conversion.cpp

示例15: utf8_iterate_codepoints

static int utf8_iterate_codepoints(const unsigned char *buf, size_t bufsize,
        int (*do_something)(UChar32 c)) {
    UChar32 c;
    int i = 0;
    while (i < (int)bufsize) {
        U8_NEXT(buf, i, bufsize, c);
        assert(c >= 0);
        if (!do_something(c)) {
            return 0;
        }
    }
    return 1;
}
开发者ID:JonasT,项目名称:TaskWarlord,代码行数:13,代码来源:utf8.c


注:本文中的U8_NEXT函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。