当前位置: 首页>>代码示例>>Java>>正文


Java UTF16.getCharCount方法代码示例

本文整理汇总了Java中com.ibm.icu.text.UTF16.getCharCount方法的典型用法代码示例。如果您正苦于以下问题:Java UTF16.getCharCount方法的具体用法?Java UTF16.getCharCount怎么用?Java UTF16.getCharCount使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在com.ibm.icu.text.UTF16的用法示例。


在下文中一共展示了UTF16.getCharCount方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: escape

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Produces a printable-ASCII rendering of a string.
 *
 * <p>Code points from U+0020 through U+007F are copied through as-is, with
 * the single exception of the backslash, which is written twice. Any other
 * code point is replaced by an escape: a backslash and a lowercase {@code u}
 * followed by {@code hex(c, 4)} for code points up to U+FFFF, or a backslash
 * and an uppercase {@code U} followed by {@code hex(c, 8)} for supplementary
 * code points.
 *
 * @param s the text to escape; iterated code point by code point
 * @return the escaped text
 */
public static final String escape(String s) {
    final StringBuilder escaped = new StringBuilder();
    int pos = 0;
    while (pos < s.length()) {
        final int cp = Character.codePointAt(s, pos);
        pos += UTF16.getCharCount(cp);

        final boolean printableAscii = cp >= ' ' && cp <= 0x007F;
        if (printableAscii && cp == '\\') {
            // A literal backslash is doubled so it cannot be mistaken for an escape.
            escaped.append("\\\\");
        } else if (printableAscii) {
            escaped.append((char) cp);
        } else if (cp <= 0xFFFF) {
            // BMP code point: short escape with 4 digits.
            escaped.append("\\u").append(hex(cp, 4));
        } else {
            // Supplementary code point: long escape with 8 digits.
            escaped.append("\\U").append(hex(cp, 8));
        }
    }
    return escaped.toString();
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:24,代码来源:Utility.java

示例2: hex

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Writes the hex form of every element of a character sequence to an
 * {@link Appendable}, placing {@code separator} between consecutive groups.
 * For example, the string "AB" with width 4 and separator "," yields
 * "0041,0042". Each group is formatted by the two-argument
 * {@code hex(value, width)} helper.
 *
 * @param s the sequence to convert
 * @param width forwarded to {@code hex(value, width)} for every group
 * @param separator text emitted before every group except the first
 * @param useCodePoints if true, one group is produced per code point
 *        (surrogate pairs are combined); if false, one group per UTF-16
 *        code unit
 * @param result the destination that receives the output
 * @return {@code result}, to allow chaining
 * @throws IllegalIcuArgumentException wrapping any {@link IOException}
 *         raised by {@code result}
 */
public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
    try {
        int pos = 0;
        if (!useCodePoints) {
            // One group per UTF-16 code unit.
            while (pos < s.length()) {
                if (pos > 0) {
                    result.append(separator);
                }
                result.append(hex(s.charAt(pos), width));
                pos++;
            }
            return result;
        }

        // One group per code point; advance by 1 or 2 code units accordingly.
        while (pos < s.length()) {
            final int codePoint = Character.codePointAt(s, pos);
            if (pos > 0) {
                result.append(separator);
            }
            result.append(hex(codePoint, width));
            pos += UTF16.getCharCount(codePoint);
        }
        return result;
    } catch (IOException e) {
        throw new IllegalIcuArgumentException(e);
    }
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:30,代码来源:Utility.java

示例3: parseUnicodeIdentifier

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Reads a Unicode identifier out of {@code str}, beginning at {@code pos[0]}.
 *
 * <p>The first code point must satisfy
 * {@code UCharacter.isUnicodeIdentifierStart}; each following code point is
 * consumed while it satisfies {@code UCharacter.isUnicodeIdentifierPart}.
 *
 * @param str the string to parse
 * @param pos INPUT-OUTPUT parameter. On input, {@code pos[0]} is the index of
 *        the first character to examine; callers are expected to pass a value
 *        less than {@code str.length()} that does not point at whitespace.
 *        On output, when an identifier was parsed, {@code pos[0]} is the
 *        index just past its last character. It is left untouched when
 *        {@code null} is returned.
 * @return the identifier, or {@code null} if the code point at {@code pos[0]}
 *         cannot start an identifier. If {@code pos[0]} is already at or past
 *         the end of {@code str}, the result is the empty string.
 */
public static String parseUnicodeIdentifier(String str, int[] pos) {
    final StringBuilder identifier = new StringBuilder();
    int cursor = pos[0];
    while (cursor < str.length()) {
        final int cp = Character.codePointAt(str, cursor);
        if (identifier.length() == 0) {
            // Nothing collected yet: this code point must be a valid start.
            if (!UCharacter.isUnicodeIdentifierStart(cp)) {
                return null;
            }
        } else if (!UCharacter.isUnicodeIdentifierPart(cp)) {
            // First code point that cannot continue the identifier ends it.
            break;
        }
        identifier.appendCodePoint(cp);
        cursor += UTF16.getCharCount(cp);
    }
    pos[0] = cursor;
    return identifier.toString();
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:38,代码来源:Utility.java

示例4: calcStatus

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Classifies the span between two break positions by scanning its code
 * points in order.
 *
 * <p>Both positions are offset by the enclosing object's {@code start} field
 * before indexing into {@code text}.
 *
 * @param current break position at which the span begins
 * @param next break position at which the span ends
 * @return {@code WORD_NUMBER} if the first digit-or-letter code point in the
 *         span is a digit, {@code WORD_LETTER} if it is a letter, and
 *         {@code WORD_NONE} if the span contains neither or if either
 *         position is {@code BreakIterator.DONE}
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }
    int begin = start + current;
    int end = start + next;
    int codepoint;
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Read at the loop cursor i. Reading at begin on every iteration
        // would re-examine the first code point only, so a span such as
        // "(abc" would wrongly be reported as WORD_NONE.
        codepoint = UTF16.charAt(text, 0, end, i);
        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }
    return RuleBasedBreakIterator.WORD_NONE;
}
 
开发者ID:jprante,项目名称:elasticsearch-icu,代码行数:18,代码来源:BreakIteratorWrapper.java

示例5: calcStatus

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Classifies the span between two break positions by scanning its code
 * points in order.
 *
 * <p>Both positions are offset by the enclosing object's {@code start} field
 * before indexing into {@code text}.
 *
 * @param current break position at which the span begins
 * @param next break position at which the span ends
 * @return {@code WORD_NUMBER} if the first digit-or-letter code point in the
 *         span is a digit, {@code WORD_LETTER} if it is a letter, and
 *         {@code WORD_NONE} if the span contains neither or if either
 *         position is {@code BreakIterator.DONE}
 */
private int calcStatus(int current, int next) {
  if (current == BreakIterator.DONE || next == BreakIterator.DONE)
    return RuleBasedBreakIterator.WORD_NONE;

  int begin = start + current;
  int end = start + next;

  int codepoint;
  for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
    // Read at the loop cursor i. Reading at begin on every iteration would
    // re-examine the first code point only and never see the rest of the span.
    codepoint = UTF16.charAt(text, 0, end, i);

    if (UCharacter.isDigit(codepoint))
      return RuleBasedBreakIterator.WORD_NUMBER;
    else if (UCharacter.isLetter(codepoint)) {
      // TODO: try to separately specify ideographic, kana? 
      // [currently all bundled as letter for this case]
      return RuleBasedBreakIterator.WORD_LETTER;
    }
  }

  return RuleBasedBreakIterator.WORD_NONE;
}
 
开发者ID:europeana,项目名称:search,代码行数:23,代码来源:BreakIteratorWrapper.java

示例6: next

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Advances to the next script run.
 *
 * <p>On success the run is described by {@code scriptStart},
 * {@code scriptLimit} and {@code scriptCode}.
 *
 * @return true if a run was found, false if the previous run already
 *         reached {@code limit}
 */
boolean next() {
    if (scriptLimit >= limit) {
        return false;
    }

    // A new run begins where the previous one stopped and is COMMON until
    // a code point with a more specific script is seen.
    scriptStart = scriptLimit;
    scriptCode = UScript.COMMON;

    while (index < limit) {
        final int codePoint = UTF16.charAt(text, start, limit, index - start);
        final int script = getScript(codePoint);

        /*
         * Per UTR #24, boundaries between scripts must never separate a
         * non-spacing mark from its base character, so a non-spacing mark
         * always stays in the current run regardless of its own script.
         */
        final boolean staysInRun = isSameScript(scriptCode, script)
                || UCharacter.getType(codePoint) == ECharacterCategory.NON_SPACING_MARK;
        if (!staysInRun) {
            break;
        }

        index += UTF16.getCharCount(codePoint);

        // Inherited or Common takes on the script of the surrounding text.
        if (scriptCode <= UScript.INHERITED && script > UScript.INHERITED) {
            scriptCode = script;
        }
    }

    scriptLimit = index;
    return true;
}
 
开发者ID:jprante,项目名称:elasticsearch-icu,代码行数:38,代码来源:ScriptIterator.java

示例7: next

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Moves on to the following script run, if there is one.
 *
 * <p>After a successful call, {@code scriptStart} and {@code scriptLimit}
 * bound the run and {@code scriptCode} holds its script.
 *
 * @return false when the previous run already ended at {@code limit},
 *         true otherwise
 */
boolean next() {
  if (scriptLimit >= limit) {
    return false;
  }

  scriptStart = scriptLimit;
  scriptCode = UScript.COMMON;

  while (index < limit) {
    final int cp = UTF16.charAt(text, start, limit, index - start);
    final int cpScript = getScript(cp);

    /*
     * UTR #24 requires that a non-spacing mark never be split from its base
     * character, so such a mark is treated as belonging to the current run
     * whatever its own script value is. Anything else with a different
     * script ends the run.
     */
    if (!isSameScript(scriptCode, cpScript)
        && UCharacter.getType(cp) != ECharacterCategory.NON_SPACING_MARK) {
      break;
    }

    index += UTF16.getCharCount(cp);

    // A run that is still Inherited/Common adopts the first specific script.
    if (scriptCode <= UScript.INHERITED && cpScript > UScript.INHERITED) {
      scriptCode = cpScript;
    }
  }

  scriptLimit = index;
  return true;
}
 
开发者ID:europeana,项目名称:search,代码行数:43,代码来源:ScriptIterator.java

示例8: isWord

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Reports whether a slice of a character buffer contains at least one
 * letter or digit.
 *
 * @param text the buffer to inspect
 * @param start index of the first code unit of the slice
 * @param end index just past the last code unit of the slice
 * @return true if any code point in {@code [start, end)} satisfies
 *         {@code UCharacter.isLetterOrDigit}; false otherwise, including
 *         for an empty slice
 */
protected boolean isWord(char text[], int start, int end) {
  int codepoint;
  for (int i = start; i < end; i += UTF16.getCharCount(codepoint)) {
    // Read at the loop cursor i. Reading at start on every iteration would
    // test only the first code point, so "-a" would be reported as a non-word.
    codepoint = UTF16.charAt(text, 0, end, i);

    if (UCharacter.isLetterOrDigit(codepoint))
      return true;
  }

  return false;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:12,代码来源:TestLaoBreakIterator.java

示例9: next

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Steps to the next run of text that shares a single script.
 *
 * <p>When this returns true, {@code scriptStart}/{@code scriptLimit} delimit
 * the run and {@code scriptCode} identifies its script.
 *
 * @return true if a run was produced, false if the text was already
 *         exhausted ({@code scriptLimit >= limit})
 */
boolean next() {
    if (scriptLimit >= limit) {
        return false;
    }
    scriptCode = UScript.COMMON;
    scriptStart = scriptLimit;

    boolean inRun = true;
    while (inRun && index < limit) {
        final int c = UTF16.charAt(text, start, limit, index - start);
        final int s = getScript(c);
        /*
         * UTR #24: script boundaries must not fall between a base character
         * and a following non-spacing mark, so a non-spacing mark is kept
         * in the run even if its script value differs.
         */
        inRun = isSameScript(scriptCode, s)
                || UCharacter.getType(c) == ECharacterCategory.NON_SPACING_MARK;
        if (inRun) {
            index += UTF16.getCharCount(c);
            // Inherited/Common is replaced by the first specific script seen.
            if (scriptCode <= UScript.INHERITED && s > UScript.INHERITED) {
                scriptCode = s;
            }
        }
    }
    scriptLimit = index;
    return true;
}
 
开发者ID:jprante,项目名称:elasticsearch-plugin-bundle,代码行数:38,代码来源:ScriptIterator.java

示例10: next

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Find the next script run. Returns <code>false</code> if there
 * isn't another run, returns <code>true</code> if there is.
 *
 * <p>On a <code>true</code> return, <code>scriptStart</code> and
 * <code>scriptLimit</code> have been updated to bound the run and
 * <code>scriptCode</code> holds the script chosen for it.
 *
 * @return <code>false</code> if there isn't another run, <code>true</code> if there is.
 *
 * @internal
 * @deprecated This API is ICU internal only.
 */
@Deprecated
public final boolean next()
{
    // if we've fallen off the end of the text, we're done
    if (scriptLimit >= textLimit) {
        return false;
    }

    // A new run starts where the previous one ended and is COMMON until a
    // character with a more specific script is seen.
    scriptCode  = UScript.COMMON;
    scriptStart = scriptLimit;
    
    // NOTE(review): syncFixup() is defined elsewhere; presumably it resets the
    // paired-character fixup bookkeeping for the new run -- confirm.
    syncFixup();
    
    while (textIndex < textLimit) {
        int ch = UTF16.charAt(text, textStart, textLimit, textIndex - textStart);
        // Despite its name, this is the number of UTF-16 code units (1 or 2)
        // occupied by ch, i.e. how far textIndex must move.
        int codePointCount = UTF16.getCharCount(ch);
        int sc = UScript.getScript(ch);
        int pairIndex = getPairIndex(ch);

        // Advance optimistically; this is undone below if ch turns out to
        // belong to the next run.
        textIndex += codePointCount;
        
        // Paired character handling:
        //
        // if it's an open character, push it onto the stack.
        // if it's a close character, find the matching open on the
        // stack, and use that script code. Any non-matching open
        // characters above it on the stack will be popped.
        if (pairIndex >= 0) {
            // Even pair indices are treated as open characters, odd ones as
            // close characters.
            if ((pairIndex & 1) == 0) {
                push(pairIndex, scriptCode);
            } else {
                // Clear the low bit to get the index of the matching open character.
                int pi = pairIndex & ~1;

                while (stackIsNotEmpty() && top().pairIndex != pi) {
                    pop();
                }

                if (stackIsNotEmpty()) {
                    sc = top().scriptCode;
                }
            }
        }

        if (sameScript(scriptCode, sc)) {
            // The run is still Inherited/Common and ch has a specific script:
            // adopt that script for the run.
            if (scriptCode <= UScript.INHERITED && sc > UScript.INHERITED) {
                scriptCode = sc;

                // NOTE(review): fixup() is defined elsewhere; presumably it
                // back-patches stack entries pushed before the script was
                // known -- confirm.
                fixup(scriptCode);
            }

            // if this character is a close paired character,
            // pop the matching open character from the stack
            if (pairIndex >= 0 && (pairIndex & 1) != 0) {
                pop();
            }
        } else {
            // We've just seen the first character of
            // the next run. Back over it so we'll see
            // it again the next time.
            textIndex -= codePointCount;
            break;
        }
    }

    scriptLimit = textIndex;
    return true;
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:77,代码来源:UScriptRun.java

示例11: quoteLiteral

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Quotes a literal string according to this tokenizer's current settings, so
 * that syntax characters, ignorable characters and extra quoting characters
 * do not appear bare in the result.
 *
 * <p>Per code point: members of {@code escapeCharacters} are always written
 * via {@code appendEscaped}. Members of the needs-quoting set are protected
 * with a backslash when {@code usingSlash} is set, otherwise placed inside a
 * single-quoted section when {@code usingQuote} is set (a single quote is
 * doubled), otherwise written via {@code appendEscaped}. All other code
 * points are copied as-is, closing any open quoted section first.
 *
 * @param string the literal text to quote
 * @return the quoted form of {@code string}
 */
public String quoteLiteral(String string) {
    // Build the needs-quoting set lazily on first use.
    if (needingQuoteCharacters == null) {
        needingQuoteCharacters = new UnicodeSet()
                .addAll(syntaxCharacters)
                .addAll(ignorableCharacters)
                .addAll(extraQuotingCharacters); // .addAll(quoteCharacters)
        if (usingSlash) {
            needingQuoteCharacters.add(BACK_SLASH);
        }
        if (usingQuote) {
            needingQuoteCharacters.add(SINGLE_QUOTE);
        }
    }

    final StringBuffer out = new StringBuffer();
    int quoteState = NO_QUOTE;
    int pos = 0;
    while (pos < string.length()) {
        final int cp = UTF16.charAt(string, pos);
        pos += UTF16.getCharCount(cp);

        final boolean mustEscape = escapeCharacters.contains(cp);
        if (!mustEscape && needingQuoteCharacters.contains(cp)) {
            if (quoteState == IN_QUOTE) {
                // Already inside a quoted section: emit as-is, doubling a quote.
                UTF16.append(out, cp);
                if (usingQuote && cp == SINGLE_QUOTE) {
                    out.append(SINGLE_QUOTE);
                }
            } else if (usingSlash) {
                out.append(BACK_SLASH);
                UTF16.append(out, cp);
            } else if (!usingQuote) {
                // Neither slash nor quote is available: fall back to an escape.
                appendEscaped(out, cp);
            } else if (cp == SINGLE_QUOTE) {
                // A lone quote is written doubled, without opening a section.
                out.append(SINGLE_QUOTE);
                out.append(SINGLE_QUOTE);
            } else {
                // Open a quoted section; it is closed lazily later.
                out.append(SINGLE_QUOTE);
                UTF16.append(out, cp);
                quoteState = IN_QUOTE;
            }
            continue;
        }

        // Escaped or ordinary code point: close any pending quoted section first.
        if (quoteState == IN_QUOTE) {
            out.append(SINGLE_QUOTE);
            quoteState = NO_QUOTE;
        }
        if (mustEscape) {
            appendEscaped(out, cp);
        } else {
            UTF16.append(out, cp);
        }
    }

    // Close a quoted section left open at the end of the input.
    if (quoteState == IN_QUOTE) {
        out.append(SINGLE_QUOTE);
    }
    return out.toString();
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:72,代码来源:PatternTokenizer.java

示例12: addStringCaseClosure

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Looks the string up in the reverse case-folding ("unfold") table and, if
 * it is present, adds the code points stored in the matching row, together
 * with each one's case closure, to the given set.
 *
 * <p>The table is searched by binary search using {@code strcmpMax}. Strings
 * of length 0 or 1, and strings longer than the table's string width, are
 * rejected without searching.
 *
 * @param s the string to look up; may be null
 * @param set the set that receives the code points and their closures
 * @return true if the string was found in the table, false otherwise
 *         (including when there is no table or {@code s} is null)
 */
public final boolean addStringCaseClosure(String s, UnicodeSet set) {
    if (unfold == null || s == null) {
        return false; /* no reverse case folding data, or no string */
    }

    final int length = s.length();
    if (length <= 1) {
        /*
         * Too short to match anything. A more precise test would count code
         * points rather than code units, but a single supplementary code
         * point would simply not be found, so the difference is immaterial.
         */
        return false;
    }

    // Table header values.
    final int unfoldRows = unfold[UNFOLD_ROWS];
    final int unfoldRowWidth = unfold[UNFOLD_ROW_WIDTH];
    final int unfoldStringWidth = unfold[UNFOLD_STRING_WIDTH];

    if (length > unfoldStringWidth) {
        return false; /* the string is too long to find any match */
    }

    /* binary search over the rows */
    int start = 0;
    int limit = unfoldRows;
    while (start < limit) {
        final int row = (start + limit) / 2;
        final int unfoldOffset = (row + 1) * unfoldRowWidth; // +1 skips the header row
        final int result = strcmpMax(s, unfoldOffset, unfoldStringWidth);

        if (result < 0) {
            limit = row;
            continue;
        }
        if (result > 0) {
            start = row + 1;
            continue;
        }

        /* found: the code points follow the string part of the row, up to a 0 or the row end */
        int column = unfoldStringWidth;
        while (column < unfoldRowWidth && unfold[unfoldOffset + column] != 0) {
            final int c = UTF16.charAt(unfold, unfoldOffset, unfold.length, column);
            set.add(c);
            addCaseClosure(c, set);
            column += UTF16.getCharCount(c);
        }
        return true;
    }

    return false; /* string not found */
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:67,代码来源:UCaseProps.java

示例13: parsePattern

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Matches a simple pattern against a range of a {@code Replaceable}.
 *
 * <p>Pattern characters are compared literally and case-sensitively, except
 * for one special character:
 *
 * <pre>
 * ~  zero or more Pattern_White_Space chars
 * </pre>
 *
 * <p>The match succeeds as soon as the end of the pattern is reached. If the
 * text range is exhausted first, or a literal does not match, the parse
 * fails.
 *
 * @param pat pattern that controls parsing
 * @param text text to be parsed, starting at index
 * @param index offset to first character to parse
 * @param limit offset after last character to parse
 * @return index after last parsed character, or -1 on parse failure; an
 *         empty pattern returns {@code index} unchanged
 */
public static int parsePattern(String pat,
        Replaceable text,
        int index,
        int limit) {
    // An empty pattern matches immediately.
    if (pat.length() == 0) {
        return index;
    }

    int patPos = 0;
    int patCp = Character.codePointAt(pat, patPos);

    while (index < limit) {
        final int textCp = text.char32At(index);

        if (patCp == '~') {
            // Whitespace wildcard: swallow the text character if it is white space.
            if (PatternProps.isWhiteSpace(textCp)) {
                index += UTF16.getCharCount(textCp);
                continue;
            }
            // Otherwise the wildcard is finished; textCp is retried against
            // the next pattern character on the following iteration.
            patPos++;
            if (patPos == pat.length()) {
                return index; // success; textCp left unparsed
            }
        } else if (textCp != patCp) {
            return -1; // literal mismatch
        } else {
            // Literal match: both cursors move by the same number of code units.
            final int width = UTF16.getCharCount(textCp);
            index += width;
            patPos += width;
            if (patPos == pat.length()) {
                return index; // success; textCp parsed
            }
        }

        patCp = UTF16.charAt(pat, patPos);
    }

    return -1; // text ended before end of pat
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:67,代码来源:Utility.java


注:本文中的com.ibm.icu.text.UTF16.getCharCount方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。