当前位置: 首页>>代码示例>>Java>>正文


Java UTF16.charAt方法代码示例

本文整理汇总了Java中com.ibm.icu.text.UTF16.charAt方法的典型用法代码示例。如果您正苦于以下问题:Java UTF16.charAt方法的具体用法?Java UTF16.charAt怎么用?Java UTF16.charAt使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在com.ibm.icu.text.UTF16的用法示例。


在下文中一共展示了UTF16.charAt方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: calcStatus

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Classifies the span between two break positions by scanning its code points.
 *
 * <p>The span is {@code [start + current, start + next)} within {@code text}
 * ({@code start} and {@code text} are defined outside this method). The first
 * digit found yields {@code WORD_NUMBER}; the first letter found yields
 * {@code WORD_LETTER}; whichever occurs first wins.
 *
 * @param current offset of the span's beginning, relative to {@code start}
 * @param next offset of the span's end (exclusive), relative to {@code start}
 * @return {@code WORD_NUMBER}, {@code WORD_LETTER}, or {@code WORD_NONE} when
 *         either position is {@code BreakIterator.DONE} or the span holds no
 *         digit or letter
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }
    int begin = start + current;
    int end = start + next;
    int codepoint;
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Read at the scan position i. Reading at begin on every pass would
        // classify only the first code point and ignore the rest of the span.
        codepoint = UTF16.charAt(text, 0, end, i);
        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }
    return RuleBasedBreakIterator.WORD_NONE;
}
 
开发者ID:jprante,项目名称:elasticsearch-icu,代码行数:18,代码来源:BreakIteratorWrapper.java

示例2: calcStatus

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Classifies the span between two break positions by scanning its code points.
 *
 * <p>The span is {@code [start + current, start + next)} within {@code text}
 * ({@code start} and {@code text} are defined outside this method). The first
 * digit found yields {@code WORD_NUMBER}; the first letter found yields
 * {@code WORD_LETTER}; whichever occurs first wins.
 *
 * @param current offset of the span's beginning, relative to {@code start}
 * @param next offset of the span's end (exclusive), relative to {@code start}
 * @return {@code WORD_NUMBER}, {@code WORD_LETTER}, or {@code WORD_NONE} when
 *         either position is {@code BreakIterator.DONE} or the span holds no
 *         digit or letter
 */
private int calcStatus(int current, int next) {
  if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
    return RuleBasedBreakIterator.WORD_NONE;
  }

  int begin = start + current;
  int end = start + next;

  int codepoint;
  for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
    // Read at the scan position i. Reading at begin on every pass would
    // classify only the first code point and ignore the rest of the span.
    codepoint = UTF16.charAt(text, 0, end, i);

    if (UCharacter.isDigit(codepoint)) {
      return RuleBasedBreakIterator.WORD_NUMBER;
    } else if (UCharacter.isLetter(codepoint)) {
      // TODO: try to separately specify ideographic, kana?
      // [currently all bundled as letter for this case]
      return RuleBasedBreakIterator.WORD_LETTER;
    }
  }

  return RuleBasedBreakIterator.WORD_NONE;
}
 
开发者ID:europeana,项目名称:search,代码行数:23,代码来源:BreakIteratorWrapper.java

示例3: getNextDelimiter

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Finds the first delimiter at or after the given offset of the source string.
 * Delimiter membership is decided by the {@code delims} lookup table when it
 * is present, otherwise by the {@code m_delimiters_} set.
 * @param offset index into the source string at which to start scanning;
 *        a negative value means there is nothing to scan
 * @return index of the first delimiter found, or
 *         {@code (-1 - source string length)} when the offset is negative or
 *         no delimiter occurs before the end of the source
 */
private int getNextDelimiter(int offset)
{
    if (offset < 0) {
        return -1 - m_length_;
    }
    // Choose the membership test once, up front.
    final boolean useSet = (delims == null);
    int index = offset;
    // The character at the starting offset is always examined before the
    // bound is checked, matching a do/while scan.
    for (;;) {
        final int codePoint = UTF16.charAt(m_source_, index);
        final boolean isDelimiter = useSet
            ? m_delimiters_.contains(codePoint)
            : (codePoint < delims.length && delims[codePoint]);
        if (isDelimiter) {
            break;
        }
        index++;
        if (index >= m_length_) {
            break;
        }
    }
    return (index < m_length_) ? index : -1 - m_length_;
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:36,代码来源:StringTokenizer.java

示例4: getNextNonDelimiter

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Finds the first non-delimiter at or after the given offset of the source
 * string. Delimiter membership is decided by the {@code delims} lookup table
 * when it is present, otherwise by the {@code m_delimiters_} set.
 * @param offset index into the source string at which to start scanning;
 *        a negative value means there is nothing to scan
 * @return index of the first non-delimiter found, or
 *         {@code (-1 - source string length)} when the offset is negative or
 *         only delimiters remain up to the end of the source
 */
private int getNextNonDelimiter(int offset)
{
    if (offset < 0) {
        return -1 - m_length_;
    }
    // Choose the membership test once, up front.
    final boolean useSet = (delims == null);
    int index = offset;
    // The character at the starting offset is always examined before the
    // bound is checked, matching a do/while scan.
    for (;;) {
        final int codePoint = UTF16.charAt(m_source_, index);
        final boolean isDelimiter = useSet
            ? m_delimiters_.contains(codePoint)
            : (codePoint < delims.length && delims[codePoint]);
        if (!isDelimiter) {
            break;
        }
        index++;
        if (index >= m_length_) {
            break;
        }
    }
    return (index < m_length_) ? index : -1 - m_length_;
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:36,代码来源:StringTokenizer.java

示例5: _current

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Reads the code point at the current position without consuming it. No
 * escapes or variables are parsed and no whitespace is skipped.
 * When {@code buf} is set the code point comes from {@code buf} at
 * {@code bufPos}; otherwise it comes from {@code text} at the index held by
 * {@code pos}.
 * @return the current 32-bit code point, or {@code DONE} when reading from
 *         {@code text} and the index is at or past its end
 */
private int _current() {
    if (buf == null) {
        final int index = pos.getIndex();
        if (index >= text.length()) {
            return DONE;
        }
        return UTF16.charAt(text, index);
    }
    return UTF16.charAt(buf, 0, buf.length, bufPos);
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:14,代码来源:RuleCharacterIterator.java

示例6: next

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Advances to the next script run.
 *
 * <p>On success {@code scriptStart} and {@code scriptLimit} bracket the run and
 * {@code scriptCode} holds its script.
 *
 * @return {@code true} if a run was produced, {@code false} if the previous
 *         run already reached {@code limit}
 */
boolean next() {
    if (scriptLimit >= limit) {
        return false;
    }
    scriptCode = UScript.COMMON;
    scriptStart = scriptLimit;
    while (index < limit) {
        final int codePoint = UTF16.charAt(text, start, limit, index - start);
        final int codePointScript = getScript(codePoint);
        // Per UTR #24, never break between a non-spacing mark and its base
        // character: a non-spacing mark, whatever its own script value,
        // inherits the script of its base and therefore stays in the run.
        final boolean staysInRun = isSameScript(scriptCode, codePointScript)
                || UCharacter.getType(codePoint) == ECharacterCategory.NON_SPACING_MARK;
        if (!staysInRun) {
            break;
        }
        index += UTF16.getCharCount(codePoint);
        // A run that is still Inherited or Common takes on the script of the
        // first more specific code point it contains.
        if (scriptCode <= UScript.INHERITED && codePointScript > UScript.INHERITED) {
            scriptCode = codePointScript;
        }
    }
    scriptLimit = index;
    return true;
}
 
开发者ID:jprante,项目名称:elasticsearch-icu,代码行数:38,代码来源:ScriptIterator.java

示例7: next

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Moves this iterator to the following script run.
 *
 * <p>When a run is produced, {@code scriptStart}/{@code scriptLimit} delimit it
 * and {@code scriptCode} identifies its script.
 *
 * @return {@code false} once the previous run has reached {@code limit},
 *         {@code true} otherwise
 */
boolean next() {
  if (scriptLimit >= limit) {
    return false;
  }

  scriptCode = UScript.COMMON;
  scriptStart = scriptLimit;

  while (index < limit) {
    final int cp = UTF16.charAt(text, start, limit, index - start);
    final int cpScript = getScript(cp);

    // UTR #24: a boundary must never fall between a non-spacing mark and its
    // base character, so such a mark inherits the base character's script
    // regardless of its own script value.
    final boolean extendsRun = isSameScript(scriptCode, cpScript)
        || UCharacter.getType(cp) == ECharacterCategory.NON_SPACING_MARK;
    if (!extendsRun) {
      break;
    }

    index += UTF16.getCharCount(cp);

    // Inherited or Common resolves to the script of the surrounding text.
    if (scriptCode <= UScript.INHERITED && cpScript > UScript.INHERITED) {
      scriptCode = cpScript;
    }
  }

  scriptLimit = index;
  return true;
}
 
开发者ID:europeana,项目名称:search,代码行数:43,代码来源:ScriptIterator.java

示例8: isWord

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Reports whether the given range of {@code text} contains at least one
 * letter or digit code point.
 *
 * @param text UTF-16 text to inspect
 * @param start index of the first char to inspect
 * @param end index just past the last char to inspect
 * @return {@code true} if any code point in {@code [start, end)} is a letter
 *         or digit, {@code false} otherwise (including for an empty range)
 */
protected boolean isWord(char text[], int start, int end) {
  int codepoint;
  for (int i = start; i < end; i += UTF16.getCharCount(codepoint)) {
    // Read at the scan position i. Reading at start on every pass would
    // test only the first code point of the range.
    codepoint = UTF16.charAt(text, 0, end, i);

    if (UCharacter.isLetterOrDigit(codepoint)) {
      return true;
    }
  }

  return false;
}
 
开发者ID:pkarmstr,项目名称:NYBC,代码行数:12,代码来源:TestLaoBreakIterator.java

示例9: next

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Steps to the next run of text sharing a single script.
 *
 * <p>After a successful call, {@code scriptStart} and {@code scriptLimit} mark
 * the run's bounds and {@code scriptCode} is the run's script.
 *
 * @return {@code true} when a run was found; {@code false} when the previous
 *         run already ended at or beyond {@code limit}
 */
boolean next() {
    if (scriptLimit >= limit) {
        return false;
    }
    scriptCode = UScript.COMMON;
    scriptStart = scriptLimit;
    for (; index < limit; ) {
        final int current = UTF16.charAt(text, start, limit, index - start);
        final int currentScript = getScript(current);
        /*
         * UTR #24 requires that script boundaries never separate a
         * non-spacing mark from its base character, so a non-spacing mark is
         * kept in the current run whatever its own script value is.
         */
        if (!isSameScript(scriptCode, currentScript)
                && UCharacter.getType(current) != ECharacterCategory.NON_SPACING_MARK) {
            break;
        }
        index += UTF16.getCharCount(current);
        /*
         * Inherited or Common gives way to the first more specific script
         * seen in the run.
         */
        if (scriptCode <= UScript.INHERITED && currentScript > UScript.INHERITED) {
            scriptCode = currentScript;
        }
    }
    scriptLimit = index;
    return true;
}
 
开发者ID:jprante,项目名称:elasticsearch-plugin-bundle,代码行数:38,代码来源:ScriptIterator.java

示例10: next

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Find the next script run. Returns <code>false</code> if there
 * isn't another run, returns <code>true</code> if there is.
 *
 * <p>On success, <code>scriptStart</code> and <code>scriptLimit</code> are
 * updated to bracket the run and <code>scriptCode</code> holds the script
 * resolved for it. Paired characters (as identified by
 * <code>getPairIndex</code>) are tracked on a stack so that a closing
 * character takes the script that was in effect at its matching opener.
 *
 * @return <code>false</code> if there isn't another run, <code>true</code> if there is.
 *
 * @internal
 * @deprecated This API is ICU internal only.
 */
@Deprecated
public final boolean next()
{
    // if we've fallen off the end of the text, we're done
    if (scriptLimit >= textLimit) {
        return false;
    }

    // every run starts out as COMMON and begins where the previous one ended
    scriptCode  = UScript.COMMON;
    scriptStart = scriptLimit;
    
    syncFixup();
    
    while (textIndex < textLimit) {
        int ch = UTF16.charAt(text, textStart, textLimit, textIndex - textStart);
        int codePointCount = UTF16.getCharCount(ch);
        int sc = UScript.getScript(ch);
        int pairIndex = getPairIndex(ch);

        // Advance past the character now; this is undone below if the
        // character turns out to belong to the next run.
        textIndex += codePointCount;
        
        // Paired character handling:
        //
        // if it's an open character, push it onto the stack.
        // if it's a close character, find the matching open on the
        // stack, and use that script code. Any non-matching open
        // characters above it on the stack will be popped.
        if (pairIndex >= 0) {
            // even pair index: open character; odd pair index: close character
            if ((pairIndex & 1) == 0) {
                push(pairIndex, scriptCode);
            } else {
                // clear the low bit to get the index of the matching opener
                int pi = pairIndex & ~1;

                while (stackIsNotEmpty() && top().pairIndex != pi) {
                    pop();
                }

                if (stackIsNotEmpty()) {
                    sc = top().scriptCode;
                }
            }
        }

        if (sameScript(scriptCode, sc)) {
            // the first specific script seen replaces COMMON/INHERITED
            // as the script of the whole run
            if (scriptCode <= UScript.INHERITED && sc > UScript.INHERITED) {
                scriptCode = sc;

                fixup(scriptCode);
            }

            // if this character is a close paired character,
            // pop the matching open character from the stack
            if (pairIndex >= 0 && (pairIndex & 1) != 0) {
                pop();
            }
        } else {
            // We've just seen the first character of
            // the next run. Back over it so we'll see
            // it again the next time.
            textIndex -= codePointCount;
            break;
        }
    }

    scriptLimit = textIndex;
    return true;
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:77,代码来源:UScriptRun.java

示例11: countTokens

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Calculates the number of times that this tokenizer's 
 * <code>nextToken</code> method can be called before it generates an 
 * exception. The current position is not advanced.
 * <p>As a side effect the start and limit offsets of every remaining token
 * are recorded in <code>m_tokenStart_</code> / <code>m_tokenLimit_</code>;
 * a later call while that cache is active (<code>m_tokenOffset_ &gt;= 0</code>)
 * answers from the cache without rescanning.
 * @return the number of tokens remaining in the string using the 
 *         current delimiter set.
 * @see #nextToken()
 * @stable ICU 2.4
 */
public int countTokens() 
{
    int result = 0;
    if (hasMoreTokens()) {
        // token boundaries already cached: answer from the cache
        if (m_tokenOffset_ >= 0) {
            return m_tokenSize_ - m_tokenOffset_;
        }
        // first use: allocate the boundary arrays
        if (m_tokenStart_ == null) {
            m_tokenStart_ = new int[TOKEN_SIZE_];
            m_tokenLimit_ = new int[TOKEN_SIZE_];
        }
        do {
            // arrays are full: grow both by TOKEN_SIZE_ and copy the
            // boundaries recorded so far
            if (m_tokenStart_.length == result) {
                int temptokenindex[] = m_tokenStart_;
                int temptokensize[] = m_tokenLimit_;
                int originalsize = temptokenindex.length;
                int newsize = originalsize + TOKEN_SIZE_;
                m_tokenStart_ = new int[newsize];
                m_tokenLimit_ = new int[newsize];
                System.arraycopy(temptokenindex, 0, m_tokenStart_, 0, 
                                 originalsize);
                System.arraycopy(temptokensize, 0, m_tokenLimit_, 0, 
                                 originalsize);
            }
            m_tokenStart_[result] = m_nextOffset_;
            if (m_returnDelimiters_) {
                // delimiters are tokens too, so first classify the
                // character at the current offset
                int c = UTF16.charAt(m_source_, m_nextOffset_);
                boolean contains = delims == null 
                    ? m_delimiters_.contains(c) 
                    : c < delims.length && delims[c];
                if (contains) {
                    if (m_coalesceDelimiters_) {
                        // a run of delimiters forms a single token
                        m_tokenLimit_[result] = getNextNonDelimiter(
                                                            m_nextOffset_);
                    } else {
                        // the delimiter token ends one position later;
                        // -1 marks that the end of the source was reached
                        int p = m_nextOffset_ + 1;
                        if (p == m_length_) {
                            p = -1;
                        }
                        m_tokenLimit_[result] = p;

                    }
                }
                else {
                    m_tokenLimit_[result] = getNextDelimiter(m_nextOffset_);
                }
                // the next token starts where this one ended
                m_nextOffset_ = m_tokenLimit_[result];
            }
            else {
                // delimiters are not returned: the token runs to the next
                // delimiter, and scanning resumes at the following
                // non-delimiter
                m_tokenLimit_[result] = getNextDelimiter(m_nextOffset_);
                m_nextOffset_ = getNextNonDelimiter(m_tokenLimit_[result]);
            }
            result ++;
        } while (m_nextOffset_ >= 0); // a negative offset ends the scan
        // activate the cache and put the position back on the first
        // counted token
        m_tokenOffset_ = 0;
        m_tokenSize_ = result;
        m_nextOffset_ = m_tokenStart_[0];
    }
    return result;
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:70,代码来源:StringTokenizer.java

示例12: quoteLiteral

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Quote a literal string, using the available settings. Thus syntax characters, quote characters, and ignorable characters will be put into quotes.
 * <p>Code points in <code>escapeCharacters</code> are always written through
 * <code>appendEscaped</code>. Code points that need quoting are written, in
 * order of preference, with a backslash prefix (when <code>usingSlash</code>),
 * inside single quotes (when <code>usingQuote</code>), or escaped.
 * @param string String passed to quote a literal string.
 * @return A string using the available settings will place syntax, quote, or ignorable characters into quotes.
 */
public String quoteLiteral(String string) {
    // lazily build the set of code points that need quoting
    if (needingQuoteCharacters == null) {
        needingQuoteCharacters = new UnicodeSet().addAll(syntaxCharacters).addAll(ignorableCharacters).addAll(extraQuotingCharacters); // .addAll(quoteCharacters)
        if (usingSlash) needingQuoteCharacters.add(BACK_SLASH);
        if (usingQuote) needingQuoteCharacters.add(SINGLE_QUOTE);
    }
    StringBuffer result = new StringBuffer();
    // tracks whether an opening single quote is currently pending a close
    int quotedChar = NO_QUOTE;
    int cp;
    // iterate by code point, not by char, so surrogate pairs stay together
    for (int i = 0; i < string.length(); i += UTF16.getCharCount(cp)) {
        cp = UTF16.charAt(string, i);
        if (escapeCharacters.contains(cp)) {
            // we may have to fix up previous characters
            // (close an open quote before emitting the escape)
            if (quotedChar == IN_QUOTE) {
                result.append(SINGLE_QUOTE);
                quotedChar = NO_QUOTE;
            }
            appendEscaped(result, cp);
            continue;
        }
        
        if (needingQuoteCharacters.contains(cp)) {
            // if we have already started a quote
            if (quotedChar == IN_QUOTE) {
                UTF16.append(result, cp);
                if (usingQuote && cp == SINGLE_QUOTE) { // double it
                    result.append(SINGLE_QUOTE);
                }
                continue;
            }
            // otherwise not already in quote
            // backslash-prefixing is tried before quoting
            if (usingSlash) {
                result.append(BACK_SLASH);
                UTF16.append(result, cp);
                continue;
            }
            if (usingQuote) {
                if (cp == SINGLE_QUOTE) { // double it and continue
                    result.append(SINGLE_QUOTE);
                    result.append(SINGLE_QUOTE);
                    continue;
                }
                // open a quote; it stays open until a character that
                // does not need quoting (or the end of input) closes it
                result.append(SINGLE_QUOTE);
                UTF16.append(result, cp);
                quotedChar = IN_QUOTE;
                continue;
            }
            // we have no choice but to use \\u or \\U
            appendEscaped(result, cp);
            continue;
        }
        // otherwise cp doesn't need quoting
        // we may have to fix up previous characters
        if (quotedChar == IN_QUOTE) {
            result.append(SINGLE_QUOTE);
            quotedChar = NO_QUOTE;
        }
        UTF16.append(result, cp);
    }
    // all done. 
    // we may have to fix up previous characters
    if (quotedChar == IN_QUOTE) {
        result.append(SINGLE_QUOTE);
    }
    return result.toString();
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:72,代码来源:PatternTokenizer.java

示例13: addStringCaseClosure

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Performs a reverse full case folding of a string and adds the case closure
 * of the resulting code points to a set.
 *
 * <p>The {@code unfold} table is binary-searched for a row whose string part
 * matches {@code s}. When a row matches, every code point stored after the
 * string part of that row is added to {@code set}, and
 * {@code addCaseClosure} is invoked for each of them.
 *
 * @param s the candidate full case folding string
 * @param set the set that receives the code points and their closures
 * @return true if the string was found; false if there is no unfold data,
 *         {@code s} is null, {@code s} is shorter than two chars or longer
 *         than the table's string width, or no row matches
 */
public final boolean addStringCaseClosure(String s, UnicodeSet set) {
    if (unfold == null || s == null) {
        return false; /* no reverse case folding data, or no string */
    }

    final int length = s.length();
    if (length <= 1) {
        /*
         * Too short to match anything. Checking for more than one code point
         * would be more precise, but a lone supplementary code point would
         * simply not be found, so it makes no practical difference.
         */
        return false;
    }

    /* header values of the unfold table */
    final int rowCount = unfold[UNFOLD_ROWS];
    final int rowWidth = unfold[UNFOLD_ROW_WIDTH];
    final int stringWidth = unfold[UNFOLD_STRING_WIDTH];

    if (length > stringWidth) {
        return false; /* too long to match anything */
    }

    /* binary search over the rows */
    int low = 0;
    int high = rowCount;
    while (low < high) {
        final int mid = (low + high) / 2;
        final int rowOffset = (mid + 1) * rowWidth; // +1 skips the header row
        final int comparison = strcmpMax(s, rowOffset, stringWidth);

        if (comparison < 0) {
            high = mid;
            continue;
        }
        if (comparison > 0) {
            low = mid + 1;
            continue;
        }

        /* match: walk the code points that follow the string part of the row */
        int column = stringWidth;
        while (column < rowWidth && unfold[rowOffset + column] != 0) {
            final int codePoint = UTF16.charAt(unfold, rowOffset, unfold.length, column);
            set.add(codePoint);
            addCaseClosure(codePoint, set);
            column += UTF16.getCharCount(codePoint);
        }
        return true;
    }

    return false; /* string not found */
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:67,代码来源:UCaseProps.java

示例14: parsePattern

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Matches a parsing pattern against text held in a Replaceable.
 * Pattern characters are compared literally and case-sensitively, with one
 * special character:
 *
 * ~  zero or more Pattern_White_Space chars
 *
 * When the whole pattern has been matched, the index of the first unparsed
 * text position is returned; in every other case the result is -1.
 * @param pat pattern that controls parsing
 * @param text text to be parsed, starting at index
 * @param index offset to first character to parse
 * @param limit offset after last character to parse
 * @return index after last parsed character, or -1 on parse failure.
 */
public static int parsePattern(String pat,
        Replaceable text,
        int index,
        int limit) {
    int patIndex = 0;

    // nothing to match: succeed without consuming any text
    if (pat.length() == patIndex) {
        return index;
    }

    int patChar = Character.codePointAt(pat, patIndex);

    while (index < limit) {
        final int textChar = text.char32At(index);

        if (patChar == '~') {
            // '~' swallows white space one code point at a time
            if (PatternProps.isWhiteSpace(textChar)) {
                index += UTF16.getCharCount(textChar);
                continue;
            }
            // white space run is over; move past '~'
            patIndex++;
            if (patIndex == pat.length()) {
                return index; // success; textChar left unparsed
            }
            // textChar is examined again against the next pattern char
        } else if (textChar != patChar) {
            // literal mismatch
            return -1;
        } else {
            // literal match: consume it in both text and pattern
            final int width = UTF16.getCharCount(textChar);
            index += width;
            patIndex += width;
            if (patIndex == pat.length()) {
                return index; // success; textChar parsed
            }
        }

        patChar = UTF16.charAt(pat, patIndex);
    }

    return -1; // text ended before end of pat
}
 
开发者ID:abhijitvalluri,项目名称:fitnotifications,代码行数:67,代码来源:Utility.java

示例15: char32At

import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Returns the code point found at the given offset of {@code buffer},
 * considering only its first {@code length} chars.
 *
 * @param pos UTF-16 offset into the buffer
 * @return the code point at {@code pos}
 */
@Override
public int char32At(int pos) {
  final int codePoint = UTF16.charAt(buffer, 0, length, pos);
  return codePoint;
}
 
开发者ID:europeana,项目名称:search,代码行数:5,代码来源:ICUTransformFilter.java


注:本文中的com.ibm.icu.text.UTF16.charAt方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。