本文整理汇总了 Java 中 com.ibm.icu.text.UTF16.getCharCount 方法的典型用法代码示例。如果您正苦于以下问题:Java UTF16.getCharCount 方法的具体用法是什么?Java UTF16.getCharCount 怎么用?有哪些 Java UTF16.getCharCount 的使用示例?那么这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 com.ibm.icu.text.UTF16 的用法示例。
下文共展示了 UTF16.getCharCount 方法的 13 个代码示例,这些示例默认按受欢迎程度排序。您可以为喜欢或觉得有用的代码点赞,您的评价将有助于系统推荐出更好的 Java 代码示例。
示例1: escape
import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Returns a copy of {@code s} in which every code point outside the range
 * U+0020..U+007F is replaced by a Unicode escape, and every backslash is
 * doubled.
 *
 * <p>Code points up to U+FFFF are written with a lowercase {@code u} marker
 * and a 4-wide hex field; larger code points use an uppercase {@code U}
 * marker and an 8-wide hex field. The digits come from the two-argument
 * {@code hex} helper defined elsewhere.
 *
 * @param s the string to escape
 * @return the escaped string
 */
public static final String escape(String s) {
    final StringBuilder out = new StringBuilder();
    int pos = 0;
    while (pos < s.length()) {
        final int cp = Character.codePointAt(s, pos);
        // Step over one or two UTF-16 code units, depending on the code point.
        pos += UTF16.getCharCount(cp);

        if (cp < ' ' || cp > 0x007F) {
            // Outside the pass-through range: emit an escape sequence.
            final boolean fitsInBmp = cp <= 0xFFFF;
            out.append(fitsInBmp ? "\\u" : "\\U");
            out.append(hex(cp, fitsInBmp ? 4 : 8));
        } else if (cp == '\\') {
            out.append("\\\\"); // two backslash characters in the output
        } else {
            out.append((char) cp);
        }
    }
    return out.toString();
}
示例2: hex
import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Appends the hex form of each element of {@code s} to {@code result},
 * placing {@code separator} between consecutive elements.
 *
 * <p>Elements are code points when {@code useCodePoints} is true and
 * individual UTF-16 code units otherwise. Each element is formatted by the
 * two-argument {@code hex} helper defined elsewhere. For example, "AB" with
 * width 4 and separator "," would give "0041,0042", assuming that helper
 * zero-pads uppercase hex digits to {@code width}.
 *
 * @param s the text to convert
 * @param width the width passed through to the per-element hex helper
 * @param separator the text inserted between elements
 * @param useCodePoints true to iterate by code point, false by code unit
 * @param result the destination to append to
 * @return {@code result}, to allow chaining
 * @throws IllegalIcuArgumentException if appending to {@code result} throws
 *         an {@link IOException}
 */
public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
    try {
        int i = 0;
        if (!useCodePoints) {
            // One hex group per UTF-16 code unit.
            while (i < s.length()) {
                if (i != 0) {
                    result.append(separator);
                }
                result.append(hex(s.charAt(i), width));
                ++i;
            }
        } else {
            // One hex group per code point; surrogate pairs count once.
            while (i < s.length()) {
                final int cp = Character.codePointAt(s, i);
                if (i != 0) {
                    result.append(separator);
                }
                result.append(hex(cp, width));
                i += UTF16.getCharCount(cp);
            }
        }
        return result;
    } catch (IOException e) {
        throw new IllegalIcuArgumentException(e);
    }
}
示例3: parseUnicodeIdentifier
import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Parses a Unicode identifier from {@code str}, starting at {@code pos[0]}.
 *
 * <p>The first code point must satisfy
 * {@code UCharacter.isUnicodeIdentifierStart}; following code points are
 * consumed while they satisfy {@code UCharacter.isUnicodeIdentifierPart}.
 *
 * @param str the string to parse
 * @param pos INPUT-OUTPUT parameter. On input, {@code pos[0]} is the index
 *        of the first character to examine; callers are expected to pass a
 *        value less than {@code str.length()} that does not point at
 *        whitespace. On output, {@code pos[0]} is the index just past the
 *        last parsed character. It is left unchanged when null is returned.
 * @return the identifier, or null if the code point at {@code pos[0]} cannot
 *         start an identifier
 */
public static String parseUnicodeIdentifier(String str, int[] pos) {
    // assert(pos[0] < str.length());
    final StringBuilder identifier = new StringBuilder();
    int cursor = pos[0];
    final int end = str.length();

    while (cursor < end) {
        final int cp = Character.codePointAt(str, cursor);
        final boolean atFirstCodePoint = identifier.length() == 0;

        if (atFirstCodePoint) {
            if (!UCharacter.isUnicodeIdentifierStart(cp)) {
                return null; // not an identifier; pos[0] stays as it was
            }
        } else if (!UCharacter.isUnicodeIdentifierPart(cp)) {
            break; // first code point that is not part of the identifier
        }

        identifier.appendCodePoint(cp);
        cursor += UTF16.getCharCount(cp);
    }

    pos[0] = cursor;
    return identifier.toString();
}
示例4: calcStatus
import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Derives a word rule status for the text between two break positions.
 *
 * <p>The code points in {@code [start + current, start + next)} are scanned
 * in order. The first one that is a digit or a letter decides the result.
 *
 * @param current the break position where the segment begins, relative to
 *        {@code start}
 * @param next the break position where the segment ends, relative to
 *        {@code start}
 * @return {@code WORD_NUMBER} if the first digit-or-letter code point is a
 *         digit, {@code WORD_LETTER} if it is a letter, and {@code WORD_NONE}
 *         if there is none or either position is {@code BreakIterator.DONE}
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }

    int begin = start + current;
    int end = start + next;

    int codepoint;
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Read at the loop cursor i. Reading at 'begin' would re-examine the
        // first code point on every pass and never see the rest.
        codepoint = UTF16.charAt(text, 0, end, i);

        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }

    return RuleBasedBreakIterator.WORD_NONE;
}
示例5: calcStatus
import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Derives a word rule status for the text between two break positions.
 *
 * <p>The code points in {@code [start + current, start + next)} are scanned
 * in order. The first one that is a digit or a letter decides the result.
 *
 * @param current the break position where the segment begins, relative to
 *        {@code start}
 * @param next the break position where the segment ends, relative to
 *        {@code start}
 * @return {@code WORD_NUMBER} if the first digit-or-letter code point is a
 *         digit, {@code WORD_LETTER} if it is a letter, and {@code WORD_NONE}
 *         if there is none or either position is {@code BreakIterator.DONE}
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }

    int begin = start + current;
    int end = start + next;

    int codepoint;
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Read at the loop cursor i. Reading at 'begin' would re-examine the
        // first code point on every pass and never see the rest.
        codepoint = UTF16.charAt(text, 0, end, i);

        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            // TODO: try to separately specify ideographic, kana?
            // [currently all bundled as letter for this case]
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }

    return RuleBasedBreakIterator.WORD_NONE;
}
示例6: next
import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Advances to the next script run.
 *
 * <p>On success, {@code scriptStart} and {@code scriptLimit} delimit the run
 * and {@code scriptCode} holds its script.
 *
 * @return true if a run was found, false if the text is exhausted
 */
boolean next() {
    if (scriptLimit >= limit) {
        return false;
    }

    // A new run starts where the previous one ended and begins as COMMON.
    scriptCode = UScript.COMMON;
    scriptStart = scriptLimit;

    while (index < limit) {
        final int codePoint = UTF16.charAt(text, start, limit, index - start);
        final int script = getScript(codePoint);

        /*
         * From UTR #24: a boundary should never fall between a non-spacing
         * mark and its base character, so a non-spacing mark inherits the
         * script of its base whatever its own script value is. The run
         * therefore ends only when the script differs and the code point is
         * not a non-spacing mark.
         */
        if (!isSameScript(scriptCode, script)
                && UCharacter.getType(codePoint) != ECharacterCategory.NON_SPACING_MARK) {
            break;
        }

        index += UTF16.getCharCount(codePoint);

        // Inherited or Common takes on the script of the surrounding text.
        if (scriptCode <= UScript.INHERITED && script > UScript.INHERITED) {
            scriptCode = script;
        }
    }

    scriptLimit = index;
    return true;
}
示例7: next
import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Moves on to the following script run, if there is one.
 *
 * <p>After a successful call the run spans {@code scriptStart} (inclusive)
 * to {@code scriptLimit} (exclusive), and {@code scriptCode} is its script.
 *
 * @return true if there is another script run, false otherwise
 */
boolean next() {
    if (scriptLimit >= limit) {
        return false;
    }

    scriptCode = UScript.COMMON;
    scriptStart = scriptLimit;

    while (index < limit) {
        final int cp = UTF16.charAt(text, start, limit, index - start);
        final int cpScript = getScript(cp);

        /*
         * From UTR #24: implementations should never break between a
         * non-spacing mark and its base character. A non-spacing mark thus
         * inherits the script of its base, whatever its own script value.
         */
        final boolean extendsRun = isSameScript(scriptCode, cpScript)
                || UCharacter.getType(cp) == ECharacterCategory.NON_SPACING_MARK;
        if (!extendsRun) {
            break;
        }

        index += UTF16.getCharCount(cp);

        // Inherited or Common becomes the script of the surrounding text.
        final boolean runStillGeneric = scriptCode <= UScript.INHERITED;
        if (runStillGeneric && cpScript > UScript.INHERITED) {
            scriptCode = cpScript;
        }
    }

    scriptLimit = index;
    return true;
}
示例8: isWord
import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Reports whether a slice of a character buffer contains at least one
 * letter or digit.
 *
 * @param text the character buffer to inspect
 * @param start index of the first code unit to examine (inclusive)
 * @param end index just past the last code unit to examine (exclusive)
 * @return true if any code point in {@code [start, end)} is a letter or
 *         digit according to {@code UCharacter.isLetterOrDigit}; false
 *         otherwise, including for an empty range
 */
protected boolean isWord(char text[], int start, int end) {
    int codepoint;
    for (int i = start; i < end; i += UTF16.getCharCount(codepoint)) {
        // Read at the loop cursor i. Reading at 'start' would test only the
        // first code point, once per pass.
        codepoint = UTF16.charAt(text, 0, end, i);

        if (UCharacter.isLetterOrDigit(codepoint)) {
            return true;
        }
    }

    return false;
}
示例9: next
import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Steps to the next script run.
 *
 * <p>When this returns true, {@code scriptStart} and {@code scriptLimit}
 * bracket the run and {@code scriptCode} identifies its script.
 *
 * @return true if there is another script run, false otherwise
 */
boolean next() {
    if (scriptLimit >= limit) {
        return false;
    }

    scriptCode = UScript.COMMON;
    scriptStart = scriptLimit;

    // The loop update consumes the code point; a break leaves it unconsumed
    // so it becomes the first code point of the following run.
    for (int ch; index < limit; index += UTF16.getCharCount(ch)) {
        ch = UTF16.charAt(text, start, limit, index - start);
        final int sc = getScript(ch);

        /*
         * From UTR #24: a non-spacing mark should inherit the script of its
         * base character, so no boundary is placed between the two.
         */
        if (!(isSameScript(scriptCode, sc)
                || UCharacter.getType(ch) == ECharacterCategory.NON_SPACING_MARK)) {
            break;
        }

        // Inherited or Common becomes the script of the surrounding text.
        if (scriptCode <= UScript.INHERITED && sc > UScript.INHERITED) {
            scriptCode = sc;
        }
    }

    scriptLimit = index;
    return true;
}
示例10: next
import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Finds the next script run.
 *
 * <p>On success, {@code scriptStart} and {@code scriptLimit} delimit the run
 * and {@code scriptCode} holds its script. Paired characters are resolved
 * through a stack, so that a closing character takes the script recorded
 * for its matching opener.
 *
 * @return <code>false</code> if there isn't another run, <code>true</code> if there is.
 *
 * @internal
 * @deprecated This API is ICU internal only.
 */
@Deprecated
public final boolean next()
{
    // Nothing left once the previous run reached the end of the text.
    if (scriptLimit >= textLimit) {
        return false;
    }

    scriptCode = UScript.COMMON;
    scriptStart = scriptLimit;

    syncFixup();

    while (textIndex < textLimit) {
        final int codePoint = UTF16.charAt(text, textStart, textLimit, textIndex - textStart);
        final int unitCount = UTF16.getCharCount(codePoint);
        int script = UScript.getScript(codePoint);
        final int pairIndex = getPairIndex(codePoint);

        // An even pair index marks an opener, an odd one a closer.
        final boolean isPaired = pairIndex >= 0;
        final boolean isCloser = isPaired && (pairIndex & 1) != 0;

        textIndex += unitCount;

        // Paired character handling:
        //
        // An opener is pushed with the current script code. For a closer,
        // unmatched openers above its match are popped, and the script
        // stored with the match (if any remains) is used.
        if (isPaired) {
            if (!isCloser) {
                push(pairIndex, scriptCode);
            } else {
                final int openerIndex = pairIndex & ~1;

                while (stackIsNotEmpty() && top().pairIndex != openerIndex) {
                    pop();
                }

                if (stackIsNotEmpty()) {
                    script = top().scriptCode;
                }
            }
        }

        if (!sameScript(scriptCode, script)) {
            // This is the first character of the next run. Back over it so
            // that it is seen again on the next call.
            textIndex -= unitCount;
            break;
        }

        if (scriptCode <= UScript.INHERITED && script > UScript.INHERITED) {
            scriptCode = script;

            fixup(scriptCode);
        }

        // A closer that stays in this run consumes its opener.
        if (isCloser) {
            pop();
        }
    }

    scriptLimit = textIndex;
    return true;
}
示例11: quoteLiteral
import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Quotes a literal string according to the current settings, so that
 * syntax characters, quote characters and ignorable characters are
 * protected in the result.
 *
 * <p>Each code point is handled in one of three ways. Members of
 * {@code escapeCharacters} are written through {@code appendEscaped}.
 * Members of the lazily built {@code needingQuoteCharacters} set are
 * protected with a backslash (when {@code usingSlash}), with single quotes
 * (when {@code usingQuote}), or through {@code appendEscaped} if neither is
 * enabled. Everything else is copied unchanged.
 *
 * @param string the literal text to quote
 * @return the quoted form of {@code string}
 */
public String quoteLiteral(String string) {
    if (needingQuoteCharacters == null) {
        needingQuoteCharacters = new UnicodeSet().addAll(syntaxCharacters).addAll(ignorableCharacters).addAll(extraQuotingCharacters); // .addAll(quoteCharacters)
        if (usingSlash) {
            needingQuoteCharacters.add(BACK_SLASH);
        }
        if (usingQuote) {
            needingQuoteCharacters.add(SINGLE_QUOTE);
        }
    }

    final StringBuffer out = new StringBuffer();
    int quoteState = NO_QUOTE;
    int offset = 0;

    while (offset < string.length()) {
        final int cp = UTF16.charAt(string, offset);
        offset += UTF16.getCharCount(cp);

        if (escapeCharacters.contains(cp)) {
            // Close any open quoted section before writing the escape.
            if (quoteState == IN_QUOTE) {
                out.append(SINGLE_QUOTE);
                quoteState = NO_QUOTE;
            }
            appendEscaped(out, cp);
        } else if (!needingQuoteCharacters.contains(cp)) {
            // No quoting needed, but an open quoted section must be closed.
            if (quoteState == IN_QUOTE) {
                out.append(SINGLE_QUOTE);
                quoteState = NO_QUOTE;
            }
            UTF16.append(out, cp);
        } else if (quoteState == IN_QUOTE) {
            // Already inside quotes: copy, doubling a literal single quote.
            UTF16.append(out, cp);
            if (usingQuote && cp == SINGLE_QUOTE) {
                out.append(SINGLE_QUOTE);
            }
        } else if (usingSlash) {
            out.append(BACK_SLASH);
            UTF16.append(out, cp);
        } else if (!usingQuote) {
            // Neither slash nor quote is available: fall back to an escape.
            appendEscaped(out, cp);
        } else if (cp == SINGLE_QUOTE) {
            // A lone single quote is written doubled and opens no section.
            out.append(SINGLE_QUOTE);
            out.append(SINGLE_QUOTE);
        } else {
            // Open a quoted section starting with this code point.
            out.append(SINGLE_QUOTE);
            UTF16.append(out, cp);
            quoteState = IN_QUOTE;
        }
    }

    // Close a quoted section that runs to the end of the input.
    if (quoteState == IN_QUOTE) {
        out.append(SINGLE_QUOTE);
    }
    return out.toString();
}
示例12: addStringCaseClosure
import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Maps a string to single code points and adds their case closure to a set.
 *
 * <p>The string is looked up in the {@code unfold} table, which holds full
 * case folding strings together with the code points that fold to them. In
 * effect this is a reverse full case folding. When the string is found,
 * each code point stored in its row is added to {@code set}, followed by
 * that code point's own case closure via {@code addCaseClosure}.
 *
 * @param s the string to look up
 * @param set the set that receives the code points and their closures
 * @return true if the string was found
 */
public final boolean addStringCaseClosure(String s, UnicodeSet set) {
    if (unfold == null || s == null) {
        return false; /* no reverse case folding data, or no string */
    }

    final int length = s.length();
    if (length <= 1) {
        /*
         * The string is too short to find any match. More precise would be
         *   if(!u_strHasMoreChar32Than(s, length, 1))
         * but this makes little practical difference, because a single
         * supplementary code point would just not be found.
         */
        return false;
    }

    final int rowCount = unfold[UNFOLD_ROWS];
    final int rowWidth = unfold[UNFOLD_ROW_WIDTH];
    final int stringWidth = unfold[UNFOLD_STRING_WIDTH];

    if (length > stringWidth) {
        /* the string is too long to find any match */
        return false;
    }

    /* binary search for the string over the table rows */
    int low = 0;
    int high = rowCount;
    while (low < high) {
        final int mid = (low + high) / 2;
        final int rowOffset = (mid + 1) * rowWidth; // +1 to skip the header values above
        final int comparison = strcmpMax(s, rowOffset, stringWidth);

        if (comparison < 0) {
            high = mid;
        } else if (comparison > 0) {
            low = mid + 1;
        } else {
            /* found the string: add each code point, and its case closure */
            int column = stringWidth;
            while (column < rowWidth && unfold[rowOffset + column] != 0) {
                final int c = UTF16.charAt(unfold, rowOffset, unfold.length, column);
                set.add(c);
                addCaseClosure(c, set);
                column += UTF16.getCharCount(c);
            }
            return true;
        }
    }

    return false; /* string not found */
}
示例13: parsePattern
import com.ibm.icu.text.UTF16; //导入方法依赖的package包/类
/**
 * Matches a parsing pattern against the text of a Replaceable.
 *
 * <p>Pattern characters are matched literally and case-sensitively, except
 * for this special character:
 *
 * <pre>
 *   ~  zero or more Pattern_White_Space chars
 * </pre>
 *
 * <p>If the end of the pattern is reached with every step matching, the
 * index of the first unparsed character is returned. Otherwise -1 is
 * returned, which includes the case where the text runs out before the
 * pattern does.
 *
 * @param pat pattern that controls parsing
 * @param text text to be parsed, starting at index
 * @param index offset to first character to parse
 * @param limit offset after last character to parse
 * @return index after last parsed character, or -1 on parse failure.
 */
public static int parsePattern(String pat,
        Replaceable text,
        int index,
        int limit) {
    final int patLength = pat.length();

    // An empty pattern matches immediately.
    if (patLength == 0) {
        return index;
    }

    int patPos = 0;
    int patChar = Character.codePointAt(pat, patPos);

    while (index < limit) {
        final int textChar = text.char32At(index);

        if (patChar == '~') {
            // '~' absorbs white space; the pattern position stays put.
            if (PatternProps.isWhiteSpace(textChar)) {
                index += UTF16.getCharCount(textChar);
                continue;
            }
            // White space is over: move past '~'. textChar is not consumed
            // and is tried again against the next pattern character.
            ++patPos;
            if (patPos == patLength) {
                return index; // success; textChar unparsed
            }
        } else if (textChar != patChar) {
            return -1; // literal mismatch
        } else {
            // Literal match: advance both the text and the pattern.
            final int width = UTF16.getCharCount(textChar);
            index += width;
            patPos += width;
            if (patPos == patLength) {
                return index; // success; textChar parsed
            }
        }

        patChar = UTF16.charAt(pat, patPos);
    }

    return -1; // text ended before end of pat
}