本文整理汇总了Java中com.ibm.icu.lang.UCharacter.getType方法的典型用法代码示例。如果您正苦于以下问题：Java UCharacter.getType方法的具体用法？Java UCharacter.getType怎么用？Java UCharacter.getType使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类com.ibm.icu.lang.UCharacter的用法示例。
在下文中一共展示了UCharacter.getType方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getFirstCharactersInScripts
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Collects the script-first boundary strings whose sample character is either a letter
 * or an unassigned code point. Only exposed for testing.
 *
 * @return list of first characters in each script
 * @throws UnsupportedOperationException if the collator supplies no contractions
 *         starting with U+FDD1
 * @internal
 * @deprecated This API is ICU internal, only for testing.
 */
@Deprecated
public List<String> getFirstCharactersInScripts() {
    // The script-first-primary contractions are defined in the root collator
    // and every one of them begins with U+FDD1.
    UnicodeSet scriptFirstContractions = new UnicodeSet();
    collatorPrimaryOnly.internalAddContractions(0xFDD1, scriptFirstContractions);
    if (scriptFirstContractions.isEmpty()) {
        throw new UnsupportedOperationException(
                "AlphabeticIndex requires script-first-primary contractions");
    }

    List<String> firstCharacters = new ArrayList<String>(200);
    for (String contraction : scriptFirstContractions) {
        // The sample character sits at index 1, right after the leading U+FDD1.
        int sampleCategoryBit = 1 << UCharacter.getType(contraction.codePointAt(1));
        // Keep only boundaries for "real scripts" (sample character is a Letter) and
        // the one for unassigned implicit weights (Cn); boundaries for the special
        // reordering groups are dropped.
        boolean isScriptBoundary = (sampleCategoryBit & (GC_L_MASK | GC_CN_MASK)) != 0;
        if (isScriptBoundary) {
            firstCharacters.add(contraction);
        }
    }
    return firstCharacters;
}
示例2: getNumerics
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Computes the set of numerics for a string, according to UTS 39 section 5.3.
 * <p>
 * {@code result} is cleared first; afterwards it holds, for every decimal digit
 * found in {@code input}, the zero digit of that digit's numbering system.
 *
 * @param input  the string to scan, code point by code point
 * @param result the set that receives the representative zero digits
 */
private void getNumerics(String input, UnicodeSet result) {
    result.clear();
    int offset = 0;
    while (offset < input.length()) {
        final int codePoint = Character.codePointAt(input, offset);
        offset += Character.charCount(codePoint);
        if (UCharacter.getType(codePoint) != UCharacterCategory.DECIMAL_DIGIT_NUMBER) {
            continue;
        }
        // One representative per kind of decimal digit: its zero character.
        // Unicode guarantees the zero is at codePoint - value.
        final int zeroDigit = codePoint - UCharacter.getNumericValue(codePoint);
        result.add(zeroDigit);
    }
}
示例3: getType
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Gets the extended type of a character: the general category reported by
 * {@code UCharacter.getType}, refined so that noncharacters and the two kinds
 * of surrogates get their own values.
 *
 * @param ch character to be tested
 * @return {@code NON_CHARACTER_} for a noncharacter, {@code LEAD_SURROGATE_} or
 *         {@code TRAIL_SURROGATE_} for a surrogate, otherwise the general category
 */
private static int getType(int ch)
{
    // A noncharacter has no real category; report the dedicated extra value.
    if (UCharacterUtility.isNonCharacter(ch)) {
        return NON_CHARACTER_;
    }

    final int category = UCharacter.getType(ch);
    if (category != UCharacterCategory.SURROGATE) {
        return category;
    }

    // Surrogates are split by position relative to the last lead surrogate.
    return ch <= UTF16.LEAD_SURROGATE_MAX_VALUE ? LEAD_SURROGATE_ : TRAIL_SURROGATE_;
}
示例4: isLetter
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Tells whether the general category of {@code c} is one of the letter
 * categories (upper, lower, title, modifier, other) or letter-number.
 *
 * @param c the code point to classify
 * @return {@code true} if {@code c} falls in one of those categories
 */
private static boolean isLetter(int c) {
    final int category = UCharacter.getType(c);
    if (category == UPPERCASE_LETTER
            || category == LOWERCASE_LETTER
            || category == TITLECASE_LETTER) {
        return true;
    }
    return category == MODIFIER_LETTER
            || category == OTHER_LETTER
            || category == LETTER_NUMBER;
}
示例5: contains
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Tests for "horizontal space": TAB or SPACE for code points up to U+009F,
 * and general category Zs for anything above.
 *
 * @param c the code point to test
 * @return {@code true} if {@code c} counts as horizontal space
 */
@Override
boolean contains(int c) {
    if (c > 0x9f) {
        /* Zs */
        return UCharacter.getType(c) == UCharacter.SPACE_SEPARATOR;
    }
    /* SPACE or TAB */
    return c == 0x20 || c == 9;
}
示例6: next
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Advances to the next script run, if any. On success {@code scriptStart},
 * {@code scriptLimit} and {@code scriptCode} describe the new run.
 *
 * @return true if there is another script run, false otherwise.
 */
boolean next() {
    if (scriptLimit >= limit) {
        return false;
    }

    // The new run begins where the previous one stopped.
    scriptStart = scriptLimit;
    scriptCode = UScript.COMMON;

    while (index < limit) {
        final int codePoint = UTF16.charAt(text, start, limit, index - start);
        final int script = getScript(codePoint);

        /*
         * From UTR #24: a boundary must never fall between a non-spacing mark and
         * its base character, so a non-spacing mark — whatever its own script
         * value — is treated as belonging to the script of its base character.
         */
        final boolean continuesRun = isSameScript(scriptCode, script)
                || UCharacter.getType(codePoint) == ECharacterCategory.NON_SPACING_MARK;
        if (!continuesRun) {
            break;
        }

        index += UTF16.getCharCount(codePoint);

        // Inherited or Common takes on the script code of the surrounding text.
        if (scriptCode <= UScript.INHERITED && script > UScript.INHERITED) {
            scriptCode = script;
        }
    }

    scriptLimit = index;
    return true;
}
示例7: next
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Moves on to the following script run and records its start, limit and
 * script code in the iterator's fields.
 *
 * @return true if there is another script run, false otherwise.
 */
boolean next() {
    if (scriptLimit >= limit) {
        return false;
    }

    scriptCode = UScript.COMMON;
    scriptStart = scriptLimit;

    while (index < limit) {
        final int current = UTF16.charAt(text, start, limit, index - start);
        final int currentScript = getScript(current);

        /*
         * From UTR #24: implementations should never break between a non-spacing
         * mark and its base character; for boundary determination a non-spacing
         * mark inherits the script value of its base character. The run therefore
         * ends only when the script differs AND the character is not such a mark.
         */
        if (!isSameScript(scriptCode, currentScript)
                && UCharacter.getType(current) != ECharacterCategory.NON_SPACING_MARK) {
            break;
        }

        index += UTF16.getCharCount(current);

        /*
         * Inherited or Common becomes the script code of the surrounding text.
         */
        if (scriptCode <= UScript.INHERITED && currentScript > UScript.INHERITED) {
            scriptCode = currentScript;
        }
    }

    scriptLimit = index;
    return true;
}
示例8: getType
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Gets the general Unicode category of the specified code point.
 * <p>
 * Code points strictly between 0 and 1000 are answered from
 * {@code typeValuesCache}; all others are looked up through {@code UCharacter}
 * and converted to the numbering used by {@link Character}.
 *
 * @param codePoint
 *            the Unicode code point to get the category of.
 * @return the Unicode category of {@code codePoint}.
 */
public static int getType(int codePoint) {
    if (codePoint > 0 && codePoint < 1000) {
        return typeValuesCache[codePoint];
    }

    final int icuType = UCharacter.getType(codePoint);
    // The type values returned by UCharacter are not compatible with what the
    // spec says: the RI's Character type values skip the value 17, so every
    // value past FORMAT is shifted up by one.
    return icuType > Character.FORMAT ? icuType + 1 : icuType;
}
示例9: next
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Finds the next script run. When one exists, {@code scriptStart} and
 * {@code scriptLimit} delimit it and {@code scriptCode} holds its script.
 *
 * @return true if there is another script run, false otherwise.
 */
boolean next() {
    if (scriptLimit >= limit) {
        return false;
    }

    scriptCode = UScript.COMMON;
    scriptStart = scriptLimit;

    boolean inRun = true;
    while (inRun && index < limit) {
        final int cp = UTF16.charAt(text, start, limit, index - start);
        final int cpScript = getScript(cp);

        /*
         * From UTR #24: never break between a non-spacing mark and its base
         * character. For boundary determination a non-spacing mark — whatever
         * its script value — inherits the script value of its base character.
         */
        inRun = isSameScript(scriptCode, cpScript)
                || UCharacter.getType(cp) == ECharacterCategory.NON_SPACING_MARK;

        if (inRun) {
            index += UTF16.getCharCount(cp);
            // Inherited or Common becomes the script code of the surrounding text.
            if (scriptCode <= UScript.INHERITED && cpScript > UScript.INHERITED) {
                scriptCode = cpScript;
            }
        }
    }

    scriptLimit = index;
    return true;
}
示例10: getType
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Gets the general Unicode category of the specified character, expressed in
 * the numbering used by {@link Character}.
 *
 * @param codePoint
 *            the character, including supplementary characters
 * @return the Unicode category
 */
public static int getType(int codePoint) {
    int category = UCharacter.getType(codePoint);
    // The type values returned by UCharacter are not compatible with what the
    // spec says: the RI's Character type values skip the value 17, so anything
    // above FORMAT moves up by one.
    if (category > Character.FORMAT) {
        category++;
    }
    return category;
}
示例11: handleTransliterate
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Inserts {@code insertion} into {@code text} at each break-iterator boundary
 * in the range starting at {@code pos.start} and ending before {@code pos.limit},
 * provided the code points immediately before and after the boundary both have a
 * general category contained in {@code LETTER_OR_MARK_MASK}.
 * <p>
 * Qualifying boundaries are first collected in the {@code boundaries} array and
 * the insertions are then applied from the last boundary to the first.
 * Afterwards {@code pos.contextLimit}, {@code pos.limit} and {@code pos.start}
 * are adjusted for the inserted text.
 *
 * @param text        the text to scan; modified in place via {@code text.replace}
 * @param pos         the range to process; its fields are updated on return
 * @param incremental if true, {@code pos.start} ends just past the last insertion;
 *                    otherwise it is set to the new {@code pos.limit}
 */
@Override
protected synchronized void handleTransliterate(Replaceable text, Position pos, boolean incremental) {
// Reset the stack of recorded boundaries for this call.
boundaryCount = 0;
int boundary = 0;
getBreakIterator(); // Lazy-create it if necessary
bi.setText(new ReplaceableCharacterIterator(text, pos.start, pos.limit, pos.start));
// TODO: fix clumsy workaround used below.
/*
char[] tempBuffer = new char[text.length()];
text.getChars(0, text.length(), tempBuffer, 0);
bi.setText(new StringCharacterIterator(new String(tempBuffer), pos.start, pos.limit, pos.start));
*/
// end debugging
// To make things much easier, we will stack the boundaries, and then insert at the end.
// generally, we won't need too many, since we will be filtered.
for(boundary = bi.first(); boundary != BreakIterator.DONE && boundary < pos.limit; boundary = bi.next()) {
// Offset 0 has no preceding character to inspect, so it can never qualify.
if (boundary == 0) continue;
// HACK: Check to see that preceding item was a letter
int cp = UTF16.charAt(text, boundary-1);
int type = UCharacter.getType(cp);
//System.out.println(Integer.toString(cp,16) + " (before): " + type);
if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;
// Apply the same category test to the code point that follows the boundary.
cp = UTF16.charAt(text, boundary);
type = UCharacter.getType(cp);
//System.out.println(Integer.toString(cp,16) + " (after): " + type);
if (((1<<type) & LETTER_OR_MARK_MASK) == 0) continue;
if (boundaryCount >= boundaries.length) { // realloc if necessary
// Double the capacity and carry over the boundaries recorded so far.
int[] temp = new int[boundaries.length * 2];
System.arraycopy(boundaries, 0, temp, 0, boundaries.length);
boundaries = temp;
}
boundaries[boundaryCount++] = boundary;
//System.out.println(boundary);
}
// delta: total number of chars inserted; lastBoundary: highest recorded boundary
// (in pre-insertion offsets). Both stay 0 when nothing qualified.
int delta = 0;
int lastBoundary = 0;
if (boundaryCount != 0) { // if we found something, adjust
delta = boundaryCount * insertion.length();
lastBoundary = boundaries[boundaryCount-1];
// we do this from the end backwards, so that we don't have to keep updating.
while (boundaryCount > 0) {
boundary = boundaries[--boundaryCount];
// Zero-length replace range: a pure insertion at the boundary.
text.replace(boundary, boundary, insertion);
}
}
// Now fix up the return values
pos.contextLimit += delta;
pos.limit += delta;
// lastBoundary + delta is where the text that followed the last boundary now sits.
pos.start = incremental ? lastBoundary + delta : pos.limit;
}
示例12: contains
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Tests whether the general category of {@code ch} is selected by {@code mask},
 * where bit {@code n} of the mask stands for category value {@code n}.
 *
 * @param ch the code point to test
 * @return {@code true} if the category bit of {@code ch} is set in {@code mask}
 */
@Override
public boolean contains(int ch) {
    final int categoryBit = 1 << UCharacter.getType(ch);
    return (categoryBit & mask) != 0;
}
示例13: U_GET_GC_MASK
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Builds a one-bit mask for the general category of {@code c}: the bit whose
 * position equals the value returned by {@code UCharacter.getType}.
 *
 * @param c the code point to look up
 * @return {@code 1} shifted left by the general category value of {@code c}
 */
private static int U_GET_GC_MASK(int c) {
    final int generalCategory = UCharacter.getType(c);
    return 1 << generalCategory;
}
示例14: has
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Tests whether the general category of {@code codePoint} is included in the
 * category bit mask {@code value}.
 *
 * @param codePoint the code point to test
 * @param value     bit mask in which bit {@code n} stands for category value {@code n}
 * @return {@code true} if the category bit of {@code codePoint} is set in {@code value}
 */
@Override
public boolean has(int codePoint, int value) {
    final int categoryBit = 1 << UCharacter.getType(codePoint);
    return (value & categoryBit) != 0;
}
示例15: isLetter
import com.ibm.icu.lang.UCharacter; //导入方法依赖的package包/类
/**
 * Determines whether the character code {@code c} represents a letter.
 *
 * <p>
 * A code point counts as a letter when its general category is one of
 * {@link ECharacterCategory#UPPERCASE_LETTER},
 * {@link ECharacterCategory#LOWERCASE_LETTER},
 * {@link ECharacterCategory#TITLECASE_LETTER},
 * {@link ECharacterCategory#MODIFIER_LETTER},
 * {@link ECharacterCategory#OTHER_LETTER} or
 * {@link ECharacterCategory#LETTER_NUMBER}.
 * </p>
 *
 * @param c the code point to classify
 * @return <code>true</code> if {@code c} is in one of the categories above
 */
public static boolean isLetter(int c) {
    final int category = UCharacter.getType(c);
    return category == UPPERCASE_LETTER
            || category == LOWERCASE_LETTER
            || category == TITLECASE_LETTER
            || category == MODIFIER_LETTER
            || category == OTHER_LETTER
            || category == LETTER_NUMBER;
}