本文整理汇总了Java中com.ibm.icu.text.UTF16类的典型用法代码示例。如果您正苦于以下问题:Java UTF16类的具体用法?Java UTF16怎么用?Java UTF16使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
UTF16类属于com.ibm.icu.text包,在下文中一共展示了UTF16类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getCodePointValue
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Looks up the data value stored in this trie for a code point.
 * When no data offset can be determined for the code point, the trie's
 * initial value is returned instead.
 *
 * @param ch code point to look up
 * @return the data value for {@code ch}, or {@code m_initialValue_} when
 *         {@code getCodePointOffset(ch)} reports a negative offset
 */
public final char getCodePointValue(int ch)
{
    // Fast path for U+0000..U+D7FF: the index arithmetic is an inlined
    // copy of getRawOffset(), avoiding the general dispatch below.
    if (ch >= 0 && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
        final int blockStart =
            m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_;
        return m_data_[blockStart + (ch & INDEX_STAGE_3_MASK_)];
    }

    // Everything else (U+D800..U+10FFFF and out-of-range input) goes
    // through the general lookup, which signals failure with a negative
    // offset.
    final int dataOffset = getCodePointOffset(ch);
    if (dataOffset < 0) {
        return m_initialValue_;
    }
    return m_data_[dataOffset];
}
示例2: getCodePointOffset
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Internal trie getter: maps a code point to the offset of its data.
 * The lookup strategy is chosen by range: below the lead surrogates,
 * the rest of the BMP, and supplementary code points.
 *
 * @param ch code point
 * @return offset to the data for {@code ch}, or -1 when {@code ch} is
 *         negative or greater than {@code UCharacter.MAX_VALUE}
 */
protected final int getCodePointOffset(int ch)
{
    if (ch < 0) {
        // not a code point
        return -1;
    }
    if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
        // part of the BMP below the surrogates (D800), where
        // getRawOffset() can be used directly
        return getRawOffset(0, (char) ch);
    }
    if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
        // remaining BMP code points
        return getBMPOffset((char) ch);
    }
    if (ch > UCharacter.MAX_VALUE) {
        // beyond the last valid code point
        return -1;
    }
    // supplementary code point: look it up through its lead surrogate
    // and the masked low bits that form the trail part
    return getSurrogateOffset(UTF16.getLeadSurrogate(ch),
                              (char) (ch & SURROGATE_MASK_));
}
示例3: currentCodePoint
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Returns the code point at the current position. If the current code
 * unit is a lead surrogate that is followed by a trail surrogate, the
 * combined supplementary code point is returned; otherwise the value of
 * {@code current()} is returned as is.
 *
 * @return current code point
 */
@Override
public int currentCodePoint(){
    // charAt-style access is not used here because it behaves differently
    // when the index points at a trail surrogate; surrogates are checked
    // by hand instead.
    final int first = current();
    if (!UTF16.isLeadSurrogate((char) first)) {
        return first;
    }

    // Peek at the following code unit: step forward, read it, then step
    // back so that this call is not meant to move the current index.
    next();
    final int second = current();
    previous();

    return UTF16.isTrailSurrogate((char) second)
            ? Character.toCodePoint((char) first, (char) second)
            : first;
}
示例4: checkNullNextTrailIndex
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Checks whether iteration is positioned at the start of an initial
 * block, i.e. {@code m_nextIndex_} is not positive. In that case the
 * remaining code points of the block share the same value, so
 * {@code m_nextCodepoint_} and the related index members are advanced
 * past it.
 * Used only for supplementary code points, because the offset to the
 * trail indexes can be 0.
 *
 * @return true if we were at the start of an initial block (state has
 *         been advanced), false otherwise (state untouched)
 * @throws NullPointerException if the trie has no DataManipulate
 */
private final boolean checkNullNextTrailIndex()
{
    if (m_nextIndex_ > 0) {
        return false;
    }

    // skip to the last code point covered by the current lead surrogate
    m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;

    final int lead = UTF16.getLeadSurrogate(m_nextCodepoint_);
    final int blockStart =
        m_trie_.m_index_[lead >> Trie.INDEX_STAGE_1_SHIFT_]
            << Trie.INDEX_STAGE_2_SHIFT_;

    if (m_trie_.m_dataManipulate_ == null) {
        throw new NullPointerException(
            "The field DataManipulate in this Trie is null");
    }

    // the folding offset is derived from the value stored for the lead
    // surrogate; it is then decremented by one
    final int leadValueOffset = blockStart + (lead & Trie.INDEX_STAGE_3_MASK_);
    m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
                       m_trie_.getValue(leadValueOffset));
    m_nextIndex_--;
    m_nextBlockIndex_ = DATA_BLOCK_LENGTH_;
    return true;
}
示例5: escape
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Escapes a string so that only code points in U+0020..U+007F remain
 * literal. A backslash is doubled; every code point outside that range
 * is written as a Unicode escape: {@code \}{@code u} followed by 4 hex
 * digits for code points up to U+FFFF, {@code \U} followed by 8 hex
 * digits otherwise.
 *
 * @param s the string to escape
 * @return the escaped string
 */
public static final String escape(String s) {
    final StringBuilder out = new StringBuilder();
    int pos = 0;
    while (pos < s.length()) {
        final int cp = Character.codePointAt(s, pos);
        pos += UTF16.getCharCount(cp);

        if (cp < ' ' || cp > 0x007F) {
            // outside the literal range: emit a Unicode escape
            if (cp <= 0xFFFF) {
                out.append("\\u").append(hex(cp, 4));
            } else {
                out.append("\\U").append(hex(cp, 8));
            }
        } else if (cp == '\\') {
            out.append("\\\\"); // a single backslash becomes two
        } else {
            out.append((char) cp);
        }
    }
    return out.toString();
}
示例6: hex
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Converts a character sequence to groups of hex digits, one group per
 * element, with {@code separator} between groups, and appends the output
 * to the given Appendable. Each group is produced by the numeric
 * {@code hex(value, width)} helper; e.g. the text "AB" with width 4 and
 * separator "," is expected to give "0041,0042".
 *
 * @param s the text to convert
 * @param width passed through to {@code hex(value, width)} for each group
 * @param separator text appended between consecutive groups
 * @param useCodePoints if true, one group per code point; if false, one
 *        group per UTF-16 code unit
 * @param result destination the output is appended to
 * @return {@code result}
 * @throws IllegalIcuArgumentException wrapping any IOException thrown by
 *         {@code result}
 */
public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
    try {
        int i = 0;
        while (i < s.length()) {
            // every group except the first is preceded by the separator
            if (i > 0) {
                result.append(separator);
            }
            if (useCodePoints) {
                final int cp = Character.codePointAt(s, i);
                result.append(hex(cp, width));
                i += UTF16.getCharCount(cp);
            } else {
                result.append(hex(s.charAt(i), width));
                ++i;
            }
        }
        return result;
    } catch (IOException e) {
        throw new IllegalIcuArgumentException(e);
    }
}
示例7: parseUnicodeIdentifier
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Parses a Unicode identifier from the given string, starting at the
 * given position.
 *
 * @param str the string to parse
 * @param pos INPUT-OUTPUT parameter. On INPUT, pos[0] is the index of
 * the first character to examine; it must be less than str.length() and
 * must not point to a whitespace character. On OUTPUT, pos[0] is the
 * position after the last parsed character. pos[0] is left untouched
 * when null is returned.
 * @return the Unicode identifier, or null if the character at pos[0]
 * cannot start an identifier
 */
public static String parseUnicodeIdentifier(String str, int[] pos) {
    // assert(pos[0] < str.length());
    final StringBuilder ident = new StringBuilder();
    final int length = str.length();
    int index = pos[0];

    while (index < length) {
        final int cp = Character.codePointAt(str, index);
        final boolean atStart = ident.length() == 0;

        // the first code point must be an identifier start, every later
        // one an identifier part
        final boolean accepted = atStart
                ? UCharacter.isUnicodeIdentifierStart(cp)
                : UCharacter.isUnicodeIdentifierPart(cp);
        if (!accepted) {
            if (atStart) {
                return null;
            }
            break;
        }

        ident.appendCodePoint(cp);
        index += UTF16.getCharCount(cp);
    }

    pos[0] = index;
    return ident.toString();
}
示例8: getCodePointValue
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Fetches the value this trie associates with a code point, falling back
 * to the trie's initial value when no data offset exists for it.
 *
 * @param ch code point
 * @return the value stored for {@code ch}, or {@code m_initialValue_}
 *         if {@code getCodePointOffset(ch)} returns a negative offset
 */
public final int getCodePointValue(int ch)
{
    final boolean belowSurrogates =
        0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE;

    if (belowSurrogates) {
        // U+0000..U+D7FF: same computation as getRawOffset(), inlined
        final int stage2 = m_index_[ch >> INDEX_STAGE_1_SHIFT_];
        return m_data_[(stage2 << INDEX_STAGE_2_SHIFT_)
                       + (ch & INDEX_STAGE_3_MASK_)];
    }

    // U+D800..U+10FFFF, plus invalid input, which yields a negative offset
    final int dataOffset = getCodePointOffset(ch);
    if (dataOffset >= 0) {
        return m_data_[dataOffset];
    }
    return m_initialValue_;
}
示例9: getSurrogateValue
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Gets the value associated with a pair of surrogates.
 *
 * @param lead a lead surrogate
 * @param trail a trail surrogate
 * @return the data value found through {@code getSurrogateOffset}, or
 *         {@code m_initialValue_} when that offset is not positive
 * @throws IllegalArgumentException if {@code lead} is not a lead
 *         surrogate or {@code trail} is not a trail surrogate
 */
public final int getSurrogateValue(char lead, char trail)
{
    final boolean isPair =
        UTF16.isLeadSurrogate(lead) && UTF16.isTrailSurrogate(trail);
    if (!isPair) {
        throw new IllegalArgumentException(
            "Argument characters do not form a supplementary character");
    }

    // offset of the data reached by folding the lead/trail units
    final int dataOffset = getSurrogateOffset(lead, trail);

    // NOTE(review): offset 0 is treated as "no data" here, whereas
    // getCodePointValue() accepts offset >= 0 -- confirm this is intended.
    return (dataOffset > 0) ? m_data_[dataOffset] : m_initialValue_;
}
示例10: getType
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Gets the extended character type of a code point. This is the
 * {@code UCharacter.getType} category, except that non-characters get
 * their own type and the surrogate category is split into lead and
 * trail surrogates.
 *
 * @param ch character to be tested
 * @return extended type it is associated with
 */
private static int getType(int ch)
{
    if (UCharacterUtility.isNonCharacter(ch)) {
        // non-characters are reported with a dedicated pseudo category
        return NON_CHARACTER_;
    }

    final int category = UCharacter.getType(ch);
    if (category != UCharacterCategory.SURROGATE) {
        return category;
    }

    // distinguish the two halves of the surrogate range
    return (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) ? LEAD_SURROGATE_
                                                  : TRAIL_SURROGATE_;
}
示例11: nextTrail32
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Completes the reading of a code point whose first code unit,
 * {@code lead}, has already been read from the iterator. If {@code lead}
 * may be a lead surrogate and the next code unit is a trail surrogate,
 * both are combined into a supplementary code point and the iterator
 * stays on the trail; otherwise the iterator is moved back and
 * {@code lead} is returned unchanged.
 * <p>
 * NOTE(review): the surrogate test is only an upper bound
 * ({@code lead <= LEAD_SURROGATE_MAX_VALUE}), so callers presumably pass
 * only values already known to be at or above the lead-surrogate
 * minimum -- confirm against the call sites.
 *
 * @param ci iterator positioned on the code unit that produced {@code lead}
 * @param lead the code unit already read
 * @return the resulting code point, or {@code DONE32} when {@code lead}
 *         is {@code CharacterIterator.DONE} and the iterator is at its end
 */
public static int nextTrail32(CharacterIterator ci, int lead) {
    if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
        // a genuine end of text rather than a U+FFFF in the data
        return DONE32;
    }
    if (lead > UTF16.LEAD_SURROGATE_MAX_VALUE) {
        return lead;
    }

    final char trail = ci.next();
    if (!UTF16.isTrailSurrogate(trail)) {
        // unpaired: undo the look-ahead
        ci.previous();
        return lead;
    }
    return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
            + (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
            + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
示例12: previous32
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Moves the iterator back by one code point and returns it. A trail
 * surrogate preceded by a lead surrogate is returned as the combined
 * supplementary code point; any other code unit is returned as is.
 *
 * @param ci the iterator to move backwards
 * @return the code point before the original position, or
 *         {@code DONE32} if the iterator was already at its begin index
 */
public static int previous32(CharacterIterator ci) {
    if (ci.getIndex() <= ci.getBeginIndex()) {
        return DONE32;
    }

    final char trail = ci.previous();
    if (!UTF16.isTrailSurrogate(trail) || ci.getIndex() <= ci.getBeginIndex()) {
        // not a trail surrogate, or nothing in front of it to pair with
        return trail;
    }

    final char lead = ci.previous();
    if (UTF16.isLeadSurrogate(lead)) {
        return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
                + (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
                + UTF16.SUPPLEMENTARY_MIN_VALUE;
    }

    // unpaired trail surrogate: step forward again onto the trail
    ci.next();
    return trail;
}
示例13: current32
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Returns the code point at the iterator's current position. A lead
 * surrogate followed by a trail surrogate yields the combined
 * supplementary code point; the look-ahead is undone with
 * {@code previous()} afterwards.
 *
 * @param ci the iterator to read from
 * @return the current code point, or {@code DONE32} when the current
 *         code unit is {@code CharacterIterator.DONE} and the iterator
 *         is at its end index
 */
public static int current32(CharacterIterator ci) {
    final char unit = ci.current();
    if (unit < UTF16.LEAD_SURROGATE_MIN_VALUE) {
        // common case: below the surrogate range
        return unit;
    }

    if (UTF16.isLeadSurrogate(unit)) {
        // peek at the next code unit, then step back
        final char following = ci.next();
        ci.previous();
        if (UTF16.isTrailSurrogate(following)) {
            return ((unit - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
                    + (following - UTF16.TRAIL_SURROGATE_MIN_VALUE)
                    + UTF16.SUPPLEMENTARY_MIN_VALUE;
        }
        return unit;
    }

    // DONE shares its value with U+FFFF; it only means "end of text"
    // when the index confirms it
    if (unit == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
        return DONE32;
    }
    return unit;
}
示例14: calcStatus
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Derives a word-break rule status for the text between two break
 * positions by scanning it code point by code point: the first digit
 * found yields {@code WORD_NUMBER}, the first letter yields
 * {@code WORD_LETTER}, and a span containing neither yields
 * {@code WORD_NONE}.
 *
 * @param current break position where the span begins, added to
 *        {@code start} to index into {@code text}
 * @param next break position where the span ends (exclusive), added to
 *        {@code start} likewise
 * @return {@code WORD_NUMBER}, {@code WORD_LETTER} or {@code WORD_NONE};
 *         {@code WORD_NONE} as well when either position is
 *         {@code BreakIterator.DONE}
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }

    int begin = start + current;
    int end = start + next;

    int i = begin;
    while (i < end) {
        // Read the code point at the scan position i. Reading at 'begin'
        // on every iteration would only ever inspect the first code
        // point of the span.
        int codepoint = UTF16.charAt(text, 0, end, i);
        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            return RuleBasedBreakIterator.WORD_LETTER;
        }
        i += UTF16.getCharCount(codepoint);
    }
    return RuleBasedBreakIterator.WORD_NONE;
}
示例15: calcStatus
import com.ibm.icu.text.UTF16; //导入依赖的package包/类
/**
 * Computes the rule status of the span delimited by two break
 * positions. The span is walked one code point at a time and the first
 * digit or letter encountered decides the result.
 *
 * @param current break position of the span start; {@code start} is
 *        added to obtain the index into {@code text}
 * @param next break position of the span end (exclusive); {@code start}
 *        is added likewise
 * @return {@code WORD_NUMBER} if a digit is met first,
 *         {@code WORD_LETTER} if a letter is met first, otherwise
 *         {@code WORD_NONE} (also when either position is
 *         {@code BreakIterator.DONE})
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }

    int begin = start + current;
    int end = start + next;

    int codepoint;
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Must index with i, not begin: otherwise the loop re-reads the
        // first code point of the span on every pass and never sees the
        // rest of it.
        codepoint = UTF16.charAt(text, 0, end, i);

        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            // TODO: try to separately specify ideographic, kana?
            // [currently all bundled as letter for this case]
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }
    return RuleBasedBreakIterator.WORD_NONE;
}