本文整理匯總了Java中java.lang.Character.UnicodeBlock.LATIN_EXTENDED_B屬性的典型用法代碼示例。如果您正苦於以下問題:Java UnicodeBlock.LATIN_EXTENDED_B屬性的具體用法?Java UnicodeBlock.LATIN_EXTENDED_B怎麼用?Java UnicodeBlock.LATIN_EXTENDED_B使用的例子?那麼,這裡精選的屬性代碼示例或許可以為您提供幫助。您也可以進一步了解該屬性所在類java.lang.Character.UnicodeBlock的用法示例。
在下文中一共展示了UnicodeBlock.LATIN_EXTENDED_B屬性的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。
示例1: normalize
/**
 * Normalizes a single character according to the Unicode block it belongs to.
 *
 * <ul>
 *   <li>Basic Latin: anything that is not an ASCII letter becomes a space.</li>
 *   <li>Latin-1 Supplement: characters found in {@code LATIN1_EXCLUDED} become a space.</li>
 *   <li>Latin Extended-B: U+0219 and U+021B (comma below) map to U+015F and U+0163
 *       (cedilla).</li>
 *   <li>General Punctuation: always a space.</li>
 *   <li>Arabic: U+06CC (Farsi yeh) maps to U+064A (Arabic yeh).</li>
 *   <li>Latin Extended Additional: characters at or above U+1EA0 map to U+1EC3.</li>
 *   <li>Hiragana, Katakana, Bopomofo (incl. Extended) and Hangul Syllables: each block
 *       collapses to one fixed representative character.</li>
 *   <li>CJK Unified Ideographs: replaced by the {@code cjk_map} entry when one exists.</li>
 * </ul>
 * Characters in any other block (or in no block) are returned unchanged.
 *
 * @param ch character to normalize
 * @return the normalized character
 */
static public char normalize(char ch) {
    final Character.UnicodeBlock ub = Character.UnicodeBlock.of(ch);

    if (ub == UnicodeBlock.BASIC_LATIN) {
        // Keep only A-Z and a-z; everything else in ASCII is blanked out.
        final boolean upper = ch >= 'A' && ch <= 'Z';
        final boolean lower = ch >= 'a' && ch <= 'z';
        return (upper || lower) ? ch : ' ';
    }
    if (ub == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        return (LATIN1_EXCLUDED.indexOf(ch) >= 0) ? ' ' : ch;
    }
    if (ub == UnicodeBlock.LATIN_EXTENDED_B) {
        // Romanian: fold the comma-below letters onto their cedilla counterparts.
        switch (ch) {
            case '\u0219':
                return '\u015f'; // small s
            case '\u021b':
                return '\u0163'; // small t
            default:
                return ch;
        }
    }
    if (ub == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (ub == UnicodeBlock.ARABIC) {
        // Farsi yeh is treated as Arabic yeh.
        return (ch == '\u06cc') ? '\u064a' : ch;
    }
    if (ub == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        return (ch >= '\u1ea0') ? '\u1ec3' : ch;
    }
    if (ub == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (ub == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (ub == UnicodeBlock.BOPOMOFO || ub == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (ub == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(ch)) {
            return cjk_map.get(ch);
        }
        return ch;
    }
    if (ub == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }
    return ch;
}
示例2: normalize
/**
 * Normalizes a character based on its Unicode block.
 *
 * <p>Some blocks collapse entirely onto one representative character (General
 * Punctuation to a space, Hiragana to U+3042, Katakana to U+30A2, Bopomofo and
 * Bopomofo Extended to U+3105, Hangul Syllables to U+AC00). Other blocks only
 * rewrite selected characters: non-letters in Basic Latin and members of
 * {@code LATIN1_EXCLUDED} in Latin-1 Supplement become a space, two Romanian
 * comma-below letters in Latin Extended-B map to their cedilla forms, Farsi yeh
 * maps to Arabic yeh, Latin Extended Additional characters at or above U+1EA0 map
 * to U+1EC3, and CJK ideographs are looked up in {@code cjk_map}. Anything else is
 * returned as-is.
 *
 * @param ch character to normalize
 * @return Normalized character
 */
static public char normalize(char ch) {
    final Character.UnicodeBlock blk = Character.UnicodeBlock.of(ch);

    // Blocks whose every member is replaced by a single fixed character.
    if (blk == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (blk == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (blk == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (blk == UnicodeBlock.BOPOMOFO || blk == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (blk == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }

    // Blocks where only particular characters are rewritten.
    char result = ch;
    if (blk == UnicodeBlock.BASIC_LATIN) {
        final boolean asciiLetter = ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
        if (!asciiLetter) {
            result = ' ';
        }
    } else if (blk == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        if (LATIN1_EXCLUDED.indexOf(ch) >= 0) {
            result = ' ';
        }
    } else if (blk == UnicodeBlock.LATIN_EXTENDED_B) {
        // Romanian: comma-below small s / t are folded onto the cedilla variants.
        if (ch == '\u0219') {
            result = '\u015f';
        } else if (ch == '\u021b') {
            result = '\u0163';
        }
    } else if (blk == UnicodeBlock.ARABIC) {
        // Farsi yeh => Arabic yeh
        if (ch == '\u06cc') {
            result = '\u064a';
        }
    } else if (blk == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        if (ch >= '\u1ea0') {
            result = '\u1ec3';
        }
    } else if (blk == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(ch)) {
            result = cjk_map.get(ch);
        }
    }
    return result;
}
示例3: isLatinUnicodeBlock
/**
 * Tells whether the given Unicode block is one of the Latin blocks recognized here:
 * Basic Latin, Latin-1 Supplement, Latin Extended-A, Latin Extended-B or
 * Latin Extended Additional.
 *
 * @param unicodeBlock the block to test; {@code null} yields {@code false}
 * @return {@code true} if the block is one of the five Latin blocks listed above
 */
private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) {
    final boolean basicOrSupplement =
            unicodeBlock == UnicodeBlock.BASIC_LATIN
                    || unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT;
    final boolean extended =
            unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A
                    || unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B
                    || unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;
    return basicOrSupplement || extended;
}
示例4: classify
/**
 * Maps a Unicode block onto the corresponding language constant.
 *
 * <p>The five Latin blocks (Basic Latin, Latin-1 Supplement, Latin Extended-A,
 * Latin Extended-B and Latin Extended Additional) all map to {@code EUROPEAN}.
 * The {@code ARABIC} block is always reported as {@code EASTERN_ARABIC}.
 * NOTE(review): the previous comment stated that distinguishing ARABIC from
 * EASTERN_ARABIC is left to the caller, with EASTERN_ARABIC preferred when both
 * are specified - confirm against the call sites.
 *
 * @param b the unicode block to classify; may be {@code null}
 * @return the language constant, or zero if the block is {@code null} or not recognized
 */
private int classify(UnicodeBlock b)
{
  if (b == null) {
    return 0;
  }

  final boolean latin =
      b == UnicodeBlock.BASIC_LATIN
      || b == UnicodeBlock.LATIN_1_SUPPLEMENT
      || b == UnicodeBlock.LATIN_EXTENDED_A
      || b == UnicodeBlock.LATIN_EXTENDED_B
      || b == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;

  int language = 0; // zero means "not recognized"
  if (latin) {
    language = EUROPEAN;
  } else if (b == UnicodeBlock.ARABIC) {
    language = EASTERN_ARABIC;
  } else if (b == UnicodeBlock.BENGALI) {
    language = BENGALI;
  } else if (b == UnicodeBlock.DEVANAGARI) {
    language = DEVANAGARI;
  } else if (b == UnicodeBlock.ETHIOPIC) {
    language = ETHIOPIC;
  } else if (b == UnicodeBlock.GUJARATI) {
    language = GUJARATI;
  } else if (b == UnicodeBlock.GURMUKHI) {
    language = GURMUKHI;
  } else if (b == UnicodeBlock.KANNADA) {
    language = KANNADA;
  } else if (b == UnicodeBlock.KHMER) {
    language = KHMER;
  } else if (b == UnicodeBlock.LAO) {
    language = LAO;
  } else if (b == UnicodeBlock.MALAYALAM) {
    language = MALAYALAM;
  } else if (b == UnicodeBlock.MONGOLIAN) {
    language = MONGOLIAN;
  } else if (b == UnicodeBlock.MYANMAR) {
    language = MYANMAR;
  } else if (b == UnicodeBlock.ORIYA) {
    language = ORIYA;
  } else if (b == UnicodeBlock.TAMIL) {
    language = TAMIL;
  } else if (b == UnicodeBlock.TELUGU) {
    language = TELUGU;
  } else if (b == UnicodeBlock.THAI) {
    language = THAI;
  } else if (b == UnicodeBlock.TIBETAN) {
    language = TIBETAN;
  }
  return language;
}
示例5: normalize
/**
 * Normalizes one character according to its Unicode block.
 *
 * <p>Summary of the rules applied:
 * <ul>
 *   <li>Basic Latin - non-letters become a space.</li>
 *   <li>Latin-1 Supplement - characters contained in {@code LATIN1_EXCLUDED} become a
 *       space.</li>
 *   <li>Latin Extended-B - U+0219 / U+021B (comma below) become U+015F / U+0163
 *       (cedilla).</li>
 *   <li>General Punctuation - always a space.</li>
 *   <li>Arabic - U+06CC (Farsi yeh) becomes U+064A (Arabic yeh).</li>
 *   <li>Latin Extended Additional - characters at or above U+1EA0 become U+1EC3.</li>
 *   <li>Hiragana, Katakana, Bopomofo (and Extended), Hangul Syllables - the whole block
 *       collapses to U+3042, U+30A2, U+3105 and U+AC00 respectively.</li>
 *   <li>CJK Unified Ideographs - replaced by the {@code cjkMap} entry if present.</li>
 * </ul>
 * All other characters are returned untouched.
 *
 * @param ch character
 * @return Normalized character
 */
public static char normalize(char ch) {
    final Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(ch);

    if (unicodeBlock == UnicodeBlock.BASIC_LATIN) {
        final boolean letter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
        return letter ? ch : ' ';
    }
    if (unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        final boolean excluded = LATIN1_EXCLUDED.indexOf(ch) >= 0;
        return excluded ? ' ' : ch;
    }
    if (unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B) {
        // Romanian: small s / t with comma below are mapped to the cedilla forms.
        if (ch == '\u0219') {
            return '\u015f';
        }
        return (ch == '\u021b') ? '\u0163' : ch;
    }
    if (unicodeBlock == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (unicodeBlock == UnicodeBlock.ARABIC) {
        // Farsi yeh => Arabic yeh
        return (ch == '\u06cc') ? '\u064a' : ch;
    }
    if (unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        return (ch < '\u1ea0') ? ch : '\u1ec3';
    }
    if (unicodeBlock == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (unicodeBlock == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (unicodeBlock == UnicodeBlock.BOPOMOFO
            || unicodeBlock == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (unicodeBlock == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (!cjkMap.containsKey(ch)) {
            return ch;
        }
        return cjkMap.get(ch);
    }
    if (unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }
    return ch;
}