本文整理汇总了Java中java.lang.Character.UnicodeBlock.KATAKANA属性的典型用法代码示例。如果您正苦于以下问题:Java UnicodeBlock.KATAKANA属性的具体用法?Java UnicodeBlock.KATAKANA怎么用?Java UnicodeBlock.KATAKANA使用的例子?那么,这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类java.lang.Character.UnicodeBlock的用法示例。
在下文中一共展示了UnicodeBlock.KATAKANA属性的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: isFullKatakana
/**
 * Character-type check for "full-width katakana".
 *
 * <p>A code point is accepted when its block (as reported by {@code of(codePoint)})
 * is KATAKANA or KATAKANA_PHONETIC_EXTENSIONS, or when it is one of a fixed set of
 * sound marks taken from the HIRAGANA block and Japanese punctuation marks listed
 * in the body. The list is based on Unicode 3.2.
 *
 * @param codePoint the character to test; it must be given as a code point
 * @return {@code true} if the character is treated as full-width katakana,
 *         {@code false} otherwise
 */
public static boolean isFullKatakana(int codePoint) {
    // based on Unicode 3.2
    final UnicodeBlock block = of(codePoint);
    if (block == UnicodeBlock.KATAKANA) { // U+30A0 - U+30FF
        return true;
    }
    if (block == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS) { // U+31F0 - U+31FF
        return true;
    }

    switch (codePoint) {
        // Imported from HIRAGANA (U+3040 - U+309F).
        // U+3040, U+3097 and U+3098 are reserved.
        // U+309D and U+309E are iteration marks for HIRAGANA and are not accepted.
        case 0x3099: // combining mark from HIRAGANA (not in Win31J)
        case 0x309A: // combining mark from HIRAGANA (not in Win31J)
        case 0x309B: // voiced sound mark from HIRAGANA
        case 0x309C: // semi-voiced sound mark from HIRAGANA
        case 0x309F: // digraph yori from HIRAGANA (not in Win31J)
        // Japanese punctuation.
        case 0x3001: // ideographic comma
        case 0x3002: // ideographic full stop
        case 0x300C: // left corner bracket
        case 0x300D: // right corner bracket
        case 0x300E: // left white corner bracket
        case 0x300F: // right white corner bracket
            return true;
        default:
            return false;
    }
}
示例2: processInput
/**
 * Copies the input, substituting {@code replacement} for every dash-like character
 * (one of the constants U002D, UFF0D, U2010 - U2015, U207B, U208B, U30FC) whose
 * immediately preceding input character belongs to the HIRAGANA, KATAKANA or
 * KATAKANA_PHONETIC_EXTENSIONS block. All other characters are copied unchanged.
 *
 * <p>The "previous character" is always the original input character, never the
 * replacement, so the second of two consecutive dash-like characters is compared
 * against the first dash rather than against the kana before it.
 *
 * @param input the text to process
 * @return a buffer holding the processed text
 */
@Override
protected CharSequence processInput(final CharSequence input) {
    final int length = input.length();
    final StringBuilder out = new StringBuilder(length);
    char previous = 0;
    for (int i = 0; i < length; i++) {
        final char current = input.charAt(i);

        // Step 1: is the current character one of the dash-like candidates?
        final boolean dashLike;
        switch (current) {
            case U002D:
            case UFF0D:
            case U2010:
            case U2011:
            case U2012:
            case U2013:
            case U2014:
            case U2015:
            case U207B:
            case U208B:
            case U30FC:
                dashLike = true;
                break;
            default:
                dashLike = false;
                break;
        }

        // Step 2: a candidate is replaced only when it directly follows kana.
        boolean followsKana = false;
        if (dashLike && previous != 0) {
            final UnicodeBlock previousBlock = UnicodeBlock.of(previous);
            followsKana = previousBlock == UnicodeBlock.HIRAGANA
                    || previousBlock == UnicodeBlock.KATAKANA
                    || previousBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS;
        }

        if (followsKana) {
            out.append(replacement);
        } else {
            out.append(current);
        }
        previous = current;
    }
    return out;
}
示例3: normalize
/**
 * Character normalization.
 *
 * <p>Maps a character to a coarser representative depending on its Unicode block:
 * non-letters in Basic Latin, characters listed in {@code LATIN1_EXCLUDED} and all
 * General Punctuation become a space; selected Romanian and Arabic letters are
 * unified; Hiragana, Katakana, Bopomofo and Hangul syllables each collapse to one
 * fixed character; CJK ideographs are looked up in {@code cjk_map}. Anything else
 * is returned unchanged.
 *
 * @param ch character to normalize
 * @return the normalized character
 */
static public char normalize(char ch) {
    final Character.UnicodeBlock script = Character.UnicodeBlock.of(ch);

    if (script == UnicodeBlock.BASIC_LATIN) {
        final boolean asciiLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
        return asciiLetter ? ch : ' ';
    }
    if (script == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        return LATIN1_EXCLUDED.indexOf(ch) >= 0 ? ' ' : ch;
    }
    if (script == UnicodeBlock.LATIN_EXTENDED_B) {
        // normalization for Romanian
        switch (ch) {
            case '\u0219':
                return '\u015f'; // Small S with comma below => with cedilla
            case '\u021b':
                return '\u0163'; // Small T with comma below => with cedilla
            default:
                return ch;
        }
    }
    if (script == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (script == UnicodeBlock.ARABIC) {
        return ch == '\u06cc' ? '\u064a' : ch; // Farsi yeh => Arabic yeh
    }
    if (script == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        return ch >= '\u1ea0' ? '\u1ec3' : ch;
    }
    if (script == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (script == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (script == UnicodeBlock.BOPOMOFO || script == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (script == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(ch)) {
            return cjk_map.get(ch);
        }
        return ch;
    }
    if (script == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }
    return ch;
}
示例4: normalize
/**
 * Character normalization.
 *
 * <p>The character's Unicode block selects the rule: some blocks are reduced to a
 * single representative character, some have individual letters unified, and
 * unwanted Latin characters and punctuation are turned into a space. Characters
 * in blocks without a rule are returned as they are.
 *
 * @param ch character to normalize
 * @return Normalized character
 */
static public char normalize(char ch) {
    final Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
    char normalized = ch;

    if (block == UnicodeBlock.BASIC_LATIN) {
        final boolean upperCase = 'A' <= ch && ch <= 'Z';
        final boolean lowerCase = 'a' <= ch && ch <= 'z';
        if (!upperCase && !lowerCase) {
            normalized = ' ';
        }
    } else if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        final boolean excluded = LATIN1_EXCLUDED.indexOf(ch) >= 0;
        if (excluded) {
            normalized = ' ';
        }
    } else if (block == UnicodeBlock.LATIN_EXTENDED_B) {
        // normalization for Romanian
        if (ch == '\u0219') {
            normalized = '\u015f'; // Small S with comma below => with cedilla
        } else if (ch == '\u021b') {
            normalized = '\u0163'; // Small T with comma below => with cedilla
        }
    } else if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
        normalized = ' ';
    } else if (block == UnicodeBlock.ARABIC) {
        if (ch == '\u06cc') {
            normalized = '\u064a'; // Farsi yeh => Arabic yeh
        }
    } else if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        if (ch >= '\u1ea0') {
            normalized = '\u1ec3';
        }
    } else if (block == UnicodeBlock.HIRAGANA) {
        normalized = '\u3042';
    } else if (block == UnicodeBlock.KATAKANA) {
        normalized = '\u30a2';
    } else if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) {
        normalized = '\u3105';
    } else if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(ch)) {
            normalized = cjk_map.get(ch);
        }
    } else if (block == UnicodeBlock.HANGUL_SYLLABLES) {
        normalized = '\uac00';
    }
    return normalized;
}
示例5: normalize
/**
 * Normalizes a character according to the Unicode block it belongs to.
 *
 * <p>Basic Latin non-letters, characters found in {@code LATIN1_EXCLUDED} and all
 * General Punctuation are replaced by a space. Hiragana, Katakana, Bopomofo and
 * Hangul syllables are each replaced by one fixed character of their block. CJK
 * ideographs present in {@code cjk_map} are replaced by the mapped value.
 *
 * @param c character to normalize
 * @return the normalized character, or {@code c} itself when no rule applies
 */
public static char normalize(char c) {
    final Character.UnicodeBlock block = Character.UnicodeBlock.of(c);

    if (block == UnicodeBlock.BASIC_LATIN) {
        final boolean letter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
        return letter ? c : ' ';
    }
    if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        return LATIN1_EXCLUDED.indexOf(c) >= 0 ? ' ' : c;
    }
    if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (block == UnicodeBlock.ARABIC) {
        // U+06CC is unified with U+064A.
        return c == '\u06cc' ? '\u064a' : c;
    }
    if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        // Everything from U+1EA0 upwards collapses to U+1EC3.
        return c >= '\u1ea0' ? '\u1ec3' : c;
    }
    if (block == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (block == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(c)) {
            return cjk_map.get(c);
        }
        return c;
    }
    if (block == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }
    return c;
}
示例6: normalize
/**
 * Normalizes a character according to the Unicode block it belongs to.
 *
 * <p>Depending on the block, the character is replaced by a space, by a unified
 * letter, by a single representative of the block, or by its entry in
 * {@code cjk_map}; characters in any other block pass through untouched.
 *
 * @param ch character to normalize
 * @return the normalized character
 */
public static char normalize(char ch) {
    final Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
    final char out;

    if (block == UnicodeBlock.BASIC_LATIN) {
        // Only A-Z and a-z survive; everything else in the block becomes a space.
        final boolean outsideLetters = ch < 'A' || ch > 'z' || (ch > 'Z' && ch < 'a');
        out = outsideLetters ? ' ' : ch;
    } else if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        out = (LATIN1_EXCLUDED.indexOf(ch) >= 0) ? ' ' : ch;
    } else if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
        out = ' ';
    } else if (block == UnicodeBlock.ARABIC) {
        out = (ch == '\u06cc') ? '\u064a' : ch;
    } else if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        out = (ch >= '\u1ea0') ? '\u1ec3' : ch;
    } else if (block == UnicodeBlock.HIRAGANA) {
        out = '\u3042';
    } else if (block == UnicodeBlock.KATAKANA) {
        out = '\u30a2';
    } else if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) {
        out = '\u3105';
    } else if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(ch)) {
            out = cjk_map.get(ch);
        } else {
            out = ch;
        }
    } else if (block == UnicodeBlock.HANGUL_SYLLABLES) {
        out = '\uac00';
    } else {
        out = ch;
    }
    return out;
}
示例7: setCharacterSubsets
/**
 * Implements InputMethod.setCharacterSubsets for Windows.
 *
 * <p>With a {@code null} argument the {@code cmode} and {@code open} fields are
 * re-applied to the context (presumably the previously saved state; confirm
 * against the rest of the class). Otherwise the first subset is translated,
 * depending on the language of the native locale (Japanese, Korean or Chinese),
 * into an open/closed status and a conversion-mode bit mask. Unsupported subsets
 * and other languages leave the context untouched.
 *
 * @param subsets the requested character subsets, or {@code null}
 * @see java.awt.im.spi.InputMethod#setCharacterSubsets
 */
@Override
public void setCharacterSubsets(Subset[] subsets) {
    if (subsets == null) {
        setConversionStatus(context, cmode);
        setOpenStatus(context, open);
        return;
    }

    // Use first subset only. Other subsets in the array are ignored.
    // This is a restriction of the Win32 implementation.
    final Subset requested = subsets[0];
    final Locale nativeLocale = getNativeLocale();
    if (nativeLocale == null) {
        return;
    }

    final String language = nativeLocale.getLanguage();
    // Predicates shared by all three language branches.
    final boolean latinRequested = requested == UnicodeBlock.BASIC_LATIN
            || requested == InputSubset.LATIN_DIGITS;
    final boolean ideographsRequested = requested == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS;
    final boolean fullwidthLatinRequested = requested == InputSubset.FULLWIDTH_LATIN;
    int conversionMode;

    if (language.equals(Locale.JAPANESE.getLanguage())) {
        if (latinRequested) {
            setOpenStatus(context, false);
            return;
        }
        if (ideographsRequested
                || requested == InputSubset.KANJI
                || requested == UnicodeBlock.HIRAGANA) {
            conversionMode = IME_CMODE_NATIVE | IME_CMODE_FULLSHAPE;
        } else if (requested == UnicodeBlock.KATAKANA) {
            conversionMode = IME_CMODE_NATIVE | IME_CMODE_KATAKANA | IME_CMODE_FULLSHAPE;
        } else if (requested == InputSubset.HALFWIDTH_KATAKANA) {
            conversionMode = IME_CMODE_NATIVE | IME_CMODE_KATAKANA;
        } else if (fullwidthLatinRequested) {
            conversionMode = IME_CMODE_FULLSHAPE;
        } else {
            return;
        }
        setOpenStatus(context, true);
        // reserve ROMAN input mode
        conversionMode |= (getConversionStatus(context) & IME_CMODE_ROMAN);
        setConversionStatus(context, conversionMode);
    } else if (language.equals(Locale.KOREAN.getLanguage())) {
        if (latinRequested) {
            setOpenStatus(context, false);
            return;
        }
        if (ideographsRequested
                || requested == InputSubset.HANJA
                || requested == UnicodeBlock.HANGUL_SYLLABLES
                || requested == UnicodeBlock.HANGUL_JAMO
                || requested == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO) {
            conversionMode = IME_CMODE_NATIVE;
        } else if (fullwidthLatinRequested) {
            conversionMode = IME_CMODE_FULLSHAPE;
        } else {
            return;
        }
        setOpenStatus(context, true);
        setConversionStatus(context, conversionMode);
    } else if (language.equals(Locale.CHINESE.getLanguage())) {
        if (latinRequested) {
            setOpenStatus(context, false);
            return;
        }
        if (ideographsRequested
                || requested == InputSubset.TRADITIONAL_HANZI
                || requested == InputSubset.SIMPLIFIED_HANZI) {
            conversionMode = IME_CMODE_NATIVE;
        } else if (fullwidthLatinRequested) {
            conversionMode = IME_CMODE_FULLSHAPE;
        } else {
            return;
        }
        setOpenStatus(context, true);
        setConversionStatus(context, conversionMode);
    }
}
示例8: symbolParsing
/**
 * Classifies a single character and, when it is treated as an "other symbol",
 * records it in the lattice under {@code SYMBOL.SW}.
 *
 * <p>Digits, ASCII letters, Basic Latin characters found in the trie dictionary,
 * the space character, Hangul, Katakana and CJK ideographs are not recorded.
 *
 * @param lattice lattice that receives the symbol entry
 * @param jaso the character to classify
 * @param idx begin index passed to the lattice; the end index is {@code idx + 1}
 * @return {@code true} if an entry was put into the lattice, {@code false} otherwise
 */
private boolean symbolParsing(Lattice lattice, char jaso, int idx) {
    final Character.UnicodeBlock block = Character.UnicodeBlock.of(jaso);

    // Digits
    if (Character.isDigit(jaso)) {
        return false;
    }

    if (block == Character.UnicodeBlock.BASIC_LATIN) {
        // English letters, dictionary entries and the space character are skipped.
        final boolean englishLetter = ('A' <= jaso && jaso <= 'Z') || ('a' <= jaso && jaso <= 'z');
        if (englishLetter
                || this.resources.getObservation().getTrieDictionary().getValue("" + jaso) != null
                || jaso == ' ') {
            return false;
        }
        // An ASCII-range character that is not in the dictionary is an "other" symbol.
    } else {
        // Korean
        final boolean korean = block == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
                || block == UnicodeBlock.HANGUL_JAMO
                || block == UnicodeBlock.HANGUL_JAMO_EXTENDED_A
                || block == UnicodeBlock.HANGUL_JAMO_EXTENDED_B
                || block == UnicodeBlock.HANGUL_SYLLABLES;
        // Japanese
        final boolean japanese = block == UnicodeBlock.KATAKANA
                || block == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS;
        // Chinese
        final boolean chinese = block == UnicodeBlock.CJK_COMPATIBILITY
                || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS;
        if (korean || japanese || chinese) {
            return false;
        }
        // Any other character falls through and is recorded as a symbol.
    }

    lattice.put(idx, idx + 1, "" + jaso, SYMBOL.SW, this.resources.getTable().getId(SYMBOL.SW), SCORE.SW);
    return true;
}
示例9: normalize
/**
 * Character normalization.
 *
 * <p>The rule is chosen by the character's Unicode block. Whole-block rules map
 * every character of the block to one fixed character; per-character rules only
 * rewrite selected characters and leave the rest of the block unchanged.
 *
 * @param ch character
 * @return Normalized character
 */
public static char normalize(char ch) {
    final Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);

    // Whole-block rules: every character of the block maps to one representative.
    if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (block == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (block == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (block == UnicodeBlock.BOPOMOFO
            || block == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (block == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }

    // Per-character rules.
    if (block == UnicodeBlock.BASIC_LATIN) {
        final boolean letter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
        return letter ? ch : ' ';
    }
    if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        return LATIN1_EXCLUDED.indexOf(ch) >= 0 ? ' ' : ch;
    }
    if (block == UnicodeBlock.LATIN_EXTENDED_B) {
        // normalization for Romanian
        switch (ch) {
            case '\u0219':
                return '\u015f'; // Small S with comma below => with cedilla
            case '\u021b':
                return '\u0163'; // Small T with comma below => with cedilla
            default:
                return ch;
        }
    }
    if (block == UnicodeBlock.ARABIC) {
        return ch == '\u06cc' ? '\u064a' : ch; // Farsi yeh => Arabic yeh
    }
    if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        return ch >= '\u1ea0' ? '\u1ec3' : ch;
    }
    if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjkMap.containsKey(ch)) {
            return cjkMap.get(ch);
        }
        return ch;
    }
    return ch;
}
示例10: setCharacterSubsets
/**
 * Implements InputMethod.setCharacterSubsets for Windows.
 *
 * <p>A {@code null} argument re-applies the {@code cmode} and {@code open} fields
 * to the context. Otherwise only the first element of the array is considered:
 * for a Japanese, Korean or Chinese native locale it is mapped to an open/closed
 * status and a conversion mode; a subset with no mapping, or any other locale
 * language, changes nothing.
 *
 * @param subsets the requested character subsets, or {@code null}
 * @see java.awt.im.spi.InputMethod#setCharacterSubsets
 */
public void setCharacterSubsets(Subset[] subsets) {
    if (subsets == null) {
        setConversionStatus(context, cmode);
        setOpenStatus(context, open);
        return;
    }

    // Use first subset only. Other subsets in the array are ignored.
    // This is a restriction of the Win32 implementation.
    final Subset first = subsets[0];
    final Locale nativeLocale = getNativeLocale();
    if (nativeLocale == null) {
        return;
    }
    final String lang = nativeLocale.getLanguage();

    final boolean isJapanese = lang.equals(Locale.JAPANESE.getLanguage());
    final boolean isKorean = !isJapanese && lang.equals(Locale.KOREAN.getLanguage());
    final boolean isChinese = !isJapanese && !isKorean && lang.equals(Locale.CHINESE.getLanguage());
    if (!isJapanese && !isKorean && !isChinese) {
        return;
    }

    // In every supported language a plain Latin request simply closes the IME.
    if (first == UnicodeBlock.BASIC_LATIN || first == InputSubset.LATIN_DIGITS) {
        setOpenStatus(context, false);
        return;
    }

    int newmode;
    if (isJapanese) {
        if (first == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || first == InputSubset.KANJI
                || first == UnicodeBlock.HIRAGANA) {
            newmode = IME_CMODE_NATIVE | IME_CMODE_FULLSHAPE;
        } else if (first == UnicodeBlock.KATAKANA) {
            newmode = IME_CMODE_NATIVE | IME_CMODE_KATAKANA | IME_CMODE_FULLSHAPE;
        } else if (first == InputSubset.HALFWIDTH_KATAKANA) {
            newmode = IME_CMODE_NATIVE | IME_CMODE_KATAKANA;
        } else if (first == InputSubset.FULLWIDTH_LATIN) {
            newmode = IME_CMODE_FULLSHAPE;
        } else {
            return;
        }
        setOpenStatus(context, true);
        // reserve ROMAN input mode
        newmode |= (getConversionStatus(context) & IME_CMODE_ROMAN);
        setConversionStatus(context, newmode);
        return;
    }

    if (isKorean) {
        if (first == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || first == InputSubset.HANJA
                || first == UnicodeBlock.HANGUL_SYLLABLES
                || first == UnicodeBlock.HANGUL_JAMO
                || first == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO) {
            newmode = IME_CMODE_NATIVE;
        } else if (first == InputSubset.FULLWIDTH_LATIN) {
            newmode = IME_CMODE_FULLSHAPE;
        } else {
            return;
        }
    } else {
        // Chinese
        if (first == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || first == InputSubset.TRADITIONAL_HANZI
                || first == InputSubset.SIMPLIFIED_HANZI) {
            newmode = IME_CMODE_NATIVE;
        } else if (first == InputSubset.FULLWIDTH_LATIN) {
            newmode = IME_CMODE_FULLSHAPE;
        } else {
            return;
        }
    }
    setOpenStatus(context, true);
    setConversionStatus(context, newmode);
}
示例11: symbolParsing
/**
 * Identifies a symbol in the input text and inserts it into the lattice.
 *
 * <p>The character at index {@code i} is skipped when it is a digit, an ASCII
 * letter, a Basic Latin character present in the trie dictionary, Hangul,
 * Katakana or a CJK ideograph. Every other character is put into the lattice
 * under {@code SYMBOL.SW}.
 *
 * @param in the input string
 * @param i index of the character to examine; the lattice entry spans
 *          {@code i} to {@code i + 1}
 */
private void symbolParsing(String in, int i) {
    final char current = in.charAt(i);
    final Character.UnicodeBlock block = Character.UnicodeBlock.of(current);

    // Digits
    if (Character.isDigit(current)) {
        return;
    }

    if (block == Character.UnicodeBlock.BASIC_LATIN) {
        // English letters
        final boolean englishLetter =
                ('A' <= current && current <= 'Z') || ('a' <= current && current <= 'z');
        if (englishLetter || observation.getTrieDictionary().get(current) != null) {
            return;
        }
        // Anything left in Basic Latin is a symbol.
    } else {
        // Korean
        final boolean korean = block == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
                || block == UnicodeBlock.HANGUL_JAMO
                || block == UnicodeBlock.HANGUL_JAMO_EXTENDED_A
                || block == UnicodeBlock.HANGUL_JAMO_EXTENDED_B
                || block == UnicodeBlock.HANGUL_SYLLABLES;
        // Japanese
        final boolean japanese = block == UnicodeBlock.KATAKANA
                || block == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS;
        // Chinese
        final boolean chinese = block == UnicodeBlock.CJK_COMPATIBILITY
                || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS;
        if (korean || japanese || chinese) {
            return;
        }
    }

    this.lattice.put(i, i + 1, "" + current, this.table.getId(SYMBOL.SW), SCORE.SW);
}
示例12: isJapanesePhoneticUnicodeBlock
/**
 * Tells whether a Unicode block is one of the blocks this code treats as Japanese
 * phonetic script: KATAKANA, KATAKANA_PHONETIC_EXTENSIONS,
 * HALFWIDTH_AND_FULLWIDTH_FORMS or HIRAGANA.
 *
 * @param unicodeBlock the block to test; {@code null} yields {@code false}
 * @return {@code true} if the block is one of the four listed blocks
 */
private static boolean isJapanesePhoneticUnicodeBlock(UnicodeBlock unicodeBlock) {
    if (unicodeBlock == UnicodeBlock.HIRAGANA) {
        return true;
    }
    if (unicodeBlock == UnicodeBlock.KATAKANA) {
        return true;
    }
    if (unicodeBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS) {
        return true;
    }
    return unicodeBlock == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
}