本文整理汇总了Java中java.lang.Character.UnicodeBlock.BASIC_LATIN属性的典型用法代码示例。如果您正苦于以下问题:Java UnicodeBlock.BASIC_LATIN属性的具体用法?Java UnicodeBlock.BASIC_LATIN怎么用?Java UnicodeBlock.BASIC_LATIN使用的例子?那么, 这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类java.lang.Character.UnicodeBlock
的用法示例。
在下文中一共展示了UnicodeBlock.BASIC_LATIN属性的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: utf8ToUnicode
/**
* utf-8 转unicode
*
* @param inStr
* @return String
*/
public static String utf8ToUnicode(String inStr) {
char[] myBuffer = inStr.toCharArray();
StringBuffer sb = new StringBuffer();
for (int i = 0; i < inStr.length(); i++) {
UnicodeBlock ub = UnicodeBlock.of(myBuffer[i]);
if (ub == UnicodeBlock.BASIC_LATIN) {
sb.append(myBuffer[i]);
} else if (ub == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS) {
int j = (int) myBuffer[i] - 65248;
sb.append((char) j);
} else {
short s = (short) myBuffer[i];
String hexS = Integer.toHexString(s);
String unicode = "\\u" + hexS;
sb.append(unicode.toLowerCase());
}
}
return sb.toString().replaceAll("ffff", "");
}
示例2: findWordStart
private static int findWordStart(CharSequence text, int start) {
if ( text.length() <= start ){
return start;
}
UnicodeBlock c0 = UnicodeBlock.of(text.charAt(start));
for (; start > 0; start--) {
char c = text.charAt(start - 1);
UnicodeBlock cb = UnicodeBlock.of(c);
if ( c0 == UnicodeBlock.BASIC_LATIN ){
int type = Character.getType(c);
if (c != '\'' &&
type != Character.UPPERCASE_LETTER &&
type != Character.LOWERCASE_LETTER &&
type != Character.TITLECASE_LETTER &&
type != Character.MODIFIER_LETTER &&
type != Character.DECIMAL_DIGIT_NUMBER) {
break;
}
}else if ( c0 != cb ){
break;
}
}
return start;
}
示例3: findWordEnd
private static int findWordEnd(CharSequence text, int end) {
int len = text.length();
if ( len <= end ){
return end;
}
UnicodeBlock c0 = UnicodeBlock.of(text.charAt(end));
for (; end < len; end++) {
char c = text.charAt(end);
UnicodeBlock cb = UnicodeBlock.of(c);
if ( c0 == UnicodeBlock.BASIC_LATIN ){
int type = Character.getType(c);
if (c != '\'' &&
type != Character.UPPERCASE_LETTER &&
type != Character.LOWERCASE_LETTER &&
type != Character.TITLECASE_LETTER &&
type != Character.MODIFIER_LETTER &&
type != Character.DECIMAL_DIGIT_NUMBER) {
break;
}
}else if ( c0 != cb ){
break;
}
}
return end;
}
示例4: normalize
/**
* Character Normalization
*
* @param ch character to normalize
* @return Normalized character
*/
static public char normalize(char ch) {
Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
if (block == UnicodeBlock.BASIC_LATIN) {
if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') ch = ' ';
} else if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
if (LATIN1_EXCLUDED.indexOf(ch) >= 0) ch = ' ';
} else if (block == UnicodeBlock.LATIN_EXTENDED_B) {
// normalization for Romanian
if (ch == '\u0219') ch = '\u015f'; // Small S with comma below => with cedilla
if (ch == '\u021b') ch = '\u0163'; // Small T with comma below => with cedilla
} else if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
ch = ' ';
} else if (block == UnicodeBlock.ARABIC) {
if (ch == '\u06cc') ch = '\u064a'; // Farsi yeh => Arabic yeh
} else if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
if (ch >= '\u1ea0') ch = '\u1ec3';
} else if (block == UnicodeBlock.HIRAGANA) {
ch = '\u3042';
} else if (block == UnicodeBlock.KATAKANA) {
ch = '\u30a2';
} else if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) {
ch = '\u3105';
} else if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
if (cjk_map.containsKey(ch)) ch = cjk_map.get(ch);
} else if (block == UnicodeBlock.HANGUL_SYLLABLES) {
ch = '\uac00';
}
return ch;
}
示例5: normalize
public static char normalize(char c) {
char ch = c;
Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
if (block == UnicodeBlock.BASIC_LATIN) {
if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') {
ch = ' ';
}
} else if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
if (LATIN1_EXCLUDED.indexOf(ch) >= 0) {
ch = ' ';
}
} else if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
ch = ' ';
} else if (block == UnicodeBlock.ARABIC) {
if (ch == '\u06cc') {
ch = '\u064a';
}
} else if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
if (ch >= '\u1ea0') {
ch = '\u1ec3';
}
} else if (block == UnicodeBlock.HIRAGANA) {
ch = '\u3042';
} else if (block == UnicodeBlock.KATAKANA) {
ch = '\u30a2';
} else if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) {
ch = '\u3105';
} else if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
if (cjk_map.containsKey(ch)) {
ch = cjk_map.get(ch);
}
} else if (block == UnicodeBlock.HANGUL_SYLLABLES) {
ch = '\uac00';
}
return ch;
}
示例6: native2ascii
/**native character 2 unicode ascill code*/
public static String native2ascii(String text){
if(text == null) return null ;
char[] myBuffer = text.toCharArray() ;
StringBuffer sb = new StringBuffer() ;
for (int i = 0; i < myBuffer.length; i++) {
char c = myBuffer[i] ;
Character.UnicodeBlock ub = UnicodeBlock.of(c) ;
if(ub == UnicodeBlock.BASIC_LATIN){
//英文及数字等
sb.append(c) ;
}else{
//汉字
String hexS = Integer.toHexString(c & 0xffff) ;
sb.append("\\u") ;
if(hexS.length() < 4){
switch(hexS.length()){
case 1:
sb.append("000") ;
break ;
case 2:
sb.append("00") ;
break ;
case 3:
sb.append('0') ;
}
}
sb.append(hexS.toLowerCase()) ;
}
}
return sb.toString() ;
}
示例7: normalize
/**
* Character Normalization
* @param ch
* @return Normalized character
*/
static public char normalize(char ch) {
Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
if (block == UnicodeBlock.BASIC_LATIN) {
if (ch<'A' || (ch<'a' && ch >'Z') || ch>'z') ch = ' ';
} else if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
if (LATIN1_EXCLUDED.indexOf(ch)>=0) ch = ' ';
} else if (block == UnicodeBlock.LATIN_EXTENDED_B) {
// normalization for Romanian
if (ch == '\u0219') ch = '\u015f'; // Small S with comma below => with cedilla
if (ch == '\u021b') ch = '\u0163'; // Small T with comma below => with cedilla
} else if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
ch = ' ';
} else if (block == UnicodeBlock.ARABIC) {
if (ch == '\u06cc') ch = '\u064a'; // Farsi yeh => Arabic yeh
} else if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
if (ch >= '\u1ea0') ch = '\u1ec3';
} else if (block == UnicodeBlock.HIRAGANA) {
ch = '\u3042';
} else if (block == UnicodeBlock.KATAKANA) {
ch = '\u30a2';
} else if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) {
ch = '\u3105';
} else if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
if (cjk_map.containsKey(ch)) ch = cjk_map.get(ch);
} else if (block == UnicodeBlock.HANGUL_SYLLABLES) {
ch = '\uac00';
}
return ch;
}
示例8: isLatinUnicodeBlock
private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) {
return unicodeBlock == UnicodeBlock.BASIC_LATIN ||
unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT ||
unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A ||
unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B ||
unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;
}
示例9: normalize
public static char normalize(char ch) {
Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
if (block == UnicodeBlock.BASIC_LATIN) {
if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') {
ch = ' ';
}
} else if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
if (LATIN1_EXCLUDED.indexOf(ch) >= 0) {
ch = ' ';
}
} else if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
ch = ' ';
} else if (block == UnicodeBlock.ARABIC) {
if (ch == '\u06cc') {
ch = '\u064a';
}
} else if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
if (ch >= '\u1ea0') {
ch = '\u1ec3';
}
} else if (block == UnicodeBlock.HIRAGANA) {
ch = '\u3042';
} else if (block == UnicodeBlock.KATAKANA) {
ch = '\u30a2';
} else if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) {
ch = '\u3105';
} else if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
if (cjk_map.containsKey(ch)) {
ch = cjk_map.get(ch);
}
} else if (block == UnicodeBlock.HANGUL_SYLLABLES) {
ch = '\uac00';
}
return ch;
}
示例10: setCharacterSubsets
/**
* Implements InputMethod.setCharacterSubsets for Windows.
*
* @see java.awt.im.spi.InputMethod#setCharacterSubsets
*/
@Override
public void setCharacterSubsets(Subset[] subsets) {
if (subsets == null){
setConversionStatus(context, cmode);
setOpenStatus(context, open);
return;
}
// Use first subset only. Other subsets in array is ignored.
// This is restriction of Win32 implementation.
Subset subset1 = subsets[0];
Locale locale = getNativeLocale();
int newmode;
if (locale == null) {
return;
}
if (locale.getLanguage().equals(Locale.JAPANESE.getLanguage())) {
if (subset1 == UnicodeBlock.BASIC_LATIN || subset1 == InputSubset.LATIN_DIGITS) {
setOpenStatus(context, false);
} else {
if (subset1 == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|| subset1 == InputSubset.KANJI
|| subset1 == UnicodeBlock.HIRAGANA)
newmode = IME_CMODE_NATIVE | IME_CMODE_FULLSHAPE;
else if (subset1 == UnicodeBlock.KATAKANA)
newmode = IME_CMODE_NATIVE | IME_CMODE_KATAKANA| IME_CMODE_FULLSHAPE;
else if (subset1 == InputSubset.HALFWIDTH_KATAKANA)
newmode = IME_CMODE_NATIVE | IME_CMODE_KATAKANA;
else if (subset1 == InputSubset.FULLWIDTH_LATIN)
newmode = IME_CMODE_FULLSHAPE;
else
return;
setOpenStatus(context, true);
newmode |= (getConversionStatus(context)&IME_CMODE_ROMAN); // reserve ROMAN input mode
setConversionStatus(context, newmode);
}
} else if (locale.getLanguage().equals(Locale.KOREAN.getLanguage())) {
if (subset1 == UnicodeBlock.BASIC_LATIN || subset1 == InputSubset.LATIN_DIGITS) {
setOpenStatus(context, false);
} else {
if (subset1 == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|| subset1 == InputSubset.HANJA
|| subset1 == UnicodeBlock.HANGUL_SYLLABLES
|| subset1 == UnicodeBlock.HANGUL_JAMO
|| subset1 == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO)
newmode = IME_CMODE_NATIVE;
else if (subset1 == InputSubset.FULLWIDTH_LATIN)
newmode = IME_CMODE_FULLSHAPE;
else
return;
setOpenStatus(context, true);
setConversionStatus(context, newmode);
}
} else if (locale.getLanguage().equals(Locale.CHINESE.getLanguage())) {
if (subset1 == UnicodeBlock.BASIC_LATIN || subset1 == InputSubset.LATIN_DIGITS) {
setOpenStatus(context, false);
} else {
if (subset1 == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|| subset1 == InputSubset.TRADITIONAL_HANZI
|| subset1 == InputSubset.SIMPLIFIED_HANZI)
newmode = IME_CMODE_NATIVE;
else if (subset1 == InputSubset.FULLWIDTH_LATIN)
newmode = IME_CMODE_FULLSHAPE;
else
return;
setOpenStatus(context, true);
setConversionStatus(context, newmode);
}
}
}
示例11: classify
/**
* Given a unicode block object, return corresponding language constant.
* If the block is not recognized, returns zero. Note that as there
* is no separate ARABIC block in Character, this case must
* be specially handled by the caller; EASTERN_ARABIC is preferred when
* both are specified.
* @param b the unicode block to classify
* @return the language constant, or zero if not recognized
*/
private int classify(UnicodeBlock b)
{
if (b == null)
return 0;
// ARABIC is handled by the caller; from testing we know
// that EASTERN_ARABIC takes precedence.
if (b == UnicodeBlock.ARABIC)
return EASTERN_ARABIC;
if (b == UnicodeBlock.BENGALI)
return BENGALI;
if (b == UnicodeBlock.DEVANAGARI)
return DEVANAGARI;
if (b == UnicodeBlock.ETHIOPIC)
return ETHIOPIC;
if (b == UnicodeBlock.BASIC_LATIN
|| b == UnicodeBlock.LATIN_1_SUPPLEMENT
|| b == UnicodeBlock.LATIN_EXTENDED_A
|| b == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL
|| b == UnicodeBlock.LATIN_EXTENDED_B)
return EUROPEAN;
if (b == UnicodeBlock.GUJARATI)
return GUJARATI;
if (b == UnicodeBlock.GURMUKHI)
return GURMUKHI;
if (b == UnicodeBlock.KANNADA)
return KANNADA;
if (b == UnicodeBlock.KHMER)
return KHMER;
if (b == UnicodeBlock.LAO)
return LAO;
if (b == UnicodeBlock.MALAYALAM)
return MALAYALAM;
if (b == UnicodeBlock.MONGOLIAN)
return MONGOLIAN;
if (b == UnicodeBlock.MYANMAR)
return MYANMAR;
if (b == UnicodeBlock.ORIYA)
return ORIYA;
if (b == UnicodeBlock.TAMIL)
return TAMIL;
if (b == UnicodeBlock.TELUGU)
return TELUGU;
if (b == UnicodeBlock.THAI)
return THAI;
if (b == UnicodeBlock.TIBETAN)
return TIBETAN;
return 0;
}
示例12: normalize
/**
* Character Normalization
* @param ch character
* @return Normalized character
*/
public static char normalize(char ch) {
final Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
if (block == UnicodeBlock.BASIC_LATIN) {
if (ch < 'A' || (ch < 'a' && ch > 'Z') || ch > 'z') {
ch = ' ';
}
} else if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
if (LATIN1_EXCLUDED.indexOf(ch) >= 0) {
ch = ' ';
}
} else if (block == UnicodeBlock.LATIN_EXTENDED_B) {
// normalization for Romanian
if (ch == '\u0219') {
ch = '\u015f'; // Small S with comma below => with cedilla
}
if (ch == '\u021b') {
ch = '\u0163'; // Small T with comma below => with cedilla
}
} else if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
ch = ' ';
} else if (block == UnicodeBlock.ARABIC) {
if (ch == '\u06cc') {
ch = '\u064a'; // Farsi yeh => Arabic yeh
}
} else if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
if (ch >= '\u1ea0') {
ch = '\u1ec3';
}
} else if (block == UnicodeBlock.HIRAGANA) {
ch = '\u3042';
} else if (block == UnicodeBlock.KATAKANA) {
ch = '\u30a2';
} else if (block == UnicodeBlock.BOPOMOFO
|| block == UnicodeBlock.BOPOMOFO_EXTENDED) {
ch = '\u3105';
} else if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
if (cjkMap.containsKey(ch)) {
ch = cjkMap.get(ch);
}
} else if (block == UnicodeBlock.HANGUL_SYLLABLES) {
ch = '\uac00';
}
return ch;
}
示例13: setCharacterSubsets
/**
* Implements InputMethod.setCharacterSubsets for Windows.
*
* @see java.awt.im.spi.InputMethod#setCharacterSubsets
*/
public void setCharacterSubsets(Subset[] subsets) {
if (subsets == null){
setConversionStatus(context, cmode);
setOpenStatus(context, open);
return;
}
// Use first subset only. Other subsets in array is ignored.
// This is restriction of Win32 implementation.
Subset subset1 = subsets[0];
Locale locale = getNativeLocale();
int newmode;
if (locale == null) {
return;
}
if (locale.getLanguage().equals(Locale.JAPANESE.getLanguage())) {
if (subset1 == UnicodeBlock.BASIC_LATIN || subset1 == InputSubset.LATIN_DIGITS) {
setOpenStatus(context, false);
} else {
if (subset1 == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|| subset1 == InputSubset.KANJI
|| subset1 == UnicodeBlock.HIRAGANA)
newmode = IME_CMODE_NATIVE | IME_CMODE_FULLSHAPE;
else if (subset1 == UnicodeBlock.KATAKANA)
newmode = IME_CMODE_NATIVE | IME_CMODE_KATAKANA| IME_CMODE_FULLSHAPE;
else if (subset1 == InputSubset.HALFWIDTH_KATAKANA)
newmode = IME_CMODE_NATIVE | IME_CMODE_KATAKANA;
else if (subset1 == InputSubset.FULLWIDTH_LATIN)
newmode = IME_CMODE_FULLSHAPE;
else
return;
setOpenStatus(context, true);
newmode |= (getConversionStatus(context)&IME_CMODE_ROMAN); // reserve ROMAN input mode
setConversionStatus(context, newmode);
}
} else if (locale.getLanguage().equals(Locale.KOREAN.getLanguage())) {
if (subset1 == UnicodeBlock.BASIC_LATIN || subset1 == InputSubset.LATIN_DIGITS) {
setOpenStatus(context, false);
} else {
if (subset1 == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|| subset1 == InputSubset.HANJA
|| subset1 == UnicodeBlock.HANGUL_SYLLABLES
|| subset1 == UnicodeBlock.HANGUL_JAMO
|| subset1 == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO)
newmode = IME_CMODE_NATIVE;
else if (subset1 == InputSubset.FULLWIDTH_LATIN)
newmode = IME_CMODE_FULLSHAPE;
else
return;
setOpenStatus(context, true);
setConversionStatus(context, newmode);
}
} else if (locale.getLanguage().equals(Locale.CHINESE.getLanguage())) {
if (subset1 == UnicodeBlock.BASIC_LATIN || subset1 == InputSubset.LATIN_DIGITS) {
setOpenStatus(context, false);
} else {
if (subset1 == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
|| subset1 == InputSubset.TRADITIONAL_HANZI
|| subset1 == InputSubset.SIMPLIFIED_HANZI)
newmode = IME_CMODE_NATIVE;
else if (subset1 == InputSubset.FULLWIDTH_LATIN)
newmode = IME_CMODE_FULLSHAPE;
else
return;
setOpenStatus(context, true);
setConversionStatus(context, newmode);
}
}
}
示例14: isBasicLatin
/**
* 文字種判別「半角文字(半角カナ含まず)」。
*
* @param codePoint 対象文字 (コードポイントで指定すること)。
* @return 対象文字が「半角文字(半角カナ含まず)」であれば真(true)、さもなくば、偽(false)。
*/
public static boolean isBasicLatin(int codePoint) {
return of(codePoint) == UnicodeBlock.BASIC_LATIN;
}
示例15: isSpace
/**
* 文字種判別「半角空白」。
*
* @param codePoint 対象文字 (コードポイントで指定すること)。
* @return 対象文字が「半角空白」であれば真(true)、さもなくば、偽(false)。
*/
public static boolean isSpace(int codePoint) {
return of(codePoint) == UnicodeBlock.BASIC_LATIN && isWhitespace(codePoint);
}