當前位置: 首頁>>代碼示例>>Java>>正文


Java UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS屬性代碼示例

本文整理匯總了 Java 中 java.lang.Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS 屬性的典型用法代碼示例。如果您正苦於以下問題：Java UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS 屬性的具體用法？Java UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS 怎麼用？Java UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS 使用的例子？那麼，這裡精選的屬性代碼示例或許可以為您提供幫助。您也可以進一步了解該屬性所在類 java.lang.Character.UnicodeBlock 的用法示例。


在下文中一共展示了 UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS 屬性的 9 個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的 Java 代碼示例。

示例1: normalize

/**
 * Folds a character onto a per-block representative.
 *
 * <p>The Unicode block of {@code ch} selects the rule:
 * <ul>
 *   <li>Basic Latin: anything other than A-Z / a-z becomes a space.</li>
 *   <li>Latin-1 Supplement: characters listed in {@code LATIN1_EXCLUDED} become a space.</li>
 *   <li>Latin Extended-B: Romanian comma-below s/t are mapped to the cedilla forms.</li>
 *   <li>General Punctuation: always a space.</li>
 *   <li>Arabic: Farsi yeh (U+06CC) becomes Arabic yeh (U+064A).</li>
 *   <li>Latin Extended Additional: everything from U+1EA0 upward becomes U+1EC3.</li>
 *   <li>Hiragana, Katakana, Bopomofo (incl. Extended) and Hangul Syllables each collapse
 *       to one fixed code point (U+3042, U+30A2, U+3105, U+AC00).</li>
 *   <li>CJK Unified Ideographs: replaced by the {@code cjk_map} entry when one exists.</li>
 * </ul>
 * Characters of any other block are returned unchanged.
 *
 * @param ch character to normalize
 * @return the normalized character
 */
static public char normalize(char ch) {
    final Character.UnicodeBlock script = Character.UnicodeBlock.of(ch);

    if (script == UnicodeBlock.BASIC_LATIN) {
        final boolean upperCase = ch >= 'A' && ch <= 'Z';
        final boolean lowerCase = ch >= 'a' && ch <= 'z';
        return (upperCase || lowerCase) ? ch : ' ';
    }
    if (script == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        return LATIN1_EXCLUDED.indexOf(ch) >= 0 ? ' ' : ch;
    }
    if (script == UnicodeBlock.LATIN_EXTENDED_B) {
        // Romanian: comma-below letters are folded onto the cedilla variants.
        switch (ch) {
            case '\u0219':
                return '\u015f';  // small s with comma below => with cedilla
            case '\u021b':
                return '\u0163';  // small t with comma below => with cedilla
            default:
                return ch;
        }
    }
    if (script == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (script == UnicodeBlock.ARABIC) {
        // Farsi yeh => Arabic yeh
        return ch == '\u06cc' ? '\u064a' : ch;
    }
    if (script == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        return ch >= '\u1ea0' ? '\u1ec3' : ch;
    }
    if (script == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (script == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (script == UnicodeBlock.BOPOMOFO || script == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (script == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(ch)) {
            return cjk_map.get(ch);
        }
        return ch;
    }
    if (script == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }
    return ch;
}
 
開發者ID:malcolmgreaves,項目名稱:language-detection,代碼行數:35,代碼來源:NGram.java

示例2: isChinese

/**
 * Tells whether a character falls into one of the Unicode blocks this
 * check counts as Chinese.
 *
 * <p>Besides the ideograph blocks (unified, extension A, compatibility),
 * the accepted set also contains General Punctuation, CJK Symbols and
 * Punctuation, and Halfwidth and Fullwidth Forms.
 *
 * @param c the character to classify
 * @return {@code true} when the block of {@code c} is in the accepted set
 */
private static boolean isChinese(char c) {
	final UnicodeBlock block = UnicodeBlock.of(c);

	final boolean ideograph = block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
			|| block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
			|| block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS;

	final boolean punctuationOrWidthForm = block == UnicodeBlock.GENERAL_PUNCTUATION
			|| block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
			|| block == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;

	return ideograph || punctuationOrWidthForm;
}
 
開發者ID:FudanNLP,項目名稱:fnlp,代碼行數:11,代碼來源:LangDetection.java

示例3: normalize

/**
 * Normalizes a single character according to the Unicode block it belongs to.
 *
 * <p>Non-letters in Basic Latin, characters found in {@code LATIN1_EXCLUDED},
 * and all of General Punctuation turn into a space. A few individual letters
 * are remapped (Romanian comma-below s/t, Farsi yeh). Latin Extended Additional
 * from U+1EA0 upward, Hiragana, Katakana, Bopomofo and Hangul Syllables are each
 * collapsed to one fixed code point. CJK Unified Ideographs are looked up in
 * {@code cjk_map}. Everything else passes through untouched.
 *
 * @param ch the character to normalize
 * @return Normalized character
 */
static public char normalize(char ch) {
    final Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(ch);
    char normalized = ch;

    if (unicodeBlock == UnicodeBlock.BASIC_LATIN) {
        final boolean asciiLetter = ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
        if (!asciiLetter) {
            normalized = ' ';
        }
    } else if (unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        if (LATIN1_EXCLUDED.indexOf(ch) >= 0) {
            normalized = ' ';
        }
    } else if (unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B) {
        // Romanian normalization: comma-below forms => cedilla forms
        if (ch == '\u0219') {
            normalized = '\u015f';  // small s
        } else if (ch == '\u021b') {
            normalized = '\u0163';  // small t
        }
    } else if (unicodeBlock == UnicodeBlock.GENERAL_PUNCTUATION) {
        normalized = ' ';
    } else if (unicodeBlock == UnicodeBlock.ARABIC) {
        if (ch == '\u06cc') {
            normalized = '\u064a';  // Farsi yeh => Arabic yeh
        }
    } else if (unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        if (ch >= '\u1ea0') {
            normalized = '\u1ec3';
        }
    } else if (unicodeBlock == UnicodeBlock.HIRAGANA) {
        normalized = '\u3042';
    } else if (unicodeBlock == UnicodeBlock.KATAKANA) {
        normalized = '\u30a2';
    } else if (unicodeBlock == UnicodeBlock.BOPOMOFO
            || unicodeBlock == UnicodeBlock.BOPOMOFO_EXTENDED) {
        normalized = '\u3105';
    } else if (unicodeBlock == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(ch)) {
            normalized = cjk_map.get(ch);
        }
    } else if (unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES) {
        normalized = '\uac00';
    }

    return normalized;
}
 
開發者ID:deezer,項目名稱:weslang,代碼行數:34,代碼來源:NGram.java

示例4: normalize

/**
 * Reduces a character to the representative chosen for its Unicode block.
 *
 * <p>Rules, by block: Basic Latin keeps only A-Z / a-z (others become a
 * space); Latin-1 Supplement blanks the characters in {@code LATIN1_EXCLUDED};
 * General Punctuation is always a space; Arabic maps U+06CC to U+064A;
 * Latin Extended Additional maps U+1EA0 and above to U+1EC3; Hiragana,
 * Katakana, Bopomofo (incl. Extended) and Hangul Syllables collapse to
 * U+3042, U+30A2, U+3105 and U+AC00 respectively; CJK Unified Ideographs
 * are substituted through {@code cjk_map} when an entry exists.
 *
 * @param c the character to normalize
 * @return the normalized character, or {@code c} itself when no rule applies
 */
public static char normalize(char c) {
    final Character.UnicodeBlock owner = Character.UnicodeBlock.of(c);

    if (owner == UnicodeBlock.BASIC_LATIN) {
        final boolean letter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
        return letter ? c : ' ';
    }
    if (owner == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        return LATIN1_EXCLUDED.indexOf(c) >= 0 ? ' ' : c;
    }
    if (owner == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (owner == UnicodeBlock.ARABIC) {
        return c == '\u06cc' ? '\u064a' : c;
    }
    if (owner == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        return c >= '\u1ea0' ? '\u1ec3' : c;
    }
    if (owner == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (owner == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (owner == UnicodeBlock.BOPOMOFO || owner == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (owner == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(c)) {
            return cjk_map.get(c);
        }
        return c;
    }
    if (owner == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }
    return c;
}
 
開發者ID:jprante,項目名稱:elasticsearch-plugin-bundle,代碼行數:36,代碼來源:NGram.java

示例5: isCJKUnicodeBlock

/** Unicode blocks that {@link #isCJKUnicodeBlock} accepts. */
private static final UnicodeBlock[] CJK_UNICODE_BLOCKS = {
    UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS,
    UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
    UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
    UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION,
    UnicodeBlock.CJK_RADICALS_SUPPLEMENT,
    UnicodeBlock.CJK_COMPATIBILITY,
    UnicodeBlock.CJK_COMPATIBILITY_FORMS,
    UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS,
    UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
};

/**
 * Checks whether the given Unicode block is one of the CJK blocks listed in
 * {@code CJK_UNICODE_BLOCKS}.
 *
 * @param block the block to test; {@code null} yields {@code false}
 * @return {@code true} when {@code block} is identical to one of the listed blocks
 */
private static boolean isCJKUnicodeBlock(UnicodeBlock block) {
    // Identity comparison on purpose: UnicodeBlock constants are singletons.
    for (UnicodeBlock candidate : CJK_UNICODE_BLOCKS) {
        if (candidate == block) {
            return true;
        }
    }
    return false;
}
 
開發者ID:SilentCircle,項目名稱:silent-contacts-android,代碼行數:11,代碼來源:NameSplitter.java

示例6: normalize

/**
 * Normalizes one character based on its Unicode block.
 *
 * <p>Basic Latin non-letters, the characters in {@code LATIN1_EXCLUDED} and
 * every General Punctuation character become a space. U+06CC is replaced by
 * U+064A. Latin Extended Additional characters at or above U+1EA0 become
 * U+1EC3. Hiragana, Katakana, Bopomofo (incl. Extended) and Hangul Syllables
 * are each replaced by a single fixed code point. CJK Unified Ideographs are
 * translated through {@code cjk_map} if they have an entry there.
 *
 * @param ch the character to normalize
 * @return the normalized character; {@code ch} unchanged if no rule matches
 */
public static char normalize(char ch) {
    final Character.UnicodeBlock category = Character.UnicodeBlock.of(ch);

    if (category == UnicodeBlock.BASIC_LATIN) {
        // Keep only the ASCII letters.
        final boolean inUpperRange = 'A' <= ch && ch <= 'Z';
        final boolean inLowerRange = 'a' <= ch && ch <= 'z';
        return inUpperRange || inLowerRange ? ch : ' ';
    }

    if (category == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        final boolean excluded = LATIN1_EXCLUDED.indexOf(ch) >= 0;
        return excluded ? ' ' : ch;
    }

    if (category == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }

    if (category == UnicodeBlock.ARABIC) {
        return ch == '\u06cc' ? '\u064a' : ch;
    }

    if (category == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        return ch < '\u1ea0' ? ch : '\u1ec3';
    }

    if (category == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }

    if (category == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }

    if (category == UnicodeBlock.BOPOMOFO || category == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }

    if (category == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (!cjk_map.containsKey(ch)) {
            return ch;
        }
        return cjk_map.get(ch);
    }

    if (category == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }

    return ch;
}
 
開發者ID:jprante,項目名稱:elasticsearch-analysis-german,代碼行數:35,代碼來源:NGram.java

示例7: setCharacterSubsets

/**
 * Implements InputMethod.setCharacterSubsets for Windows.
 *
 * <p>A {@code null} array re-applies the {@code cmode} conversion status and the
 * {@code open} status. Otherwise only the first subset is honoured, and only when
 * the native locale's language is Japanese, Korean or Chinese:
 * <ul>
 *   <li>Basic Latin / Latin digits close the input method.</li>
 *   <li>A subset recognised for the language opens the input method and sets the
 *       matching conversion mode (for Japanese, the current ROMAN bit is kept).</li>
 *   <li>Any other subset leaves the state untouched.</li>
 * </ul>
 *
 * @param subsets requested character subsets, or {@code null}
 * @see java.awt.im.spi.InputMethod#setCharacterSubsets
 */
@Override
public void setCharacterSubsets(Subset[] subsets) {
    if (subsets == null) {
        setConversionStatus(context, cmode);
        setOpenStatus(context, open);
        return;
    }

    // The Win32 implementation can only act on one subset, so everything
    // after the first array element is ignored.
    final Subset requested = subsets[0];

    final Locale nativeLocale = getNativeLocale();
    if (nativeLocale == null) {
        return;
    }

    final String language = nativeLocale.getLanguage();
    final boolean latinRequested = requested == UnicodeBlock.BASIC_LATIN
            || requested == InputSubset.LATIN_DIGITS;
    int mode;

    if (language.equals(Locale.JAPANESE.getLanguage())) {
        if (latinRequested) {
            setOpenStatus(context, false);
            return;
        }
        if (requested == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || requested == InputSubset.KANJI
                || requested == UnicodeBlock.HIRAGANA) {
            mode = IME_CMODE_NATIVE | IME_CMODE_FULLSHAPE;
        } else if (requested == UnicodeBlock.KATAKANA) {
            mode = IME_CMODE_NATIVE | IME_CMODE_KATAKANA | IME_CMODE_FULLSHAPE;
        } else if (requested == InputSubset.HALFWIDTH_KATAKANA) {
            mode = IME_CMODE_NATIVE | IME_CMODE_KATAKANA;
        } else if (requested == InputSubset.FULLWIDTH_LATIN) {
            mode = IME_CMODE_FULLSHAPE;
        } else {
            return;
        }
        setOpenStatus(context, true);
        // Carry over the ROMAN input-mode bit from the current conversion status.
        mode |= (getConversionStatus(context) & IME_CMODE_ROMAN);
        setConversionStatus(context, mode);
        return;
    }

    if (language.equals(Locale.KOREAN.getLanguage())) {
        if (latinRequested) {
            setOpenStatus(context, false);
            return;
        }
        if (requested == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || requested == InputSubset.HANJA
                || requested == UnicodeBlock.HANGUL_SYLLABLES
                || requested == UnicodeBlock.HANGUL_JAMO
                || requested == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO) {
            mode = IME_CMODE_NATIVE;
        } else if (requested == InputSubset.FULLWIDTH_LATIN) {
            mode = IME_CMODE_FULLSHAPE;
        } else {
            return;
        }
        setOpenStatus(context, true);
        setConversionStatus(context, mode);
        return;
    }

    if (language.equals(Locale.CHINESE.getLanguage())) {
        if (latinRequested) {
            setOpenStatus(context, false);
            return;
        }
        if (requested == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || requested == InputSubset.TRADITIONAL_HANZI
                || requested == InputSubset.SIMPLIFIED_HANZI) {
            mode = IME_CMODE_NATIVE;
        } else if (requested == InputSubset.FULLWIDTH_LATIN) {
            mode = IME_CMODE_FULLSHAPE;
        } else {
            return;
        }
        setOpenStatus(context, true);
        setConversionStatus(context, mode);
    }
}
 
開發者ID:SunburstApps,項目名稱:OpenJSharp,代碼行數:78,代碼來源:WInputMethod.java

示例8: normalize

/**
 * Character normalization.
 *
 * <p>Picks a rule from the Unicode block of {@code ch}: Basic Latin keeps only
 * ASCII letters and Latin-1 Supplement drops the characters in
 * {@code LATIN1_EXCLUDED} (both replaced by a space); General Punctuation is
 * always a space; Romanian comma-below s/t and Farsi yeh are remapped; Latin
 * Extended Additional from U+1EA0 upward, Hiragana, Katakana, Bopomofo
 * (incl. Extended) and Hangul Syllables are each folded onto one fixed code
 * point; CJK Unified Ideographs are replaced via {@code cjkMap} when mapped.
 *
 * @param ch character
 * @return Normalized character
 */
public static char normalize(char ch) {
    final Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(ch);

    if (unicodeBlock == UnicodeBlock.BASIC_LATIN) {
        final boolean isLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
        return isLetter ? ch : ' ';
    }
    if (unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        return LATIN1_EXCLUDED.indexOf(ch) < 0 ? ch : ' ';
    }
    if (unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B) {
        // normalization for Romanian
        switch (ch) {
            case '\u0219':
                return '\u015f'; // Small S with comma below => with cedilla
            case '\u021b':
                return '\u0163'; // Small T with comma below => with cedilla
            default:
                return ch;
        }
    }
    if (unicodeBlock == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (unicodeBlock == UnicodeBlock.ARABIC) {
        // Farsi yeh => Arabic yeh
        return ch == '\u06cc' ? '\u064a' : ch;
    }
    if (unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        return ch >= '\u1ea0' ? '\u1ec3' : ch;
    }
    if (unicodeBlock == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (unicodeBlock == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (unicodeBlock == UnicodeBlock.BOPOMOFO
            || unicodeBlock == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (unicodeBlock == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjkMap.containsKey(ch)) {
            return cjkMap.get(ch);
        }
        return ch;
    }
    if (unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }
    return ch;
}
 
開發者ID:codelibs,項目名稱:elasticsearch-langfield,代碼行數:49,代碼來源:NGram.java

示例9: setCharacterSubsets

/**
 * Implements InputMethod.setCharacterSubsets for Windows.
 *
 * <p>With a {@code null} argument the {@code cmode} conversion status and the
 * {@code open} status are applied again. With a non-null array, only element 0
 * is looked at, and the call has an effect only if the native locale's language
 * is Japanese, Korean or Chinese. Basic Latin / Latin digits close the input
 * method; a subset known for that language opens it and selects a conversion
 * mode; an unknown subset changes nothing.
 *
 * @param subsets requested character subsets, or {@code null}
 * @see java.awt.im.spi.InputMethod#setCharacterSubsets
 */
public void setCharacterSubsets(Subset[] subsets) {
    if (subsets == null) {
        setConversionStatus(context, cmode);
        setOpenStatus(context, open);
        return;
    }

    // Restriction of the Win32 implementation: just the first subset is
    // used, the remaining array entries are ignored.
    final Subset first = subsets[0];

    final Locale current = getNativeLocale();
    if (current == null) {
        return;
    }

    // Classify the locale once; other languages are not handled at all.
    final String lang = current.getLanguage();
    final boolean japanese = lang.equals(Locale.JAPANESE.getLanguage());
    final boolean korean = lang.equals(Locale.KOREAN.getLanguage());
    final boolean chinese = lang.equals(Locale.CHINESE.getLanguage());
    if (!japanese && !korean && !chinese) {
        return;
    }

    if (first == UnicodeBlock.BASIC_LATIN || first == InputSubset.LATIN_DIGITS) {
        setOpenStatus(context, false);
        return;
    }

    final int selected;
    if (japanese) {
        if (first == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || first == InputSubset.KANJI
                || first == UnicodeBlock.HIRAGANA) {
            selected = IME_CMODE_NATIVE | IME_CMODE_FULLSHAPE;
        } else if (first == UnicodeBlock.KATAKANA) {
            selected = IME_CMODE_NATIVE | IME_CMODE_KATAKANA | IME_CMODE_FULLSHAPE;
        } else if (first == InputSubset.HALFWIDTH_KATAKANA) {
            selected = IME_CMODE_NATIVE | IME_CMODE_KATAKANA;
        } else if (first == InputSubset.FULLWIDTH_LATIN) {
            selected = IME_CMODE_FULLSHAPE;
        } else {
            return;
        }
    } else if (korean) {
        if (first == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || first == InputSubset.HANJA
                || first == UnicodeBlock.HANGUL_SYLLABLES
                || first == UnicodeBlock.HANGUL_JAMO
                || first == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO) {
            selected = IME_CMODE_NATIVE;
        } else if (first == InputSubset.FULLWIDTH_LATIN) {
            selected = IME_CMODE_FULLSHAPE;
        } else {
            return;
        }
    } else {
        // Chinese
        if (first == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || first == InputSubset.TRADITIONAL_HANZI
                || first == InputSubset.SIMPLIFIED_HANZI) {
            selected = IME_CMODE_NATIVE;
        } else if (first == InputSubset.FULLWIDTH_LATIN) {
            selected = IME_CMODE_FULLSHAPE;
        } else {
            return;
        }
    }

    setOpenStatus(context, true);
    int applied = selected;
    if (japanese) {
        // reserve ROMAN input mode: read only after the input method is opened
        applied |= (getConversionStatus(context) & IME_CMODE_ROMAN);
    }
    setConversionStatus(context, applied);
}
 
開發者ID:openjdk,項目名稱:jdk7-jdk,代碼行數:77,代碼來源:WInputMethod.java


注:本文中的java.lang.Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS屬性示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。