当前位置: 首页>>代码示例>>Java>>正文


Java UnicodeBlock.LATIN_1_SUPPLEMENT属性代码示例

本文整理汇总了Java中java.lang.Character.UnicodeBlock.LATIN_1_SUPPLEMENT属性的典型用法代码示例。如果您正苦于以下问题：Java UnicodeBlock.LATIN_1_SUPPLEMENT属性的具体用法是什么？Java UnicodeBlock.LATIN_1_SUPPLEMENT怎么用？有哪些Java UnicodeBlock.LATIN_1_SUPPLEMENT的使用例子？那么，这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类java.lang.Character.UnicodeBlock的用法示例。


在下文中一共展示了UnicodeBlock.LATIN_1_SUPPLEMENT属性的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: normalize

/**
 * Collapses a character onto a reduced alphabet, decided by its Unicode block.
 *
 * <ul>
 *   <li>Basic Latin: anything that is not an ASCII letter becomes a space.</li>
 *   <li>Latin-1 Supplement: characters listed in {@code LATIN1_EXCLUDED} become a space.</li>
 *   <li>Latin Extended-B: Romanian s/t with comma below become the cedilla forms.</li>
 *   <li>General Punctuation: always a space.</li>
 *   <li>Arabic: Farsi yeh (U+06CC) becomes Arabic yeh (U+064A).</li>
 *   <li>Latin Extended Additional: everything from U+1EA0 upwards becomes U+1EC3.</li>
 *   <li>Hiragana, Katakana, Bopomofo (incl. Extended) and Hangul Syllables are each
 *       folded onto a single representative character.</li>
 *   <li>CJK Unified Ideographs: replaced by the {@code cjk_map} entry when one exists.</li>
 * </ul>
 * Characters of any other block are returned untouched.
 *
 * @param ch character to normalize
 * @return the normalized character
 */
static public char normalize(char ch) {
    final Character.UnicodeBlock blk = Character.UnicodeBlock.of(ch);

    if (blk == UnicodeBlock.BASIC_LATIN) {
        final boolean asciiLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
        return asciiLetter ? ch : ' ';
    }
    if (blk == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        return LATIN1_EXCLUDED.indexOf(ch) >= 0 ? ' ' : ch;
    }
    if (blk == UnicodeBlock.LATIN_EXTENDED_B) {
        // Romanian: fold the comma-below letters onto their cedilla counterparts.
        switch (ch) {
            case '\u0219':
                return '\u015f';  // small s
            case '\u021b':
                return '\u0163';  // small t
            default:
                return ch;
        }
    }
    if (blk == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (blk == UnicodeBlock.ARABIC) {
        // Farsi yeh is treated as Arabic yeh.
        return ch == '\u06cc' ? '\u064a' : ch;
    }
    if (blk == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        return ch >= '\u1ea0' ? '\u1ec3' : ch;
    }
    if (blk == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (blk == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (blk == UnicodeBlock.BOPOMOFO || blk == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (blk == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(ch)) {
            return cjk_map.get(ch);
        }
        return ch;
    }
    if (blk == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }
    return ch;
}
 
开发者ID:malcolmgreaves,项目名称:language-detection,代码行数:35,代码来源:NGram.java

示例2: normalize

/**
 * Normalizes a single character according to the Unicode block it belongs to.
 * Non-letters in Basic Latin, characters listed in {@code LATIN1_EXCLUDED} and all
 * General Punctuation become a space; several scripts are folded onto one
 * representative character; everything else is passed through unchanged.
 *
 * @param ch the character to normalize
 * @return Normalized character
 */
static public char normalize(char ch) {
    final Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(ch);
    char normalized = ch;

    if (unicodeBlock == UnicodeBlock.BASIC_LATIN) {
        final boolean upper = 'A' <= ch && ch <= 'Z';
        final boolean lower = 'a' <= ch && ch <= 'z';
        if (!upper && !lower) {
            normalized = ' ';
        }
    } else if (unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        final boolean excluded = LATIN1_EXCLUDED.indexOf(ch) >= 0;
        if (excluded) {
            normalized = ' ';
        }
    } else if (unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B) {
        // Romanian: letters with comma below are mapped to the cedilla variants.
        if (ch == '\u0219') {
            normalized = '\u015f';  // small s
        } else if (ch == '\u021b') {
            normalized = '\u0163';  // small t
        }
    } else if (unicodeBlock == UnicodeBlock.GENERAL_PUNCTUATION) {
        normalized = ' ';
    } else if (unicodeBlock == UnicodeBlock.ARABIC) {
        if (ch == '\u06cc') {
            normalized = '\u064a';  // Farsi yeh becomes Arabic yeh
        }
    } else if (unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        if (ch >= '\u1ea0') {
            normalized = '\u1ec3';
        }
    } else if (unicodeBlock == UnicodeBlock.HIRAGANA) {
        normalized = '\u3042';
    } else if (unicodeBlock == UnicodeBlock.KATAKANA) {
        normalized = '\u30a2';
    } else if (unicodeBlock == UnicodeBlock.BOPOMOFO
            || unicodeBlock == UnicodeBlock.BOPOMOFO_EXTENDED) {
        normalized = '\u3105';
    } else if (unicodeBlock == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(ch)) {
            normalized = cjk_map.get(ch);
        }
    } else if (unicodeBlock == UnicodeBlock.HANGUL_SYLLABLES) {
        normalized = '\uac00';
    }
    return normalized;
}
 
开发者ID:deezer,项目名称:weslang,代码行数:34,代码来源:NGram.java

示例3: normalize

/**
 * Normalizes a character depending on its Unicode block.
 *
 * Basic Latin characters that are not ASCII letters, Latin-1 Supplement characters
 * contained in {@code LATIN1_EXCLUDED} and every General Punctuation character are
 * replaced by a space. Hiragana, Katakana, Bopomofo (incl. Extended) and Hangul
 * Syllables are each reduced to one fixed character, CJK ideographs are looked up in
 * {@code cjk_map}, and any other character is returned as is.
 *
 * @param c the character to normalize
 * @return the normalized character
 */
public static char normalize(char c) {
    final Character.UnicodeBlock block = Character.UnicodeBlock.of(c);

    if (block == UnicodeBlock.BASIC_LATIN) {
        final boolean letter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
        return letter ? c : ' ';
    }
    if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        return LATIN1_EXCLUDED.indexOf(c) >= 0 ? ' ' : c;
    }
    if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (block == UnicodeBlock.ARABIC) {
        // U+06CC (Farsi yeh) is folded onto U+064A (Arabic yeh).
        return c == '\u06cc' ? '\u064a' : c;
    }
    if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        return c >= '\u1ea0' ? '\u1ec3' : c;
    }
    if (block == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (block == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(c)) {
            return cjk_map.get(c);
        }
        return c;
    }
    if (block == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }
    return c;
}
 
开发者ID:jprante,项目名称:elasticsearch-plugin-bundle,代码行数:36,代码来源:NGram.java

示例4: isLatinUnicodeBlock

/**
 * Tells whether the given block is one of the Latin blocks: Basic Latin,
 * Latin-1 Supplement, Latin Extended-A, Latin Extended-B or Latin Extended Additional.
 *
 * @param unicodeBlock the block to test; {@code null} yields {@code false}
 * @return {@code true} if the block is one of the five Latin blocks listed above
 */
private static boolean isLatinUnicodeBlock(UnicodeBlock unicodeBlock) {
    if (unicodeBlock == UnicodeBlock.BASIC_LATIN) {
        return true;
    }
    if (unicodeBlock == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        return true;
    }
    if (unicodeBlock == UnicodeBlock.LATIN_EXTENDED_A) {
        return true;
    }
    if (unicodeBlock == UnicodeBlock.LATIN_EXTENDED_B) {
        return true;
    }
    return unicodeBlock == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;
}
 
开发者ID:SilentCircle,项目名称:silent-contacts-android,代码行数:7,代码来源:NameSplitter.java

示例5: normalize

/**
 * Reduces a character to its normalized form, chosen by Unicode block.
 *
 * Spaces replace non-letters of Basic Latin, members of {@code LATIN1_EXCLUDED} in
 * Latin-1 Supplement and all of General Punctuation. Hiragana, Katakana, Bopomofo
 * (incl. Extended) and Hangul Syllables are each mapped to one fixed character;
 * CJK ideographs are translated through {@code cjk_map} when an entry exists.
 *
 * @param ch the character to normalize
 * @return the normalized character, or {@code ch} itself if no rule applies
 */
public static char normalize(char ch) {
    final Character.UnicodeBlock owner = Character.UnicodeBlock.of(ch);
    char out = ch;

    if (owner == UnicodeBlock.BASIC_LATIN) {
        final boolean isUpper = ch >= 'A' && ch <= 'Z';
        final boolean isLower = ch >= 'a' && ch <= 'z';
        out = (isUpper || isLower) ? ch : ' ';
    } else if (owner == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        out = (LATIN1_EXCLUDED.indexOf(ch) >= 0) ? ' ' : ch;
    } else if (owner == UnicodeBlock.GENERAL_PUNCTUATION) {
        out = ' ';
    } else if (owner == UnicodeBlock.ARABIC) {
        // U+06CC (Farsi yeh) is unified with U+064A (Arabic yeh).
        out = (ch == '\u06cc') ? '\u064a' : ch;
    } else if (owner == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        out = (ch >= '\u1ea0') ? '\u1ec3' : ch;
    } else if (owner == UnicodeBlock.HIRAGANA) {
        out = '\u3042';
    } else if (owner == UnicodeBlock.KATAKANA) {
        out = '\u30a2';
    } else if (owner == UnicodeBlock.BOPOMOFO || owner == UnicodeBlock.BOPOMOFO_EXTENDED) {
        out = '\u3105';
    } else if (owner == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(ch)) {
            out = cjk_map.get(ch);
        }
    } else if (owner == UnicodeBlock.HANGUL_SYLLABLES) {
        out = '\uac00';
    }
    return out;
}
 
开发者ID:jprante,项目名称:elasticsearch-analysis-german,代码行数:35,代码来源:NGram.java

示例6: classify

/**
 * Maps a Unicode block onto the matching language constant of this class.
 * A {@code null} or unrecognized block yields zero.
 *
 * <p>{@code UnicodeBlock.ARABIC} is reported as {@code EASTERN_ARABIC}; this
 * method never returns a distinct constant for plain Arabic, so a caller that
 * needs to tell the two apart has to handle that case itself. The five Latin
 * blocks (Basic Latin, Latin-1 Supplement, Latin Extended-A/-B and Latin
 * Extended Additional) all map to {@code EUROPEAN}.
 *
 * @param b the unicode block to classify
 * @return the language constant, or zero if not recognized
 */
private int classify(UnicodeBlock b) {
  if (b == null) {
    return 0;
  }

  // All Latin-family blocks share one constant.
  final boolean latinFamily = b == UnicodeBlock.BASIC_LATIN
      || b == UnicodeBlock.LATIN_1_SUPPLEMENT
      || b == UnicodeBlock.LATIN_EXTENDED_A
      || b == UnicodeBlock.LATIN_EXTENDED_B
      || b == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL;
  if (latinFamily) {
    return EUROPEAN;
  }

  // The Arabic block is deliberately reported as EASTERN_ARABIC.
  if (b == UnicodeBlock.ARABIC) {
    return EASTERN_ARABIC;
  }
  if (b == UnicodeBlock.BENGALI) {
    return BENGALI;
  }
  if (b == UnicodeBlock.DEVANAGARI) {
    return DEVANAGARI;
  }
  if (b == UnicodeBlock.ETHIOPIC) {
    return ETHIOPIC;
  }
  if (b == UnicodeBlock.GUJARATI) {
    return GUJARATI;
  }
  if (b == UnicodeBlock.GURMUKHI) {
    return GURMUKHI;
  }
  if (b == UnicodeBlock.KANNADA) {
    return KANNADA;
  }
  if (b == UnicodeBlock.KHMER) {
    return KHMER;
  }
  if (b == UnicodeBlock.LAO) {
    return LAO;
  }
  if (b == UnicodeBlock.MALAYALAM) {
    return MALAYALAM;
  }
  if (b == UnicodeBlock.MONGOLIAN) {
    return MONGOLIAN;
  }
  if (b == UnicodeBlock.MYANMAR) {
    return MYANMAR;
  }
  if (b == UnicodeBlock.ORIYA) {
    return ORIYA;
  }
  if (b == UnicodeBlock.TAMIL) {
    return TAMIL;
  }
  if (b == UnicodeBlock.TELUGU) {
    return TELUGU;
  }
  if (b == UnicodeBlock.THAI) {
    return THAI;
  }
  if (b == UnicodeBlock.TIBETAN) {
    return TIBETAN;
  }
  return 0;
}
 
开发者ID:vilie,项目名称:javify,代码行数:57,代码来源:NumericShaper.java

示例7: normalize

/**
 * Normalizes one character according to its Unicode block.
 *
 * Non-letters in Basic Latin, characters found in {@code LATIN1_EXCLUDED} and all
 * General Punctuation turn into a space. Romanian comma-below s/t are mapped to the
 * cedilla forms, Farsi yeh to Arabic yeh, and Latin Extended Additional characters
 * from U+1EA0 upwards to U+1EC3. Hiragana, Katakana, Bopomofo (incl. Extended) and
 * Hangul Syllables each collapse to a single character; CJK ideographs are replaced
 * via {@code cjkMap} when mapped.
 *
 * @param ch character
 * @return Normalized character
 */
public static char normalize(char ch) {
    final Character.UnicodeBlock ub = Character.UnicodeBlock.of(ch);

    if (ub == UnicodeBlock.BASIC_LATIN) {
        final boolean inUpperRange = 'A' <= ch && ch <= 'Z';
        final boolean inLowerRange = 'a' <= ch && ch <= 'z';
        if (inUpperRange || inLowerRange) {
            return ch;
        }
        return ' ';
    }
    if (ub == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        if (LATIN1_EXCLUDED.indexOf(ch) >= 0) {
            return ' ';
        }
        return ch;
    }
    if (ub == UnicodeBlock.LATIN_EXTENDED_B) {
        // Romanian normalization: comma-below letters become cedilla letters.
        switch (ch) {
            case '\u0219':
                return '\u015f'; // small s
            case '\u021b':
                return '\u0163'; // small t
            default:
                return ch;
        }
    }
    if (ub == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (ub == UnicodeBlock.ARABIC) {
        if (ch == '\u06cc') {
            return '\u064a'; // Farsi yeh becomes Arabic yeh
        }
        return ch;
    }
    if (ub == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        if (ch >= '\u1ea0') {
            return '\u1ec3';
        }
        return ch;
    }
    if (ub == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (ub == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (ub == UnicodeBlock.BOPOMOFO || ub == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (ub == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjkMap.containsKey(ch)) {
            return cjkMap.get(ch);
        }
        return ch;
    }
    if (ub == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }
    return ch;
}
 
开发者ID:codelibs,项目名称:elasticsearch-langfield,代码行数:49,代码来源:NGram.java


注：本文中的java.lang.Character.UnicodeBlock.LATIN_1_SUPPLEMENT属性示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。