本文整理汇总了Java中java.lang.Character.UnicodeBlock.of方法的典型用法代码示例。如果您正苦于以下问题：Java UnicodeBlock.of方法的具体用法？Java UnicodeBlock.of怎么用？Java UnicodeBlock.of使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类java.lang.Character.UnicodeBlock的用法示例。
在下文中一共展示了UnicodeBlock.of方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: removeDiacritics
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Strips combining diacritical marks from a string.
 *
 * <p>The input is first decomposed to Unicode NFD, so an accented letter becomes a base
 * character followed by its combining marks. Every char whose Unicode block is
 * Combining Diacritical Marks or Combining Diacritical Marks Supplement is then dropped.
 *
 * @param s the text to process
 * @return the NFD-normalized form of {@code s} without those combining marks; when no
 *         mark is present the normalized string itself is returned
 */
public static final String removeDiacritics(String s) {
    final String decomposed = Normalizer.normalize(s, Form.NFD);
    final int length = decomposed.length();

    // The builder is created lazily, on the first mark, so mark-free input costs no copy.
    StringBuilder kept = null;
    int index = 0;
    while (index < length) {
        final char ch = decomposed.charAt(index);
        final UnicodeBlock block = UnicodeBlock.of(ch);
        final boolean isMark = UnicodeBlock.COMBINING_DIACRITICAL_MARKS.equals(block)
                || UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT.equals(block);
        if (!isMark) {
            if (kept != null) {
                kept.append(ch);
            }
        } else if (kept == null) {
            // First mark found: seed the builder with everything that preceded it.
            kept = new StringBuilder(length).append(decomposed, 0, index);
        }
        index++;
    }
    return kept == null ? decomposed : kept.toString();
}
示例2: utf8ToUnicode
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Converts a string into an ASCII-only representation.
 *
 * <p>Despite the name, the method works on the UTF-16 chars of an ordinary Java string:
 * <ul>
 *   <li>Basic Latin chars are copied unchanged;</li>
 *   <li>fullwidth ASCII variants (U+FF01 to U+FF5E) are folded to their ASCII
 *       counterparts;</li>
 *   <li>every other char is written as a backslash, the letter u, and exactly four
 *       lowercase hex digits.</li>
 * </ul>
 *
 * <p>The char value is treated as unsigned, so chars at or above U+8000 no longer pick up a
 * spurious "ffff" prefix, and literal "ffff" text in the input is preserved.
 *
 * @param inStr the text to convert; must not be null
 * @return the converted, ASCII-only text
 */
public static String utf8ToUnicode(String inStr) {
    final int length = inStr.length();
    StringBuilder sb = new StringBuilder(length);
    for (int i = 0; i < length; i++) {
        char c = inStr.charAt(i);
        if (UnicodeBlock.of(c) == UnicodeBlock.BASIC_LATIN) {
            sb.append(c);
        } else if (c >= 0xFF01 && c <= 0xFF5E) {
            // Only this sub-range of the Halfwidth and Fullwidth Forms block mirrors ASCII;
            // the rest of the block (halfwidth katakana, etc.) has no ASCII counterpart.
            sb.append((char) (c - 0xFEE0));
        } else {
            // A char widens to int without sign extension, so at most four hex digits result.
            String hex = Integer.toHexString(c);
            sb.append("\\u");
            for (int pad = hex.length(); pad < 4; pad++) {
                sb.append('0');
            }
            sb.append(hex);
        }
    }
    return sb.toString();
}
示例3: isChinese
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Tells whether a char belongs to the CJK Unified Ideographs block.
 *
 * <p>Only that single block counts. Related blocks (CJK compatibility forms and
 * ideographs, radicals, CJK symbols and punctuation, Extension A/B, Kangxi radicals,
 * ideographic description characters) are deliberately not treated as Chinese here.
 *
 * @param c the char to classify
 * @return {@code true} if {@code c} lies in CJK Unified Ideographs, {@code false}
 *         otherwise (including chars that belong to no block)
 */
public boolean isChinese(char c) {
    // Unicode block constants are singletons, so a reference comparison is enough and
    // avoids building a throw-away set on every call.
    return UnicodeBlock.of(c) == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS;
}
示例4: tokenize
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Splits text into maximal runs of characters that share the same Unicode block.
 *
 * <p>The text is walked by code point rather than by char, so a supplementary character
 * (encoded as a surrogate pair) stays in one piece and is classified by its real block
 * instead of being split into two lone-surrogate tokens. Characters that belong to no
 * block form runs of their own, including at the very start of the text.
 *
 * @param text the text to split; must not be null
 * @return the runs in order of appearance; empty when {@code text} is empty
 */
@Override
public List<String> tokenize(String text) {
    List<String> tokens = new LinkedList<>();
    int runStart = -1;
    UnicodeBlock runBlock = null;
    final int length = text.length();
    int i = 0;
    while (i < length) {
        int codePoint = text.codePointAt(i);
        UnicodeBlock block = UnicodeBlock.of(codePoint);
        // "runStart < 0" opens the first run even when its block is null (unassigned
        // code point); comparing blocks alone would skip such leading characters.
        if (runStart < 0 || runBlock != block) {
            if (runStart >= 0) {
                tokens.add(text.substring(runStart, i));
            }
            runStart = i;
            runBlock = block;
        }
        // Advance past both halves of a surrogate pair at once.
        i += Character.charCount(codePoint);
    }
    if (runStart >= 0) {
        tokens.add(text.substring(runStart));
    }
    return tokens;
}
示例5: utf8ToUnicode
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Converts a string into an ASCII-only representation.
 *
 * <p>Despite the name, the method works on the UTF-16 chars of an ordinary Java string:
 * <ul>
 *   <li>Basic Latin chars (English letters, digits, ...) are copied unchanged;</li>
 *   <li>fullwidth ASCII variants (U+FF01 to U+FF5E) are folded to their ASCII
 *       counterparts;</li>
 *   <li>every other char (Chinese characters and the like) is written as a backslash,
 *       the letter u, and exactly four lowercase hex digits.</li>
 * </ul>
 *
 * <p>The char value is treated as unsigned, so chars at or above U+8000 are not rendered
 * with a spurious "ffff" prefix.
 *
 * @param inStr the text to convert; must not be null
 * @return the converted, ASCII-only text
 * @author fanhui
 * 2007-3-15
 */
public static String utf8ToUnicode(String inStr) {
    final int length = inStr.length();
    StringBuilder sb = new StringBuilder(length);
    for (int i = 0; i < length; i++) {
        char c = inStr.charAt(i);
        if (UnicodeBlock.of(c) == UnicodeBlock.BASIC_LATIN) {
            // English letters, digits and other plain ASCII.
            sb.append(c);
        } else if (c >= 0xFF01 && c <= 0xFF5E) {
            // Fullwidth ASCII variants. Only this sub-range of the Halfwidth and Fullwidth
            // Forms block mirrors ASCII; the rest of the block has no ASCII counterpart.
            sb.append((char) (c - 0xFEE0));
        } else {
            // Chinese characters and everything else. A char widens to int without sign
            // extension, so at most four hex digits result.
            String hex = Integer.toHexString(c);
            sb.append("\\u");
            for (int pad = hex.length(); pad < 4; pad++) {
                sb.append('0');
            }
            sb.append(hex);
        }
    }
    return sb.toString();
}
示例6: of
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Looks up the registered range whose Unicode block contains the given code point.
 *
 * <p>{@code initList()} is invoked first, then the code point (narrowed to {@code int})
 * is mapped to its {@link UnicodeBlock} and {@code s_list} is searched in order for an
 * entry with an equal {@code m_block}.
 *
 * @param a_unicode the code point to look up
 * @return the first matching entry, or {@code null} when the code point belongs to no
 *         block or no entry matches
 */
static public TTUnicodeRange of(long a_unicode) {
    initList();
    final UnicodeBlock wanted = UnicodeBlock.of((int) a_unicode);
    if (wanted != null) {
        for (TTUnicodeRange candidate : s_list) {
            if (candidate.m_block.equals(wanted)) {
                return candidate;
            }
        }
    }
    return null;
}
示例7: guessCJKNameStyle
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Scans a name, code point by code point, for the first letter that reveals a Japanese
 * or Korean style.
 *
 * @param name   the name to inspect
 * @param offset the char index at which scanning starts
 * @return {@code FullNameStyle.JAPANESE} or {@code FullNameStyle.KOREAN} for the first
 *         letter whose block is accepted by {@code isJapanesePhoneticUnicodeBlock} or
 *         {@code isKoreanUnicodeBlock} respectively (checked in that order), otherwise
 *         {@code FullNameStyle.CJK}
 */
private int guessCJKNameStyle(String name, int offset) {
    final int end = name.length();
    for (int pos = offset; pos < end; ) {
        final int codePoint = Character.codePointAt(name, pos);
        // Step over the whole code point up front so surrogate pairs are never split.
        pos += Character.charCount(codePoint);
        if (!Character.isLetter(codePoint)) {
            continue;
        }
        final UnicodeBlock block = UnicodeBlock.of(codePoint);
        if (isJapanesePhoneticUnicodeBlock(block)) {
            return FullNameStyle.JAPANESE;
        }
        if (isKoreanUnicodeBlock(block)) {
            return FullNameStyle.KOREAN;
        }
    }
    return FullNameStyle.CJK;
}
示例8: recognizeLanguage
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Guesses whether a text is Kannada or Tamil by counting chars per Unicode block.
 *
 * <p>When the text contains neither script, the result is {@code PREVIOUS_LANGUAGE} if at
 * least one digit was seen and {@code LANGUAGE_UNKNOWN} otherwise. When either script is
 * present, the one with more chars wins (Kannada on a tie) and the winner is also stored
 * in {@code PREVIOUS_LANGUAGE}.
 *
 * @param text the text to classify
 * @return one of the language constants described above
 */
public static int recognizeLanguage(String text) {
    int kannada = 0;
    int tamil = 0;
    int digits = 0;
    for (char ch : text.toCharArray()) {
        final UnicodeBlock block = UnicodeBlock.of(ch);
        if (block == UnicodeBlock.KANNADA) {
            kannada++;
        } else if (block == UnicodeBlock.TAMIL) {
            tamil++;
        } else if (Character.isDigit(ch)) {
            digits++;
        }
    }

    final boolean noScriptSeen = kannada == 0 && tamil == 0;
    if (noScriptSeen && digits > 0) {
        return PREVIOUS_LANGUAGE;
    }
    if (noScriptSeen) {
        return LANGUAGE_UNKNOWN;
    }

    if (tamil > kannada) {
        PREVIOUS_LANGUAGE = LANGUAGE_TAMIL;
        return LANGUAGE_TAMIL;
    }
    PREVIOUS_LANGUAGE = LANGUAGE_KANNADA;
    return LANGUAGE_KANNADA;
}
示例9: testString
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Prints, for every char of a mixed Hangul/symbol sample, its index, the char itself,
 * the type reported by {@code TypeTokenizer.getType} and its Unicode block.
 */
@Test
public void testString(){
    // NOTE(review): the "[email protected]" fragment looks like an e-mail obfuscation
    // artifact introduced when this snippet was scraped; the original literal probably
    // differed - confirm against the upstream source.
    final String sample = "ㄱㄴㄷㅏㅠㅠ한[email protected]#$%^&*()_";
    for (int index = 0; index < sample.length(); index++) {
        final char current = sample.charAt(index);
        final UnicodeBlock block = UnicodeBlock.of(current);
        final String type = TypeTokenizer.getType(current);
        System.out.println(index + " : " + current + " : " + type + " : " + block);
    }
}
示例10: testAll
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Prints every char in the range 0..999 that {@code TypeTokenizer.getType} reports as
 * {@code TypeTokenizer.UNCATEGORIZED}, together with its index and Unicode block.
 */
@Test
public void testAll(){
    for (int code = 0; code < 1000; code++) {
        char ch = (char) code;
        UnicodeBlock block = UnicodeBlock.of(ch);
        String type = TypeTokenizer.getType(ch);
        // Compare by value: "==" on strings only matches when both sides are the very
        // same instance, which would silently hide chars if getType ever built its result.
        if (TypeTokenizer.UNCATEGORIZED.equals(type)) {
            System.out.println(code + " : " + ch + " : " + type + " : " + block);
        }
    }
}
示例11: toUseGlyphRenderer
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Decides whether a text element should be drawn with the glyph renderer.
 *
 * <p>The truncated text obtained from {@code styledTextUtil} is scanned char by char; the
 * glyph renderer is chosen as soon as one char falls in a block listed in
 * {@code glyphRendererBlocks}.
 *
 * @param text the print text element to inspect
 * @return {@code true} if at least one char belongs to a configured block; {@code false}
 *         when the text is {@code null}, no blocks are configured, or nothing matches
 */
protected boolean toUseGlyphRenderer(JRPrintText text)
{
    final String content = styledTextUtil.getTruncatedText(text);
    if (content == null || glyphRendererBlocks.isEmpty())
    {
        return false;
    }

    final int length = content.length();
    for (int index = 0; index < length; index++)
    {
        final UnicodeBlock block = UnicodeBlock.of(content.charAt(index));
        if (!glyphRendererBlocks.contains(block))
        {
            continue;
        }
        if (log.isTraceEnabled())
        {
            log.trace("found character in block " + block + ", using the glyph renderer");
        }
        return true;
    }
    return false;
}
示例12: isLatinLetter
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Reports whether a char is a letter of the Latin script.
 *
 * <p>Non-spacing marks from the Combining Diacritical Marks block are accepted as well, on
 * the assumption that they decorate a preceding Latin letter.
 *
 * @param letter the char to test
 * @return {@code true} for a letter or non-spacing mark located in Basic Latin, Latin-1
 *         Supplement, Latin Extended-A, Latin Extended-B, Latin Extended Additional or
 *         Combining Diacritical Marks; {@code false} otherwise
 */
// @VisibleForTesting
static boolean isLatinLetter(char letter) {
    // Combining marks are a subset of the non-spacing-mark category.
    final boolean letterOrMark = Character.isLetter(letter)
            || Character.getType(letter) == Character.NON_SPACING_MARK;
    if (!letterOrMark) {
        return false;
    }

    final UnicodeBlock block = UnicodeBlock.of(letter);
    if (block.equals(UnicodeBlock.BASIC_LATIN)) {
        return true;
    }
    if (block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT)) {
        return true;
    }
    if (block.equals(UnicodeBlock.LATIN_EXTENDED_A)) {
        return true;
    }
    if (block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL)) {
        return true;
    }
    if (block.equals(UnicodeBlock.LATIN_EXTENDED_B)) {
        return true;
    }
    return block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
}
示例13: processInput
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Copies the input, substituting {@code replacement} for any dash-like char that directly
 * follows a Hiragana, Katakana or Katakana Phonetic Extensions char.
 *
 * <p>The dash-like chars are the constants listed in the switch below (presumably the code
 * points their names spell out - confirm against their declarations). A dash-like char at
 * the very start, or after a NUL char, is copied unchanged.
 *
 * @param input the text to process
 * @return a buffer holding the processed text
 */
@Override
protected CharSequence processInput(final CharSequence input) {
    final StringBuilder out = new StringBuilder(input.length());
    char previous = 0;
    for (int index = 0; index < input.length(); index++) {
        final char current = input.charAt(index);

        final boolean dashLike;
        switch (current) {
            case U002D:
            case UFF0D:
            case U2010:
            case U2011:
            case U2012:
            case U2013:
            case U2014:
            case U2015:
            case U207B:
            case U208B:
            case U30FC:
                dashLike = true;
                break;
            default:
                dashLike = false;
                break;
        }

        // The block of the preceding char is only consulted for dash-like chars.
        boolean afterKana = false;
        if (dashLike && previous != 0) {
            final UnicodeBlock previousBlock = UnicodeBlock.of(previous);
            afterKana = previousBlock == UnicodeBlock.HIRAGANA
                    || previousBlock == UnicodeBlock.KATAKANA
                    || previousBlock == UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS;
        }

        if (afterKana) {
            out.append(replacement);
        } else {
            out.append(current);
        }
        previous = current;
    }
    return out;
}
示例14: findWordStart
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Walks backwards from {@code start} to the beginning of the word that contains it.
 *
 * <p>When the char at {@code start} is in Basic Latin, the word extends over preceding
 * chars that are an apostrophe, a decimal digit, or an uppercase, lowercase, titlecase or
 * modifier letter. Otherwise it extends over preceding chars in the same Unicode block as
 * the char at {@code start}.
 *
 * @param text  the text to search
 * @param start the index to start from
 * @return the index of the first char of the word, or {@code start} itself when it is at
 *         or beyond the end of {@code text}
 */
private static int findWordStart(CharSequence text, int start) {
    if (start >= text.length()) {
        return start;
    }
    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(start));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = start;
    while (pos > 0) {
        final char before = text.charAt(pos - 1);
        final boolean sameWord;
        if (latinAnchor) {
            switch (Character.getType(before)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.DECIMAL_DIGIT_NUMBER:
                    sameWord = true;
                    break;
                default:
                    // Apostrophes are kept so contractions stay in one word.
                    sameWord = before == '\'';
                    break;
            }
        } else {
            sameWord = UnicodeBlock.of(before) == anchorBlock;
        }
        if (!sameWord) {
            break;
        }
        pos--;
    }
    return pos;
}
示例15: findWordEnd
import java.lang.Character.UnicodeBlock; //导入方法依赖的package包/类
/**
 * Walks forwards from {@code end} to the position just past the word that starts there.
 *
 * <p>When the char at {@code end} is in Basic Latin, the word extends over chars that are
 * an apostrophe, a decimal digit, or an uppercase, lowercase, titlecase or modifier
 * letter. Otherwise it extends over chars in the same Unicode block as the char at
 * {@code end}. The char at {@code end} is itself subject to the test, so a Basic Latin
 * non-word char there yields {@code end} unchanged.
 *
 * @param text the text to search
 * @param end  the index to start from
 * @return the index of the first char that is not part of the word (possibly the text
 *         length), or {@code end} itself when it is at or beyond the end of {@code text}
 */
private static int findWordEnd(CharSequence text, int end) {
    final int length = text.length();
    if (end >= length) {
        return end;
    }
    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(end));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = end;
    while (pos < length) {
        final char here = text.charAt(pos);
        final boolean sameWord;
        if (latinAnchor) {
            switch (Character.getType(here)) {
                case Character.UPPERCASE_LETTER:
                case Character.LOWERCASE_LETTER:
                case Character.TITLECASE_LETTER:
                case Character.MODIFIER_LETTER:
                case Character.DECIMAL_DIGIT_NUMBER:
                    sameWord = true;
                    break;
                default:
                    // Apostrophes are kept so contractions stay in one word.
                    sameWord = here == '\'';
                    break;
            }
        } else {
            sameWord = UnicodeBlock.of(here) == anchorBlock;
        }
        if (!sameWord) {
            break;
        }
        pos++;
    }
    return pos;
}