本文整理汇总了Java中java.text.Normalizer类的典型用法代码示例。如果您正苦于以下问题：Java Normalizer类的具体用法？Java Normalizer怎么用？Java Normalizer使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
Normalizer类属于java.text包,在下文中一共展示了Normalizer类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: encode_UTF8
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Encodes a string, which may contain non-ASCII characters, byte by byte from its UTF-8 form.
 *
 * <p>The input is brought into normalization form NFKC and encoded with {@code UTF8}. Each
 * resulting byte is either copied through as a character (when {@code isUnreserved} accepts it,
 * or when it is the exempt character {@code e}) or handed to {@code appendEscape}.
 *
 * @param s
 *            the string to encode
 * @param e
 *            a character that does not require encoding if found in the string. It is only
 *            honoured when it is ASCII (below 0x80); a larger value could otherwise equal one
 *            byte of a multi-byte UTF-8 sequence and let that raw byte through unescaped.
 * @return the encoded string
 */
private static String encode_UTF8(String s, char e) {
    // NOTE: java.text.Normalizer requires Java 6 or later.
    String n = (Normalizer.isNormalized(s, Form.NFKC)) ? s : Normalizer.normalize(s, Form.NFKC);
    // convert String to UTF-8
    ByteBuffer bb = UTF8.encode(n);
    // In UTF-8 every byte >= 0x80 is part of a multi-byte sequence, so the exemption may only
    // ever apply to a single-byte (ASCII) character.
    final boolean exemptIsAscii = e < 0x80;
    // URI encode
    StringBuffer sb = new StringBuffer();
    while (bb.hasRemaining()) {
        int b = bb.get() & 0xff;
        if (isUnreserved(b) || (exemptIsAscii && b == e)) {
            sb.append((char) b);
        } else {
            appendEscape(sb, (byte) b);
        }
    }
    return sb.toString();
}
示例2: minimalEncode_UTF8
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Encodes a string, which may contain non-ASCII characters, byte by byte from its UTF-8 form,
 * escaping only what {@code isLegal} rejects.
 *
 * <p>The input is brought into normalization form NFKC and encoded with {@code UTF8}. Bytes
 * accepted by {@code isLegal} are copied through as characters; all others are handed to
 * {@code appendEscape}.
 *
 * @param s
 *            the string to encode
 * @return the encoded string
 */
private static String minimalEncode_UTF8(String s) {
    // NOTE: java.text.Normalizer requires Java 6 or later.
    String normalized = s;
    if (!Normalizer.isNormalized(s, Form.NFKC)) {
        normalized = Normalizer.normalize(s, Form.NFKC);
    }

    StringBuffer escaped = new StringBuffer();
    for (ByteBuffer utf8 = UTF8.encode(normalized); utf8.hasRemaining();) {
        // Mask to get the unsigned value of the byte.
        int octet = utf8.get() & 0xff;
        if (!isLegal(octet)) {
            appendEscape(escaped, (byte) octet);
            continue;
        }
        escaped.append((char) octet);
    }
    return escaped.toString();
}
示例3: isNormalized
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Tests whether a string is in the given normalization form.
 *
 * <p>The check is delegated to the {@code quickCheck} method of the mode object matching
 * {@code form} ({@code NFC}, {@code NFD}, {@code NFKC} or {@code NFKD}), run over the whole
 * string; the string counts as normalized only when that call yields {@code YES}.
 * The original documentation describes this as semantically equivalent to
 * {@code source.equals(normalize(source, mode))} and as always giving a definitive answer,
 * never a "maybe" (presumably what the {@code false} argument selects; confirm against
 * {@code quickCheck}).
 *
 * @param str the input string to be checked to see if it is normalized
 * @param form the normalization form
 * @param options the optional features to be enabled; passed to {@code NormalizerImpl.getNX}
 * @return {@code true} if the quick check reports {@code YES} for the requested form
 * @throws IllegalArgumentException if {@code form} is not one of the four handled forms
 */
public static boolean isNormalized(String str, Normalizer.Form form, int options) {
    switch (form) {
    case NFC: {
        final char[] text = str.toCharArray();
        return NFC.quickCheck(text, 0, text.length, false, NormalizerImpl.getNX(options)) == YES;
    }
    case NFD: {
        final char[] text = str.toCharArray();
        return NFD.quickCheck(text, 0, text.length, false, NormalizerImpl.getNX(options)) == YES;
    }
    case NFKC: {
        final char[] text = str.toCharArray();
        return NFKC.quickCheck(text, 0, text.length, false, NormalizerImpl.getNX(options)) == YES;
    }
    case NFKD: {
        final char[] text = str.toCharArray();
        return NFKD.quickCheck(text, 0, text.length, false, NormalizerImpl.getNX(options)) == YES;
    }
    default:
        throw new IllegalArgumentException("Unexpected normalization form: " + form);
    }
}
示例4: translateBytes
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Translates transferred bytes, special-casing URL and string formats before delegating.
 *
 * <p>For {@code CF_URL} with a {@link URL} representation class, the bytes are decoded with the
 * default text charset, or with the charset named by the transferable's
 * {@code javaTextEncodingFlavor} data when that flavor is supported, and returned as a URL.
 * For {@code CF_STRING}, the bytes are treated as UTF-8 text and re-encoded after NFC
 * normalization. Everything else goes to the superclass unchanged.
 */
@Override
public Object translateBytes(byte[] bytes, DataFlavor flavor,
        long format, Transferable transferable) throws IOException {
    final boolean wantsUrl =
            format == CF_URL && URL.class.equals(flavor.getRepresentationClass());
    if (wantsUrl) {
        String encoding = getDefaultTextCharset();
        final boolean hasEncodingHint =
                transferable != null && transferable.isDataFlavorSupported(javaTextEncodingFlavor);
        if (hasEncodingHint) {
            try {
                byte[] encodingName = (byte[]) transferable.getTransferData(javaTextEncodingFlavor);
                encoding = new String(encodingName, "UTF-8");
            } catch (UnsupportedFlavorException ignored) {
                // Support for this flavor was checked just above; keep the default charset.
            }
        }
        String spec = new String(bytes, encoding);
        return new URL(spec);
    }

    if (format == CF_STRING) {
        String text = new String(bytes, "UTF8");
        String composed = Normalizer.normalize(text, Form.NFC);
        bytes = composed.getBytes("UTF8");
    }
    return super.translateBytes(bytes, flavor, format, transferable);
}
示例5: setWaarde
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Stores the search value and derives the search flags from it.
 *
 * <p>A leading backslash marks the value as exact and a trailing {@code *} marks it as a
 * wildcard; in both cases the marker is cut off the stored search value. Unless the search is
 * exact, a value without any {@code A-Z} character on a string attribute is flagged
 * case-insensitive, and a value containing a diacritic is flagged as such.
 * This method only ever switches flags on; it never resets them.
 *
 * @param waarde the raw search value, may be {@code null}
 */
public void setWaarde(final String waarde) {
    this.waarde = waarde;
    if (waarde == null) {
        this.slimZoekenWaarde = null;
        return;
    }

    final String searchValue;
    if (waarde.startsWith("\\")) {
        this.exact = true;
        searchValue = waarde.substring(1);
    } else if (waarde.endsWith("*")) {
        this.wildcard = true;
        searchValue = waarde.substring(0, waarde.length() - 1);
    } else {
        searchValue = waarde;
    }
    this.slimZoekenWaarde = searchValue;

    if (this.exact) {
        return;
    }

    final boolean hasUpperCase = waarde.matches(".*[A-Z].*");
    if (!hasUpperCase && attribuut.isString()) {
        this.caseInsensitive = true;
    }

    // NFD separates base letters from their combining marks, which makes the marks detectable.
    final String decomposed = Normalizer.normalize(waarde, Normalizer.Form.NFD);
    final boolean hasDiacritic =
            Pattern.compile("\\p{InCombiningDiacriticalMarks}+").matcher(decomposed).find();
    if (hasDiacritic) {
        this.diakriet = true;
    }
}
示例6: convertToJavaEnumName
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Converts a name into a Java enum constant name.
 *
 * <p>A name starting with {@code LITERAL} has that prefix removed and the remainder passed
 * through {@code StringEscapeUtils.unescapeJava}. Any other name is decomposed (NFD), has
 * whitespace and hyphens turned into underscores, is upper-cased, stripped of everything
 * outside {@code A-Z}, {@code 0-9} and {@code _}, and has runs of underscores collapsed.
 *
 * @param javaNameBase the name to convert
 * @return the enum constant name
 */
public static String convertToJavaEnumName(final String javaNameBase) {
    if (javaNameBase.startsWith(LITERAL)) {
        // The prefix is known to be present, so cut it off by length rather than building a
        // regex from it, which would misbehave if the prefix held regex metacharacters.
        return StringEscapeUtils.unescapeJava(javaNameBase.substring(LITERAL.length()));
    }

    // Decompose accented letters so the base letter survives the ASCII filter below.
    String result = Normalizer.normalize(javaNameBase, Normalizer.Form.NFD);
    // Replace whitespace and hyphens with underscore
    result = result.replaceAll("(\\s|-)", "_");
    // Upper-case independently of the default locale: under a Turkish locale 'i' would become
    // dotted capital 'I' (U+0130), which the filter below would then drop.
    result = result.toUpperCase(java.util.Locale.ROOT);
    // Remove unsupported characters (this also removes the combining marks left by NFD)
    result = result.replaceAll("[^A-Z0-9_]", "");
    // Remove duplicate separators
    result = result.replaceAll("_{2,}", "_");
    return result;
}
示例7: removeDiacritics
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Removes diacritics from the specified string.
 *
 * <p>The string is decomposed (NFD) and every character belonging to the Unicode blocks
 * "Combining Diacritical Marks" or "Combining Diacritical Marks Supplement" is dropped.
 * When nothing is dropped, the decomposed string itself is returned.
 *
 * @param s the string to process
 * @return the NFD form of the specified string with diacritics removed
 */
public static final String removeDiacritics(String s) {
    final String decomposed = Normalizer.normalize(s, Form.NFD);
    final int length = decomposed.length();

    // Allocated lazily on the first mark, so mark-free input costs no copy.
    StringBuilder kept = null;
    for (int pos = 0; pos < length; pos++) {
        final char ch = decomposed.charAt(pos);
        final UnicodeBlock block = UnicodeBlock.of(ch);
        final boolean isMark = block == UnicodeBlock.COMBINING_DIACRITICAL_MARKS
                || block == UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT;
        if (!isMark) {
            if (kept != null) {
                kept.append(ch);
            }
        } else if (kept == null) {
            // First mark found: seed the builder with everything before it.
            kept = new StringBuilder(length);
            kept.append(decomposed, 0, pos);
        }
    }
    return kept == null ? decomposed : kept.toString();
}
示例8: toSlug
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Converts the specified string to its slug representation, which can be used to generate
 * readable and SEO-friendly URLs.
 *
 * <p>Steps, in order: transliterate, replace {@code WHITESPACE} matches with {@code "-"},
 * decompose (NFD), delete {@code NONLATIN} matches, delete {@code EDGESDHASHES} matches, and
 * lower-case using the English locale.
 *
 * @param input string, which will be converted.
 * @return slug representation of the string
 */
public static String toSlug(String input) {
    String slug = transliterator.transform(input);
    slug = WHITESPACE.matcher(slug).replaceAll("-");
    slug = Normalizer.normalize(slug, Normalizer.Form.NFD);
    slug = NONLATIN.matcher(slug).replaceAll("");
    slug = EDGESDHASHES.matcher(slug).replaceAll("");
    return slug.toLowerCase(Locale.ENGLISH);
}
示例9: toSlug
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Converts the specified string to its slug representation, which can be used to generate
 * readable and SEO-friendly URLs.
 *
 * <p>The input is transliterated, {@code WHITESPACE} matches become {@code "-"}, the text is
 * decomposed (NFD), {@code NONLATIN} and then {@code EDGESDHASHES} matches are deleted, and the
 * result is lower-cased using the English locale.
 *
 * @param input string, which will be converted.
 * @return slug representation of the string
 */
public String toSlug(String input) {
    final String dashed = WHITESPACE.matcher(transliterator.transform(input)).replaceAll("-");
    final String decomposed = Normalizer.normalize(dashed, Normalizer.Form.NFD);
    final String latinOnly = NONLATIN.matcher(decomposed).replaceAll("");
    final String trimmed = EDGESDHASHES.matcher(latinOnly).replaceAll("");
    return trimmed.toLowerCase(Locale.ENGLISH);
}
示例10: processData
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Reduces the input to unaccented ASCII letters and spaces.
 *
 * @param input the text to clean
 * @return the input with every character other than {@code a-z}, {@code A-Z} and space removed
 */
private String processData(String input) {
    // NFD splits an accented letter into base letter + combining mark; the filter then
    // discards the mark (along with digits, punctuation, etc.) and keeps the base letter.
    return Normalizer.normalize(input, Normalizer.Form.NFD).replaceAll("[^a-zA-Z ]", "");
}
示例11: convertToAlphaNumerics
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Strips accents and unsupported characters from a value.
 *
 * <p>The value is decomposed (NFD), combining diacritical marks are removed, everything except
 * {@code -}, {@code _}, ASCII letters, digits and whitespace is removed, and finally space
 * characters are removed. The value is logged at debug level before and after.
 *
 * @param value the value to clean
 * @return the cleaned value
 */
public static String convertToAlphaNumerics(String value) {
    logger.debug("Before : " + value);
    final String decomposed = Normalizer.normalize(value, Form.NFD);
    final String unaccented = decomposed.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
    final String filtered = unaccented.replaceAll("[^-_a-zA-Z0-9\\s]", "");
    // Only the space character is removed here; other whitespace kept by \s above remains.
    final String result = filtered.replace(" ", "");
    logger.debug("After : " + result);
    return result;
}
示例12: deAccent
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Removes accents from a value by decomposing it (NFD) and deleting all combining diacritical
 * marks. The value is logged at debug level before and after.
 *
 * @param value the value to strip accents from
 * @return the value without combining diacritical marks
 */
public static String deAccent(String value) {
    logger.debug("Before : " + value);
    final String decomposed = Normalizer.normalize(value, Form.NFD);
    final String stripped = Pattern.compile("\\p{InCombiningDiacriticalMarks}+")
            .matcher(decomposed)
            .replaceAll("");
    logger.debug("After : " + stripped);
    return stripped;
}
示例13: setTag
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Sets the tag after reducing it to lower-case ASCII letters and digits.
 *
 * <p>A non-null tag is lower-cased, decomposed (NFD), stripped of combining diacritical marks
 * and then of every character outside {@code a-z} and {@code 0-9}.
 *
 * @param tag the raw tag; {@code null} is stored as-is
 */
public void setTag(String tag) {
    if (tag != null) {
        // Lower-case independently of the default locale: under a Turkish locale 'I' would
        // become dotless 'i' (U+0131), which the ASCII filter below would silently delete.
        tag = tag.toLowerCase(java.util.Locale.ROOT);
        tag = Normalizer.normalize(tag, Normalizer.Form.NFD);
        tag = tag.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
        tag = tag.replaceAll("[^a-z0-9]", "");
    }
    this.tag = tag;
}
示例14: setFullTag
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Sets the full tag after reducing it to lower-case ASCII letters, digits and hyphens.
 *
 * <p>A non-null tag is lower-cased, decomposed (NFD), stripped of combining diacritical marks
 * and then of every character outside {@code a-z}, {@code 0-9} and {@code -}.
 *
 * @param fullTag the raw full tag; {@code null} is stored as-is
 */
public void setFullTag(String fullTag) {
    if (fullTag != null) {
        // Lower-case independently of the default locale: under a Turkish locale 'I' would
        // become dotless 'i' (U+0131), which the ASCII filter below would silently delete.
        fullTag = fullTag.toLowerCase(java.util.Locale.ROOT);
        fullTag = Normalizer.normalize(fullTag, Normalizer.Form.NFD);
        fullTag = fullTag.replaceAll("[\\p{InCombiningDiacriticalMarks}]", "");
        fullTag = fullTag.replaceAll("[^a-z0-9-]", "");
    }
    this.fullTag = fullTag;
}
示例15: convertToAlphaNumerics
import java.text.Normalizer; //导入依赖的package包/类
/**
 * Reduces a value to ASCII letters and digits, falling back to a generated default.
 *
 * <p>The value's ISO-8859-1 bytes are re-read as UTF-8, the result is decomposed (NFD), all
 * non-ASCII characters are removed, everything except ASCII letters, digits and whitespace is
 * removed, and finally space characters are removed. If nothing is left, the result is
 * {@code "default"} followed by {@code countApp}.
 *
 * @param value the value to clean
 * @param countApp number appended to {@code "default"} when the cleaned value is empty
 * @return the cleaned value, or the generated default
 * @throws UnsupportedEncodingException if ISO-8859-1 or UTF-8 is not supported
 */
public static String convertToAlphaNumerics(String value, Integer countApp)
        throws UnsupportedEncodingException {
    final byte[] latin1Bytes = value.getBytes("ISO-8859-1");
    final String redecoded = new String(latin1Bytes, "UTF-8");
    final String decomposed = Normalizer.normalize(redecoded, Form.NFD);
    final String asciiOnly = decomposed.replaceAll("[^\\p{ASCII}]", "");
    // Only the space character is removed at the end; other whitespace kept by \s remains.
    final String cleaned = asciiOnly.replaceAll("[^a-zA-Z0-9\\s]", "").replace(" ", "");
    return cleaned.isEmpty() ? "default" + countApp : cleaned;
}