本文整理汇总了Java中com.ibm.icu.text.CharsetMatch.getName方法的典型用法代码示例。如果您正苦于以下问题:Java CharsetMatch.getName方法的具体用法?Java CharsetMatch.getName怎么用?Java CharsetMatch.getName使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类com.ibm.icu.text.CharsetMatch的用法示例。
在下文中一共展示了CharsetMatch.getName方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: detectEncoding
import com.ibm.icu.text.CharsetMatch; //导入方法依赖的package包/类
/**
 * Detects the character encoding of an input stream with ICU4J
 * (used here instead of juniversalchardet). Only textual streams can be
 * detected.
 *
 * <p>NOTE(review): ICU4J's {@code CharsetDetector.setText(InputStream)} is
 * understood to require a stream that supports mark/reset - confirm that
 * callers pass such a stream.
 *
 * @param in the text input stream to inspect
 * @return the detected charset, or {@code null} when no match is found or the
 *         detected charset name cannot be resolved on this JVM
 * @throws IOException if reading from {@code in} fails
 */
public static Charset detectEncoding(InputStream in) throws IOException {
    final CharsetDetector detector = new CharsetDetector();
    detector.setText(in);
    final CharsetMatch charsetMatch = detector.detect();
    if (charsetMatch == null) {
        log.info("Cannot detect source charset.");
        return null;
    }
    // Confidence is an integer from 0 to 100; the higher the value, the more
    // reliable the detected charset.
    final int confidence = charsetMatch.getConfidence();
    final String name = charsetMatch.getName();
    log.info("CharsetMatch: {} ({}% 相似度,相似度小于 50% 时,可能编码无法判断。)", name, confidence);
    try {
        return Charset.forName(name);
    } catch (IllegalArgumentException e) {
        // Covers IllegalCharsetNameException and UnsupportedCharsetException
        // (both extend IllegalArgumentException): the detector may report a
        // charset this JVM does not provide. Treat it like "not detected".
        log.info("Detected charset {} is not available on this JVM.", name);
        return null;
    }
}
示例2: getText
import com.ibm.icu.text.CharsetMatch; //导入方法依赖的package包/类
/**
 * Extracts the text to be indexed from a document stream.
 *
 * <p>The extractor is looked up in {@code engine} by MIME type. For
 * {@code text/*} content without an explicit encoding, ICU4J is asked to
 * detect one; if it finds no match, {@code encoding} stays {@code null} and
 * is passed to the extractor as is.
 *
 * <p>The stream is always closed before this method returns, including when
 * an exception is thrown.
 *
 * @param mimeType  MIME type of the content
 * @param encoding  charset name of the content, or {@code null} to auto-detect
 *                  for {@code text/*} types
 * @param isContent the content stream; closed by this method
 * @return the extracted text
 * @throws IOException if no extractor exists for {@code mimeType} or reading fails
 */
public static String getText(String mimeType, String encoding, InputStream isContent) throws IOException {
    BufferedInputStream bis = new BufferedInputStream(isContent);
    try {
        TextExtractor te = engine.get(mimeType);
        if (te == null) {
            throw new IOException("Full text indexing of '" + mimeType + "' is not supported");
        }

        if (mimeType.startsWith("text/") && encoding == null) {
            CharsetDetector detector = new CharsetDetector();
            detector.setText(bis);
            CharsetMatch cm = detector.detect();
            // detect() yields null when nothing matches; keep encoding unset then.
            if (cm != null) {
                encoding = cm.getName();
            }
        }

        return te.extractText(bis, mimeType, encoding);
    } finally {
        // Release the stream on failure paths too, not just on success.
        IOUtils.closeQuietly(bis);
    }
}
示例3: showEncode
import com.ibm.icu.text.CharsetMatch; //导入方法依赖的package包/类
/**
 * Builds a display string describing the detected encoding of a document.
 *
 * <p>The document text is turned into a stream via
 * {@code convertStringToStream}, passed to ICU4J for detection, re-checked
 * with {@code verifyPossibleUtf8} when {@code isPoorMatch} flags the
 * confidence, and finally suffixed with the result of {@code showByteOfMark}.
 *
 * @param doc the document whose text is inspected
 * @return the charset description; {@code "NULL"}-based when no charset name
 *         is available, or an empty string if reading the document fails
 */
protected String showEncode(Document doc) {
    String charsetName = "";
    try {
        String convertedPlainText = doc.getText(0, doc.getLength());
        try (InputStream is = convertStringToStream(convertedPlainText)) {
            CharsetMatch charsetMatch = new CharsetDetector().setText(is).detect();
            // detect() returns null when nothing matches: use the same "NULL"
            // placeholder as a missing name and treat it as zero confidence.
            String detectedName = charsetMatch != null ? charsetMatch.getName() : null;
            int confidence = charsetMatch != null ? charsetMatch.getConfidence() : 0;
            charsetName = detectedName != null ? detectedName : "NULL";
            if (isPoorMatch(confidence)) {
                charsetName = verifyPossibleUtf8(charsetName, is);
            }
            charsetName += showByteOfMark(is);
        }
    } catch (BadLocationException | IOException ex) {
        Exceptions.printStackTrace(ex);
    }
    return charsetName;
}
示例4: guessEncoding
import com.ibm.icu.text.CharsetMatch; //导入方法依赖的package包/类
/**
 * Guesses the charset encoding of a byte array.
 *
 * <p>juniversalchardet is consulted first. When it reports nothing, or
 * reports {@code MACCYRILLIC} (which it wrongly returns for windows-1256
 * content), ICU4J is used instead, with {@code UTF-8} as the last resort.
 *
 * @param bytes the byte array to detect the encoding from
 * @return the name of the guessed charset encoding
 */
public static String guessEncoding(byte[] bytes) {
    final UniversalDetector primary = new UniversalDetector(null);
    primary.handleData(bytes, 0, bytes.length);
    primary.dataEnd();
    final String primaryGuess = primary.getDetectedCharset();
    primary.reset();

    // Trust juniversalchardet unless it gave up or produced the known
    // MACCYRILLIC misdetection.
    final boolean trusted = primaryGuess != null && !"MACCYRILLIC".equals(primaryGuess);
    if (trusted) {
        return primaryGuess;
    }

    final CharsetMatch fallback = new CharsetDetector().setText(bytes).detect();
    return fallback == null ? "UTF-8" : fallback.getName();
}
示例5: getCharset
import com.ibm.icu.text.CharsetMatch; //导入方法依赖的package包/类
/**
 * Determines the charset of the given InputStream using ICU4J.
 *
 * <p>The stream is wrapped in a {@link BufferedInputStream} that is closed
 * when detection finishes.
 *
 * @param input the stream to inspect
 * @return Detected charset name; null if not detected.
 * @throws IOException if reading from the stream fails
 * @should detect charset correctly
 */
public static String getCharset(InputStream input) throws IOException {
    final CharsetDetector detector = new CharsetDetector();
    try (BufferedInputStream buffered = new BufferedInputStream(input)) {
        detector.setText(buffered);
        final CharsetMatch bestMatch = detector.detect();
        return bestMatch == null ? null : bestMatch.getName();
    }
}
示例6: ibmICU4j
import com.ibm.icu.text.CharsetMatch; //导入方法依赖的package包/类
/**
 * Detects the charset of a byte array with ICU4J.
 *
 * @param bytes the raw bytes to inspect
 * @return the name of the best matching charset, or {@code null} when the
 *         detector finds no match
 */
private String ibmICU4j(byte[] bytes) {
    CharsetDetector charsetDetector = new CharsetDetector();
    charsetDetector.setText(bytes);
    CharsetMatch charsetMatch = charsetDetector.detect();
    // detect() returns null when no charset matches; avoid the NPE.
    if (charsetMatch == null) {
        return null;
    }
    return charsetMatch.getName();
}
示例7: suggestEncoding
import com.ibm.icu.text.CharsetMatch; //导入方法依赖的package包/类
/**
 * Suggests a charset for the given bytes using ICU4J and logs the match
 * together with its confidence.
 *
 * @param bytes the raw bytes to inspect
 * @return the name of the best matching charset, or {@code null} when the
 *         detector finds no match
 */
protected String suggestEncoding(final byte[] bytes) {
    final CharsetDetector cd = new CharsetDetector();
    cd.setText(bytes);
    final CharsetMatch charsetMatch = cd.detect();
    // detect() returns null when no charset matches; avoid the NPE.
    if (charsetMatch == null) {
        logger.info("CharsetMatch: no charset detected");
        return null;
    }
    final String charSet = charsetMatch.getName();
    final int confidence = charsetMatch.getConfidence();
    logger.info("CharsetMatch: {} ({}% confidence)", charSet, confidence);
    return charSet;
}
示例8: autoDetectEncoding
import com.ibm.icu.text.CharsetMatch; //导入方法依赖的package包/类
/**
 * Detects the charset of the given bytes with ICU4J, logs the match and its
 * confidence, and passes the detected name to {@code setSelectedItem}.
 *
 * @param bytes the raw bytes to inspect
 * @return the name of the best matching charset, or {@code null} when the
 *         detector finds no match (the current selection is then left
 *         untouched)
 */
public String autoDetectEncoding(final byte[] bytes) {
    final CharsetDetector cd = new CharsetDetector();
    cd.setText(bytes);
    final CharsetMatch charsetMatch = cd.detect();
    // detect() returns null when no charset matches; avoid the NPE and keep
    // whatever is currently selected.
    if (charsetMatch == null) {
        logger.info("CharsetMatch: no charset detected");
        return null;
    }
    final String charSet = charsetMatch.getName();
    final int confidence = charsetMatch.getConfidence();
    logger.info("CharsetMatch: {} ({}% confidence)", charSet, confidence);
    setSelectedItem(charSet);
    return charSet;
}
示例9: readFile
import com.ibm.icu.text.CharsetMatch; //导入方法依赖的package包/类
/**
 * Opens a file for reading as text, choosing between UTF-8 and ISO-8859-1.
 *
 * <p>The file is first probed with ICU4J. ISO-8859-1 is used only when the
 * detector reports exactly that charset; in every other case (including a
 * failed probe) the reader decodes as UTF-8.
 *
 * @param file the file to open
 * @return a reader over the file content; the caller is responsible for
 *         closing it
 * @throws UnsupportedEncodingException if the chosen charset is not supported
 * @throws FileNotFoundException if the file cannot be opened
 */
private BufferedReader readFile(File file)
        throws UnsupportedEncodingException, FileNotFoundException {
    String charSet = "UTF-8";
    // Opened outside the try block so that a missing file still surfaces as
    // FileNotFoundException instead of being swallowed by the catch below.
    final FileInputStream probeSource = new FileInputStream(file);
    // try-with-resources closes the probe stream, which was leaked before.
    try (BufferedInputStream bis = new BufferedInputStream(probeSource)) {
        final CharsetDetector cd = new CharsetDetector();
        cd.setText(bis);
        final CharsetMatch cm = cd.detect();
        // Compare string contents, not references.
        if (cm != null && "ISO-8859-1".equals(cm.getName())) {
            charSet = cm.getName();
        }
    } catch (final IOException e) {
        // Detection is best effort: keep the UTF-8 default when probing fails.
        e.printStackTrace();
    }
    return new BufferedReader(new InputStreamReader(
            new FileInputStream(file), charSet));
}
示例10: detectEncoding
import com.ibm.icu.text.CharsetMatch; //导入方法依赖的package包/类
/**
 * Detects the encoding of a byte array by analyzing its content with ICU4J.
 *
 * <p>UTF-8 is the default when nothing is detected. A detected
 * {@code ISO-8859-1} is reported as {@code windows-1252}. If the detected
 * name is missing, malformed, or not available on this JVM, UTF-8 is
 * returned as well.
 *
 * @param bytes the raw bytes to inspect
 * @return the detected charset, never {@code null}
 */
public static Charset detectEncoding(byte[] bytes) {
    String encoding = "UTF-8";
    CharsetDetector detector = new CharsetDetector();
    detector.setText(bytes);
    CharsetMatch match = detector.detect();
    if (match != null) {
        encoding = match.getName();
    }
    // Constant-first comparison stays safe if the match carries no name.
    if ("ISO-8859-1".equalsIgnoreCase(encoding)) {
        encoding = "windows-1252";
    }
    try {
        return Charset.forName(encoding);
    } catch (IllegalArgumentException e) {
        // Null, illegal, or unsupported charset name (the specific charset
        // exceptions extend IllegalArgumentException): use the default.
        return Charset.forName("UTF-8");
    }
}
示例11: uploadHlog
import com.ibm.icu.text.CharsetMatch; //导入方法依赖的package包/类
/**
 * Imports an uploaded CSV file as {@code Hlog} records.
 *
 * <p>Type {@code 3} is delegated to {@code uploadDataSyncFile}. For other
 * types the file must be named {@code <name>.csv}; its bytes are read fully,
 * the encoding is detected with ICU4J, the first line (header) is skipped,
 * and each remaining line is split on commas and mapped onto an {@code Hlog}
 * using {@code DATA_MAPPING} and {@code PARSE_STRATEGIES} for the type. The
 * resulting records are stored through {@code dao.batchAdd}.
 *
 * @param file     the uploaded file
 * @param fileName the original file name, used for the extension check and logging
 * @param type     the business type selecting the column mapping
 * @return a user-facing status message
 */
public String uploadHlog(FileTransfer file,String fileName, Integer type){
    if (type == 3) { // data-sync files have their own upload path
        return uploadDataSyncFile(file, fileName);
    }
    String msg = "";
    try {
        // Reject non-CSV names up front. Previously the message was set but
        // processing continued, and a name without a dot made substring() throw.
        if (fileName.indexOf(".") <= 0
                || !".csv".equals(fileName.substring(fileName.lastIndexOf(".")))) {
            return "文件类型不正确!";
        }

        // Read the whole upload into memory; the stream is closed afterwards.
        byte[] fileBytes;
        try (InputStream in = file.getInputStream()) {
            ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
            byte[] buff = new byte[1024];
            int length;
            while ((length = in.read(buff)) != -1) {
                byteOut.write(buff, 0, length);
            }
            fileBytes = byteOut.toByteArray();
        }

        // Determine the encoding of the uploaded file.
        CharsetDetector detec = new CharsetDetector();
        detec.setText(fileBytes);
        CharsetMatch match = detec.detect();
        if (match == null) {
            throw new IllegalArgumentException("未知的文件编码");
        }
        String encoding = match.getName();
        logger.info("-------- file["+fileName+"] encoding:"+encoding+" --------");

        // This reader wraps a ByteArrayInputStream, so it does not need closing.
        BufferedReader br = new BufferedReader(
                new InputStreamReader(new ByteArrayInputStream(fileBytes), encoding));
        String[] mapping = DATA_MAPPING.get(type);
        Strategy[] strategies = PARSE_STRATEGIES.get(type);
        List<Hlog> hlogList = new ArrayList<Hlog>();
        br.readLine(); // skip the header row
        if (null != mapping) {
            String line;
            while ((line = br.readLine()) != null) {
                String[] values = line.split(",");
                Hlog hlog = new Hlog();
                hlog.setTstat((short)2); // transaction status: success
                hlog.setType((short)5);  // value-added service
                hlog.setMid("325");      // merchant id 325
                BeanWrapper bw = new BeanWrapperImpl(hlog);
                for (int i = 0; i < mapping.length; i++) {
                    String fieldName = mapping[i];
                    if (StringUtils.isNotBlank(fieldName)) {
                        Strategy stra = strategies[i];
                        String value = values[i].trim();
                        if (stra != null) {
                            stra.setValue(fieldName, value, bw); // custom conversion
                        } else if (StringUtils.isNotBlank(value)) {
                            bw.setPropertyValue(fieldName, value); // plain set
                        } else {
                            throw new IllegalArgumentException("["+fieldName+"]不能为空");
                        }
                    }
                }
                hlogList.add(hlog);
            }
            if (!hlogList.isEmpty()) {
                int count = dao.batchAdd(hlogList);
                if (count != 0) {
                    msg = "上传成功,共上传 " + count + " 条记录";
                } else {
                    msg = "上传失败";
                }
            } else {
                msg = "上传失败,上传文件中没有订单数据";
            }
        } else {
            msg = "上传失败,未知的业务类型";
        }
    } catch (Exception e) {
        // Boundary catch: any parsing/storage failure becomes a generic
        // failure message and is logged with the request parameters.
        msg = "上传失败";
        LogUtil.printErrorLog(getClass().getCanonicalName(), "uploadHlog", "fileName="+fileName+" type="+type, e);
    }
    return msg;
}
示例12: ibmICU4j
import com.ibm.icu.text.CharsetMatch; //导入方法依赖的package包/类
/**
 * Detects the charset of a byte array with ICU4J.
 *
 * @param bytes the raw bytes to inspect
 * @return the name of the best matching charset, or {@code null} when the
 *         detector finds no match
 */
private String ibmICU4j(byte[] bytes) {
    CharsetDetector charsetDetector = new CharsetDetector();
    charsetDetector.setText(bytes);
    CharsetMatch charsetMatch = charsetDetector.detect();
    // detect() returns null when no charset matches; avoid the NPE.
    return charsetMatch == null ? null : charsetMatch.getName();
}
示例13: detect
import com.ibm.icu.text.CharsetMatch; //导入方法依赖的package包/类
/**
 * Runs detection on an already configured detector.
 *
 * @param detector the detector to query
 * @return the name of the best match, or {@code null} when there is no match
 */
public static String detect(CharsetDetector detector) {
    final CharsetMatch best = detector.detect();
    if (best == null) {
        return null;
    }
    return best.getName();
}