本文整理汇总了Java中org.mozilla.universalchardet.UniversalDetector.getDetectedCharset方法的典型用法代码示例。如果您正苦于以下问题：Java UniversalDetector.getDetectedCharset方法的具体用法？Java UniversalDetector.getDetectedCharset怎么用？Java UniversalDetector.getDetectedCharset使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.mozilla.universalchardet.UniversalDetector的用法示例。
在下文中一共展示了UniversalDetector.getDetectedCharset方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getFileCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the character encoding of a file by streaming its bytes through a
 * {@link UniversalDetector}.
 *
 * @param file the file to inspect
 * @return the charset name reported by the detector, or {@code null} when the
 *         detector did not identify one
 * @throws IOException if the file cannot be opened or read
 */
public static String getFileCharset(File file) throws IOException {
    final byte[] buf = new byte[4096];
    final UniversalDetector universalDetector = new UniversalDetector(null);
    // try-with-resources guarantees the stream is closed even when read()
    // throws, so a failed read does not leak the file handle.
    try (BufferedInputStream bufferedInputStream = new BufferedInputStream(
            new FileInputStream(file))) {
        int numberOfBytesRead;
        // Stop at end of stream or as soon as the detector says it is done.
        while ((numberOfBytesRead = bufferedInputStream.read(buf)) > 0
                && !universalDetector.isDone()) {
            universalDetector.handleData(buf, 0, numberOfBytesRead);
        }
    }
    // Signal end of input before asking for the result.
    universalDetector.dataEnd();
    String encoding = universalDetector.getDetectedCharset();
    universalDetector.reset();
    return encoding;
}
示例2: determineCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Determines the charset of the given bytes using the detector obtained from
 * {@code charsetDetector}.
 *
 * @param bytes the raw data to analyse
 * @return the {@link Charset} matching the detected encoding name, or
 *         {@code null} when the detector reports no encoding
 */
@Override
public Charset determineCharset(byte[] bytes) {
    final UniversalDetector probe = charsetDetector.get();
    try {
        probe.handleData(bytes, 0, bytes.length);
        probe.dataEnd();
        final String name = probe.getDetectedCharset();
        return (name == null) ? null : Charset.forName(name);
    } finally {
        // The detector is obtained from charsetDetector rather than created
        // here, so it is reset on every exit path (presumably because it is
        // reused across calls -- confirm against the field's declaration).
        probe.reset();
    }
}
示例3: detectCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the character encoding of the data supplied by a stream.
 *
 * <p>The stream is read in 4096-byte chunks and is not closed by this method.
 *
 * @param fis the stream to read from
 * @return the charset name reported by the detector, or {@code null} when the
 *         detector did not identify one
 * @throws IOException if reading from the stream fails
 */
public static String detectCharset(InputStream fis) throws IOException {
    final UniversalDetector probe = new UniversalDetector(null);
    final byte[] chunk = new byte[4096];

    // Feed chunks until the stream is exhausted or the detector is done.
    for (int len = fis.read(chunk); len > 0 && !probe.isDone(); len = fis.read(chunk)) {
        probe.handleData(chunk, 0, len);
    }

    // Tell the detector there is no more input, then collect the result.
    probe.dataEnd();
    final String detected = probe.getDetectedCharset();

    probe.reset();
    return detected;
}
示例4: guessEncoding
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Guesses the charset encoding of a byte array.
 *
 * <p>juniversalchardet is tried first. If it yields nothing, or yields
 * {@code MACCYRILLIC}, ICU4J's {@code CharsetDetector} is consulted instead,
 * and {@code "UTF-8"} is used when ICU4J produces no match either.
 *
 * @param bytes the byte array to detect the encoding from
 * @return the name of the guessed charset encoding
 */
public static String guessEncoding(byte[] bytes) {
    UniversalDetector probe = new UniversalDetector(null);
    probe.handleData(bytes, 0, bytes.length);
    probe.dataEnd();
    String guess = probe.getDetectedCharset();
    probe.reset();

    boolean trusted = guess != null && !"MACCYRILLIC".equals(guess);
    if (trusted) {
        return guess;
    }

    // juniversalchardet incorrectly detects windows-1256 as MACCYRILLIC, so
    // a MACCYRILLIC (or missing) result is re-checked with ICU4J.
    CharsetMatch match = new CharsetDetector().setText(bytes).detect();
    return (match != null) ? match.getName() : "UTF-8";
}
示例5: createFromEventBody
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Builds an {@code EnrichedEventBodyGeneric} from a raw event payload.
 *
 * <p>An enriched payload is deserialized through {@code JSONStringSerializer}.
 * Otherwise the payload's charset is detected (falling back to
 * {@code DEFAULT_CHARSET}) and the decoded text is wrapped in a new body.
 *
 * @param payload    the raw event bytes
 * @param isEnriched whether the payload is already a serialized enriched body
 * @param clazz      the class passed to the serializer / body constructor
 * @return the resulting enriched event body
 * @throws IOException declared by the signature; the visible body does not
 *                     show which call raises it
 */
public static <T> EnrichedEventBodyGeneric createFromEventBody(byte[] payload, boolean isEnriched, Class<T> clazz) throws IOException {
    if (isEnriched) {
        JavaType targetType = JSONStringSerializer.getJavaType(EnrichedEventBodyGeneric.class, clazz);
        return (EnrichedEventBodyGeneric) JSONStringSerializer.fromBytes(payload, targetType);
    }

    // Not enriched: work out which charset the raw bytes are in.
    UniversalDetector probe = new UniversalDetector(null);
    probe.handleData(payload, 0, payload.length);
    probe.dataEnd();
    String detected = probe.getDetectedCharset();
    probe.reset();

    String charsetName = (detected != null) ? detected : DEFAULT_CHARSET;
    return new EnrichedEventBodyGeneric(new String(payload, charsetName), clazz);
}
示例6: createFromEventBody
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Builds an {@code EnrichedEventBody} from a raw event payload.
 *
 * <p>An enriched payload is deserialized through {@code JSONStringSerializer}.
 * Otherwise the payload's charset is detected (falling back to
 * {@code DEFAULT_CHARSET}) and the decoded text is wrapped in a new body.
 *
 * @param payload    the raw event bytes
 * @param isEnriched whether the payload is already a serialized enriched body
 * @return the resulting enriched event body
 * @throws IOException declared by the signature; the visible body does not
 *                     show which call raises it
 */
public static EnrichedEventBody createFromEventBody(byte[] payload, boolean isEnriched) throws IOException {
    if (isEnriched) {
        return JSONStringSerializer.fromBytes(payload, EnrichedEventBody.class);
    }

    // Not enriched: work out which charset the raw bytes are in.
    UniversalDetector probe = new UniversalDetector(null);
    probe.handleData(payload, 0, payload.length);
    probe.dataEnd();
    String detected = probe.getDetectedCharset();
    probe.reset();

    String charsetName = (detected != null) ? detected : DEFAULT_CHARSET;
    return new EnrichedEventBody(new String(payload, charsetName));
}
示例7: testEventCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Checks that an event body built from the bytes of
 * {@code src/test/resources/notUTFString.txt} is emitted by
 * {@code buildEventBody()} as bytes the detector identifies as UTF-8.
 */
@Test
public void testEventCharset() throws IOException {
    byte[] payload = Files.readAllBytes(Paths.get("src/test/resources/notUTFString.txt"));
    EnrichedEventBody body = EnrichedEventBody.createFromEventBody(payload, false);
    byte[] output = body.buildEventBody();

    // Run the detector over the produced bytes.
    UniversalDetector probe = new UniversalDetector(null);
    probe.handleData(output, 0, output.length);
    probe.dataEnd();
    String actual = probe.getDetectedCharset();
    probe.reset();

    Assert.assertEquals(actual, StandardCharsets.UTF_8.name(), "Invalid charset");
}
示例8: detectEncoding
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the encoding of the data supplied by a stream.
 *
 * <p>The stream is consumed in 4096-byte chunks until it ends or the detector
 * reports that it is done; the stream is not closed here.
 *
 * @param stream the stream to read from
 * @return the charset name reported by the detector, or {@code null} when the
 *         detector did not identify one
 * @throws IOException if reading from the stream fails
 * @see <a href="https://code.google.com/p/juniversalchardet/">Original</a>
 * @see <a href="https://github.com/amake/juniversalchardet">Fork</a>
 */
public static String detectEncoding(InputStream stream) throws IOException {
    final byte[] chunk = new byte[4096];
    final UniversalDetector probe = new UniversalDetector(null);

    int count = stream.read(chunk);
    while (count > 0 && !probe.isDone()) {
        probe.handleData(chunk, 0, count);
        count = stream.read(chunk);
    }

    probe.dataEnd();
    final String detected = probe.getDetectedCharset();
    probe.reset();
    return detected;
}
示例9: detectCharacterSet
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the character set of the input text.
 *
 * @param input The input text as a byte array; may be {@code null}.
 * @return The character set of the input text, or {@code null} if the input is
 *         {@code null} or no character set can be detected.
 * @throws RuntimeException if the detected character set name is not supported
 *         by this JVM; the original {@link UnsupportedCharsetException} is
 *         attached as the cause.
 */
public static Charset detectCharacterSet(byte[] input) {
    if (input == null) {
        return null;
    }
    Charset charset = null;
    // Work on a private copy of the caller's array.
    input = input.clone();
    UniversalDetector detector = new UniversalDetector(null);
    detector.handleData(input, 0, input.length);
    detector.dataEnd();
    String detectedCharset = detector.getDetectedCharset();
    if (StringUtils.hasText(detectedCharset)) {
        try {
            charset = Charset.forName(detectedCharset);
        } catch (UnsupportedCharsetException e) {
            // Chain the cause so the stack trace of the lookup failure is not lost.
            throw new RuntimeException("Detected unsupported character set " + detectedCharset, e);
        }
    }
    return charset;
}
示例10: detect
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the encoding of the data supplied by a stream and closes the stream.
 *
 * <p>The detector is obtained from {@code Charset.getSingleton()} rather than
 * created here, so it is reset on every exit path to avoid leaving state from
 * a failed read behind for the next use.
 *
 * @param inputStream the stream to read from; it is closed by this method
 * @return the detected charset name, or {@code UTF8} when none is detected
 * @throws IOException if reading from or closing the stream fails
 */
public static String detect(InputStream inputStream) throws IOException {
    UniversalDetector detector = Charset.getSingleton()
            .getCharsetDetector();
    String encoding;
    // try-with-resources closes the stream even when read() throws.
    try (InputStream in = inputStream) {
        byte[] buf = new byte[4096];
        int nread;
        while ((nread = in.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
        detector.dataEnd();
        encoding = detector.getDetectedCharset();
    } finally {
        // Reset on failure as well as on success.
        detector.reset();
    }
    if (encoding == null) {
        // If no encoding is detected, we assume UTF-8
        encoding = UTF8;
    }
    return encoding;
}
示例11: detectFileCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the text encoding of a file.
 *
 * @param file         the file to inspect
 * @param detectLength size, in bytes, of the buffer used for each read
 * @return the charset name reported by the detector, or {@code null} when the
 *         detector did not identify one
 * @throws IOException if the file cannot be opened, read or closed
 */
public static String detectFileCharset(File file, int detectLength) throws IOException {
    FileInputStream in = null;
    try {
        final byte[] chunk = new byte[detectLength];
        in = new FileInputStream(file);
        final UniversalDetector probe = new UniversalDetector(null);

        // Feed chunks until the file is exhausted or the detector is done.
        for (int len = in.read(chunk); len > 0 && !probe.isDone(); len = in.read(chunk)) {
            probe.handleData(chunk, 0, len);
        }

        probe.dataEnd();
        final String detected = probe.getDetectedCharset();
        probe.reset();
        return detected;
    } finally {
        if (in != null) {
            in.close();
        }
    }
}
示例12: guessCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Guesses the character encoding of the named file and logs the outcome.
 *
 * @param fileName path of the file to inspect
 * @return the charset name reported by the detector, or {@code null} when the
 *         detector did not identify one
 * @throws IOException if the file cannot be opened, read or closed
 */
public static String guessCharset(String fileName) throws IOException{
    byte[] buf = new byte[4096];
    UniversalDetector detector = new UniversalDetector(null);
    // try-with-resources ensures the file handle is released on every path,
    // including when read() throws.
    try (java.io.FileInputStream fis = new java.io.FileInputStream(fileName)) {
        int nread;
        while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
    }
    detector.dataEnd();
    String encoding = detector.getDetectedCharset();
    if (encoding != null) {
        Log.d("ConvertUtil",fileName+" detected encoding = " + encoding);
    } else {
        Log.d("ConvertUtil","No encoding detected = " + encoding);
    }
    detector.reset();
    return encoding;
}
示例13: extractCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the charset of the content supplied by a stream.
 *
 * <p>The detected name is returned only when it is non-null and accepted by
 * {@code CrawlUtils.isValidCharset}; otherwise {@code DEFAULT_CHARSET} is
 * returned. The stream is not closed by this method.
 *
 * @param is the stream to read from
 * @return the detected charset name, or {@code DEFAULT_CHARSET}
 * @throws java.io.IOException if reading from the stream fails
 */
public static String extractCharset(InputStream is) throws java.io.IOException {
    final UniversalDetector probe = new UniversalDetector(null);
    final byte[] chunk = new byte[4096];

    int len = is.read(chunk);
    while (len > 0 && !probe.isDone()) {
        probe.handleData(chunk, 0, len);
        len = is.read(chunk);
    }
    probe.dataEnd();

    final String detected = probe.getDetectedCharset();
    final boolean found = detected != null;
    if (found) {
        LOGGER.debug("Detected encoding = " + detected);
    } else {
        LOGGER.debug("No encoding detected.");
    }
    probe.reset();

    // Fall back unless a name was detected and it passes validation.
    if (!found || !CrawlUtils.isValidCharset(detected)) {
        return DEFAULT_CHARSET;
    }
    return detected;
}
示例14: detectEncoding
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the encoding of a file, falling back to {@code DEFAULT_ENCODING}.
 *
 * @param file the file to inspect
 * @return the detected charset name, or {@code DEFAULT_ENCODING} when the
 *         detector did not identify one
 * @throws IOException if the file cannot be opened or read
 */
private static String detectEncoding(File file) throws IOException {
    byte[] buf = new byte[4096];
    UniversalDetector detector = new UniversalDetector(null);
    FileInputStream fis = new FileInputStream(file);
    try {
        int nread;
        while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
    } finally {
        // Close in finally so a failing read() does not leak the file handle.
        Util.closeStream(fis);
    }
    detector.dataEnd();
    String encoding = detector.getDetectedCharset();
    if (encoding == null) {
        encoding = DEFAULT_ENCODING;
    }
    return encoding;
}
示例15: getFileEncoding
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Gets the encoding of a file.
 *
 * <p>The whole file is loaded into memory via
 * {@code FileUtils.readFileToByteArray} and handed to the detector in one call.
 *
 * @author eko.zhan at Jul 3, 2017 1:54:50 PM
 * @param file the file to inspect
 * @return the charset name reported by the detector, or {@code null} when the
 *         detector did not identify one
 * @throws IOException if the file cannot be read
 */
public static String getFileEncoding(File file) throws IOException{
    final byte[] content = FileUtils.readFileToByteArray(file);
    final UniversalDetector probe = new UniversalDetector(null);
    probe.handleData(content, 0, content.length);
    probe.dataEnd();
    final String detected = probe.getDetectedCharset();
    return detected;
}