本文整理汇总了 Java 中 org.mozilla.universalchardet.UniversalDetector.reset 方法的典型用法代码示例。如果您正苦于以下问题:Java UniversalDetector.reset 方法的具体用法?Java UniversalDetector.reset 怎么用?Java UniversalDetector.reset 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.mozilla.universalchardet.UniversalDetector 的用法示例。
在下文中一共展示了 UniversalDetector.reset 方法的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: getFileCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the character encoding of a file by feeding its bytes to a
 * {@link UniversalDetector} until the detector is done or the file ends.
 *
 * @param file the file whose content is inspected
 * @return the charset name reported by the detector, or {@code null} when
 *         the detector reports none
 * @throws IOException if the file cannot be opened, read, or closed
 */
public static String getFileCharset(File file) throws IOException {
    final byte[] buf = new byte[4096];
    final UniversalDetector universalDetector = new UniversalDetector(null);
    // try-with-resources guarantees the stream is closed even when read()
    // throws; the original only closed it on the success path.
    try (BufferedInputStream bufferedInputStream =
            new BufferedInputStream(new FileInputStream(file))) {
        int numberOfBytesRead;
        while ((numberOfBytesRead = bufferedInputStream.read(buf)) > 0
                && !universalDetector.isDone()) {
            universalDetector.handleData(buf, 0, numberOfBytesRead);
        }
    }
    // Signal end of input so the detector can settle on a final answer.
    universalDetector.dataEnd();
    final String encoding = universalDetector.getDetectedCharset();
    universalDetector.reset();
    return encoding;
}
示例2: determineCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Determines the charset of the given bytes using the detector supplied by
 * {@code charsetDetector}.
 *
 * <p>The detector is always reset before returning, including when an
 * exception is thrown, so it is left clean for its next use.
 *
 * @param bytes the data to analyse
 * @return the matching {@link Charset}, or {@code null} when the detector
 *         reports no charset
 */
@Override
public Charset determineCharset(byte[] bytes) {
    final UniversalDetector probe = charsetDetector.get();
    try {
        probe.handleData(bytes, 0, bytes.length);
        probe.dataEnd();
        final String detectedName = probe.getDetectedCharset();
        // Charset.forName rejects null, so handle "nothing detected" first.
        return detectedName == null ? null : Charset.forName(detectedName);
    } finally {
        probe.reset();
    }
}
示例3: detectCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the character encoding of the bytes available from a stream.
 *
 * <p>The stream is read but not closed here; closing it remains the
 * caller's job.
 *
 * @param fis the stream to read from
 * @return the charset name reported by the detector, or {@code null} when
 *         the detector reports none
 * @throws IOException if reading from the stream fails
 */
public static String detectCharset(InputStream fis) throws IOException {
    final UniversalDetector probe = new UniversalDetector(null);
    final byte[] chunk = new byte[4096];

    // Feed the detector chunk by chunk; stop at end of stream or as soon as
    // the detector says it has seen enough.
    for (int count = fis.read(chunk); count > 0 && !probe.isDone(); count = fis.read(chunk)) {
        probe.handleData(chunk, 0, count);
    }

    // Tell the detector there is no more input, then collect its verdict.
    probe.dataEnd();
    final String detectedName = probe.getDetectedCharset();

    probe.reset();
    return detectedName;
}
示例4: guessEncoding
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Guesses the charset encoding of a byte array.
 *
 * <p>juniversalchardet is tried first. When it reports nothing, or reports
 * {@code MACCYRILLIC} (which it wrongly returns for windows-1256 input),
 * ICU4J's {@code CharsetDetector} is consulted instead. If ICU4J finds no
 * match either, {@code "UTF-8"} is returned.
 *
 * @param bytes the byte array to detect the encoding from
 * @return the name of the guessed charset; never {@code null}
 */
public static String guessEncoding(byte[] bytes) {
    final UniversalDetector probe = new UniversalDetector(null);
    probe.handleData(bytes, 0, bytes.length);
    probe.dataEnd();
    final String primaryGuess = probe.getDetectedCharset();
    probe.reset();

    // A usable answer from juniversalchardet wins outright.
    if (primaryGuess != null && !"MACCYRILLIC".equals(primaryGuess)) {
        return primaryGuess;
    }

    // Missing or untrustworthy answer: fall back to ICU4J.
    final CharsetMatch fallbackMatch = new CharsetDetector().setText(bytes).detect();
    if (fallbackMatch == null) {
        return "UTF-8";
    }
    return fallbackMatch.getName();
}
示例5: createFromEventBody
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Builds an {@code EnrichedEventBodyGeneric} from a raw event payload.
 *
 * <p>An enriched payload is deserialized through {@code JSONStringSerializer}.
 * Otherwise the payload's charset is detected, falling back to
 * {@code DEFAULT_CHARSET} when none is detected, and the decoded text is
 * wrapped in a new instance.
 *
 * @param payload    the raw event bytes
 * @param isEnriched whether {@code payload} is already a serialized enriched body
 * @param clazz      the class handed to the serializer / constructor
 * @return the resulting enriched event body
 * @throws IOException if deserialization fails or the charset name is not
 *         supported when decoding the payload
 */
public static <T> EnrichedEventBodyGeneric createFromEventBody(byte[] payload, boolean isEnriched, Class<T> clazz) throws IOException {
    if (isEnriched) {
        final JavaType targetType = JSONStringSerializer.getJavaType(EnrichedEventBodyGeneric.class, clazz);
        return (EnrichedEventBodyGeneric) JSONStringSerializer.fromBytes(payload, targetType);
    }

    // Plain payload: sniff its charset before decoding it to text.
    final UniversalDetector probe = new UniversalDetector(null);
    probe.handleData(payload, 0, payload.length);
    probe.dataEnd();
    final String detected = probe.getDetectedCharset();
    probe.reset();

    final String charsetName = (detected == null) ? DEFAULT_CHARSET : detected;
    return new EnrichedEventBodyGeneric(new String(payload, charsetName), clazz);
}
示例6: createFromEventBody
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Builds an {@code EnrichedEventBody} from a raw event payload.
 *
 * <p>An enriched payload is deserialized through {@code JSONStringSerializer}.
 * Otherwise the payload's charset is detected, falling back to
 * {@code DEFAULT_CHARSET} when none is detected, and the decoded text is
 * wrapped in a new instance.
 *
 * @param payload    the raw event bytes
 * @param isEnriched whether {@code payload} is already a serialized enriched body
 * @return the resulting enriched event body
 * @throws IOException if deserialization fails or the charset name is not
 *         supported when decoding the payload
 */
public static EnrichedEventBody createFromEventBody(byte[] payload, boolean isEnriched) throws IOException {
    if (isEnriched) {
        return JSONStringSerializer.fromBytes(payload, EnrichedEventBody.class);
    }

    // Plain payload: sniff its charset before decoding it to text.
    final UniversalDetector probe = new UniversalDetector(null);
    probe.handleData(payload, 0, payload.length);
    probe.dataEnd();
    final String detected = probe.getDetectedCharset();
    probe.reset();

    final String charsetName = (detected == null) ? DEFAULT_CHARSET : detected;
    return new EnrichedEventBody(new String(payload, charsetName));
}
示例7: testEventCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Verifies that an event body built from a fixture file is re-emitted as
 * UTF-8: the bytes from {@code buildEventBody()} are run through a
 * {@link UniversalDetector} and the detected charset is compared with
 * {@code StandardCharsets.UTF_8.name()}.
 */
// NOTE(review): the (actual, expected, message) argument order matches
// TestNG's Assert — confirm against the file's Assert import.
@Test
public void testEventCharset() throws IOException {
    final byte[] rawInput = Files.readAllBytes(Paths.get("src/test/resources/notUTFString.txt"));
    final byte[] rebuilt = EnrichedEventBody.createFromEventBody(rawInput, false).buildEventBody();

    final UniversalDetector probe = new UniversalDetector(null);
    probe.handleData(rebuilt, 0, rebuilt.length);
    probe.dataEnd();
    final String detected = probe.getDetectedCharset();
    probe.reset();

    Assert.assertEquals(detected, StandardCharsets.UTF_8.name(), "Invalid charset");
}
示例8: detectEncoding
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the encoding of the data available from the supplied stream.
 *
 * <p>The stream is consumed in 4096-byte chunks until it ends or the
 * detector reports it is done. The stream is not closed by this method.
 *
 * @param stream the stream to inspect
 * @return the charset name reported by the detector, or {@code null} when
 *         the detector reports none
 * @throws IOException if reading from the stream fails
 * @see <a href="https://code.google.com/p/juniversalchardet/">Original</a>
 * @see <a href="https://github.com/amake/juniversalchardet">Fork</a>
 */
public static String detectEncoding(InputStream stream) throws IOException {
    final UniversalDetector probe = new UniversalDetector(null);
    final byte[] chunk = new byte[4096];
    for (int count = stream.read(chunk); count > 0 && !probe.isDone(); count = stream.read(chunk)) {
        probe.handleData(chunk, 0, count);
    }
    probe.dataEnd();
    final String detectedName = probe.getDetectedCharset();
    probe.reset();
    return detectedName;
}
示例9: detect
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the character encoding of a stream using the detector obtained
 * from {@code Charset.getSingleton()}, then closes the stream.
 *
 * <p>The detector is reset and the stream closed on every exit path,
 * including when reading fails, so a failed call cannot leave partially
 * fed state behind in the detector for the next caller.
 *
 * @param inputStream the stream to inspect; it is closed by this method
 * @return the detected charset name, or {@code UTF8} when none is detected
 * @throws IOException if reading from or closing the stream fails
 */
// NOTE(review): the detector appears to be one instance handed out by a
// singleton; if it is shared across threads, concurrent calls would
// interleave their data — confirm how getCharsetDetector() is implemented.
public static String detect(InputStream inputStream) throws IOException {
    UniversalDetector detector = Charset.getSingleton()
            .getCharsetDetector();
    String encoding;
    try (InputStream in = inputStream) {
        byte[] buf = new byte[4096];
        int nread;
        while ((nread = in.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
        detector.dataEnd();
        encoding = detector.getDetectedCharset();
    } finally {
        // Always hand the detector back in a clean state.
        detector.reset();
    }
    if (encoding == null) {
        // If no encoding is detected, we assume UTF-8
        encoding = UTF8;
    }
    return encoding;
}
示例10: detectFileCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the text encoding of a file.
 *
 * <p>The file is read in chunks of {@code detectLength} bytes until it ends
 * or the detector reports it is done. The file stream is closed before
 * returning, including when an exception is thrown.
 *
 * @param file         the file to inspect
 * @param detectLength size in bytes of the read buffer
 * @return the charset name reported by the detector, or {@code null} when
 *         the detector reports none
 * @throws IOException if the file cannot be opened, read, or closed
 */
public static String detectFileCharset(File file, int detectLength) throws IOException {
    FileInputStream in = null;
    try {
        final byte[] chunk = new byte[detectLength];
        in = new FileInputStream(file);
        final UniversalDetector probe = new UniversalDetector(null);
        for (int count = in.read(chunk); count > 0 && !probe.isDone(); count = in.read(chunk)) {
            probe.handleData(chunk, 0, count);
        }
        probe.dataEnd();
        final String detectedName = probe.getDetectedCharset();
        probe.reset();
        return detectedName;
    } finally {
        if (in != null) {
            in.close();
        }
    }
}
示例11: guessCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Guesses the character encoding of the named file and logs the outcome.
 *
 * @param fileName path of the file to inspect
 * @return the charset name reported by the detector, or {@code null} when
 *         the detector reports none
 * @throws IOException if the file cannot be opened, read, or closed
 */
public static String guessCharset(String fileName) throws IOException{
    byte[] buf = new byte[4096];
    UniversalDetector detector = new UniversalDetector(null);
    // The original never closed this stream on any path; try-with-resources
    // releases the file handle on both success and failure.
    try (java.io.FileInputStream fis = new java.io.FileInputStream(fileName)) {
        int nread;
        while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
    }
    detector.dataEnd();
    String encoding = detector.getDetectedCharset();
    if (encoding != null) {
        Log.d("ConvertUtil",fileName+" detected encoding = " + encoding);
    } else {
        Log.d("ConvertUtil","No encoding detected = " + encoding);
    }
    detector.reset();
    return encoding;
}
示例12: extractCharset
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the charset of the content available from a stream, e.g. HTML
 * source code.
 *
 * <p>The detected name is returned only when it is non-null and accepted by
 * {@code CrawlUtils.isValidCharset}; otherwise {@code DEFAULT_CHARSET} is
 * returned (UTF-8 according to the original comment; the constant is
 * defined outside this method). The stream is not closed by this method.
 *
 * @param is the stream to inspect
 * @return the detected charset name, or {@code DEFAULT_CHARSET} when nothing
 *         valid was detected
 * @throws java.io.IOException if reading from the stream fails
 */
public static String extractCharset(InputStream is) throws java.io.IOException {
    final UniversalDetector probe = new UniversalDetector(null);
    final byte[] chunk = new byte[4096];
    for (int count = is.read(chunk); count > 0 && !probe.isDone(); count = is.read(chunk)) {
        probe.handleData(chunk, 0, count);
    }
    probe.dataEnd();

    final String detected = probe.getDetectedCharset();
    if (detected == null) {
        LOGGER.debug("No encoding detected.");
    } else {
        LOGGER.debug("Detected encoding = " + detected);
    }
    probe.reset();

    if (detected == null || !CrawlUtils.isValidCharset(detected)) {
        return DEFAULT_CHARSET;
    }
    return detected;
}
示例13: setLyricFile
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Sets the lyric file to display, detecting its encoding first.
 *
 * <p>A {@code null} or non-existent file resets the current state. A file
 * whose path equals the one already loaded is ignored. Otherwise the
 * encoding is detected and the file is handed to
 * {@code setLyricFile(File, String)}, with {@code "UTF-8"} used when no
 * encoding is detected.
 *
 * @param file the lyric file, or {@code null} to clear the current lyrics
 */
public void setLyricFile(File file) {
    if (file == null || !file.exists()) {
        reset();
        // Forget the cached path so that selecting the previous file again
        // after this reset actually reloads it instead of being skipped by
        // the equality check below.
        mCurrentLyricFilePath = "";
        return;
    } else if (file.getPath().equals(mCurrentLyricFilePath)) {
        return;
    } else {
        mCurrentLyricFilePath = file.getPath();
        reset();
    }
    // try-with-resources closes the stream even when read() throws; the
    // original only closed it on the success path.
    try (FileInputStream fis = new FileInputStream(file)) {
        byte[] buf = new byte[1024];
        UniversalDetector detector = new UniversalDetector(null);
        int nread;
        while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
        detector.dataEnd();
        String encoding = detector.getDetectedCharset();
        detector.reset();
        setLyricFile(file, encoding != null ? encoding : "UTF-8");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
示例14: setLyricFile
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Sets the lyric file to display, detecting its encoding first.
 *
 * <p>A {@code null} or non-existent file resets the current state and
 * clears the cached path. A file whose path equals the one already loaded
 * is ignored. Otherwise the encoding is detected and the file is handed to
 * {@code setLyricFile(File, String)}, with {@code "UTF-8"} used when no
 * encoding is detected.
 *
 * @param file the lyric file, or {@code null} to clear the current lyrics
 */
public void setLyricFile(File file) {
    if (file == null || !file.exists()) {
        reset();
        mCurrentLyricFilePath = "";
        return;
    } else if (file.getPath().equals(mCurrentLyricFilePath)) {
        return;
    } else {
        mCurrentLyricFilePath = file.getPath();
        reset();
    }
    // try-with-resources closes the stream even when read() throws; the
    // original only closed it on the success path.
    try (FileInputStream fis = new FileInputStream(file)) {
        byte[] buf = new byte[1024];
        UniversalDetector detector = new UniversalDetector(null);
        int nread;
        while ((nread = fis.read(buf)) > 0 && !detector.isDone()) {
            detector.handleData(buf, 0, nread);
        }
        detector.dataEnd();
        String encoding = detector.getDetectedCharset();
        detector.reset();
        setLyricFile(file, encoding != null ? encoding : "UTF-8");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
示例15: guessEncodingByMozilla
import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Guesses the likely charset of a byte array; returns UTF-8 when detection
 * fails.
 *
 * @param bytes the byte array to inspect
 * @return the likely charset name, or {@code "UTF-8"} when detection fails
 */
public static String guessEncodingByMozilla(byte[] bytes) {
    final UniversalDetector probe = new UniversalDetector(null);
    probe.handleData(bytes, 0, bytes.length);
    probe.dataEnd();
    final String detected = probe.getDetectedCharset();
    probe.reset();
    return detected != null ? detected : "UTF-8";
}