当前位置: 首页>>代码示例>>Java>>正文


Java UniversalDetector.getDetectedCharset方法代码示例

本文整理汇总了Java中org.mozilla.universalchardet.UniversalDetector.getDetectedCharset方法的典型用法代码示例。如果您正苦于以下问题:Java UniversalDetector.getDetectedCharset方法的具体用法?Java UniversalDetector.getDetectedCharset怎么用?Java UniversalDetector.getDetectedCharset使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.mozilla.universalchardet.UniversalDetector的用法示例。


在下文中一共展示了UniversalDetector.getDetectedCharset方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getFileCharset

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the character encoding of a file by feeding its bytes to a
 * {@link UniversalDetector}.
 *
 * @param file the file whose content is inspected
 * @return the charset name reported by the detector; may be {@code null}
 *         if the detector reports nothing
 * @throws IOException if the file cannot be opened or read
 */
public static String getFileCharset(File file) throws IOException {
	byte[] buf = new byte[4096];
	final UniversalDetector universalDetector = new UniversalDetector(null);
	// try-with-resources guarantees the file handle is released even when
	// read() throws part-way through the file.
	try (BufferedInputStream bufferedInputStream = new BufferedInputStream(
			new FileInputStream(file))) {
		int numberOfBytesRead;
		// Check isDone() first so no further chunk is read once the
		// detector has already made up its mind.
		while (!universalDetector.isDone()
				&& (numberOfBytesRead = bufferedInputStream.read(buf)) > 0) {
			universalDetector.handleData(buf, 0, numberOfBytesRead);
		}
	}
	// Signal end of input so the detector can finalize its guess.
	universalDetector.dataEnd();
	String encoding = universalDetector.getDetectedCharset();
	universalDetector.reset();
	return encoding;
}
 
开发者ID:simbest,项目名称:simbest-cores,代码行数:17,代码来源:CharsetUtil.java

示例2: determineCharset

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Runs charset detection over the whole byte array.
 *
 * The detector is obtained from {@code charsetDetector.get()} and is always
 * reset before returning, so it is handed back in a clean state.
 *
 * @param bytes the content to inspect
 * @return the detected {@link Charset}, or {@code null} when the detector
 *         reports no charset
 */
@Override
public Charset determineCharset(byte[] bytes) {
    UniversalDetector sharedDetector = charsetDetector.get();
    try {
        sharedDetector.handleData(bytes, 0, bytes.length);
        sharedDetector.dataEnd();

        String charsetName = sharedDetector.getDetectedCharset();
        return charsetName == null ? null : Charset.forName(charsetName);
    } finally {
        // Reset on every path so the next use starts from scratch.
        sharedDetector.reset();
    }
}
 
开发者ID:goldmansachs,项目名称:obevo,代码行数:18,代码来源:DetectCharsetStrategy.java

示例3: detectCharset

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the character encoding of the bytes supplied by a stream.
 *
 * The stream is read in 4096-byte chunks until it is exhausted or the
 * detector reports that it is done. The stream is not closed here.
 *
 * @param fis the stream to read from
 * @return the charset name reported by the detector; may be {@code null}
 *         if the detector reports nothing
 * @throws IOException if reading from the stream fails
 */
public static String detectCharset(InputStream fis) throws IOException {
	UniversalDetector probe = new UniversalDetector(null);
	byte[] chunk = new byte[4096];

	// Feed chunks to the detector until end of input or until it is done.
	for (;;) {
		int count = fis.read(chunk);
		if (count <= 0 || probe.isDone()) {
			break;
		}
		probe.handleData(chunk, 0, count);
	}

	// Tell the detector there is no more data, then collect its answer.
	probe.dataEnd();
	String result = probe.getDetectedCharset();

	// Return the detector to its initial state before discarding it.
	probe.reset();
	return result;
}
 
开发者ID:NewTranx,项目名称:newtranx-utils,代码行数:18,代码来源:DetectCharset.java

示例4: guessEncoding

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Guesses the charset encoding of a byte array.
 *
 * juniversalchardet is tried first. When it yields nothing, or yields
 * "MACCYRILLIC" (which it incorrectly reports for windows-1256 content),
 * ICU4J's {@code CharsetDetector} is consulted instead, with "UTF-8" as the
 * last resort.
 *
 * @param bytes the byte array to detect the encoding from
 * @return the name of the guessed charset encoding
 */
public static String guessEncoding(byte[] bytes) {
	UniversalDetector primary = new UniversalDetector(null);
	primary.handleData(bytes, 0, bytes.length);
	primary.dataEnd();
	String guess = primary.getDetectedCharset();
	primary.reset();

	// A usable answer from juniversalchardet is returned as-is.
	if (guess != null && !"MACCYRILLIC".equals(guess)) {
		return guess;
	}

	// Otherwise fall back to ICU4J, then to UTF-8 if ICU4J finds no match.
	CharsetMatch match = new CharsetDetector().setText(bytes).detect();
	return match != null ? match.getName() : "UTF-8";
}
 
开发者ID:dnbn,项目名称:submerge,代码行数:29,代码来源:FileUtils.java

示例5: createFromEventBody

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Builds an {@code EnrichedEventBodyGeneric} from a raw event payload.
 *
 * An already-enriched payload is deserialized through
 * {@code JSONStringSerializer}. Otherwise the payload's charset is detected
 * (falling back to {@code DEFAULT_CHARSET}), the bytes are decoded to a
 * string, and a new body is wrapped around that string.
 *
 * @param payload    the raw event bytes
 * @param isEnriched whether the payload is already a serialized enriched body
 * @param clazz      the class passed to the serializer / body constructor
 * @return the resulting enriched event body
 * @throws IOException if deserialization or decoding fails
 */
public static <T> EnrichedEventBodyGeneric createFromEventBody(byte[] payload, boolean isEnriched, Class<T> clazz) throws IOException {
    if (isEnriched) {
        JavaType bodyType = JSONStringSerializer.getJavaType(EnrichedEventBodyGeneric.class, clazz);
        return (EnrichedEventBodyGeneric) JSONStringSerializer.fromBytes(payload, bodyType);
    }

    // Not enriched yet: detect the payload charset before decoding it.
    UniversalDetector charsetProbe = new UniversalDetector(null);
    charsetProbe.handleData(payload, 0, payload.length);
    charsetProbe.dataEnd();
    String detected = charsetProbe.getDetectedCharset();
    charsetProbe.reset();

    String charsetName = (detected != null) ? detected : DEFAULT_CHARSET;
    return new EnrichedEventBodyGeneric(new String(payload, charsetName), clazz);
}
 
开发者ID:keedio,项目名称:flume-enrichment-interceptor-skeleton,代码行数:24,代码来源:EnrichedEventBodyGeneric.java

示例6: createFromEventBody

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Builds an {@code EnrichedEventBody} from a raw event payload.
 *
 * An already-enriched payload is deserialized through
 * {@code JSONStringSerializer}. Otherwise the payload's charset is detected
 * (falling back to {@code DEFAULT_CHARSET}), the bytes are decoded to a
 * string, and a new body is wrapped around that string.
 *
 * @param payload    the raw event bytes
 * @param isEnriched whether the payload is already a serialized enriched body
 * @return the resulting enriched event body
 * @throws IOException if deserialization or decoding fails
 */
public static EnrichedEventBody createFromEventBody(byte[] payload, boolean isEnriched) throws IOException {
    if (isEnriched) {
        return JSONStringSerializer.fromBytes(payload, EnrichedEventBody.class);
    }

    // Not enriched yet: detect the payload charset before decoding it.
    UniversalDetector charsetProbe = new UniversalDetector(null);
    charsetProbe.handleData(payload, 0, payload.length);
    charsetProbe.dataEnd();
    String detected = charsetProbe.getDetectedCharset();
    charsetProbe.reset();

    String charsetName = (detected != null) ? detected : DEFAULT_CHARSET;
    return new EnrichedEventBody(new String(payload, charsetName));
}
 
开发者ID:keedio,项目名称:flume-enrichment-interceptor-skeleton,代码行数:23,代码来源:EnrichedEventBody.java

示例7: testEventCharset

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Checks that a body built from the bytes of notUTFString.txt is emitted in
 * UTF-8: the output of {@code buildEventBody()} is run through the charset
 * detector and the detected name is compared with UTF-8.
 */
@Test
public void testEventCharset() throws IOException {
    byte[] payload = Files.readAllBytes(Paths.get("src/test/resources/notUTFString.txt"));
    byte[] output = EnrichedEventBody.createFromEventBody(payload, false).buildEventBody();

    // Detect the charset of what was actually produced.
    UniversalDetector charsetProbe = new UniversalDetector(null);
    charsetProbe.handleData(output, 0, output.length);
    charsetProbe.dataEnd();
    String outputCharset = charsetProbe.getDetectedCharset();
    charsetProbe.reset();

    String expectedCharset = StandardCharsets.UTF_8.name();
    Assert.assertEquals(outputCharset, expectedCharset, "Invalid charset");
}
 
开发者ID:keedio,项目名称:flume-enrichment-interceptor-skeleton,代码行数:19,代码来源:EnrichedEventBodyTest.java

示例8: detectEncoding

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the encoding of the bytes supplied by a stream.
 *
 * The stream is consumed in 4096-byte chunks until it ends or the detector
 * reports that it is done. The stream is not closed here.
 *
 * @param stream the stream to read from
 * @return the charset name reported by the detector; may be {@code null}
 *         if the detector reports nothing
 * @throws IOException if reading from the stream fails
 * @see <a href="https://code.google.com/p/juniversalchardet/">Original</a>
 * @see <a href="https://github.com/amake/juniversalchardet">Fork</a>
 */
public static String detectEncoding(InputStream stream) throws IOException {
    byte[] chunk = new byte[4096];
    UniversalDetector probe = new UniversalDetector(null);

    int count = stream.read(chunk);
    while (count > 0 && !probe.isDone()) {
        probe.handleData(chunk, 0, count);
        count = stream.read(chunk);
    }

    // No more input: let the detector settle on an answer.
    probe.dataEnd();
    String result = probe.getDetectedCharset();
    probe.reset();
    return result;
}
 
开发者ID:miurahr,项目名称:tmpotter,代码行数:23,代码来源:EncodingDetector.java

示例9: detectCharacterSet

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the character set of the input text.
 *
 * @param input The input text as a byte array.
 * @return The character set of the input text, or null if the input is null
 *         or no character set can be detected.
 * @throws RuntimeException if the detected character set name is not
 *         supported by this JVM; the original
 *         {@link UnsupportedCharsetException} is attached as the cause.
 */
public static Charset detectCharacterSet(byte[] input) {
    if (input == null) {
        return null;
    }

    Charset charset = null;
    // Work on a private copy of the caller's array.
    input = input.clone();
    UniversalDetector detector = new UniversalDetector(null);
    detector.handleData(input, 0, input.length);
    detector.dataEnd();
    String detectedCharset = detector.getDetectedCharset();
    if (StringUtils.hasText(detectedCharset)) {
        try {
            charset = Charset.forName(detectedCharset);
        } catch (UnsupportedCharsetException e) {
            // Keep the original exception as the cause so the stack trace
            // of the lookup failure is not lost.
            throw new RuntimeException("Detected unsupported character set " + detectedCharset, e);
        }
    }
    return charset;
}
 
开发者ID:SEEG-Oxford,项目名称:ABRAID-MP,代码行数:26,代码来源:CharacterSetUtils.java

示例10: detect

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the character encoding of a stream using the detector obtained
 * from {@code Charset.getSingleton().getCharsetDetector()}.
 *
 * The stream is always closed and the detector is always reset before this
 * method returns, including when reading fails.
 *
 * @param inputStream the stream to inspect; it is closed by this method
 * @return the detected charset name, or {@code UTF8} if none is detected
 * @throws IOException if reading from or closing the stream fails
 */
public static String detect(InputStream inputStream) throws IOException {
	UniversalDetector detector = Charset.getSingleton()
			.getCharsetDetector();
	String encoding;
	// try-with-resources closes the stream on every path; the finally block
	// resets the detector so a failed read cannot leave stale state behind
	// for the next caller of the same detector instance.
	try (InputStream in = inputStream) {
		byte[] buf = new byte[4096];
		int nread;
		while (!detector.isDone() && (nread = in.read(buf)) > 0) {
			detector.handleData(buf, 0, nread);
		}
		detector.dataEnd();
		encoding = detector.getDetectedCharset();
	} finally {
		detector.reset();
	}
	if (encoding == null) {
		// If no encoding is detected, we assume UTF-8
		encoding = UTF8;
	}
	return encoding;
}
 
开发者ID:bonigarcia,项目名称:dualsub,代码行数:19,代码来源:Charset.java

示例11: detectFileCharset

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the text encoding of a file.
 *
 * The file is read in chunks of {@code detectLength} bytes until it ends or
 * the detector reports that it is done.
 *
 * @param file         the file to inspect
 * @param detectLength size in bytes of the read buffer
 * @return the charset name reported by the detector; may be {@code null}
 *         if the detector reports nothing
 * @throws IOException if the file cannot be opened, read or closed
 */
public static String detectFileCharset(File file, int detectLength) throws IOException {
	byte[] chunk = new byte[detectLength];
	FileInputStream in = new FileInputStream(file);
	try {
		UniversalDetector probe = new UniversalDetector(null);
		for (int count = in.read(chunk); count > 0 && !probe.isDone(); count = in.read(chunk)) {
			probe.handleData(chunk, 0, count);
		}
		probe.dataEnd();
		String result = probe.getDetectedCharset();
		probe.reset();
		return result;
	} finally {
		// Release the file handle whether or not detection succeeded.
		in.close();
	}
}
 
开发者ID:baishui2004,项目名称:common_gui_tools,代码行数:25,代码来源:JUniversalChardet.java

示例12: guessCharset

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Guesses the character encoding of the named file and logs the outcome.
 *
 * @param fileName path of the file to inspect
 * @return the charset name reported by the detector; may be {@code null}
 *         if the detector reports nothing
 * @throws IOException if the file cannot be opened, read or closed
 */
public static String guessCharset(String fileName) throws IOException {
	byte[] buf = new byte[4096];
	UniversalDetector detector = new UniversalDetector(null);

	java.io.FileInputStream fis = new java.io.FileInputStream(fileName);
	try {
		int nread;
		// Stop as soon as the detector is done or the file is exhausted.
		while (!detector.isDone() && (nread = fis.read(buf)) > 0) {
			detector.handleData(buf, 0, nread);
		}
	} finally {
		// The stream was previously never closed; release the file handle
		// on both the success and the failure path.
		fis.close();
	}
	detector.dataEnd();

	String encoding = detector.getDetectedCharset();
	if (encoding != null) {
		Log.d("ConvertUtil", fileName + " detected encoding = " + encoding);
	} else {
		Log.d("ConvertUtil", "No encoding detected = " + encoding);
	}

	detector.reset();
	return encoding;
}
 
开发者ID:misgod,项目名称:palmbookreader,代码行数:25,代码来源:ConvertUtil.java

示例13: extractCharset

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * This method extracts the charset from the html source code.
 * If the charset is not specified, it is set to UTF-8 by default
 * @param is
 * @return
 */
public static String extractCharset(InputStream is) throws java.io.IOException {
    byte[] buf = new byte[4096];
    UniversalDetector detector = new UniversalDetector(null);
    int nread;
    while ((nread = is.read(buf)) > 0 && !detector.isDone()) {
        detector.handleData(buf, 0, nread);
    }
    detector.dataEnd();

    String encoding = detector.getDetectedCharset();
    if (encoding != null) {
        LOGGER.debug("Detected encoding = " + encoding);
    } else {
        LOGGER.debug("No encoding detected.");
    }

    detector.reset();
    if (encoding != null && CrawlUtils.isValidCharset(encoding)) {
        return encoding;
    } else {
        return DEFAULT_CHARSET;
    }
}
 
开发者ID:Tanaguru,项目名称:Tanaguru,代码行数:30,代码来源:CrawlUtils.java

示例14: detectEncoding

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Detects the character encoding of a file.
 *
 * @param file the file to inspect
 * @return the detected charset name, or {@code DEFAULT_ENCODING} when the
 *         detector reports none
 * @throws IOException if the file cannot be opened or read
 */
private static String detectEncoding(File file) throws IOException {
	byte[] buf = new byte[4096];
	UniversalDetector detector = new UniversalDetector(null);

	FileInputStream fis = new FileInputStream(file);
	try {
		int nread;
		// Stop as soon as the detector is done or the file is exhausted.
		while (!detector.isDone() && (nread = fis.read(buf)) > 0) {
			detector.handleData(buf, 0, nread);
		}
	} finally {
		// Close in finally so a failing read() no longer leaks the handle.
		Util.closeStream(fis);
	}

	detector.dataEnd();
	String encoding = detector.getDetectedCharset();

	if (encoding == null) {
		encoding = DEFAULT_ENCODING;
	}
	return encoding;
}
 
开发者ID:chms,项目名称:jdotxt,代码行数:18,代码来源:TaskIo.java

示例15: getFileEncoding

import org.mozilla.universalchardet.UniversalDetector; //导入方法依赖的package包/类
/**
 * Returns the detected encoding of a file.
 *
 * The whole file is loaded into memory with
 * {@code FileUtils.readFileToByteArray} and handed to the detector in one
 * call.
 *
 * @author eko.zhan at Jul 3, 2017 1:54:50 PM
 * @param file the file to inspect
 * @return the charset name reported by the detector; may be {@code null}
 *         if the detector reports nothing
 * @throws IOException if the file cannot be read
 */
public static String getFileEncoding(File file) throws IOException{
	byte[] content = FileUtils.readFileToByteArray(file);
	UniversalDetector probe = new UniversalDetector(null);
	probe.handleData(content, 0, content.length);
	// Signal end of input so the detector can settle on an answer.
	probe.dataEnd();
	String detected = probe.getDetectedCharset();
	return detected;
}
 
开发者ID:ekoz,项目名称:kbase-doc,代码行数:15,代码来源:HtmlUtils.java


注:本文中的org.mozilla.universalchardet.UniversalDetector.getDetectedCharset方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。