本文整理汇总了Java中org.apache.tika.detect.AutoDetectReader类的典型用法代码示例。如果您正苦于以下问题：Java AutoDetectReader类的具体用法？Java AutoDetectReader怎么用？Java AutoDetectReader使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
AutoDetectReader类属于org.apache.tika.detect包，在下文中一共展示了AutoDetectReader类的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: detectCharset
import org.apache.tika.detect.AutoDetectReader; //导入依赖的package包/类
/**
 * Determines the character set of the file at {@code path}.
 *
 * <p>The content-encoding recorded in {@code metadata} is used when it names a charset
 * supported by this JVM. Otherwise (no value, an unsupported name, or a syntactically
 * illegal name) the charset is detected from the file contents with an
 * {@code AutoDetectReader}.
 *
 * @param path the file whose character set should be determined
 * @param metadata metadata consulted for a content-encoding and passed on to the detector
 * @return the declared charset if usable, otherwise the detected one
 * @throws IOException if the file cannot be read, or if detection fails with a
 *     {@code TikaException} (attached as the cause)
 */
private static Charset detectCharset(final Path path, final Metadata metadata) throws IOException {
    // Try to parse the character set from the content-encoding.
    final String declared = metadata.get(Metadata.CONTENT_ENCODING);
    if (declared != null) {
        try {
            if (Charset.isSupported(declared)) {
                return Charset.forName(declared);
            }
        } catch (java.nio.charset.IllegalCharsetNameException ignored) {
            // The declared value is not a legal charset name (e.g. empty or containing
            // spaces); treat it like a missing declaration and detect from the content.
        }
    }
    // Try to detect the character set from the file contents.
    try (InputStream input = new BufferedInputStream(Files.newInputStream(path));
            AutoDetectReader detector = new AutoDetectReader(input, metadata)) {
        return detector.getCharset();
    } catch (TikaException e) {
        throw new IOException("Unable to detect charset.", e);
    }
}
示例2: detectContentTypeAndCharset
import org.apache.tika.detect.AutoDetectReader; //导入依赖的package包/类
/**
 * Detects the content type and character set of the resource at {@code uri}.
 *
 * <p>The content type is detected first and then fed back into the metadata (via
 * {@code fillMetadata}) as a hint for character-set detection on the same stream.
 *
 * @param uri location of the resource; its file name is given to the detector as a type hint
 * @return {@code "<content-type>; charset=<charset>"} when both were determined, the bare
 *     content type when no charset is available, or the octet-stream media type when the
 *     content type is undetermined
 * @throws FileNotFoundException declared by the signature; presumably raised by
 *     {@code createInputStream} when the resource does not exist (confirm against that helper)
 * @throws IOException if the resource cannot be read
 * @throws TikaException if character-set detection fails
 */
public static String detectContentTypeAndCharset(String uri) throws FileNotFoundException, IOException, TikaException {
    final Detector detector = config.getDetector();
    final Metadata metadata = new Metadata();
    // Set the file name. This provides some level of type-hinting.
    metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, new File(uri).getName());
    final String contentType;
    final String charset;
    // try-with-resources: the stream is closed even when detection throws.
    try (TikaInputStream inputStream = createInputStream(uri)) {
        // Detect the content type.
        contentType = detector.detect(inputStream, metadata).toString();
        // Use metadata to provide type-hinting to the AutoDetectReader.
        fillMetadata(metadata, contentType, uri);
        // Detect the character set. The reader only wraps inputStream, so closing
        // the stream releases the underlying resource.
        final AutoDetectReader reader = new AutoDetectReader(inputStream, metadata);
        charset = reader.getCharset().toString();
    }
    // Return the default content-type if undetermined.
    if (contentType == null || contentType.isEmpty()) {
        return MediaType.OCTET_STREAM.toString();
    }
    // Append the charset if the content-type was determined.
    if (charset != null && !charset.isEmpty()) {
        return contentType + "; charset=" + charset;
    }
    return contentType;
}
示例3: parse
import org.apache.tika.detect.AutoDetectReader; //导入依赖的package包/类
/**
 * Parses the stream as plain text, emitting its entire content inside a single XHTML
 * {@code <p>} element.
 *
 * <p>The content type in {@code metadata} is set to {@code ENVI_MIME_TYPE}, and the
 * detected character encoding is recorded as the content-encoding.
 *
 * @param stream the document stream; it is shielded so that it is left open for the caller
 * @param handler receives the generated XHTML SAX events
 * @param metadata updated with the content type and the detected content-encoding
 * @param context parse context (not used by this method)
 * @throws IOException if the stream cannot be read
 * @throws SAXException if the handler fails to process an event
 * @throws TikaException if the character encoding cannot be detected
 */
public void parse(InputStream stream, ContentHandler handler,
        Metadata metadata, ParseContext context)
        throws IOException, SAXException, TikaException {
    // Only outputting the MIME type as metadata
    metadata.set(Metadata.CONTENT_TYPE, ENVI_MIME_TYPE);
    // The following code was taken from the TXTParser.
    // Automatically detect the character encoding. try-with-resources closes the reader
    // without letting a failure in close() mask an exception thrown while parsing; the
    // CloseShieldInputStream keeps that close from reaching the caller's stream.
    try (AutoDetectReader reader =
            new AutoDetectReader(new CloseShieldInputStream(stream), metadata)) {
        Charset charset = reader.getCharset();
        // deprecated, see TIKA-431
        metadata.set(Metadata.CONTENT_ENCODING, charset.name());
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();
        // text contents of the xhtml
        xhtml.startElement("p");
        char[] buffer = new char[4096];
        int n;
        while ((n = reader.read(buffer)) != -1) {
            xhtml.characters(buffer, 0, n);
        }
        xhtml.endElement("p");
        xhtml.endDocument();
    }
}
示例4: detectCharset
import org.apache.tika.detect.AutoDetectReader; //导入依赖的package包/类
/**
 * Detects the character set of the resource at {@code uri}.
 *
 * @param uri location of the resource to inspect
 * @param contentType content type passed to {@code fillMetadata} as a hint for detection
 * @return the string form of the detected character set
 * @throws FileNotFoundException declared by the signature; presumably raised by
 *     {@code createInputStream} when the resource does not exist (confirm against that helper)
 * @throws IOException if the resource cannot be read
 * @throws TikaException if character-set detection fails
 */
public static String detectCharset(String uri, String contentType) throws FileNotFoundException, IOException, TikaException {
    final Metadata metadata = new Metadata();
    // Use metadata to provide type-hinting to the AutoDetectReader.
    fillMetadata(metadata, contentType, uri);
    // try-with-resources: the stream is closed even when detection throws.
    try (TikaInputStream inputStream = createInputStream(uri, metadata)) {
        // Detect the character set. The reader only wraps inputStream, so closing
        // the stream releases the underlying resource.
        final AutoDetectReader reader = new AutoDetectReader(inputStream, metadata);
        return reader.getCharset().toString();
    }
}