本文整理汇总了Java中org.apache.hadoop.io.compress.CompressionCodec.createInputStream方法的典型用法代码示例。如果您正苦于以下问题:Java CompressionCodec.createInputStream方法的具体用法?Java CompressionCodec.createInputStream怎么用?Java CompressionCodec.createInputStream使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.io.compress.CompressionCodec
的用法示例。
在下文中一共展示了CompressionCodec.createInputStream方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: Reader
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
/**
* Construct an IFile Reader.
*
* @param conf Configuration File
* @param in The input stream
* @param length Length of the data in the stream, including the checksum
* bytes.
* @param codec codec
* @param readsCounter Counter for records read from disk
* @throws IOException
*/
public Reader(Configuration conf, FSDataInputStream in, long length,
CompressionCodec codec,
Counters.Counter readsCounter) throws IOException {
readRecordsCounter = readsCounter;
checksumIn = new IFileInputStream(in,length, conf);
if (codec != null) {
decompressor = CodecPool.getDecompressor(codec);
if (decompressor != null) {
this.in = codec.createInputStream(checksumIn, decompressor);
} else {
LOG.warn("Could not obtain decompressor from CodecPool");
this.in = checksumIn;
}
} else {
this.in = checksumIn;
}
this.dataIn = new DataInputStream(this.in);
this.fileLength = length;
if (conf != null) {
bufferSize = conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
}
}
示例2: PossiblyDecompressedInputStream
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
public PossiblyDecompressedInputStream(Path inputPath, Configuration conf)
throws IOException {
CompressionCodecFactory codecs = new CompressionCodecFactory(conf);
CompressionCodec inputCodec = codecs.getCodec(inputPath);
FileSystem ifs = inputPath.getFileSystem(conf);
FSDataInputStream fileIn = ifs.open(inputPath);
if (inputCodec == null) {
decompressor = null;
coreInputStream = fileIn;
} else {
decompressor = CodecPool.getDecompressor(inputCodec);
coreInputStream = inputCodec.createInputStream(fileIn, decompressor);
}
}
示例3: SingleFastqRecordReader
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
public SingleFastqRecordReader(Configuration conf, FileSplit split) throws IOException {
file = split.getPath();
start = split.getStart();
end = start + split.getLength();
FileSystem fs = file.getFileSystem(conf);
FSDataInputStream fileIn = fs.open(file);
CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
CompressionCodec codec = codecFactory.getCodec(file);
if (codec == null) { // no codec. Uncompressed file.
positionAtFirstRecord(fileIn);
inputStream = fileIn;
} else {
// compressed file
if (start != 0) {
throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
}
inputStream = codec.createInputStream(fileIn);
end = Long.MAX_VALUE; // read until the end of the file
}
lineReader = new LineReader(inputStream);
}
示例4: decompress
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
private static void decompress(FileSystem fs, String in, String outpath) throws IOException {
Configuration conf = new Configuration();
CompressionCodecFactory factory = new CompressionCodecFactory(conf);
CompressionCodec codec = factory.getCodec(new Path(in));
//Decompressing zip file.
InputStream is = codec.createInputStream(fs.open(new Path(in)));
OutputStream out = fs.create(new Path(outpath));
//Write decompressed out
IOUtils.copyBytes(is, out, conf);
is.close();
out.close();
}
示例5: decompress
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
private static void decompress(FileSystem fs, String in, String outpath) throws IOException {
Configuration conf = new Configuration();
CompressionCodecFactory factory = new CompressionCodecFactory(conf);
// the correct codec will be discovered by the extension of the file
CompressionCodec codec = factory.getCodec(new Path(in));
//Decompressing zip file.
InputStream is = codec.createInputStream(fs.open(new Path(in)));
OutputStream out = fs.create(new Path(outpath));
//Write decompressed out
IOUtils.copyBytes(is, out, conf);
is.close();
out.close();
}
示例6: decompress
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
private static FileStatus decompress(FileSystem fs, String in, String outpath) throws IOException {
Configuration conf = new Configuration();
CompressionCodecFactory factory = new CompressionCodecFactory(conf);
CompressionCodec codec = factory.getCodec(new Path(in));
//Decompressing zip file.
InputStream is = codec.createInputStream(fs.open(new Path(in)));
OutputStream out = fs.create(new Path(outpath));
//Write decompressed out
IOUtils.copyBytes(is, out, conf);
is.close();
out.close();
return fs.getFileStatus(new Path(outpath));
}
示例7: openPossiblyCompressedStream
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
public InputStream openPossiblyCompressedStream(Path path) throws IOException {
CompressionCodec codec = codecFactory.getCodec(path); // infers from file ext.
if (codec != null) {
return codec.createInputStream(open(path));
} else {
return open(path);
}
}
示例8: verifyCompressedFile
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
private void verifyCompressedFile(Path f, int expectedNumLines)
throws IOException {
Configuration conf = new Configuration();
if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
}
FileSystem fs = FileSystem.get(conf);
InputStream is = fs.open(f);
CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
CompressionCodec codec = ccf.getCodec(f);
LOG.info("gzip check codec is " + codec);
Decompressor decompressor = CodecPool.getDecompressor(codec);
if (null == decompressor) {
LOG.info("Verifying gzip sanity with null decompressor");
} else {
LOG.info("Verifying gzip sanity with decompressor: "
+ decompressor.toString());
}
is = codec.createInputStream(is, decompressor);
BufferedReader r = new BufferedReader(new InputStreamReader(is));
int numLines = 0;
while (true) {
String ln = r.readLine();
if (ln == null) {
break;
}
numLines++;
}
r.close();
assertEquals("Did not read back correct number of lines",
expectedNumLines, numLines);
LOG.info("gzip sanity check returned " + numLines + " lines; ok.");
}
示例9: wrapInputStreamForCompression
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
public static InputStream wrapInputStreamForCompression(
Configuration conf, String codec, InputStream in) throws IOException {
if (codec.isEmpty())
return in;
FSImageCompression compression = FSImageCompression.createCompression(
conf, codec);
CompressionCodec imageCodec = compression.getImageCodec();
return imageCodec.createInputStream(in);
}
示例10: initialize
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
public void initialize(InputSplit genericSplit, TaskAttemptContext context) {
try {
FileSplit split = (FileSplit)genericSplit;
Configuration job = context.getConfiguration();
this.maxLineLength = job.getInt("mapreduce.input.linerecordreader.line.maxlength", 2147483647);
this.start = split.getStart();
this.end = this.start + split.getLength();
Path file = split.getPath();
FileSystem fs = file.getFileSystem(job);
this.fileIn = fs.open(file);
CompressionCodec codec = (new CompressionCodecFactory(job)).getCodec(file);
if(null != codec) {
this.isCompressedInput = true;
this.decompressor = CodecPool.getDecompressor(codec);
if(codec instanceof SplittableCompressionCodec) {
SplitCompressionInputStream cIn = ((SplittableCompressionCodec)codec).createInputStream(this.fileIn, this.decompressor, this.start, this.end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
this.in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
this.start = cIn.getAdjustedStart();
this.end = cIn.getAdjustedEnd();
this.filePosition = cIn;
} else {
this.in = new SplitLineReader(codec.createInputStream(this.fileIn, this.decompressor), job, this.recordDelimiterBytes);
this.filePosition = this.fileIn;
}
} else {
this.fileIn.seek(this.start);
this.in = new SplitLineReader(this.fileIn, job, this.recordDelimiterBytes);
this.filePosition = this.fileIn;
}
if(this.start != 0L) {
this.start += (long)this.in.readLine(new Text(), 0, this.maxBytesToConsume(this.start));
}
this.pos = this.start;
}catch(Exception ex){
LOG.warn("Exception occurred during initialization {}", ex, ex);
}
}
示例11: openPossiblyCompressedStream
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
public InputStream openPossiblyCompressedStream(Path path) throws IOException {
try {
CompressionCodec codec = codecFactory.getCodec(path); // infers from file ext.
if (codec != null) {
return codec.createInputStream(open(path));
} else {
return open(path);
}
} catch(FSError e) {
throw propagateFSError(e);
}
}
示例12: createDecompressionStream
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
public InputStream createDecompressionStream(InputStream downStream, Decompressor decompressor,
int downStreamBufferSize) throws IOException {
CompressionCodec codec = getCodec(conf);
// Set the internal buffer size to read from down stream.
if (downStreamBufferSize > 0) {
((Configurable) codec).getConf().setInt("io.file.buffer.size", downStreamBufferSize);
}
CompressionInputStream cis = codec.createInputStream(downStream, decompressor);
BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
return bis2;
}
示例13: getInputStream
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
@Override
protected InputStream getInputStream(PathData item) throws IOException {
FSDataInputStream i = (FSDataInputStream)super.getInputStream(item);
// Handle 0 and 1-byte files
short leadBytes;
try {
leadBytes = i.readShort();
} catch (EOFException e) {
i.seek(0);
return i;
}
// Check type of stream first
switch(leadBytes) {
case 0x1f8b: { // RFC 1952
// Must be gzip
i.seek(0);
return new GZIPInputStream(i);
}
case 0x5345: { // 'S' 'E'
// Might be a SequenceFile
if (i.readByte() == 'Q') {
i.close();
return new TextRecordInputStream(item.stat);
}
}
default: {
// Check the type of compression instead, depending on Codec class's
// own detection methods, based on the provided path.
CompressionCodecFactory cf = new CompressionCodecFactory(getConf());
CompressionCodec codec = cf.getCodec(item.path);
if (codec != null) {
i.seek(0);
return codec.createInputStream(i);
}
break;
}
case 0x4f62: { // 'O' 'b'
if (i.readByte() == 'j') {
i.close();
return new AvroFileInputStream(item.stat);
}
break;
}
}
// File is non-compressed, or not a file container we know.
i.seek(0);
return i;
}
示例14: initialize
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
public void initialize(Configuration job, long splitStart, long splitLength,
Path file) throws IOException {
start = splitStart;
end = start + splitLength;
long partialRecordLength = start % recordLength;
long numBytesToSkip = 0;
if (partialRecordLength != 0) {
numBytesToSkip = recordLength - partialRecordLength;
}
// open the file and seek to the start of the split
final FileSystem fs = file.getFileSystem(job);
fileIn = fs.open(file);
CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
if (null != codec) {
isCompressedInput = true;
decompressor = CodecPool.getDecompressor(codec);
CompressionInputStream cIn
= codec.createInputStream(fileIn, decompressor);
filePosition = cIn;
inputStream = cIn;
numRecordsRemainingInSplit = Long.MAX_VALUE;
LOG.info(
"Compressed input; cannot compute number of records in the split");
} else {
fileIn.seek(start);
filePosition = fileIn;
inputStream = fileIn;
long splitSize = end - start - numBytesToSkip;
numRecordsRemainingInSplit = (splitSize + recordLength - 1)/recordLength;
if (numRecordsRemainingInSplit < 0) {
numRecordsRemainingInSplit = 0;
}
LOG.info("Expecting " + numRecordsRemainingInSplit
+ " records each with a length of " + recordLength
+ " bytes in the split with an effective size of "
+ splitSize + " bytes");
}
if (numBytesToSkip != 0) {
start += inputStream.skip(numBytesToSkip);
}
this.pos = start;
}
示例15: initialize
import org.apache.hadoop.io.compress.CompressionCodec; //导入方法依赖的package包/类
public void initialize(InputSplit genericSplit,
TaskAttemptContext context) throws IOException {
FileSplit split = (FileSplit) genericSplit;
Configuration job = context.getConfiguration();
this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
start = split.getStart();
end = start + split.getLength();
final Path file = split.getPath();
// open the file and seek to the start of the split
final FileSystem fs = file.getFileSystem(job);
fileIn = fs.open(file);
CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
if (null!=codec) {
isCompressedInput = true;
decompressor = CodecPool.getDecompressor(codec);
if (codec instanceof SplittableCompressionCodec) {
final SplitCompressionInputStream cIn =
((SplittableCompressionCodec)codec).createInputStream(
fileIn, decompressor, start, end,
SplittableCompressionCodec.READ_MODE.BYBLOCK);
in = new CompressedSplitLineReader(cIn, job,
this.recordDelimiterBytes);
start = cIn.getAdjustedStart();
end = cIn.getAdjustedEnd();
filePosition = cIn;
} else {
in = new SplitLineReader(codec.createInputStream(fileIn,
decompressor), job, this.recordDelimiterBytes);
filePosition = fileIn;
}
} else {
fileIn.seek(start);
in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
filePosition = fileIn;
}
// If this is not the first split, we always throw away first record
// because we always (except the last split) read one extra line in
// next() method.
if (start != 0) {
start += in.readLine(new Text(), 0, maxBytesToConsume(start));
}
this.pos = start;
}