

Java CompressionCodec.createInputStream Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.io.compress.CompressionCodec.createInputStream. If you are wondering what CompressionCodec.createInputStream does, how to call it, or where to see it used in practice, the curated method examples below should help. You can also browse further usage examples of its enclosing class, org.apache.hadoop.io.compress.CompressionCodec.


The sections below present 15 code examples of CompressionCodec.createInputStream, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java examples.
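
Before the examples, here is a minimal, self-contained sketch of the pattern that nearly all of them share: let CompressionCodecFactory infer the codec from the file extension, then wrap the raw stream with createInputStream to decompress on the fly. This sketch is illustrative and not taken from any of the projects below; the input path and the System.out destination are placeholders.

import java.io.InputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CreateInputStreamSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path input = new Path("/tmp/example.txt.gz"); // hypothetical input path
    FileSystem fs = input.getFileSystem(conf);

    // Infer the codec from the extension (.gz, .bz2, ...); null means the file is uncompressed.
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(input);

    InputStream in = (codec == null)
        ? fs.open(input)
        : codec.createInputStream(fs.open(input)); // decompress transparently while reading
    try {
      IOUtils.copyBytes(in, System.out, conf);
    } finally {
      in.close();
    }
  }
}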

Example 1: Reader

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
/**
 * Construct an IFile Reader.
 * 
 * @param conf Configuration File 
 * @param in   The input stream
 * @param length Length of the data in the stream, including the checksum
 *               bytes.
 * @param codec codec
 * @param readsCounter Counter for records read from disk
 * @throws IOException
 */
public Reader(Configuration conf, FSDataInputStream in, long length, 
              CompressionCodec codec,
              Counters.Counter readsCounter) throws IOException {
  readRecordsCounter = readsCounter;
  checksumIn = new IFileInputStream(in,length, conf);
  if (codec != null) {
    decompressor = CodecPool.getDecompressor(codec);
    if (decompressor != null) {
      this.in = codec.createInputStream(checksumIn, decompressor);
    } else {
      LOG.warn("Could not obtain decompressor from CodecPool");
      this.in = checksumIn;
    }
  } else {
    this.in = checksumIn;
  }
  this.dataIn = new DataInputStream(this.in);
  this.fileLength = length;
  
  if (conf != null) {
    bufferSize = conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
  }
}
 
Developer: naver, Project: hadoop, Lines: 35, Source: IFile.java
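
Example 1 borrows a Decompressor from CodecPool, but the matching return happens in the Reader's close() method, which is not shown in the snippet. The sketch below illustrates the complete borrow/use/return cycle around createInputStream; the helper name and buffer size are assumptions for illustration, not part of the project above.

import java.io.IOException;
import java.io.InputStream;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.Decompressor;

public class CodecPoolSketch {
  /** Reads the compressed stream to the end and returns the number of decompressed bytes. */
  public static long countDecompressedBytes(CompressionCodec codec, InputStream rawIn)
      throws IOException {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    try {
      InputStream in = (decompressor != null)
          ? codec.createInputStream(rawIn, decompressor)
          : codec.createInputStream(rawIn);
      byte[] buf = new byte[64 * 1024];
      long total = 0;
      int n;
      while ((n = in.read(buf)) != -1) {
        total += n;
      }
      in.close();
      return total;
    } finally {
      if (decompressor != null) {
        CodecPool.returnDecompressor(decompressor); // hand the decompressor back to the pool
      }
    }
  }
}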

Example 2: PossiblyDecompressedInputStream

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
public PossiblyDecompressedInputStream(Path inputPath, Configuration conf)
    throws IOException {
  CompressionCodecFactory codecs = new CompressionCodecFactory(conf);
  CompressionCodec inputCodec = codecs.getCodec(inputPath);

  FileSystem ifs = inputPath.getFileSystem(conf);
  FSDataInputStream fileIn = ifs.open(inputPath);

  if (inputCodec == null) {
    decompressor = null;
    coreInputStream = fileIn;
  } else {
    decompressor = CodecPool.getDecompressor(inputCodec);
    coreInputStream = inputCodec.createInputStream(fileIn, decompressor);
  }
}
 
Developer: naver, Project: hadoop, Lines: 17, Source: PossiblyDecompressedInputStream.java

Example 3: SingleFastqRecordReader

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
public SingleFastqRecordReader(Configuration conf, FileSplit split) throws IOException {
    file = split.getPath();
    start = split.getStart();
    end = start + split.getLength();

    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(file);

    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec        = codecFactory.getCodec(file);

    if (codec == null) { // no codec.  Uncompressed file.
        positionAtFirstRecord(fileIn);
        inputStream = fileIn;
    } else {
        // compressed file
        if (start != 0) {
            throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
        }

        inputStream = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE; // read until the end of the file
    }

    lineReader = new LineReader(inputStream);
}
 
Developer: PAA-NCIC, Project: SparkSeq, Lines: 27, Source: SingleFastqInputFormat.java

Example 4: decompress

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
private static void decompress(FileSystem fs, String in, String outpath) throws IOException {

    Configuration conf = new Configuration();
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(new Path(in));
    // Decompress the input using the codec inferred from its file extension.
    InputStream is = codec.createInputStream(fs.open(new Path(in)));
    OutputStream out = fs.create(new Path(outpath));
    //Write decompressed out
    IOUtils.copyBytes(is, out, conf);
    is.close();
    out.close();
  }
 
Developer: NGSeq, Project: ViraPipe, Lines: 14, Source: InterleaveMulti.java

Example 5: decompress

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
private static void decompress(FileSystem fs, String in, String outpath) throws IOException {
  Configuration conf = new Configuration();
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  // the correct codec will be discovered by the extension of the file

  CompressionCodec codec = factory.getCodec(new Path(in));
  // Decompress the input using the codec inferred from its file extension.
  InputStream is = codec.createInputStream(fs.open(new Path(in)));
  OutputStream out = fs.create(new Path(outpath));
  //Write decompressed out
  IOUtils.copyBytes(is, out, conf);
  is.close();
  out.close();
}
 
Developer: NGSeq, Project: ViraPipe, Lines: 15, Source: Decompress.java

Example 6: decompress

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
private static FileStatus decompress(FileSystem fs, String in, String outpath) throws IOException {
  Configuration conf = new Configuration();
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);

  CompressionCodec codec = factory.getCodec(new Path(in));
  // Decompress the input using the codec inferred from its file extension.
  InputStream is = codec.createInputStream(fs.open(new Path(in)));
  OutputStream out = fs.create(new Path(outpath));
  //Write decompressed out
  IOUtils.copyBytes(is, out, conf);
  is.close();
  out.close();
  return fs.getFileStatus(new Path(outpath));
}
 
Developer: NGSeq, Project: ViraPipe, Lines: 15, Source: DecompressInterleave.java

Example 7: openPossiblyCompressedStream

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
public InputStream openPossiblyCompressedStream(Path path) throws IOException {
  CompressionCodec codec = codecFactory.getCodec(path); // infers from file ext.
  if (codec != null) {
    return codec.createInputStream(open(path));
  } else {
    return open(path);
  }
}
 
Developer: skhalifa, Project: QDrill, Lines: 9, Source: DrillFileSystem.java

Example 8: verifyCompressedFile

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
private void verifyCompressedFile(Path f, int expectedNumLines)
    throws IOException {
  Configuration conf = new Configuration();
  if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
    conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
  }
  FileSystem fs = FileSystem.get(conf);
  InputStream is = fs.open(f);
  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(f);
  LOG.info("gzip check codec is " + codec);
  Decompressor decompressor = CodecPool.getDecompressor(codec);
  if (null == decompressor) {
    LOG.info("Verifying gzip sanity with null decompressor");
  } else {
    LOG.info("Verifying gzip sanity with decompressor: "
        + decompressor.toString());
  }
  is = codec.createInputStream(is, decompressor);
  BufferedReader r = new BufferedReader(new InputStreamReader(is));
  int numLines = 0;
  while (true) {
    String ln = r.readLine();
    if (ln == null) {
      break;
    }
    numLines++;
  }

  r.close();
  assertEquals("Did not read back correct number of lines",
      expectedNumLines, numLines);
  LOG.info("gzip sanity check returned " + numLines + " lines; ok.");
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 35, Source: TestExport.java

Example 9: wrapInputStreamForCompression

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
public static InputStream wrapInputStreamForCompression(
    Configuration conf, String codec, InputStream in) throws IOException {
  if (codec.isEmpty())
    return in;

  FSImageCompression compression = FSImageCompression.createCompression(
      conf, codec);
  CompressionCodec imageCodec = compression.getImageCodec();
  return imageCodec.createInputStream(in);
}
 
Developer: naver, Project: hadoop, Lines: 11, Source: FSImageUtil.java

Example 10: initialize

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
public void initialize(InputSplit genericSplit, TaskAttemptContext context)  {
    try {
        FileSplit split = (FileSplit)genericSplit;
        Configuration job = context.getConfiguration();
        this.maxLineLength = job.getInt("mapreduce.input.linerecordreader.line.maxlength", 2147483647);
        this.start = split.getStart();
        this.end = this.start + split.getLength();
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(job);
        this.fileIn = fs.open(file);
        CompressionCodec codec = (new CompressionCodecFactory(job)).getCodec(file);
        if(null != codec) {
            this.isCompressedInput = true;
            this.decompressor = CodecPool.getDecompressor(codec);
            if(codec instanceof SplittableCompressionCodec) {
                SplitCompressionInputStream cIn = ((SplittableCompressionCodec)codec).createInputStream(this.fileIn, this.decompressor, this.start, this.end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
                this.in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
                this.start = cIn.getAdjustedStart();
                this.end = cIn.getAdjustedEnd();
                this.filePosition = cIn;
            } else {
                this.in = new SplitLineReader(codec.createInputStream(this.fileIn, this.decompressor), job, this.recordDelimiterBytes);
                this.filePosition = this.fileIn;
            }
        } else {
            this.fileIn.seek(this.start);
            this.in = new SplitLineReader(this.fileIn, job, this.recordDelimiterBytes);
            this.filePosition = this.fileIn;
        }

        if(this.start != 0L) {
            this.start += (long)this.in.readLine(new Text(), 0, this.maxBytesToConsume(this.start));
        }

        this.pos = this.start;
    }catch(Exception ex){
        LOG.warn("Exception occurred during initialization {}", ex, ex);
    }

}
 
Developer: Comcast, Project: spark-util, Lines: 41, Source: ErrorHandlingLineRecordReader.java

Example 11: openPossiblyCompressedStream

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
public InputStream openPossiblyCompressedStream(Path path) throws IOException {
  try {
    CompressionCodec codec = codecFactory.getCodec(path); // infers from file ext.
    if (codec != null) {
      return codec.createInputStream(open(path));
    } else {
      return open(path);
    }
  } catch(FSError e) {
    throw propagateFSError(e);
  }
}
 
Developer: dremio, Project: dremio-oss, Lines: 13, Source: FileSystemWrapper.java

Example 12: createDecompressionStream

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
public InputStream createDecompressionStream(InputStream downStream, Decompressor decompressor,
    int downStreamBufferSize) throws IOException {
  CompressionCodec codec = getCodec(conf);
  // Set the internal buffer size to read from down stream.
  if (downStreamBufferSize > 0) {
    ((Configurable) codec).getConf().setInt("io.file.buffer.size", downStreamBufferSize);
  }
  CompressionInputStream cis = codec.createInputStream(downStream, decompressor);
  BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
  return bis2;

}
 
Developer: fengchen8086, Project: ditb, Lines: 13, Source: Compression.java

Example 13: getInputStream

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
@Override
protected InputStream getInputStream(PathData item) throws IOException {
  FSDataInputStream i = (FSDataInputStream)super.getInputStream(item);

  // Handle 0 and 1-byte files
  short leadBytes;
  try {
    leadBytes = i.readShort();
  } catch (EOFException e) {
    i.seek(0);
    return i;
  }

  // Check type of stream first
  switch(leadBytes) {
    case 0x1f8b: { // RFC 1952
      // Must be gzip
      i.seek(0);
      return new GZIPInputStream(i);
    }
    case 0x5345: { // 'S' 'E'
      // Might be a SequenceFile
      if (i.readByte() == 'Q') {
        i.close();
        return new TextRecordInputStream(item.stat);
      }
    }
    default: {
      // Check the type of compression instead, depending on Codec class's
      // own detection methods, based on the provided path.
      CompressionCodecFactory cf = new CompressionCodecFactory(getConf());
      CompressionCodec codec = cf.getCodec(item.path);
      if (codec != null) {
        i.seek(0);
        return codec.createInputStream(i);
      }
      break;
    }
    case 0x4f62: { // 'O' 'b'
      if (i.readByte() == 'j') {
        i.close();
        return new AvroFileInputStream(item.stat);
      }
      break;
    }
  }

  // File is non-compressed, or not a file container we know.
  i.seek(0);
  return i;
}
 
Developer: nucypher, Project: hadoop-oss, Lines: 52, Source: Display.java

Example 14: initialize

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
public void initialize(Configuration job, long splitStart, long splitLength,
                       Path file) throws IOException {
  start = splitStart;
  end = start + splitLength;
  long partialRecordLength = start % recordLength;
  long numBytesToSkip = 0;
  if (partialRecordLength != 0) {
    numBytesToSkip = recordLength - partialRecordLength;
  }

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);

  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null != codec) {
    isCompressedInput = true;	
    decompressor = CodecPool.getDecompressor(codec);
    CompressionInputStream cIn
        = codec.createInputStream(fileIn, decompressor);
    filePosition = cIn;
    inputStream = cIn;
    numRecordsRemainingInSplit = Long.MAX_VALUE;
    LOG.info(
        "Compressed input; cannot compute number of records in the split");
  } else {
    fileIn.seek(start);
    filePosition = fileIn;
    inputStream = fileIn;
    long splitSize = end - start - numBytesToSkip;
    numRecordsRemainingInSplit = (splitSize + recordLength - 1)/recordLength;
    if (numRecordsRemainingInSplit < 0) {
      numRecordsRemainingInSplit = 0;
    }
    LOG.info("Expecting " + numRecordsRemainingInSplit
        + " records each with a length of " + recordLength
        + " bytes in the split with an effective size of "
        + splitSize + " bytes");
  }
  if (numBytesToSkip != 0) {
    start += inputStream.skip(numBytesToSkip);
  }
  this.pos = start;
}
 
Developer: naver, Project: hadoop, Lines: 45, Source: FixedLengthRecordReader.java

Example 15: initialize

import org.apache.hadoop.io.compress.CompressionCodec; // import the package/class this method depends on
public void initialize(InputSplit genericSplit,
                       TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();

  // open the file and seek to the start of the split
  final FileSystem fs = file.getFileSystem(job);
  fileIn = fs.open(file);
  
  CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
  if (null!=codec) {
    isCompressedInput = true;	
    decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
      final SplitCompressionInputStream cIn =
        ((SplittableCompressionCodec)codec).createInputStream(
          fileIn, decompressor, start, end,
          SplittableCompressionCodec.READ_MODE.BYBLOCK);
      in = new CompressedSplitLineReader(cIn, job,
          this.recordDelimiterBytes);
      start = cIn.getAdjustedStart();
      end = cIn.getAdjustedEnd();
      filePosition = cIn;
    } else {
      in = new SplitLineReader(codec.createInputStream(fileIn,
          decompressor), job, this.recordDelimiterBytes);
      filePosition = fileIn;
    }
  } else {
    fileIn.seek(start);
    in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
    filePosition = fileIn;
  }
  // If this is not the first split, we always throw away first record
  // because we always (except the last split) read one extra line in
  // next() method.
  if (start != 0) {
    start += in.readLine(new Text(), 0, maxBytesToConsume(start));
  }
  this.pos = start;
}
 
Developer: naver, Project: hadoop, Lines: 46, Source: LineRecordReader.java


Note: The org.apache.hadoop.io.compress.CompressionCodec.createInputStream examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are drawn from open-source projects contributed by their original authors, who retain the copyright; consult each project's license before distributing or reusing the code. Do not reproduce without permission.