

Java CompressionCodecFactory.getCodec Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.io.compress.CompressionCodecFactory.getCodec. If you are wondering how to call CompressionCodecFactory.getCodec, or looking for concrete examples of it in real code, the hand-picked examples below may help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.io.compress.CompressionCodecFactory.


The sections below present 15 code examples of the CompressionCodecFactory.getCodec method, sorted by popularity by default.
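Before the project-specific examples, here is a minimal, self-contained sketch of the common pattern they all share: getCodec resolves a codec from the file name suffix (for example .gz or .bz2) and returns null for an uncompressed file, in which case the raw stream is used directly. The class name GetCodecExample and the helper method openMaybeCompressed are illustrative only and do not come from any of the projects listed below.

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class GetCodecExample {
  // Open a path for reading, transparently decompressing it when a codec matches its extension.
  public static InputStream openMaybeCompressed(Configuration conf, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(conf);
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    InputStream raw = fs.open(path);
    // getCodec returns null when no registered codec matches the file extension.
    return (codec == null) ? raw : codec.createInputStream(raw);
  }
}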

Example 1: PossiblyDecompressedInputStream

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
public PossiblyDecompressedInputStream(Path inputPath, Configuration conf)
    throws IOException {
  CompressionCodecFactory codecs = new CompressionCodecFactory(conf);
  CompressionCodec inputCodec = codecs.getCodec(inputPath);

  FileSystem ifs = inputPath.getFileSystem(conf);
  FSDataInputStream fileIn = ifs.open(inputPath);

  if (inputCodec == null) {
    decompressor = null;
    coreInputStream = fileIn;
  } else {
    decompressor = CodecPool.getDecompressor(inputCodec);
    coreInputStream = inputCodec.createInputStream(fileIn, decompressor);
  }
}
 
Developer: naver, Project: hadoop, Lines: 17, Source: PossiblyDecompressedInputStream.java

Example 2: SingleFastqRecordReader

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
public SingleFastqRecordReader(Configuration conf, FileSplit split) throws IOException {
    file = split.getPath();
    start = split.getStart();
    end = start + split.getLength();

    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(file);

    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec        = codecFactory.getCodec(file);

    if (codec == null) { // no codec.  Uncompressed file.
        positionAtFirstRecord(fileIn);
        inputStream = fileIn;
    } else {
        // compressed file
        if (start != 0) {
            throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
        }

        inputStream = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE; // read until the end of the file
    }

    lineReader = new LineReader(inputStream);
}
 
Developer: PAA-NCIC, Project: SparkSeq, Lines: 27, Source: SingleFastqInputFormat.java

Example 3: getBufferedReader

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
public static BufferedReader getBufferedReader(File file, MapredContext context)
        throws IOException {
    URI fileuri = file.toURI();
    Path path = new Path(fileuri);

    Configuration conf = context.getJobConf();
    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
    CompressionCodec codec = ccf.getCodec(path);

    if (codec == null) {
        return new BufferedReader(new FileReader(file));
    } else {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        FileInputStream fis = new FileInputStream(file);
        CompressionInputStream cis = codec.createInputStream(fis, decompressor);
        BufferedReader br = new BufferedReaderExt(new InputStreamReader(cis), decompressor);
        return br;
    }
}
 
Developer: apache, Project: incubator-hivemall, Lines: 20, Source: HadoopUtils.java

Example 4: getInputStream

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
@Override
public InputStream getInputStream() {
    if (inputstream == null) {
        try {
            inputstream = fs.open(path, 1000000);
            if (path.getName().endsWith(".lz4")) {
                inputstream = new LZ4FrameInputStream(inputstream);
                isCompressed = true;
            } else {
                CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(fs.getConf());
                CompressionCodec codec = compressionCodecs.getCodec(path);
                if (codec != null) {
                    inputstream = codec.createInputStream(inputstream);
                    isCompressed = true;
                }
            }
        } catch (IOException ex) {
            log.fatalexception(ex, "getInputStream()");
        }
    }
    return inputstream;
}
 
Developer: htools, Project: htools, Lines: 23, Source: HDFSIn.java

Example 5: openCompressedFile

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
/**
 * This function opens a stream to read a compressed file. Stream is not
 * closed, the user has to close it when read is finished.
 *
 * @param filePath
 * @return
 */
public static InputStream openCompressedFile(Path filePath, Configuration conf) {
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  CompressionCodec codec = factory.getCodec(filePath);

  if (codec == null) {
    log.error("No codec found for file " + filePath.toString());
    return null;
  }

  try {
    FileSystem fs = filePath.getFileSystem(conf);
    Decompressor decompressor = codec.createDecompressor();
    return codec.createInputStream(fs.open(filePath), decompressor);
  } catch (Exception e) {
    log.error("Error opening compressed file: " + e.getMessage());
    e.printStackTrace();
  }
  return null;
}
 
Developer: lucidworks, Project: solr-hadoop-common, Lines: 27, Source: CompressionHelper.java

Example 6: decompress

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
private static void decompress(FileSystem fs, String in, String outpath) throws IOException {

    Configuration conf = new Configuration();
    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(new Path(in));
    //Decompressing zip file.
    InputStream is = codec.createInputStream(fs.open(new Path(in)));
    OutputStream out = fs.create(new Path(outpath));
    //Write decompressed out
    IOUtils.copyBytes(is, out, conf);
    is.close();
    out.close();
}
 
Developer: NGSeq, Project: ViraPipe, Lines: 14, Source: InterleaveMulti.java

Example 7: decompress

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
private static void decompress(FileSystem fs, String in, String outpath) throws IOException {
  Configuration conf = new Configuration();
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  // the correct codec will be discovered by the extension of the file

  CompressionCodec codec = factory.getCodec(new Path(in));
  //Decompressing zip file.
  InputStream is = codec.createInputStream(fs.open(new Path(in)));
  OutputStream out = fs.create(new Path(outpath));
  //Write decompressed out
  IOUtils.copyBytes(is, out, conf);
  is.close();
  out.close();
}
 
Developer: NGSeq, Project: ViraPipe, Lines: 15, Source: Decompress.java

Example 8: decompress

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
private static FileStatus decompress(FileSystem fs, String in, String outpath) throws IOException {
  Configuration conf = new Configuration();
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);

  CompressionCodec codec = factory.getCodec(new Path(in));
  //Decompressing zip file.
  InputStream is = codec.createInputStream(fs.open(new Path(in)));
  OutputStream out = fs.create(new Path(outpath));
  //Write decompressed out
  IOUtils.copyBytes(is, out, conf);
  is.close();
  out.close();
  return fs.getFileStatus(new Path(outpath));
}
 
Developer: NGSeq, Project: ViraPipe, Lines: 15, Source: DecompressInterleave.java

Example 9: createTextFile

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
/**
 * Create a data file that gets exported to the db.
 * @param fileNum the number of the file (for multi-file export)
 * @param numRecords how many records to write to the file.
 * @param gzip is true if the file should be gzipped.
 */
protected void createTextFile(int fileNum, int numRecords, boolean gzip,
    ColumnGenerator... extraCols) throws IOException {
  int startId = fileNum * numRecords;

  String ext = ".txt";
  if (gzip) {
    ext = ext + ".gz";
  }
  Path tablePath = getTablePath();
  Path filePath = new Path(tablePath, "part" + fileNum + ext);

  Configuration conf = new Configuration();
  if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
    conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
  }
  FileSystem fs = FileSystem.get(conf);
  fs.mkdirs(tablePath);
  OutputStream os = fs.create(filePath);
  if (gzip) {
    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
    CompressionCodec codec = ccf.getCodec(filePath);
    os = codec.createOutputStream(os);
  }
  BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
  for (int i = 0; i < numRecords; i++) {
    w.write(getRecordLine(startId + i, extraCols));
  }
  w.close();
  os.close();

  if (gzip) {
    verifyCompressedFile(filePath, numRecords);
  }
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 41, Source: TestExport.java

Example 10: verifyCompressedFile

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
private void verifyCompressedFile(Path f, int expectedNumLines)
    throws IOException {
  Configuration conf = new Configuration();
  if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
    conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
  }
  FileSystem fs = FileSystem.get(conf);
  InputStream is = fs.open(f);
  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(f);
  LOG.info("gzip check codec is " + codec);
  Decompressor decompressor = CodecPool.getDecompressor(codec);
  if (null == decompressor) {
    LOG.info("Verifying gzip sanity with null decompressor");
  } else {
    LOG.info("Verifying gzip sanity with decompressor: "
        + decompressor.toString());
  }
  is = codec.createInputStream(is, decompressor);
  BufferedReader r = new BufferedReader(new InputStreamReader(is));
  int numLines = 0;
  while (true) {
    String ln = r.readLine();
    if (ln == null) {
      break;
    }
    numLines++;
  }

  r.close();
  assertEquals("Did not read back correct number of lines",
      expectedNumLines, numLines);
  LOG.info("gzip sanity check returned " + numLines + " lines; ok.");
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 35, Source: TestExport.java

Example 11: maybeUncompressedPath

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
private LineReader maybeUncompressedPath(Path p)
    throws FileNotFoundException, IOException {
  CompressionCodecFactory codecs = new CompressionCodecFactory(getConf());
  inputCodec = codecs.getCodec(p);
  FileSystem fs = p.getFileSystem(getConf());
  FSDataInputStream fileIn = fs.open(p);

  if (inputCodec == null) {
    return new LineReader(fileIn, getConf());
  } else {
    inputDecompressor = CodecPool.getDecompressor(inputCodec);
    return new LineReader(inputCodec.createInputStream(fileIn,
        inputDecompressor), getConf());
  }
}
 
Developer: naver, Project: hadoop, Lines: 16, Source: HadoopLogsAnalyzer.java

Example 12: isFileReadable

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
protected boolean isFileReadable(FileSystemWrapper fs, FileStatus status, CompressionCodecFactory codecFactory) throws IOException {
  CompressionCodec codec = null;
  if (compressible) {
    // TODO: investigate if creating a new codec factory is expensive
    codec = codecFactory.getCodec(status.getPath());
  }
  String fileName = status.getPath().toString();
  String fileNameHacked = null;
  if (codec != null) {
    fileNameHacked = fileName.substring(0, fileName.lastIndexOf('.'));
  }

  // Check for a matching pattern for compressed and uncompressed file name
  for (Pattern p : patterns) {
    if (p.matcher(fileName).matches()) {
      return true;
    }
    if (fileNameHacked != null  &&  p.matcher(fileNameHacked).matches()) {
      return true;
    }
  }

  if (matcher.matches(fs, status)) {
    return true;
  }
  return false;
}
 
Developer: dremio, Project: dremio-oss, Lines: 28, Source: BasicFormatMatcher.java

Example 13: toReport

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
public void toReport(BamQualityControlOptions options, FileSystem fs, Configuration conf, String sampleName) throws IOException {
    for (int i = 0; i < depths.length; i++) {
        Map<String, WrappedIntArray> sampleDepth = depths[i].laneDepth;
        for (String chrName : depths[i].laneDepth.keySet()) {
            StringBuffer cnvDepthFilePath = new StringBuffer();
            cnvDepthFilePath.append(options.getOutputPath());
            cnvDepthFilePath.append("/");
            cnvDepthFilePath.append("cnvDepth");
            cnvDepthFilePath.append("/");
            cnvDepthFilePath.append(sampleName);
            cnvDepthFilePath.append("-lane");
            cnvDepthFilePath.append(i);
            cnvDepthFilePath.append("/");
            cnvDepthFilePath.append(chrName);
            cnvDepthFilePath.append(".dep.gz");
            Path cnvDepthPath = new Path(cnvDepthFilePath.toString());
            FSDataOutputStream cnvDepthStream = fs.create(cnvDepthPath);
            CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
            CompressionCodec codec = codecFactory.getCodec(cnvDepthPath);
            CompressionOutputStream compressedOutput = codec.createOutputStream(cnvDepthStream);
            //ChrLaneDepth laneChrDepths = depths[i].laneDepth.get(chrName);
            //Map<Integer, Integer> depthLanePos = laneChrDepths.depth;
            int[] depth = sampleDepth.get(chrName).getArray();
            StringBuilder sb = new StringBuilder();
            for (int j = 0; j < depth.length; j += 2) {
                sb.append(chrName);
                sb.append("\t");
                sb.append(depth[j] + 1);
                sb.append("\t");
                sb.append(depth[j + 1]);
                sb.append("\n");
            }
            compressedOutput.write(sb.toString().getBytes());
            compressedOutput.close();
            cnvDepthStream.close();
        }
    }
}
 
Developer: BGI-flexlab, Project: SOAPgaea, Lines: 39, Source: CNVDepthReport.java

Example 14: ImportRecordReader

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
public ImportRecordReader(JobConf job, Path path) throws IOException {
  FileSystem fs = path.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(path);
  CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
  CompressionCodec codec = compressionCodecs.getCodec(path);
  if (null != codec) {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    this.lineReader = new LineReader(codec.createInputStream(fileIn, decompressor), job);
  } else {
    this.lineReader = new LineReader(fileIn, job);
  }
}
 
Developer: awslabs, Project: emr-dynamodb-connector, Lines: 13, Source: ImportRecordReader.java

Example 15: initialize

import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the class this method depends on
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodecs.getCodec(file);
  
  // open the file and seek to the start of the split
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(split.getPath());
  boolean skipFirstLine = false;
  if (codec != null) {
    in = new LfLineReader(codec.createInputStream(fileIn), job);
    end = Long.MAX_VALUE;
  } else {
    if (start != 0) {
      skipFirstLine = true;
      --start;
      fileIn.seek(start);
    }
    in = new LfLineReader(fileIn, job);
  }
  if (skipFirstLine) { // skip first line and re-establish "start".
    start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
  }
  this.pos = start;
}
 
Developer: apache, Project: accumulo-wikisearch, Lines: 32, Source: LongLineRecordReader.java


Note: The org.apache.hadoop.io.compress.CompressionCodecFactory.getCodec examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are drawn from open-source projects contributed by their respective developers, and copyright remains with the original authors. Please consult the corresponding project's license before redistributing or reusing the code; do not reproduce this article without permission.