This article collects typical usage examples of the Java method org.apache.hadoop.io.compress.CompressionCodecFactory.getCodec. If you have been wondering what CompressionCodecFactory.getCodec does, how to call it, or where to find working examples, the curated samples below should help. You can also explore the containing class, org.apache.hadoop.io.compress.CompressionCodecFactory, for further usage.
The following presents 15 code examples of CompressionCodecFactory.getCodec, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java examples.
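Before the individual examples, here is a minimal, self-contained sketch of the canonical read pattern (our own illustration, not drawn from the projects below): getCodec maps a path's file extension (.gz, .bz2, .deflate, and so on) to a registered CompressionCodec, or returns null when the extension is unrecognized. The path is illustrative; note that a pooled decompressor should be handed back when reading is done.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;

public class CodecProbe {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path path = new Path(args.length > 0 ? args[0] : "data/sample.txt.gz"); // illustrative path

    CompressionCodecFactory factory = new CompressionCodecFactory(conf);
    CompressionCodec codec = factory.getCodec(path); // null for an unrecognized extension

    FileSystem fs = path.getFileSystem(conf);
    InputStream in = fs.open(path);
    Decompressor decompressor = null;
    try {
      if (codec != null) {
        // borrow a pooled decompressor and wrap the raw stream
        decompressor = CodecPool.getDecompressor(codec);
        in = codec.createInputStream(in, decompressor);
      }
      BufferedReader reader = new BufferedReader(new InputStreamReader(in));
      System.out.println("first line: " + reader.readLine());
    } finally {
      in.close();
      if (decompressor != null) {
        CodecPool.returnDecompressor(decompressor); // hand it back for reuse
      }
    }
  }
}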
Example 1: PossiblyDecompressedInputStream
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

public PossiblyDecompressedInputStream(Path inputPath, Configuration conf)
    throws IOException {
  CompressionCodecFactory codecs = new CompressionCodecFactory(conf);
  CompressionCodec inputCodec = codecs.getCodec(inputPath);
  FileSystem ifs = inputPath.getFileSystem(conf);
  FSDataInputStream fileIn = ifs.open(inputPath);

  if (inputCodec == null) {
    // no recognized compression extension: read the raw stream
    decompressor = null;
    coreInputStream = fileIn;
  } else {
    decompressor = CodecPool.getDecompressor(inputCodec);
    coreInputStream = inputCodec.createInputStream(fileIn, decompressor);
  }
}
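A note on the pattern above: the pooled Decompressor is stashed in a field rather than a local, presumably so the class's close() method (not shown in this snippet) can hand it back with CodecPool.returnDecompressor for reuse.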
Example 2: SingleFastqRecordReader
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

public SingleFastqRecordReader(Configuration conf, FileSplit split) throws IOException {
  file = split.getPath();
  start = split.getStart();
  end = start + split.getLength();

  FileSystem fs = file.getFileSystem(conf);
  FSDataInputStream fileIn = fs.open(file);

  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
  CompressionCodec codec = codecFactory.getCodec(file);

  if (codec == null) { // no codec: uncompressed file
    positionAtFirstRecord(fileIn);
    inputStream = fileIn;
  } else { // compressed file
    if (start != 0) {
      throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
    }
    inputStream = codec.createInputStream(fileIn);
    end = Long.MAX_VALUE; // read until the end of the file
  }

  lineReader = new LineReader(inputStream);
}
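A design note on the start != 0 check: ordinary compressed streams such as gzip cannot be decoded from an arbitrary byte offset, so Hadoop treats a compressed file as one unsplittable unit. The reader therefore insists the split starts at byte 0 and pushes end out to Long.MAX_VALUE so that the single split consumes the whole file.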
Example 3: getBufferedReader
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

public static BufferedReader getBufferedReader(File file, MapredContext context)
    throws IOException {
  URI fileuri = file.toURI();
  Path path = new Path(fileuri);

  Configuration conf = context.getJobConf();
  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(path);

  if (codec == null) {
    // no matching codec: read the file as plain text
    return new BufferedReader(new FileReader(file));
  } else {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    FileInputStream fis = new FileInputStream(file);
    CompressionInputStream cis = codec.createInputStream(fis, decompressor);
    return new BufferedReaderExt(new InputStreamReader(cis), decompressor);
  }
}
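BufferedReaderExt is a project-specific subclass, not part of Hadoop; judging from the constructor, it presumably exists so the pooled Decompressor can be returned to CodecPool when the reader is closed, which a plain BufferedReader would not do.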
Example 4: getInputStream
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

@Override
public InputStream getInputStream() {
  if (inputstream == null) {
    try {
      inputstream = fs.open(path, 1000000); // open with a ~1 MB buffer
      if (path.getName().endsWith(".lz4")) {
        // LZ4 frame format is handled outside the Hadoop codec factory
        inputstream = new LZ4FrameInputStream(inputstream);
        isCompressed = true;
      } else {
        CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(fs.getConf());
        CompressionCodec codec = compressionCodecs.getCodec(path);
        if (codec != null) {
          inputstream = codec.createInputStream(inputstream);
          isCompressed = true;
        }
      }
    } catch (IOException ex) {
      log.fatalexception(ex, "getInputStream()");
    }
  }
  return inputstream;
}
Example 5: openCompressedFile
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

/**
 * Opens a stream to read a compressed file. The stream is not closed here;
 * the caller must close it once reading is finished.
 *
 * @param filePath the compressed file to open
 * @param conf the Hadoop configuration used to resolve the codec
 * @return a decompressed input stream, or null if no codec matches or the file cannot be opened
 */
public static InputStream openCompressedFile(Path filePath, Configuration conf) {
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  CompressionCodec codec = factory.getCodec(filePath);

  if (codec == null) {
    log.error("No codec found for file " + filePath.toString());
    return null;
  }

  try {
    FileSystem fs = filePath.getFileSystem(conf);
    Decompressor decompressor = codec.createDecompressor();
    return codec.createInputStream(fs.open(filePath), decompressor);
  } catch (Exception e) {
    log.error("Error opening compressed file: " + e.getMessage());
    e.printStackTrace();
  }
  return null;
}
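A hypothetical caller of this helper might look like the sketch below (the path is illustrative, and readEvents is our own name). The null check matters because the helper returns null both when no codec matches the extension and when opening the file fails.

static void readEvents(Configuration conf) throws IOException {
  InputStream in = openCompressedFile(new Path("/data/events.log.gz"), conf); // illustrative path
  if (in != null) {
    try {
      // ... consume the decompressed stream ...
    } finally {
      in.close(); // the helper deliberately leaves closing to the caller
    }
  }
}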
Example 6: decompress
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

private static void decompress(FileSystem fs, String in, String outpath) throws IOException {
  Configuration conf = new Configuration();
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  CompressionCodec codec = factory.getCodec(new Path(in));

  // open a decompressing stream over the compressed input file
  InputStream is = codec.createInputStream(fs.open(new Path(in)));
  OutputStream out = fs.create(new Path(outpath));

  // write the decompressed bytes out
  IOUtils.copyBytes(is, out, conf);
  is.close();
  out.close();
}
Example 7: decompress
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

private static void decompress(FileSystem fs, String in, String outpath) throws IOException {
  Configuration conf = new Configuration();
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  // the correct codec is discovered from the file's extension
  CompressionCodec codec = factory.getCodec(new Path(in));

  // open a decompressing stream over the compressed input file
  InputStream is = codec.createInputStream(fs.open(new Path(in)));
  OutputStream out = fs.create(new Path(outpath));

  // write the decompressed bytes out
  IOUtils.copyBytes(is, out, conf);
  is.close();
  out.close();
}
Example 8: decompress
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

private static FileStatus decompress(FileSystem fs, String in, String outpath) throws IOException {
  Configuration conf = new Configuration();
  CompressionCodecFactory factory = new CompressionCodecFactory(conf);
  CompressionCodec codec = factory.getCodec(new Path(in));

  // open a decompressing stream over the compressed input file
  InputStream is = codec.createInputStream(fs.open(new Path(in)));
  OutputStream out = fs.create(new Path(outpath));

  // write the decompressed bytes out
  IOUtils.copyBytes(is, out, conf);
  is.close();
  out.close();
  return fs.getFileStatus(new Path(outpath));
}
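One caveat shared by Examples 6 through 8: they assume the input path carries a recognized compression extension. If getCodec returns null, the subsequent codec.createInputStream call throws a NullPointerException, so production code should null-check the codec the way the other examples here do.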
Example 9: createTextFile
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

/**
 * Create a data file that gets exported to the db.
 * @param fileNum the number of the file (for multi-file export)
 * @param numRecords how many records to write to the file.
 * @param gzip true if the file should be gzipped.
 */
protected void createTextFile(int fileNum, int numRecords, boolean gzip,
    ColumnGenerator... extraCols) throws IOException {
  int startId = fileNum * numRecords;
  String ext = ".txt";
  if (gzip) {
    ext = ext + ".gz";
  }
  Path tablePath = getTablePath();
  Path filePath = new Path(tablePath, "part" + fileNum + ext);

  Configuration conf = new Configuration();
  if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
    conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
  }
  FileSystem fs = FileSystem.get(conf);
  fs.mkdirs(tablePath);

  OutputStream os = fs.create(filePath);
  if (gzip) {
    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
    CompressionCodec codec = ccf.getCodec(filePath);
    os = codec.createOutputStream(os);
  }

  BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
  for (int i = 0; i < numRecords; i++) {
    w.write(getRecordLine(startId + i, extraCols));
  }
  w.close();
  os.close();

  if (gzip) {
    verifyCompressedFile(filePath, numRecords);
  }
}
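The same extension-based lookup works on the write path: because the file is named part<N>.txt.gz, getCodec resolves the gzip codec, and codec.createOutputStream wraps the raw stream so everything written through the BufferedWriter is compressed transparently.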
Example 10: verifyCompressedFile
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

private void verifyCompressedFile(Path f, int expectedNumLines)
    throws IOException {
  Configuration conf = new Configuration();
  if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
    conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
  }
  FileSystem fs = FileSystem.get(conf);
  InputStream is = fs.open(f);

  CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
  CompressionCodec codec = ccf.getCodec(f);
  LOG.info("gzip check codec is " + codec);
  Decompressor decompressor = CodecPool.getDecompressor(codec);
  if (null == decompressor) {
    LOG.info("Verifying gzip sanity with null decompressor");
  } else {
    LOG.info("Verifying gzip sanity with decompressor: " + decompressor.toString());
  }
  is = codec.createInputStream(is, decompressor);

  BufferedReader r = new BufferedReader(new InputStreamReader(is));
  int numLines = 0;
  while (true) {
    String ln = r.readLine();
    if (ln == null) {
      break;
    }
    numLines++;
  }
  r.close();

  assertEquals("Did not read back correct number of lines",
      expectedNumLines, numLines);
  LOG.info("gzip sanity check returned " + numLines + " lines; ok.");
}
Example 11: maybeUncompressedPath
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

private LineReader maybeUncompressedPath(Path p)
    throws FileNotFoundException, IOException {
  CompressionCodecFactory codecs = new CompressionCodecFactory(getConf());
  inputCodec = codecs.getCodec(p);
  FileSystem fs = p.getFileSystem(getConf());
  FSDataInputStream fileIn = fs.open(p);

  if (inputCodec == null) {
    return new LineReader(fileIn, getConf());
  } else {
    inputDecompressor = CodecPool.getDecompressor(inputCodec);
    return new LineReader(inputCodec.createInputStream(fileIn, inputDecompressor), getConf());
  }
}
Example 12: isFileReadable
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

protected boolean isFileReadable(FileSystemWrapper fs, FileStatus status,
    CompressionCodecFactory codecFactory) throws IOException {
  CompressionCodec codec = null;
  if (compressible) {
    // TODO: investigate if creating a new codec factory is expensive
    codec = codecFactory.getCodec(status.getPath());
  }
  String fileName = status.getPath().toString();
  String fileNameHacked = null;
  if (codec != null) {
    // strip the compression extension so patterns can match the underlying file name
    fileNameHacked = fileName.substring(0, fileName.lastIndexOf('.'));
  }

  // check for a matching pattern against the compressed and uncompressed file names
  for (Pattern p : patterns) {
    if (p.matcher(fileName).matches()) {
      return true;
    }
    if (fileNameHacked != null && p.matcher(fileNameHacked).matches()) {
      return true;
    }
  }
  return matcher.matches(fs, status);
}
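The point of fileNameHacked is worth spelling out: by stripping the compression extension that getCodec recognized, a pattern written against the logical name (say, one matching *.csv) will also accept data.csv.gz, so compressed and uncompressed inputs pass the same filter.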
Example 13: toReport
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

public void toReport(BamQualityControlOptions options, FileSystem fs, Configuration conf,
    String sampleName) throws IOException {
  for (int i = 0; i < depths.length; i++) {
    Map<String, WrappedIntArray> sampleDepth = depths[i].laneDepth;
    for (String chrName : depths[i].laneDepth.keySet()) {
      // build <outputPath>/cnvDepth/<sample>-lane<i>/<chrName>.dep.gz
      StringBuffer cnvDepthFilePath = new StringBuffer();
      cnvDepthFilePath.append(options.getOutputPath());
      cnvDepthFilePath.append("/cnvDepth/");
      cnvDepthFilePath.append(sampleName);
      cnvDepthFilePath.append("-lane");
      cnvDepthFilePath.append(i);
      cnvDepthFilePath.append("/");
      cnvDepthFilePath.append(chrName);
      cnvDepthFilePath.append(".dep.gz");
      Path cnvDepthPath = new Path(cnvDepthFilePath.toString());

      FSDataOutputStream cnvDepthStream = fs.create(cnvDepthPath);
      CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
      CompressionCodec codec = codecFactory.getCodec(cnvDepthPath); // gzip, from the .dep.gz suffix
      CompressionOutputStream compressedOutput = codec.createOutputStream(cnvDepthStream);

      // each row: <chromosome> <1-based position> <depth>
      int[] depth = sampleDepth.get(chrName).getArray();
      StringBuilder sb = new StringBuilder();
      for (int j = 0; j < depth.length; j += 2) {
        sb.append(chrName);
        sb.append("\t");
        sb.append(depth[j] + 1);
        sb.append("\t");
        sb.append(depth[j + 1]);
        sb.append("\n");
      }
      compressedOutput.write(sb.toString().getBytes());
      compressedOutput.close();
      cnvDepthStream.close();
    }
  }
}
Example 14: ImportRecordReader
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

public ImportRecordReader(JobConf job, Path path) throws IOException {
  FileSystem fs = path.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(path);
  CompressionCodecFactory compressionCodecs = new CompressionCodecFactory(job);
  CompressionCodec codec = compressionCodecs.getCodec(path);

  if (null != codec) {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    this.lineReader = new LineReader(codec.createInputStream(fileIn, decompressor), job);
  } else {
    this.lineReader = new LineReader(fileIn, job);
  }
}
Example 15: initialize
import org.apache.hadoop.io.compress.CompressionCodecFactory; // import the package/class the method depends on

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
  FileSplit split = (FileSplit) genericSplit;
  Configuration job = context.getConfiguration();
  this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
  start = split.getStart();
  end = start + split.getLength();
  final Path file = split.getPath();
  compressionCodecs = new CompressionCodecFactory(job);
  final CompressionCodec codec = compressionCodecs.getCodec(file);

  // open the file and seek to the start of the split
  FileSystem fs = file.getFileSystem(job);
  FSDataInputStream fileIn = fs.open(split.getPath());
  boolean skipFirstLine = false;

  if (codec != null) {
    // compressed input cannot be split: consume the whole stream from the beginning
    in = new LfLineReader(codec.createInputStream(fileIn), job);
    end = Long.MAX_VALUE;
  } else {
    if (start != 0) {
      // back up one byte so we can tell whether the split starts mid-line
      skipFirstLine = true;
      --start;
      fileIn.seek(start);
    }
    in = new LfLineReader(fileIn, job);
  }
  if (skipFirstLine) { // skip the first (partial) line and re-establish "start"
    start += in.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
  }
  this.pos = start;
}
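The back-up-by-one-byte trick at the end is the standard LineRecordReader idiom: every split that does not start at byte 0 rewinds one byte and discards everything up to the first newline, so a line straddling a split boundary is read exactly once, by the split that precedes it.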