本文整理汇总了Java中org.apache.hadoop.fs.ChecksumFileSystem.getChecksumLength方法的典型用法代码示例。如果您正苦于以下问题：Java ChecksumFileSystem.getChecksumLength方法的具体用法？Java ChecksumFileSystem.getChecksumLength怎么用？Java ChecksumFileSystem.getChecksumLength使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.fs.ChecksumFileSystem的用法示例。
在下文中一共展示了ChecksumFileSystem.getChecksumLength方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: merge
import org.apache.hadoop.fs.ChecksumFileSystem; //导入方法依赖的package包/类
/**
 * Merges the given on-disk segments into a single output file under
 * {@code reduceDir} and registers the merged file via
 * {@code addSegmentToMerge}.
 *
 * <p>The approximate output size (sum of the segment lengths plus the value
 * returned by {@code ChecksumFileSystem.getChecksumLength}) is only used in
 * the completion log message of this method.
 *
 * <p>If merging or writing fails with an {@link IOException}, the writer is
 * closed on a best-effort basis so that whatever it holds open is released,
 * the partially written output file is deleted, and the original exception
 * is rethrown.
 *
 * @param segments the segments to merge; {@code null} or an empty list is a
 *                 no-op apart from an info log line
 * @throws IOException if merging, writing, or deleting the failed output fails
 */
@SuppressWarnings("unchecked")
@Override
public void merge(List<Segment<K,V>> segments) throws IOException {
    // sanity check
    if (segments == null || segments.isEmpty()) {
        LOG.info("No ondisk files to merge...");
        return;
    }

    // Resolve the key/value classes and the comparator once; the writer and
    // the merge below share the same values.
    final Class<K> keyClass = (Class<K>) jobConf.getMapOutputKeyClass();
    final Class<V> valueClass = (Class<V>) jobConf.getMapOutputValueClass();
    final RawComparator<K> comparator =
        (RawComparator<K>) jobConf.getOutputKeyComparator();

    long approxOutputSize = 0;
    int bytesPerSum = jobConf.getInt("io.bytes.per.checksum", 512);

    LOG.info("OnDiskMerger: We have " + segments.size()
        + " map outputs on disk. Triggering merge...");

    // 1. Approximate the output size: total segment length ...
    for (Segment<K,V> segment : segments) {
        approxOutputSize += segment.getLength();
    }
    // ... plus the checksum length for that many bytes.
    approxOutputSize += ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

    // 2. Start the on-disk merge process
    Path outputPath =
        new Path(reduceDir, "file-" + (numPasses++)).suffix(Task.MERGED_OUTPUT_PREFIX);
    Writer<K, V> writer = new Writer<K, V>(jobConf, lustrefs.create(outputPath),
        keyClass, valueClass, codec, null, true);
    RawKeyValueIterator iter = null;
    // Flipped right before the regular close() so that close() is attempted
    // exactly once, even when close() itself is what throws.
    boolean closeAttempted = false;
    try {
        iter = Merger.merge(jobConf, lustrefs, keyClass, valueClass, segments,
            ioSortFactor, mergeTempDir, comparator, reporter,
            spilledRecordsCounter, mergedMapOutputsCounter, null);
        Merger.writeFile(iter, writer, reporter, jobConf);
        closeAttempted = true;
        writer.close();
    } catch (IOException e) {
        if (!closeAttempted) {
            // Release the writer before removing the file it was writing to.
            try {
                writer.close();
            } catch (IOException closeFailure) {
                // Keep the original failure as the one that propagates.
                LOG.warn("Failed to close writer for " + outputPath
                    + " after a failed merge", closeFailure);
            }
        }
        lustrefs.delete(outputPath, true);
        throw e;
    }

    addSegmentToMerge(new Segment<K, V>(jobConf, lustrefs, outputPath, codec, false, null));

    LOG.info(reduceId + " Finished merging " + segments.size()
        + " map output files on disk of total-size " + approxOutputSize + "."
        + " Local output file is " + outputPath + " of size "
        + lustrefs.getFileStatus(outputPath).getLen());
}
示例2: merge
import org.apache.hadoop.fs.ChecksumFileSystem; //导入方法依赖的package包/类
/**
 * Merges the given on-disk map outputs into one local file and hands the
 * result to {@code closeOnDiskFile}.
 *
 * <p>The approximate output size is the sum of the input file lengths (as
 * reported by {@code localFS}) plus the value returned by
 * {@code ChecksumFileSystem.getChecksumLength}; it is passed to
 * {@code localDirAllocator.getLocalPathForWrite} when choosing the output
 * path. The output stream is passed through
 * {@code CryptoUtils.wrapIfNecessary} before the writer is created.
 *
 * <p>If merging or writing fails with an {@link IOException}, the writer is
 * closed on a best-effort basis, the partially written output is deleted,
 * and the original exception is rethrown.
 *
 * @param inputs the on-disk files to merge; {@code null} or an empty list is
 *               a no-op apart from an info log line
 * @throws IOException if sizing, merging, writing, or cleanup fails
 */
@Override
public void merge(List<CompressAwarePath> inputs) throws IOException {
    // sanity check
    if (inputs == null || inputs.isEmpty()) {
        LOG.info("No ondisk files to merge...");
        return;
    }

    long approxOutputSize = 0;
    int bytesPerSum = jobConf.getInt("io.bytes.per.checksum", 512);

    LOG.info("OnDiskMerger: We have " + inputs.size() +
        " map outputs on disk. Triggering merge...");

    // 1. Prepare the list of files to be merged.
    for (CompressAwarePath file : inputs) {
        approxOutputSize += localFS.getFileStatus(file).getLen();
    }

    // add the checksum length
    approxOutputSize +=
        ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

    // 2. Start the on-disk merge process
    Path outputPath =
        localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(),
            approxOutputSize, jobConf).suffix(Task.MERGED_OUTPUT_PREFIX);

    // Resolve the key/value classes once; the writer and the merge share them.
    final Class<K> keyClass = (Class<K>) jobConf.getMapOutputKeyClass();
    final Class<V> valueClass = (Class<V>) jobConf.getMapOutputValueClass();

    FSDataOutputStream out =
        CryptoUtils.wrapIfNecessary(jobConf, rfs.create(outputPath));
    Writer<K, V> writer = new Writer<K, V>(jobConf, out,
        keyClass, valueClass, codec, null, true);

    RawKeyValueIterator iter = null;
    CompressAwarePath compressAwarePath;
    Path tmpDir = new Path(reduceId.toString());
    // Flipped right before the regular close() so that close() is attempted
    // exactly once, even when close() itself is what throws.
    boolean closeAttempted = false;
    try {
        iter = Merger.merge(jobConf, rfs, keyClass, valueClass,
            codec, inputs.toArray(new Path[inputs.size()]),
            true, ioSortFactor, tmpDir,
            (RawComparator<K>) jobConf.getOutputKeyComparator(),
            reporter, spilledRecordsCounter, null,
            mergedMapOutputsCounter, null);
        Merger.writeFile(iter, writer, reporter, jobConf);
        closeAttempted = true;
        writer.close();
        compressAwarePath = new CompressAwarePath(outputPath,
            writer.getRawLength(), writer.getCompressedLength());
    } catch (IOException e) {
        if (!closeAttempted) {
            // Release the writer before removing the file it was writing to.
            try {
                writer.close();
            } catch (IOException closeFailure) {
                // Keep the original failure as the one that propagates.
                LOG.warn("Failed to close writer for " + outputPath
                    + " after a failed merge", closeFailure);
            }
        }
        // NOTE(review): the file is created through rfs but deleted through
        // localFS; presumably both resolve the same local path. Confirm.
        localFS.delete(outputPath, true);
        throw e;
    }

    closeOnDiskFile(compressAwarePath);

    LOG.info(reduceId +
        " Finished merging " + inputs.size() +
        " map output files on disk of total-size " +
        approxOutputSize + "." +
        " Local output file is " + outputPath + " of size " +
        localFS.getFileStatus(outputPath).getLen());
}
示例3: merge
import org.apache.hadoop.fs.ChecksumFileSystem; //导入方法依赖的package包/类
/**
 * Merges the given on-disk map outputs into one local file and hands the
 * result to {@code closeOnDiskFile}.
 *
 * <p>The approximate output size is the sum of the input file lengths (as
 * reported by {@code localFS}) plus the value returned by
 * {@code ChecksumFileSystem.getChecksumLength}; it is passed to
 * {@code localDirAllocator.getLocalPathForWrite} when choosing the output
 * path.
 *
 * <p>If merging or writing fails with an {@link IOException}, the writer is
 * closed on a best-effort basis, the partially written output is deleted,
 * and the original exception is rethrown.
 *
 * @param inputs the on-disk files to merge; {@code null} or an empty list is
 *               a no-op apart from an info log line
 * @throws IOException if sizing, merging, writing, or cleanup fails
 */
@Override
public void merge(List<CompressAwarePath> inputs) throws IOException {
    // sanity check
    if (inputs == null || inputs.isEmpty()) {
        LOG.info("No ondisk files to merge...");
        return;
    }

    long approxOutputSize = 0;
    int bytesPerSum = jobConf.getInt("io.bytes.per.checksum", 512);

    LOG.info("OnDiskMerger: We have " + inputs.size() +
        " map outputs on disk. Triggering merge...");

    // 1. Prepare the list of files to be merged.
    for (CompressAwarePath file : inputs) {
        approxOutputSize += localFS.getFileStatus(file).getLen();
    }

    // add the checksum length
    approxOutputSize +=
        ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

    // 2. Start the on-disk merge process
    Path outputPath =
        localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(),
            approxOutputSize, jobConf).suffix(Task.MERGED_OUTPUT_PREFIX);

    // Resolve the key/value classes once; the writer and the merge share them.
    final Class<K> keyClass = (Class<K>) jobConf.getMapOutputKeyClass();
    final Class<V> valueClass = (Class<V>) jobConf.getMapOutputValueClass();

    Writer<K,V> writer =
        new Writer<K,V>(jobConf, rfs, outputPath, keyClass, valueClass,
            codec, null);

    RawKeyValueIterator iter = null;
    CompressAwarePath compressAwarePath;
    Path tmpDir = new Path(reduceId.toString());
    // Flipped right before the regular close() so that close() is attempted
    // exactly once, even when close() itself is what throws.
    boolean closeAttempted = false;
    try {
        iter = Merger.merge(jobConf, rfs, keyClass, valueClass,
            codec, inputs.toArray(new Path[inputs.size()]),
            true, ioSortFactor, tmpDir,
            (RawComparator<K>) jobConf.getOutputKeyComparator(),
            reporter, spilledRecordsCounter, null,
            mergedMapOutputsCounter, null);
        Merger.writeFile(iter, writer, reporter, jobConf);
        closeAttempted = true;
        writer.close();
        compressAwarePath = new CompressAwarePath(outputPath,
            writer.getRawLength(), writer.getCompressedLength());
    } catch (IOException e) {
        if (!closeAttempted) {
            // Release the writer before removing the file it was writing to.
            try {
                writer.close();
            } catch (IOException closeFailure) {
                // Keep the original failure as the one that propagates.
                LOG.warn("Failed to close writer for " + outputPath
                    + " after a failed merge", closeFailure);
            }
        }
        localFS.delete(outputPath, true);
        throw e;
    }

    closeOnDiskFile(compressAwarePath);

    LOG.info(reduceId +
        " Finished merging " + inputs.size() +
        " map output files on disk of total-size " +
        approxOutputSize + "." +
        " Local output file is " + outputPath + " of size " +
        localFS.getFileStatus(outputPath).getLen());
}
示例4: merge
import org.apache.hadoop.fs.ChecksumFileSystem; //导入方法依赖的package包/类
/**
 * Merges the given on-disk map outputs into one local file and hands the
 * result to {@code closeOnDiskFile}.
 *
 * <p>The approximate output size is the sum of each input's
 * {@code getCompressedSize()} plus the value returned by
 * {@code ChecksumFileSystem.getChecksumLength}; it is passed to
 * {@code localDirAllocator.getLocalPathForWrite} when choosing the output
 * path.
 *
 * <p>If merging or writing fails with an {@link IOException}, the writer is
 * closed on a best-effort basis, the partially written output is deleted,
 * and the original exception is rethrown.
 *
 * @param inputs the on-disk files to merge; {@code null} or an empty list is
 *               a no-op apart from an info log line
 * @throws IOException if merging, writing, or cleanup fails
 */
@Override
public void merge(List<CompressAwarePath> inputs) throws IOException {
    // sanity check
    if (inputs == null || inputs.isEmpty()) {
        LOG.info("No ondisk files to merge...");
        return;
    }

    long approxOutputSize = 0;
    int bytesPerSum = jobConf.getInt("io.bytes.per.checksum", 512);

    LOG.info("OnDiskMerger: We have " + inputs.size() +
        " map outputs on disk. Triggering merge...");

    // 1. Prepare the list of files to be merged.
    for (CompressAwarePath file : inputs) {
        approxOutputSize += file.getCompressedSize();
    }

    // add the checksum length
    approxOutputSize +=
        ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

    // 2. Start the on-disk merge process
    Path outputPath =
        localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(),
            approxOutputSize, jobConf).suffix(Task.MERGED_OUTPUT_PREFIX);

    // Resolve the key/value classes once; the writer and the merge share them.
    final Class<K> keyClass = (Class<K>) jobConf.getMapOutputKeyClass();
    final Class<V> valueClass = (Class<V>) jobConf.getMapOutputValueClass();

    Writer<K,V> writer =
        new Writer<K,V>(jobConf, rfs, outputPath, keyClass, valueClass,
            codec, null);

    RawKeyValueIterator iter = null;
    CompressAwarePath compressAwarePath;
    Path tmpDir = new Path(reduceId.toString());
    // Flipped right before the regular close() so that close() is attempted
    // exactly once, even when close() itself is what throws.
    boolean closeAttempted = false;
    try {
        iter = Merger.merge(jobConf, rfs, keyClass, valueClass,
            codec, inputs.toArray(new Path[inputs.size()]),
            true, ioSortFactor, tmpDir,
            (RawComparator<K>) jobConf.getOutputKeyComparator(),
            reporter, spilledRecordsCounter, null,
            mergedMapOutputsCounter, null);
        Merger.writeFile(iter, writer, reporter, jobConf);
        closeAttempted = true;
        writer.close();
        compressAwarePath = new CompressAwarePath(outputPath,
            writer.getRawLength(), writer.getCompressedLength());
    } catch (IOException e) {
        if (!closeAttempted) {
            // Release the writer before removing the file it was writing to.
            try {
                writer.close();
            } catch (IOException closeFailure) {
                // Keep the original failure as the one that propagates.
                LOG.warn("Failed to close writer for " + outputPath
                    + " after a failed merge", closeFailure);
            }
        }
        localFS.delete(outputPath, true);
        throw e;
    }

    closeOnDiskFile(compressAwarePath);

    LOG.info(reduceId +
        " Finished merging " + inputs.size() +
        " map output files on disk of total-size " +
        approxOutputSize + "." +
        " Local output file is " + outputPath + " of size " +
        localFS.getFileStatus(outputPath).getLen());
}
示例5: merge
import org.apache.hadoop.fs.ChecksumFileSystem; //导入方法依赖的package包/类
/**
 * Merges the given on-disk inputs into one local file and hands the result
 * to {@code closeOnDiskFile}.
 *
 * <p>Increments {@code numDiskToDiskMerges} once per non-empty invocation.
 * The approximate output size is the sum of the input file lengths (as
 * reported by {@code localFS}) plus the value returned by
 * {@code ChecksumFileSystem.getChecksumLength}; it is passed to
 * {@code localDirAllocator.getLocalPathForWrite} when choosing the output
 * path. After a successful write, {@code additionalBytesWritten} is
 * incremented by the writer's compressed length.
 *
 * <p>If merging or writing fails with an {@link IOException}, the writer is
 * closed on a best-effort basis, the partially written output is deleted,
 * and the original exception is rethrown.
 *
 * @param inputs the on-disk files to merge; {@code null} or an empty list is
 *               a no-op apart from an info log line
 * @throws IOException if sizing, merging, writing, or cleanup fails
 */
@Override
public void merge(List<Path> inputs) throws IOException {
    // sanity check
    if (inputs == null || inputs.isEmpty()) {
        LOG.info("No ondisk files to merge...");
        return;
    }
    numDiskToDiskMerges.increment(1);

    long approxOutputSize = 0;
    int bytesPerSum = conf.getInt("io.bytes.per.checksum", 512);

    LOG.info("OnDiskMerger: We have " + inputs.size() +
        " map outputs on disk. Triggering merge...");

    // 1. Prepare the list of files to be merged.
    for (Path file : inputs) {
        approxOutputSize += localFS.getFileStatus(file).getLen();
    }

    // add the checksum length
    approxOutputSize +=
        ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

    // 2. Start the on-disk merge process
    Path outputPath =
        localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(),
            approxOutputSize, conf).suffix(Constants.MERGED_OUTPUT_PREFIX);
    Writer writer =
        new Writer(conf, rfs, outputPath,
            (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
            (Class)ConfigUtils.getIntermediateInputValueClass(conf),
            codec, null, null);
    TezRawKeyValueIterator iter = null;
    Path tmpDir = new Path(inputContext.getUniqueIdentifier());
    // Flipped right before the regular close() so that close() is attempted
    // exactly once, even when close() itself is what throws.
    boolean closeAttempted = false;
    try {
        iter = TezMerger.merge(conf, rfs,
            (Class)ConfigUtils.getIntermediateInputKeyClass(conf),
            (Class)ConfigUtils.getIntermediateInputValueClass(conf),
            codec, ifileReadAhead, ifileReadAheadLength, ifileBufferSize,
            inputs.toArray(new Path[inputs.size()]), true, ioSortFactor, tmpDir,
            (RawComparator)ConfigUtils.getIntermediateInputKeyComparator(conf),
            nullProgressable, spilledRecordsCounter, null,
            mergedMapOutputsCounter, null);
        // TODO Maybe differentiate between data written because of Merges and
        // the finalMerge (i.e. final mem available may be different from
        // initial merge mem)
        TezMerger.writeFile(iter, writer, nullProgressable,
            TezJobConfig.TEZ_RUNTIME_RECORDS_BEFORE_PROGRESS_DEFAULT);
        closeAttempted = true;
        writer.close();
        additionalBytesWritten.increment(writer.getCompressedLength());
    } catch (IOException e) {
        if (!closeAttempted) {
            // Release the writer before removing the file it was writing to.
            try {
                writer.close();
            } catch (IOException closeFailure) {
                // Keep the original failure as the one that propagates.
                LOG.warn("Failed to close writer for " + outputPath
                    + " after a failed merge", closeFailure);
            }
        }
        localFS.delete(outputPath, true);
        throw e;
    }

    closeOnDiskFile(outputPath);

    LOG.info(inputContext.getUniqueIdentifier() +
        " Finished merging " + inputs.size() +
        " map output files on disk of total-size " +
        approxOutputSize + "." +
        " Local output file is " + outputPath + " of size " +
        localFS.getFileStatus(outputPath).getLen());
}
示例6: merge
import org.apache.hadoop.fs.ChecksumFileSystem; //导入方法依赖的package包/类
/**
 * Merges the given on-disk map outputs into one local file and hands the
 * resulting path to {@code closeOnDiskFile}.
 *
 * <p>The approximate output size is the sum of the input file lengths (as
 * reported by {@code localFS}) plus the value returned by
 * {@code ChecksumFileSystem.getChecksumLength}; it is passed to
 * {@code localDirAllocator.getLocalPathForWrite} when choosing the output
 * path.
 *
 * <p>If merging or writing fails with an {@link IOException}, the writer is
 * closed on a best-effort basis, the partially written output is deleted,
 * and the original exception is rethrown.
 *
 * @param inputs the on-disk files to merge; {@code null} or an empty list is
 *               a no-op apart from an info log line
 * @throws IOException if sizing, merging, writing, or cleanup fails
 */
@Override
public void merge(List<Path> inputs) throws IOException {
    // sanity check
    if (inputs == null || inputs.isEmpty()) {
        LOG.info("No ondisk files to merge...");
        return;
    }

    long approxOutputSize = 0;
    int bytesPerSum = jobConf.getInt("io.bytes.per.checksum", 512);

    LOG.info("OnDiskMerger: We have " + inputs.size() +
        " map outputs on disk. Triggering merge...");

    // 1. Prepare the list of files to be merged.
    for (Path file : inputs) {
        approxOutputSize += localFS.getFileStatus(file).getLen();
    }

    // add the checksum length
    approxOutputSize +=
        ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

    // 2. Start the on-disk merge process
    Path outputPath =
        localDirAllocator.getLocalPathForWrite(inputs.get(0).toString(),
            approxOutputSize, jobConf).suffix(Task.MERGED_OUTPUT_PREFIX);

    // Resolve the key/value classes once; the writer and the merge share them.
    final Class<K> keyClass = (Class<K>) jobConf.getMapOutputKeyClass();
    final Class<V> valueClass = (Class<V>) jobConf.getMapOutputValueClass();

    Writer<K,V> writer =
        new Writer<K,V>(jobConf, rfs, outputPath, keyClass, valueClass,
            codec, null);

    RawKeyValueIterator iter = null;
    Path tmpDir = new Path(reduceId.toString());
    // Flipped right before the regular close() so that close() is attempted
    // exactly once, even when close() itself is what throws.
    boolean closeAttempted = false;
    try {
        iter = Merger.merge(jobConf, rfs, keyClass, valueClass,
            codec, inputs.toArray(new Path[inputs.size()]),
            true, ioSortFactor, tmpDir,
            (RawComparator<K>) jobConf.getOutputKeyComparator(),
            reporter, spilledRecordsCounter, null,
            mergedMapOutputsCounter, null);
        Merger.writeFile(iter, writer, reporter, jobConf);
        closeAttempted = true;
        writer.close();
    } catch (IOException e) {
        if (!closeAttempted) {
            // Release the writer before removing the file it was writing to.
            try {
                writer.close();
            } catch (IOException closeFailure) {
                // Keep the original failure as the one that propagates.
                LOG.warn("Failed to close writer for " + outputPath
                    + " after a failed merge", closeFailure);
            }
        }
        localFS.delete(outputPath, true);
        throw e;
    }

    closeOnDiskFile(outputPath);

    LOG.info(reduceId +
        " Finished merging " + inputs.size() +
        " map output files on disk of total-size " +
        approxOutputSize + "." +
        " Local output file is " + outputPath + " of size " +
        localFS.getFileStatus(outputPath).getLen());
}