本文整理汇总了Java中org.apache.hadoop.mapred.Merger.Segment.getLength方法的典型用法代码示例。如果您正苦于以下问题:Java Segment.getLength方法的具体用法?Java Segment.getLength怎么用?Java Segment.getLength使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.mapred.Merger.Segment的用法示例。
在下文中一共展示了Segment.getLength方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: compare
import org.apache.hadoop.mapred.Merger.Segment; //导入方法依赖的package包/类
/**
 * Orders two segments by ascending length; segments of equal length are
 * ordered by comparing the values returned by
 * {@code SharedFsPlugins.getSegmentPath}.
 *
 * @param seg1 the first segment
 * @param seg2 the second segment
 * @return -1 if {@code seg1} is shorter, 1 if it is longer, otherwise the
 *         result of comparing the two segment paths
 */
@Override
public int compare(Segment<K, V> seg1, Segment<K, V> seg2) {
  final long firstLength = seg1.getLength();
  final long secondLength = seg2.getLength();
  if (firstLength != secondLength) {
    return firstLength < secondLength ? -1 : 1;
  }
  // Same length: fall back to the segment paths as a tie-breaker.
  return SharedFsPlugins.getSegmentPath(seg1).compareTo(SharedFsPlugins.getSegmentPath(seg2));
}
示例2: finish
import org.apache.hadoop.mapred.Merger.Segment; //导入方法依赖的package包/类
/**
 * Closes the background merger, collects every pending segment, and returns
 * an iterator over the result of one final merge across all of them.
 *
 * <p>The pending list ({@code segmentsToBeMerged}) is drained into a local
 * list, which is sorted by ascending segment length before being handed to
 * {@code Merger.merge}.
 *
 * @return the iterator produced by {@code Merger.merge} over all segments
 * @throws Throwable whatever {@code merger.close()} or {@code Merger.merge} propagates
 */
@SuppressWarnings("unchecked")
public RawKeyValueIterator finish() throws Throwable {
  // merge config params
  Class<K> keyClass = (Class<K>) jobConf.getMapOutputKeyClass();
  Class<V> valueClass = (Class<V>) jobConf.getMapOutputValueClass();
  final RawComparator<K> comparator = (RawComparator<K>) jobConf.getOutputKeyComparator();

  // Wait for on-going merges to complete
  merger.close();
  LOG.info("finalMerge called with " + segmentsToBeMerged.size() + " on-disk map-outputs");

  List<Segment<K, V>> segments = new ArrayList<Segment<K, V>>();
  long onDiskBytes = 0;
  for (Segment<K, V> segment : segmentsToBeMerged) {
    long fileLength = segment.getLength();
    onDiskBytes += fileLength;
    LOG.debug("Disk file: " + segment + " Length is " + fileLength);
    segments.add(segment);
  }
  segmentsToBeMerged.clear();

  // Report the size of the local copy: the pending list was just cleared, so
  // its size is always 0 at this point.
  LOG.info("Merging " + segments.size() + " files, " + onDiskBytes + " bytes from disk");

  // Sort by ascending length before merging.
  Collections.sort(segments, new Comparator<Segment<K, V>>() {
    public int compare(Segment<K, V> o1, Segment<K, V> o2) {
      if (o1.getLength() == o2.getLength()) {
        return 0;
      }
      return o1.getLength() < o2.getLength() ? -1 : 1;
    }
  });

  // The merge factor is the segment count, so every segment is offered to the merge at once.
  return Merger.merge(jobConf, lustrefs, keyClass, valueClass, segments, segments.size(), mergeTempDir,
      comparator, reporter, spilledRecordsCounter, null, null);
}
示例3: clearSegmentList
import org.apache.hadoop.mapred.Merger.Segment; //导入方法依赖的package包/类
/**
 * Closes every segment in {@code segmentList}, hands the length of each
 * in-memory segment back to {@code memCache}, and then empties the list.
 *
 * @throws IOException if closing a segment fails
 */
private void clearSegmentList() throws IOException {
  for (Segment<K,V> current : segmentList) {
    // Read the length before closing the segment.
    final long reservedBytes = current.getLength();
    current.close();
    if (!current.inMemory()) {
      continue;
    }
    memCache.unreserve(reservedBytes);
  }
  segmentList.clear();
}
示例4: merge
import org.apache.hadoop.mapred.Merger.Segment; //导入方法依赖的package包/类
/**
 * Merges the given on-disk segments into a single new file under
 * {@code reduceDir} and registers that file as a new segment via
 * {@code addSegmentToMerge}.
 *
 * <p>If merging or writing fails with an {@link IOException}, the writer is
 * closed on a best-effort basis, the partially written output file is
 * deleted, and the original exception is rethrown.
 *
 * @param segments the segments to merge; a {@code null} or empty list is a no-op
 * @throws IOException if the merge, the write, or the final file-status lookup fails
 */
@SuppressWarnings("unchecked")
@Override
public void merge(List<Segment<K,V>> segments) throws IOException {
  // sanity check
  if (segments == null || segments.isEmpty()) {
    LOG.info("No ondisk files to merge...");
    return;
  }

  Class<K> keyClass = (Class<K>) jobConf.getMapOutputKeyClass();
  Class<V> valueClass = (Class<V>) jobConf.getMapOutputValueClass();
  final RawComparator<K> comparator = (RawComparator<K>) jobConf.getOutputKeyComparator();
  long approxOutputSize = 0;
  int bytesPerSum = jobConf.getInt("io.bytes.per.checksum", 512);

  LOG.info("OnDiskMerger: We have " + segments.size()
      + " map outputs on disk. Triggering merge...");

  // 1. Prepare the list of files to be merged.
  for (Segment<K,V> segment : segments) {
    approxOutputSize += segment.getLength();
  }

  // add the checksum length
  approxOutputSize += ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);

  // 2. Start the on-disk merge process
  Path outputPath = new Path(reduceDir, "file-" + (numPasses++)).suffix(Task.MERGED_OUTPUT_PREFIX);
  Writer<K, V> writer = new Writer<K, V>(jobConf, lustrefs.create(outputPath),
      keyClass, valueClass, codec, null, true);

  // Tracks whether close() has already been invoked, so the failure path
  // never closes the writer a second time.
  boolean closeAttempted = false;
  try {
    RawKeyValueIterator iter = Merger.merge(jobConf, lustrefs, keyClass, valueClass, segments,
        ioSortFactor, mergeTempDir, comparator, reporter, spilledRecordsCounter,
        mergedMapOutputsCounter, null);
    Merger.writeFile(iter, writer, reporter, jobConf);
    closeAttempted = true;
    writer.close();
  } catch (IOException e) {
    if (!closeAttempted) {
      // Release the writer before deleting its file.
      // NOTE(review): assumes Writer.close() also closes the stream obtained from
      // lustrefs.create(outputPath) - confirm against the Writer implementation.
      try {
        writer.close();
      } catch (IOException ignored) {
        // Best-effort cleanup: the original failure is the one reported to the caller.
      }
    }
    lustrefs.delete(outputPath, true);
    throw e;
  }

  addSegmentToMerge(new Segment<K, V>(jobConf, lustrefs, outputPath, codec, false, null));

  LOG.info(reduceId + " Finished merging " + segments.size()
      + " map output files on disk of total-size " + approxOutputSize + "."
      + " Local output file is " + outputPath + " of size "
      + lustrefs.getFileStatus(outputPath).getLen());
}