当前位置: 首页>>代码示例>>Java>>正文


Java Merger.merge方法代码示例

本文整理汇总了Java中org.apache.hadoop.mapred.Merger.merge方法的典型用法代码示例。如果您正苦于以下问题:Java Merger.merge方法的具体用法?Java Merger.merge怎么用?Java Merger.merge使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.hadoop.mapred.Merger的用法示例。


在下文中一共展示了Merger.merge方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: finish

import org.apache.hadoop.mapred.Merger; //导入方法依赖的package包/类
@SuppressWarnings("unchecked")
public RawKeyValueIterator finish() throws Throwable {
    // merge config params
    Class<K> keyClass = (Class<K>) jobConf.getMapOutputKeyClass();
    Class<V> valueClass = (Class<V>) jobConf.getMapOutputValueClass();
    final RawComparator<K> comparator = (RawComparator<K>) jobConf.getOutputKeyComparator();
    
    // Wait for on-going merges to complete
    merger.close();
    
    LOG.info("finalMerge called with " + segmentsToBeMerged.size() + " on-disk map-outputs");
    
    List<Segment<K, V>> segments = new ArrayList<Segment<K, V>>();
    long onDiskBytes = 0;
    
    for (Segment<K, V> segment : segmentsToBeMerged) {
        long fileLength = segment.getLength();
        onDiskBytes += fileLength;
        LOG.debug("Disk file: " + segment + " Length is " + fileLength);
        segments.add(segment);
    }
    segmentsToBeMerged.clear();
    
    LOG.info("Merging " + segmentsToBeMerged.size() + " files, " + onDiskBytes + " bytes from disk");
    Collections.sort(segments, new Comparator<Segment<K, V>>() {
        
        public int compare(Segment<K, V> o1, Segment<K, V> o2) {
            if (o1.getLength() == o2.getLength()) {
                return 0;
            }
            return o1.getLength() < o2.getLength() ? -1 : 1;
        }
    });
    return Merger.merge(jobConf, lustrefs, keyClass, valueClass, segments, segments.size(), mergeTempDir,
                        comparator, reporter, spilledRecordsCounter, null, null);
}
 
开发者ID:intel-hpdd,项目名称:lustre-connector-for-hadoop,代码行数:37,代码来源:LustreFsShuffle.java

示例2: merge

import org.apache.hadoop.mapred.Merger; //导入方法依赖的package包/类
@Override
public void merge(List<InMemoryMapOutput<K, V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }

  TaskAttemptID dummyMapId = inputs.get(0).getMapId(); 
  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize = 
    createInMemorySegments(inputs, inMemorySegments, 0);
  int noInMemorySegments = inMemorySegments.size();
  
  InMemoryMapOutput<K, V> mergedMapOutputs = 
    unconditionalReserve(dummyMapId, mergeOutputSize, false);
  
  Writer<K, V> writer = 
    new InMemoryWriter<K, V>(mergedMapOutputs.getArrayStream());
  
  LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
           " segments of total-size: " + mergeOutputSize);

  RawKeyValueIterator rIter = 
    Merger.merge(jobConf, rfs,
                 (Class<K>)jobConf.getMapOutputKeyClass(),
                 (Class<V>)jobConf.getMapOutputValueClass(),
                 inMemorySegments, inMemorySegments.size(),
                 new Path(reduceId.toString()),
                 (RawComparator<K>)jobConf.getOutputKeyComparator(),
                 reporter, null, null, null);
  Merger.writeFile(rIter, writer, reporter, jobConf);
  writer.close();

  LOG.info(reduceId +  
           " Memory-to-Memory merge of the " + noInMemorySegments +
           " files in-memory complete.");

  // Note the output of the merge
  closeInMemoryMergedFile(mergedMapOutputs);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:40,代码来源:MergeManagerImpl.java

示例3: testMergeShouldReturnProperProgress

import org.apache.hadoop.mapred.Merger; //导入方法依赖的package包/类
@SuppressWarnings( { "deprecation", "unchecked" })
public void testMergeShouldReturnProperProgress(
    List<Segment<Text, Text>> segments) throws IOException {
  Path tmpDir = new Path("localpath");
  Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
  Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
  RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
  Counter readsCounter = new Counter();
  Counter writesCounter = new Counter();
  Progress mergePhase = new Progress();
  RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass,
      valueClass, segments, 2, tmpDir, comparator, getReporter(),
      readsCounter, writesCounter, mergePhase);
  Assert.assertEquals(1.0f, mergeQueue.getProgress().get());
}
 
开发者ID:ict-carch,项目名称:hadoop-plus,代码行数:16,代码来源:TestMerger.java

示例4: merge

import org.apache.hadoop.mapred.Merger; //导入方法依赖的package包/类
@Override
public void merge(List<MapOutput<K, V>> inputs) throws IOException {
  if (inputs == null || inputs.size() == 0) {
    return;
  }

  TaskAttemptID dummyMapId = inputs.get(0).getMapId(); 
  List<Segment<K, V>> inMemorySegments = new ArrayList<Segment<K, V>>();
  long mergeOutputSize = 
    createInMemorySegments(inputs, inMemorySegments, 0);
  int noInMemorySegments = inMemorySegments.size();
  
  MapOutput<K, V> mergedMapOutputs = 
    unconditionalReserve(dummyMapId, mergeOutputSize, false);
  
  Writer<K, V> writer = 
    new InMemoryWriter<K, V>(mergedMapOutputs.getArrayStream());
  
  LOG.info("Initiating Memory-to-Memory merge with " + noInMemorySegments +
           " segments of total-size: " + mergeOutputSize);

  RawKeyValueIterator rIter = 
    Merger.merge(jobConf, rfs,
                 (Class<K>)jobConf.getMapOutputKeyClass(),
                 (Class<V>)jobConf.getMapOutputValueClass(),
                 inMemorySegments, inMemorySegments.size(),
                 new Path(reduceId.toString()),
                 (RawComparator<K>)jobConf.getOutputKeyComparator(),
                 reporter, null, null, null);
  Merger.writeFile(rIter, writer, reporter, jobConf);
  writer.close();

  LOG.info(reduceId +  
           " Memory-to-Memory merge of the " + noInMemorySegments +
           " files in-memory complete.");

  // Note the output of the merge
  closeInMemoryMergedFile(mergedMapOutputs);
}
 
开发者ID:rekhajoshm,项目名称:mapreduce-fork,代码行数:40,代码来源:MergeManager.java

示例5: merge

import org.apache.hadoop.mapred.Merger; //导入方法依赖的package包/类
@SuppressWarnings("unchecked")
@Override
public void merge(List<Segment<K,V>> segments) throws IOException {
    // sanity check
    if (segments == null || segments.isEmpty()) {
        LOG.info("No ondisk files to merge...");
        return;
    }
    
    Class<K> keyClass = (Class<K>) jobConf.getMapOutputKeyClass();
    Class<V> valueClass = (Class<V>) jobConf.getMapOutputValueClass();
    final RawComparator<K> comparator = (RawComparator<K>) jobConf.getOutputKeyComparator();
    
    long approxOutputSize = 0;
    int bytesPerSum = jobConf.getInt("io.bytes.per.checksum", 512);
    
    LOG.info("OnDiskMerger: We have  " + segments.size()
             + " map outputs on disk. Triggering merge...");
    
    // 1. Prepare the list of files to be merged.
    for (Segment<K,V> segment : segments) {
        approxOutputSize += segment.getLength();
    }
    
    // add the checksum length
    approxOutputSize += ChecksumFileSystem.getChecksumLength(approxOutputSize, bytesPerSum);
    
    // 2. Start the on-disk merge process
    Path outputPath = new Path(reduceDir, "file-" + (numPasses++)).suffix(Task.MERGED_OUTPUT_PREFIX);
    
    Writer<K, V> writer = new Writer<K, V>(jobConf, lustrefs.create(outputPath),
                                           (Class<K>) jobConf.getMapOutputKeyClass(), 
                                           (Class<V>) jobConf.getMapOutputValueClass(),
                                           codec, null, true);
    RawKeyValueIterator iter = null;
    try {
        iter = Merger.merge(jobConf, lustrefs, keyClass, valueClass, segments, ioSortFactor, mergeTempDir,
                            comparator, reporter, spilledRecordsCounter, mergedMapOutputsCounter, null);
        Merger.writeFile(iter, writer, reporter, jobConf);
        writer.close();
    } catch (IOException e) {
        lustrefs.delete(outputPath, true);
        throw e;
    }
    addSegmentToMerge(new Segment<K, V>(jobConf, lustrefs, outputPath, codec, false, null));
    LOG.info(reduceId + " Finished merging " + segments.size()
             + " map output files on disk of total-size " + approxOutputSize + "."
             + " Local output file is " + outputPath + " of size "
             + lustrefs.getFileStatus(outputPath).getLen());
}
 
开发者ID:intel-hpdd,项目名称:lustre-connector-for-hadoop,代码行数:51,代码来源:LustreFsShuffle.java

示例6: testMergeShouldReturnProperProgress

import org.apache.hadoop.mapred.Merger; //导入方法依赖的package包/类
@SuppressWarnings( { "unchecked" })
public void testMergeShouldReturnProperProgress(
    List<Segment<Text, Text>> segments) throws IOException {
  Path tmpDir = new Path("localpath");
  Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
  Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
  RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
  Counter readsCounter = new Counter();
  Counter writesCounter = new Counter();
  Progress mergePhase = new Progress();
  RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass,
      valueClass, segments, 2, tmpDir, comparator, getReporter(),
      readsCounter, writesCounter, mergePhase);
  final float epsilon = 0.00001f;

  // Reading 6 keys total, 3 each in 2 segments, so each key read moves the
  // progress forward 1/6th of the way. Initially the first keys from each
  // segment have been read as part of the merge setup, so progress = 2/6.
  Assert.assertEquals(2/6.0f, mergeQueue.getProgress().get(), epsilon);

  // The first next() returns one of the keys already read during merge setup
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(2/6.0f, mergeQueue.getProgress().get(), epsilon);

  // Subsequent next() calls should read one key and move progress
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(3/6.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(4/6.0f, mergeQueue.getProgress().get(), epsilon);

  // At this point we've exhausted all of the keys in one segment
  // so getting the next key will return the already cached key from the
  // other segment
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(4/6.0f, mergeQueue.getProgress().get(), epsilon);

  // Subsequent next() calls should read one key and move progress
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(5/6.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);

  // Now there should be no more input
  Assert.assertFalse(mergeQueue.next());
  Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.getKey() == null);
  Assert.assertEquals(0, mergeQueue.getValue().getData().length);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:49,代码来源:TestMerger.java

示例7: testMergeShouldReturnProperProgress

import org.apache.hadoop.mapred.Merger; //导入方法依赖的package包/类
@SuppressWarnings( { "unchecked" })
public void testMergeShouldReturnProperProgress(
    List<Segment<Text, Text>> segments) throws IOException {
  Path tmpDir = new Path("localpath");
  Class<Text> keyClass = (Class<Text>) jobConf.getMapOutputKeyClass();
  Class<Text> valueClass = (Class<Text>) jobConf.getMapOutputValueClass();
  RawComparator<Text> comparator = jobConf.getOutputKeyComparator();
  Counter readsCounter = new Counter();
  Counter writesCounter = new Counter();
  Progress mergePhase = new Progress();
  RawKeyValueIterator mergeQueue = Merger.merge(conf, fs, keyClass,
      valueClass, segments, 2, tmpDir, comparator, getReporter(),
      readsCounter, writesCounter, mergePhase);
  final float epsilon = 0.00001f;

  // Reading 6 keys total, 3 each in 2 segments, so each key read moves the
  // progress forward 1/6th of the way. Initially the first keys from each
  // segment have been read as part of the merge setup, so progress = 2/6.
  Assert.assertEquals(2/6.0f, mergeQueue.getProgress().get(), epsilon);

  // The first next() returns one of the keys already read during merge setup
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(2/6.0f, mergeQueue.getProgress().get(), epsilon);

  // Subsequent next() calls should read one key and move progress
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(3/6.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(4/6.0f, mergeQueue.getProgress().get(), epsilon);

  // At this point we've exhausted all of the keys in one segment
  // so getting the next key will return the already cached key from the
  // other segment
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(4/6.0f, mergeQueue.getProgress().get(), epsilon);

  // Subsequent next() calls should read one key and move progress
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(5/6.0f, mergeQueue.getProgress().get(), epsilon);
  Assert.assertTrue(mergeQueue.next());
  Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);

  // Now there should be no more input
  Assert.assertFalse(mergeQueue.next());
  Assert.assertEquals(1.0f, mergeQueue.getProgress().get(), epsilon);
}
 
开发者ID:yncxcw,项目名称:FlexMap,代码行数:47,代码来源:TestMerger.java


注:本文中的org.apache.hadoop.mapred.Merger.merge方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。