Java DistCpUtils.publish Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.tools.util.DistCpUtils.publish. If you are wondering what exactly DistCpUtils.publish does, how to use it, or where to find real-world examples, the hand-picked method examples below should help. You can also explore further usage examples of org.apache.hadoop.tools.util.DistCpUtils, the class this method belongs to.


The following shows 5 code examples of the DistCpUtils.publish method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
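Before looking at the examples, it helps to know what DistCpUtils.publish actually does: it writes a labeled value into a Hadoop Configuration so that later stages of a DistCp job (typically the map tasks) can read it back. Below is a minimal round-trip sketch; the label string is made up for illustration, and the comment about the internals reflects the Hadoop sources, where publish essentially boils down to configuration.set(label, String.valueOf(value)).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.util.DistCpUtils;

public class PublishRoundTrip {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Store a value under a label; internally this is roughly
    // conf.set("demo.distcp.num.splits", String.valueOf(8)).
    // The label here is hypothetical, chosen for the demo.
    DistCpUtils.publish(conf, "demo.distcp.num.splits", 8);

    // Any code holding the same Configuration can read the value back.
    int numSplits = conf.getInt("demo.distcp.num.splits", -1);
    System.out.println("numSplits = " + numSplits);  // prints: numSplits = 8
  }
}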

Example 1: createSplits

import org.apache.hadoop.tools.util.DistCpUtils; // import the package/class the method depends on
private List<InputSplit> createSplits(JobContext jobContext,
                                      List<DynamicInputChunk> chunks)
        throws IOException {
  int numMaps = getNumMapTasks(jobContext.getConfiguration());

  final int nSplits = Math.min(numMaps, chunks.size());
  List<InputSplit> splits = new ArrayList<InputSplit>(nSplits);
  
  for (int i = 0; i < nSplits; ++i) {
    TaskID taskId = new TaskID(jobContext.getJobID(), TaskType.MAP, i);
    chunks.get(i).assignTo(taskId);
    splits.add(new FileSplit(chunks.get(i).getPath(), 0,
        // Setting non-zero length for FileSplit size, to avoid a possible
        // future when 0-sized file-splits are considered "empty" and skipped
        // over.
        getMinRecordsPerChunk(jobContext.getConfiguration()),
        null));
  }
  DistCpUtils.publish(jobContext.getConfiguration(),
                      CONF_LABEL_NUM_SPLITS, splits.size());
  return splits;
}
 
Developer ID: naver, Project: hadoop, Lines of code: 23, Source file: DynamicInputFormat.java

Example 2: createSplits

import org.apache.hadoop.tools.util.DistCpUtils; // import the package/class the method depends on
private List<InputSplit> createSplits(JobContext jobContext,
                                      List<DynamicInputChunk> chunks)
        throws IOException {
  int numMaps = getNumMapTasks(jobContext.getConfiguration());

  final int nSplits = Math.min(numMaps, chunks.size());
  List<InputSplit> splits = new ArrayList<InputSplit>(nSplits);
  
  for (int i = 0; i < nSplits; ++i) {
    TaskID taskId = new TaskID(jobContext.getJobID(), TaskType.MAP, i);
    chunks.get(i).assignTo(taskId);
    splits.add(new FileSplit(chunks.get(i).getPath(), 0,
        // Setting non-zero length for FileSplit size, to avoid a possible
        // future when 0-sized file-splits are considered "empty" and skipped
        // over.
        MIN_RECORDS_PER_CHUNK,
        null));
  }
  DistCpUtils.publish(jobContext.getConfiguration(),
                      CONF_LABEL_NUM_SPLITS, splits.size());
  return splits;
}
 
Developer ID: ict-carch, Project: hadoop-plus, Lines of code: 23, Source file: DynamicInputFormat.java

Example 3: splitCopyListingIntoChunksWithShuffle

import org.apache.hadoop.tools.util.DistCpUtils; // import the package/class the method depends on
private List<DynamicInputChunk> splitCopyListingIntoChunksWithShuffle
                                  (JobContext context) throws IOException {

  final Configuration configuration = context.getConfiguration();
  int numRecords = getNumberOfRecords(configuration);
  int numMaps = getNumMapTasks(configuration);
  int maxChunksTolerable = getMaxChunksTolerable(configuration);

  // Number of chunks each map will process, on average.
  int splitRatio = getListingSplitRatio(configuration, numMaps, numRecords);
  validateNumChunksUsing(splitRatio, numMaps, maxChunksTolerable);

  int numEntriesPerChunk = (int)Math.ceil((float)numRecords
                                        /(splitRatio * numMaps));
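  // Worked example (illustrative): with numRecords = 1000, numMaps = 4 and
  // splitRatio = 5, numEntriesPerChunk = ceil(1000 / 20) = 50.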
  DistCpUtils.publish(context.getConfiguration(),
                      CONF_LABEL_NUM_ENTRIES_PER_CHUNK,
                      numEntriesPerChunk);

  final int nChunksTotal = (int)Math.ceil((float)numRecords/numEntriesPerChunk);
  int nChunksOpenAtOnce
          = Math.min(N_CHUNKS_OPEN_AT_ONCE_DEFAULT, nChunksTotal);

  Path listingPath = getListingFilePath(configuration);
  SequenceFile.Reader reader
          = new SequenceFile.Reader(configuration,
                                    SequenceFile.Reader.file(listingPath));

  List<DynamicInputChunk> openChunks
                = new ArrayList<DynamicInputChunk>();
  
  List<DynamicInputChunk> chunksFinal = new ArrayList<DynamicInputChunk>();

  CopyListingFileStatus fileStatus = new CopyListingFileStatus();
  Text relPath = new Text();
  int recordCounter = 0;
  int chunkCount = 0;

  try {

    while (reader.next(relPath, fileStatus)) {
      if (recordCounter % (nChunksOpenAtOnce*numEntriesPerChunk) == 0) {
        // All chunks full. Create new chunk-set.
        closeAll(openChunks);
        chunksFinal.addAll(openChunks);

        openChunks = createChunks(
                configuration, chunkCount, nChunksTotal, nChunksOpenAtOnce);

        chunkCount += openChunks.size();

        nChunksOpenAtOnce = openChunks.size();
        recordCounter = 0;
      }

      // Shuffle into open chunks.
      openChunks.get(recordCounter%nChunksOpenAtOnce).write(relPath, fileStatus);
      ++recordCounter;
    }

  } finally {
    closeAll(openChunks);
    chunksFinal.addAll(openChunks);
    IOUtils.closeStream(reader);
  }

  LOG.info("Number of dynamic-chunk-files created: " + chunksFinal.size()); 
  return chunksFinal;
}
 
Developer ID: naver, Project: hadoop, Lines of code: 69, Source file: DynamicInputFormat.java
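The value published in the example above is consumed on the read side of the job. Here is a minimal sketch of that counterpart, assuming DistCpUtils.getInt, the companion helper in the Hadoop sources, which fails if the label was never published. The constant's string literal below is hypothetical, standing in for the real one defined in DynamicInputFormat.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.tools.util.DistCpUtils;

public class ChunkSizeReader {
  // Hypothetical literal; the actual constant lives in DynamicInputFormat.
  private static final String CONF_LABEL_NUM_ENTRIES_PER_CHUNK =
      "distcp.dynamic.num.entries.per.chunk";

  // A map task recovers the chunk size that the job client published.
  static int getNumEntriesPerChunk(Configuration configuration) {
    return DistCpUtils.getInt(configuration, CONF_LABEL_NUM_ENTRIES_PER_CHUNK);
  }
}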

Example 4: splitCopyListingIntoChunksWithShuffle

import org.apache.hadoop.tools.util.DistCpUtils; // import the package/class the method depends on
private List<DynamicInputChunk> splitCopyListingIntoChunksWithShuffle
                                  (JobContext context) throws IOException {

  final Configuration configuration = context.getConfiguration();
  int numRecords = getNumberOfRecords(configuration);
  int numMaps = getNumMapTasks(configuration);
  int maxChunksTolerable = getMaxChunksTolerable(configuration);

  // Number of chunks each map will process, on average.
  int splitRatio = getListingSplitRatio(configuration, numMaps, numRecords);
  validateNumChunksUsing(splitRatio, numMaps, maxChunksTolerable);

  int numEntriesPerChunk = (int)Math.ceil((float)numRecords
                                        /(splitRatio * numMaps));
  DistCpUtils.publish(context.getConfiguration(),
                      CONF_LABEL_NUM_ENTRIES_PER_CHUNK,
                      numEntriesPerChunk);

  final int nChunksTotal = (int)Math.ceil((float)numRecords/numEntriesPerChunk);
  int nChunksOpenAtOnce
          = Math.min(N_CHUNKS_OPEN_AT_ONCE_DEFAULT, nChunksTotal);

  Path listingPath = getListingFilePath(configuration);
  SequenceFile.Reader reader
          = new SequenceFile.Reader(configuration,
                                    SequenceFile.Reader.file(listingPath));

  List<DynamicInputChunk> openChunks
                = new ArrayList<DynamicInputChunk>();
  
  List<DynamicInputChunk> chunksFinal = new ArrayList<DynamicInputChunk>();

  CopyListingFileStatus fileStatus = new CopyListingFileStatus();
  Text relPath = new Text();
  int recordCounter = 0;
  int chunkCount = 0;

  try {

    while (reader.next(relPath, fileStatus)) {
      if (recordCounter % (nChunksOpenAtOnce*numEntriesPerChunk) == 0) {
        // All chunks full. Create new chunk-set.
        closeAll(openChunks);
        chunksFinal.addAll(openChunks);

        openChunks = createChunks(chunkCount, nChunksTotal,
            nChunksOpenAtOnce);

        chunkCount += openChunks.size();

        nChunksOpenAtOnce = openChunks.size();
        recordCounter = 0;
      }

      // Shuffle into open chunks.
      openChunks.get(recordCounter%nChunksOpenAtOnce).write(relPath, fileStatus);
      ++recordCounter;
    }

  } finally {
    closeAll(openChunks);
    chunksFinal.addAll(openChunks);
    IOUtils.closeStream(reader);
  }

  LOG.info("Number of dynamic-chunk-files created: " + chunksFinal.size()); 
  return chunksFinal;
}
 
Developer ID: aliyun-beta, Project: aliyun-oss-hadoop-fs, Lines of code: 69, Source file: DynamicInputFormat.java

Example 5: splitCopyListingIntoChunksWithShuffle

import org.apache.hadoop.tools.util.DistCpUtils; // import the package/class the method depends on
private List<DynamicInputChunk> splitCopyListingIntoChunksWithShuffle
                                  (JobContext context) throws IOException {

  final Configuration configuration = context.getConfiguration();
  int numRecords = getNumberOfRecords(configuration);
  int numMaps = getNumMapTasks(configuration);
  // Number of chunks each map will process, on average.
  int splitRatio = getListingSplitRatio(configuration, numMaps, numRecords);
  validateNumChunksUsing(splitRatio, numMaps);

  int numEntriesPerChunk = (int)Math.ceil((float)numRecords
                                        /(splitRatio * numMaps));
  DistCpUtils.publish(context.getConfiguration(),
                      CONF_LABEL_NUM_ENTRIES_PER_CHUNK,
                      numEntriesPerChunk);

  final int nChunksTotal = (int)Math.ceil((float)numRecords/numEntriesPerChunk);
  int nChunksOpenAtOnce
          = Math.min(N_CHUNKS_OPEN_AT_ONCE_DEFAULT, nChunksTotal);

  Path listingPath = getListingFilePath(configuration);
  SequenceFile.Reader reader
          = new SequenceFile.Reader(configuration,
                                    SequenceFile.Reader.file(listingPath));

  List<DynamicInputChunk> openChunks
                = new ArrayList<DynamicInputChunk>();
  
  List<DynamicInputChunk> chunksFinal = new ArrayList<DynamicInputChunk>();

  FileStatus fileStatus = new FileStatus();
  Text relPath = new Text();
  int recordCounter = 0;
  int chunkCount = 0;

  try {

    while (reader.next(relPath, fileStatus)) {
      if (recordCounter % (nChunksOpenAtOnce*numEntriesPerChunk) == 0) {
        // All chunks full. Create new chunk-set.
        closeAll(openChunks);
        chunksFinal.addAll(openChunks);

        openChunks = createChunks(
                configuration, chunkCount, nChunksTotal, nChunksOpenAtOnce);

        chunkCount += openChunks.size();

        nChunksOpenAtOnce = openChunks.size();
        recordCounter = 0;
      }

      // Shuffle into open chunks.
      openChunks.get(recordCounter%nChunksOpenAtOnce).write(relPath, fileStatus);
      ++recordCounter;
    }

  } finally {
    closeAll(openChunks);
    chunksFinal.addAll(openChunks);
    IOUtils.closeStream(reader);
  }

  LOG.info("Number of dynamic-chunk-files created: " + chunksFinal.size()); 
  return chunksFinal;
}
 
Developer ID: ict-carch, Project: hadoop-plus, Lines of code: 67, Source file: DynamicInputFormat.java


Note: The org.apache.hadoop.tools.util.DistCpUtils.publish method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright in the source code belongs to the original authors. Please refer to each project's License before distributing or using the code. Do not reproduce this article without permission.