

Java JobConf.getLong Method Code Examples

This article compiles typical usage examples of the Java method org.apache.hadoop.mapred.JobConf.getLong. If you are struggling with questions such as: What exactly does JobConf.getLong do? How is it used? What do real calls to JobConf.getLong look like? Then the curated code examples below may help. You can also explore further usage of the enclosing class, org.apache.hadoop.mapred.JobConf.


The following presents 9 code examples of the JobConf.getLong method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
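Before the examples, here is a minimal sketch of the most common JobConf.getLong pattern: reading a numeric job setting with a fallback default. The property name my.sample.threshold is made up purely for illustration and is not part of Hadoop.

import org.apache.hadoop.mapred.JobConf;

public class GetLongDemo {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    // If "my.sample.threshold" is not set anywhere in the configuration,
    // getLong returns the supplied default value (1024 here).
    long threshold = conf.getLong("my.sample.threshold", 1024L);
    System.out.println("threshold = " + threshold);
  }
}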

Example 1: getSplits

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
                            DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {

      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[])null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[])null));
  }

  return splits;
}
 
Developer ID: naver, Project: hadoop, Lines of code: 67, Source file: GenerateDistCacheData.java

Example 2: ShuffleSchedulerImpl

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
public ShuffleSchedulerImpl(JobConf job, TaskStatus status,
                        TaskAttemptID reduceId,
                        ExceptionReporter reporter,
                        Progress progress,
                        Counters.Counter shuffledMapsCounter,
                        Counters.Counter reduceShuffleBytes,
                        Counters.Counter failedShuffleCounter) {
  totalMaps = job.getNumMapTasks();
  abortFailureLimit = Math.max(30, totalMaps / 10);
  copyTimeTracker = new CopyTimeTracker();
  remainingMaps = totalMaps;
  finishedMaps = new boolean[remainingMaps];
  this.reporter = reporter;
  this.status = status;
  this.reduceId = reduceId;
  this.progress = progress;
  this.shuffledMapsCounter = shuffledMapsCounter;
  this.reduceShuffleBytes = reduceShuffleBytes;
  this.failedShuffleCounter = failedShuffleCounter;
  this.startTime = Time.monotonicNow();
  lastProgressTime = startTime;
  referee.start();
  this.maxFailedUniqueFetches = Math.min(totalMaps, 5);
  this.maxFetchFailuresBeforeReporting = job.getInt(
      MRJobConfig.SHUFFLE_FETCH_FAILURES, REPORT_FAILURE_LIMIT);
  this.reportReadErrorImmediately = job.getBoolean(
      MRJobConfig.SHUFFLE_NOTIFY_READERROR, true);

  this.maxDelay = job.getLong(MRJobConfig.MAX_SHUFFLE_FETCH_RETRY_DELAY,
      MRJobConfig.DEFAULT_MAX_SHUFFLE_FETCH_RETRY_DELAY);
  this.maxHostFailures = job.getInt(
      MRJobConfig.MAX_SHUFFLE_FETCH_HOST_FAILURES,
      MRJobConfig.DEFAULT_MAX_SHUFFLE_FETCH_HOST_FAILURES);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 35, Source file: ShuffleSchedulerImpl.java

Example 3: configure

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
public void configure(JobConf conf) {
  this.conf = conf;

  // this is tightly tied to map reduce
  // since it does not expose an api 
  // to get the partition
  partId = conf.getInt(MRJobConfig.TASK_PARTITION, -1);
  // create a file name using the partition
  // we need to write to this directory
  tmpOutputDir = FileOutputFormat.getWorkOutputPath(conf);
  blockSize = conf.getLong(HAR_BLOCKSIZE_LABEL, blockSize);
  // get the output path and write to the tmp 
  // directory 
  partname = "part-" + partId;
  tmpOutput = new Path(tmpOutputDir, partname);
  rootPath = (conf.get(SRC_PARENT_LABEL, null) == null) ? null :
              new Path(conf.get(SRC_PARENT_LABEL));
  if (rootPath == null) {
    throw new RuntimeException("Unable to read parent " +
    		"path for har from config");
  }
  try {
    destFs = tmpOutput.getFileSystem(conf);
    //this was a stale copy
    if (destFs.exists(tmpOutput)) {
      destFs.delete(tmpOutput, false);
    } 
    partStream = destFs.create(tmpOutput, false, conf.getInt("io.file.buffer.size", 4096), 
        destFs.getDefaultReplication(tmpOutput), blockSize);
  } catch(IOException ie) {
    throw new RuntimeException("Unable to open output file " + tmpOutput, ie);
  }
  buffer = new byte[buf_size];
}
 
Developer ID: naver, Project: hadoop, Lines of code: 35, Source file: HadoopArchives.java

Example 4: setMapCount

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
/**
 * Calculate how many maps to run.
 * Number of maps is bounded by a minimum of the cumulative size of the
 * copy / (distcp.bytes.per.map, default BYTES_PER_MAP or -m on the
 * command line) and at most (distcp.max.map.tasks, default
 * MAX_MAPS_PER_NODE * nodes in the cluster).
 * @param totalBytes Count of total bytes for job
 * @param job The job to configure
 * @return Count of maps to run.
 */
private static int setMapCount(long totalBytes, JobConf job) 
    throws IOException {
  int numMaps =
    (int)(totalBytes / job.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP));
  numMaps = Math.min(numMaps, 
      job.getInt(MAX_MAPS_LABEL, MAX_MAPS_PER_NODE *
        new JobClient(job).getClusterStatus().getTaskTrackers()));
  numMaps = Math.max(numMaps, 1);
  job.setNumMapTasks(numMaps);
  return numMaps;
}
 
Developer ID: naver, Project: hadoop, Lines of code: 22, Source file: DistCpV1.java
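
As a worked illustration of the bound computed by setMapCount above, the following sketch plugs in assumed values; the numbers are made up and do not reflect DistCpV1's actual defaults for distcp.bytes.per.map or distcp.max.map.tasks.

// Hypothetical numbers, for illustration only.
long totalBytes  = 10L * 1024 * 1024 * 1024;  // 10 GB to copy
long bytesPerMap = 256L * 1024 * 1024;        // assumed distcp.bytes.per.map
int  maxMaps     = 200;                       // assumed distcp.max.map.tasks cap

int numMaps = (int) (totalBytes / bytesPerMap); // 40 maps by size
numMaps = Math.min(numMaps, maxMaps);           // never exceed the cluster-wide cap
numMaps = Math.max(numMaps, 1);                 // always run at least one map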

Example 5: configure

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
@Override // Mapper
public void configure(JobConf conf) {
  super.configure(conf);
  skipSize = conf.getLong("test.io.skip.size", 0);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 6, Source file: TestDFSIO.java

Example 6: configure

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
/**
 * Configure this descriptor: read the maximum number of unique values
 * to keep per aggregation from the job configuration.
 *
 * @param job a job configuration object
 */
public void configure(JobConf job) {
  super.configure(job);
  maxNumItems = job.getLong("aggregate.max.num.unique.values",
                            Long.MAX_VALUE);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 11, Source file: ValueAggregatorBaseDescriptor.java

Example 7: getSplits

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
public InputSplit[] getSplits(JobConf jconf, int numSplits)
throws IOException {
  String srcfilelist = jconf.get(SRC_LIST_LABEL, "");
  if ("".equals(srcfilelist)) {
      throw new IOException("Unable to get the " +
          "src file for archive generation.");
  }
  long totalSize = jconf.getLong(TOTAL_SIZE_LABEL, -1);
  if (totalSize == -1) {
    throw new IOException("Invalid size of files to archive");
  }
  //we should be safe since this is set by our own code
  Path src = new Path(srcfilelist);
  FileSystem fs = src.getFileSystem(jconf);
  FileStatus fstatus = fs.getFileStatus(src);
  ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
  LongWritable key = new LongWritable();
  final HarEntry value = new HarEntry();
  // the remaining bytes in the file split
  long remaining = fstatus.getLen();
  // the count of sizes calculated till now
  long currentCount = 0L;
  // the endposition of the split
  long lastPos = 0L;
  // the start position of the split
  long startPos = 0L;
  long targetSize = totalSize/numSplits;
  // create splits of size target size so that all the maps 
  // have equals sized data to read and write to.
  try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, src, jconf)) {
    while(reader.next(key, value)) {
      if (currentCount + key.get() > targetSize && currentCount != 0){
        long size = lastPos - startPos;
        splits.add(new FileSplit(src, startPos, size, (String[]) null));
        remaining = remaining - size;
        startPos = lastPos;
        currentCount = 0L;
      }
      currentCount += key.get();
      lastPos = reader.getPosition();
    }
    // the remaining not equal to the target size.
    if (remaining != 0) {
      splits.add(new FileSplit(src, startPos, remaining, (String[])null));
    }
  }
  return splits.toArray(new FileSplit[splits.size()]);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 49, Source file: HadoopArchives.java

Example 8: getSplits

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
/**
 * Produce splits such that each is no greater than the quotient of the
 * total size and the number of splits requested.
 * @param job The handle to the JobConf object
 * @param numSplits Number of splits requested
 */
public InputSplit[] getSplits(JobConf job, int numSplits)
    throws IOException {
  int cnfiles = job.getInt(SRC_COUNT_LABEL, -1);
  long cbsize = job.getLong(TOTAL_SIZE_LABEL, -1);
  String srcfilelist = job.get(SRC_LIST_LABEL, "");
  if (cnfiles < 0 || cbsize < 0 || "".equals(srcfilelist)) {
    throw new RuntimeException("Invalid metadata: #files(" + cnfiles +
                               ") total_size(" + cbsize + ") listuri(" +
                               srcfilelist + ")");
  }
  Path src = new Path(srcfilelist);
  FileSystem fs = src.getFileSystem(job);
  FileStatus srcst = fs.getFileStatus(src);

  ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
  LongWritable key = new LongWritable();
  FilePair value = new FilePair();
  final long targetsize = cbsize / numSplits;
  long pos = 0L;
  long last = 0L;
  long acc = 0L;
  long cbrem = srcst.getLen();
  try (SequenceFile.Reader sl =
      new SequenceFile.Reader(job, Reader.file(src))) {
    for (; sl.next(key, value); last = sl.getPosition()) {
      // if adding this split would put this split past the target size,
      // cut the last split and put this next file in the next split.
      if (acc + key.get() > targetsize && acc != 0) {
        long splitsize = last - pos;
        splits.add(new FileSplit(src, pos, splitsize, (String[])null));
        cbrem -= splitsize;
        pos = last;
        acc = 0L;
      }
      acc += key.get();
    }
  }
  if (cbrem != 0) {
    splits.add(new FileSplit(src, pos, cbrem, (String[])null));
  }

  return splits.toArray(new FileSplit[splits.size()]);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 50, Source file: DistCpV1.java

Example 9: configure

import org.apache.hadoop.mapred.JobConf; // import the package/class this method depends on
public void configure(JobConf job) {
  super.configure(job);
  this.job = job;
  this.maxNumOfValuesPerGroup = job.getLong("datajoin.maxNumOfValuesPerGroup", 100);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 6, Source file: DataJoinReducerBase.java


Note: The org.apache.hadoop.mapred.JobConf.getLong examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by their respective authors; copyright remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not republish without permission.