当前位置: 首页>>代码示例>>Java>>正文


Java JobConf.getInt方法代码示例

本文整理汇总了Java中org.apache.hadoop.mapred.JobConf.getInt方法的典型用法代码示例。如果您正苦于以下问题：Java JobConf.getInt方法的具体用法？Java JobConf.getInt怎么用？Java JobConf.getInt使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.mapred.JobConf的用法示例。


在下文中一共展示了JobConf.getInt方法的14个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: configure

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Prepares this runner for the given job: reads the thread count from the
 * configuration, instantiates the configured mapper class and builds the
 * executor that will invoke it.
 *
 * @param jobConf configuration of the job being run
 */
@SuppressWarnings("unchecked")
public void configure(JobConf jobConf) {
  final int threadCount = jobConf.getInt(MultithreadedMapper.NUM_THREADS, 10);
  if (LOG.isDebugEnabled()) {
    final String jobName = jobConf.getJobName();
    LOG.debug("Configuring jobConf " + jobName + " to use " + threadCount + " threads");
  }

  this.job = jobConf;

  // The processed-record counter is auto-incremented only when record
  // skipping is switched on for the mapper.
  final boolean skippingEnabled =
      SkipBadRecords.getMapperMaxSkipRecords(this.job) > 0;
  this.incrProcCount =
      skippingEnabled && SkipBadRecords.getAutoIncrMapperProcCount(this.job);

  this.mapper = ReflectionUtils.newInstance(jobConf.getMapperClass(), jobConf);

  // Fixed-size pool (core == max == threadCount) that runs the mapper's map
  // method in parallel; the work queue is constructed with the same size.
  final long keepAliveMillis = 0L;
  executorService = new ThreadPoolExecutor(
      threadCount, threadCount,
      keepAliveMillis, TimeUnit.MILLISECONDS,
      new BlockingArrayQueue(threadCount));
}
 
开发者ID:naver,项目名称:hadoop,代码行数:24,代码来源:MultithreadedMapRunner.java

示例2: configure

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Configures this mapper from the job settings.
 * Resolves the destination path and its file system, allocates the copy
 * buffer and reads the copy option flags so they are available in map.
 *
 * @param job job configuration to read from
 * @throws RuntimeException if the destination file system cannot be obtained
 */
public void configure(JobConf job)
{
  final String destDir = job.get(DST_DIR_LABEL, "/");
  destPath = new Path(destDir);
  try {
    destFileSys = destPath.getFileSystem(job);
  } catch (IOException ex) {
    throw new RuntimeException("Unable to get the named file system.", ex);
  }

  // Copy buffer: 128 KiB unless overridden in the configuration.
  sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
  buffer = new byte[sizeBuf];

  ignoreReadFailures =
      job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);

  preserve_status =
      job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
  if (preserve_status) {
    // The attributes to preserve are only parsed when preservation is on.
    final String attributeSpec = job.get(PRESERVE_STATUS_LABEL);
    preseved = FileAttribute.parse(attributeSpec);
  }

  update = job.getBoolean(Options.UPDATE.propertyname, false);
  // Updating takes precedence: the overwrite option is consulted only when
  // update is off.
  if (update) {
    overwrite = false;
  } else {
    overwrite = job.getBoolean(Options.OVERWRITE.propertyname, false);
  }

  skipCRCCheck = job.getBoolean(Options.SKIPCRC.propertyname, false);
  this.job = job;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:26,代码来源:DistCpV1.java

示例3: getSplits

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Computes the input splits over the sequence file whose path is stored
 * under {@code GRIDMIX_DISTCACHE_FILE_LIST} in the job configuration.
 *
 * <p>Records are read in order and their {@code LongWritable} keys are
 * summed; a split boundary is placed before any record whose key would push
 * the running sum past the target size. The target is the larger of
 * {@code totalSize / numSplits} and
 * {@code DistributedCacheEmulator.AVG_BYTES_PER_MAP}, where
 * {@code numSplits} is the task tracker count multiplied by the configured
 * map slots per tracker (default 2).
 *
 * @param jobCtxt context whose configuration supplies the metadata
 * @return splits of the list file, in file order
 * @throws IOException propagated from the job client, file system, or
 *         sequence file reader calls made here
 * @throws RuntimeException if the file count or total size is negative, or
 *         the list file path is not configured
 */
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  // -1 marks "not configured" and is rejected by the validation below.
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task
  // NOTE(review): if numSplits is 0 (no trackers or zero slots configured)
  // this division throws ArithmeticException — confirm that cannot happen.
  final long targetSize = Math.max(totalSize / numSplits,
                            DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  // Byte offsets within the list file delimiting the split being built.
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  // Sum of the keys of the records in the split being built.
  long acc = 0L;
  // Bytes of the list file not yet assigned to a split.
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {

      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      // The acc != 0 guard prevents cutting an empty split when a single
      // record alone exceeds the target.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[])null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      // The reader position after this record is the candidate end of the
      // current split.
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  // Whatever was not handed out above becomes the final split.
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[])null));
  }

  return splits;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:67,代码来源:GenerateDistCacheData.java

示例4: configure

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Configures the streaming mapper: disables automatic processed-record
 * counting, and reads the skip flag, the key-ignoring policy and the
 * field-separator settings from the job.
 *
 * @param job job configuration to read from (also updated with the
 *            auto-increment setting)
 * @throws RuntimeException if UTF-8 is reported as unsupported
 */
public void configure(JobConf job) {
  super.configure(job);

  // For streaming, the number of processed records may be equal to or less
  // than the number of input records, so the counter must not be
  // incremented automatically.
  SkipBadRecords.setAutoIncrMapperProcCount(job, false);
  skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false);

  final String textWriterName = TextInputWriter.class.getCanonicalName();
  if (mapInputWriterClass_.getCanonicalName().equals(textWriterName)) {
    // With the text input writer, the key is ignored by default exactly
    // when the configured input format is TextInputFormat.
    final String configuredFormat =
        job.getClass("mapred.input.format.class", TextInputFormat.class)
           .getCanonicalName();
    final boolean ignoreKeyByDefault =
        configuredFormat.equals(TextInputFormat.class.getCanonicalName());
    ignoreKey = job.getBoolean("stream.map.input.ignoreKey", ignoreKeyByDefault);
  }

  try {
    final String outputSeparator =
        job.get("stream.map.output.field.separator", "\t");
    mapOutputFieldSeparator = outputSeparator.getBytes("UTF-8");

    final String inputSeparator =
        job.get("stream.map.input.field.separator", "\t");
    mapInputFieldSeparator = inputSeparator.getBytes("UTF-8");

    numOfMapOutputKeyFields = job.getInt("stream.num.map.output.key.fields", 1);
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException("The current system does not support UTF-8 encoding!", e);
  }
}
 
开发者ID:naver,项目名称:hadoop,代码行数:22,代码来源:PipeMapper.java

示例5: getSplits

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Returns one {@code EmptySplit} per configured map task.
 *
 * @param conf job configuration; {@code LG.numMapTasks} (default 1) decides
 *             how many splits are produced
 * @param numSplits ignored; the configured value is used instead
 * @return an array of freshly created empty splits
 */
public InputSplit[] getSplits(JobConf conf, int numSplits) {
  final int mapTasks = conf.getInt("LG.numMapTasks", 1);
  final InputSplit[] emptySplits = new InputSplit[mapTasks];
  int slot = 0;
  while (slot < mapTasks) {
    emptySplits[slot] = new EmptySplit();
    slot++;
  }
  return emptySplits;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:9,代码来源:LoadGeneratorMR.java

示例6: getAggregatorDescriptors

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Builds the aggregator descriptors declared in the job configuration.
 * The count is read from {@code aggregator.descriptor.num} (default 0) and
 * the i-th specification from {@code aggregator.descriptor.<i>}.
 *
 * @param job job configuration holding the descriptor specifications
 * @return the descriptors that could be created, in declaration order;
 *         specifications that yield {@code null} are left out
 */
private static ArrayList<ValueAggregatorDescriptor> getAggregatorDescriptors(JobConf job) {
  final String advn = "aggregator.descriptor";
  final int declared = job.getInt(advn + ".num", 0);
  final ArrayList<ValueAggregatorDescriptor> descriptors =
      new ArrayList<ValueAggregatorDescriptor>(declared);
  for (int index = 0; index < declared; index++) {
    final String spec = job.get(advn + "." + index);
    final ValueAggregatorDescriptor descriptor =
        getValueAggregatorDescriptor(spec, job);
    if (descriptor == null) {
      continue;
    }
    descriptors.add(descriptor);
  }
  return descriptors;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:14,代码来源:ValueAggregatorJobBase.java

示例7: ShuffleClientMetrics

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Creates the metrics holder: reads the number of parallel copiers from the
 * job, creates the "shuffleInput" record in the "mapred" metrics context,
 * tags it, and registers this object as an updater of that context.
 *
 * @param reduceId attempt id used for the jobId and taskId tags
 * @param jobConf job configuration supplying the copier count and the
 *                user, job name and session id tags
 */
ShuffleClientMetrics(TaskAttemptID reduceId, JobConf jobConf) {
  this.numCopiers = jobConf.getInt(MRJobConfig.SHUFFLE_PARALLEL_COPIES, 5);

  final MetricsContext mapredContext = MetricsUtil.getContext("mapred");
  this.shuffleMetrics = MetricsUtil.createRecord(mapredContext, "shuffleInput");

  // Tags identifying who and what this record belongs to.
  this.shuffleMetrics.setTag("user", jobConf.getUser());
  this.shuffleMetrics.setTag("jobName", jobConf.getJobName());
  final String jobId = reduceId.getJobID().toString();
  this.shuffleMetrics.setTag("jobId", jobId);
  final String taskId = reduceId.toString();
  this.shuffleMetrics.setTag("taskId", taskId);
  this.shuffleMetrics.setTag("sessionId", jobConf.getSessionId());

  mapredContext.registerUpdater(this);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:14,代码来源:ShuffleClientMetrics.java

示例8: ShuffleSchedulerImpl

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Creates the scheduler, storing the collaborators it is given and reading
 * its failure and retry limits from the job configuration.
 *
 * @param job job configuration; supplies the map task count and the
 *            shuffle settings read below
 * @param status stored in the field of the same name
 * @param reduceId stored in the field of the same name
 * @param reporter stored in the field of the same name
 * @param progress stored in the field of the same name
 * @param shuffledMapsCounter stored in the field of the same name
 * @param reduceShuffleBytes stored in the field of the same name
 * @param failedShuffleCounter stored in the field of the same name
 */
public ShuffleSchedulerImpl(JobConf job, TaskStatus status,
                        TaskAttemptID reduceId,
                        ExceptionReporter reporter,
                        Progress progress,
                        Counters.Counter shuffledMapsCounter,
                        Counters.Counter reduceShuffleBytes,
                        Counters.Counter failedShuffleCounter) {
  totalMaps = job.getNumMapTasks();
  // At least 30, or a tenth of the map count when that is larger.
  abortFailureLimit = Math.max(30, totalMaps / 10);
  copyTimeTracker = new CopyTimeTracker();
  remainingMaps = totalMaps;
  // One flag per map task; all start out false.
  finishedMaps = new boolean[remainingMaps];
  this.reporter = reporter;
  this.status = status;
  this.reduceId = reduceId;
  this.progress = progress;
  this.shuffledMapsCounter = shuffledMapsCounter;
  this.reduceShuffleBytes = reduceShuffleBytes;
  this.failedShuffleCounter = failedShuffleCounter;
  this.startTime = Time.monotonicNow();
  lastProgressTime = startTime;
  // NOTE(review): referee is started before the fields below are assigned;
  // if it runs concurrently and reads any of them it could observe default
  // values — confirm against referee's implementation.
  referee.start();
  // Capped at 5, lower when there are fewer maps than that.
  this.maxFailedUniqueFetches = Math.min(totalMaps, 5);
  this.maxFetchFailuresBeforeReporting = job.getInt(
      MRJobConfig.SHUFFLE_FETCH_FAILURES, REPORT_FAILURE_LIMIT);
  this.reportReadErrorImmediately = job.getBoolean(
      MRJobConfig.SHUFFLE_NOTIFY_READERROR, true);

  this.maxDelay = job.getLong(MRJobConfig.MAX_SHUFFLE_FETCH_RETRY_DELAY,
      MRJobConfig.DEFAULT_MAX_SHUFFLE_FETCH_RETRY_DELAY);
  this.maxHostFailures = job.getInt(
      MRJobConfig.MAX_SHUFFLE_FETCH_HOST_FAILURES,
      MRJobConfig.DEFAULT_MAX_SHUFFLE_FETCH_HOST_FAILURES);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:35,代码来源:ShuffleSchedulerImpl.java

示例9: configure

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Prepares this task's part file: derives its name from the task partition,
 * removes any stale copy in the work output directory, opens the output
 * stream and allocates the copy buffer.
 *
 * @param conf job configuration to read from
 * @throws RuntimeException if the source parent path is missing from the
 *         configuration, or the part file cannot be opened
 */
public void configure(JobConf conf) {
  this.conf = conf;

  // Map-reduce exposes no API for the partition number, so it is read
  // straight from the task configuration.
  partId = conf.getInt(MRJobConfig.TASK_PARTITION, -1);

  // The part file is written into the task's work output directory and is
  // named after the partition.
  tmpOutputDir = FileOutputFormat.getWorkOutputPath(conf);
  blockSize = conf.getLong(HAR_BLOCKSIZE_LABEL, blockSize);
  partname = "part-" + partId;
  tmpOutput = new Path(tmpOutputDir, partname);

  if (conf.get(SRC_PARENT_LABEL, null) == null) {
    rootPath = null;
    throw new RuntimeException("Unable to read parent " +
        "path for har from config");
  }
  rootPath = new Path(conf.get(SRC_PARENT_LABEL));

  try {
    destFs = tmpOutput.getFileSystem(conf);
    // A part file that already exists is a stale copy; remove it first.
    if (destFs.exists(tmpOutput)) {
      destFs.delete(tmpOutput, false);
    }
    final int ioBufferSize = conf.getInt("io.file.buffer.size", 4096);
    partStream = destFs.create(
        tmpOutput,
        false,
        ioBufferSize,
        destFs.getDefaultReplication(tmpOutput),
        blockSize);
  } catch(IOException ie) {
    throw new RuntimeException("Unable to open output file " + tmpOutput, ie);
  }

  buffer = new byte[buf_size];
}
 
开发者ID:naver,项目名称:hadoop,代码行数:35,代码来源:HadoopArchives.java

示例10: getSplits

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Splits the operation list file by record count: a split is closed as soon
 * as it holds more than {@code srcCount / numSplits} records, and whatever
 * follows the last cut becomes the final split.
 *
 * @param job The handle to the JobConf object
 * @param numSplits Number of splits requested
 * @return the splits of the list file, in file order
 * @throws IOException propagated from the file system or sequence file
 *         reader
 * @throws RuntimeException if the operation count is negative or the list
 *         path is not configured
 */
public InputSplit[] getSplits(JobConf job, int numSplits
    ) throws IOException {
  final int opCount = job.getInt(OP_COUNT_LABEL, -1);
  final int opsPerSplit = opCount / numSplits;
  final String listUri = job.get(OP_LIST_LABEL, "");
  if (opCount < 0 || "".equals(listUri)) {
    throw new RuntimeException("Invalid metadata: #files(" + opCount +
                               ") listuri(" + listUri + ")");
  }

  final Path listPath = new Path(listUri);
  final FileSystem listFs = listPath.getFileSystem(job);
  final List<FileSplit> result = new ArrayList<FileSplit>(numSplits);

  final Text key = new Text();
  final FileOperation value = new FileOperation();
  long splitStart = 0L;   // byte offset where the open split begins
  int opsInSplit = 0;     // records read since the last cut
  try (SequenceFile.Reader reader = new SequenceFile.Reader(listFs, listPath, job)) {
    while (reader.next(key, value)) {
      final long position = reader.getPosition();
      opsInSplit++;
      if (opsInSplit > opsPerSplit) {
        opsInSplit = 0;
        result.add(new FileSplit(
            listPath, splitStart, position - splitStart, (String[])null));
        splitStart = position;
      }
    }
  }

  // Anything between the last cut and the end of the file is the tail split.
  final long tail = listFs.getFileStatus(listPath).getLen() - splitStart;
  if (tail != 0) {
    result.add(new FileSplit(listPath, splitStart, tail, (String[])null));
  }
  LOG.info("numSplits="  + numSplits + ", splits.size()=" + result.size());
  return result.toArray(new FileSplit[result.size()]);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:43,代码来源:DistCh.java

示例11: configure

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Reads the number of sources from {@code testdatamerge.sources} and asserts
 * that it is positive.
 *
 * @param job job configuration to read from
 */
public void configure(JobConf job) {
  final int configuredSources = job.getInt("testdatamerge.sources", 0);
  srcs = configuredSources;
  // The default of 0 makes a missing setting fail this check.
  assertTrue("Invalid src count: " + srcs, srcs > 0);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:5,代码来源:TestDatamerge.java

示例12: configure

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Reads the lines-per-map setting, defaulting to 1, into {@code N}.
 *
 * @param conf job configuration to read from
 */
public void configure(JobConf conf) {
  final int linesPerMap =
      conf.getInt("mapreduce.input.lineinputformat.linespermap", 1);
  N = linesPerMap;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:4,代码来源:NLineInputFormat.java

示例13: configure

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Compiles the configured regular expression and reads the group setting
 * (default 0), using the keys defined by the mapreduce-API RegexMapper.
 *
 * @param job job configuration to read from
 */
public void configure(JobConf job) {
  final String regex =
      job.get(org.apache.hadoop.mapreduce.lib.map.RegexMapper.PATTERN);
  pattern = Pattern.compile(regex);

  final int configuredGroup =
      job.getInt(org.apache.hadoop.mapreduce.lib.map.RegexMapper.GROUP, 0);
  group = configuredGroup;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:7,代码来源:RegexMapper.java

示例14: getSplits

import org.apache.hadoop.mapred.JobConf; //导入方法依赖的package包/类
/**
 * Splits the source list file so that the keys summed within one split stay
 * within {@code total size / numSplits} where possible; a record whose key
 * alone exceeds that target still gets a split of its own.
 *
 * @param job The handle to the JobConf object
 * @param numSplits Number of splits requested
 * @return the splits of the list file, in file order
 * @throws IOException propagated from the file system or sequence file
 *         reader
 * @throws RuntimeException if the file count or total size is negative, or
 *         the list path is not configured
 */
public InputSplit[] getSplits(JobConf job, int numSplits)
    throws IOException {
  final int fileCount = job.getInt(SRC_COUNT_LABEL, -1);
  final long totalBytes = job.getLong(TOTAL_SIZE_LABEL, -1);
  final String listUri = job.get(SRC_LIST_LABEL, "");
  if (fileCount < 0 || totalBytes < 0 || "".equals(listUri)) {
    throw new RuntimeException("Invalid metadata: #files(" + fileCount +
                               ") total_size(" + totalBytes + ") listuri(" +
                               listUri + ")");
  }

  final Path listPath = new Path(listUri);
  final FileSystem listFs = listPath.getFileSystem(job);
  final FileStatus listStatus = listFs.getFileStatus(listPath);

  final ArrayList<FileSplit> result = new ArrayList<FileSplit>(numSplits);
  final LongWritable key = new LongWritable();
  final FilePair value = new FilePair();
  final long targetBytes = totalBytes / numSplits;

  long splitStart = 0L;      // offset where the open split begins
  long lastRecordEnd = 0L;   // reader position after the previous record
  long accumulated = 0L;     // sum of keys in the open split
  long unassigned = listStatus.getLen();   // list-file bytes not yet in a split
  try (SequenceFile.Reader reader =
      new SequenceFile.Reader(job, Reader.file(listPath))) {
    while (reader.next(key, value)) {
      // Cut before this record if it would overshoot the target, unless the
      // open split is still empty.
      if (accumulated != 0 && accumulated + key.get() > targetBytes) {
        final long splitLength = lastRecordEnd - splitStart;
        result.add(new FileSplit(listPath, splitStart, splitLength, (String[])null));
        unassigned -= splitLength;
        splitStart = lastRecordEnd;
        accumulated = 0L;
      }
      accumulated += key.get();
      lastRecordEnd = reader.getPosition();
    }
  }

  // The remainder of the list file forms the last split.
  if (unassigned != 0) {
    result.add(new FileSplit(listPath, splitStart, unassigned, (String[])null));
  }

  return result.toArray(new FileSplit[result.size()]);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:50,代码来源:DistCpV1.java


注:本文中的org.apache.hadoop.mapred.JobConf.getInt方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。