当前位置: 首页>>代码示例>>Java>>正文


Java JobContext.getConfiguration方法代码示例

本文整理汇总了Java中org.apache.hadoop.mapreduce.JobContext.getConfiguration方法的典型用法代码示例。如果您正苦于以下问题:Java JobContext.getConfiguration方法的具体用法?Java JobContext.getConfiguration怎么用?Java JobContext.getConfiguration使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.hadoop.mapreduce.JobContext的用法示例。


在下文中一共展示了JobContext.getConfiguration方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: getSplits

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Builds the input splits for the job from every file in the input set.
 *
 * <p>The maximum split size is read from
 * {@code mapreduce.input.fileinputformat.split.maxsize} (0 when unset) and
 * handed, together with the listed files, to {@code getMoreSplits}, which
 * fills the returned list.
 *
 * @param job the job context supplying the configuration and input paths
 * @return the generated splits; an empty list when no input files are listed
 * @throws IOException if listing the input or generating the splits fails
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final Configuration jobConf = job.getConfiguration();
  final long splitMaxSize =
      jobConf.getLong("mapreduce.input.fileinputformat.split.maxsize", 0);

  // all the files in input set
  final List<FileStatus> inputFiles = listStatus(job);
  final List<InputSplit> result = new ArrayList<>();

  // Nothing to split when the input set is empty.
  if (!inputFiles.isEmpty()) {
    getMoreSplits(jobConf, inputFiles, splitMaxSize, 0, 0, result);
  }
  return result;
}
 
开发者ID:Tencent,项目名称:angel,代码行数:21,代码来源:BalanceInputFormat.java

示例2: getSplits

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Creates one {@code GenSplit} per active task tracker, dividing the
 * configured number of bytes to generate evenly across the task trackers
 * reported by the cluster.
 *
 * <p>Tracker names that do not match {@code tracker_<host>:...} are skipped
 * with a message on standard error.
 *
 * @param jobCtxt the job context supplying the configuration
 * @return the splits, one for each active tracker whose name could be parsed
 * @throws IOException if {@code GRIDMIX_GEN_BYTES} is missing or negative,
 *         if the cluster reports no task trackers, or if the cluster status
 *         cannot be obtained
 */
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
    new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
    jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  // Guard the division below: with zero trackers the original code failed
  // with an unexplained ArithmeticException ("/ by zero").
  if (nTrackers <= 0) {
    throw new IOException(
        "No task trackers available to generate data: " + nTrackers);
  }
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  // A single Matcher is reused across trackers via reset().
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    // Group 1 is the text between "tracker_" and the first ':'.
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:27,代码来源:GenerateData.java

示例3: checkOutputSpecs

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * {@inheritDoc}
 *
 * <p>Verifies that the connection URL and the output table (procedure) name
 * are configured, and that either output field names or a non-zero output
 * field count is available.
 *
 * @throws IOException if any of the required settings is missing
 */
@Override
public void checkOutputSpecs(JobContext context)
    throws IOException, InterruptedException {
  final Configuration jobConf = context.getConfiguration();
  final DBConfiguration exportConf = new DBConfiguration(jobConf);

  // Sanity check all the configuration values we need.
  if (jobConf.get(DBConfiguration.URL_PROPERTY) == null) {
    throw new IOException("Database connection URL is not set.");
  }
  if (exportConf.getOutputTableName() == null) {
    throw new IOException("Procedure name is not set for export");
  }
  // The field count is only consulted when no field names are configured.
  if (exportConf.getOutputFieldNames() == null
      && exportConf.getOutputFieldCount() == 0) {
    throw new IOException(
        "Output field names are null and zero output field count set.");
  }
}
 
开发者ID:aliyun,项目名称:aliyun-maxcompute-data-collectors,代码行数:19,代码来源:ExportCallOutputFormat.java

示例4: checkOutputSpecs

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * {@inheritDoc}
 *
 * <p>Verifies that the connection URL, the output table name, the output
 * field names and the update key column are all configured.
 *
 * @throws IOException if any of the required settings is missing
 */
@Override
public void checkOutputSpecs(JobContext context)
    throws IOException, InterruptedException {
  final Configuration jobConf = context.getConfiguration();
  final DBConfiguration exportConf = new DBConfiguration(jobConf);

  // Sanity check all the configuration values we need.
  if (jobConf.get(DBConfiguration.URL_PROPERTY) == null) {
    throw new IOException("Database connection URL is not set.");
  }
  if (exportConf.getOutputTableName() == null) {
    throw new IOException("Table name is not set for export.");
  }
  if (exportConf.getOutputFieldNames() == null) {
    throw new IOException(
        "Output field names are null.");
  }
  if (jobConf.get(ExportJobBase.SQOOP_EXPORT_UPDATE_COL_KEY) == null) {
    throw new IOException("Update key column is not set for export.");
  }
}
 
开发者ID:aliyun,项目名称:aliyun-maxcompute-data-collectors,代码行数:20,代码来源:SQLServerResilientUpdateOutputFormat.java

示例5: checkOutputSpecs

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * {@inheritDoc}
 *
 * <p>Verifies that the connection URL and the output table name are
 * configured, and that either output field names or a non-zero output field
 * count is available.
 *
 * @throws IOException if any of the required settings is missing
 */
@Override
public void checkOutputSpecs(JobContext context)
    throws IOException, InterruptedException {
  final Configuration jobConf = context.getConfiguration();
  final DBConfiguration exportConf = new DBConfiguration(jobConf);

  // Sanity check all the configuration values we need.
  if (jobConf.get(DBConfiguration.URL_PROPERTY) == null) {
    throw new IOException("Database connection URL is not set.");
  }
  if (exportConf.getOutputTableName() == null) {
    throw new IOException("Table name is not set for export");
  }
  // The field count is only consulted when no field names are configured.
  if (exportConf.getOutputFieldNames() == null
      && exportConf.getOutputFieldCount() == 0) {
    throw new IOException(
        "Output field names are null and zero output field count set.");
  }
}
 
开发者ID:aliyun,项目名称:aliyun-maxcompute-data-collectors,代码行数:19,代码来源:ExportOutputFormat.java

示例6: getSplits

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Groups the snapshot's files into balanced sets and wraps each set in an
 * {@code ExportSnapshotInputSplit}.
 *
 * <p>When {@code CONF_NUM_SPLITS} reads as 0 and the snapshot has files, the
 * mapper count is derived as {@code 1 + files / CONF_MAP_GROUP} (group
 * default 10), capped at the number of files, and written back to the
 * configuration under {@code CONF_NUM_SPLITS} and {@code MR_NUM_MAPS}.
 *
 * @param context the job context supplying the configuration
 * @return one split per group returned by {@code getBalancedSplits}
 * @throws IOException if the file system or the snapshot files cannot be read
 * @throws InterruptedException declared by the overridden method
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Path snapshotDir = new Path(conf.get(CONF_SNAPSHOT_DIR));
  FileSystem fs = FileSystem.get(snapshotDir.toUri(), conf);

  List<Pair<SnapshotFileInfo, Long>> snapshotFiles = getSnapshotFiles(conf, fs, snapshotDir);
  int mappers = conf.getInt(CONF_NUM_SPLITS, 0);
  if (mappers == 0 && snapshotFiles.size() > 0) {
    // NOTE(review): a configured CONF_MAP_GROUP of 0 would divide by zero here.
    mappers = 1 + (snapshotFiles.size() / conf.getInt(CONF_MAP_GROUP, 10));
    mappers = Math.min(mappers, snapshotFiles.size());
    conf.setInt(CONF_NUM_SPLITS, mappers);
    conf.setInt(MR_NUM_MAPS, mappers);
  }

  List<List<Pair<SnapshotFileInfo, Long>>> groups = getBalancedSplits(snapshotFiles, mappers);
  // Parameterized (the original used a raw ArrayList, an unchecked conversion).
  List<InputSplit> splits = new ArrayList<InputSplit>(groups.size());
  for (List<Pair<SnapshotFileInfo, Long>> files: groups) {
    splits.add(new ExportSnapshotInputSplit(files));
  }
  return splits;
}
 
开发者ID:fengchen8086,项目名称:ditb,代码行数:23,代码来源:ExportSnapshot.java

示例7: getOutputCompressorClass

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Resolves the {@link CompressionCodec} class used to compress the job
 * outputs.
 *
 * @param job the job context whose configuration is consulted
 * @param defaultValue the codec class returned when
 *        {@code FileOutputFormat.COMPRESS_CODEC} is not set
 * @return the configured codec class, or {@code defaultValue} when none is
 *         configured
 * @throws IllegalArgumentException if a codec class name is configured but
 *         the class cannot be found
 */
public static Class<? extends CompressionCodec>
getOutputCompressorClass(JobContext job,
                       Class<? extends CompressionCodec> defaultValue) {
  final Configuration jobConf = job.getConfiguration();
  final String codecName = jobConf.get(FileOutputFormat.COMPRESS_CODEC);

  // No codec configured: fall back to the caller's default.
  if (codecName == null) {
    return defaultValue;
  }

  try {
    return jobConf.getClassByName(codecName)
        .asSubclass(CompressionCodec.class);
  } catch (ClassNotFoundException e) {
    throw new IllegalArgumentException("Compression codec " + codecName +
                                       " was not found.", e);
  }
}
 
开发者ID:naver,项目名称:hadoop,代码行数:26,代码来源:FileOutputFormat.java

示例8: getSplits

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Implementation shared with the deprecated HLogInputFormat: produces one
 * {@code WALSplit} per file selected by {@code getFiles} under the job's
 * input directory.
 *
 * @param context the job context supplying the configuration
 * @param startKey configuration key holding the start time
 *        (defaults to {@link Long#MIN_VALUE} when unset)
 * @param endKey configuration key holding the end time
 *        (defaults to {@link Long#MAX_VALUE} when unset)
 * @return one split per selected file, each carrying the file's path, its
 *         length and the time range
 * @throws IOException if the file system or the files cannot be accessed
 * @throws InterruptedException declared for callers; not raised directly here
 */
List<InputSplit> getSplits(final JobContext context, final String startKey, final String endKey)
    throws IOException, InterruptedException {
  final Configuration jobConf = context.getConfiguration();
  final Path walDir = new Path(jobConf.get("mapreduce.input.fileinputformat.inputdir"));

  final long fromTime = jobConf.getLong(startKey, Long.MIN_VALUE);
  final long toTime = jobConf.getLong(endKey, Long.MAX_VALUE);

  final List<FileStatus> walFiles =
      getFiles(walDir.getFileSystem(jobConf), walDir, fromTime, toTime);

  final List<InputSplit> result = new ArrayList<InputSplit>(walFiles.size());
  for (FileStatus walFile : walFiles) {
    final String location = walFile.getPath().toString();
    result.add(new WALSplit(location, walFile.getLen(), fromTime, toTime));
  }
  return result;
}
 
开发者ID:fengchen8086,项目名称:ditb,代码行数:21,代码来源:WALInputFormat.java

示例9: commitJob

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * {@inheritDoc}
 *
 * <p>Delegates the commit to the superclass, then marks the task attempt as
 * "Commit Successful" and runs {@code cleanup} with the job's configuration.
 *
 * @param jobContext the context of the job being committed
 * @throws IOException if the superclass commit fails; in that case neither
 *         the status update nor {@code cleanup} is executed
 */
@Override
public void commitJob(JobContext jobContext) throws IOException {
  // Capture the configuration up front so it is available to cleanup below.
  Configuration conf = jobContext.getConfiguration();

  // Outside the try block: a failure here skips the cleanup entirely.
  super.commitJob(jobContext);

  try {
    taskAttemptContext.setStatus("Commit Successful");
  } finally {
    // Runs whether or not the status update above throws.
    cleanup(conf);
  }
}
 
开发者ID:HotelsDotCom,项目名称:circus-train,代码行数:14,代码来源:CopyCommitter.java

示例10: getSplits

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Implementation of InputFormat::getSplits(). Produces the input splits by
 * delegating to the three-argument {@code getSplits} overload with the
 * configured number of maps and the configured total number of bytes to copy.
 *
 * @param context JobContext for the job.
 * @return The list of input splits; an empty list when the configured number
 *         of maps is 0.
 * @throws IOException On failure.
 * @throws InterruptedException declared by the overridden method
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
  final Configuration jobConf = context.getConfiguration();
  final int requestedMaps = ConfigurationUtil.getInt(jobConf, MRJobConfig.NUM_MAPS);

  if (requestedMaps != 0) {
    // The total byte count is only read once at least one map is requested.
    return getSplits(jobConf, requestedMaps,
        ConfigurationUtil.getLong(jobConf, S3MapReduceCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED));
  }

  return new ArrayList<>();
}
 
开发者ID:HotelsDotCom,项目名称:circus-train,代码行数:22,代码来源:UniformSizeInputFormat.java

示例11: checkOutputSpecs

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Validates the job's output directory.
 *
 * <p>The directory must be set. If it already exists it is accepted only
 * when the simple partitioner is not in use and its sole entry is a
 * non-directory named {@code TeraInputFormat.PARTITION_FILENAME}.
 *
 * @param job the job context supplying the output path, credentials and
 *        configuration
 * @throws InvalidJobConfException if no output directory is set
 * @throws FileAlreadyExistsException if the output directory exists and is
 *         not acceptable as described above
 * @throws IOException if the file system cannot be accessed
 */
@Override
public void checkOutputSpecs(JobContext job
                            ) throws InvalidJobConfException, IOException {
  // Ensure that the output directory is set
  final Path outputDir = getOutputPath(job);
  if (outputDir == null) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }

  final Configuration conf = job.getConfiguration();

  // get delegation token for outDir's file system
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outputDir }, conf);

  final FileSystem outputFs = outputDir.getFileSystem(conf);
  if (!outputFs.exists(outputDir)) {
    // A missing directory needs no further checks.
    return;
  }

  // existing output dir is considered empty iff its only content is the
  // partition file.
  final FileStatus[] children = outputFs.listStatus(outputDir);
  boolean onlyPartitionFile = false;
  if (children != null && children.length == 1) {
    final FileStatus soleChild = children[0];
    final String childName = soleChild.getPath().getName();
    onlyPartitionFile = !soleChild.isDirectory()
        && TeraInputFormat.PARTITION_FILENAME.equals(childName);
  }

  if (TeraSort.getUseSimplePartitioner(job) || !onlyPartitionFile) {
    throw new FileAlreadyExistsException("Output directory " + outputDir
        + " already exists");
  }
}
 
开发者ID:naver,项目名称:hadoop,代码行数:36,代码来源:TeraOutputFormat.java

示例12: checkOutputSpecs

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Runs the superclass output checks, then enables debug logging for this JVM
 * if the job configuration requires it.
 *
 * @param context the job context supplying the configuration
 * @throws IOException if the superclass check fails
 * @throws InterruptedException if the superclass check is interrupted
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException,
    InterruptedException {

  super.checkOutputSpecs(context);

  // This code is now running on a Datanode in the Hadoop cluster, so we
  // need to enable debug logging in this JVM...
  OraOopUtilities.enableDebugLoggingIfRequired(context.getConfiguration());
}
 
开发者ID:aliyun,项目名称:aliyun-maxcompute-data-collectors,代码行数:13,代码来源:OraOopOutputFormatBase.java

示例13: getSplits

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Reads the summation from the job configuration and wraps it in a single
 * split.
 *
 * @param context the job context supplying the configuration
 * @return a list containing a single split of summation
 */
@Override
public List<InputSplit> getSplits(JobContext context) {
  // read sigma from conf
  final Summation sigma =
      SummationWritable.read(DistSum.class, context.getConfiguration());

  // create the one-element split list
  final List<InputSplit> singleSplit = new ArrayList<InputSplit>(1);
  singleSplit.add(new SummationSplit(sigma));
  return singleSplit;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:13,代码来源:DistSum.java

示例14: getSplits

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Distributes the datasets retrieved for the configured mainframe dataset
 * name across input splits in round-robin order.
 *
 * <p>The number of splits is the smaller of the dataset count and the job's
 * configured number of maps, but never less than one: dataset {@code j} is
 * added to split {@code j % chunks}.
 *
 * @param job the job context supplying the configuration
 * @return the splits, each holding at least one dataset
 * @throws IOException if no datasets are retrieved, or if retrieval fails
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  Configuration conf = job.getConfiguration();
  String dsName
      = conf.get(MainframeConfiguration.MAINFRAME_INPUT_DATASET_NAME);
  LOG.info("Datasets to transfer from: " + dsName);
  List<String> datasets = retrieveDatasets(dsName, conf);
  if (datasets.isEmpty()) {
    throw new IOException ("No sequential datasets retrieved from " + dsName);
  }

  int count = datasets.size();
  // Clamp to at least one split: with a configured map count of 0 (or less)
  // the original created no splits and then looped forever trying to place
  // the datasets.
  int chunks = Math.max(1,
      Math.min(count, ConfigurationHelper.getJobNumMaps(job)));

  List<MainframeDatasetInputSplit> buckets =
      new ArrayList<MainframeDatasetInputSplit>(chunks);
  for (int i = 0; i < chunks; i++) {
    buckets.add(new MainframeDatasetInputSplit());
  }

  // Round-robin assignment; same placement as the original nested loops.
  for (int j = 0; j < count; j++) {
    buckets.get(j % chunks).addDataset(datasets.get(j));
  }

  return new ArrayList<InputSplit>(buckets);
}
 
开发者ID:aliyun,项目名称:aliyun-maxcompute-data-collectors,代码行数:31,代码来源:MainframeDatasetInputFormat.java

示例15: getInputPathFilter

import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Instantiates the {@link PathFilter} configured for the job's input paths.
 *
 * @param context the job context whose configuration is consulted
 * @return a new instance of the class configured under
 *         {@code PATHFILTER_CLASS}, or {@code null} if none has been set
 */
public static PathFilter getInputPathFilter(JobContext context) {
  final Configuration jobConf = context.getConfiguration();
  final Class<?> configuredFilter =
      jobConf.getClass(PATHFILTER_CLASS, null, PathFilter.class);

  // No filter class configured for this job.
  if (configuredFilter == null) {
    return null;
  }
  return (PathFilter) ReflectionUtils.newInstance(configuredFilter, jobConf);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:13,代码来源:FileInputFormat.java


注:本文中的org.apache.hadoop.mapreduce.JobContext.getConfiguration方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。