

Java Job.getConfiguration Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.Job.getConfiguration. If you are wondering what Job.getConfiguration does, how to use it, or where to find examples of it, the hand-picked code examples below may help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.mapreduce.Job.


The following presents 15 code examples of the Job.getConfiguration method, ordered by popularity by default.
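Before the examples, here is a minimal, self-contained sketch of the pattern they all share: obtain the job's live Configuration via Job.getConfiguration() and adjust it before submission. The input/output paths and the property my.custom.parameter are placeholders for illustration, not taken from any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class GetConfigurationSketch {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "get-configuration-sketch");

    // Job.getConfiguration() returns the job's live Configuration;
    // anything set here is picked up when the job is submitted.
    Configuration conf = job.getConfiguration();
    conf.setInt("mapreduce.task.timeout", 600000); // standard Hadoop property
    conf.set("my.custom.parameter", "value");      // hypothetical application setting

    FileInputFormat.setInputPaths(job, new Path(args[0]));  // placeholder input
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // placeholder output
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}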

Example 1: configurePartitioner

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
    throws IOException {
  Configuration conf = job.getConfiguration();
  // create the partitions file
  FileSystem fs = FileSystem.get(conf);
  String hbaseTmpFsDir =
      conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
        HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
  Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
  fs.makeQualified(partitionsPath);
  writePartitions(conf, partitionsPath, splitPoints);
  fs.deleteOnExit(partitionsPath);

  // configure job to use it
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
 
Developer: fengchen8086, Project: ditb, Lines: 22, Source: HFileOutputFormat2.java

Example 2: testNewCounterB

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@Test
public void testNewCounterB() throws Exception {
  final Job job = createJob();
  final Configuration conf = job.getConfiguration();
  conf.setInt(JobContext.IO_SORT_FACTOR, 2);
  createWordsFile(inFiles[3], conf);
  removeWordsFile(inFiles[4], conf);
  long inputSize = 0;
  inputSize += getFileSize(inFiles[0]);
  inputSize += getFileSize(inFiles[1]);
  inputSize += getFileSize(inFiles[2]);
  inputSize += getFileSize(inFiles[3]);
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(
      job, IN_DIR);
  org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(
      job, new Path(OUT_DIR, "outputN1"));
  assertTrue(job.waitForCompletion(true));
  final Counters c1 = Counters.downgrade(job.getCounters());
  validateCounters(c1, 98304, 20480, 81920);
  validateFileCounters(c1, inputSize, 0, 0, 0);
}
 
Developer: naver, Project: hadoop, Lines: 22, Source: TestJobCounters.java

Example 3: readSplit

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
private static List<Text> readSplit(KeyValueTextInputFormat format, 
    InputSplit split, Job job) throws IOException, InterruptedException {
  List<Text> result = new ArrayList<Text>();
  Configuration conf = job.getConfiguration();
  TaskAttemptContext context = MapReduceTestUtil.
    createDummyMapTaskAttemptContext(conf);
  RecordReader<Text, Text> reader = format.createRecordReader(split, 
    MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
  MapContext<Text, Text, Text, Text> mcontext = 
    new MapContextImpl<Text, Text, Text, Text>(conf, 
    context.getTaskAttemptID(), reader, null, null,
    MapReduceTestUtil.createDummyReporter(), 
    split);
  reader.initialize(split, mcontext);
  while (reader.nextKeyValue()) {
    result.add(new Text(reader.getCurrentValue()));
  }
  reader.close();
  return result;
}
 
Developer: naver, Project: hadoop, Lines: 21, Source: TestMRKeyValueTextInputFormat.java

Example 4: testEmptyOutput

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public void testEmptyOutput() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);

  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);

  // Do not write any output

  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);
  
  FileUtil.fullyDelete(new File(outDir.toString()));
}
 
Developer: naver, Project: hadoop, Lines: 22, Source: TestMRCJCFileOutputCommitter.java

Example 5: readSplit

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
private static List<Text> readSplit(InputFormat<LongWritable,Text> format,
  InputSplit split, Job job) throws IOException, InterruptedException {
  List<Text> result = new ArrayList<Text>();
  Configuration conf = job.getConfiguration();
  TaskAttemptContext context = MapReduceTestUtil.
    createDummyMapTaskAttemptContext(conf);
  RecordReader<LongWritable, Text> reader = format.createRecordReader(split,
    MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
  MapContext<LongWritable,Text,LongWritable,Text> mcontext =
    new MapContextImpl<LongWritable,Text,LongWritable,Text>(conf,
    context.getTaskAttemptID(), reader, null, null,
    MapReduceTestUtil.createDummyReporter(),
    split);
  reader.initialize(split, mcontext);
  while (reader.nextKeyValue()) {
    result.add(new Text(reader.getCurrentValue()));
  }
  return result;
}
 
Developer: naver, Project: hadoop, Lines: 20, Source: TestCombineTextInputFormat.java

Example 6: propagateOptionsToJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@Override
protected void propagateOptionsToJob(Job job) {
  super.propagateOptionsToJob(job);
  Configuration conf = job.getConfiguration();

  // This is an export job, where retrying a failed mapper usually doesn't make sense.
  // By default we force MR to run only one attempt per mapper. A user or connector
  // developer can override this behavior by setting SQOOP_EXPORT_MAP_TASK_MAX_ATTEMTPS:
  //
  // * Positive number - allow the specified number of attempts
  // * Negative number - fall back to Hadoop's default number of attempts
  //
  // This is important for most connectors, as they commit data directly to the
  // final table, so re-running a mapper would produce misleading errors about
  // inserting duplicate rows.
  int sqoopMaxAttempts = conf.getInt(SQOOP_EXPORT_MAP_TASK_MAX_ATTEMTPS, 1);
  if (sqoopMaxAttempts > 1) {
    conf.setInt(HADOOP_MAP_TASK_MAX_ATTEMTPS, sqoopMaxAttempts);
  }
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 21, Source: ExportJobBase.java
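As a side note, the Hadoop setting behind the HADOOP_MAP_TASK_MAX_ATTEMTPS constant is, in stock Hadoop, the mapreduce.map.maxattempts property. The sketch below caps map attempts directly on a job's Configuration; the property name is an assumption based on vanilla Hadoop 2.x, not taken from the Sqoop source above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class MapAttemptsSketch {
  static void allowSingleAttempt(Job job) {
    Configuration conf = job.getConfiguration();
    // "mapreduce.map.maxattempts" is the standard Hadoop 2.x property name (assumption);
    // with a value of 1, a failed mapper is not retried.
    conf.setInt("mapreduce.map.maxattempts", 1);
  }
}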

Example 7: initScans

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Initialize the list of Scans.
 * 
 * @param job
 * @return
 */
private List<Scan> initScans(Job job) {
	Configuration conf = job.getConfiguration();
	// Get the run date: yyyy-MM-dd
	String date = conf.get(GlobalConstants.RUNNING_DATE_PARAMES);
	long startDate = TimeUtil.parseString2Long(date);
	long endDate = startDate + GlobalConstants.DAY_OF_MILLISECONDS;

	Scan scan = new Scan();
	// Define the start and stop row keys for the HBase scan
	scan.setStartRow(Bytes.toBytes("" + startDate));
	scan.setStopRow(Bytes.toBytes("" + endDate));

	FilterList filterList = new FilterList();
	// Column names the mapper needs to read
	String[] columns = new String[] { EventLogConstants.LOG_COLUMN_NAME_UUID, // user id
			EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME, // server time
			EventLogConstants.LOG_COLUMN_NAME_PLATFORM, // platform name
			EventLogConstants.LOG_COLUMN_NAME_BROWSER_NAME, // browser name
			EventLogConstants.LOG_COLUMN_NAME_BROWSER_VERSION // browser version
	};
	filterList.addFilter(this.getColumnFilter(columns));

	scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(EventLogConstants.HBASE_NAME_EVENT_LOGS));
	scan.setFilter(filterList);
	return Lists.newArrayList(scan);
}
 
Developer: liuhaozzu, Project: big_data, Lines: 33, Source: ActiveUserRunner.java

Example 8: initScans

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Initialize the list of Scans.
 * 
 * @param job
 * @return
 */
private List<Scan> initScans(Job job) {
	// row key: timestamp + ....
	Configuration conf = job.getConfiguration();
	// Get the run date: yyyy-MM-dd
	String date = conf.get(GlobalConstants.RUNNING_DATE_PARAMES);
	long startDate = TimeUtil.parseString2Long(date);
	long endDate = startDate + GlobalConstants.DAY_OF_MILLISECONDS;

	Scan scan = new Scan();
	// Define the start and stop row keys for the HBase scan
	scan.setStartRow(Bytes.toBytes(Long.toString(startDate)));
	scan.setStopRow(Bytes.toBytes(Long.toString(endDate)));

	FilterList filterList = new FilterList();
	// Filter the data: only analyze launch events
	filterList.addFilter(new SingleColumnValueFilter(Bytes.toBytes(EventLogConstants.EVENT_LOGS_FAMILY_NAME),
			Bytes.toBytes(EventLogConstants.LOG_COLUMN_NAME_EVENT_NAME), CompareOp.EQUAL,
			Bytes.toBytes(EventEnum.LAUNCH.alias)));
	// Column names the mapper needs to read
	String[] columns = new String[] { EventLogConstants.LOG_COLUMN_NAME_EVENT_NAME,
			EventLogConstants.LOG_COLUMN_NAME_UUID, EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME,
			EventLogConstants.LOG_COLUMN_NAME_PLATFORM, EventLogConstants.LOG_COLUMN_NAME_BROWSER_NAME,
			EventLogConstants.LOG_COLUMN_NAME_BROWSER_VERSION };
	// scan.addColumn(family, qualifier)
	filterList.addFilter(this.getColumnFilter(columns));

	scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(EventLogConstants.HBASE_NAME_EVENT_LOGS));
	scan.setFilter(filterList);
	return Lists.newArrayList(scan);
}
 
Developer: liuhaozzu, Project: big_data, Lines: 37, Source: NewInstallUserRunner.java
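The Scan lists produced by the two initScans methods above are typically handed to HBase's TableMapReduceUtil when the job is assembled. The sketch below shows that wiring under stated assumptions: EventLogMapper and its Text/NullWritable output types are placeholders, not classes from the big_data project.

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;

public class MultiScanJobSketch {

  // Placeholder mapper; the real project would emit its own key/value types.
  public static class EventLogMapper extends TableMapper<Text, NullWritable> {
    @Override
    protected void map(ImmutableBytesWritable row, Result value, Context context)
        throws IOException, InterruptedException {
      context.write(new Text(row.get()), NullWritable.get());
    }
  }

  static void wireScans(Job job, List<Scan> scans) throws IOException {
    // The multi-Scan overload configures multi-range table input for the job.
    TableMapReduceUtil.initTableMapperJob(
        scans, EventLogMapper.class, Text.class, NullWritable.class, job);
  }
}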

Example 9: completeImport

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Perform the loading of Hfiles.
 */
@Override
protected void completeImport(Job job) throws IOException, ImportException {
  super.completeImport(job);

  FileSystem fileSystem = FileSystem.get(job.getConfiguration());

  // Make the bulk load files source directory accessible to the world
  // so that the hbase user can deal with it
  Path bulkLoadDir = getContext().getDestination();
  setPermission(fileSystem, fileSystem.getFileStatus(bulkLoadDir),
    FsPermission.createImmutable((short) 00777));

  HTable hTable = new HTable(job.getConfiguration(), options.getHBaseTable());

  // Load generated HFiles into table
  try {
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(
      job.getConfiguration());
    loader.doBulkLoad(bulkLoadDir, hTable);
  }
  catch (Exception e) {
    String errorMessage = String.format("Unrecoverable error while " +
      "performing the bulk load of files in [%s]",
      bulkLoadDir.toString());
    throw new ImportException(errorMessage, e);
  }
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 31, Source: HBaseBulkImportJob.java

Example 10: testNegativeRecordLength

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Test with record length set to a negative value
 */
@Test (timeout=5000)
public void testNegativeRecordLength() throws Exception {
  localFs.delete(workDir, true);
  Path file = new Path(workDir, new String("testFormat.txt"));
  createFile(file, null, 10, 10);
  // Set the fixed length record length config property 
  Job job = Job.getInstance(defaultConf);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  format.setRecordLength(job.getConfiguration(), -10);
  FileInputFormat.setInputPaths(job, workDir);
  List<InputSplit> splits = format.getSplits(job);
  boolean exceptionThrown = false;
  for (InputSplit split : splits) {
    try {
      TaskAttemptContext context = MapReduceTestUtil.
          createDummyMapTaskAttemptContext(job.getConfiguration());
      RecordReader<LongWritable, BytesWritable> reader = 
          format.createRecordReader(split, context);
      MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable>
          mcontext =
          new MapContextImpl<LongWritable, BytesWritable, LongWritable,
          BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(),
          reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
      reader.initialize(split, mcontext);
    } catch(IOException ioe) {
      exceptionThrown = true;
      LOG.info("Exception message:" + ioe.getMessage());
    }
  }
  assertTrue("Exception for negative record length:", exceptionThrown);
}
 
Developer: naver, Project: hadoop, Lines: 35, Source: TestFixedLengthInputFormat.java
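The test above exercises the failure path for a negative record length. For contrast, here is a small sketch of the normal configuration path; the 10-byte record length and the input path are arbitrary choices for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat;

public class FixedLengthJobSketch {
  static Job configure(Path input) throws Exception {
    Job job = Job.getInstance(new Configuration(), "fixed-length-read");
    // Every record is exactly 10 bytes; values are delivered as BytesWritable.
    FixedLengthInputFormat.setRecordLength(job.getConfiguration(), 10);
    job.setInputFormatClass(FixedLengthInputFormat.class);
    FileInputFormat.setInputPaths(job, input);
    return job;
  }
}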

Example 11: addInputPath

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Add a {@link Path} with a custom {@link InputFormat} to the list of
 * inputs for the map-reduce job.
 * 
 * @param job The {@link Job}
 * @param path {@link Path} to be added to the list of inputs for the job
 * @param inputFormatClass {@link InputFormat} class to use for this path
 */
@SuppressWarnings("unchecked")
public static void addInputPath(Job job, Path path,
    Class<? extends InputFormat> inputFormatClass) {
  String inputFormatMapping = path.toString() + ";"
     + inputFormatClass.getName();
  Configuration conf = job.getConfiguration();
  String inputFormats = conf.get(DIR_FORMATS);
  conf.set(DIR_FORMATS,
     inputFormats == null ? inputFormatMapping : inputFormats + ","
         + inputFormatMapping);

  job.setInputFormatClass(DelegatingInputFormat.class);
}
 
Developer: naver, Project: hadoop, Lines: 22, Source: MultipleInputs.java
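From the caller's side, this method is used roughly as sketched below; the two HDFS paths are placeholders, and the formats are stock Hadoop input formats.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class MultipleInputsSketch {
  static void addInputs(Job job) {
    // Each path gets its own InputFormat; DelegatingInputFormat dispatches per split.
    MultipleInputs.addInputPath(job, new Path("/data/plain"), TextInputFormat.class);
    MultipleInputs.addInputPath(job, new Path("/data/keyvalue"), KeyValueTextInputFormat.class);
  }
}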

Example 12: getSample

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, Job job) 
    throws IOException, InterruptedException {
  List<InputSplit> splits = inf.getSplits(job);
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.size());
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    TaskAttemptContext samplingContext = new TaskAttemptContextImpl(
        job.getConfiguration(), new TaskAttemptID());
    RecordReader<K,V> reader = inf.createRecordReader(
        splits.get(i), samplingContext);
    reader.initialize(splits.get(i), samplingContext);
    while (reader.nextKeyValue()) {
      samples.add(ReflectionUtils.copy(job.getConfiguration(),
                                       reader.getCurrentKey(), null));
      ++records;
      if ((i+1) * samplesPerSplit <= records) {
        break;
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
 
Developer: naver, Project: hadoop, Lines: 30, Source: InputSampler.java
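In practice getSample is rarely called directly; a driver hands a sampler to InputSampler.writePartitionFile so that TotalOrderPartitioner can read the resulting split points. A sketch under the assumption that the job's input keys are Text and that the input format is already configured:

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class SamplerSketch {
  static void samplePartitions(Job job) throws Exception {
    // Sample up to 1000 keys from at most 10 splits to estimate key boundaries.
    InputSampler.Sampler<Text, Text> sampler =
        new InputSampler.SplitSampler<Text, Text>(1000, 10);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    // Writes the sampled split points to the partition file the partitioner reads.
    InputSampler.writePartitionFile(job, sampler);
  }
}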

Example 13: addInputPath

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 * 
 * @param job The {@link Job} to modify
 * @param path {@link Path} to be added to the list of inputs for 
 *            the map-reduce job.
 */
public static void addInputPath(Job job, 
                                Path path) throws IOException {
  Configuration conf = job.getConfiguration();
  path = path.getFileSystem(conf).makeQualified(path);
  String dirStr = StringUtils.escapeString(path.toString());
  String dirs = conf.get(INPUT_DIR);
  conf.set(INPUT_DIR, dirs == null ? dirStr : dirs + "," + dirStr);
}
 
Developer: naver, Project: hadoop, Lines: 16, Source: FileInputFormat.java
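As the comma-join in the code suggests, repeated calls accumulate paths in INPUT_DIR. A quick usage sketch with placeholder directories:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class AddInputPathSketch {
  static void addInputs(Job job) throws Exception {
    // Each call appends another escaped, fully qualified path to the input list.
    FileInputFormat.addInputPath(job, new Path("/logs/2015-01-01"));
    FileInputFormat.addInputPath(job, new Path("/logs/2015-01-02"));
  }
}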

Example 14: testFormat

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@Test(timeout=10000)
public void testFormat() throws Exception {
  Job job = Job.getInstance(new Configuration(defaultConf));

  Random random = new Random();
  long seed = random.nextLong();
  LOG.info("seed = " + seed);
  random.setSeed(seed);

  localFs.delete(workDir, true);
  FileInputFormat.setInputPaths(job, workDir);

  final int length = 10000;
  final int numFiles = 10;

  // create files with various lengths
  createFiles(length, numFiles, random);

  // create a combined split for the files
  CombineTextInputFormat format = new CombineTextInputFormat();
  for (int i = 0; i < 3; i++) {
    int numSplits = random.nextInt(length/20) + 1;
    LOG.info("splitting: requesting = " + numSplits);
    List<InputSplit> splits = format.getSplits(job);
    LOG.info("splitting: got =        " + splits.size());

    // we should have a single split as the length is comfortably smaller than
    // the block size
    assertEquals("We got more than one splits!", 1, splits.size());
    InputSplit split = splits.get(0);
    assertEquals("It should be CombineFileSplit",
      CombineFileSplit.class, split.getClass());

    // check the split
    BitSet bits = new BitSet(length);
    LOG.debug("split= " + split);
    TaskAttemptContext context = MapReduceTestUtil.
      createDummyMapTaskAttemptContext(job.getConfiguration());
    RecordReader<LongWritable, Text> reader =
      format.createRecordReader(split, context);
    assertEquals("reader class is CombineFileRecordReader.",
      CombineFileRecordReader.class, reader.getClass());
    MapContext<LongWritable,Text,LongWritable,Text> mcontext =
      new MapContextImpl<LongWritable,Text,LongWritable,Text>(job.getConfiguration(),
      context.getTaskAttemptID(), reader, null, null,
      MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);

    try {
      int count = 0;
      while (reader.nextKeyValue()) {
        LongWritable key = reader.getCurrentKey();
        assertNotNull("Key should not be null.", key);
        Text value = reader.getCurrentValue();
        final int v = Integer.parseInt(value.toString());
        LOG.debug("read " + v);
        assertFalse("Key in multiple partitions.", bits.get(v));
        bits.set(v);
        count++;
      }
      LOG.debug("split=" + split + " count=" + count);
    } finally {
      reader.close();
    }
    assertEquals("Some keys in no partition.", length, bits.cardinality());
  }
}
 
Developer: naver, Project: hadoop, Lines: 68, Source: TestCombineTextInputFormat.java

Example 15: configureIncrementalLoad

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor,
    RegionLocator regionLocator, Class<? extends OutputFormat<?, ?>> cls) throws IOException,
    UnsupportedEncodingException {
  Configuration conf = job.getConfiguration();
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(KeyValue.class);
  job.setOutputFormatClass(cls);

  // Based on the configured map output class, set the correct reducer to properly
  // sort the incoming values.
  // TODO it would be nice to pick one or the other of these formats.
  if (KeyValue.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(KeyValueSortReducer.class);
  } else if (Put.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(PutSortReducer.class);
  } else if (Text.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(TextSortReducer.class);
  } else {
    LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
  }

  conf.setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());

  // Use table's region boundaries for TOP split points.
  LOG.info("Looking up current regions for table " + tableDescriptor.getTableName());
  List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
  LOG.info("Configuring " + startKeys.size() + " reduce partitions " +
      "to match current region count");
  job.setNumReduceTasks(startKeys.size());

  configurePartitioner(job, startKeys);
  // Set compression algorithms based on column families
  configureCompression(conf, tableDescriptor);
  configureBloomType(tableDescriptor, conf);
  configureBlockSize(tableDescriptor, conf);
  configureDataBlockEncoding(tableDescriptor, conf);

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
}
 
Developer: fengchen8086, Project: ditb, Lines: 44, Source: HFileOutputFormat2.java
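For completeness, a driver would normally reach this code through the public HFileOutputFormat2.configureIncrementalLoad overload that takes a Table and a RegionLocator (as in HBase 1.x). The sketch below assumes that API shape and uses a placeholder table name; it is not taken from the ditb project.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BulkLoadDriverSketch {
  static Job prepare(Path hfileOutput) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "hfile-bulk-load-prepare");
    FileOutputFormat.setOutputPath(job, hfileOutput);

    try (Connection connection = ConnectionFactory.createConnection(conf)) {
      TableName name = TableName.valueOf("event_logs"); // placeholder table
      Table table = connection.getTable(name);
      RegionLocator locator = connection.getRegionLocator(name);
      // Public entry point; it typically delegates to the descriptor-based
      // overload shown above and partitions output to match the table's regions.
      HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
    }
    return job;
  }
}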


Note: The org.apache.hadoop.mapreduce.Job.getConfiguration examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their authors; copyright of the source code belongs to the original authors, and any redistribution or use should follow the corresponding project's license. Do not reproduce without permission.