

Java JobConf Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.mapred.JobConf. If you are wondering what the JobConf class is for, how to use it, or where to find working examples of it, the curated code samples below should help.


The JobConf class belongs to the org.apache.hadoop.mapred package. The sections below show 15 code examples of the class, sorted by popularity by default.
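
Before the individual examples, here is a minimal end-to-end sketch of how a JobConf is typically assembled and submitted with the classic org.apache.hadoop.mapred API. The paths, job name, and the identity mapper/reducer are placeholders for illustration only.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class JobConfQuickStart {
  public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(JobConfQuickStart.class);
    job.setJobName("jobconf-quickstart");

    // Input/output formats and the key/value types of a pass-through job.
    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    // Identity mapper/reducer simply copy records; replace with real classes.
    job.setMapperClass(IdentityMapper.class);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(1);

    // Placeholder paths -- adjust to your cluster.
    FileInputFormat.setInputPaths(job, new Path("/tmp/jobconf-demo/in"));
    FileOutputFormat.setOutputPath(job, new Path("/tmp/jobconf-demo/out"));

    JobClient.runJob(job);  // submits the job and blocks until it finishes
  }
}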

Example 1: setup

import org.apache.hadoop.mapred.JobConf; // import the required package/class
@Before
@SuppressWarnings("unchecked") // mocked generics
public void setup() {
  LOG.info(">>>> " + name.getMethodName());
  job = new JobConf();
  job.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, false);
  jobWithRetry = new JobConf();
  jobWithRetry.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, true);
  id = TaskAttemptID.forName("attempt_0_1_r_1_1");
  ss = mock(ShuffleSchedulerImpl.class);
  mm = mock(MergeManagerImpl.class);
  r = mock(Reporter.class);
  metrics = mock(ShuffleClientMetrics.class);
  except = mock(ExceptionReporter.class);
  key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0});
  connection = mock(HttpURLConnection.class);

  allErrs = mock(Counters.Counter.class);
  when(r.getCounter(anyString(), anyString())).thenReturn(allErrs);

  ArrayList<TaskAttemptID> maps = new ArrayList<TaskAttemptID>(1);
  maps.add(map1ID);
  maps.add(map2ID);
  when(ss.getMapsForHost(host)).thenReturn(maps);
}
 
Developer: naver, Project: hadoop, Lines: 26, Source: TestFetcher.java

Example 2: testInputFormat

import org.apache.hadoop.mapred.JobConf; // import the required package/class
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  final JobConf job = MapreduceTestingShim.getJobConf(mrCluster);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);
  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
 
Developer: fengchen8086, Project: ditb, Lines: 23, Source: TestTableInputFormat.java

Example 3: shouldCreateAndRunSubmittableJob

import org.apache.hadoop.mapred.JobConf; // import the required package/class
@Test
@SuppressWarnings({ "deprecation" })
public void shouldCreateAndRunSubmittableJob() throws Exception {
  RowCounter rCounter = new RowCounter();
  rCounter.setConf(HBaseConfiguration.create());
  String[] args = new String[] { "\temp", "tableA", "column1", "column2",
      "column3" };
  JobConf jobConfig = rCounter.createSubmittableJob(args);

  assertNotNull(jobConfig);
  assertEquals(0, jobConfig.getNumReduceTasks());
  assertEquals("rowcounter", jobConfig.getJobName());
  assertEquals(jobConfig.getMapOutputValueClass(), Result.class);
  assertEquals(jobConfig.getMapperClass(), RowCounterMapper.class);
  assertEquals(jobConfig.get(TableInputFormat.COLUMN_LIST), Joiner.on(' ')
      .join("column1", "column2", "column3"));
  assertEquals(jobConfig.getMapOutputKeyClass(), ImmutableBytesWritable.class);
}
 
Developer: fengchen8086, Project: ditb, Lines: 19, Source: TestRowCounter.java
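
A caller-side sketch of how the job built by createSubmittableJob might then be submitted; the output directory, table and column names are placeholders, and the argument layout (output dir, table, columns) mirrors the test above.

RowCounter rowCounter = new RowCounter();
rowCounter.setConf(HBaseConfiguration.create());
String[] args = new String[] { "/tmp/rowcounter-out", "tableA", "column1" };
JobConf job = rowCounter.createSubmittableJob(args);
RunningJob running = JobClient.runJob(job);           // submit and wait
System.out.println("successful: " + running.isSuccessful());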

Example 4: configure

import org.apache.hadoop.mapred.JobConf; // import the required package/class
/** Mapper configuration.
 * Extracts source and destination file system, as well as
 * top-level paths on source and destination directories.
 * Gets the named file systems, to be used later in map.
 */
public void configure(JobConf job)
{
  destPath = new Path(job.get(DST_DIR_LABEL, "/"));
  try {
    destFileSys = destPath.getFileSystem(job);
  } catch (IOException ex) {
    throw new RuntimeException("Unable to get the named file system.", ex);
  }
  sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
  buffer = new byte[sizeBuf];
  ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);
  preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
  if (preserve_status) {
    preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
  }
  update = job.getBoolean(Options.UPDATE.propertyname, false);
  overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
  skipCRCCheck = job.getBoolean(Options.SKIPCRC.propertyname, false);
  this.job = job;
}
 
Developer: naver, Project: hadoop, Lines: 26, Source: DistCpV1.java

Example 5: shouldNumberOfReduceTaskNotExceedNumberOfRegionsForGivenTable

import org.apache.hadoop.mapred.JobConf; // import the required package/class
/**
 * Check what the given number of reduce tasks for the given job configuration
 * does not exceed the number of regions for the given table.
 */
@Test
public void shouldNumberOfReduceTaskNotExceedNumberOfRegionsForGivenTable()
    throws IOException {
  Assert.assertNotNull(presidentsTable);
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.setScannerCaching(jobConf, 100);
  assertEquals(1, jobConf.getNumReduceTasks());
  assertEquals(100, jobConf.getInt("hbase.client.scanner.caching", 0));

  jobConf.setNumReduceTasks(10);
  TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
  assertEquals(1, jobConf.getNumReduceTasks());
}
 
Developer: fengchen8086, Project: ditb, Lines: 22, Source: TestTableMapReduceUtil.java
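
Outside of a test, the same helpers are typically applied while assembling a real job; a short sketch (the table name is a placeholder):

JobConf job = new JobConf(HBaseConfiguration.create());
job.setNumReduceTasks(32);                               // requested parallelism
TableMapReduceUtil.limitNumReduceTasks("myTable", job);  // capped at the table's region count
TableMapReduceUtil.setScannerCaching(job, 100);          // rows fetched per scanner RPC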

Example 6: writePasswordFile

import org.apache.hadoop.mapred.JobConf; // import the required package/class
/**
 * Writes the user's password to a tmp file with 0600 permissions.
 * @return the filename used.
 */
public static String writePasswordFile(Configuration conf)
    throws IOException {
  // Create the temp file to hold the user's password.
  String tmpDir = conf.get(
      ConfigurationConstants.PROP_JOB_LOCAL_DIRECTORY, "/tmp/");
  File tempFile = File.createTempFile("mysql-cnf", ".cnf", new File(tmpDir));

  // Make the password file only private readable.
  DirectImportUtils.setFilePermissions(tempFile, "0600");

  // If we're here, the password file is believed to be ours alone.  The
  // inability to set chmod 0600 inside Java is troublesome. We have to
  // trust that the external 'chmod' program in the path does the right
  // thing, and returns the correct exit status. But given our inability to
  // re-read the permissions associated with a file, we'll have to make do
  // with this.
  String password = DBConfiguration.getPassword((JobConf) conf);
  BufferedWriter w = new BufferedWriter(new OutputStreamWriter(
      new FileOutputStream(tempFile)));
  w.write("[client]\n");
  w.write("password=" + password + "\n");
  w.close();

  return tempFile.toString();
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 30, Source: MySQLUtils.java
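
A hypothetical caller-side sketch: the returned .cnf path is handed to the external MySQL client tools (for example via a --defaults-file style option) and should be removed once the import finishes. jobConf is assumed to already carry the password that DBConfiguration.getPassword() reads.

String passwordFile = MySQLUtils.writePasswordFile(jobConf);
try {
  // pass passwordFile to the mysqldump/mysqlimport invocation here
} finally {
  new File(passwordFile).delete();   // don't leave credentials on disk
}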

Example 7: initTableReduceJob

import org.apache.hadoop.mapred.JobConf; // import the required package/class
/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The output table.
 * @param reducer  The reducer class to use.
 * @param job  The current job configuration to adjust.
 * @param partitioner  Partitioner to use. Pass <code>null</code> to use
 * default partitioner.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *           job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 */
public static void initTableReduceJob(String table,
  Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
  boolean addDependencyJars) throws IOException {
  job.setOutputFormat(TableOutputFormat.class);
  job.setReducerClass(reducer);
  job.set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Put.class);
  job.setStrings("io.serializations", job.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
  if (partitioner == HRegionPartitioner.class) {
    job.setPartitionerClass(HRegionPartitioner.class);
    int regions =
      MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  } else if (partitioner != null) {
    job.setPartitionerClass(partitioner);
  }
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  initCredentials(job);
}
 
Developer: fengchen8086, Project: ditb, Lines: 39, Source: TableMapReduceUtil.java
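
A minimal caller-side sketch, assuming the target table already exists and using HBase's bundled IdentityTableReduce; the table name is a placeholder.

JobConf job = new JobConf(HBaseConfiguration.create());
job.setJobName("write-to-hbase");
// ... configure the map side (input format plus a mapper emitting
//     ImmutableBytesWritable keys and Put values) ...
TableMapReduceUtil.initTableReduceJob("myTable", IdentityTableReduce.class,
    job, null /* default partitioner */, true /* ship dependency jars */);
JobClient.runJob(job);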

Example 8: testMapredSplitSampler

import org.apache.hadoop.mapred.JobConf; // import the required package/class
/**
 * Verify SplitSampler contract in mapred.lib.InputSampler, which is added
 * back for binary compatibility of M/R 1.x
 */
@Test (timeout = 30000)
@SuppressWarnings("unchecked") // IntWritable comparator not typesafe
public void testMapredSplitSampler() throws Exception {
  final int TOT_SPLITS = 15;
  final int NUM_SPLITS = 5;
  final int STEP_SAMPLE = 5;
  final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE;
  org.apache.hadoop.mapred.lib.InputSampler.Sampler<IntWritable,NullWritable>
      sampler = new org.apache.hadoop.mapred.lib.InputSampler.SplitSampler
          <IntWritable,NullWritable>(NUM_SAMPLES, NUM_SPLITS);
  int inits[] = new int[TOT_SPLITS];
  for (int i = 0; i < TOT_SPLITS; ++i) {
    inits[i] = i * STEP_SAMPLE;
  }
  Object[] samples = sampler.getSample(
      new TestMapredInputSamplerIF(100000, TOT_SPLITS, inits),
      new JobConf());
  assertEquals(NUM_SAMPLES, samples.length);
  Arrays.sort(samples, new IntWritable.Comparator());
  for (int i = 0; i < NUM_SAMPLES; ++i) {
    // mapred.lib.InputSampler.SplitSampler has a sampling step
    assertEquals(i % STEP_SAMPLE + TOT_SPLITS * (i / STEP_SAMPLE),
        ((IntWritable)samples[i]).get());
  }
}
 
Developer: naver, Project: hadoop, Lines: 30, Source: TestInputSampler.java
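
In production code the sampler is usually paired with TotalOrderPartitioner to produce totally ordered output; a sketch, assuming the job's input format, paths and key/value types are already configured:

JobConf job = new JobConf();
// ... set input format, paths and key/value classes ...
InputSampler.Sampler<IntWritable, NullWritable> sampler =
    new InputSampler.SplitSampler<IntWritable, NullWritable>(1000 /* samples */, 10 /* splits read */);
Path partitionFile = new Path("/tmp/_partitions");
TotalOrderPartitioner.setPartitionFile(job, partitionFile);
InputSampler.writePartitionFile(job, sampler);   // samples keys, writes split points
job.setPartitionerClass(TotalOrderPartitioner.class);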

Example 9: runJob

import org.apache.hadoop.mapred.JobConf; // import the required package/class
/**
 * Submit/run a map/reduce job.
 * 
 * @param job
 * @return true for success
 * @throws IOException
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean sucess = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
      }
      running = jc.getJob(jobId);
    }
    sucess = running.isSuccessful();
  } finally {
    if (!sucess && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return sucess;
}
 
Developer: naver, Project: hadoop, Lines: 33, Source: DataJoinJob.java
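
The helper blocks, printing a status line roughly once a minute, and kills the job if it did not succeed. A caller-side sketch (the driver class is hypothetical):

JobConf job = new JobConf(MyDataJoinDriver.class);   // hypothetical driver class
// ... set mapper/reducer, input/output formats and paths as usual ...
boolean succeeded = DataJoinJob.runJob(job);
if (!succeeded) {
  System.err.println("data join job failed");
}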

Example 10: LocalFetcher

import org.apache.hadoop.mapred.JobConf; // import the required package/class
public LocalFetcher(JobConf job, TaskAttemptID reduceId,
               ShuffleSchedulerImpl<K, V> scheduler,
               MergeManager<K,V> merger,
               Reporter reporter, ShuffleClientMetrics metrics,
               ExceptionReporter exceptionReporter,
               SecretKey shuffleKey,
               Map<TaskAttemptID, MapOutputFile> localMapFiles) {
  super(job, reduceId, scheduler, merger, reporter, metrics,
      exceptionReporter, shuffleKey);

  this.job = job;
  this.localMapFiles = localMapFiles;

  setName("localfetcher#" + id);
  setDaemon(true);
}
 
Developer: naver, Project: hadoop, Lines: 17, Source: LocalFetcher.java

Example 11: StreamXmlRecordReader

import org.apache.hadoop.mapred.JobConf; // import the required package/class
public StreamXmlRecordReader(FSDataInputStream in, FileSplit split, Reporter reporter,
                             JobConf job, FileSystem fs) throws IOException {
  super(in, split, reporter, job, fs);

  beginMark_ = checkJobGet(CONF_NS + "begin");
  endMark_ = checkJobGet(CONF_NS + "end");

  maxRecSize_ = job_.getInt(CONF_NS + "maxrec", 50 * 1000);
  lookAhead_ = job_.getInt(CONF_NS + "lookahead", 2 * maxRecSize_);
  synched_ = false;

  slowMatch_ = job_.getBoolean(CONF_NS + "slowmatch", false);
  if (slowMatch_) {
    beginPat_ = makePatternCDataOrMark(beginMark_);
    endPat_ = makePatternCDataOrMark(endMark_);
  }
  init();
}
 
Developer: naver, Project: hadoop, Lines: 19, Source: StreamXmlRecordReader.java
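
The begin/end marks and the optional tuning knobs are read from the job configuration under the CONF_NS prefix; a sketch, assuming that prefix is "stream.recordreader." and that records are delimited by an XML <page> element:

JobConf job = new JobConf();
job.set("stream.recordreader.begin", "<page>");          // required: start-of-record mark
job.set("stream.recordreader.end", "</page>");           // required: end-of-record mark
job.setInt("stream.recordreader.maxrec", 50 * 1000);     // optional: max record size in bytes
job.setBoolean("stream.recordreader.slowmatch", false);  // optional: CDATA/pattern-aware matching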

Example 12: getSplits

import org.apache.hadoop.mapred.JobConf; // import the required package/class
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
                            DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {

      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[])null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[])null));
  }

  return splits;
}
 
Developer: naver, Project: hadoop, Lines: 67, Source: GenerateDistCacheData.java

Example 13: configure

import org.apache.hadoop.mapred.JobConf; // import the required package/class
@Override
public void configure(JobConf job) {
  try {
    HTable exampleTable = new HTable(HBaseConfiguration.create(job),
      Bytes.toBytes("exampleDeprecatedTable"));
    // mandatory
    setHTable(exampleTable);
    byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
      Bytes.toBytes("columnB") };
    // mandatory
    setInputColumns(inputColumns);
    Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
    // optional
    setRowFilter(exampleFilter);
  } catch (IOException exception) {
    throw new RuntimeException("Failed to configure for job.", exception);
  }
}
 
Developer: fengchen8086, Project: ditb, Lines: 19, Source: TestTableInputFormat.java

Example 14: setConf

import org.apache.hadoop.mapred.JobConf; // import the required package/class
public void setConf(Configuration conf) {
  if (conf instanceof JobConf) {
    this.conf = (JobConf) conf;
  } else {
    this.conf = new JobConf(conf);
  }
  
  //Initialize the specification for *comparison*
  String sortColumns = this.conf.get(SORT_COLUMNS, null);
  if (sortColumns != null) {
    sortSpec = sortColumns.split(",");
  }
  
  //Column-separator
  columnSeparator = this.conf.get(COLUMN_SEPARATOR, "");
}
 
Developer: naver, Project: hadoop, Lines: 17, Source: Logalyzer.java

Example 15: obtainAuthTokenForJob

import org.apache.hadoop.mapred.JobConf; // import the required package/class
@Override
public void obtainAuthTokenForJob(JobConf job)
    throws IOException, InterruptedException {
  try {
    Class<?> c = Class.forName(
        "org.apache.hadoop.hbase.security.token.TokenUtil");
    Methods.call(c, null, "obtainTokenForJob",
        new Class[]{JobConf.class, UserGroupInformation.class},
        new Object[]{job, ugi});
  } catch (ClassNotFoundException cnfe) {
    throw new RuntimeException("Failure loading TokenUtil class, "
        +"is secure RPC available?", cnfe);
  } catch (IOException ioe) {
    throw ioe;
  } catch (InterruptedException ie) {
    throw ie;
  } catch (RuntimeException re) {
    throw re;
  } catch (Exception e) {
    throw new UndeclaredThrowableException(e,
        "Unexpected error calling TokenUtil.obtainAndCacheToken()");
  }
}
 
Developer: fengchen8086, Project: ditb, Lines: 24, Source: User.java
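
A caller-side sketch for a secure (Kerberos-enabled) cluster; TableMapReduceUtil.initCredentials(job), used in Example 7, performs essentially the same token acquisition.

JobConf job = new JobConf(HBaseConfiguration.create());
// ... configure the table map/reduce job ...
User user = User.getCurrent();       // current HBase/Hadoop user
user.obtainAuthTokenForJob(job);     // fetch and cache an HBase delegation token
JobClient.runJob(job);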


Note: The org.apache.hadoop.mapred.JobConf class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, who retain copyright of the source code; consult each project's license before distributing or reusing the code, and do not repost without permission.