Java JobClient类代码示例

本文整理汇总了Java中org.apache.hadoop.mapred.JobClient类的典型用法代码示例。如果您正苦于以下问题：Java JobClient类的具体用法？Java JobClient怎么用？Java JobClient使用的例子？那么, 这里精选的类代码示例或许可以为您提供帮助。

JobClient类属于org.apache.hadoop.mapred包，在下文中一共展示了JobClient类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: runTests

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
/**
 * Run the test
 * 
 * @throws IOException on error
 */
public static void runTests() throws IOException {
  config.setLong("io.bytes.per.checksum", bytesPerChecksum);
  
  JobConf job = new JobConf(config, NNBench.class);

  job.setJobName("NNBench-" + operation);
  FileInputFormat.setInputPaths(job, new Path(baseDir, CONTROL_DIR_NAME));
  job.setInputFormat(SequenceFileInputFormat.class);
  
  // Explicitly set number of max map attempts to 1.
  job.setMaxMapAttempts(1);
  
  // Explicitly turn off speculative execution
  job.setSpeculativeExecution(false);

  job.setMapperClass(NNBenchMapper.class);
  job.setReducerClass(NNBenchReducer.class);

  FileOutputFormat.setOutputPath(job, new Path(baseDir, OUTPUT_DIR_NAME));
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks((int) numberOfReduces);
  JobClient.runJob(job);
}

开发者ID:naver，项目名称:hadoop，代码行数:30，代码来源:NNBench.java

示例2: testInputFormat

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  final JobConf job = MapreduceTestingShim.getJobConf(mrCluster);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);
  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}

开发者ID:fengchen8086，项目名称:ditb，代码行数:23，代码来源:TestTableInputFormat.java

示例3: runIOTest

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
private void runIOTest(
        Class<? extends Mapper<Text, LongWritable, Text, Text>> mapperClass, 
        Path outputDir) throws IOException {
  JobConf job = new JobConf(config, TestDFSIO.class);

  FileInputFormat.setInputPaths(job, getControlDir(config));
  job.setInputFormat(SequenceFileInputFormat.class);

  job.setMapperClass(mapperClass);
  job.setReducerClass(AccumulatingReducer.class);

  FileOutputFormat.setOutputPath(job, outputDir);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setNumReduceTasks(1);
  JobClient.runJob(job);
}

开发者ID:naver，项目名称:hadoop，代码行数:18，代码来源:TestDFSIO.java

示例4: joinAs

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
private static void joinAs(String jointype,
    Class<? extends SimpleCheckerBase> c) throws Exception {
  final int srcs = 4;
  Configuration conf = new Configuration();
  JobConf job = new JobConf(conf, c);
  Path base = cluster.getFileSystem().makeQualified(new Path("/"+jointype));
  Path[] src = writeSimpleSrc(base, conf, srcs);
  job.set("mapreduce.join.expr", CompositeInputFormat.compose(jointype,
      SequenceFileInputFormat.class, src));
  job.setInt("testdatamerge.sources", srcs);
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(c);
  job.setReducerClass(c);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(IntWritable.class);
  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}

开发者ID:naver，项目名称:hadoop，代码行数:21，代码来源:TestDatamerge.java

示例5: testEmptyJoin

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
public void testEmptyJoin() throws Exception {
  JobConf job = new JobConf();
  Path base = cluster.getFileSystem().makeQualified(new Path("/empty"));
  Path[] src = { new Path(base,"i0"), new Path("i1"), new Path("i2") };
  job.set("mapreduce.join.expr", CompositeInputFormat.compose("outer",
      Fake_IF.class, src));
  job.setInputFormat(CompositeInputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(base, "out"));

  job.setMapperClass(IdentityMapper.class);
  job.setReducerClass(IdentityReducer.class);
  job.setOutputKeyClass(IncomparableKey.class);
  job.setOutputValueClass(NullWritable.class);

  JobClient.runJob(job);
  base.getFileSystem(job).delete(base, true);
}

开发者ID:naver，项目名称:hadoop，代码行数:18，代码来源:TestDatamerge.java

示例6: configure

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
public void configure(String keySpec, int expect) throws Exception {
  Path testdir = new Path(TEST_DIR.getAbsolutePath());
  Path inDir = new Path(testdir, "in");
  Path outDir = new Path(testdir, "out");
  FileSystem fs = getFileSystem();
  fs.delete(testdir, true);
  conf.setInputFormat(TextInputFormat.class);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);

  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(1);

  conf.setOutputFormat(TextOutputFormat.class);
  conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
  conf.setKeyFieldComparatorOptions(keySpec);
  conf.setKeyFieldPartitionerOptions("-k1.1,1.1");
  conf.set(JobContext.MAP_OUTPUT_KEY_FIELD_SEPERATOR, " ");
  conf.setMapperClass(InverseMapper.class);
  conf.setReducerClass(IdentityReducer.class);
  if (!fs.mkdirs(testdir)) {
    throw new IOException("Mkdirs failed to create " + testdir.toString());
  }
  if (!fs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  // set up input data in 2 files 
  Path inFile = new Path(inDir, "part0");
  FileOutputStream fos = new FileOutputStream(inFile.toString());
  fos.write((line1 + "\n").getBytes());
  fos.write((line2 + "\n").getBytes());
  fos.close();
  JobClient jc = new JobClient(conf);
  RunningJob r_job = jc.submitJob(conf);
  while (!r_job.isComplete()) {
    Thread.sleep(1000);
  }
  
  if (!r_job.isSuccessful()) {
    fail("Oops! The job broke due to an unexpected error");
  }
  Path[] outputFiles = FileUtil.stat2Paths(
      getFileSystem().listStatus(outDir,
      new Utils.OutputFileUtils.OutputFilesFilter()));
  if (outputFiles.length > 0) {
    InputStream is = getFileSystem().open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    String line = reader.readLine();
    //make sure we get what we expect as the first line, and also
    //that we have two lines
    if (expect == 1) {
      assertTrue(line.startsWith(line1));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line2));
    }
    line = reader.readLine();
    if (expect == 1) {
      assertTrue(line.startsWith(line2));
    } else if (expect == 2) {
      assertTrue(line.startsWith(line1));
    }
    reader.close();
  }
}

开发者ID:naver，项目名称:hadoop，代码行数:67，代码来源:TestKeyFieldBasedComparator.java

示例7: confRandom

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
/**
 * When no input dir is specified, generate random data.
 */
protected static void confRandom(Job job)
    throws IOException {
  // from RandomWriter
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapOutput.class);

  Configuration conf = job.getConfiguration();
  final ClusterStatus cluster = new JobClient(conf).getClusterStatus();
  int numMapsPerHost = conf.getInt(RandomTextWriter.MAPS_PER_HOST, 10);
  long numBytesToWritePerMap =
    conf.getLong(RandomTextWriter.BYTES_PER_MAP, 1*1024*1024*1024);
  if (numBytesToWritePerMap == 0) {
    throw new IOException(
        "Cannot have " + RandomTextWriter.BYTES_PER_MAP + " set to 0");
  }
  long totalBytesToWrite = conf.getLong(RandomTextWriter.TOTAL_BYTES,
       numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int)(totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(RandomTextWriter.BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
}

开发者ID:naver，项目名称:hadoop，代码行数:28，代码来源:GenericMRLoadGenerator.java

示例8: mrRun

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
private void mrRun() throws Exception {
  FileSystem fs = FileSystem.get(getJobConf());
  Path inputDir = new Path("input");
  fs.mkdirs(inputDir);
  Writer writer = new OutputStreamWriter(fs.create(new Path(inputDir, "data.txt")));
  writer.write("hello");
  writer.close();

  Path outputDir = new Path("output", "output");

  JobConf jobConf = new JobConf(getJobConf());
  jobConf.setInt("mapred.map.tasks", 1);
  jobConf.setInt("mapred.map.max.attempts", 1);
  jobConf.setInt("mapred.reduce.max.attempts", 1);
  jobConf.set("mapred.input.dir", inputDir.toString());
  jobConf.set("mapred.output.dir", outputDir.toString());

  JobClient jobClient = new JobClient(jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  runJob.waitForCompletion();
  assertTrue(runJob.isComplete());
  assertTrue(runJob.isSuccessful());
}

开发者ID:naver，项目名称:hadoop，代码行数:24，代码来源:TestMiniMRProxyUser.java

示例9: setReplication

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
/**
 * Increase the replication factor of _distcp_src_files to
 * sqrt(min(maxMapsOnCluster, numMaps)). This is to reduce the chance of
 * failing of distcp because of "not having a replication of _distcp_src_files
 * available for reading for some maps".
 */
private static void setReplication(Configuration conf, JobConf jobConf,
                       Path srcfilelist, int numMaps) throws IOException {
  int numMaxMaps = new JobClient(jobConf).getClusterStatus().getMaxMapTasks();
  short replication = (short) Math.ceil(
                              Math.sqrt(Math.min(numMaxMaps, numMaps)));
  FileSystem fs = srcfilelist.getFileSystem(conf);
  FileStatus srcStatus = fs.getFileStatus(srcfilelist);

  if (srcStatus.getReplication() < replication) {
    if (!fs.setReplication(srcfilelist, replication)) {
      throw new IOException("Unable to increase the replication of file " +
                            srcfilelist);
    }
  }
}

开发者ID:naver，项目名称:hadoop，代码行数:22，代码来源:DistCpV1.java

示例10: getSplits

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
    new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
    jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}

开发者ID:naver，项目名称:hadoop，代码行数:27，代码来源:GenerateData.java

示例11: getSplits

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);

  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }

  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;

  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();

  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
                            DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {

      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[])null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[])null));
  }

  return splits;
}

开发者ID:naver，项目名称:hadoop，代码行数:67，代码来源:GenerateDistCacheData.java

示例12: runDataGenJob

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
/**
 * Runs a GridMix data-generation job.
 */
private static void runDataGenJob(Configuration conf, Path tempDir) 
throws IOException, ClassNotFoundException, InterruptedException {
  JobClient client = new JobClient(conf);
  
  // get the local job runner
  conf.setInt(MRJobConfig.NUM_MAPS, 1);
  
  Job job = Job.getInstance(conf);
  
  CompressionEmulationUtil.configure(job);
  job.setInputFormatClass(CustomInputFormat.class);
  
  // set the output path
  FileOutputFormat.setOutputPath(job, tempDir);
  
  // submit and wait for completion
  job.submit();
  int ret = job.waitForCompletion(true) ? 0 : 1;

  assertEquals("Job Failed", 0, ret);
}

开发者ID:naver，项目名称:hadoop，代码行数:25，代码来源:TestCompressionEmulationUtils.java

示例13: runJob

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
/**
 * Submit/run a map/reduce job.
 * 
 * @param job
 * @return true for success
 * @throws IOException
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean sucess = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
      }
      running = jc.getJob(jobId);
    }
    sucess = running.isSuccessful();
  } finally {
    if (!sucess && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return sucess;
}

开发者ID:naver，项目名称:hadoop，代码行数:33，代码来源:DataJoinJob.java

示例14: shoudBeValidMapReduceEvaluation

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceEvaluation() throws Exception {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(1);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
        jobConf);
    TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
        ClassificatorRowReduce.class, jobConf);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null)
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
  }
}

开发者ID:fengchen8086，项目名称:ditb，代码行数:21，代码来源:TestTableMapReduceUtil.java

示例15: shoudBeValidMapReduceWithPartitionerEvaluation

import org.apache.hadoop.mapred.JobClient; //导入依赖的package包/类
@Test
@SuppressWarnings("deprecation")
public void shoudBeValidMapReduceWithPartitionerEvaluation()
    throws IOException {
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  try {
    jobConf.setJobName("process row task");
    jobConf.setNumReduceTasks(2);
    TableMapReduceUtil.initTableMapJob(TABLE_NAME, new String(COLUMN_FAMILY),
        ClassificatorMapper.class, ImmutableBytesWritable.class, Put.class,
        jobConf);

    TableMapReduceUtil.initTableReduceJob(TABLE_NAME,
        ClassificatorRowReduce.class, jobConf, HRegionPartitioner.class);
    RunningJob job = JobClient.runJob(jobConf);
    assertTrue(job.isSuccessful());
  } finally {
    if (jobConf != null)
      FileUtil.fullyDelete(new File(jobConf.get("hadoop.tmp.dir")));
  }
}

开发者ID:fengchen8086，项目名称:ditb，代码行数:23，代码来源:TestTableMapReduceUtil.java

注：本文中的org.apache.hadoop.mapred.JobClient类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。