当前位置: 首页>>代码示例>>Java>>正文


Java Job.addCacheFile方法代码示例

本文整理汇总了Java中org.apache.hadoop.mapreduce.Job.addCacheFile方法的典型用法代码示例。如果您正苦于以下问题:Java Job.addCacheFile方法的具体用法?Java Job.addCacheFile怎么用?Java Job.addCacheFile使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.hadoop.mapreduce.Job的用法示例。


在下文中一共展示了Job.addCacheFile方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: testDetermineTimestamps

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
@Test
public void testDetermineTimestamps() throws IOException {
  // Register two cache files on a fresh job, then let the manager stat them.
  Job job = Job.getInstance(conf);
  job.addCacheFile(firstCacheFile.toUri());
  job.addCacheFile(secondCacheFile.toUri());
  Configuration jobConfiguration = job.getConfiguration();

  Map<URI, FileStatus> statusCache = new HashMap<URI, FileStatus>();
  ClientDistributedCacheManager.determineTimestamps(jobConfiguration, statusCache);

  // Exactly the two registered files must have been stat'ed.
  FileStatus statusOfFirst = statusCache.get(firstCacheFile.toUri());
  FileStatus statusOfSecond = statusCache.get(secondCacheFile.toUri());
  Assert.assertNotNull(statusOfFirst);
  Assert.assertNotNull(statusOfSecond);
  Assert.assertEquals(2, statusCache.size());

  // The recorded config value is the two modification times, comma-joined
  // in registration order.
  String expectedTimestamps = statusOfFirst.getModificationTime() + ","
      + statusOfSecond.getModificationTime();
  Assert.assertEquals(expectedTimestamps,
      jobConfiguration.get(MRJobConfig.CACHE_FILE_TIMESTAMPS));
}
 
开发者ID:naver,项目名称:hadoop,代码行数:21,代码来源:TestClientDistributedCacheManager.java

示例2: testWithConf

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
 * Submits a job that exercises every distributed-cache entry point
 * (symlinked cache file, classpath file, classpath archive, cache archive)
 * and asserts the job completes successfully.
 */
private void testWithConf(Configuration conf) throws IOException,
    InterruptedException, ClassNotFoundException, URISyntaxException {
  // Fixtures: a length-1 file plus three jars with a single entry each.
  Path first = createTempFile("distributed.first", "x");
  Path second = makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
  Path third = makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
  Path fourth = makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);

  Job job = Job.getInstance(conf);
  job.setMapperClass(DistributedCacheCheckerMapper.class);
  job.setReducerClass(DistributedCacheCheckerReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  FileInputFormat.setInputPaths(job, first);

  // Populate the distributed cache with one entry of each flavor.
  URI firstWithSymlink =
      new URI(first.toUri().toString() + "#distributed.first.symlink");
  job.addCacheFile(firstWithSymlink);
  job.addFileToClassPath(second);
  job.addArchiveToClassPath(third);
  job.addCacheArchive(fourth.toUri());
  job.setMaxMapAttempts(1); // speed up failures

  job.submit();
  assertTrue(job.waitForCompletion(false));
}
 
开发者ID:naver,项目名称:hadoop,代码行数:30,代码来源:TestMRWithDistributedCache.java

示例3: run

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
 * Runs a TeraSort job over args[0] (input dir) into args[1] (output dir).
 *
 * <p>Unless simple partitioning is configured, a total-order partition file
 * is sampled and written first, then shipped to every task via the
 * distributed cache under {@code TeraInputFormat.PARTITION_FILENAME}.
 *
 * @param args args[0] = input directory, args[1] = output directory
 * @return 0 on success, 1 if the job failed, -1 if partition sampling failed
 * @throws Exception on job setup/submission errors
 */
public int run(String[] args) throws Exception {
  LOG.info("starting");
  Job job = Job.getInstance(getConf());
  Path inputDir = new Path(args[0]);
  Path outputDir = new Path(args[1]);
  boolean useSimplePartitioner = getUseSimplePartitioner(job);
  TeraInputFormat.setInputPaths(job, inputDir);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(TeraInputFormat.class);
  job.setOutputFormatClass(TeraOutputFormat.class);
  if (useSimplePartitioner) {
    job.setPartitionerClass(SimplePartitioner.class);
  } else {
    long start = System.currentTimeMillis();
    Path partitionFile = new Path(outputDir,
                                  TeraInputFormat.PARTITION_FILENAME);
    // The "#fragment" makes the cached file appear under the expected
    // local name in each task's working directory.
    URI partitionUri = new URI(partitionFile.toString() +
                               "#" + TeraInputFormat.PARTITION_FILENAME);
    try {
      TeraInputFormat.writePartitionFile(job, partitionFile);
    } catch (Throwable e) {
      // Log the full throwable, not just its message, so the stack trace
      // and cause chain of sampling failures are preserved.
      LOG.error(e.getMessage(), e);
      return -1;
    }
    job.addCacheFile(partitionUri);
    long end = System.currentTimeMillis();
    System.out.println("Spent " + (end - start) + "ms computing partitions.");
    job.setPartitionerClass(TotalOrderPartitioner.class);
  }

  job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
  TeraOutputFormat.setFinalSync(job, true);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  LOG.info("done");
  return ret;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:41,代码来源:TeraSort.java

示例4: addSSLFilesToDistCache

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
 * Add SSL files to distributed cache. Trust store, key store and ssl config xml
 *
 * @param job - Job handle
 * @param sslConfigPath - ssl Configuration file specified through options
 * @throws IOException - If any
 */
private void addSSLFilesToDistCache(Job job,
                                    Path sslConfigPath) throws IOException {
  Configuration configuration = job.getConfiguration();
  FileSystem localFS = FileSystem.getLocal(configuration);

  Configuration sslConf = new Configuration(false);
  sslConf.addResource(sslConfigPath);

  // Trust store: ship the file and record its (bare) name in the job conf.
  Path localStorePath = getLocalStorePath(sslConf,
                          DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION);
  addLocalFileToCache(job, localFS, localStorePath);
  configuration.set(DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION,
                    localStorePath.getName());

  // Key store: same treatment.
  localStorePath = getLocalStorePath(sslConf,
                           DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION);
  addLocalFileToCache(job, localFS, localStorePath);
  configuration.set(DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION,
                    localStorePath.getName());

  // Finally ship the ssl-config xml itself.
  addLocalFileToCache(job, localFS, sslConfigPath);
}

/**
 * Qualifies {@code path} against the local file system and adds it to the
 * job's distributed cache.
 *
 * @param job - Job handle
 * @param localFS - local file system used to qualify the path
 * @param path - file to ship via the distributed cache
 * @throws IOException - If any
 */
private static void addLocalFileToCache(Job job, FileSystem localFS,
                                        Path path) throws IOException {
  job.addCacheFile(path.makeQualified(localFS.getUri(),
                                      localFS.getWorkingDirectory()).toUri());
}
 
开发者ID:naver,项目名称:hadoop,代码行数:34,代码来源:DistCp.java

示例5: _testDistributedCache

import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
 * Runs a job with a dummy job jar plus one entry of every distributed-cache
 * flavor, and checks the job succeeds and its tracking URL embeds the job id.
 */
public void _testDistributedCache(String jobJarPath) throws Exception {
  // Skip quietly when the MR app-master jar fixture has not been built.
  if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
         + " not found. Not running test.");
    return;
  }

  // Fixtures: a length-1 file plus three jars with a single entry each.
  Path first = createTempFile("distributed.first", "x");
  Path second = makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
  Path third = makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
  Path fourth = makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);

  Job job = Job.getInstance(mrCluster.getConfig());

  // A "dummy" job jar verifies that jar extraction works; because of that,
  // the checker class must be shipped on the classpath explicitly.
  job.setJar(jobJarPath);
  Path checkerJar =
      new Path(JarFinder.getJar(DistributedCacheChecker.class));
  job.addFileToClassPath(
      checkerJar.makeQualified(localFs.getUri(), checkerJar.getParent()));

  job.setMapperClass(DistributedCacheChecker.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  FileInputFormat.setInputPaths(job, first);

  // Distributed-cache entries under test, one per flavor.
  job.addCacheFile(
      new URI(first.toUri().toString() + "#distributed.first.symlink"));
  job.addFileToClassPath(second);
  // Ship the AppMaster jar itself as well.
  job.addFileToClassPath(
      APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent()));
  job.addArchiveToClassPath(third);
  job.addCacheArchive(fourth.toUri());
  job.setMaxMapAttempts(1); // speed up failures

  job.submit();
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  Assert.assertTrue(job.waitForCompletion(false));
  Assert.assertTrue("Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId ,
        trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
}
 
开发者ID:naver,项目名称:hadoop,代码行数:53,代码来源:TestMRJobs.java


注:本文中的org.apache.hadoop.mapreduce.Job.addCacheFile方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。