Java Job.addCacheFile方法代碼示例

本文整理匯總了Java中org.apache.hadoop.mapreduce.Job.addCacheFile方法的典型用法代碼示例。如果您正苦於以下問題：Java Job.addCacheFile方法的具體用法？Java Job.addCacheFile怎麼用？Java Job.addCacheFile使用的例子？那麼，這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.hadoop.mapreduce.Job的用法示例。

在下文中一共展示了Job.addCacheFile方法的5個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: testDetermineTimestamps

import org.apache.hadoop.mapreduce.Job; //導入方法依賴的package包/類
/**
 * Registers two cache files on a job, runs
 * {@code ClientDistributedCacheManager.determineTimestamps} and checks that
 * a status entry exists for each file and that the job configuration holds
 * their modification times, comma-separated, under
 * {@code MRJobConfig.CACHE_FILE_TIMESTAMPS}.
 *
 * @throws IOException propagated from job creation or timestamp lookup
 */
@Test
public void testDetermineTimestamps() throws IOException {
  // Register both cache files on a fresh job, keeping their URIs for lookup.
  Job job = Job.getInstance(conf);
  URI firstUri = firstCacheFile.toUri();
  job.addCacheFile(firstUri);
  URI secondUri = secondCacheFile.toUri();
  job.addCacheFile(secondUri);
  Configuration jobConf = job.getConfiguration();

  // The map is filled in by determineTimestamps as a side effect.
  Map<URI, FileStatus> statusByUri = new HashMap<URI, FileStatus>();
  ClientDistributedCacheManager.determineTimestamps(jobConf, statusByUri);

  FileStatus firstStat = statusByUri.get(firstUri);
  FileStatus secondStat = statusByUri.get(secondUri);
  Assert.assertNotNull(firstStat);
  Assert.assertNotNull(secondStat);
  // Exactly the two registered files, nothing else.
  Assert.assertEquals(2, statusByUri.size());

  // Timestamps are expected in registration order, joined by a comma.
  long firstMtime = firstStat.getModificationTime();
  long secondMtime = secondStat.getModificationTime();
  String recorded = jobConf.get(MRJobConfig.CACHE_FILE_TIMESTAMPS);
  Assert.assertEquals(firstMtime + "," + secondMtime, recorded);
}

開發者ID:naver，項目名稱:hadoop，代碼行數:21，代碼來源:TestClientDistributedCacheManager.java

示例2: testWithConf

import org.apache.hadoop.mapreduce.Job; //導入方法依賴的package包/類
/**
 * Submits a job that uses one cache file (with a symlink fragment), one jar
 * on the file classpath, one jar on the archive classpath and one plain
 * cache archive, then asserts that the job completes successfully.
 *
 * @param conf configuration the job is created from
 * @throws IOException            propagated from fixture creation or the job
 * @throws InterruptedException   propagated from job submission/completion
 * @throws ClassNotFoundException propagated from job submission/completion
 * @throws URISyntaxException     if the symlinked cache-file URI is malformed
 */
private void testWithConf(Configuration conf) throws IOException,
    InterruptedException, ClassNotFoundException, URISyntaxException {
  // Fixture 1: a temporary file of length 1; doubles as the job input.
  Path plainFile = createTempFile("distributed.first", "x");
  // Fixtures 2-4: three jars, each built by makeJar with a distinct index.
  Path classpathJar =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
  Path classpathArchive =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
  Path cacheArchive =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);

  Job job = Job.getInstance(conf);
  job.setMapperClass(DistributedCacheCheckerMapper.class);
  job.setReducerClass(DistributedCacheCheckerReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  FileInputFormat.setInputPaths(job, plainFile);

  // Register the distributed-cache entries; the "#..." fragment names the
  // symlink requested for the plain file.
  URI symlinkedFile =
      new URI(plainFile.toUri().toString() + "#distributed.first.symlink");
  job.addCacheFile(symlinkedFile);
  job.addFileToClassPath(classpathJar);
  job.addArchiveToClassPath(classpathArchive);
  job.addCacheArchive(cacheArchive.toUri());
  job.setMaxMapAttempts(1); // speed up failures

  job.submit();
  boolean succeeded = job.waitForCompletion(false);
  assertTrue(succeeded);
}

開發者ID:naver，項目名稱:hadoop，代碼行數:30，代碼來源:TestMRWithDistributedCache.java

示例3: run

import org.apache.hadoop.mapreduce.Job; //導入方法依賴的package包/類
/**
 * Configures and runs the TeraSort job.
 *
 * <p>Unless the simple partitioner is selected, a partition file is first
 * written into the output directory and registered as a cache file so that
 * {@code TotalOrderPartitioner} can be used.
 *
 * @param args {@code args[0]} is the input directory and {@code args[1]}
 *             the output directory; any further arguments are ignored
 * @return 0 if the job succeeded, 1 if it failed, -1 if the partition file
 *         could not be written, 2 if fewer than two arguments were supplied
 * @throws Exception propagated from job setup or execution
 */
public int run(String[] args) throws Exception {
  // Fail with a clear message instead of an ArrayIndexOutOfBoundsException
  // when the input/output directories are missing.
  if (args == null || args.length < 2) {
    LOG.error("Expected arguments: <input dir> <output dir>");
    return 2;
  }
  LOG.info("starting");
  Job job = Job.getInstance(getConf());
  Path inputDir = new Path(args[0]);
  Path outputDir = new Path(args[1]);
  boolean useSimplePartitioner = getUseSimplePartitioner(job);
  TeraInputFormat.setInputPaths(job, inputDir);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(TeraInputFormat.class);
  job.setOutputFormatClass(TeraOutputFormat.class);
  if (useSimplePartitioner) {
    job.setPartitionerClass(SimplePartitioner.class);
  } else {
    long start = System.currentTimeMillis();
    Path partitionFile = new Path(outputDir,
                                  TeraInputFormat.PARTITION_FILENAME);
    // The "#name" fragment names the link under which the cached partition
    // file is exposed.
    URI partitionUri = new URI(partitionFile.toString() +
                               "#" + TeraInputFormat.PARTITION_FILENAME);
    try {
      TeraInputFormat.writePartitionFile(job, partitionFile);
    } catch (Throwable e) {
      // Pass the throwable itself so the stack trace is not lost; the
      // message alone may be null and hides the failure's origin.
      LOG.error(e.getMessage(), e);
      return -1;
    }
    job.addCacheFile(partitionUri);
    long end = System.currentTimeMillis();
    System.out.println("Spent " + (end - start) + "ms computing partitions.");
    job.setPartitionerClass(TotalOrderPartitioner.class);
  }

  job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
  TeraOutputFormat.setFinalSync(job, true);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  LOG.info("done");
  return ret;
}

開發者ID:naver，項目名稱:hadoop，代碼行數:41，代碼來源:TeraSort.java

示例4: addSSLFilesToDistCache

import org.apache.hadoop.mapreduce.Job; //導入方法依賴的package包/類
/**
 * Adds the SSL trust store, the SSL key store and the SSL configuration XML
 * to the job's distributed cache.
 *
 * <p>For each of the two stores, the store path is resolved via
 * {@code getLocalStorePath}, qualified against the local file system, added
 * as a cache file, and the matching job configuration entry is then
 * overwritten with just the store's file name.
 *
 * @param job - Job handle
 * @param sslConfigPath - ssl Configuration file specified through options
 * @throws IOException - If any
 */
private void addSSLFilesToDistCache(Job job,
                                    Path sslConfigPath) throws IOException {
  Configuration jobConf = job.getConfiguration();
  FileSystem localFs = FileSystem.getLocal(jobConf);
  URI localFsUri = localFs.getUri();
  Path localWorkingDir = localFs.getWorkingDirectory();

  // Load only the SSL settings file, without the default resources.
  Configuration sslSettings = new Configuration(false);
  sslSettings.addResource(sslConfigPath);

  // Trust store first, key store second.
  String[] storeLabels = {
    DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION,
    DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION
  };
  for (String storeLabel : storeLabels) {
    Path storePath = getLocalStorePath(sslSettings, storeLabel);
    Path qualifiedStore = storePath.makeQualified(localFsUri, localWorkingDir);
    job.addCacheFile(qualifiedStore.toUri());
    // The job configuration keeps only the bare file name of the store.
    jobConf.set(storeLabel, storePath.getName());
  }

  // Finally ship the SSL configuration file itself.
  Path qualifiedSslConfig =
      sslConfigPath.makeQualified(localFsUri, localWorkingDir);
  job.addCacheFile(qualifiedSslConfig.toUri());
}

開發者ID:naver，項目名稱:hadoop，代碼行數:34，代碼來源:DistCp.java

示例5: _testDistributedCache

import org.apache.hadoop.mapreduce.Job; //導入方法依賴的package包/類
/**
 * Runs a distributed-cache job on the mini cluster using the given job jar
 * and asserts that the job succeeds and that its tracking URL ends with the
 * numeric tail of the job ID followed by a slash.
 *
 * <p>Returns without running anything when the MR application jar named by
 * {@code MiniMRYarnCluster.APPJAR} does not exist.
 *
 * @param jobJarPath path set as the job jar via {@code Job.setJar}
 * @throws Exception propagated from fixture creation or job execution
 */
public void _testDistributedCache(String jobJarPath) throws Exception {
  File appJarFile = new File(MiniMRYarnCluster.APPJAR);
  if (!appJarFile.exists()) {
    LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
         + " not found. Not running test.");
    return;
  }

  // Fixture 1: a temporary file of length 1; doubles as the job input.
  Path plainFile = createTempFile("distributed.first", "x");
  // Fixtures 2-4: three jars, each built by makeJar with a distinct index.
  Path classpathJar =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
  Path classpathArchive =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
  Path cacheArchive =
      makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);

  Job job = Job.getInstance(mrCluster.getConfig());

  // Use the supplied "dummy" jar as the job jar so its extraction can be
  // checked.
  job.setJar(jobJarPath);
  // The dummy job jar does not contain DistributedCacheChecker, so the jar
  // that does must be put on the classpath explicitly.
  Path checkerJar = new Path(JarFinder.getJar(DistributedCacheChecker.class));
  Path qualifiedCheckerJar =
      checkerJar.makeQualified(localFs.getUri(), checkerJar.getParent());
  job.addFileToClassPath(qualifiedCheckerJar);

  job.setMapperClass(DistributedCacheChecker.class);
  job.setOutputFormatClass(NullOutputFormat.class);

  FileInputFormat.setInputPaths(job, plainFile);
  // Register the distributed-cache entries; the "#..." fragment names the
  // symlink requested for the plain file.
  URI symlinkedFile =
      new URI(plainFile.toUri().toString() + "#distributed.first.symlink");
  job.addCacheFile(symlinkedFile);
  job.addFileToClassPath(classpathJar);
  // The AppMaster jar itself
  Path qualifiedAppJar =
      APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent());
  job.addFileToClassPath(qualifiedAppJar);
  job.addArchiveToClassPath(classpathArchive);
  job.addCacheArchive(cacheArchive.toUri());
  job.setMaxMapAttempts(1); // speed up failures

  job.submit();
  // Capture URL and ID right after submission, before waiting.
  String trackingUrl = job.getTrackingURL();
  String jobId = job.getJobID().toString();
  boolean succeeded = job.waitForCompletion(false);
  Assert.assertTrue(succeeded);

  // The URL must end with everything from the last '_' of the job ID, plus
  // a trailing slash.
  String failureMessage = "Tracking URL was " + trackingUrl +
                    " but didn't Match Job ID " + jobId;
  String expectedSuffix = jobId.substring(jobId.lastIndexOf("_")) + "/";
  Assert.assertTrue(failureMessage, trackingUrl.endsWith(expectedSuffix));
}

開發者ID:naver，項目名稱:hadoop，代碼行數:53，代碼來源:TestMRJobs.java

注：本文中的org.apache.hadoop.mapreduce.Job.addCacheFile方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。