本文整理汇总了Java中org.apache.hadoop.mapreduce.Job.addCacheFile方法的典型用法代码示例。如果您正苦于以下问题:Java Job.addCacheFile方法的具体用法?Java Job.addCacheFile怎么用?Java Job.addCacheFile使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.mapreduce.Job
的用法示例。
在下文中一共展示了Job.addCacheFile方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: testDetermineTimestamps
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
@Test
public void testDetermineTimestamps() throws IOException {
Job job = Job.getInstance(conf);
job.addCacheFile(firstCacheFile.toUri());
job.addCacheFile(secondCacheFile.toUri());
Configuration jobConf = job.getConfiguration();
Map<URI, FileStatus> statCache = new HashMap<URI, FileStatus>();
ClientDistributedCacheManager.determineTimestamps(jobConf, statCache);
FileStatus firstStatus = statCache.get(firstCacheFile.toUri());
FileStatus secondStatus = statCache.get(secondCacheFile.toUri());
Assert.assertNotNull(firstStatus);
Assert.assertNotNull(secondStatus);
Assert.assertEquals(2, statCache.size());
String expected = firstStatus.getModificationTime() + ","
+ secondStatus.getModificationTime();
Assert.assertEquals(expected, jobConf.get(MRJobConfig.CACHE_FILE_TIMESTAMPS));
}
示例2: testWithConf
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
private void testWithConf(Configuration conf) throws IOException,
InterruptedException, ClassNotFoundException, URISyntaxException {
// Create a temporary file of length 1.
Path first = createTempFile("distributed.first", "x");
// Create two jars with a single file inside them.
Path second =
makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
Path third =
makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
Path fourth =
makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);
Job job = Job.getInstance(conf);
job.setMapperClass(DistributedCacheCheckerMapper.class);
job.setReducerClass(DistributedCacheCheckerReducer.class);
job.setOutputFormatClass(NullOutputFormat.class);
FileInputFormat.setInputPaths(job, first);
// Creates the Job Configuration
job.addCacheFile(
new URI(first.toUri().toString() + "#distributed.first.symlink"));
job.addFileToClassPath(second);
job.addArchiveToClassPath(third);
job.addCacheArchive(fourth.toUri());
job.setMaxMapAttempts(1); // speed up failures
job.submit();
assertTrue(job.waitForCompletion(false));
}
示例3: run
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public int run(String[] args) throws Exception {
LOG.info("starting");
Job job = Job.getInstance(getConf());
Path inputDir = new Path(args[0]);
Path outputDir = new Path(args[1]);
boolean useSimplePartitioner = getUseSimplePartitioner(job);
TeraInputFormat.setInputPaths(job, inputDir);
FileOutputFormat.setOutputPath(job, outputDir);
job.setJobName("TeraSort");
job.setJarByClass(TeraSort.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(TeraInputFormat.class);
job.setOutputFormatClass(TeraOutputFormat.class);
if (useSimplePartitioner) {
job.setPartitionerClass(SimplePartitioner.class);
} else {
long start = System.currentTimeMillis();
Path partitionFile = new Path(outputDir,
TeraInputFormat.PARTITION_FILENAME);
URI partitionUri = new URI(partitionFile.toString() +
"#" + TeraInputFormat.PARTITION_FILENAME);
try {
TeraInputFormat.writePartitionFile(job, partitionFile);
} catch (Throwable e) {
LOG.error(e.getMessage());
return -1;
}
job.addCacheFile(partitionUri);
long end = System.currentTimeMillis();
System.out.println("Spent " + (end - start) + "ms computing partitions.");
job.setPartitionerClass(TotalOrderPartitioner.class);
}
job.getConfiguration().setInt("dfs.replication", getOutputReplication(job));
TeraOutputFormat.setFinalSync(job, true);
int ret = job.waitForCompletion(true) ? 0 : 1;
LOG.info("done");
return ret;
}
示例4: addSSLFilesToDistCache
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
/**
* Add SSL files to distributed cache. Trust store, key store and ssl config xml
*
* @param job - Job handle
* @param sslConfigPath - ssl Configuration file specified through options
* @throws IOException - If any
*/
private void addSSLFilesToDistCache(Job job,
Path sslConfigPath) throws IOException {
Configuration configuration = job.getConfiguration();
FileSystem localFS = FileSystem.getLocal(configuration);
Configuration sslConf = new Configuration(false);
sslConf.addResource(sslConfigPath);
Path localStorePath = getLocalStorePath(sslConf,
DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION);
job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
localFS.getWorkingDirectory()).toUri());
configuration.set(DistCpConstants.CONF_LABEL_SSL_TRUST_STORE_LOCATION,
localStorePath.getName());
localStorePath = getLocalStorePath(sslConf,
DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION);
job.addCacheFile(localStorePath.makeQualified(localFS.getUri(),
localFS.getWorkingDirectory()).toUri());
configuration.set(DistCpConstants.CONF_LABEL_SSL_KEY_STORE_LOCATION,
localStorePath.getName());
job.addCacheFile(sslConfigPath.makeQualified(localFS.getUri(),
localFS.getWorkingDirectory()).toUri());
}
示例5: _testDistributedCache
import org.apache.hadoop.mapreduce.Job; //导入方法依赖的package包/类
public void _testDistributedCache(String jobJarPath) throws Exception {
if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
+ " not found. Not running test.");
return;
}
// Create a temporary file of length 1.
Path first = createTempFile("distributed.first", "x");
// Create two jars with a single file inside them.
Path second =
makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
Path third =
makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
Path fourth =
makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);
Job job = Job.getInstance(mrCluster.getConfig());
// Set the job jar to a new "dummy" jar so we can check that its extracted
// properly
job.setJar(jobJarPath);
// Because the job jar is a "dummy" jar, we need to include the jar with
// DistributedCacheChecker or it won't be able to find it
Path distributedCacheCheckerJar = new Path(
JarFinder.getJar(DistributedCacheChecker.class));
job.addFileToClassPath(distributedCacheCheckerJar.makeQualified(
localFs.getUri(), distributedCacheCheckerJar.getParent()));
job.setMapperClass(DistributedCacheChecker.class);
job.setOutputFormatClass(NullOutputFormat.class);
FileInputFormat.setInputPaths(job, first);
// Creates the Job Configuration
job.addCacheFile(
new URI(first.toUri().toString() + "#distributed.first.symlink"));
job.addFileToClassPath(second);
// The AppMaster jar itself
job.addFileToClassPath(
APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent()));
job.addArchiveToClassPath(third);
job.addCacheArchive(fourth.toUri());
job.setMaxMapAttempts(1); // speed up failures
job.submit();
String trackingUrl = job.getTrackingURL();
String jobId = job.getJobID().toString();
Assert.assertTrue(job.waitForCompletion(false));
Assert.assertTrue("Tracking URL was " + trackingUrl +
" but didn't Match Job ID " + jobId ,
trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
}