本文整理汇总了 Java 中 org.apache.hadoop.filecache.DistributedCache.createSymlink 方法的典型用法代码示例。如果您正苦于以下问题:Java DistributedCache.createSymlink 方法的具体用法?Java DistributedCache.createSymlink 怎么用?Java DistributedCache.createSymlink 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.apache.hadoop.filecache.DistributedCache 的用法示例。
在下文中一共展示了 DistributedCache.createSymlink 方法的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: run
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Configures the TeraSort job from the command-line arguments and runs it.
 *
 * {@code args[0]} is the input directory and {@code args[1]} the output directory.
 * A partition file path is built under the qualified input directory, handed to
 * {@code TeraInputFormat.writePartitionFile}, and registered as a cache file whose
 * URI carries a {@code #} fragment equal to the partition file name.
 *
 * @param args input directory, then output directory
 * @return 0 when {@code JobClient.runJob} returns normally
 * @throws Exception propagated from path/URI construction, partition-file writing
 *         or the job run
 */
public int run(String[] args) throws Exception {
  LOG.info("starting");
  final JobConf conf = (JobConf) getConf();

  // Qualify the input directory against its own file system before deriving
  // the partition file location from it.
  final Path rawInput = new Path(args[0]);
  final Path qualifiedInput = rawInput.makeQualified(rawInput.getFileSystem(conf));
  final Path partitionFile = new Path(qualifiedInput, TeraInputFormat.PARTITION_FILENAME);
  final URI partitionUri =
      new URI(partitionFile + "#" + TeraInputFormat.PARTITION_FILENAME);

  // Input / output locations and job identity.
  TeraInputFormat.setInputPaths(conf, new Path(args[0]));
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  conf.setJobName("TeraSort");
  conf.setJarByClass(TeraSort.class);

  // Key/value types, formats and partitioner.
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);
  conf.setInputFormat(TeraInputFormat.class);
  conf.setOutputFormat(TeraOutputFormat.class);
  conf.setPartitionerClass(TotalOrderPartitioner.class);

  // Write the partition file, then ship it through the distributed cache.
  TeraInputFormat.writePartitionFile(conf, partitionFile);
  DistributedCache.addCacheFile(partitionUri, conf);
  DistributedCache.createSymlink(conf);

  conf.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(conf, true);

  JobClient.runJob(conf);
  LOG.info("done");
  return 0;
}
示例2: addLocalFiles
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Copies each local non-jar file the job depends on into {@code jobFileDir} and
 * registers the copy with the DistributedCache.
 *
 * @param jobFileDir directory on {@code this.fs} that receives the copied files
 * @param jobFileList list of local file paths, split with {@code SPLITTER}
 * @throws IOException if copying a file fails
 */
private void addLocalFiles(Path jobFileDir, String jobFileList) throws IOException {
  DistributedCache.createSymlink(this.conf);

  for (String localFileName : SPLITTER.split(jobFileList)) {
    Path localFile = new Path(localFileName);

    // The cache needs an absolute path, hence the qualified target directory.
    Path qualifiedDir = this.fs.makeQualified(jobFileDir);
    Path remoteFile = new Path(qualifiedDir, localFile.getName());

    // Local file system -> HDFS.
    this.fs.copyFromLocalFile(localFile, remoteFile);

    // URI of the form path#symlink, where the symlink is the plain file name.
    String remotePath = remoteFile.toUri().getPath();
    URI cacheUri = URI.create(remotePath + "#" + remoteFile.getName());
    LOG.info("Adding " + cacheUri + " to DistributedCache");

    DistributedCache.addCacheFile(cacheUri, this.conf);
  }
}
示例3: shareMapFile
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Registers every map file found under {@code mfile} as a distributed-cache file and
 * records {@code slots} in the job configuration under the key {@code symbol}.
 *
 * For each entry returned by {@code listStatus}, the first key of the map file is read
 * and reduced modulo {@code slots}; the cache URI is the map file URI followed by
 * {@code #symbol-remainder}. Nothing is added to the cache when {@code mfile} does not
 * exist or is not a directory, but {@code symbol} is still set.
 *
 * @param symbol prefix of the symlink names and configuration key for {@code slots}
 * @param slots modulus applied to the first key of each map file
 * @param mfile directory holding the map files
 * @param job job configuration to update
 * @throws IOException if the file system or a map file reader fails
 * @throws URISyntaxException if a cache URI cannot be built
 */
private static final void shareMapFile(String symbol, int slots, Path mfile, JobConf job) throws IOException, URISyntaxException {
  FileSystem fs = FileSystem.get(job);
  if (fs.exists(mfile) && fs.getFileStatus(mfile).isDir()) {
    DistributedCache.createSymlink(job);
    FileStatus[] fstats = fs.listStatus(mfile, getPassDirectoriesFilter(fs));
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (FileStatus fstat : fstats) {
      Path curMap = fstat.getPath();
      MapFile.Reader mreader = new MapFile.Reader(fs, curMap.toString(), job);
      // Close the reader even when reading or URI construction throws.
      try {
        if (mreader.next(key, value)) {
          int rid = (int) (key.get() % slots);
          String uriWithLink = curMap.toUri().toString() + "#" + symbol + "-" + rid;
          DistributedCache.addCacheFile(new URI(uriWithLink), job);
        } else {
          // An empty map file is treated as fatal, as before; say why before exiting.
          System.err.println("Map file has no entries: " + curMap);
          System.exit(-1);
        }
      } finally {
        mreader.close();
      }
    }
  }
  job.setInt(symbol, slots);
}
示例4: shareZipfCore
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Adds the file {@code fname} under the work path of {@code options} to the
 * distributed cache, using {@code fname} itself as the URI fragment.
 *
 * @param fname file name below the work path; also used as the fragment
 * @param options source of the work path
 * @param job job configuration to update
 * @throws URISyntaxException if the resulting string is not a valid URI
 */
private static final void shareZipfCore(String fname, DataOptions options, JobConf job) throws URISyntaxException {
  DistributedCache.createSymlink(job);
  final Path cachedFile = new Path(options.getWorkPath(), fname);
  final URI cachedUri = new URI(cachedFile + "#" + fname);
  DistributedCache.addCacheFile(cachedUri, job);
}
示例5: task3
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Extracts CF for each found anchor.
 *
 * The file {@code mapPath/part-00000/data} is added to the distributed cache with the
 * fragment {@code map.dat} before the job is run with a single reducer.
 *
 * @param inputPath input path of the job (read as sequence files)
 * @param mapPath directory whose {@code part-00000/data} file is shipped via the cache
 * @param outputPath output path of the job (written as map files)
 * @throws IOException if the job fails, or if the cache-file URI built from
 *         {@code mapPath} is not a valid URI
 */
private void task3(String inputPath, String mapPath, String outputPath) throws IOException {
  LOG.info("Extracting anchor text (phase 3)...");
  LOG.info(" - input: " + inputPath);
  LOG.info(" - output: " + outputPath);
  LOG.info(" - mapping: " + mapPath);

  JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
  conf.setJobName(String.format("ExtractWikipediaAnchorText:phase3[input: %s, output: %s]", inputPath, outputPath));
  conf.setNumReduceTasks(1);

  String location = "map.dat";
  try {
    DistributedCache.addCacheFile(new URI(mapPath + "/part-00000/data" + "#" + location), conf);
    DistributedCache.createSymlink(conf);
  } catch (URISyntaxException e) {
    // Running the job without its mapping file cannot succeed; fail here instead
    // of printing a stack trace and continuing.
    throw new IOException("Invalid distributed cache URI for mapping path " + mapPath, e);
  }

  FileInputFormat.addInputPath(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));

  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputFormat(MapFileOutputFormat.class);

  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(IntWritable.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);

  conf.setMapperClass(MyMapper3.class);
  conf.setCombinerClass(MyReducer3.class);
  conf.setReducerClass(MyReducer3.class);

  JobClient.runJob(conf);
}
示例6: testCacheFilesLocalization
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Runs a sleep job with two distributed cache files, checks that both files are
 * reported as localized, and verifies that the job finishes in the SUCCEEDED state.
 *
 * @throws Exception if job setup, submission or any status query fails
 */
@Test
public void testCacheFilesLocalization() throws Exception {
  conf = wovenClient.getDaemonConf();
  SleepJob job = new SleepJob();
  job.setConf(conf);
  JobConf jobConf = job.setupJobConf(4, 1, 4000, 4000, 1000, 1000);

  DistributedCache.createSymlink(jobConf);
  DistributedCache.addCacheFile(cacheFileURI1, jobConf);
  DistributedCache.addCacheFile(cacheFileURI2, jobConf);

  RunningJob runJob = jobClient.submitJob(jobConf);
  JobID jobId = runJob.getID();
  Assert.assertTrue("Job has not been started for 1 min.",
      jtClient.isJobStarted(jobId));

  TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId);
  Assert.assertTrue("Cache File1 has not been localize",
      checkLocalization(taskInfos, cacheFile1));
  Assert.assertTrue("Cache File2 has not been localize",
      checkLocalization(taskInfos, cacheFile2));

  // Poll the job status until the job reports completion.
  JobInfo jInfo = wovenClient.getJobInfo(jobId);
  LOG.info("Waiting till the job is completed...");
  while (!jInfo.getStatus().isJobComplete()) {
    UtilsForTests.waitFor(100);
    jInfo = wovenClient.getJobInfo(jobId);
  }

  // assertEquals takes (message, expected, actual); the expected state comes first
  // so that a failure message reports the values the right way round.
  Assert.assertEquals("Job has not been succeeded",
      JobStatus.SUCCEEDED, jInfo.getStatus().getRunState());
}
示例7: testDeleteCacheFileInDFSAfterLocalized
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Runs a sleep job with one distributed cache file, deletes that file through
 * {@code deleteCacheFile} once it is reported as localized, and verifies that the
 * job finishes in the FAILED state.
 *
 * @throws Exception if job setup, submission, file handling or a status query fails
 */
@Test
public void testDeleteCacheFileInDFSAfterLocalized() throws Exception {
  conf = wovenClient.getDaemonConf();
  SleepJob job = new SleepJob();
  job.setConf(conf);
  JobConf jobConf = job.setupJobConf(4, 1, 4000, 4000, 1000, 1000);

  cacheFileURI3 = createCacheFile(tmpFolderPath, cacheFile3);
  DistributedCache.createSymlink(jobConf);
  DistributedCache.addCacheFile(cacheFileURI3, jobConf);

  RunningJob runJob = jobClient.submitJob(jobConf);
  JobID jobId = runJob.getID();
  Assert.assertTrue("Job has not been started for 1 min.",
      jtClient.isJobStarted(jobId));

  TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId);
  boolean iscacheFileLocalized = checkLocalization(taskInfos, cacheFile3);
  Assert.assertTrue("CacheFile has not been localized",
      iscacheFileLocalized);

  // Remove the cache file only after localization has been observed.
  deleteCacheFile(new Path(tmpFolderPath, cacheFile3));

  // Poll the job status until the job reports completion.
  JobInfo jInfo = wovenClient.getJobInfo(jobId);
  LOG.info("Waiting till the job is completed...");
  while (!jInfo.getStatus().isJobComplete()) {
    UtilsForTests.waitFor(100);
    jInfo = wovenClient.getJobInfo(jobId);
  }

  // assertEquals takes (message, expected, actual); the expected state comes first
  // so that a failure message reports the values the right way round.
  Assert.assertEquals("Job has not been failed",
      JobStatus.FAILED, jInfo.getStatus().getRunState());
}
示例8: addHDFSFiles
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Registers non-jar files that the job depends on with the DistributedCache.
 * The files are referenced by their path as given; nothing is copied here.
 *
 * @param jobFileList list of file paths, split with {@code SPLITTER}
 * @throws IOException declared for callers; see the DistributedCache calls
 */
private void addHDFSFiles(String jobFileList) throws IOException {
  DistributedCache.createSymlink(this.conf);

  for (String fileName : SPLITTER.split(jobFileList)) {
    Path hdfsFile = new Path(fileName);

    // URI of the form path#symlink, where the symlink is the plain file name.
    String pathPart = hdfsFile.toUri().getPath();
    URI cacheUri = URI.create(pathPart + "#" + hdfsFile.getName());
    LOG.info("Adding " + cacheUri + " to DistributedCache");

    DistributedCache.addCacheFile(cacheUri, this.conf);
  }
}
示例9: run
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Parses the command-line options, configures a map-only job that uses
 * {@code PqPcaVladMapper}, ships {@code indexerData} through the distributed cache
 * under the fragment {@code vlad-data.bin}, and runs the job.
 *
 * @param args command-line arguments handed to the option parser
 * @return -1 if the arguments cannot be parsed, 0 if the job completes successfully,
 *         1 if the job reports failure
 * @throws Exception if job setup or execution throws
 */
@Override
public int run(String[] args) throws Exception {
  final CmdLineParser parser = new CmdLineParser(this);
  try {
    parser.parseArgument(args);
  } catch (final CmdLineException e) {
    System.err.println(e.getMessage());
    System.err.println("Usage: hadoop jar HadoopImageIndexer.jar [options]");
    parser.printUsage(System.err);
    return -1;
  }

  final Path[] paths = SequenceFileUtility.getFilePaths(input, "part");
  final Path outputPath = new Path(output);

  // Remove an existing output directory only when replacement was requested.
  final FileSystem outputFs = outputPath.getFileSystem(this.getConf());
  if (outputFs.exists(outputPath) && replace) {
    outputFs.delete(outputPath, true);
  }

  final Job job = TextBytesJobUtil.createJob(paths, outputPath, null, this.getConf());
  job.setJarByClass(this.getClass());
  job.setMapperClass(PqPcaVladMapper.class);
  job.setNumReduceTasks(0);

  MultipleOutputs.addNamedOutput(job, "pcavlad", SequenceFileOutputFormat.class, Text.class, BytesWritable.class);

  DistributedCache.createSymlink(job.getConfiguration());
  DistributedCache.addCacheFile(new URI(indexerData + "#vlad-data.bin"), job.getConfiguration());

  SequenceFileOutputFormat.setCompressOutput(job, !dontcompress);

  // Report job failure through the exit code instead of always returning success.
  return job.waitForCompletion(true) ? 0 : 1;
}
示例10: shareArray
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Adds {@code fdict} to the distributed cache with {@code symbol} as the URI
 * fragment, and stores {@code words} in the job configuration under {@code symbol}.
 *
 * @param symbol URI fragment and configuration key
 * @param fdict path of the file to share
 * @param words integer stored under {@code symbol}
 * @param job job configuration to update
 * @throws URISyntaxException if the resulting string is not a valid URI
 */
private static final void shareArray(String symbol, Path fdict, int words, JobConf job) throws URISyntaxException {
  DistributedCache.createSymlink(job);
  final URI dictUri = new URI(fdict.toUri() + "#" + symbol);
  DistributedCache.addCacheFile(dictUri, job);
  job.setInt(symbol, words);
}
示例11: task2
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Maps from (srcID, (targetID, anchor) to (targetID, (anchor, count)).
 *
 * The file {@code redirPath/part-00000} is added to the distributed cache with the
 * fragment {@code redirs.dat}. Any existing {@code outputPath} is deleted before the
 * job runs, and {@code inputPath} is deleted after the job has run.
 *
 * @param inputPath input path of the job (read as sequence files); deleted afterwards
 * @param outputPath output path of the job (written as map files)
 * @param redirPath directory whose {@code part-00000} file is shipped via the cache
 * @throws IOException if the job or a file-system operation fails, or if the
 *         cache-file URI built from {@code redirPath} is not a valid URI
 */
private void task2(String inputPath, String outputPath, String redirPath) throws IOException {
  LOG.info("Extracting anchor text (phase 2)...");
  LOG.info(" - input: " + inputPath);
  LOG.info(" - output: " + outputPath);

  JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
  conf.setJobName(String.format("ExtractWikipediaAnchorText:phase2[input: %s, output: %s]", inputPath, outputPath));

  // Gathers everything together for convenience; feasible for Wikipedia.
  conf.setNumReduceTasks(1);

  try {
    DistributedCache.addCacheFile(new URI(redirPath + "/part-00000" + "#" + "redirs.dat"), conf);
    DistributedCache.createSymlink(conf);
  } catch (URISyntaxException e) {
    // Running the job without its redirect file cannot succeed; fail here instead
    // of printing a stack trace and continuing.
    throw new IOException("Invalid distributed cache URI for redirect path " + redirPath, e);
  }

  FileInputFormat.addInputPath(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));

  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputFormat(MapFileOutputFormat.class);

  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(Text.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(HMapSIW.class);

  conf.setMapperClass(MyMapper2.class);
  conf.setReducerClass(MyReducer2.class);

  // Delete the output directory if it exists already.
  FileSystem.get(conf).delete(new Path(outputPath), true);

  JobClient.runJob(conf);

  // Clean up intermediate data.
  FileSystem.get(conf).delete(new Path(inputPath), true);
}
示例12: runDictionaryJob
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Runs the second-phase dictionary job and writes its counters to the dictionary
 * counters file.
 *
 * If the dictionary output path already exists it is deleted when the delete option
 * is set; otherwise a message is printed and the JVM exits with status -1. After the
 * job has run, the Shared/Subjects/Predicates/Objects counters are stored in the
 * corresponding fields and written as {@code name=value} lines.
 *
 * @return the value returned by {@code Job.waitForCompletion(true)}
 * @throws ClassNotFoundException propagated from sampling or job execution
 * @throws IOException if a file-system operation, the job or the counters file fails
 * @throws InterruptedException propagated from sampling or job execution
 * @throws URISyntaxException if the partition file URI cannot be built
 */
protected boolean runDictionaryJob() throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
  // if output path exists...
  if (this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
    if (this.conf.getDeleteDictionaryOutputPath()) { // ... and option provided, delete recursively
      this.dictionaryFS.delete(this.conf.getDictionaryOutputPath(), true);
    } else { // ... and option not provided, fail
      System.out.println("Dictionary output path does exist: " + this.conf.getDictionaryOutputPath());
      System.out.println("Select other path or use option -dd to overwrite");
      System.exit(-1);
    }
  }

  // Sample the SequenceInputFormat to do TotalSort and create final output
  Job job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 2");
  job.setJarByClass(HDTBuilderDriver.class);

  System.out.println("samples = " + this.conf.getDictionarySamplesPath());
  System.out.println("output = " + this.conf.getDictionaryOutputPath());

  FileInputFormat.addInputPath(job, this.conf.getDictionarySamplesPath());
  FileOutputFormat.setOutputPath(job, this.conf.getDictionaryOutputPath());

  job.setInputFormatClass(SequenceFileInputFormat.class);
  LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

  // No mapper class is set here, so the job's default mapper is used.
  job.setCombinerClass(DictionaryCombiner.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  job.setReducerClass(DictionaryReducer.class);

  job.setNumReduceTasks(this.conf.getDictionaryReducers());

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);

  // Write the partition file and ship it through the distributed cache.
  System.out.println("Sampling started");
  InputSampler.writePartitionFile(job, new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
  String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
  URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
  DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
  DistributedCache.createSymlink(job.getConfiguration());
  System.out.println("Sampling finished");

  MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SHARED, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
  MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SUBJECTS, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
  MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.PREDICATES, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
  MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.OBJECTS, SequenceFileOutputFormat.class, Text.class, NullWritable.class);

  SequenceFileOutputFormat.setCompressOutput(job, true);
  SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
  SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

  boolean jobOK = job.waitForCompletion(true);

  this.numShared = job.getCounters().findCounter(Counters.Shared).getValue();
  this.numSubjects = job.getCounters().findCounter(Counters.Subjects).getValue();
  this.numPredicates = job.getCounters().findCounter(Counters.Predicates).getValue();
  this.numObjects = job.getCounters().findCounter(Counters.Objects).getValue();

  // Close the counters file even when one of the writes throws.
  BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(this.dictionaryFS.create(this.conf.getDictionaryCountersFile())));
  try {
    bufferedWriter.write(HDTBuilderConfiguration.SHARED + "=" + this.numShared + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.SUBJECTS + "=" + this.numSubjects + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.PREDICATES + "=" + this.numPredicates + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.OBJECTS + "=" + this.numObjects + "\n");
  } finally {
    bufferedWriter.close();
  }

  return jobOK;
}
示例13: runTriplesJob
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Runs the second-phase triples job.
 *
 * An existing triples output path is deleted when the delete option is set;
 * otherwise a message is printed and the JVM exits with status -1. The partition
 * file produced by the sampler is shipped through the distributed cache.
 *
 * @return the value returned by {@code Job.waitForCompletion(true)}
 * @throws IOException if a file-system operation or the job fails
 * @throws ClassNotFoundException propagated from sampling or job execution
 * @throws InterruptedException propagated from sampling or job execution
 * @throws URISyntaxException if the partition file URI cannot be built
 */
protected boolean runTriplesJob() throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
  // An existing output path is only acceptable when we are allowed to delete it.
  if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
    if (!this.conf.getDeleteTriplesOutputPath()) {
      System.out.println("Triples output path does exist: " + this.conf.getTriplesOutputPath());
      System.out.println("Select other path or use option -dt to overwrite");
      System.exit(-1);
    } else {
      this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
    }
  }

  final Job triplesJob =
      new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 2");
  triplesJob.setJarByClass(HDTBuilderDriver.class);

  // Input and output locations / formats.
  FileInputFormat.addInputPath(triplesJob, this.conf.getTriplesSamplesPath());
  FileOutputFormat.setOutputPath(triplesJob, this.conf.getTriplesOutputPath());
  triplesJob.setInputFormatClass(SequenceFileInputFormat.class);
  LazyOutputFormat.setOutputFormatClass(triplesJob, SequenceFileOutputFormat.class);

  // Ordering, grouping and partitioning.
  triplesJob.setSortComparatorClass(TripleSPOComparator.class);
  triplesJob.setGroupingComparatorClass(TripleSPOComparator.class);
  triplesJob.setPartitionerClass(TotalOrderPartitioner.class);

  triplesJob.setOutputKeyClass(TripleSPOWritable.class);
  triplesJob.setOutputValueClass(NullWritable.class);
  triplesJob.setNumReduceTasks(this.conf.getTriplesReducers());

  // Sample the input, then publish the resulting partition file via the cache.
  System.out.println("Sampling started");
  InputSampler.writePartitionFile(triplesJob,
      new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
  final String partitionFile =
      TotalOrderPartitioner.getPartitionFile(triplesJob.getConfiguration());
  DistributedCache.addCacheFile(
      new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH),
      triplesJob.getConfiguration());
  DistributedCache.createSymlink(triplesJob.getConfiguration());
  System.out.println("Sampling finished");

  // Block-compressed LZO sequence-file output.
  SequenceFileOutputFormat.setCompressOutput(triplesJob, true);
  SequenceFileOutputFormat.setOutputCompressorClass(triplesJob, com.hadoop.compression.lzo.LzoCodec.class);
  SequenceFileOutputFormat.setOutputCompressionType(triplesJob, SequenceFile.CompressionType.BLOCK);

  return triplesJob.waitForCompletion(true);
}
示例14: setupPipesJob
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Prepares {@code conf} for a pipes job: installs the pipes runner, partitioner,
 * reducer and formats where the corresponding Java component is not in use, defaults
 * the key/value classes to {@code Text}, and puts the executable at the front of the
 * distributed-cache file list.
 *
 * @param conf job configuration to update in place
 * @throws IllegalArgumentException if no executable is configured
 * @throws IOException if the executable cannot be parsed as a URI
 */
private static void setupPipesJob(JobConf conf) throws IOException {
  if (!getIsJavaMapper(conf)) {
    conf.setMapRunnerClass(PipesMapRunner.class);
    // Remember the user's partitioner before replacing it with the pipes one.
    setJavaPartitioner(conf, conf.getPartitionerClass());
    conf.setPartitionerClass(PipesPartitioner.class);
  }

  if (!getIsJavaReducer(conf)) {
    conf.setReducerClass(PipesReducer.class);
    if (!getIsJavaRecordWriter(conf)) {
      conf.setOutputFormat(NullOutputFormat.class);
    }
  }

  // Map output and job output types default to Text unless already set.
  final String textClassname = Text.class.getName();
  final String[] typeProperties = {
    "mapred.mapoutput.key.class",
    "mapred.mapoutput.value.class",
    "mapred.output.key.class",
    "mapred.output.value.class",
  };
  for (String property : typeProperties) {
    setIfUnset(conf, property, textClassname);
  }

  // Use PipesNonJavaInputFormat if necessary to handle progress reporting
  // from C++ RecordReaders ...
  if (!(getIsJavaRecordReader(conf) || getIsJavaMapper(conf))) {
    conf.setClass("mapred.pipes.user.inputformat",
        conf.getInputFormat().getClass(), InputFormat.class);
    conf.setInputFormat(PipesNonJavaInputFormat.class);
  }

  final String exec = getExecutable(conf);
  if (exec == null) {
    throw new IllegalArgumentException("No application program defined.");
  }

  // The default debug script is only added when the executable is given in the
  // "path#executable" form.
  if (exec.contains("#")) {
    DistributedCache.createSymlink(conf);
    // Default gdb commands for map and reduce tasks.
    final String defScript = "$HADOOP_HOME/src/c++/pipes/debug/pipes-default-script";
    setIfUnset(conf, "mapred.map.task.debug.script", defScript);
    setIfUnset(conf, "mapred.reduce.task.debug.script", defScript);
  }

  // Build a new cache-file list with slot 0 reserved for the executable and any
  // existing entries shifted up by one.
  final URI[] existing = DistributedCache.getCacheFiles(conf);
  final int existingCount = (existing == null) ? 0 : existing.length;
  final URI[] fileCache = new URI[existingCount + 1];
  if (existing != null) {
    System.arraycopy(existing, 0, fileCache, 1, existingCount);
  }

  try {
    fileCache[0] = new URI(exec);
  } catch (URISyntaxException e) {
    throw (IOException) new IOException("Problem parsing execable URI " + exec).initCause(e);
  }
  DistributedCache.setCacheFiles(fileCache, conf);
}
示例15: launchFailMapAndDebug
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Launches failed map task and debugs the failed task
 * @param conf configuration for the mapred job
 * @param inDir input path
 * @param outDir output path
 * @param debugDir debug directory where script is present
 * @param debugScript The command to execute script
 * @param input Input text
 * @return the output of debug script
 * @throws IOException if a file-system operation fails, the job cannot be
 *         submitted, or the wait for job completion is interrupted
 */
public String launchFailMapAndDebug(JobConf conf,
    Path inDir,
    Path outDir,
    Path debugDir,
    String debugScript,
    String input)
    throws IOException {
  // set up the input file system and write input text.
  FileSystem inFs = inDir.getFileSystem(conf);
  FileSystem outFs = outDir.getFileSystem(conf);
  outFs.delete(outDir, true);
  if (!inFs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }

  // write input into input file; close the stream even if the write fails.
  DataOutputStream file = inFs.create(new Path(inDir, "part-0"));
  try {
    file.writeBytes(input);
  } finally {
    file.close();
  }

  // configure the mapred Job for failing map task.
  conf.setJobName("failmap");
  conf.setMapperClass(MapClass.class);
  conf.setReducerClass(IdentityReducer.class);
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(0);
  conf.setMapDebugScript(debugScript);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  String TEST_ROOT_DIR = new Path(System.getProperty("test.build.data",
      "/tmp")).toString().replace(' ', '+');
  conf.set("test.build.data", TEST_ROOT_DIR);

  // copy debug script to cache from local file system.
  FileSystem debugFs = debugDir.getFileSystem(conf);
  Path scriptPath = new Path(debugDir, "testscript.txt");
  Path cachePath = new Path("/cacheDir");
  if (!debugFs.mkdirs(cachePath)) {
    throw new IOException("Mkdirs failed to create " + cachePath.toString());
  }
  debugFs.copyFromLocalFile(scriptPath, cachePath);

  URI uri = debugFs.getUri().resolve(cachePath + "/testscript.txt#testscript");
  DistributedCache.createSymlink(conf);
  DistributedCache.addCacheFile(uri, conf);

  // Submit the job. A submission failure is logged and rethrown: without a
  // RunningJob handle the code below would only fail with a NullPointerException.
  RunningJob job;
  try {
    job = new JobClient(conf).submitJob(conf);
  } catch (IOException e) {
    LOG.info("Running Job failed", e);
    throw e;
  }

  JobID jobId = job.getID();
  // construct the task id of first map task of failmap
  TaskAttemptID taskId = new TaskAttemptID(new TaskID(jobId, true, 0), 0);

  // wait for the job to finish, sleeping between polls instead of spinning.
  while (!job.isComplete()) {
    try {
      Thread.sleep(100);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new IOException("Interrupted while waiting for job " + jobId, e);
    }
  }

  // return the output of debugout log.
  return readTaskLog(TaskLog.LogName.DEBUGOUT, taskId, false);
}