

Java DistributedCache.createSymlink Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.filecache.DistributedCache.createSymlink. If you are wondering what DistributedCache.createSymlink does, how to call it, or what real-world uses look like, the curated examples below should help. You can also browse further usage examples of the enclosing class, org.apache.hadoop.filecache.DistributedCache.


The following presents 15 code examples of the DistributedCache.createSymlink method, collected from open-source projects and sorted by popularity by default.
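Before the examples, here is a minimal sketch (not taken from any of the projects below) of the pattern most of them share: add a cache file whose URI ends in a "#symlink" fragment, then call createSymlink so the file is exposed under that name in each task's working directory. The class name and HDFS path are illustrative assumptions only.

import java.net.URI;

import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.mapred.JobConf;

public class CreateSymlinkSketch {
  public static void configureCache(JobConf job) throws Exception {
    // Illustrative path; the "#lookup.dat" fragment names the symlink tasks will see.
    URI cacheUri = new URI("hdfs:///user/example/lookup.dat#lookup.dat");
    // Register the file with the DistributedCache ...
    DistributedCache.addCacheFile(cacheUri, job);
    // ... and ask the framework to materialize the symlink in each task's
    // working directory, so task code can open new File("lookup.dat") directly.
    DistributedCache.createSymlink(job);
  }
}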

Example 1: run

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
public int run(String[] args) throws Exception {
  LOG.info("starting");
  JobConf job = (JobConf) getConf();
  Path inputDir = new Path(args[0]);
  inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
  Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
  URI partitionUri = new URI(partitionFile.toString() +
                             "#" + TeraInputFormat.PARTITION_FILENAME);
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(TeraInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TeraInputFormat.writePartitionFile(job, partitionFile);
  DistributedCache.addCacheFile(partitionUri, job);
  DistributedCache.createSymlink(job);
  job.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(job, true);
  JobClient.runJob(job);
  LOG.info("done");
  return 0;
}
 
Author: Nextzero | Project: hadoop-2.6.0-cdh5.4.3 | Lines: 27 | Source: TeraSort.java

Example 2: addLocalFiles

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
/**
 * Add local non-jar files the job depends on to DistributedCache.
 */
private void addLocalFiles(Path jobFileDir, String jobFileList) throws IOException {
  DistributedCache.createSymlink(this.conf);
  for (String jobFile : SPLITTER.split(jobFileList)) {
    Path srcJobFile = new Path(jobFile);
    // DistributedCache requires absolute path, so we need to use makeQualified.
    Path destJobFile = new Path(this.fs.makeQualified(jobFileDir), srcJobFile.getName());
    // Copy the file from local file system to HDFS
    this.fs.copyFromLocalFile(srcJobFile, destJobFile);
    // Create a URI that is in the form path#symlink
    URI destFileUri = URI.create(destJobFile.toUri().getPath() + "#" + destJobFile.getName());
    LOG.info(String.format("Adding %s to DistributedCache", destFileUri));
    // Finally add the file to DistributedCache with a symlink named after the file name
    DistributedCache.addCacheFile(destFileUri, this.conf);
  }
}
 
Author: Hanmourang | Project: Gobblin | Lines: 19 | Source: MRJobLauncher.java

Example 3: shareMapFile

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
private static final void shareMapFile(String symbol, int slots, Path mfile, JobConf job) throws IOException, URISyntaxException {
	
	FileSystem fs = FileSystem.get(job);
	if (fs.exists(mfile) && fs.getFileStatus(mfile).isDir()) {

		DistributedCache.createSymlink(job);
		
		FileStatus[] fstats = fs.listStatus(mfile, getPassDirectoriesFilter(fs));
		
		LongWritable key = new LongWritable();
		Text value = new Text();
		for (int i=0; i<fstats.length; i++) {
			Path curMap = fstats[i].getPath();
			MapFile.Reader mreader = new MapFile.Reader(fs, curMap.toString(), job);
			if (mreader.next(key, value)) {
				int rid = (int) (key.get() % slots);
				String uriWithLink =
						curMap.toUri().toString() + "#" + symbol + "-" + Integer.toString(rid);
				DistributedCache.addCacheFile(new URI(uriWithLink), job);
			} else {
				System.exit(-1);
			}
			mreader.close();
		}
	}
	
	job.setInt(symbol, slots);
}
 
Author: thrill | Project: fst-bench | Lines: 29 | Source: Utils.java

Example 4: shareZipfCore

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
private static final void shareZipfCore(String fname, DataOptions options, JobConf job) throws URISyntaxException {

		DistributedCache.createSymlink(job);
		
		Path zipfPath = new Path(options.getWorkPath(), fname);
		String uriWithLink = zipfPath.toString() + "#" + fname;
		DistributedCache.addCacheFile(new URI(uriWithLink), job);
	}
 
Author: thrill | Project: fst-bench | Lines: 9 | Source: Utils.java

Example 5: task3

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
/**
 * Extracts CF for each found anchor.
 *
 * @param inputPath
 * @param mapPath
 * @param outputPath
 * @throws IOException
 */
private void task3(String inputPath, String mapPath, String outputPath) throws IOException {
	LOG.info("Extracting anchor text (phase 3)...");
	LOG.info(" - input:   " + inputPath);
	LOG.info(" - output:  " + outputPath);
	LOG.info(" - mapping: " + mapPath);

	JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
	conf.setJobName(String.format("ExtractWikipediaAnchorText:phase3[input: %s, output: %s]", inputPath, outputPath));

	conf.setNumReduceTasks(1);
	String location = "map.dat";

	try {
		DistributedCache.addCacheFile(new URI(mapPath + "/part-00000/data" + "#" + location), conf);
		//DistributedCache.addCacheFile(new URI(mapPath + "/singleentitymap.data" + "#" + location), conf);
		DistributedCache.createSymlink(conf);
	} catch (URISyntaxException e) {
		e.printStackTrace();
	}

	FileInputFormat.addInputPath(conf, new Path(inputPath));
	FileOutputFormat.setOutputPath(conf, new Path(outputPath));

	conf.setInputFormat(SequenceFileInputFormat.class);
	conf.setOutputFormat(MapFileOutputFormat.class);
	// conf.setOutputFormat(TextOutputFormat.class);

	conf.setMapOutputKeyClass(Text.class);
	conf.setMapOutputValueClass(IntWritable.class);

	conf.setOutputKeyClass(Text.class);
	conf.setOutputValueClass(IntWritable.class);

	conf.setMapperClass(MyMapper3.class);
	conf.setCombinerClass(MyReducer3.class);
	conf.setReducerClass(MyReducer3.class);

	JobClient.runJob(conf);
}
 
Author: yahoo | Project: FEL | Lines: 48 | Source: ExtractWikipediaAnchorText.java

Example 6: testCacheFilesLocalization

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
/**
 * Run the job with two distributed cache files and verify
 * whether the job succeeds.
 * @throws Exception
 */
@Test
public void testCacheFilesLocalization() throws Exception {
  conf = wovenClient.getDaemonConf();
  SleepJob job = new SleepJob();
  job.setConf(conf);
  JobConf jobConf = job.setupJobConf(4, 1, 4000, 4000, 1000, 1000);
  DistributedCache.createSymlink(jobConf);
  DistributedCache.addCacheFile(cacheFileURI1, jobConf);
  DistributedCache.addCacheFile(cacheFileURI2, jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  JobID jobId = runJob.getID();

  Assert.assertTrue("Job has not been started for 1 min.", 
      jtClient.isJobStarted(jobId));
  TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId);
  Assert.assertTrue("Cache File1 has not been localize",
      checkLocalization(taskInfos,cacheFile1));
  Assert.assertTrue("Cache File2 has not been localize",
          checkLocalization(taskInfos,cacheFile2));
  JobInfo jInfo = wovenClient.getJobInfo(jobId);
  LOG.info("Waiting till the job is completed...");
  while (!jInfo.getStatus().isJobComplete()) {
    UtilsForTests.waitFor(100);
    jInfo = wovenClient.getJobInfo(jobId);
  }
  Assert.assertEquals("Job has not been succeeded", 
      jInfo.getStatus().getRunState(), JobStatus.SUCCEEDED);
}
 
Author: Nextzero | Project: hadoop-2.6.0-cdh5.4.3 | Lines: 34 | Source: TestCacheFileReferenceCount.java

Example 7: testDeleteCacheFileInDFSAfterLocalized

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
/**
 * Run the job with distributed cache files, remove one cache
 * file from the DFS once it has been localized, and verify
 * whether the job fails.
 * @throws Exception
 */
@Test
public void testDeleteCacheFileInDFSAfterLocalized() throws Exception {
  conf = wovenClient.getDaemonConf();
  SleepJob job = new SleepJob();
  job.setConf(conf);
  JobConf jobConf = job.setupJobConf(4, 1, 4000, 4000, 1000, 1000);
  cacheFileURI3 = createCacheFile(tmpFolderPath, cacheFile3);
  DistributedCache.createSymlink(jobConf);
  DistributedCache.addCacheFile(cacheFileURI3, jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  JobID jobId = runJob.getID();
  Assert.assertTrue("Job has not been started for 1 min.", 
      jtClient.isJobStarted(jobId));
  TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId);
  boolean iscacheFileLocalized = checkLocalization(taskInfos,cacheFile3);
  Assert.assertTrue("CacheFile has not been localized", 
      iscacheFileLocalized);
  deleteCacheFile(new Path(tmpFolderPath, cacheFile3));
  JobInfo jInfo = wovenClient.getJobInfo(jobId);
  LOG.info("Waiting till the job is completed...");
  while (!jInfo.getStatus().isJobComplete()) {
    UtilsForTests.waitFor(100);
    jInfo = wovenClient.getJobInfo(jobId);
  }
  Assert.assertEquals("Job has not been failed", 
      jInfo.getStatus().getRunState(), JobStatus.FAILED);
}
 
Author: Nextzero | Project: hadoop-2.6.0-cdh5.4.3 | Lines: 34 | Source: TestCacheFileReferenceCount.java

Example 8: addHDFSFiles

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
/**
 * Add non-jar files already on HDFS that the job depends on to DistributedCache.
 */
private void addHDFSFiles(String jobFileList) throws IOException {
  DistributedCache.createSymlink(this.conf);
  for (String jobFile : SPLITTER.split(jobFileList)) {
    Path srcJobFile = new Path(jobFile);
    // Create a URI that is in the form path#symlink
    URI srcFileUri = URI.create(srcJobFile.toUri().getPath() + "#" + srcJobFile.getName());
    LOG.info(String.format("Adding %s to DistributedCache", srcFileUri));
    // Finally add the file to DistributedCache with a symlink named after the file name
    DistributedCache.addCacheFile(srcFileUri, this.conf);
  }
}
 
Author: Hanmourang | Project: Gobblin | Lines: 15 | Source: MRJobLauncher.java

Example 9: run

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
@Override
public int run(String[] args) throws Exception {
	final CmdLineParser parser = new CmdLineParser(this);

	try {
		parser.parseArgument(args);
	} catch (final CmdLineException e) {
		System.err.println(e.getMessage());
		System.err.println("Usage: hadoop jar HadoopImageIndexer.jar [options]");
		parser.printUsage(System.err);
		return -1;
	}

	final Path[] paths = SequenceFileUtility.getFilePaths(input, "part");
	final Path outputPath = new Path(output);

	if (outputPath.getFileSystem(this.getConf()).exists(outputPath) && replace)
		outputPath.getFileSystem(this.getConf()).delete(outputPath, true);

	final Job job = TextBytesJobUtil.createJob(paths, outputPath, null, this.getConf());
	job.setJarByClass(this.getClass());
	job.setMapperClass(PqPcaVladMapper.class);
	job.setNumReduceTasks(0);

	MultipleOutputs.addNamedOutput(job, "pcavlad", SequenceFileOutputFormat.class, Text.class, BytesWritable.class);

	DistributedCache.createSymlink(job.getConfiguration());
	DistributedCache.addCacheFile(new URI(indexerData + "#vlad-data.bin"), job.getConfiguration());

	SequenceFileOutputFormat.setCompressOutput(job, !dontcompress);
	job.waitForCompletion(true);

	return 0;
}
 
Author: openimaj | Project: openimaj | Lines: 35 | Source: HadoopPqPcaVladIndexer.java

Example 10: shareArray

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
private static final void shareArray(String symbol, Path fdict, int words, JobConf job) throws URISyntaxException {
	DistributedCache.createSymlink(job);
	String uridict = fdict.toUri().toString() + "#" + symbol;
	DistributedCache.addCacheFile(new URI(uridict), job);
	job.setInt(symbol, words);
}
 
Author: thrill | Project: fst-bench | Lines: 7 | Source: Utils.java

Example 11: task2

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
/**
 *
 * Maps from (srcID, (targetID, anchor)) to (targetID, (anchor, count)).
 *
 * @param inputPath
 * @param outputPath
 * @throws IOException
 */
private void task2(String inputPath, String outputPath, String redirPath) throws IOException {
	LOG.info("Extracting anchor text (phase 2)...");
	LOG.info(" - input: " + inputPath);
	LOG.info(" - output: " + outputPath);
	Random r = new Random(  );
	//String tmpOutput = "tmp-" + this.getClass().getCanonicalName() + "-" + r.nextInt(10000);
	//LOG.info( "intermediate folder for merge " + tmpOutput );

	JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
	conf.setJobName(String.format("ExtractWikipediaAnchorText:phase2[input: %s, output: %s]", inputPath, outputPath));

	// Gathers everything together for convenience; feasible for Wikipedia.
	conf.setNumReduceTasks(1);

	try {
		DistributedCache.addCacheFile(new URI(redirPath + "/part-00000" + "#" + "redirs.dat"), conf);
		DistributedCache.createSymlink(conf);
	} catch (URISyntaxException e) {
		e.printStackTrace();
	}

	FileInputFormat.addInputPath(conf, new Path(inputPath));
	FileOutputFormat.setOutputPath(conf, new Path(outputPath));
	//FileOutputFormat.setOutputPath(conf, new Path(tmpOutput));

	conf.setInputFormat(SequenceFileInputFormat.class);
	conf.setOutputFormat(MapFileOutputFormat.class);
	// conf.setOutputFormat(TextOutputFormat.class);

	conf.setMapOutputKeyClass(Text.class);
	conf.setMapOutputValueClass(Text.class);

	conf.setOutputKeyClass(Text.class);
	conf.setOutputValueClass(HMapSIW.class);

	conf.setMapperClass(MyMapper2.class);
	conf.setReducerClass(MyReducer2.class);

	// Delete the output directory if it exists already.
	FileSystem.get(conf).delete(new Path(outputPath), true);

	JobClient.runJob(conf);
	// Clean up intermediate data.
	FileSystem.get(conf).delete(new Path(inputPath), true);

	/*
	//merge
	String finalO = outputPath+"/part-00000/data";
	FileSystem.get(conf).mkdirs( new Path( outputPath + "part-00000") );
	getMergeInHdfs( tmpOutput, finalO, conf );
	FileSystem.get(conf).delete(new Path(tmpOutput), true);
	*/
}
 
Author: yahoo | Project: FEL | Lines: 62 | Source: ExtractWikipediaAnchorText.java

Example 12: runDictionaryJob

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
protected boolean runDictionaryJob() throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
	boolean jobOK;
	Job job = null;
	BufferedWriter bufferedWriter;

	// if output path exists...
	if (this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
		if (this.conf.getDeleteDictionaryOutputPath()) { // ... and option provided, delete recursively
			this.dictionaryFS.delete(this.conf.getDictionaryOutputPath(), true);
		} else { // ... and option not provided, fail
			System.out.println("Dictionary output path does exist: " + this.conf.getDictionaryOutputPath());
			System.out.println("Select other path or use option -dd to overwrite");
			System.exit(-1);
		}
	}

	// Sample the SequenceInputFormat to do TotalSort and create final output
	job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 2");

	job.setJarByClass(HDTBuilderDriver.class);

	System.out.println("samples = " + this.conf.getDictionarySamplesPath());
	System.out.println("output = " + this.conf.getDictionaryOutputPath());

	FileInputFormat.addInputPath(job, this.conf.getDictionarySamplesPath());
	FileOutputFormat.setOutputPath(job, this.conf.getDictionaryOutputPath());

	job.setInputFormatClass(SequenceFileInputFormat.class);
	LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

	// Identity Mapper
	// job.setMapperClass(Mapper.class);
	job.setCombinerClass(DictionaryCombiner.class);
	job.setPartitionerClass(TotalOrderPartitioner.class);
	job.setReducerClass(DictionaryReducer.class);

	job.setNumReduceTasks(this.conf.getDictionaryReducers());

	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(Text.class);

	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(NullWritable.class);

	System.out.println("Sampling started");
	InputSampler.writePartitionFile(job, new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
	String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
	URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
	DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
	DistributedCache.createSymlink(job.getConfiguration());
	System.out.println("Sampling finished");

	MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SHARED, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
	MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SUBJECTS, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
	MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.PREDICATES, SequenceFileOutputFormat.class, Text.class, NullWritable.class);
	MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.OBJECTS, SequenceFileOutputFormat.class, Text.class, NullWritable.class);

	SequenceFileOutputFormat.setCompressOutput(job, true);
	SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
	SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

	jobOK = job.waitForCompletion(true);

	this.numShared = job.getCounters().findCounter(Counters.Shared).getValue();
	this.numSubjects = job.getCounters().findCounter(Counters.Subjects).getValue();
	this.numPredicates = job.getCounters().findCounter(Counters.Predicates).getValue();
	this.numObjects = job.getCounters().findCounter(Counters.Objects).getValue();

	bufferedWriter = new BufferedWriter(new OutputStreamWriter(this.dictionaryFS.create(this.conf.getDictionaryCountersFile())));

	bufferedWriter.write(HDTBuilderConfiguration.SHARED + "=" + this.numShared + "\n");
	bufferedWriter.write(HDTBuilderConfiguration.SUBJECTS + "=" + this.numSubjects + "\n");
	bufferedWriter.write(HDTBuilderConfiguration.PREDICATES + "=" + this.numPredicates + "\n");
	bufferedWriter.write(HDTBuilderConfiguration.OBJECTS + "=" + this.numObjects + "\n");

	bufferedWriter.close();

	return jobOK;
}
 
Author: rdfhdt | Project: hdt-mr | Lines: 80 | Source: HDTBuilderDriver.java

Example 13: runTriplesJob

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
protected boolean runTriplesJob() throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
	Job job = null;
	boolean jobOK;

	// if triples output path exists...
	if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
		if (this.conf.getDeleteTriplesOutputPath()) { // ... and option provided, delete recursively
			this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
		} else { // ... and option not provided, fail
			System.out.println("Triples output path does exist: " + this.conf.getTriplesOutputPath());
			System.out.println("Select other path or use option -dt to overwrite");
			System.exit(-1);
		}
	}

	job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 2");

	job.setJarByClass(HDTBuilderDriver.class);

	FileInputFormat.addInputPath(job, this.conf.getTriplesSamplesPath());
	FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());

	job.setInputFormatClass(SequenceFileInputFormat.class);
	LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

	job.setSortComparatorClass(TripleSPOComparator.class);
	job.setGroupingComparatorClass(TripleSPOComparator.class);

	job.setPartitionerClass(TotalOrderPartitioner.class);

	job.setOutputKeyClass(TripleSPOWritable.class);
	job.setOutputValueClass(NullWritable.class);

	job.setNumReduceTasks(this.conf.getTriplesReducers());

	System.out.println("Sampling started");
	InputSampler.writePartitionFile(job, new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
	String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
	URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
	DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
	DistributedCache.createSymlink(job.getConfiguration());
	System.out.println("Sampling finished");

	SequenceFileOutputFormat.setCompressOutput(job, true);
	SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
	SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

	jobOK = job.waitForCompletion(true);

	return jobOK;
}
 
Author: rdfhdt | Project: hdt-mr | Lines: 52 | Source: HDTBuilderDriver.java

Example 14: setupPipesJob

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
private static void setupPipesJob(JobConf conf) throws IOException {
  // default map output types to Text
  if (!getIsJavaMapper(conf)) {
    conf.setMapRunnerClass(PipesMapRunner.class);
    // Save the user's partitioner and hook in our's.
    setJavaPartitioner(conf, conf.getPartitionerClass());
    conf.setPartitionerClass(PipesPartitioner.class);
  }
  if (!getIsJavaReducer(conf)) {
    conf.setReducerClass(PipesReducer.class);
    if (!getIsJavaRecordWriter(conf)) {
      conf.setOutputFormat(NullOutputFormat.class);
    }
  }
  String textClassname = Text.class.getName();
  setIfUnset(conf, "mapred.mapoutput.key.class", textClassname);
  setIfUnset(conf, "mapred.mapoutput.value.class", textClassname);
  setIfUnset(conf, "mapred.output.key.class", textClassname);
  setIfUnset(conf, "mapred.output.value.class", textClassname);
  
  // Use PipesNonJavaInputFormat if necessary to handle progress reporting
  // from C++ RecordReaders ...
  if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) {
    conf.setClass("mapred.pipes.user.inputformat", 
                  conf.getInputFormat().getClass(), InputFormat.class);
    conf.setInputFormat(PipesNonJavaInputFormat.class);
  }
  
  String exec = getExecutable(conf);
  if (exec == null) {
    throw new IllegalArgumentException("No application program defined.");
  }
  // add default debug script only when executable is expressed as
  // <path>#<executable>
  if (exec.contains("#")) {
    DistributedCache.createSymlink(conf);
    // set default gdb commands for map and reduce task 
    String defScript = "$HADOOP_HOME/src/c++/pipes/debug/pipes-default-script";
    setIfUnset(conf,"mapred.map.task.debug.script",defScript);
    setIfUnset(conf,"mapred.reduce.task.debug.script",defScript);
  }
  URI[] fileCache = DistributedCache.getCacheFiles(conf);
  if (fileCache == null) {
    fileCache = new URI[1];
  } else {
    URI[] tmp = new URI[fileCache.length+1];
    System.arraycopy(fileCache, 0, tmp, 1, fileCache.length);
    fileCache = tmp;
  }
  try {
    fileCache[0] = new URI(exec);
  } catch (URISyntaxException e) {
    IOException ie = new IOException("Problem parsing execable URI " + exec);
    ie.initCause(e);
    throw ie;
  }
  DistributedCache.setCacheFiles(fileCache, conf);
}
 
Author: Nextzero | Project: hadoop-2.6.0-cdh5.4.3 | Lines: 59 | Source: Submitter.java

Example 15: launchFailMapAndDebug

import org.apache.hadoop.filecache.DistributedCache; // import for the class this example depends on
/**
 * Launches failed map task and debugs the failed task
 * @param conf configuration for the mapred job
 * @param inDir input path
 * @param outDir output path
 * @param debugDir debug directory where script is present
 * @param debugScript the debug script to execute
 * @param input Input text
 * @return the output of debug script 
 * @throws IOException
 */
public String launchFailMapAndDebug(JobConf conf,
                                    Path inDir,
                                    Path outDir,
                                    Path debugDir,
                                    String debugScript,
                                    String input)
throws IOException {

  // set up the input file system and write input text.
  FileSystem inFs = inDir.getFileSystem(conf);
  FileSystem outFs = outDir.getFileSystem(conf);
  outFs.delete(outDir, true);
  if (!inFs.mkdirs(inDir)) {
    throw new IOException("Mkdirs failed to create " + inDir.toString());
  }
  {
    // write input into input file
    DataOutputStream file = inFs.create(new Path(inDir, "part-0"));
    file.writeBytes(input);
    file.close();
  }

  // configure the mapred Job for failing map task.
  conf.setJobName("failmap");
  conf.setMapperClass(MapClass.class);        
  conf.setReducerClass(IdentityReducer.class);
  conf.setNumMapTasks(1);
  conf.setNumReduceTasks(0);
  conf.setMapDebugScript(debugScript);
  FileInputFormat.setInputPaths(conf, inDir);
  FileOutputFormat.setOutputPath(conf, outDir);
  String TEST_ROOT_DIR = new Path(System.getProperty("test.build.data",
                                    "/tmp")).toString().replace(' ', '+');
  conf.set("test.build.data", TEST_ROOT_DIR);

  // copy debug script to cache from local file system.
  FileSystem debugFs = debugDir.getFileSystem(conf);
  Path scriptPath = new Path(debugDir,"testscript.txt");
  Path cachePath = new Path("/cacheDir");
  if (!debugFs.mkdirs(cachePath)) {
    throw new IOException("Mkdirs failed to create " + cachePath.toString());
  }
  debugFs.copyFromLocalFile(scriptPath,cachePath);
  
  URI uri = debugFs.getUri().resolve(cachePath+"/testscript.txt#testscript");
  DistributedCache.createSymlink(conf);
  DistributedCache.addCacheFile(uri, conf);

  RunningJob job =null;
  // run the job. It will fail with IOException.
  try {
    job = new JobClient(conf).submitJob(conf);
  } catch (IOException e) {
  	LOG.info("Running Job failed", e);
  }

  JobID jobId = job.getID();
  // construct the task id of first map task of failmap
  TaskAttemptID taskId = new TaskAttemptID(new TaskID(jobId,true, 0), 0);
  // wait for the job to finish.
  while (!job.isComplete()) ;
  
  // return the output of debugout log.
  return readTaskLog(TaskLog.LogName.DEBUGOUT,taskId, false);
}
 
Author: Nextzero | Project: hadoop-2.6.0-cdh5.4.3 | Lines: 77 | Source: TestMiniMRMapRedDebugScript.java


Note: The org.apache.hadoop.filecache.DistributedCache.createSymlink examples in this article were collected by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets come from open-source projects contributed by many developers, and the source code copyright remains with the original authors; refer to each project's License before distributing or using the code. Do not reproduce without permission.