This article collects and summarizes typical usage examples of the Java method org.apache.hadoop.filecache.DistributedCache.addCacheFile. If you are wondering how DistributedCache.addCacheFile is used in practice, the curated code examples below may help. You can also explore further usage examples of its containing class, org.apache.hadoop.filecache.DistributedCache.
The following presents 15 code examples of DistributedCache.addCacheFile, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
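All of these examples run on the job-submission side: they register files with DistributedCache.addCacheFile so the framework copies them to every task node. For context, the sketch below shows the task-side counterpart under the same old org.apache.hadoop.filecache API, retrieving the localized copies in a Mapper's setup() method. The mapper name and key/value types are placeholders and do not come from any of the examples; treat it as a minimal sketch rather than a drop-in class.

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical mapper: reads side data that the driver registered with
// DistributedCache.addCacheFile(uri, conf).
public class CacheReadingMapper extends Mapper<LongWritable, Text, Text, Text> {
  @Override
  protected void setup(Context context) throws IOException {
    // Local paths of the cached files on this task's node (may be null if none were added)
    Path[] cacheFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    if (cacheFiles == null) {
      return;
    }
    for (Path cacheFile : cacheFiles) {
      BufferedReader reader = new BufferedReader(new FileReader(cacheFile.toString()));
      try {
        String line;
        while ((line = reader.readLine()) != null) {
          // parse the line, e.g. populate an in-memory lookup table
        }
      } finally {
        reader.close();
      }
    }
  }
}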
Example 1: main
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  if (otherArgs.length != 3) {
    System.err.println("Usage: Question4 <cacheFile> <in> <out>");
    System.exit(3);
  }
  conf.set("cachefile", otherArgs[0]);
  // Add the cache file before the Job is created: new Job(conf, ...) copies the
  // Configuration, so changes made to conf afterwards are not seen by the job.
  DistributedCache.addCacheFile(new URI(otherArgs[0]), conf);
  Job job = new Job(conf, "Question4");
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(FloatWritable.class);
  job.setJarByClass(Question4.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
  job.waitForCompletion(true);
}
Example 2: run
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public int run(String[] args) throws Exception {
  LOG.info("starting");
  JobConf job = (JobConf) getConf();
  Path inputDir = new Path(args[0]);
  inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
  Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
  URI partitionUri = new URI(partitionFile.toString() +
      "#" + TeraInputFormat.PARTITION_FILENAME);
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(TeraInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TeraInputFormat.writePartitionFile(job, partitionFile);
  DistributedCache.addCacheFile(partitionUri, job);
  DistributedCache.createSymlink(job);
  job.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(job, true);
  JobClient.runJob(job);
  LOG.info("done");
  return 0;
}
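Example 2 pairs addCacheFile with a URI fragment ("#" + TeraInputFormat.PARTITION_FILENAME) and DistributedCache.createSymlink(job), so each task's working directory gets a symlink with that name pointing at the localized partition file. The sketch below is a hedged illustration of how task-side code can then open the cached file by its symlink name alone; the helper class and its method are hypothetical, not taken from TeraSort.

import java.io.File;
import java.io.IOException;

// Minimal sketch: with createSymlink enabled and a "#name" fragment on the cache
// URI, the cached file appears under that name in the task's working directory.
public class SymlinkLookup {
  public static File cachedFile(String symlinkName) throws IOException {
    File f = new File(symlinkName); // relative to the task working directory
    if (!f.exists()) {
      throw new IOException("Cached file not localized: " + symlinkName);
    }
    return f;
  }
}

Examples 3, 6, 7 and 8 below build URIs of the same path#linkName form and rely on the same symlink mechanism.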
Example 3: addLocalFiles
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
/**
* Add local non-jar files the job depends on to DistributedCache.
*/
private void addLocalFiles(Path jobFileDir, String jobFileList) throws IOException {
  DistributedCache.createSymlink(this.conf);
  for (String jobFile : SPLITTER.split(jobFileList)) {
    Path srcJobFile = new Path(jobFile);
    // DistributedCache requires absolute path, so we need to use makeQualified.
    Path destJobFile = new Path(this.fs.makeQualified(jobFileDir), srcJobFile.getName());
    // Copy the file from local file system to HDFS
    this.fs.copyFromLocalFile(srcJobFile, destJobFile);
    // Create a URI that is in the form path#symlink
    URI destFileUri = URI.create(destJobFile.toUri().getPath() + "#" + destJobFile.getName());
    LOG.info(String.format("Adding %s to DistributedCache", destFileUri));
    // Finally add the file to DistributedCache with a symlink named after the file name
    DistributedCache.addCacheFile(destFileUri, this.conf);
  }
}
Example 4: configureJob
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
@Override
protected void configureJob(Job job) throws IOException {
  Configuration conf = job.getConfiguration();
  job.setJarByClass(PartialBuilder.class);
  FileInputFormat.setInputPaths(job, getDataPath());
  FileOutputFormat.setOutputPath(job, getOutputPath(conf));
  // put the data in the DistributedCache
  DistributedCache.addCacheFile(getDataPath().toUri(), conf);
  job.setMapOutputKeyClass(LongWritable.class);
  job.setMapOutputValueClass(MapredOutput.class);
  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(RuleBase.class);
  job.setMapperClass(ChiCSMapper.class);
  job.setReducerClass(ChiCSReducer.class);
  job.setNumReduceTasks(1);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
}
Example 5: copyCredentialIntoDistributedCache
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
private void copyCredentialIntoDistributedCache() throws URISyntaxException {
  LOG.debug("{} added to distributed cache with symlink {}", HDFS_GS_CREDENTIAL_DIRECTORY,
      "." + CACHED_CREDENTIAL_NAME);
  DistributedCache.addCacheFile(new URI(HDFS_GS_CREDENTIAL_ABSOLUTE_PATH), conf);
  // The "." must be prepended for the symlink to be created correctly for reference in Map Reduce job
  conf.set(GCP_KEYFILE_CACHED_LOCATION, "." + CACHED_CREDENTIAL_NAME);
}
Example 6: shareMapFile
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
private static final void shareMapFile(String symbol, int slots, Path mfile, JobConf job) throws IOException, URISyntaxException {
  FileSystem fs = FileSystem.get(job);
  if (fs.exists(mfile) && fs.getFileStatus(mfile).isDir()) {
    DistributedCache.createSymlink(job);
    FileStatus[] fstats = fs.listStatus(mfile, getPassDirectoriesFilter(fs));
    LongWritable key = new LongWritable();
    Text value = new Text();
    for (int i = 0; i < fstats.length; i++) {
      Path curMap = fstats[i].getPath();
      MapFile.Reader mreader = new MapFile.Reader(fs, curMap.toString(), job);
      if (mreader.next(key, value)) {
        int rid = (int) (key.get() % slots);
        String uriWithLink =
            curMap.toUri().toString() + "#" + symbol + "-" + Integer.toString(rid);
        DistributedCache.addCacheFile(new URI(uriWithLink), job);
      } else {
        System.exit(-1);
      }
      mreader.close();
    }
  }
  job.setInt(symbol, slots);
}
Example 7: shareZipfCore
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
private static final void shareZipfCore(String fname, DataOptions options, JobConf job) throws URISyntaxException {
  DistributedCache.createSymlink(job);
  Path zipfPath = new Path(options.getWorkPath(), fname);
  String uriWithLink = zipfPath.toString() + "#" + fname;
  DistributedCache.addCacheFile(new URI(uriWithLink), job);
}
Example 8: task3
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
/**
* Extracts CF for each found anchor.
*
* @param inputPath
* @param mapPath
* @param outputPath
* @throws IOException
*/
private void task3(String inputPath, String mapPath, String outputPath) throws IOException {
  LOG.info("Extracting anchor text (phase 3)...");
  LOG.info(" - input: " + inputPath);
  LOG.info(" - output: " + outputPath);
  LOG.info(" - mapping: " + mapPath);
  JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
  conf.setJobName(String.format("ExtractWikipediaAnchorText:phase3[input: %s, output: %s]", inputPath, outputPath));
  conf.setNumReduceTasks(1);
  String location = "map.dat";
  try {
    DistributedCache.addCacheFile(new URI(mapPath + "/part-00000/data" + "#" + location), conf);
    //DistributedCache.addCacheFile(new URI(mapPath + "/singleentitymap.data" + "#" + location), conf);
    DistributedCache.createSymlink(conf);
  } catch (URISyntaxException e) {
    e.printStackTrace();
  }
  FileInputFormat.addInputPath(conf, new Path(inputPath));
  FileOutputFormat.setOutputPath(conf, new Path(outputPath));
  conf.setInputFormat(SequenceFileInputFormat.class);
  conf.setOutputFormat(MapFileOutputFormat.class);
  // conf.setOutputFormat(TextOutputFormat.class);
  conf.setMapOutputKeyClass(Text.class);
  conf.setMapOutputValueClass(IntWritable.class);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(IntWritable.class);
  conf.setMapperClass(MyMapper3.class);
  conf.setCombinerClass(MyReducer3.class);
  conf.setReducerClass(MyReducer3.class);
  JobClient.runJob(conf);
}
Example 9: main
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
  Configuration config = new Configuration();
  JobConf conf = new JobConf(config, UFOLocation2.class);
  conf.setJobName("UFOLocation");
  DistributedCache.addCacheFile(new URI("/user/hadoop/states.txt"), conf);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);
  JobConf mapconf1 = new JobConf(false);
  ChainMapper.addMapper(conf, UFORecordValidationMapper.class,
      LongWritable.class, Text.class, LongWritable.class, Text.class,
      true, mapconf1);
  JobConf mapconf2 = new JobConf(false);
  // DistributedCache.addCacheFile(new URI("/user/hadoop/states.txt"), mapconf2);
  ChainMapper.addMapper(conf, MapClass.class,
      LongWritable.class, Text.class, Text.class, LongWritable.class,
      true, mapconf2);
  conf.setMapperClass(ChainMapper.class);
  //conf.setMapperClass(MapClass.class);
  conf.setCombinerClass(LongSumReducer.class);
  conf.setReducerClass(LongSumReducer.class);
  FileInputFormat.setInputPaths(conf, args[0]);
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  JobClient.runJob(conf);
}
Example 10: main
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
  Configuration config = new Configuration();
  JobConf conf = new JobConf(config, UFOLocation3.class);
  conf.setJobName("UFOLocation");
  DistributedCache.addCacheFile(new URI("/user/hadoop/states.txt"), conf);
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(LongWritable.class);
  JobConf mapconf1 = new JobConf(false);
  ChainMapper.addMapper(conf, UFOCountingRecordValidationMapper.class,
      LongWritable.class, Text.class, LongWritable.class, Text.class,
      true, mapconf1);
  JobConf mapconf2 = new JobConf(false);
  // DistributedCache.addCacheFile(new URI("/user/hadoop/states.txt"), mapconf2);
  ChainMapper.addMapper(conf, MapClass.class,
      LongWritable.class, Text.class, Text.class, LongWritable.class,
      true, mapconf2);
  conf.setMapperClass(ChainMapper.class);
  //conf.setMapperClass(MapClass.class);
  conf.setCombinerClass(LongSumReducer.class);
  conf.setReducerClass(LongSumReducer.class);
  FileInputFormat.setInputPaths(conf, args[0]);
  FileOutputFormat.setOutputPath(conf, new Path(args[1]));
  JobClient.runJob(conf);
}
Example 11: build
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public RuleBase build() throws IOException, ClassNotFoundException, InterruptedException {
  Path outputPath = getOutputPath(conf);
  FileSystem fs = outputPath.getFileSystem(conf);
  // check the output
  if (fs.exists(outputPath)) {
    throw new IOException("Chi: Output path already exists : " + outputPath);
  }
  setFuzzy_ChiBuilder(conf, fuzzy_ChiBuilder);
  // put the dataset into the DistributedCache
  DistributedCache.addCacheFile(datasetPath.toUri(), conf);
  Job job = new Job(conf, "fuzzy_Chi builder");
  log.debug("Chi: Configuring the job...");
  configureJob(job);
  log.debug("Chi: Running the job...");
  if (!runJob(job)) {
    log.error("Chi: Job failed!");
    return null;
  }
  if (isOutput(conf)) {
    log.debug("Parsing the output...");
    RuleBase ruleBase = parseOutput(job);
    HadoopUtil.delete(conf, outputPath);
    return ruleBase;
  }
  return null;
}
Example 12: testCacheFilesLocalization
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
/**
 * Run the job with two distributed cache files and verify
 * whether the job succeeds.
 * @throws Exception
 */
@Test
public void testCacheFilesLocalization() throws Exception {
  conf = wovenClient.getDaemonConf();
  SleepJob job = new SleepJob();
  job.setConf(conf);
  JobConf jobConf = job.setupJobConf(4, 1, 4000, 4000, 1000, 1000);
  DistributedCache.createSymlink(jobConf);
  DistributedCache.addCacheFile(cacheFileURI1, jobConf);
  DistributedCache.addCacheFile(cacheFileURI2, jobConf);
  RunningJob runJob = jobClient.submitJob(jobConf);
  JobID jobId = runJob.getID();
  Assert.assertTrue("Job has not been started for 1 min.",
      jtClient.isJobStarted(jobId));
  TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId);
  Assert.assertTrue("Cache File1 has not been localized",
      checkLocalization(taskInfos, cacheFile1));
  Assert.assertTrue("Cache File2 has not been localized",
      checkLocalization(taskInfos, cacheFile2));
  JobInfo jInfo = wovenClient.getJobInfo(jobId);
  LOG.info("Waiting till the job is completed...");
  while (!jInfo.getStatus().isJobComplete()) {
    UtilsForTests.waitFor(100);
    jInfo = wovenClient.getJobInfo(jobId);
  }
  Assert.assertEquals("Job has not succeeded",
      JobStatus.SUCCEEDED, jInfo.getStatus().getRunState());
}
Example 13: run
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public void run() throws IOException, ClassNotFoundException, InterruptedException {
  FileSystem fs = FileSystem.get(conf);
  // check the output
  if (fs.exists(outputPath)) {
    throw new IOException("Chi: Output path already exists : " + outputPath);
  }
  log.info("Chi: Adding the dataset to the DistributedCache");
  // put the dataset into the DistributedCache
  DistributedCache.addCacheFile(datasetPath.toUri(), conf);
  log.info("Chi: Adding the model to the DistributedCache");
  DistributedCache.addCacheFile(modelPath.toUri(), conf);
  Job job = new Job(conf, "Chi_RW classifier");
  log.info("Chi: Configuring the job...");
  configureJob(job);
  log.info("Chi: Running the job...");
  if (!job.waitForCompletion(true)) {
    throw new IllegalStateException("Chi: Job failed!");
  }
  parseOutput(job);
  HadoopUtil.delete(conf, mappersOutputPath);
}
Example 14: main
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();
  Job job = new Job(conf, "DiseaseApplication_format_4");
  DistributedCache.addCacheFile(new Path("/user/brfilho/generef/ABCB11_GENE.txt").toUri(), job.getConfiguration());
  DistributedCache.addCacheFile(new Path("/user/brfilho/generef/ABCB4_GENE.txt").toUri(), job.getConfiguration());
  DistributedCache.addCacheFile(new Path("/user/brfilho/generef/ATP8B1_GENE.txt").toUri(), job.getConfiguration());
  DistributedCache.addCacheFile(new Path("/user/brfilho/generef/JAG1_GENE.txt").toUri(), job.getConfiguration());
  DistributedCache.addCacheFile(new Path("/user/brfilho/generef/SERPINA1_GENE.txt").toUri(), job.getConfiguration());
  DistributedCache.addCacheFile(new Path("/user/brfilho/db/ABCB11.txt").toUri(), job.getConfiguration());
  DistributedCache.addCacheFile(new Path("/user/brfilho/db/ABCB4.txt").toUri(), job.getConfiguration());
  DistributedCache.addCacheFile(new Path("/user/brfilho/db/ATP8B1.txt").toUri(), job.getConfiguration());
  DistributedCache.addCacheFile(new Path("/user/brfilho/db/JAG1.txt").toUri(), job.getConfiguration());
  DistributedCache.addCacheFile(new Path("/user/brfilho/db/SERPINA1.txt").toUri(), job.getConfiguration());
  job.setJarByClass(DiseaseApplication_format_4.class);
  job.setMapperClass(Map.class);
  job.setCombinerClass(Reduce.class);
  //job.setReducerClass(Reduce.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  //job.setNumReduceTasks(5);
  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example 15: runJob
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public static void runJob(Configuration conf,
                          Path userLogsPath,
                          Path usersPath,
                          Path outputPath)
    throws Exception {
  FileSystem fs = usersPath.getFileSystem(conf);
  FileStatus usersStatus = fs.getFileStatus(usersPath);
  if (usersStatus.isDir()) {
    for (FileStatus f : fs.listStatus(usersPath)) {
      if (f.getPath().getName().startsWith("part")) {
        DistributedCache.addCacheFile(f.getPath().toUri(), conf);
      }
    }
  } else {
    DistributedCache.addCacheFile(usersPath.toUri(), conf);
  }
  Job job = new Job(conf);
  job.setJarByClass(FinalJoinJob.class);
  job.setMapperClass(GenericReplicatedJoin.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(KeyValueTextInputFormat.class);
  outputPath.getFileSystem(conf).delete(outputPath, true);
  FileInputFormat.setInputPaths(job, userLogsPath);
  FileOutputFormat.setOutputPath(job, outputPath);
  if (!job.waitForCompletion(true)) {
    throw new Exception("Job failed");
  }
}
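Example 15 wires a map-side (replicated) join: the smaller users data set is pushed to every node through the distributed cache, and GenericReplicatedJoin joins it against the streamed user logs with zero reducers. As a rough, hypothetical sketch of what the map side of such a join typically does (not the actual GenericReplicatedJoin implementation), the mapper below loads the cached files into a HashMap in setup() and probes it for every input record; the class name, field names, and the tab-separated record format are all assumptions.

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Hypothetical replicated-join mapper: the small side is loaded from the
// distributed cache, the large side is streamed through map(); no reduce phase.
public class ReplicatedJoinMapper extends Mapper<Text, Text, Text, Text> {
  private final Map<String, String> users = new HashMap<String, String>();
  private final Text joined = new Text();

  @Override
  protected void setup(Context context) throws IOException {
    Path[] cacheFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    if (cacheFiles == null) {
      return;
    }
    for (Path cacheFile : cacheFiles) {
      BufferedReader reader = new BufferedReader(new FileReader(cacheFile.toString()));
      try {
        String line;
        while ((line = reader.readLine()) != null) {
          // assumed record format: userId<TAB>userDetails
          String[] parts = line.split("\t", 2);
          if (parts.length == 2) {
            users.put(parts[0], parts[1]);
          }
        }
      } finally {
        reader.close();
      }
    }
  }

  @Override
  protected void map(Text key, Text value, Context context)
      throws IOException, InterruptedException {
    // KeyValueTextInputFormat supplies the join key (here assumed to be the user id) as the key
    String userDetails = users.get(key.toString());
    if (userDetails != null) {
      joined.set(value.toString() + "\t" + userDetails);
      context.write(key, joined);
    }
  }
}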