This article collects typical usage examples of the Java method org.apache.hadoop.filecache.DistributedCache.addCacheFile. If you are wondering what DistributedCache.addCacheFile does, how to use it, or where to find examples of its use, the curated code samples below may help. You can also explore more usage examples of the enclosing class, org.apache.hadoop.filecache.DistributedCache.
The following presents 15 code examples of the DistributedCache.addCacheFile method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
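Before the collected examples, here is a minimal, self-contained sketch of the pattern most of these snippets follow: the driver registers a file with DistributedCache.addCacheFile before the Job is created, and each map task reads the localized copy in setup() via DistributedCache.getLocalCacheFiles. The class names, the HDFS path /data/lookup.txt, and the lookup logic are illustrative assumptions, not taken from any project below.
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;
import java.util.HashSet;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CacheFileExample {

    public static class LookupMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private final Set<String> lookup = new HashSet<String>();

        @Override
        protected void setup(Context context) throws IOException {
            // Files registered with DistributedCache.addCacheFile() have already been
            // copied to the task's local disk; getLocalCacheFiles() returns their local paths.
            Path[] cached = DistributedCache.getLocalCacheFiles(context.getConfiguration());
            if (cached != null && cached.length > 0) {
                BufferedReader reader = new BufferedReader(new FileReader(cached[0].toString()));
                try {
                    String line;
                    while ((line = reader.readLine()) != null) {
                        lookup.add(line.trim());
                    }
                } finally {
                    reader.close();
                }
            }
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Emit a count only for input records that appear in the cached lookup file.
            if (lookup.contains(value.toString().trim())) {
                context.write(value, new IntWritable(1));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Register the cache file BEFORE creating the Job, because Job copies the
        // Configuration it is handed.
        DistributedCache.addCacheFile(new URI("/data/lookup.txt"), conf);  // hypothetical HDFS path
        Job job = new Job(conf, "distributed-cache-example");
        job.setJarByClass(CacheFileExample.class);
        job.setMapperClass(LookupMapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}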
Example 1: main
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public static void main(String args[]) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Validate the argument count before reading otherArgs[0].
    if (otherArgs.length != 3) {
        System.err.println("Usage: Question4 <cacheFile> <in> <out>");
        System.exit(3);
    }
    conf.set("cachefile", otherArgs[0]);
    // Register the cache file before the Job copies the Configuration, using the parsed argument.
    DistributedCache.addCacheFile(new URI(otherArgs[0]), conf);
    Job job = new Job(conf, "Question4");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setJarByClass(Question4.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
    job.waitForCompletion(true);
}
Example 2: run
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public int run(String[] args) throws Exception {
    LOG.info("starting");
    JobConf job = (JobConf) getConf();
    Path inputDir = new Path(args[0]);
    inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
    Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
    URI partitionUri = new URI(partitionFile.toString() +
        "#" + TeraInputFormat.PARTITION_FILENAME);
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormat(TeraInputFormat.class);
    job.setOutputFormat(TeraOutputFormat.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TeraInputFormat.writePartitionFile(job, partitionFile);
    DistributedCache.addCacheFile(partitionUri, job);
    DistributedCache.createSymlink(job);
    job.setInt("dfs.replication", 1);
    TeraOutputFormat.setFinalSync(job, true);
    JobClient.runJob(job);
    LOG.info("done");
    return 0;
}
Example 3: addLocalFiles
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
/**
 * Add local non-jar files the job depends on to DistributedCache.
 */
private void addLocalFiles(Path jobFileDir, String jobFileList) throws IOException {
    DistributedCache.createSymlink(this.conf);
    for (String jobFile : SPLITTER.split(jobFileList)) {
        Path srcJobFile = new Path(jobFile);
        // DistributedCache requires absolute path, so we need to use makeQualified.
        Path destJobFile = new Path(this.fs.makeQualified(jobFileDir), srcJobFile.getName());
        // Copy the file from local file system to HDFS
        this.fs.copyFromLocalFile(srcJobFile, destJobFile);
        // Create a URI that is in the form path#symlink
        URI destFileUri = URI.create(destJobFile.toUri().getPath() + "#" + destJobFile.getName());
        LOG.info(String.format("Adding %s to DistributedCache", destFileUri));
        // Finally add the file to DistributedCache with a symlink named after the file name
        DistributedCache.addCacheFile(destFileUri, this.conf);
    }
}
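Because the URI above is registered as "path#symlink" and createSymlink() is enabled, each task can open the file through the symlink in its working directory. A hedged task-side sketch of that counterpart follows; the symlink name "jobfile.properties" and the properties format are illustrative assumptions, not code from the project above.
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Properties;

// Task-side counterpart to addLocalFiles(): the cached file appears in the task's
// current working directory under the symlink name chosen in the #fragment.
public final class CachedFileReader {

    private CachedFileReader() {}

    public static Properties loadCachedProperties(String symlinkName) throws IOException {
        File cached = new File(symlinkName);  // resolved relative to the task working directory
        if (!cached.exists()) {
            throw new IOException("Expected cached symlink not found: " + symlinkName);
        }
        Properties props = new Properties();
        FileInputStream in = new FileInputStream(cached);
        try {
            props.load(in);
        } finally {
            in.close();
        }
        return props;
    }
}
A mapper's setup method could then call CachedFileReader.loadCachedProperties("jobfile.properties") to read configuration shipped through the cache.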
Example 4: configureJob
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
@Override
protected void configureJob(Job job) throws IOException {
    Configuration conf = job.getConfiguration();
    job.setJarByClass(PartialBuilder.class);
    FileInputFormat.setInputPaths(job, getDataPath());
    FileOutputFormat.setOutputPath(job, getOutputPath(conf));
    // put the data in the DistributedCache
    DistributedCache.addCacheFile(getDataPath().toUri(), conf);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(MapredOutput.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(RuleBase.class);
    job.setMapperClass(ChiCSMapper.class);
    job.setReducerClass(ChiCSReducer.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
}
Example 5: copyCredentialIntoDistributedCache
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
private void copyCredentialIntoDistributedCache() throws URISyntaxException {
    LOG.debug("{} added to distributed cache with symlink {}", HDFS_GS_CREDENTIAL_DIRECTORY,
        "." + CACHED_CREDENTIAL_NAME);
    DistributedCache.addCacheFile(new URI(HDFS_GS_CREDENTIAL_ABSOLUTE_PATH), conf);
    // The "." must be prepended for the symlink to be created correctly for reference in the MapReduce job
    conf.set(GCP_KEYFILE_CACHED_LOCATION, "." + CACHED_CREDENTIAL_NAME);
}
Example 6: shareMapFile
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
private static final void shareMapFile(String symbol, int slots, Path mfile, JobConf job) throws IOException, URISyntaxException {
    FileSystem fs = FileSystem.get(job);
    if (fs.exists(mfile) && fs.getFileStatus(mfile).isDir()) {
        DistributedCache.createSymlink(job);
        FileStatus[] fstats = fs.listStatus(mfile, getPassDirectoriesFilter(fs));
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i = 0; i < fstats.length; i++) {
            Path curMap = fstats[i].getPath();
            MapFile.Reader mreader = new MapFile.Reader(fs, curMap.toString(), job);
            if (mreader.next(key, value)) {
                int rid = (int) (key.get() % slots);
                String uriWithLink =
                    curMap.toUri().toString() + "#" + symbol + "-" + Integer.toString(rid);
                DistributedCache.addCacheFile(new URI(uriWithLink), job);
            } else {
                System.exit(-1);
            }
            mreader.close();
        }
    }
    job.setInt(symbol, slots);
}
Example 7: shareZipfCore
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
private static final void shareZipfCore(String fname, DataOptions options, JobConf job) throws URISyntaxException {
    DistributedCache.createSymlink(job);
    Path zipfPath = new Path(options.getWorkPath(), fname);
    String uriWithLink = zipfPath.toString() + "#" + fname;
    DistributedCache.addCacheFile(new URI(uriWithLink), job);
}
Example 8: task3
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
/**
 * Extracts CF for each found anchor.
 *
 * @param inputPath
 * @param mapPath
 * @param outputPath
 * @throws IOException
 */
private void task3(String inputPath, String mapPath, String outputPath) throws IOException {
    LOG.info("Extracting anchor text (phase 3)...");
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);
    LOG.info(" - mapping: " + mapPath);
    JobConf conf = new JobConf(getConf(), ExtractWikipediaAnchorText.class);
    conf.setJobName(String.format("ExtractWikipediaAnchorText:phase3[input: %s, output: %s]", inputPath, outputPath));
    conf.setNumReduceTasks(1);
    String location = "map.dat";
    try {
        DistributedCache.addCacheFile(new URI(mapPath + "/part-00000/data" + "#" + location), conf);
        //DistributedCache.addCacheFile(new URI(mapPath + "/singleentitymap.data" + "#" + location), conf);
        DistributedCache.createSymlink(conf);
    } catch (URISyntaxException e) {
        e.printStackTrace();
    }
    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(MapFileOutputFormat.class);
    // conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(MyMapper3.class);
    conf.setCombinerClass(MyReducer3.class);
    conf.setReducerClass(MyReducer3.class);
    JobClient.runJob(conf);
}
Example 9: main
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    Configuration config = new Configuration();
    JobConf conf = new JobConf(config, UFOLocation2.class);
    conf.setJobName("UFOLocation");
    DistributedCache.addCacheFile(new URI("/user/hadoop/states.txt"), conf);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);
    JobConf mapconf1 = new JobConf(false);
    ChainMapper.addMapper(conf, UFORecordValidationMapper.class,
        LongWritable.class, Text.class, LongWritable.class, Text.class,
        true, mapconf1);
    JobConf mapconf2 = new JobConf(false);
    // DistributedCache.addCacheFile(new URI("/user/hadoop/states.txt"), mapconf2);
    ChainMapper.addMapper(conf, MapClass.class,
        LongWritable.class, Text.class, Text.class, LongWritable.class,
        true, mapconf2);
    conf.setMapperClass(ChainMapper.class);
    //conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(LongSumReducer.class);
    conf.setReducerClass(LongSumReducer.class);
    FileInputFormat.setInputPaths(conf, args[0]);
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);
}
Example 10: main
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    Configuration config = new Configuration();
    JobConf conf = new JobConf(config, UFOLocation3.class);
    conf.setJobName("UFOLocation");
    DistributedCache.addCacheFile(new URI("/user/hadoop/states.txt"), conf);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(LongWritable.class);
    JobConf mapconf1 = new JobConf(false);
    ChainMapper.addMapper(conf, UFOCountingRecordValidationMapper.class,
        LongWritable.class, Text.class, LongWritable.class, Text.class,
        true, mapconf1);
    JobConf mapconf2 = new JobConf(false);
    // DistributedCache.addCacheFile(new URI("/user/hadoop/states.txt"), mapconf2);
    ChainMapper.addMapper(conf, MapClass.class,
        LongWritable.class, Text.class, Text.class, LongWritable.class,
        true, mapconf2);
    conf.setMapperClass(ChainMapper.class);
    //conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(LongSumReducer.class);
    conf.setReducerClass(LongSumReducer.class);
    FileInputFormat.setInputPaths(conf, args[0]);
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf);
}
Example 11: build
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public RuleBase build() throws IOException, ClassNotFoundException, InterruptedException {
    Path outputPath = getOutputPath(conf);
    FileSystem fs = outputPath.getFileSystem(conf);
    // check the output
    if (fs.exists(outputPath)) {
        throw new IOException("Chi: Output path already exists : " + outputPath);
    }
    setFuzzy_ChiBuilder(conf, fuzzy_ChiBuilder);
    // put the dataset into the DistributedCache
    DistributedCache.addCacheFile(datasetPath.toUri(), conf);
    Job job = new Job(conf, "fuzzy_Chi builder");
    log.debug("Chi: Configuring the job...");
    configureJob(job);
    log.debug("Chi: Running the job...");
    if (!runJob(job)) {
        log.error("Chi: Job failed!");
        return null;
    }
    if (isOutput(conf)) {
        log.debug("Parsing the output...");
        RuleBase ruleBase = parseOutput(job);
        HadoopUtil.delete(conf, outputPath);
        return ruleBase;
    }
    return null;
}
Example 12: testCacheFilesLocalization
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
/**
 * Run the job with two distributed cache files and verify
 * whether the job succeeded or not.
 * @throws Exception
 */
@Test
public void testCacheFilesLocalization() throws Exception {
    conf = wovenClient.getDaemonConf();
    SleepJob job = new SleepJob();
    job.setConf(conf);
    JobConf jobConf = job.setupJobConf(4, 1, 4000, 4000, 1000, 1000);
    DistributedCache.createSymlink(jobConf);
    DistributedCache.addCacheFile(cacheFileURI1, jobConf);
    DistributedCache.addCacheFile(cacheFileURI2, jobConf);
    RunningJob runJob = jobClient.submitJob(jobConf);
    JobID jobId = runJob.getID();
    Assert.assertTrue("Job has not been started for 1 min.",
        jtClient.isJobStarted(jobId));
    TaskInfo[] taskInfos = wovenClient.getTaskInfo(jobId);
    Assert.assertTrue("Cache File1 has not been localized",
        checkLocalization(taskInfos, cacheFile1));
    Assert.assertTrue("Cache File2 has not been localized",
        checkLocalization(taskInfos, cacheFile2));
    JobInfo jInfo = wovenClient.getJobInfo(jobId);
    LOG.info("Waiting till the job is completed...");
    while (!jInfo.getStatus().isJobComplete()) {
        UtilsForTests.waitFor(100);
        jInfo = wovenClient.getJobInfo(jobId);
    }
    Assert.assertEquals("Job has not succeeded",
        JobStatus.SUCCEEDED, jInfo.getStatus().getRunState());
}
Example 13: run
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public void run() throws IOException, ClassNotFoundException, InterruptedException {
    FileSystem fs = FileSystem.get(conf);
    // check the output
    if (fs.exists(outputPath)) {
        throw new IOException("Chi: Output path already exists : " + outputPath);
    }
    log.info("Chi: Adding the dataset to the DistributedCache");
    // put the dataset into the DistributedCache
    DistributedCache.addCacheFile(datasetPath.toUri(), conf);
    log.info("Chi: Adding the model to the DistributedCache");
    DistributedCache.addCacheFile(modelPath.toUri(), conf);
    Job job = new Job(conf, "Chi_RW classifier");
    log.info("Chi: Configuring the job...");
    configureJob(job);
    log.info("Chi: Running the job...");
    if (!job.waitForCompletion(true)) {
        throw new IllegalStateException("Chi: Job failed!");
    }
    parseOutput(job);
    HadoopUtil.delete(conf, mappersOutputPath);
}
Example 14: main
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "DiseaseApplication_format_4");
    DistributedCache.addCacheFile(new Path("/user/brfilho/generef/ABCB11_GENE.txt").toUri(), job.getConfiguration());
    DistributedCache.addCacheFile(new Path("/user/brfilho/generef/ABCB4_GENE.txt").toUri(), job.getConfiguration());
    DistributedCache.addCacheFile(new Path("/user/brfilho/generef/ATP8B1_GENE.txt").toUri(), job.getConfiguration());
    DistributedCache.addCacheFile(new Path("/user/brfilho/generef/JAG1_GENE.txt").toUri(), job.getConfiguration());
    DistributedCache.addCacheFile(new Path("/user/brfilho/generef/SERPINA1_GENE.txt").toUri(), job.getConfiguration());
    DistributedCache.addCacheFile(new Path("/user/brfilho/db/ABCB11.txt").toUri(), job.getConfiguration());
    DistributedCache.addCacheFile(new Path("/user/brfilho/db/ABCB4.txt").toUri(), job.getConfiguration());
    DistributedCache.addCacheFile(new Path("/user/brfilho/db/ATP8B1.txt").toUri(), job.getConfiguration());
    DistributedCache.addCacheFile(new Path("/user/brfilho/db/JAG1.txt").toUri(), job.getConfiguration());
    DistributedCache.addCacheFile(new Path("/user/brfilho/db/SERPINA1.txt").toUri(), job.getConfiguration());
    job.setJarByClass(DiseaseApplication_format_4.class);
    job.setMapperClass(Map.class);
    job.setCombinerClass(Reduce.class);
    //job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    //job.setNumReduceTasks(5);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
Example 15: runJob
import org.apache.hadoop.filecache.DistributedCache; // import the package/class the method depends on
public static void runJob(Configuration conf,
                          Path userLogsPath,
                          Path usersPath,
                          Path outputPath)
        throws Exception {
    FileSystem fs = usersPath.getFileSystem(conf);
    FileStatus usersStatus = fs.getFileStatus(usersPath);
    if (usersStatus.isDir()) {
        for (FileStatus f : fs.listStatus(usersPath)) {
            if (f.getPath().getName().startsWith("part")) {
                DistributedCache.addCacheFile(f.getPath().toUri(), conf);
            }
        }
    } else {
        DistributedCache.addCacheFile(usersPath.toUri(), conf);
    }
    Job job = new Job(conf);
    job.setJarByClass(FinalJoinJob.class);
    job.setMapperClass(GenericReplicatedJoin.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    outputPath.getFileSystem(conf).delete(outputPath, true);
    FileInputFormat.setInputPaths(job, userLogsPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    if (!job.waitForCompletion(true)) {
        throw new Exception("Job failed");
    }
}