本文整理汇总了Java中org.apache.hadoop.filecache.DistributedCache类的典型用法代码示例。如果您正苦于以下问题:Java DistributedCache类的具体用法?Java DistributedCache怎么用?Java DistributedCache使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
DistributedCache类属于org.apache.hadoop.filecache包,在下文中一共展示了DistributedCache类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: configure
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Initializes this task's state from the job configuration.
 *
 * <p>Reads the {@code pages}, {@code slots} and {@code visits} counters (each defaulting to 0)
 * and the {@code delimiter} string, then builds the {@code Visit} helper from the files that
 * DistributedCache reports as locally cached. If an {@link IOException} occurs while building
 * it, the stack trace is printed and neither {@code visit} nor {@code vitem} is assigned by
 * this call.
 *
 * @param job the job configuration to read from
 */
public void configure(JobConf job) {
    // Simple configuration lookups; they need no IOException handling.
    pages = job.getLong("pages", 0);
    slots = job.getLong("slots", 0);
    visits = job.getLong("visits", 0);
    delim = job.get("delimiter");

    try {
        visit = new Visit(DistributedCache.getLocalCacheFiles(job), delim, pages);

        // Only reached when the Visit helper was built successfully.
        vitem = new JoinBytesInt();
        vitem.refs = 1;
    } catch (IOException e) {
        e.printStackTrace();
    }
}
示例2: main
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Command-line entry point for the Question4 job.
 *
 * <p>Expects exactly three arguments after generic Hadoop options have been stripped:
 * the cache file URI, the input path and the output path. Prints a usage message and exits
 * with status 3 otherwise. On completion the JVM exits with 0 if the job succeeded and 1 if
 * it failed.
 *
 * @param args raw command-line arguments, possibly including generic Hadoop options
 * @throws IOException if the job cannot be set up or run
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws URISyntaxException if the cache file argument is not a valid URI
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Validate before touching otherArgs[0]; an empty argument list must produce the usage
    // message rather than an ArrayIndexOutOfBoundsException.
    if (otherArgs.length != 3) {
        System.err.println("Usage: Question4 <cacheFile> <in> <out>");
        System.exit(3);
    }
    conf.set("cachefile", otherArgs[0]);

    Job job = new Job(conf, "Question4");

    // Job works on its own copy of the Configuration, so the cache file has to be registered
    // on job.getConfiguration(); use the parsed argument, not the raw args array, so generic
    // options in front of it do not shift the index.
    DistributedCache.addCacheFile(new URI(otherArgs[0]), job.getConfiguration());

    job.setJarByClass(Question4.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

    // Propagate the job outcome to the caller instead of always exiting with 0.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
示例3: setup
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * One-time fixture setup for the test class.
 *
 * <p>Does nothing when {@code isLocal} is set. Otherwise it loads the Hadoop configuration,
 * uploads the testing jar to HDFS and adds it to the DistributedCache classpath, then uploads
 * the test resource to two HDFS locations and stores their qualified names.
 *
 * @throws Exception if any of the provisioning steps fails
 */
@SuppressWarnings("deprecation")
@BeforeClass
public static void setup() throws Exception {
    if (isLocal) {
        return;
    }

    hadoopConfig = HdpBootstrap.hadoopConfig();

    HdfsUtils.copyFromLocal(Provisioner.ESHADOOP_TESTING_JAR, Provisioner.HDFS_ES_HDP_LIB);
    hdfsEsLib = HdfsUtils.qualify(Provisioner.HDFS_ES_HDP_LIB, hadoopConfig);

    // Make the uploaded jar available on the task classpath via DistributedCache.
    try {
        DistributedCache.addArchiveToClassPath(new Path(Provisioner.HDFS_ES_HDP_LIB), hadoopConfig);
    } catch (IOException ex) {
        throw new RuntimeException("Cannot provision Hive", ex);
    }

    // Data resource: upload first, then replace the raw path with its qualified form.
    hdfsResource = "/eshdp/hive/hive-compund.dat";
    HdfsUtils.copyFromLocal(originalResource, hdfsResource);
    hdfsResource = HdfsUtils.qualify(hdfsResource, hadoopConfig);

    // NOTE(review): the JSON target is also populated from originalResource -- confirm that
    // this is intended rather than a separate JSON source file.
    hdfsJsonResource = "/eshdp/hive/hive-compund.json";
    HdfsUtils.copyFromLocal(originalResource, hdfsJsonResource);
    hdfsJsonResource = HdfsUtils.qualify(hdfsJsonResource, hadoopConfig);
}
示例4: run
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Configures and runs the TeraSort job.
 *
 * <p>{@code args[0]} is the input directory and {@code args[1]} the output directory. The
 * partition file is written inside the qualified input directory and shipped to the tasks
 * through DistributedCache under a symlink named after
 * {@code TeraInputFormat.PARTITION_FILENAME}.
 *
 * @param args input directory followed by output directory
 * @return always 0 when the job has run without throwing
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    LOG.info("starting");
    JobConf job = (JobConf) getConf();

    // Locate the partition file inside the fully qualified input directory.
    Path rawInput = new Path(args[0]);
    Path qualifiedInput = rawInput.makeQualified(rawInput.getFileSystem(job));
    Path partitionPath = new Path(qualifiedInput, TeraInputFormat.PARTITION_FILENAME);
    // The "#name" fragment is the symlink name requested for the cached file.
    URI cacheUri = new URI(partitionPath.toString() + "#" + TeraInputFormat.PARTITION_FILENAME);

    // Job identity.
    job.setJobName("TeraSort");
    job.setJarByClass(TeraSort.class);

    // Input and output locations and formats.
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setInputFormat(TeraInputFormat.class);
    job.setOutputFormat(TeraOutputFormat.class);

    // Key/value types and partitioning.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);

    // Write the partition file only after the input settings above are in place, then
    // register it with DistributedCache.
    TeraInputFormat.writePartitionFile(job, partitionPath);
    DistributedCache.addCacheFile(cacheUri, job);
    DistributedCache.createSymlink(job);

    job.setInt("dfs.replication", 1);
    TeraOutputFormat.setFinalSync(job, true);

    JobClient.runJob(job);
    LOG.info("done");
    return 0;
}
示例5: downloadPrivateCache
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Downloads the private entries of the distributed cache: first the cache files, then the
 * cache archives.
 *
 * @param conf the job's configuration
 * @return the values returned for the file entries followed by the values returned for the
 *         archive entries, concatenated into a single array
 * @throws IOException if downloading any cache object fails
 */
public static long[] downloadPrivateCache(Configuration conf) throws IOException {
    // Plain files (last argument false = not an archive).
    long[] privateFileSizes = downloadPrivateCacheObjects(
        conf,
        DistributedCache.getCacheFiles(conf),
        DistributedCache.getLocalCacheFiles(conf),
        DistributedCache.getFileTimestamps(conf),
        TrackerDistributedCacheManager.getFileVisibilities(conf),
        false);

    // Archives (last argument true = archive).
    long[] privateArchiveSizes = downloadPrivateCacheObjects(
        conf,
        DistributedCache.getCacheArchives(conf),
        DistributedCache.getLocalCacheArchives(conf),
        DistributedCache.getArchiveTimestamps(conf),
        TrackerDistributedCacheManager.getArchiveVisibilities(conf),
        true);

    // Files before archives: this ordering has to match the order of cache files
    // in TaskDistributedCacheManager.
    return ArrayUtils.addAll(privateFileSizes, privateArchiveSizes);
}
示例6: main
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Command-line entry point for the DiseaseApplication_format_4 job.
 *
 * <p>Registers a fixed set of gene reference files with DistributedCache, configures the
 * mapper, reducer and output types, and exits with 0 if the job succeeds or 1 otherwise.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "DiseaseApplication_format_4");

    // Gene reference files shipped to every task, registered in this order.
    String[] geneReferenceFiles = {
        "/user/brfilho/generef/ABCB11_GENE.txt",
        "/user/brfilho/generef/ABCB4_GENE.txt",
        "/user/brfilho/generef/ATP8B1_GENE.txt",
        "/user/brfilho/generef/JAG1_GENE.txt",
        "/user/brfilho/generef/SERPINA1_GENE.txt",
    };
    for (String geneFile : geneReferenceFiles) {
        DistributedCache.addCacheFile(new Path(geneFile).toUri(), job.getConfiguration());
    }

    job.setJarByClass(DiseaseApplication_format_4.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}
示例7: writeDocTopicInference
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Builds and submits the map-only job that writes the final document/topic inference.
 *
 * <p>If {@code modelInput} is non-null and exists, its part files are registered as
 * DistributedCache files for the job. The job is submitted asynchronously; this method does
 * not wait for it to finish.
 *
 * @param conf base configuration for the job
 * @param corpus input path of the corpus
 * @param modelInput directory holding the model part files; may be {@code null}
 * @param output output path for the inference result
 * @return the submitted job
 * @throws IOException if the file system or job submission fails
 * @throws ClassNotFoundException if a job class cannot be resolved during submission
 * @throws InterruptedException if submission is interrupted
 */
private static Job writeDocTopicInference(Configuration conf, Path corpus, Path modelInput, Path output)
    throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = String.format("Writing final document/topic inference from %s to %s", corpus, output);
    log.info("About to run: " + jobName);

    Job job = new Job(conf, jobName);
    job.setMapperClass(CVB0DocInferenceMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    FileSystem fs = FileSystem.get(corpus.toUri(), conf);
    if (modelInput != null && fs.exists(modelInput)) {
        FileStatus[] statuses = fs.listStatus(modelInput, PathFilters.partFilter());
        URI[] modelUris = new URI[statuses.length];
        for (int i = 0; i < statuses.length; i++) {
            modelUris[i] = statuses[i].getPath().toUri();
        }
        // Retained so that callers inspecting conf afterwards observe the same state as before.
        DistributedCache.setCacheFiles(modelUris, conf);
        // The Job took its own copy of conf when it was constructed above, so the cache files
        // must also be registered on the job's configuration to reach the submitted job.
        DistributedCache.setCacheFiles(modelUris, job.getConfiguration());
    }

    FileInputFormat.addInputPath(job, corpus);
    FileOutputFormat.setOutputPath(job, output);
    job.setJarByClass(CVB0Driver.class);
    job.submit();
    return job;
}
示例8: setup
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Reads the vectorization settings from the job configuration and loads the dictionary
 * (word to id) from the first file registered in DistributedCache.
 *
 * @param context the task context supplying the configuration
 * @throws IOException if the superclass setup or reading the dictionary fails
 * @throws InterruptedException if the superclass setup is interrupted
 * @throws IllegalArgumentException if no cache file is registered
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();

    URI[] cacheUris = DistributedCache.getCacheFiles(conf);
    boolean hasCacheFile = cacheUris != null && cacheUris.length >= 1;
    Preconditions.checkArgument(hasCacheFile, "missing paths from the DistributedCache");

    dimension = conf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
    sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
    namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
    // The current field value serves as the default when the key is absent.
    maxNGramSize = conf.getInt(DictionaryVectorizer.MAX_NGRAMS, maxNGramSize);

    // Only the first cache entry is used as the dictionary.
    Path dictionaryPath = new Path(cacheUris[0].getPath());
    SequenceFileIterable<Writable, IntWritable> entries =
        new SequenceFileIterable<Writable, IntWritable>(dictionaryPath, true, conf);
    // Key is the word, value is its id.
    for (Pair<Writable, IntWritable> entry : entries) {
        dictionary.put(entry.getFirst().toString(), entry.getSecond().get());
    }
}
示例9: setup
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Reads the TF-IDF settings from the job configuration and loads the document-frequency
 * dictionary (feature to document frequency) from the first file registered in
 * DistributedCache.
 *
 * @param context the task context supplying the configuration
 * @throws IOException if the superclass setup or reading the dictionary fails
 * @throws InterruptedException if the superclass setup is interrupted
 * @throws IllegalArgumentException if no cache file is registered
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();

    URI[] cacheUris = DistributedCache.getCacheFiles(conf);
    boolean hasCacheFile = cacheUris != null && cacheUris.length >= 1;
    Preconditions.checkArgument(hasCacheFile, "missing paths from the DistributedCache");

    // Corpus statistics and document-frequency bounds.
    vectorCount = conf.getLong(TFIDFConverter.VECTOR_COUNT, 1);
    featureCount = conf.getLong(TFIDFConverter.FEATURE_COUNT, 1);
    minDf = conf.getInt(TFIDFConverter.MIN_DF, 1);
    maxDf = conf.getLong(TFIDFConverter.MAX_DF, -1);

    // Output vector flavour.
    sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
    namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);

    // Only the first cache entry is used as the dictionary.
    Path dfPath = new Path(cacheUris[0].getPath());
    SequenceFileIterable<IntWritable,LongWritable> entries =
        new SequenceFileIterable<IntWritable,LongWritable>(dfPath, true, conf);
    // Key is the feature, value is its document frequency.
    for (Pair<IntWritable,LongWritable> entry : entries) {
        dictionary.put(entry.getFirst().get(), entry.getSecond().get());
    }
}
示例10: setup
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Reads the maximum document frequency from the job configuration and loads the
 * document-frequency dictionary (feature to document frequency) from the first file
 * registered in DistributedCache.
 *
 * @param context the task context supplying the configuration
 * @throws IOException if the superclass setup or reading the dictionary fails
 * @throws InterruptedException if the superclass setup is interrupted
 * @throws IllegalArgumentException if no cache file is registered
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration conf = context.getConfiguration();

    URI[] cacheUris = DistributedCache.getCacheFiles(conf);
    boolean hasCacheFile = cacheUris != null && cacheUris.length >= 1;
    Preconditions.checkArgument(hasCacheFile, "missing paths from the DistributedCache");

    maxDf = conf.getLong(HighDFWordsPruner.MAX_DF, -1);

    // Only the first cache entry is used as the dictionary.
    Path dfPath = new Path(cacheUris[0].getPath());
    SequenceFileIterable<IntWritable, LongWritable> entries =
        new SequenceFileIterable<IntWritable, LongWritable>(dfPath, true, conf);
    // Key is the feature, value is its document frequency.
    for (Pair<IntWritable, LongWritable> entry : entries) {
        dictionary.put(entry.getFirst().get(), entry.getSecond().get());
    }
}
示例11: addJars
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Add framework or job-specific jars to the classpath through DistributedCache
 * so the mappers can use them.
 *
 * <p>Each entry of {@code jarFileList} is treated as a path or glob pattern on the local
 * file system. Every match is copied into {@code jarFileDir} on the job file system and then
 * added to the classpath. Entries that match nothing are logged and skipped.
 *
 * @param jarFileDir destination directory on the job file system
 * @param jarFileList list of local jar paths or glob patterns, split with {@code SPLITTER}
 * @throws IOException if globbing, copying or registering a jar fails
 */
private void addJars(Path jarFileDir, String jarFileList) throws IOException {
    LocalFileSystem lfs = FileSystem.getLocal(this.conf);
    for (String jarFile : SPLITTER.split(jarFileList)) {
        Path srcJarFile = new Path(jarFile);
        FileStatus[] fileStatusList = lfs.globStatus(srcJarFile);
        // globStatus returns null for a non-glob path that does not exist; without this
        // guard the loop below would fail with a NullPointerException.
        if (fileStatusList == null) {
            LOG.warn(String.format("No local file matches %s; nothing added to classpath", srcJarFile));
            continue;
        }

        for (FileStatus status : fileStatusList) {
            // DistributedCache requires absolute path, so we need to use makeQualified.
            Path destJarFile = new Path(this.fs.makeQualified(jarFileDir), status.getPath().getName());
            // Copy the jar file from local file system to HDFS
            this.fs.copyFromLocalFile(status.getPath(), destJarFile);
            // Then add the jar file on HDFS to the classpath
            LOG.info(String.format("Adding %s to classpath", destJarFile));
            DistributedCache.addFileToClassPath(destJarFile, this.conf, this.fs);
        }
    }
}
示例12: addLocalFiles
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Add local non-jar files the job depends on to DistributedCache.
 *
 * <p>Each listed file is copied into {@code jobFileDir} on the job file system and registered
 * as a cache file with a symlink named after the file itself.
 *
 * @param jobFileDir destination directory on the job file system
 * @param jobFileList list of local file paths, split with {@code SPLITTER}
 * @throws IOException if copying a file fails
 */
private void addLocalFiles(Path jobFileDir, String jobFileList) throws IOException {
    DistributedCache.createSymlink(this.conf);

    for (String jobFile : SPLITTER.split(jobFileList)) {
        Path localSource = new Path(jobFile);

        // Target lives under the qualified job directory and keeps the source file name.
        Path remoteTarget = new Path(this.fs.makeQualified(jobFileDir), localSource.getName());
        this.fs.copyFromLocalFile(localSource, remoteTarget);

        // Cache URI has the form path#symlink; only the path component of the target is used.
        String symlinkName = remoteTarget.getName();
        URI cacheUri = URI.create(remoteTarget.toUri().getPath() + "#" + symlinkName);

        LOG.info(String.format("Adding %s to DistributedCache", cacheUri));
        DistributedCache.addCacheFile(cacheUri, this.conf);
    }
}
示例13: addSolrConfToDistributedCache
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Uploads the Solr home zip to {@code /tmp} on the job's file system and registers it as a
 * DistributedCache archive.
 *
 * <p>The generated zip name is stored in the job configuration under {@code ZIP_NAME}, and
 * the uploaded path is stored under {@code SETUP_OK}.
 *
 * @param job the job whose configuration is updated
 * @param solrHomeZip the local zip file to upload
 * @throws IOException if the file system cannot be obtained or the upload fails
 */
public static void addSolrConfToDistributedCache(Job job, File solrHomeZip)
    throws IOException {
    Configuration jobConf = job.getConfiguration();

    // A random UUID prefix keeps concurrently running jobs from colliding on the zip name.
    String uniqueZipName = UUID.randomUUID().toString() + '.' + ZIP_FILE_BASE_NAME;
    jobConf.set(ZIP_NAME, uniqueZipName);

    Path zipPath = new Path("/tmp", getZipName(jobConf));
    FileSystem fs = FileSystem.get(jobConf);
    Path localZip = new Path(solrHomeZip.toString());
    fs.copyFromLocalFile(localZip, zipPath);

    // The "#name" fragment is the link name under which the archive is exposed.
    String archiveSpec = zipPath.toString() + '#' + getZipName(jobConf);
    final URI baseZipUrl = fs.getUri().resolve(archiveSpec);
    DistributedCache.addCacheArchive(baseZipUrl, jobConf);

    LOG.debug("Set Solr distributed cache: {}", Arrays.asList(job.getCacheArchives()));
    LOG.debug("Set zipPath: {}", zipPath);

    // Actually send the path for the configuration zip file
    jobConf.set(SETUP_OK, zipPath.toString());
}
示例14: configure
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Reads the word-count options from the job configuration and, when
 * {@code wordcount.skip.patterns} is enabled, parses every locally cached file as a
 * skip-pattern file.
 *
 * <p>Failure to obtain the cached files is reported on standard error and treated as
 * "no pattern files". The same applies when DistributedCache reports no local files at all.
 *
 * @param job the job configuration to read from
 */
public void configure(JobConf job) {
    caseSensitive = job.getBoolean("wordcount.case.sensitive", true);
    inputFile = job.get("map.input.file");

    if (!job.getBoolean("wordcount.skip.patterns", false)) {
        return;
    }

    Path[] patternsFiles = null;
    try {
        patternsFiles = DistributedCache.getLocalCacheFiles(job);
    } catch (IOException ioe) {
        System.err.println("Caught exception while getting cached files: " + StringUtils.stringifyException(ioe));
    }

    // getLocalCacheFiles returns null when no cache files are registered; iterating over
    // that result directly would throw a NullPointerException.
    if (patternsFiles == null) {
        return;
    }
    for (Path patternsFile : patternsFiles) {
        parseSkipFile(patternsFile);
    }
}
示例15: configure
import org.apache.hadoop.filecache.DistributedCache; //导入依赖的package包/类
/**
 * Stages every local path on the job file system and adds it to the job classpath through
 * DistributedCache, then records the mapred job jar in the configuration when one is known.
 *
 * @param job the job to configure
 * @throws IOException if a file system cannot be obtained or staging a file fails
 */
@Override
public void configure(Job job) throws IOException {
    for (Path localPath : getLocalPaths()) {
        Configuration conf = job.getConfiguration();
        FileSystem jobFS = FileSystem.get(conf);
        FileSystem localFS = FileSystem.getLocal(conf);

        Path staged = uploadFileIfNecessary(localFS, localPath, jobFS);
        DistributedCache.addFileToClassPath(staged, conf, jobFS);
    }

    // Setting a mapred job jar is not strictly required here,
    // but doing so suppresses a warning.
    String mapredJar = getMapredJar();
    if (mapredJar != null) {
        job.getConfiguration().set(Hadoop1Compat.CFG_JOB_JAR, mapredJar);
    }
}