

Java DistributedCache Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.filecache.DistributedCache. If you are wondering what the DistributedCache class does, how to use it, or what working code that uses it looks like, the curated class examples below should help.


The DistributedCache class belongs to the org.apache.hadoop.filecache package. Fifteen code examples of the class are shown below, ordered by popularity.
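
Before the harvested examples, here is a minimal self-contained sketch of the pattern almost all of them follow: the driver registers a file with DistributedCache before job submission, and each task reads the node-local copy during setup. The class names and the path /data/lookup.txt are illustrative assumptions, not taken from any example on this page.

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;

public class CacheSketch {

  public static class LookupMapper extends Mapper<LongWritable, Text, Text, Text> {
    @Override
    protected void setup(Context context) throws IOException {
      // getLocalCacheFiles returns the node-local copies of the registered files.
      Path[] cached = DistributedCache.getLocalCacheFiles(context.getConfiguration());
      if (cached != null && cached.length > 0) {
        BufferedReader reader = new BufferedReader(new FileReader(cached[0].toString()));
        try {
          String line;
          while ((line = reader.readLine()) != null) {
            // ... populate an in-memory lookup table for use in map() ...
          }
        } finally {
          reader.close();
        }
      }
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "cache-sketch");
    // Register an HDFS file before submission; it is shipped to every task node.
    DistributedCache.addCacheFile(new URI("/data/lookup.txt"), job.getConfiguration());
    job.setJarByClass(CacheSketch.class);
    job.setMapperClass(LookupMapper.class);
    // ... input/output paths and formats as in the examples below ...
    job.waitForCompletion(true);
  }
}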

Example 1: configure

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
public void configure(JobConf job) {
	try {
		pages = job.getLong("pages", 0);
		slots = job.getLong("slots", 0);
		visits = job.getLong("visits", 0);
		delim = job.get("delimiter");

		// Build the visit lookup from the files localized by DistributedCache.
		visit = new Visit(DistributedCache.getLocalCacheFiles(job), delim, pages);

		vitem = new JoinBytesInt();
		vitem.refs = 1;
	} catch (IOException e) {
		e.printStackTrace();
	}
}
 
Developer ID: thrill, Project: fst-bench, Lines: 19, Source: HiveData.java
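
The configure() method above only shows the consuming side; the files returned by getLocalCacheFiles(job) must have been registered by the driver before submission. A hypothetical driver-side counterpart is sketched below — the property values and the cache path are assumptions, not code from fst-bench.

import java.net.URI;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.mapred.JobConf;

public class HiveDataDriverSketch {
  // Hypothetical counterpart: registers the parameters and cache file that
  // the configure() method above reads back inside each task.
  public static JobConf buildJobConf(long pages, long slots, long visits) throws Exception {
    JobConf job = new JobConf(HiveDataDriverSketch.class);
    job.setLong("pages", pages);
    job.setLong("slots", slots);
    job.setLong("visits", visits);
    job.set("delimiter", ",");                                       // delimiter value is an assumption
    DistributedCache.addCacheFile(new URI("/data/visit-dict"), job); // path is an assumption
    return job;
  }
}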

Example 2: main

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {

		Configuration conf = new Configuration();
		String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
		// Validate the argument count before touching otherArgs.
		if (otherArgs.length != 3) {
			System.err.println("Usage: Question4 <cacheFile> <in> <out>");
			System.exit(3);
		}
		conf.set("cachefile", otherArgs[0]);
		Job job = new Job(conf, "Question4");
		// Register the cache file on the job's own configuration: Job copies
		// conf at construction time, so later changes to conf are not seen.
		DistributedCache.addCacheFile(new URI(otherArgs[0]), job.getConfiguration());

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(FloatWritable.class);
		job.setJarByClass(Question4.class);
		job.setMapperClass(Map.class);
		job.setReducerClass(Reduce.class);

		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);

		FileInputFormat.addInputPath(job, new Path(otherArgs[1]));
		FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));
		job.waitForCompletion(true);
	}
 
Developer ID: BhargaviRavula, Project: Bigdata, Lines: 26, Source: Question4.java

Example 3: setup

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
@SuppressWarnings("deprecation")
@BeforeClass
public static void setup() throws Exception {
    if (!isLocal) {
        hadoopConfig = HdpBootstrap.hadoopConfig();

        HdfsUtils.copyFromLocal(Provisioner.ESHADOOP_TESTING_JAR, Provisioner.HDFS_ES_HDP_LIB);
        hdfsEsLib = HdfsUtils.qualify(Provisioner.HDFS_ES_HDP_LIB, hadoopConfig);
        // copy jar to DistributedCache
        try {
            DistributedCache.addArchiveToClassPath(new Path(Provisioner.HDFS_ES_HDP_LIB), hadoopConfig);
        } catch (IOException ex) {
            throw new RuntimeException("Cannot provision Hive", ex);
        }

        hdfsResource = "/eshdp/hive/hive-compund.dat";
        HdfsUtils.copyFromLocal(originalResource, hdfsResource);
        hdfsResource = HdfsUtils.qualify(hdfsResource, hadoopConfig);

        hdfsJsonResource = "/eshdp/hive/hive-compund.json";
        HdfsUtils.copyFromLocal(originalResource, hdfsJsonResource);
        hdfsJsonResource = HdfsUtils.qualify(hdfsJsonResource, hadoopConfig);
    }
}
 
Developer ID: xushjie1987, Project: es-hadoop-v2.2.0, Lines: 25, Source: HiveSuite.java

Example 4: run

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
public int run(String[] args) throws Exception {
  LOG.info("starting");
  JobConf job = (JobConf) getConf();
  Path inputDir = new Path(args[0]);
  inputDir = inputDir.makeQualified(inputDir.getFileSystem(job));
  Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME);
  URI partitionUri = new URI(partitionFile.toString() +
                             "#" + TeraInputFormat.PARTITION_FILENAME);
  TeraInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));
  job.setJobName("TeraSort");
  job.setJarByClass(TeraSort.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormat(TeraInputFormat.class);
  job.setOutputFormat(TeraOutputFormat.class);
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TeraInputFormat.writePartitionFile(job, partitionFile);
  DistributedCache.addCacheFile(partitionUri, job);
  DistributedCache.createSymlink(job);
  job.setInt("dfs.replication", 1);
  TeraOutputFormat.setFinalSync(job, true);
  JobClient.runJob(job);
  LOG.info("done");
  return 0;
}
 
Developer ID: Nextzero, Project: hadoop-2.6.0-cdh5.4.3, Lines: 27, Source: TeraSort.java
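
The pairing of a '#'-fragment URI with createSymlink(job) above is what lets each task open the partition file by its bare name: the fragment becomes a symlink in the task's working directory. Below is a sketch of the reading side; it is illustrative only, since the real TotalOrderPartitioner has its own lookup logic.

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class PartitionFileSketch {
  // Resolve the partition file through the symlink that DistributedCache
  // created in the task's current working directory.
  static Path localPartitionFile(JobConf job) throws IOException {
    Path link = new Path(TeraInputFormat.PARTITION_FILENAME); // relative => working dir
    return FileSystem.getLocal(job).makeQualified(link);
  }
}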

Example 5: downloadPrivateCache

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
/**
 * Download the parts of the distributed cache that are private.
 * @param conf the job's configuration
 * @return the sizes of the localized files followed by the localized archives
 * @throws IOException if the cache objects cannot be downloaded
 */
public static long[] downloadPrivateCache(Configuration conf) throws IOException {
  long[] fileSizes = downloadPrivateCacheObjects(conf,
                              DistributedCache.getCacheFiles(conf),
                              DistributedCache.getLocalCacheFiles(conf),
                              DistributedCache.getFileTimestamps(conf),
                              TrackerDistributedCacheManager.
                                getFileVisibilities(conf),
                              false);

  long[] archiveSizes = downloadPrivateCacheObjects(conf,
                                DistributedCache.getCacheArchives(conf),
                                DistributedCache.getLocalCacheArchives(conf),
                                DistributedCache.getArchiveTimestamps(conf),
                                TrackerDistributedCacheManager.
                                  getArchiveVisibilities(conf),
                                true);

  // The order here matters - it has to match order of cache files
  // in TaskDistributedCacheManager.
  return ArrayUtils.addAll(fileSizes, archiveSizes);
}
 
Developer ID: Nextzero, Project: hadoop-2.6.0-cdh5.4.3, Lines: 28, Source: JobLocalizer.java

Example 6: main

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
public static void main(String[] args) throws Exception {
  Configuration conf = new Configuration();

  Job job = new Job(conf, "DiseaseApplication_format_4");

  DistributedCache.addCacheFile(new Path("/user/brfilho/generef/ABCB11_GENE.txt").toUri(), job.getConfiguration());
  DistributedCache.addCacheFile(new Path("/user/brfilho/generef/ABCB4_GENE.txt").toUri(), job.getConfiguration());
  DistributedCache.addCacheFile(new Path("/user/brfilho/generef/ATP8B1_GENE.txt").toUri(), job.getConfiguration());
  DistributedCache.addCacheFile(new Path("/user/brfilho/generef/JAG1_GENE.txt").toUri(), job.getConfiguration());
  DistributedCache.addCacheFile(new Path("/user/brfilho/generef/SERPINA1_GENE.txt").toUri(), job.getConfiguration());

  job.setJarByClass(DiseaseApplication_format_4.class);
  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);

  FileInputFormat.setInputPaths(job, new Path(args[0]));
  FileOutputFormat.setOutputPath(job, new Path(args[1]));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
 
Developer ID: GeneticMapping, Project: GSM, Lines: 26, Source: Backup_2_DiseaseApplication_format_4.java

Example 7: writeDocTopicInference

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
private static Job writeDocTopicInference(Configuration conf, Path corpus, Path modelInput, Path output)
  throws IOException, ClassNotFoundException, InterruptedException {
  String jobName = String.format("Writing final document/topic inference from %s to %s", corpus, output);
  log.info("About to run: " + jobName);
  Job job = new Job(conf, jobName);
  job.setMapperClass(CVB0DocInferenceMapper.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(IntWritable.class);
  job.setOutputValueClass(VectorWritable.class);
  FileSystem fs = FileSystem.get(corpus.toUri(), conf);
  if (modelInput != null && fs.exists(modelInput)) {
    FileStatus[] statuses = fs.listStatus(modelInput, PathFilters.partFilter());
    URI[] modelUris = new URI[statuses.length];
    for (int i = 0; i < statuses.length; i++) {
      modelUris[i] = statuses[i].getPath().toUri();
    }
    DistributedCache.setCacheFiles(modelUris, conf);
  }
  FileInputFormat.addInputPath(job, corpus);
  FileOutputFormat.setOutputPath(job, output);
  job.setJarByClass(CVB0Driver.class);
  job.submit();
  return job;
}
 
Developer ID: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 27, Source: CVB0Driver.java
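
Worth noting here: setCacheFiles replaces the entire list of cache files in the configuration, while addCacheFile appends a single entry. Gathering all model part files into one URI array and setting them at once, as above, keeps the list deterministic. A small illustrative contrast:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;

public class CacheRegistrationSketch {
  // One-shot: wipes out any cache files registered earlier on this conf.
  static void registerAll(Configuration conf, URI[] uris) {
    DistributedCache.setCacheFiles(uris, conf);
  }

  // Incremental: each call appends, preserving earlier registrations.
  static void registerOneByOne(Configuration conf, URI[] uris) {
    for (URI uri : uris) {
      DistributedCache.addCacheFile(uri, conf);
    }
  }
}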

Example 8: setup

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  URI[] localFiles = DistributedCache.getCacheFiles(conf);
  Preconditions.checkArgument(localFiles != null && localFiles.length >= 1,
          "missing paths from the DistributedCache");

  dimension = conf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
  sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
  namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
  maxNGramSize = conf.getInt(DictionaryVectorizer.MAX_NGRAMS, maxNGramSize);

  Path dictionaryFile = new Path(localFiles[0].getPath());
  // key is word value is id
  for (Pair<Writable, IntWritable> record
          : new SequenceFileIterable<Writable, IntWritable>(dictionaryFile, true, conf)) {
    dictionary.put(record.getFirst().toString(), record.getSecond().get());
  }
}
 
Developer ID: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 21, Source: TFPartialVectorReducer.java
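
A subtlety in Examples 8-10: the variable is named localFiles, but getCacheFiles returns the URIs exactly as they were registered (typically hdfs:// paths), not the node-local copies; getLocalCacheFiles is the call that yields the localized paths. The code still works because SequenceFileIterable resolves the path against the configured FileSystem. The two views, side by side (illustrative):

import java.io.IOException;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;

public class CacheViewsSketch {
  // The two getter families answer different questions.
  static void printCacheViews(Configuration conf) throws IOException {
    URI[] registered = DistributedCache.getCacheFiles(conf);      // as registered, e.g. hdfs://...
    Path[] localized = DistributedCache.getLocalCacheFiles(conf); // node-local copies, task side
    if (registered != null) {
      for (URI uri : registered) {
        System.out.println("registered: " + uri);
      }
    }
    if (localized != null) {
      for (Path p : localized) {
        System.out.println("localized:  " + p);
      }
    }
  }
}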

Example 9: setup

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  URI[] localFiles = DistributedCache.getCacheFiles(conf);
  Preconditions.checkArgument(localFiles != null && localFiles.length >= 1, 
      "missing paths from the DistributedCache");

  vectorCount = conf.getLong(TFIDFConverter.VECTOR_COUNT, 1);
  featureCount = conf.getLong(TFIDFConverter.FEATURE_COUNT, 1);
  minDf = conf.getInt(TFIDFConverter.MIN_DF, 1);
  maxDf = conf.getLong(TFIDFConverter.MAX_DF, -1);
  sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
  namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);

  Path dictionaryFile = new Path(localFiles[0].getPath());
  // key is feature, value is the document frequency
  for (Pair<IntWritable,LongWritable> record 
       : new SequenceFileIterable<IntWritable,LongWritable>(dictionaryFile, true, conf)) {
    dictionary.put(record.getFirst().get(), record.getSecond().get());
  }
}
 
Developer ID: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 23, Source: TFIDFPartialVectorReducer.java

Example 10: setup

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  URI[] localFiles = DistributedCache.getCacheFiles(conf);
  Preconditions.checkArgument(localFiles != null && localFiles.length >= 1,
          "missing paths from the DistributedCache");

  maxDf = conf.getLong(HighDFWordsPruner.MAX_DF, -1);

  Path dictionaryFile = new Path(localFiles[0].getPath());
  // key is feature, value is the document frequency
  for (Pair<IntWritable, LongWritable> record :
          new SequenceFileIterable<IntWritable, LongWritable>(dictionaryFile, true, conf)) {
    dictionary.put(record.getFirst().get(), record.getSecond().get());
  }
}
 
Developer ID: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 18, Source: WordsPrunerReducer.java

Example 11: addJars

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
/**
 * Add framework or job-specific jars to the classpath through DistributedCache
 * so the mappers can use them.
 */
private void addJars(Path jarFileDir, String jarFileList) throws IOException {
  LocalFileSystem lfs = FileSystem.getLocal(this.conf);
  for (String jarFile : SPLITTER.split(jarFileList)) {
    Path srcJarFile = new Path(jarFile);
    FileStatus[] fileStatusList = lfs.globStatus(srcJarFile);
    for (FileStatus status : fileStatusList) {
      // DistributedCache requires absolute path, so we need to use makeQualified.
      Path destJarFile = new Path(this.fs.makeQualified(jarFileDir), status.getPath().getName());
      // Copy the jar file from local file system to HDFS
      this.fs.copyFromLocalFile(status.getPath(), destJarFile);
      // Then add the jar file on HDFS to the classpath
      LOG.info(String.format("Adding %s to classpath", destJarFile));
      DistributedCache.addFileToClassPath(destJarFile, this.conf, this.fs);
    }
  }
}
 
Developer ID: Hanmourang, Project: Gobblin, Lines: 21, Source: MRJobLauncher.java

Example 12: addLocalFiles

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
/**
 * Add local non-jar files the job depends on to DistributedCache.
 */
private void addLocalFiles(Path jobFileDir, String jobFileList) throws IOException {
  DistributedCache.createSymlink(this.conf);
  for (String jobFile : SPLITTER.split(jobFileList)) {
    Path srcJobFile = new Path(jobFile);
    // DistributedCache requires absolute path, so we need to use makeQualified.
    Path destJobFile = new Path(this.fs.makeQualified(jobFileDir), srcJobFile.getName());
    // Copy the file from local file system to HDFS
    this.fs.copyFromLocalFile(srcJobFile, destJobFile);
    // Create a URI that is in the form path#symlink
    URI destFileUri = URI.create(destJobFile.toUri().getPath() + "#" + destJobFile.getName());
    LOG.info(String.format("Adding %s to DistributedCache", destFileUri));
    // Finally add the file to DistributedCache with a symlink named after the file name
    DistributedCache.addCacheFile(destFileUri, this.conf);
  }
}
 
Developer ID: Hanmourang, Project: Gobblin, Lines: 19, Source: MRJobLauncher.java

Example 13: addSolrConfToDistributedCache

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
public static void addSolrConfToDistributedCache(Job job, File solrHomeZip)
    throws IOException {
  // Make a reasonably unique name for the zip file in the distributed cache
  // to avoid collisions if multiple jobs are running.
  String hdfsZipName = UUID.randomUUID().toString() + '.'
      + ZIP_FILE_BASE_NAME;
  Configuration jobConf = job.getConfiguration();
  jobConf.set(ZIP_NAME, hdfsZipName);

  Path zipPath = new Path("/tmp", getZipName(jobConf));
  FileSystem fs = FileSystem.get(jobConf);
  fs.copyFromLocalFile(new Path(solrHomeZip.toString()), zipPath);
  final URI baseZipUrl = fs.getUri().resolve(
      zipPath.toString() + '#' + getZipName(jobConf));

  DistributedCache.addCacheArchive(baseZipUrl, jobConf);
  LOG.debug("Set Solr distributed cache: {}", Arrays.asList(job.getCacheArchives()));
  LOG.debug("Set zipPath: {}", zipPath);
  // Actually send the path for the configuration zip file
  jobConf.set(SETUP_OK, zipPath.toString());
}
 
Developer ID: europeana, Project: search, Lines: 22, Source: SolrOutputFormat.java
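
Unlike a cache file, a cache archive is unpacked on each task node, and the '#'-style fragment (here the value of getZipName(jobConf)) names the link through which tasks reach the unpacked contents, on clusters where cache symlinking is in effect. A hedged sketch of the task-side access — the inner path conf/solrconfig.xml is an assumption, not taken from SolrOutputFormat:

import java.io.File;

public class ArchiveAccessSketch {
  // An archive registered as "...#<zipName>" is unpacked and reachable
  // through a symlink of that name in the task's working directory.
  static File solrConfig(String zipName) {
    return new File(zipName, "conf/solrconfig.xml"); // inner layout is an assumption
  }
}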

Example 14: configure

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
public void configure(JobConf job) {
  caseSensitive = job.getBoolean("wordcount.case.sensitive", true);
  inputFile = job.get("map.input.file");

  if (job.getBoolean("wordcount.skip.patterns", false)) {
    Path[] patternsFiles = new Path[0];
    try {
      patternsFiles = DistributedCache.getLocalCacheFiles(job);
    } catch (IOException ioe) {
      System.err.println("Caught exception while getting cached files: " + StringUtils.stringifyException(ioe));
    }
    for (Path patternsFile : patternsFiles) {
      parseSkipFile(patternsFile);
    }
  }
}
 
Developer ID: qubole, Project: qubole-jar-test, Lines: 17, Source: WordCount.java

Example 15: configure

import org.apache.hadoop.filecache.DistributedCache; // import the required package/class
@Override
public void configure(Job job) throws IOException {

    for (Path p : getLocalPaths()) {
        Configuration conf = job.getConfiguration();
        FileSystem jobFS = FileSystem.get(conf);
        FileSystem localFS = FileSystem.getLocal(conf);
        Path stagedPath = uploadFileIfNecessary(localFS, p, jobFS);
        DistributedCache.addFileToClassPath(stagedPath, conf, jobFS);
    }

    // We don't really need to set a mapred job jar here,
    // but doing so suppresses a warning
    String mj = getMapredJar();
    if (null != mj)
        job.getConfiguration().set(Hadoop1Compat.CFG_JOB_JAR, mj);
}
 
Developer ID: graben1437, Project: titan1.0.1.kafka, Lines: 18, Source: DistCacheConfigurer.java


Note: The org.apache.hadoop.filecache.DistributedCache class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and any use or redistribution must follow the corresponding project's license. Please do not republish without permission.