当前位置: 首页>>代码示例>>Java>>正文


Java DistributedCache.getCacheFiles方法代码示例

本文整理汇总了Java中org.apache.hadoop.filecache.DistributedCache.getCacheFiles方法的典型用法代码示例。如果您正苦于以下问题：Java DistributedCache.getCacheFiles方法的具体用法？Java DistributedCache.getCacheFiles怎么用？Java DistributedCache.getCacheFiles使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.filecache.DistributedCache的用法示例。


在下文中一共展示了DistributedCache.getCacheFiles方法的12个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: setup

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Reads the vectorizer options from the job configuration and populates the
 * word-to-id dictionary from the first file registered in the DistributedCache.
 *
 * @param context task context that supplies the job configuration
 * @throws IOException declared by the overridden setup
 * @throws InterruptedException declared by the overridden setup
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);

  Configuration jobConf = context.getConfiguration();
  URI[] cacheUris = DistributedCache.getCacheFiles(jobConf);

  // At least one cache entry is required: the dictionary is read from entry 0 below.
  boolean hasCachePath = cacheUris != null && cacheUris.length > 0;
  Preconditions.checkArgument(hasCachePath, "missing paths from the DistributedCache");

  dimension = jobConf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
  sequentialAccess = jobConf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
  namedVector = jobConf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
  // The current field value doubles as the default when the option is absent.
  maxNGramSize = jobConf.getInt(DictionaryVectorizer.MAX_NGRAMS, maxNGramSize);

  Path dictionaryPath = new Path(cacheUris[0].getPath());
  SequenceFileIterable<Writable, IntWritable> entries =
      new SequenceFileIterable<Writable, IntWritable>(dictionaryPath, true, jobConf);

  // Each record's key is the word and its value is the word's id.
  for (Pair<Writable, IntWritable> entry : entries) {
    String word = entry.getFirst().toString();
    int wordId = entry.getSecond().get();
    dictionary.put(word, wordId);
  }
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:21,代码来源:TFPartialVectorReducer.java

示例2: setup

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Reads the TF-IDF options from the job configuration and populates the
 * feature-to-document-frequency dictionary from the first DistributedCache file.
 *
 * @param context task context that supplies the job configuration
 * @throws IOException declared by the overridden setup
 * @throws InterruptedException declared by the overridden setup
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);

  Configuration jobConf = context.getConfiguration();
  URI[] cacheUris = DistributedCache.getCacheFiles(jobConf);

  // At least one cache entry is required: the dictionary is read from entry 0 below.
  boolean hasCachePath = cacheUris != null && cacheUris.length > 0;
  Preconditions.checkArgument(hasCachePath, "missing paths from the DistributedCache");

  vectorCount = jobConf.getLong(TFIDFConverter.VECTOR_COUNT, 1);
  featureCount = jobConf.getLong(TFIDFConverter.FEATURE_COUNT, 1);
  minDf = jobConf.getInt(TFIDFConverter.MIN_DF, 1);
  maxDf = jobConf.getLong(TFIDFConverter.MAX_DF, -1);
  sequentialAccess = jobConf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
  namedVector = jobConf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);

  Path dictionaryPath = new Path(cacheUris[0].getPath());
  SequenceFileIterable<IntWritable, LongWritable> entries =
      new SequenceFileIterable<IntWritable, LongWritable>(dictionaryPath, true, jobConf);

  // Each record's key is the feature and its value is the document frequency.
  for (Pair<IntWritable, LongWritable> entry : entries) {
    int feature = entry.getFirst().get();
    long documentFrequency = entry.getSecond().get();
    dictionary.put(feature, documentFrequency);
  }
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:23,代码来源:TFIDFPartialVectorReducer.java

示例3: setup

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Reads the maximum document-frequency option and populates the
 * feature-to-document-frequency dictionary from the first DistributedCache file.
 *
 * @param context task context that supplies the job configuration
 * @throws IOException declared by the overridden setup
 * @throws InterruptedException declared by the overridden setup
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);

  Configuration jobConf = context.getConfiguration();
  URI[] cacheUris = DistributedCache.getCacheFiles(jobConf);

  // At least one cache entry is required: the dictionary is read from entry 0 below.
  boolean hasCachePath = cacheUris != null && cacheUris.length > 0;
  Preconditions.checkArgument(hasCachePath, "missing paths from the DistributedCache");

  maxDf = jobConf.getLong(HighDFWordsPruner.MAX_DF, -1);

  Path dictionaryPath = new Path(cacheUris[0].getPath());
  SequenceFileIterable<IntWritable, LongWritable> entries =
      new SequenceFileIterable<IntWritable, LongWritable>(dictionaryPath, true, jobConf);

  // Each record's key is the feature and its value is the document frequency.
  for (Pair<IntWritable, LongWritable> entry : entries) {
    int feature = entry.getFirst().get();
    long documentFrequency = entry.getSecond().get();
    dictionary.put(feature, documentFrequency);
  }
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:18,代码来源:WordsPrunerReducer.java

示例4: setup

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Prepares the cross-product operation from the job configuration.
 *
 * Parses the reducer and mapper expressions stored under "mrql.reducer" and
 * "mrql.mapper"; when "mrql.zero" is present, evaluates it as the initial
 * result and parses the accumulator stored under "mrql.accumulator".
 * Finally records the counter name and the DistributedCache URIs and local paths.
 *
 * @param context task context that supplies the job configuration
 * @throws IOException declared by the overridden setup
 * @throws InterruptedException declared by the overridden setup
 * @throws Error if any step of the initialization fails; the original
 *         exception is attached as the cause
 */
@Override
protected void setup ( Context context ) throws IOException,InterruptedException {
    super.setup(context);
    try {
        conf = context.getConfiguration();
        Plan.conf = conf;
        Config.read(Plan.conf);
        Tree code = Tree.parse(conf.get("mrql.reducer"));
        reduce_fnc = functional_argument(conf,code);
        code = Tree.parse(conf.get("mrql.mapper"));
        map_fnc = functional_argument(conf,code);
        if (conf.get("mrql.zero") != null) {
            // An aggregation is configured: evaluate its zero element and load the accumulator.
            code = Tree.parse(conf.get("mrql.zero"));
            result = Interpreter.evalE(code);
            code = Tree.parse(conf.get("mrql.accumulator"));
            acc_fnc = functional_argument(conf,code);
        } else {
            result = null;
        }
        counter = conf.get("mrql.counter");
        uris = DistributedCache.getCacheFiles(conf);
        local_paths = DistributedCache.getLocalCacheFiles(conf);
        index = 0;
    } catch (Exception e) {
        // Chain the original exception so its stack trace is not lost;
        // the message text is unchanged.
        throw new Error("Cannot setup the crossProduct: "+e, e);
    }
}
 
开发者ID:apache,项目名称:incubator-mrql,代码行数:26,代码来源:CrossProductOperation.java

示例5: getCachedFiles

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Resolves the files registered in the DistributedCache to paths.
 *
 * Uses the local cache paths when they are available and exist on the local
 * file system; otherwise falls back to the paths of the original cache URIs
 * (the fallback used for local execution).
 *
 * @param conf configuration holding the DistributedCache settings
 * @return a non-empty array of cache file paths
 * @throws IOException if the local file system or the cache lookup fails
 * @throws IllegalStateException if no cached file can be resolved
 */
public static Path[] getCachedFiles(Configuration conf) throws IOException {
  LocalFileSystem localFs = FileSystem.getLocal(conf);
  Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);

  URI[] fallbackFiles = DistributedCache.getCacheFiles(conf);

  // fallback for local execution
  if (cacheFiles == null) {

    Preconditions.checkState(fallbackFiles != null, "Unable to find cached files!");

    cacheFiles = new Path[fallbackFiles.length];
    for (int n = 0; n < fallbackFiles.length; n++) {
      cacheFiles[n] = new Path(fallbackFiles[n].getPath());
    }
  } else {

    for (int n = 0; n < cacheFiles.length; n++) {
      cacheFiles[n] = localFs.makeQualified(cacheFiles[n]);
      // fallback for local execution
      if (!localFs.exists(cacheFiles[n])) {
        // The fallback list may be missing or shorter than the local list; report
        // that as the same state error instead of an NPE / index-out-of-bounds.
        Preconditions.checkState(fallbackFiles != null && n < fallbackFiles.length,
            "Unable to find cached files!");
        cacheFiles[n] = new Path(fallbackFiles[n].getPath());
      }
    }
  }

  Preconditions.checkState(cacheFiles.length > 0, "Unable to find cached files!");

  return cacheFiles;
}
 
开发者ID:huyang1,项目名称:LDA,代码行数:31,代码来源:HadoopUtil.java

示例6: load

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Loads the vector stored in the first file registered in the {@link DistributedCache}.
 *
 * @param conf configuration holding the DistributedCache settings
 * @return the loaded vector, or {@code null} when no cache file is registered
 * @throws IOException declared for the cache lookup and the delegated load
 */
public static Vector load(Configuration conf) throws IOException {
  URI[] cacheUris = DistributedCache.getCacheFiles(conf);

  boolean hasCacheFile = cacheUris != null && cacheUris.length > 0;
  if (!hasCacheFile) {
    return null;
  }

  log.info("Files are: {}", Arrays.toString(cacheUris));

  // Only the first cache entry is consulted.
  Path vectorPath = new Path(cacheUris[0].getPath());
  return load(conf, vectorPath);
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:12,代码来源:VectorCache.java

示例7: setup

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Loads the dataset and the rule base from the first two DistributedCache
 * files and builds the data converter for the dataset.
 *
 * @param context task context that supplies the job configuration
 * @throws IOException if fewer than two paths are registered in the DistributedCache
 * @throws InterruptedException if the rule base could not be loaded
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);

  Configuration jobConf = context.getConfiguration();
  URI[] cacheUris = DistributedCache.getCacheFiles(jobConf);

  // Entry 0 is the dataset and entry 1 is the rule base, so both must be present.
  boolean hasBothPaths = cacheUris != null && cacheUris.length >= 2;
  if (!hasBothPaths) {
    throw new IOException("not enough paths in the DistributedCache");
  }

  Path datasetPath = new Path(cacheUris[0].getPath());
  dataset = Dataset.load(jobConf, datasetPath);
  converter = new DataConverter(dataset);

  Path ruleBasePath = new Path(cacheUris[1].getPath());
  ruleBase = RuleBase.load(jobConf, ruleBasePath);
  if (null == ruleBase) {
    throw new InterruptedException("Model not found!");
  }
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigData-Max,代码行数:23,代码来源:Chi_RWClassifier.java

示例8: open

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Opens a stream for the given resource.
 *
 * A resource without a ":" is looked up first on the classpath and then
 * among the DistributedCache URIs whose string form contains the resource
 * name. Anything else, or a resource not found by those lookups, is opened
 * through the Hadoop file system resolved from its path.
 *
 * @param resource resource name or location to open
 * @param conf configuration supplying the class loader, cache entries and file system
 * @return an open stream for the resource
 * @throws EsHadoopIllegalArgumentException if the stream cannot be opened;
 *         the underlying {@link IOException} is attached as the cause
 */
public static InputStream open(String resource, Configuration conf) {
    // Class loader preference: configuration, then thread context, then this class.
    ClassLoader loader = conf.getClassLoader();

    if (loader == null) {
        loader = Thread.currentThread().getContextClassLoader();
    }

    if (loader == null) {
        loader = HadoopIOUtils.class.getClassLoader();
    }

    boolean trace = log.isTraceEnabled();

    try {
        // no prefix means classpath
        if (!resource.contains(":")) {

            InputStream result = loader.getResourceAsStream(resource);
            if (result != null) {
                if (trace) {
                    log.trace(String.format("Loaded resource %s from classpath", resource));
                }
                return result;
            }
            // fall back to the distributed cache
            URI[] uris = DistributedCache.getCacheFiles(conf);
            if (uris != null) {
                for (URI uri : uris) {
                    if (uri.toString().contains(resource)) {
                        if (trace) {
                            log.trace(String.format("Loaded resource %s from distributed cache", resource));
                        }
                        return uri.toURL().openStream();
                    }
                }
            }
        }

        // fall back to file system
        Path p = new Path(resource);
        FileSystem fs = p.getFileSystem(conf);
        return fs.open(p);
    } catch (IOException ex) {
        // Attach the I/O failure as the cause so the root problem stays diagnosable.
        EsHadoopIllegalArgumentException failure =
                new EsHadoopIllegalArgumentException(String.format("Cannot open stream for resource %s", resource));
        failure.initCause(ex);
        throw failure;
    }
}
 
开发者ID:xushjie1987,项目名称:es-hadoop-v2.2.0,代码行数:47,代码来源:HadoopIOUtils.java

示例9: setupPipesJob

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Configures a job to run through Pipes: installs the Pipes runner classes
 * for the non-Java stages, defaults the key/value classes to Text, and puts
 * the executable first in the DistributedCache file list.
 *
 * @param conf job configuration to update in place
 * @throws IOException if the executable location cannot be parsed as a URI
 * @throws IllegalArgumentException if no executable is configured
 */
private static void setupPipesJob(JobConf conf) throws IOException {
  if (!getIsJavaMapper(conf)) {
    conf.setMapRunnerClass(PipesMapRunner.class);
    // Remember the user's partitioner before replacing it with the Pipes one.
    setJavaPartitioner(conf, conf.getPartitionerClass());
    conf.setPartitionerClass(PipesPartitioner.class);
  }
  if (!getIsJavaReducer(conf)) {
    conf.setReducerClass(PipesReducer.class);
    if (!getIsJavaRecordWriter(conf)) {
      conf.setOutputFormat(NullOutputFormat.class);
    }
  }

  // Map output and job output types default to Text unless already set.
  final String textClassname = Text.class.getName();
  final String[] typeKeys = {
    "mapred.mapoutput.key.class",
    "mapred.mapoutput.value.class",
    "mapred.output.key.class",
    "mapred.output.value.class",
  };
  for (String typeKey : typeKeys) {
    setIfUnset(conf, typeKey, textClassname);
  }

  // Use PipesNonJavaInputFormat if necessary to handle progress reporting
  // from C++ RecordReaders ...
  if (!(getIsJavaRecordReader(conf) || getIsJavaMapper(conf))) {
    conf.setClass("mapred.pipes.user.inputformat",
                  conf.getInputFormat().getClass(), InputFormat.class);
    conf.setInputFormat(PipesNonJavaInputFormat.class);
  }

  final String exec = getExecutable(conf);
  if (exec == null) {
    throw new IllegalArgumentException("No application program defined.");
  }

  // The default debug script is added only when the executable is expressed as
  // <path>#<executable>
  if (exec.indexOf('#') >= 0) {
    DistributedCache.createSymlink(conf);
    // default gdb commands for map and reduce tasks
    final String defScript = "$HADOOP_HOME/src/c++/pipes/debug/pipes-default-script";
    setIfUnset(conf, "mapred.map.task.debug.script", defScript);
    setIfUnset(conf, "mapred.reduce.task.debug.script", defScript);
  }

  // Build a new cache list with slot 0 reserved for the executable and the
  // previously registered files shifted right by one.
  final URI[] registered = DistributedCache.getCacheFiles(conf);
  final int registeredCount = (registered == null) ? 0 : registered.length;
  final URI[] withExecutable = new URI[registeredCount + 1];
  if (registered != null) {
    System.arraycopy(registered, 0, withExecutable, 1, registeredCount);
  }
  try {
    withExecutable[0] = new URI(exec);
  } catch (URISyntaxException e) {
    throw new IOException("Problem parsing execable URI " + exec, e);
  }
  DistributedCache.setCacheFiles(withExecutable, conf);
}
 
开发者ID:Nextzero,项目名称:hadoop-2.6.0-cdh5.4.3,代码行数:59,代码来源:Submitter.java

示例10: setup

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Loads the dataset and the rule base from the first two DistributedCache
 * files and builds the data converter for the dataset.
 *
 * @param context task context that supplies the job configuration
 * @throws IOException if fewer than two paths are registered in the DistributedCache
 * @throws InterruptedException if the rule base could not be loaded
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);

  Configuration jobConf = context.getConfiguration();
  URI[] cacheUris = DistributedCache.getCacheFiles(jobConf);

  // Entry 0 is the dataset and entry 1 is the rule base, so both must be present.
  boolean hasBothPaths = cacheUris != null && cacheUris.length >= 2;
  if (!hasBothPaths) {
    throw new IOException("not enough paths in the DistributedCache");
  }

  Path datasetPath = new Path(cacheUris[0].getPath());
  dataset = Dataset.load(jobConf, datasetPath);
  converter = new DataConverter(dataset);

  Path ruleBasePath = new Path(cacheUris[1].getPath());
  ruleBase = RuleBase.load(jobConf, ruleBasePath);
  if (null == ruleBase) {
    throw new InterruptedException("Model not found!");
  }
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigData-Max,代码行数:23,代码来源:Chi_RWCSClassifier.java

示例11: getDistributedCacheFile

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Helper method. Get a path from the DistributedCache
 * 
 * @param conf
 *          configuration
 * @param index
 *          index of the path in the DistributedCache files
 * @return path from the DistributedCache
 * @throws IOException
 *           if no path is found, including when {@code index} is negative
 *           or beyond the number of registered files
 */
public static Path getDistributedCacheFile(Configuration conf, int index) throws IOException {
  URI[] files = DistributedCache.getCacheFiles(conf);
  
  // Reject out-of-range indices on both sides so that a negative index is
  // reported as the documented IOException rather than an unchecked
  // ArrayIndexOutOfBoundsException.
  if (files == null || index < 0 || index >= files.length) {
    throw new IOException("path not found in the DistributedCache");
  }
  
  return new Path(files[index].getPath());
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:21,代码来源:Builder.java

示例12: getCacheFiles

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Returns the cache files recorded in this context's configuration.
 *
 * @return the URI array reported by {@link DistributedCache#getCacheFiles}
 * @throws IOException declared for the underlying DistributedCache lookup
 */
public URI[] getCacheFiles() throws IOException {
  final URI[] cacheFileUris = DistributedCache.getCacheFiles(conf);
  return cacheFileUris;
}
 
开发者ID:Nextzero,项目名称:hadoop-2.6.0-cdh5.4.3,代码行数:10,代码来源:JobContextImpl.java


注:本文中的org.apache.hadoop.filecache.DistributedCache.getCacheFiles方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。