当前位置: 首页>>代码示例>>Java>>正文


Java DistributedCache.getLocalCacheFiles方法代码示例

本文整理汇总了Java中org.apache.hadoop.filecache.DistributedCache.getLocalCacheFiles方法的典型用法代码示例。如果您正苦于以下问题:Java DistributedCache.getLocalCacheFiles方法的具体用法?Java DistributedCache.getLocalCacheFiles怎么用?Java DistributedCache.getLocalCacheFiles使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.hadoop.filecache.DistributedCache的用法示例。


在下文中一共展示了DistributedCache.getLocalCacheFiles方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: configure

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Initializes per-task state from the job configuration and the
 * distributed-cache files: reads the "pages"/"slots"/"visits" counters
 * and the field delimiter, then builds the shared {@code Visit} lookup
 * from the locally cached files.
 *
 * @param job the job configuration for this task
 */
public void configure (JobConf job)
{
	try {
		pages = job.getLong("pages", 0);
		slots = job.getLong("slots", 0);
		visits = job.getLong("visits", 0);
		delim = job.get("delimiter");
	
		visit = new Visit(DistributedCache.getLocalCacheFiles(job),
				delim, pages);
		
		vitem = new JoinBytesInt();
		vitem.refs = 1;
	
	} catch (IOException e) {
		// Fail fast: swallowing the exception (as before) left 'visit'
		// null and caused an obscure NPE later during the map phase.
		throw new RuntimeException("Unable to read distributed-cache files", e);
	}
}
 
开发者ID:thrill,项目名称:fst-bench,代码行数:19,代码来源:HiveData.java

示例2: configure

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Configures the word-count mapper: reads case sensitivity and the
 * current input file name, and loads skip patterns from the
 * distributed cache when enabled.
 *
 * @param job the job configuration
 */
public void configure(JobConf job) {
  caseSensitive = job.getBoolean("wordcount.case.sensitive", true);
  inputFile = job.get("map.input.file");

  if (job.getBoolean("wordcount.skip.patterns", false)) {
    Path[] patternsFiles = new Path[0];
    try {
      // getLocalCacheFiles may return null (e.g. local execution);
      // guard so the loop below cannot throw a NullPointerException.
      Path[] cached = DistributedCache.getLocalCacheFiles(job);
      if (cached != null) {
        patternsFiles = cached;
      }
    } catch (IOException ioe) {
      System.err.println("Caught exception while getting cached files: " + StringUtils.stringifyException(ioe));
    }
    for (Path patternsFile : patternsFiles) {
      parseSkipFile(patternsFile);
    }
  }
}
 
开发者ID:qubole,项目名称:qubole-jar-test,代码行数:17,代码来源:WordCount.java

示例3: configure

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Loads the join index either from the distributed cache or, when no
 * cache files are available (local execution), from the path named by
 * the join-index property.
 *
 * @param job the job configuration
 */
@Override
public void configure(JobConf job) {
    LOG.info("Configure START");
    //
    // read join index
    //
    Path[] cachedPaths;
    try {
        cachedPaths = DistributedCache.getLocalCacheFiles(job);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    if (cachedPaths != null) {
        for (Path indexPath : cachedPaths) {
            addJoinIndex(indexPath);
        }
    } else {
        // local execution: the index path comes from the job configuration
        addJoinIndex(new Path(job.get(FuzzyJoinDriver.DATA_JOININDEX_PROPERTY)));
    }
    LOG.info("Configure END");
}
 
开发者ID:TonyApuzzo,项目名称:fuzzyjoin,代码行数:23,代码来源:MapBroadcastSelfJoin.java

示例4: setup

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Initializes the cross-product operator for this task: parses the
 * serialized reducer/mapper (and optional zero/accumulator) functions
 * from the configuration and records the distributed-cache URIs and
 * their local paths.
 *
 * @throws Error if any part of the configuration cannot be parsed or
 *         evaluated (original exception attached as the cause)
 */
@Override
protected void setup ( Context context ) throws IOException,InterruptedException {
    super.setup(context);
    try {
        conf = context.getConfiguration();
        Plan.conf = conf;
        Config.read(Plan.conf);
        Tree code = Tree.parse(conf.get("mrql.reducer"));
        reduce_fnc = functional_argument(conf,code);
        code = Tree.parse(conf.get("mrql.mapper"));
        map_fnc = functional_argument(conf,code);
        if (conf.get("mrql.zero") != null) {
            // aggregation mode: evaluate the zero element and parse the accumulator
            code = Tree.parse(conf.get("mrql.zero"));
            result = Interpreter.evalE(code);
            code = Tree.parse(conf.get("mrql.accumulator"));
            acc_fnc = functional_argument(conf,code);
        } else result = null;
        counter = conf.get("mrql.counter");
        uris = DistributedCache.getCacheFiles(conf);
        local_paths = DistributedCache.getLocalCacheFiles(conf);
        index = 0;
    } catch (Exception e) {
        // Attach the original exception as the cause so its stack trace
        // is preserved (previously only its toString was kept).
        throw new Error("Cannot setup the crossProduct: "+e, e);
    }
}
 
开发者ID:apache,项目名称:incubator-mrql,代码行数:26,代码来源:CrossProductOperation.java

示例5: getCachedFiles

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Resolves the files registered in the distributed cache as local
 * filesystem paths, falling back to the original (remote) cache URIs
 * when running locally or when a localized copy is missing.
 *
 * @param conf the job configuration
 * @return a non-empty array of qualified cache-file paths
 * @throws IOException if the local filesystem cannot be accessed
 * @throws IllegalStateException if no cached files can be found
 */
public static Path[] getCachedFiles(Configuration conf) throws IOException {
  LocalFileSystem localFs = FileSystem.getLocal(conf);
  Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);

  URI[] fallbackFiles = DistributedCache.getCacheFiles(conf);

  // fallback for local execution
  if (cacheFiles == null) {

    Preconditions.checkState(fallbackFiles != null, "Unable to find cached files!");

    cacheFiles = new Path[fallbackFiles.length];
    for (int n = 0; n < fallbackFiles.length; n++) {
      cacheFiles[n] = new Path(fallbackFiles[n].getPath());
    }
  } else {

    for (int n = 0; n < cacheFiles.length; n++) {
      cacheFiles[n] = localFs.makeQualified(cacheFiles[n]);
      // fallback for local execution
      if (!localFs.exists(cacheFiles[n])) {
        // Guard: previously this dereferenced fallbackFiles without a
        // null check and could NPE when the localized copy was missing.
        Preconditions.checkState(fallbackFiles != null,
            "Localized cache file missing and no fallback URIs available!");
        cacheFiles[n] = new Path(fallbackFiles[n].getPath());
      }
    }
  }

  Preconditions.checkState(cacheFiles.length > 0, "Unable to find cached files!");

  return cacheFiles;
}
 
开发者ID:huyang1,项目名称:LDA,代码行数:31,代码来源:HadoopUtil.java

示例6: configure

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Loads the state-mapping file shipped through the distributed cache;
 * terminates the task if the file cannot be read.
 *
 * @param job the job configuration
 */
@Override
public void configure(JobConf job) {
    try {
        Path[] cachedFiles = DistributedCache.getLocalCacheFiles(job);
        setupStateMap(cachedFiles[0].toString());
    } catch (IOException e) {
        System.err.println("Error reading state file.");
        System.exit(1);
    }
}
 
开发者ID:PacktPublishing,项目名称:Data-Science-with-Hadoop,代码行数:14,代码来源:UFOLocation2.java

示例7: setup

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Loads the Four-Section dictionary shipped through the distributed
 * cache and memory-maps it for use while mapping triples.
 *
 * @throws IOException if the cached dictionary file cannot be read
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {

    Path[] cache = DistributedCache.getLocalCacheFiles(context.getConfiguration());

    this.conf = new HDTBuilderConfiguration(context.getConfiguration());
    File file = new File(cache[0].toString());
    CountInputStream input = new CountInputStream(new BufferedInputStream(new FileInputStream(file)));
    try {
        this.dictionary = new FourSectionDictionary(this.conf.getSpec());
        this.dictionary.mapFromFile(input, file, this);
    } finally {
        // Close the stream even if mapFromFile fails (previously leaked on error).
        input.close();
    }

    // DEBUG
    // ((PFCDictionarySection) this.dictionary.getShared()).dumpAll();
}
 
开发者ID:rdfhdt,项目名称:hdt-mr,代码行数:16,代码来源:TriplesMapper.java

示例8: setup

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Reads the serialized Bloom filter from the first distributed-cache
 * file and keeps it for use during the join.
 */
@Override
protected void setup(
    Context context)
    throws IOException, InterruptedException {

  File filterFile = new File(
      DistributedCache.getLocalCacheFiles(context.getConfiguration())[0].toString());
  filter = BloomFilterDumper.fromFile(filterFile);

  System.out.println("Filter = " + filter);
}
 
开发者ID:Hanmourang,项目名称:hiped2,代码行数:12,代码来源:BloomJoin.java

示例9: map

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Tokenizes one line of an XML document, filters out stopwords, and
 * emits (word, documentId) pairs for the inverted index.
 *
 * @param key     input key (unused)
 * @param value   one line of the current input file
 * @param context Hadoop context used to emit output pairs
 */
@Override
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {

    // Build the stopword filter from the file shipped via the distributed cache.
    Configuration conf = context.getConfiguration();
    Path[] cachedFiles = DistributedCache.getLocalCacheFiles(conf);
    StopWords stopwords = new StopWords(cachedFiles[0].toString());

    // The document id is the name of the file this split belongs to.
    FileSplit split = (FileSplit) context.getInputSplit();
    id = new Text(split.getPath().getName());

    // Strip XML tags, then keep only letters and split into lowercase words.
    String stripped = value.toString().replaceAll("<.*?>", "");
    String[] tokens = stripped.replaceAll("[^a-zA-Z ]", " ").toLowerCase().split(" ");

    // Emit every non-empty token that is not a stopword.
    for (String token : tokens) {
        if (!stopwords.contains(token) && token.length() > 0) {
            final_word = new Text(token);
            context.write(final_word, id);
        }
    }
}
 
开发者ID:efikalti,项目名称:Inverted-KMeans-Hadoop,代码行数:31,代码来源:InvertedIndexMap.java

示例10: configure

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Prepares the self-join mapper: builds the tokenizer and similarity
 * filters from the job settings, loads the global token ranking
 * (from the distributed cache, or from the configured path when
 * running locally), and records which record columns hold the data.
 *
 * @param job the job configuration
 */
@Override
public void configure(JobConf job) {
    // Tokenizer and similarity filters.
    String tokenizerName = job.get(
            FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE);
    tokenizer = TokenizerFactory.getTokenizer(tokenizerName,
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String simName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float simThreshold = job.getFloat(
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(
            simName, simThreshold);

    // Token ranking: prefer the distributed-cache copy, fall back to
    // the configured tokens path for local runs.
    Path rankPath;
    try {
        Path[] cached = DistributedCache.getLocalCacheFiles(job);
        rankPath = (cached == null)
                ? new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY))
                : cached[0];
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    new TokenLoad(rankPath.toString(), tokenRank).loadTokenRank();

    // Record columns that carry the joined data.
    dataColumns = FuzzyJoinUtil.getDataColumns(job.get(
            FuzzyJoinConfig.RECORD_DATA_PROPERTY,
            FuzzyJoinConfig.RECORD_DATA_VALUE));
}
 
开发者ID:TonyApuzzo,项目名称:fuzzyjoin,代码行数:38,代码来源:MapSelfJoin.java

示例11: configure

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Loads the join index (from the distributed cache, or from the
 * configured path when no cache files exist) and extracts the suffix
 * identifying the second relation.
 *
 * @param job the job configuration
 */
@Override
public void configure(JobConf job) {
    LOG.info("Configure START");
    //
    // read join index
    //
    Path[] cachedPaths;
    try {
        cachedPaths = DistributedCache.getLocalCacheFiles(job);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    if (cachedPaths != null) {
        for (Path indexPath : cachedPaths) {
            addJoinIndex(indexPath);
        }
    } else {
        // local execution: the index path comes from the job configuration
        addJoinIndex(new Path(job.get(FuzzyJoinDriver.DATA_JOININDEX_PROPERTY)));
    }
    //
    // get suffix for second relation
    //
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
    LOG.info("Configure END");
}
 
开发者ID:TonyApuzzo,项目名称:fuzzyjoin,代码行数:28,代码来源:MapBroadcastJoin.java

示例12: setup

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Resets the per-task parsing state and fetches the files shipped
 * through the distributed cache.
 */
@Override
public void setup(Context context) throws IOException, InterruptedException {
    this.word = null;
    this.IDs = null;
    this.codonID = 1;
    this.currFileOffSet = 0;
    this.localFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
}
 
开发者ID:GeneticMapping,项目名称:GSM,代码行数:10,代码来源:Backup_2_DiseaseApplication_format_4.java

示例13: configure

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Prepares the self-join mapper: tokenizer, similarity filters, token
 * ranking (distributed cache or configured path), and data columns.
 *
 * @param job the job configuration
 */
@Override
public void configure(JobConf job) {
    // Tokenizer and similarity filters from job settings.
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY,
                    FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX,
            FuzzyJoinConfig.TOKEN_SEPARATOR);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(
            job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
                    FuzzyJoinConfig.SIMILARITY_NAME_VALUE),
            job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
                    FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE));

    // Token ranking: cache copy when present, configured path otherwise.
    Path rankPath;
    try {
        Path[] cached = DistributedCache.getLocalCacheFiles(job);
        rankPath = (cached == null)
                ? new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY))
                : cached[0];
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    new TokenLoad(rankPath.toString(), tokenRank).loadTokenRank();

    // Record columns that carry the joined data.
    dataColumns = FuzzyJoinUtil.getDataColumns(job.get(
            FuzzyJoinConfig.RECORD_DATA_PROPERTY,
            FuzzyJoinConfig.RECORD_DATA_VALUE));
}
 
开发者ID:TonyApuzzo,项目名称:fuzzyjoin,代码行数:43,代码来源:MapSelfJoin.java

示例14: configure

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Prepares the two-relation join mapper: builds the tokenizer and
 * similarity filters, loads the token ranking (distributed cache or
 * configured path), records the data columns of both relations, and
 * extracts the suffix naming the second relation.
 *
 * @param job the job configuration
 */
@Override
public void configure(JobConf job) {
    // Tokenizer and similarity filters.
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY,
                    FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX,
            FuzzyJoinConfig.TOKEN_SEPARATOR);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(
            job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
                    FuzzyJoinConfig.SIMILARITY_NAME_VALUE),
            job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
                    FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE));

    // Token ranking: cache copy when present, configured path otherwise.
    Path rankPath;
    try {
        Path[] cached = DistributedCache.getLocalCacheFiles(job);
        rankPath = (cached == null)
                ? new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY))
                : cached[0];
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    new TokenLoad(rankPath.toString(), tokenRank).loadTokenRank();

    // Data columns for each of the two relations.
    dataColumns[0] = FuzzyJoinUtil.getDataColumns(job.get(
            FuzzyJoinConfig.RECORD_DATA_PROPERTY,
            FuzzyJoinConfig.RECORD_DATA_VALUE));
    dataColumns[1] = FuzzyJoinUtil.getDataColumns(job.get(
            FuzzyJoinConfig.RECORD_DATA1_PROPERTY,
            FuzzyJoinConfig.RECORD_DATA1_VALUE));

    // Suffix that distinguishes records of the second relation.
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
}
 
开发者ID:TonyApuzzo,项目名称:fuzzyjoin,代码行数:51,代码来源:MapJoin.java

示例15: configure

import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Prepares the record generator used to synthesize new records:
 * validates the copy offset and record count, loads the token ranking
 * (scaled by the dictionary factor), and sets up the tokenizer and
 * data columns.
 *
 * @param job the job configuration
 */
@Override
public void configure(JobConf job) {
    // RecordGenerator: which copy of the data this task produces.
    int offset = job.getInt(FuzzyJoinDriver.DATA_CRTCOPY_PROPERTY, -1);
    if (offset == -1) {
        System.err.println("ERROR: fuzzyjoin.data.crtcopy not set.");
        System.exit(-1);
    }
    recordGenerator = new RecordGenerator(offset);

    int noRecords = job.getInt(FuzzyJoinDriver.DATA_NORECORDS_PROPERTY, -1);
    if (noRecords == -1) {
        System.err.println("ERROR: fuzzyjoin.data.norecords not set.");
        System.exit(-1);
    }
    offsetRID = offset * noRecords;

    int dictionaryFactor = job.getInt(
            FuzzyJoinDriver.DATA_DICTIONARY_FACTOR_PROPERTY, 1);

    // Token ranking feeds the record generator; prefer the cache copy,
    // fall back to the configured tokens path for local runs.
    Path rankFile;
    try {
        Path[] cached = DistributedCache.getLocalCacheFiles(job);
        rankFile = (cached == null)
                ? new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY))
                : cached[0];
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    new TokenLoad(rankFile.toString(), recordGenerator)
            .loadTokenRank(dictionaryFactor);

    // Tokenizer.
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY,
                    FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX,
            FuzzyJoinConfig.TOKEN_SEPARATOR);

    // Data columns holding the record payload.
    dataColumns = FuzzyJoinUtil.getDataColumns(job.get(
            FuzzyJoinConfig.RECORD_DATA_PROPERTY,
            FuzzyJoinConfig.RECORD_DATA_VALUE));
    // Arrays.sort(dataColumns);
}
 
开发者ID:TonyApuzzo,项目名称:fuzzyjoin,代码行数:54,代码来源:MapNewRecord.java


注:本文中的org.apache.hadoop.filecache.DistributedCache.getLocalCacheFiles方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。