本文整理汇总了Java中org.apache.hadoop.filecache.DistributedCache.getLocalCacheFiles方法的典型用法代码示例。如果您正苦于以下问题:Java DistributedCache.getLocalCacheFiles方法的具体用法?Java DistributedCache.getLocalCacheFiles怎么用?Java DistributedCache.getLocalCacheFiles使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.filecache.DistributedCache
的用法示例。
在下文中一共展示了DistributedCache.getLocalCacheFiles方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: configure
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Reads job-level parameters and builds the {@code Visit} lookup from the
 * files shipped through the DistributedCache.
 *
 * @param job the job configuration supplying "pages", "slots", "visits"
 *            and "delimiter", plus the localized cache files
 */
public void configure (JobConf job)
{
    try {
        pages = job.getLong("pages", 0);
        slots = job.getLong("slots", 0);
        visits = job.getLong("visits", 0);
        delim = job.get("delimiter");
        visit = new Visit(DistributedCache.getLocalCacheFiles(job),
                          delim, pages);
        vitem = new JoinBytesInt();
        vitem.refs = 1;
    } catch (IOException e) {
        // Fail fast with the cause attached: the original printStackTrace()
        // swallowed the error and left `visit` null, producing a confusing
        // NullPointerException later in the task.
        throw new RuntimeException("Failed to load distributed cache files", e);
    }
}
示例2: configure
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Caches the job settings used by map(): case sensitivity, the current
 * input file, and (optionally) the skip-pattern files from the
 * DistributedCache.
 */
public void configure(JobConf job) {
    caseSensitive = job.getBoolean("wordcount.case.sensitive", true);
    inputFile = job.get("map.input.file");
    if (!job.getBoolean("wordcount.skip.patterns", false)) {
        return;  // skipping disabled: nothing more to load
    }
    // Best-effort load: on IOException we proceed with an empty pattern set,
    // matching the original behavior.
    Path[] skipFiles = new Path[0];
    try {
        skipFiles = DistributedCache.getLocalCacheFiles(job);
    } catch (IOException ioe) {
        System.err.println("Caught exception while getting cached files: " + StringUtils.stringifyException(ioe));
    }
    for (Path skipFile : skipFiles) {
        parseSkipFile(skipFile);
    }
}
示例3: configure
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
@Override
public void configure(JobConf job) {
    LOG.info("Configure START");
    // Load the join index, preferring the copies localized through the
    // DistributedCache and falling back to the configured path.
    Path[] localCache;
    try {
        localCache = DistributedCache.getLocalCacheFiles(job);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    if (localCache != null) {
        for (Path indexPath : localCache) {
            addJoinIndex(indexPath);
        }
    } else {
        addJoinIndex(new Path(job.get(FuzzyJoinDriver.DATA_JOININDEX_PROPERTY)));
    }
    LOG.info("Configure END");
}
示例4: setup
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Initializes the cross-product task: loads the MRQL configuration,
 * deserializes the mapper/reducer closures from the job conf, optionally
 * sets up the aggregation (zero element + accumulator), and records the
 * cache file locations.
 *
 * @throws Error if any part of the setup fails (kept as {@code Error} for
 *         compatibility with existing callers, but now with the cause chained)
 */
@Override
protected void setup ( Context context ) throws IOException,InterruptedException {
    super.setup(context);
    try {
        conf = context.getConfiguration();
        Plan.conf = conf;
        Config.read(Plan.conf);
        // Deserialize the reducer and mapper closures shipped in the job conf.
        Tree code = Tree.parse(conf.get("mrql.reducer"));
        reduce_fnc = functional_argument(conf,code);
        code = Tree.parse(conf.get("mrql.mapper"));
        map_fnc = functional_argument(conf,code);
        if (conf.get("mrql.zero") != null) {
            // Aggregation mode: evaluate the zero element and accumulator.
            code = Tree.parse(conf.get("mrql.zero"));
            result = Interpreter.evalE(code);
            code = Tree.parse(conf.get("mrql.accumulator"));
            acc_fnc = functional_argument(conf,code);
        } else result = null;
        counter = conf.get("mrql.counter");
        uris = DistributedCache.getCacheFiles(conf);
        local_paths = DistributedCache.getLocalCacheFiles(conf);
        index = 0;
    } catch (Exception e) {
        // Chain the cause so the original stack trace is preserved; the old
        // code concatenated e into the message and lost it.
        throw new Error("Cannot setup the crossProduct: "+e, e);
    }
}
示例5: getCachedFiles
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Returns the job's cached files as qualified local paths, falling back to
 * the raw cache URIs when running locally (where no localization happens).
 *
 * @param conf the job configuration
 * @return a non-empty array of cache file paths
 * @throws IOException if the local file system cannot be accessed
 * @throws IllegalStateException if no cached files can be found
 */
public static Path[] getCachedFiles(Configuration conf) throws IOException {
    LocalFileSystem localFs = FileSystem.getLocal(conf);
    Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
    URI[] fallbackFiles = DistributedCache.getCacheFiles(conf);
    if (cacheFiles == null) {
        // fallback for local execution: no localized copies exist
        Preconditions.checkState(fallbackFiles != null, "Unable to find cached files!");
        cacheFiles = new Path[fallbackFiles.length];
        for (int n = 0; n < fallbackFiles.length; n++) {
            cacheFiles[n] = new Path(fallbackFiles[n].getPath());
        }
    } else {
        for (int n = 0; n < cacheFiles.length; n++) {
            cacheFiles[n] = localFs.makeQualified(cacheFiles[n]);
            // fallback for local execution when the localized copy is missing;
            // guard the fallback list — the original dereferenced it without a
            // null/length check and could NPE here
            if (!localFs.exists(cacheFiles[n])) {
                Preconditions.checkState(
                    fallbackFiles != null && n < fallbackFiles.length,
                    "Unable to find cached files!");
                cacheFiles[n] = new Path(fallbackFiles[n].getPath());
            }
        }
    }
    Preconditions.checkState(cacheFiles.length > 0, "Unable to find cached files!");
    return cacheFiles;
}
示例6: configure
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
@Override
public void configure( JobConf job)
{
try
{
Path[] cacheFiles = DistributedCache.getLocalCacheFiles(job) ;
setupStateMap( cacheFiles[0].toString()) ;
} catch (IOException e)
{
System.err.println("Error reading state file.") ;
System.exit(1) ;
}
}
示例7: setup
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Maps the shared four-section dictionary from the first distributed-cache
 * file into this task.
 *
 * @throws IOException if the dictionary file cannot be read
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    Path[] cache = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    this.conf = new HDTBuilderConfiguration(context.getConfiguration());
    File file = new File(cache[0].toString());
    // try-with-resources: the original only closed the stream on success and
    // leaked it when mapFromFile threw
    try (CountInputStream input =
            new CountInputStream(new BufferedInputStream(new FileInputStream(file)))) {
        this.dictionary = new FourSectionDictionary(this.conf.getSpec());
        this.dictionary.mapFromFile(input, file, this);
    }
    // DEBUG
    // ((PFCDictionarySection) this.dictionary.getShared()).dumpAll();
}
示例8: setup
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Deserializes the Bloom filter shipped as the first distributed-cache file
 * and keeps it for use during the task.
 */
@Override
protected void setup(
    Context context)
    throws IOException, InterruptedException {
    Path[] cachedFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    File filterFile = new File(cachedFiles[0].toString());
    filter = BloomFilterDumper.fromFile(filterFile);
    System.out.println("Filter = " + filter);
}
示例9: map
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
@Override
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
    // Build a stop-word filter from the first distributed-cache file.
    // NOTE(review): this re-reads the stopwords file for EVERY input record;
    // loading it once in setup() would avoid the repeated I/O — confirm and fix.
    Configuration conf = context.getConfiguration();
    Path[] filename = DistributedCache.getLocalCacheFiles(conf);
    StopWords stopwords = new StopWords(filename[0].toString());
    // The emitted value is the name of the file this record came from.
    FileSplit fileSplit = (FileSplit)context.getInputSplit();
    String fileId = fileSplit.getPath().getName();
    id = new Text(fileId);
    // Strip XML tags from the line.
    String new_value = value.toString().replaceAll("<.*?>", "");
    // Replace every non-letter with a space, lower-case, and split into words.
    String[] words = new_value.replaceAll("[^a-zA-Z ]", " ").toLowerCase().split(" ");
    // Emit (word, fileId) for each non-empty, non-stopword token.
    for (String str : words) { //filter each word
        if(!stopwords.contains(str) && str.length()>0)
        {//str is not a stopword
            final_word = new Text(str);
            //emit the word and the id of the file it belongs to
            context.write(final_word, id);
        }
    }
}
示例10: configure
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Configures the fuzzy-join task: tokenizer, similarity filters, the token
 * rank table (from the DistributedCache when available) and the record data
 * columns.
 */
@Override
public void configure(JobConf job) {
    // Tokenizer used to split record fields into tokens.
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX,
            FuzzyJoinConfig.TOKEN_SEPARATOR);
    // Similarity function and threshold for the join filters.
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(
            job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
                    FuzzyJoinConfig.SIMILARITY_NAME_VALUE),
            job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
                    FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE));
    // Token rank table: prefer the distributed-cache copy, fall back to the
    // configured path.
    Path rankPath;
    try {
        Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
        rankPath = (localFiles != null)
                ? localFiles[0]
                : new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    new TokenLoad(rankPath.toString(), tokenRank).loadTokenRank();
    // Columns of each record participating in the join.
    dataColumns = FuzzyJoinUtil.getDataColumns(job.get(
            FuzzyJoinConfig.RECORD_DATA_PROPERTY,
            FuzzyJoinConfig.RECORD_DATA_VALUE));
}
示例11: configure
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
@Override
public void configure(JobConf job) {
    LOG.info("Configure START");
    // Join index: use the distributed-cache copies when present, otherwise
    // the configured path.
    Path[] localIndex;
    try {
        localIndex = DistributedCache.getLocalCacheFiles(job);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    if (localIndex != null) {
        for (Path indexPath : localIndex) {
            addJoinIndex(indexPath);
        }
    } else {
        addJoinIndex(new Path(job.get(FuzzyJoinDriver.DATA_JOININDEX_PROPERTY)));
    }
    // Suffix that tags records coming from the second relation.
    // NOTE(review): assumes the property value contains the separator; the
    // default "" would make [1] throw ArrayIndexOutOfBoundsException — confirm.
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
    LOG.info("Configure END");
}
示例12: setup
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Resets per-task state and grabs the localized cache files before any
 * map/reduce calls run.
 */
@Override
public void setup(Context context) throws IOException, InterruptedException{
    this.localFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    this.currFileOffSet = 0;
    this.codonID = 1;
    this.IDs = null;
    this.word = null;
}
示例13: configure
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Sets up the task: tokenizer, similarity filters, the token rank table
 * (distributed-cache copy preferred) and the record data columns.
 */
@Override
public void configure(JobConf job) {
    // Tokenizer for splitting record fields into tokens.
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY,
                    FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX,
            FuzzyJoinConfig.TOKEN_SEPARATOR);
    // Similarity function + threshold drive the fuzzy-join filters.
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(
            job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
                    FuzzyJoinConfig.SIMILARITY_NAME_VALUE),
            job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
                    FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE));
    // Token ranks: distributed-cache copy wins over the configured path.
    Path tokensFile;
    try {
        Path[] localCache = DistributedCache.getLocalCacheFiles(job);
        tokensFile = (localCache == null)
                ? new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY))
                : localCache[0];
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    new TokenLoad(tokensFile.toString(), tokenRank).loadTokenRank();
    // Columns of each record that take part in the join.
    dataColumns = FuzzyJoinUtil.getDataColumns(job.get(
            FuzzyJoinConfig.RECORD_DATA_PROPERTY,
            FuzzyJoinConfig.RECORD_DATA_VALUE));
}
示例14: configure
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Configures the two-relation fuzzy join: tokenizer, similarity filters,
 * token ranks (distributed-cache copy preferred), the join columns of both
 * relations, and the suffix identifying the second relation.
 */
@Override
public void configure(JobConf job) {
    // Tokenizer and similarity filters from the job settings.
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY,
                    FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX,
            FuzzyJoinConfig.TOKEN_SEPARATOR);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(
            job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
                    FuzzyJoinConfig.SIMILARITY_NAME_VALUE),
            job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
                    FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE));
    // Token ranks: prefer the file localized through the DistributedCache.
    Path rankPath;
    try {
        Path[] localCache = DistributedCache.getLocalCacheFiles(job);
        rankPath = (localCache == null)
                ? new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY))
                : localCache[0];
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    new TokenLoad(rankPath.toString(), tokenRank).loadTokenRank();
    // Join columns for each of the two relations.
    dataColumns[0] = FuzzyJoinUtil.getDataColumns(job.get(
            FuzzyJoinConfig.RECORD_DATA_PROPERTY,
            FuzzyJoinConfig.RECORD_DATA_VALUE));
    dataColumns[1] = FuzzyJoinUtil.getDataColumns(job.get(
            FuzzyJoinConfig.RECORD_DATA1_PROPERTY,
            FuzzyJoinConfig.RECORD_DATA1_VALUE));
    // Suffix that marks records of the second relation.
    // NOTE(review): assumes the property value contains the separator; the
    // default "" would make [1] throw ArrayIndexOutOfBoundsException — confirm.
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
}
示例15: configure
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Configures the record generator for this data copy: copy index, record
 * count (for the RID offset), dictionary scaling factor, the token rank
 * file (distributed-cache copy preferred), the tokenizer and data columns.
 * Missing required properties are fatal and terminate the task.
 */
@Override
public void configure(JobConf job) {
    // Which copy of the data this task generates.
    int copyIndex = job.getInt(FuzzyJoinDriver.DATA_CRTCOPY_PROPERTY, -1);
    if (copyIndex == -1) {
        System.err.println("ERROR: fuzzyjoin.data.crtcopy not set.");
        System.exit(-1);
    }
    recordGenerator = new RecordGenerator(copyIndex);
    int recordCount = job.getInt(FuzzyJoinDriver.DATA_NORECORDS_PROPERTY, -1);
    if (recordCount == -1) {
        System.err.println("ERROR: fuzzyjoin.data.norecords not set.");
        System.exit(-1);
    }
    // RID offset so each copy produces a disjoint ID range.
    offsetRID = copyIndex * recordCount;
    int dictionaryFactor = job.getInt(
            FuzzyJoinDriver.DATA_DICTIONARY_FACTOR_PROPERTY, 1);
    // Token ranks feed the generator's dictionary; prefer the
    // distributed-cache copy, fall back to the configured path.
    Path rankFile;
    try {
        Path[] localFiles = DistributedCache.getLocalCacheFiles(job);
        rankFile = (localFiles != null)
                ? localFiles[0]
                : new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    new TokenLoad(rankFile.toString(), recordGenerator)
            .loadTokenRank(dictionaryFactor);
    // Tokenizer for splitting record fields.
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY,
                    FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX,
            FuzzyJoinConfig.TOKEN_SEPARATOR);
    // Columns of each record that carry the join data.
    dataColumns = FuzzyJoinUtil.getDataColumns(job.get(
            FuzzyJoinConfig.RECORD_DATA_PROPERTY,
            FuzzyJoinConfig.RECORD_DATA_VALUE));
    // Arrays.sort(dataColumns);
}