本文整理汇总了Java中org.apache.hadoop.filecache.DistributedCache.getCacheFiles方法的典型用法代码示例。如果您正苦于以下问题:Java DistributedCache.getCacheFiles方法的具体用法?Java DistributedCache.getCacheFiles怎么用?Java DistributedCache.getCacheFiles使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.filecache.DistributedCache
的用法示例。
在下文中一共展示了DistributedCache.getCacheFiles方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: setup
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Reads the vectorization options from the job configuration and loads the
 * term dictionary (word -&gt; integer id) that was shipped through the
 * {@link DistributedCache}. The first cached file must be the dictionary
 * sequence file.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  URI[] cacheEntries = DistributedCache.getCacheFiles(conf);
  Preconditions.checkArgument(cacheEntries != null && cacheEntries.length >= 1,
      "missing paths from the DistributedCache");

  // Output-vector options (independent reads; order does not matter).
  sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
  namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);
  dimension = conf.getInt(PartialVectorMerger.DIMENSION, Integer.MAX_VALUE);
  maxNGramSize = conf.getInt(DictionaryVectorizer.MAX_NGRAMS, maxNGramSize);

  // key is word value is id
  Path dictionaryFile = new Path(cacheEntries[0].getPath());
  SequenceFileIterable<Writable, IntWritable> dictionaryEntries =
      new SequenceFileIterable<Writable, IntWritable>(dictionaryFile, true, conf);
  for (Pair<Writable, IntWritable> entry : dictionaryEntries) {
    dictionary.put(entry.getFirst().toString(), entry.getSecond().get());
  }
}
示例2: setup
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Reads the TF-IDF parameters from the job configuration and loads the
 * document-frequency dictionary (feature id -&gt; document frequency) from the
 * first file in the {@link DistributedCache}.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  URI[] cacheEntries = DistributedCache.getCacheFiles(conf);
  Preconditions.checkArgument(cacheEntries != null && cacheEntries.length >= 1,
      "missing paths from the DistributedCache");

  // TF-IDF weighting parameters (independent reads; order does not matter).
  vectorCount = conf.getLong(TFIDFConverter.VECTOR_COUNT, 1);
  featureCount = conf.getLong(TFIDFConverter.FEATURE_COUNT, 1);
  minDf = conf.getInt(TFIDFConverter.MIN_DF, 1);
  maxDf = conf.getLong(TFIDFConverter.MAX_DF, -1);
  sequentialAccess = conf.getBoolean(PartialVectorMerger.SEQUENTIAL_ACCESS, false);
  namedVector = conf.getBoolean(PartialVectorMerger.NAMED_VECTOR, false);

  // key is feature, value is the document frequency
  Path dictionaryFile = new Path(cacheEntries[0].getPath());
  SequenceFileIterable<IntWritable,LongWritable> dfEntries =
      new SequenceFileIterable<IntWritable,LongWritable>(dictionaryFile, true, conf);
  for (Pair<IntWritable,LongWritable> entry : dfEntries) {
    dictionary.put(entry.getFirst().get(), entry.getSecond().get());
  }
}
示例3: setup
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Reads the max-DF pruning threshold from the job configuration and loads the
 * document-frequency dictionary (feature id -&gt; document frequency) from the
 * first file in the {@link DistributedCache}.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  URI[] cacheEntries = DistributedCache.getCacheFiles(conf);
  Preconditions.checkArgument(cacheEntries != null && cacheEntries.length >= 1,
      "missing paths from the DistributedCache");
  maxDf = conf.getLong(HighDFWordsPruner.MAX_DF, -1);

  // key is feature, value is the document frequency
  Path dictionaryFile = new Path(cacheEntries[0].getPath());
  SequenceFileIterable<IntWritable, LongWritable> dfEntries =
      new SequenceFileIterable<IntWritable, LongWritable>(dictionaryFile, true, conf);
  for (Pair<IntWritable, LongWritable> entry : dfEntries) {
    dictionary.put(entry.getFirst().get(), entry.getSecond().get());
  }
}
示例4: setup
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Initializes the cross-product task: reads the serialized MRQL mapper,
 * reducer and (optional) accumulator functions from the job configuration
 * and records the cache-file URIs and their localized paths.
 *
 * @param context the Hadoop task context supplying the configuration
 * @throws Error wrapping any failure during setup (with the original
 *         exception preserved as the cause)
 */
@Override
protected void setup ( Context context ) throws IOException,InterruptedException {
  super.setup(context);
  try {
    conf = context.getConfiguration();
    Plan.conf = conf;
    Config.read(Plan.conf);
    Tree code = Tree.parse(conf.get("mrql.reducer"));
    reduce_fnc = functional_argument(conf,code);
    code = Tree.parse(conf.get("mrql.mapper"));
    map_fnc = functional_argument(conf,code);
    if (conf.get("mrql.zero") != null) {
      // An accumulator was supplied: evaluate its zero element and compile it.
      code = Tree.parse(conf.get("mrql.zero"));
      result = Interpreter.evalE(code);
      code = Tree.parse(conf.get("mrql.accumulator"));
      acc_fnc = functional_argument(conf,code);
    } else result = null;
    counter = conf.get("mrql.counter");
    uris = DistributedCache.getCacheFiles(conf);
    local_paths = DistributedCache.getLocalCacheFiles(conf);
    index = 0;
  } catch (Exception e) {
    // BUGFIX: chain the original exception as the cause instead of only
    // flattening it into the message, so the full stack trace survives.
    throw new Error("Cannot setup the crossProduct: "+e, e);
  }
}
示例5: getCachedFiles
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Retrieves the paths of the files in the {@link DistributedCache},
 * preferring the localized copies and falling back to the original cache
 * URIs when running locally (where no localization happens).
 *
 * @param conf job configuration to query
 * @return a non-empty array of cache-file paths
 * @throws IOException on filesystem access failure
 * @throws IllegalStateException if no cached files can be found
 */
public static Path[] getCachedFiles(Configuration conf) throws IOException {
  LocalFileSystem localFs = FileSystem.getLocal(conf);
  Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
  URI[] fallbackFiles = DistributedCache.getCacheFiles(conf);
  // fallback for local execution
  if (cacheFiles == null) {
    Preconditions.checkState(fallbackFiles != null, "Unable to find cached files!");
    cacheFiles = new Path[fallbackFiles.length];
    for (int n = 0; n < fallbackFiles.length; n++) {
      cacheFiles[n] = new Path(fallbackFiles[n].getPath());
    }
  } else {
    for (int n = 0; n < cacheFiles.length; n++) {
      cacheFiles[n] = localFs.makeQualified(cacheFiles[n]);
      // fallback for local execution
      if (!localFs.exists(cacheFiles[n])) {
        // BUGFIX: previously fallbackFiles was dereferenced unguarded here,
        // causing an NPE (or AIOOBE) when the localized file was missing and
        // no matching cache URI existed.
        Preconditions.checkState(fallbackFiles != null && fallbackFiles.length > n,
            "Unable to find cached files!");
        cacheFiles[n] = new Path(fallbackFiles[n].getPath());
      }
    }
  }
  Preconditions.checkState(cacheFiles.length > 0, "Unable to find cached files!");
  return cacheFiles;
}
示例6: load
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Loads the vector from {@link DistributedCache}. Returns null if no vector exists.
 */
public static Vector load(Configuration conf) throws IOException {
  URI[] cached = DistributedCache.getCacheFiles(conf);
  if (cached != null && cached.length >= 1) {
    log.info("Files are: {}", Arrays.toString(cached));
    // By convention the vector is the first cached file.
    return load(conf, new Path(cached[0].getPath()));
  }
  return null;
}
示例7: setup
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Loads the dataset descriptor and the serialized rule-base model from the
 * {@link DistributedCache}: the first cached file is the dataset, the second
 * is the model.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  URI[] cached = DistributedCache.getCacheFiles(conf);
  if (cached == null || cached.length < 2) {
    throw new IOException("not enough paths in the DistributedCache");
  }
  dataset = Dataset.load(conf, new Path(cached[0].getPath()));
  converter = new DataConverter(dataset);
  ruleBase = RuleBase.load(conf, new Path(cached[1].getPath()));
  if (ruleBase == null) {
    throw new InterruptedException("Model not found!");
  }
}
示例8: open
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Opens a resource as a stream, trying in order: the classpath (when the
 * resource has no URI scheme), the {@link DistributedCache}, and finally the
 * Hadoop filesystem named by the resource itself.
 *
 * @param resource classpath name, cache-file substring, or filesystem path
 * @param conf configuration supplying the class loader and filesystem
 * @return an open stream for the resource (caller must close it)
 * @throws EsHadoopIllegalArgumentException if the resource cannot be opened
 */
public static InputStream open(String resource, Configuration conf) {
  // Resolve a class loader: configuration first, then the thread context,
  // then this class's own loader.
  ClassLoader loader = conf.getClassLoader();
  if (loader == null) {
    loader = Thread.currentThread().getContextClassLoader();
  }
  if (loader == null) {
    loader = HadoopIOUtils.class.getClassLoader();
  }
  boolean trace = log.isTraceEnabled();
  try {
    // no prefix means classpath
    if (!resource.contains(":")) {
      InputStream result = loader.getResourceAsStream(resource);
      if (result != null) {
        if (trace) {
          log.trace(String.format("Loaded resource %s from classpath", resource));
        }
        return result;
      }
      // fall back to the distributed cache
      URI[] uris = DistributedCache.getCacheFiles(conf);
      if (uris != null) {
        for (URI uri : uris) {
          if (uri.toString().contains(resource)) {
            if (trace) {
              log.trace(String.format("Loaded resource %s from distributed cache", resource));
            }
            return uri.toURL().openStream();
          }
        }
      }
    }
    // fall back to file system
    Path p = new Path(resource);
    FileSystem fs = p.getFileSystem(conf);
    return fs.open(p);
  } catch (IOException ex) {
    // BUGFIX: propagate the underlying IOException as the cause so the real
    // failure (permissions, missing file, bad URL, ...) is not lost.
    throw new EsHadoopIllegalArgumentException(
        String.format("Cannot open stream for resource %s", resource), ex);
  }
}
示例9: setupPipesJob
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Configures a JobConf to run a Hadoop Pipes (C++) job: hooks in the Pipes
 * runner/partitioner/reducer where the user did not supply Java classes,
 * defaults key/value types to Text, wires up progress-reporting input
 * handling, and prepends the C++ executable to the distributed-cache file
 * list so the task JVMs can localize it.
 *
 * @param conf the job configuration to mutate in place
 * @throws IOException if the executable URI cannot be parsed
 * @throws IllegalArgumentException if no executable was configured
 */
private static void setupPipesJob(JobConf conf) throws IOException {
// default map output types to Text
if (!getIsJavaMapper(conf)) {
conf.setMapRunnerClass(PipesMapRunner.class);
// Save the user's partitioner and hook in our's.
setJavaPartitioner(conf, conf.getPartitionerClass());
conf.setPartitionerClass(PipesPartitioner.class);
}
if (!getIsJavaReducer(conf)) {
conf.setReducerClass(PipesReducer.class);
if (!getIsJavaRecordWriter(conf)) {
conf.setOutputFormat(NullOutputFormat.class);
}
}
// Only fill in key/value classes the user has not already set.
String textClassname = Text.class.getName();
setIfUnset(conf, "mapred.mapoutput.key.class", textClassname);
setIfUnset(conf, "mapred.mapoutput.value.class", textClassname);
setIfUnset(conf, "mapred.output.key.class", textClassname);
setIfUnset(conf, "mapred.output.value.class", textClassname);
// Use PipesNonJavaInputFormat if necessary to handle progress reporting
// from C++ RecordReaders ...
if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) {
// Preserve the user's input format so PipesNonJavaInputFormat can delegate.
conf.setClass("mapred.pipes.user.inputformat",
conf.getInputFormat().getClass(), InputFormat.class);
conf.setInputFormat(PipesNonJavaInputFormat.class);
}
String exec = getExecutable(conf);
if (exec == null) {
throw new IllegalArgumentException("No application program defined.");
}
// add default debug script only when executable is expressed as
// <path>#<executable>
if (exec.contains("#")) {
DistributedCache.createSymlink(conf);
// set default gdb commands for map and reduce task
String defScript = "$HADOOP_HOME/src/c++/pipes/debug/pipes-default-script";
setIfUnset(conf,"mapred.map.task.debug.script",defScript);
setIfUnset(conf,"mapred.reduce.task.debug.script",defScript);
}
// Prepend the executable as cache entry 0: shift any existing entries one
// slot to the right, then install the executable URI at index 0.
URI[] fileCache = DistributedCache.getCacheFiles(conf);
if (fileCache == null) {
fileCache = new URI[1];
} else {
URI[] tmp = new URI[fileCache.length+1];
System.arraycopy(fileCache, 0, tmp, 1, fileCache.length);
fileCache = tmp;
}
try {
fileCache[0] = new URI(exec);
} catch (URISyntaxException e) {
// Re-throw as IOException (declared) while keeping the parse error as cause.
IOException ie = new IOException("Problem parsing execable URI " + exec);
ie.initCause(e);
throw ie;
}
DistributedCache.setCacheFiles(fileCache, conf);
}
示例10: setup
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Restores the mapper state from the {@link DistributedCache}: the dataset
 * descriptor comes from the first cached file, the rule-base model from the
 * second.
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  URI[] cachePaths = DistributedCache.getCacheFiles(conf);
  if (cachePaths == null || cachePaths.length < 2) {
    throw new IOException("not enough paths in the DistributedCache");
  }
  dataset = Dataset.load(conf, new Path(cachePaths[0].getPath()));
  converter = new DataConverter(dataset);
  ruleBase = RuleBase.load(conf, new Path(cachePaths[1].getPath()));
  if (ruleBase == null) {
    throw new InterruptedException("Model not found!");
  }
}
示例11: getDistributedCacheFile
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Helper method: fetches one path from the DistributedCache by position.
 *
 * @param conf configuration to read the cache-file list from
 * @param index position of the wanted path in the DistributedCache files
 * @return the path at {@code index}
 * @throws IOException if the cache is empty or {@code index} is out of range
 */
public static Path getDistributedCacheFile(Configuration conf, int index) throws IOException {
  URI[] cached = DistributedCache.getCacheFiles(conf);
  if (cached != null && cached.length > index) {
    return new Path(cached[index].getPath());
  }
  throw new IOException("path not found in the DistributedCache");
}
示例12: getCacheFiles
import org.apache.hadoop.filecache.DistributedCache; //导入方法依赖的package包/类
/**
 * Returns the cache files registered in this instance's Configuration.
 *
 * @return the URIs of the configured cache files
 * @throws IOException if the configuration cannot be read
 */
public URI[] getCacheFiles() throws IOException {
  URI[] cacheFiles = DistributedCache.getCacheFiles(conf);
  return cacheFiles;
}