This article collects typical usage examples of the Java method org.apache.hadoop.mapred.JobConf.getStrings. If you are wondering what JobConf.getStrings does, how to call it, or where to find examples, the curated method code samples below may help. You may also want to look further into the usage of the enclosing class, org.apache.hadoop.mapred.JobConf.
The following shows 2 code examples of the JobConf.getStrings method, sorted by popularity by default.
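Before the full examples, here is a minimal sketch of the method's behavior (the key demo.files is made up for illustration): getStrings returns the comma-separated values of a configuration property as a String[], or null if the property is not set.

import org.apache.hadoop.mapred.JobConf;

public class GetStringsSketch {
  public static void main(String[] args) {
    JobConf conf = new JobConf();
    // A comma-separated property value is split into a String[] by getStrings().
    conf.set("demo.files", "/data/a.txt,/data/b.txt");       // hypothetical key and paths
    String[] files = conf.getStrings("demo.files");          // {"/data/a.txt", "/data/b.txt"}
    String[] missing = conf.getStrings("demo.missing.key");  // null when the property is absent
    System.out.println(files.length + " files, missing=" + missing);
  }
}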
Example 1: updateHDFSDistCacheFilesList
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
/**
* For the job to be simulated, identify the needed distributed cache files by
* mapping original cluster's distributed cache file paths to the simulated cluster's
* paths and add these paths in the map {@code distCacheFiles}.
*<br>
* JobStory should contain distributed cache related properties like
* <li> {@link MRJobConfig#CACHE_FILES}
* <li> {@link MRJobConfig#CACHE_FILE_VISIBILITIES}
* <li> {@link MRJobConfig#CACHE_FILES_SIZES}
* <li> {@link MRJobConfig#CACHE_FILE_TIMESTAMPS}
* <li> {@link MRJobConfig#CLASSPATH_FILES}
*
* <li> {@link MRJobConfig#CACHE_ARCHIVES}
* <li> {@link MRJobConfig#CACHE_ARCHIVES_VISIBILITIES}
* <li> {@link MRJobConfig#CACHE_ARCHIVES_SIZES}
* <li> {@link MRJobConfig#CACHE_ARCHIVES_TIMESTAMPS}
* <li> {@link MRJobConfig#CLASSPATH_ARCHIVES}
*
* <li> {@link MRJobConfig#CACHE_SYMLINK}
*
* @param jobdesc JobStory of original job obtained from trace
* @throws IOException
*/
void updateHDFSDistCacheFilesList(JobStory jobdesc) throws IOException {
  // Map original job's distributed cache file paths to simulated cluster's
  // paths, to be used by this simulated job.
  JobConf jobConf = jobdesc.getJobConf();
  String[] files = jobConf.getStrings(MRJobConfig.CACHE_FILES);
  if (files != null) {
    String[] fileSizes = jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES);
    String[] visibilities =
        jobConf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);
    String[] timeStamps =
        jobConf.getStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);
    FileSystem fs = FileSystem.get(conf);
    String user = jobConf.getUser();
    for (int i = 0; i < files.length; i++) {
      // Check if visibilities are available because older hadoop versions
      // didn't have public, private Distributed Caches separately.
      boolean visibility =
          (visibilities == null) ? true : Boolean.valueOf(visibilities[i]);
      if (isLocalDistCacheFile(files[i], user, visibility)) {
        // local FS based distributed cache file.
        // Create this file on the pseudo local FS on the fly (i.e. when the
        // simulated job is submitted).
        continue;
      }
      // distributed cache file on hdfs
      String mappedPath = mapDistCacheFilePath(files[i], timeStamps[i],
                                               visibility, user);
      // No need to add a distributed cache file path to the list if
      // (1) the mapped path is already there in the list OR
      // (2) the file with the mapped path already exists.
      // In any of the above 2 cases, file paths, timestamps, file sizes and
      // visibilities match. File sizes should match if file paths and
      // timestamps match because single file path with single timestamp
      // should correspond to a single file size.
      if (distCacheFiles.containsKey(mappedPath) ||
          fs.exists(new Path(mappedPath))) {
        continue;
      }
      distCacheFiles.put(mappedPath, Long.valueOf(fileSizes[i]));
    }
  }
}
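This example (and Example 2 below) relies on the fact that the related properties (CACHE_FILES, CACHE_FILES_SIZES, CACHE_FILE_TIMESTAMPS, ...) are stored as comma-joined lists of equal length, so the arrays returned by getStrings stay index-aligned. A minimal sketch of that pattern, using made-up paths and sizes:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class ParallelCachePropsSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    // setStrings() joins the values with commas; getStrings() splits them back,
    // so files[i] and fileSizes[i] describe the same distributed cache entry.
    conf.setStrings(MRJobConfig.CACHE_FILES, "/cache/x.jar", "/cache/y.txt"); // made-up paths
    conf.setStrings(MRJobConfig.CACHE_FILES_SIZES, "1024", "2048");           // made-up sizes
    String[] files = conf.getStrings(MRJobConfig.CACHE_FILES);
    String[] fileSizes = conf.getStrings(MRJobConfig.CACHE_FILES_SIZES);
    for (int i = 0; i < files.length; i++) {
      System.out.println(files[i] + " -> " + fileSizes[i] + " bytes");
    }
  }
}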
Example 2: configureDistCacheFiles
import org.apache.hadoop.mapred.JobConf; // import the package/class the method depends on
/**
* If gridmix needs to emulate distributed cache load, then configure
* distributed cache files of a simulated job by mapping the original
* cluster's distributed cache file paths to the simulated cluster's paths and
* setting these mapped paths in the job configuration of the simulated job.
* <br>
* Configure local FS based distributed cache files through the property
* "tmpfiles" and hdfs based distributed cache files through the property
* {@link MRJobConfig#CACHE_FILES}.
* @param conf configuration for the simulated job to be run
* @param jobConf job configuration of original cluster's job, obtained from
* trace
* @throws IOException
*/
void configureDistCacheFiles(Configuration conf, JobConf jobConf)
    throws IOException {
  if (shouldEmulateDistCacheLoad()) {
    String[] files = jobConf.getStrings(MRJobConfig.CACHE_FILES);
    if (files != null) {
      // hdfs based distributed cache files to be configured for simulated job
      List<String> cacheFiles = new ArrayList<String>();
      // local FS based distributed cache files to be configured for
      // simulated job
      List<String> localCacheFiles = new ArrayList<String>();
      String[] visibilities =
          jobConf.getStrings(MRJobConfig.CACHE_FILE_VISIBILITIES);
      String[] timeStamps =
          jobConf.getStrings(MRJobConfig.CACHE_FILE_TIMESTAMPS);
      String[] fileSizes = jobConf.getStrings(MRJobConfig.CACHE_FILES_SIZES);
      String user = jobConf.getUser();
      for (int i = 0; i < files.length; i++) {
        // Check if visibilities are available because older hadoop versions
        // didn't have public, private Distributed Caches separately.
        boolean visibility =
            (visibilities == null) ? true : Boolean.valueOf(visibilities[i]);
        if (isLocalDistCacheFile(files[i], user, visibility)) {
          // local FS based distributed cache file.
          // Create this file on the pseudo local FS.
          String fileId = MD5Hash.digest(files[i] + timeStamps[i]).toString();
          long fileSize = Long.parseLong(fileSizes[i]);
          Path mappedLocalFilePath =
              PseudoLocalFs.generateFilePath(fileId, fileSize)
                  .makeQualified(pseudoLocalFs.getUri(),
                                 pseudoLocalFs.getWorkingDirectory());
          pseudoLocalFs.create(mappedLocalFilePath);
          localCacheFiles.add(mappedLocalFilePath.toUri().toString());
        } else {
          // hdfs based distributed cache file.
          // Get the mapped HDFS path on simulated cluster
          String mappedPath = mapDistCacheFilePath(files[i], timeStamps[i],
                                                   visibility, user);
          cacheFiles.add(mappedPath);
        }
      }
      if (cacheFiles.size() > 0) {
        // configure hdfs based distributed cache files for simulated job
        conf.setStrings(MRJobConfig.CACHE_FILES,
                        cacheFiles.toArray(new String[cacheFiles.size()]));
      }
      if (localCacheFiles.size() > 0) {
        // configure local FS based distributed cache files for simulated job
        conf.setStrings("tmpfiles", localCacheFiles.toArray(
            new String[localCacheFiles.size()]));
      }
    }
  }
}