This article collects typical usage examples of the Java method org.apache.hadoop.mapred.JobClient.getClusterStatus. If you are unsure what JobClient.getClusterStatus does, how to call it, or what real-world uses look like, the curated code samples below should help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.mapred.JobClient.
The following shows 9 code examples of JobClient.getClusterStatus, sorted by popularity by default.
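Before the individual examples, here is a minimal, self-contained sketch of the pattern they all share: build a JobClient from a JobConf, call getClusterStatus, and read tracker and slot counts. It assumes the classic org.apache.hadoop.mapred API and a reachable JobTracker; the class name ClusterStatusProbe is just a placeholder.

import org.apache.hadoop.mapred.ClusterStatus;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class ClusterStatusProbe {
  public static void main(String[] args) throws Exception {
    // Build a client from the default job configuration on the classpath.
    JobClient client = new JobClient(new JobConf());
    // Pass true to also fetch the lists of active and blacklisted trackers.
    ClusterStatus status = client.getClusterStatus(true);
    System.out.println("Task trackers:    " + status.getTaskTrackers());
    System.out.println("Max map slots:    " + status.getMaxMapTasks());
    System.out.println("Max reduce slots: " + status.getMaxReduceTasks());
    System.out.println("Active trackers:  " + status.getActiveTrackerNames());
    client.close();
  }
}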
Example 1: getSplits
import org.apache.hadoop.mapred.JobClient; // import the class on which this method depends
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
      new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
      jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}
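The regular expression above extracts the host name from tracker identifiers of the form tracker_<host>:<port>. A quick stand-alone check of that extraction (the sample identifier below is made up):

import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class TrackerNameCheck {
  public static void main(String[] args) {
    Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
    // Made-up identifier in the usual "tracker_<host>:<port>" shape.
    Matcher m = trackerPattern.matcher("tracker_node-17.example.com:127.0.0.1:40123");
    if (m.find()) {
      System.out.println(m.group(1)); // prints "node-17.example.com"
    }
  }
}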
Example 2: getSplits
import org.apache.hadoop.mapred.JobClient; // import the class on which this method depends
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);
  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }
  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;
  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();
  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
      DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {
      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[]) null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[]) null));
  }
  return splits;
}
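To make the split sizing concrete, here is a small worked example with assumed figures (5 trackers, the default of 2 map slots per tracker, 100 MB of cache data, and a stand-in value for DistributedCacheEmulator.AVG_BYTES_PER_MAP):

public class DistCacheSplitSizing {
  public static void main(String[] args) {
    int numTrackers = 5;                      // assumed cluster size
    int numMapSlotsPerTracker = 2;            // default used in the snippet above
    long totalSize = 100L * 1024 * 1024;      // assumed 100 MB of cache data
    long avgBytesPerMap = 8L * 1024 * 1024;   // stand-in for AVG_BYTES_PER_MAP
    int numSplits = numTrackers * numMapSlotsPerTracker;            // 10 splits
    long targetSize = Math.max(totalSize / numSplits, avgBytesPerMap);
    System.out.println(numSplits + " splits, about " + targetSize + " bytes per map");
  }
}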
Example 3: getNumAvailableReds
import org.apache.hadoop.mapred.JobClient; // import the class on which this method depends
public static final int getNumAvailableReds() throws IOException {
  JobConf job = new JobConf(Utils.class);
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int maxReduces = cluster.getMaxReduceTasks();
  int runnings = cluster.getReduceTasks();
  return maxReduces - runnings;
}
Example 4: getNumAvailableMaps
import org.apache.hadoop.mapred.JobClient; // import the class on which this method depends
public static final int getNumAvailableMaps() throws IOException {
  JobConf job = new JobConf(Utils.class);
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  int maxMaps = cluster.getMaxMapTasks();
  int runnings = cluster.getMapTasks();
  return maxMaps - runnings;
}
Example 5: start
import org.apache.hadoop.mapred.JobClient; // import the class on which this method depends
public void start() throws IOException {
  File testCluster = new File(WORKING_DIRECTORY);
  if (testCluster.exists()) {
    FileUtil.deleteDirectory(testCluster);
  }
  testCluster.mkdirs();
  File testClusterData = new File(WORKING_DIRECTORY + "/data");
  File testClusterLog = new File(WORKING_DIRECTORY + "/logs");
  if (cluster == null) {
    conf = new HdfsConfiguration();
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR,
        testClusterData.getAbsolutePath());
    cluster = new MiniDFSCluster.Builder(conf).build();
    fs = cluster.getFileSystem();
    // set minicluster as default config
    HdfsUtil.setDefaultConfiguration(conf);
    System.setProperty("hadoop.log.dir", testClusterLog.getAbsolutePath());
    MiniMRCluster mrCluster = new MiniMRCluster(1, fs.getUri()
        .toString(), 1, null, null, new JobConf(conf));
    JobConf mrClusterConf = mrCluster.createJobConf();
    HdfsUtil.setDefaultConfiguration(new Configuration(mrClusterConf));
    System.out.println("------");
    JobClient client = new JobClient(mrClusterConf);
    ClusterStatus status = client.getClusterStatus(true);
    System.out.println(status.getActiveTrackerNames());
  }
}
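A matching teardown for this kind of test harness might look like the sketch below. It assumes mrCluster has been promoted from the local variable above to a field alongside cluster and fs; the shutdown order (MapReduce first, then HDFS) and the null checks are assumptions, not part of the original snippet.

public void stop() {
  if (mrCluster != null) {
    // Stop the MiniMRCluster before tearing down the file system it uses.
    mrCluster.shutdown();
    mrCluster = null;
  }
  if (cluster != null) {
    cluster.shutdown();
    cluster = null;
    fs = null;
  }
}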
Example 6: run
import org.apache.hadoop.mapred.JobClient; // import the class on which this method depends
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    return printUsage();
  }
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
      1 * 1024 * 1024 * 1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
      numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
  Job job = Job.getInstance(conf);
  job.setJarByClass(RandomTextWriter.class);
  job.setJobName("random-text-writer");
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(RandomWriter.RandomInputFormat.class);
  job.setMapperClass(RandomTextMapper.class);
  Class<? extends OutputFormat> outputFormatClass =
      SequenceFileOutputFormat.class;
  List<String> otherArgs = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-outFormat".equals(args[i])) {
        outputFormatClass =
            Class.forName(args[++i]).asSubclass(OutputFormat.class);
      } else {
        otherArgs.add(args[i]);
      }
    } catch (ArrayIndexOutOfBoundsException except) {
      System.out.println("ERROR: Required parameter missing from " +
          args[i - 1]);
      return printUsage(); // exits
    }
  }
  job.setOutputFormatClass(outputFormatClass);
  FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(0)));
  System.out.println("Running " + numMaps + " maps.");
  // reducer NONE
  job.setNumReduceTasks(0);
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
      (endTime.getTime() - startTime.getTime()) / 1000 +
      " seconds.");
  return ret;
}
Example 7: run
import org.apache.hadoop.mapred.JobClient; // import the class on which this method depends
/**
 * This is the main routine for launching a distributed random write job.
 * It runs 10 maps/node and each node writes 1 gig of data to a DFS file.
 * The reduce doesn't do anything.
 *
 * @throws IOException
 */
public int run(String[] args) throws Exception {
  if (args.length == 0) {
    System.out.println("Usage: writer <out-dir>");
    ToolRunner.printGenericCommandUsage(System.out);
    return 2;
  }
  Path outDir = new Path(args[0]);
  Configuration conf = getConf();
  JobClient client = new JobClient(conf);
  ClusterStatus cluster = client.getClusterStatus();
  int numMapsPerHost = conf.getInt(MAPS_PER_HOST, 10);
  long numBytesToWritePerMap = conf.getLong(BYTES_PER_MAP,
      1 * 1024 * 1024 * 1024);
  if (numBytesToWritePerMap == 0) {
    System.err.println("Cannot have " + BYTES_PER_MAP + " set to 0");
    return -2;
  }
  long totalBytesToWrite = conf.getLong(TOTAL_BYTES,
      numMapsPerHost * numBytesToWritePerMap * cluster.getTaskTrackers());
  int numMaps = (int) (totalBytesToWrite / numBytesToWritePerMap);
  if (numMaps == 0 && totalBytesToWrite > 0) {
    numMaps = 1;
    conf.setLong(BYTES_PER_MAP, totalBytesToWrite);
  }
  conf.setInt(MRJobConfig.NUM_MAPS, numMaps);
  Job job = Job.getInstance(conf);
  job.setJarByClass(RandomWriter.class);
  job.setJobName("random-writer");
  FileOutputFormat.setOutputPath(job, outDir);
  job.setOutputKeyClass(BytesWritable.class);
  job.setOutputValueClass(BytesWritable.class);
  job.setInputFormatClass(RandomInputFormat.class);
  job.setMapperClass(RandomMapper.class);
  job.setReducerClass(Reducer.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  System.out.println("Running " + numMaps + " maps.");
  // reducer NONE
  job.setNumReduceTasks(0);
  Date startTime = new Date();
  System.out.println("Job started: " + startTime);
  int ret = job.waitForCompletion(true) ? 0 : 1;
  Date endTime = new Date();
  System.out.println("Job ended: " + endTime);
  System.out.println("The job took " +
      (endTime.getTime() - startTime.getTime()) / 1000 +
      " seconds.");
  return ret;
}
Example 8: getMaxNumReds
import org.apache.hadoop.mapred.JobClient; // import the class on which this method depends
public static final int getMaxNumReds() throws IOException {
  JobConf job = new JobConf(Utils.class);
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  return cluster.getMaxReduceTasks();
}
Example 9: getMaxNumMaps
import org.apache.hadoop.mapred.JobClient; // import the class on which this method depends
public static final int getMaxNumMaps() throws IOException {
  JobConf job = new JobConf(Utils.class);
  JobClient client = new JobClient(job);
  ClusterStatus cluster = client.getClusterStatus();
  return cluster.getMaxMapTasks();
}