This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.split.JobSplit. If you are wondering what the JobSplit class does, how to use it, or what real-world usage looks like, the curated code samples below may help.
The JobSplit class belongs to the org.apache.hadoop.mapreduce.split package. Fifteen code examples of the class are shown below, sorted by popularity by default.
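Before the examples, here is a minimal orientation sketch (not taken from any of the projects below) exercising the JobSplit members the samples rely on: the EMPTY_TASK_SPLIT placeholder and the nested TaskSplitMetaInfo and TaskSplitIndex classes. The host names and the class name JobSplitOverview are illustrative only, and the getters are assumed from the way the examples below call them.

import org.apache.hadoop.mapreduce.split.JobSplit;

public class JobSplitOverview {
  public static void main(String[] args) {
    // Placeholder split with no locations and no data; several fake-scheduler
    // examples below pass it wherever a real split is not needed.
    JobSplit.TaskSplitMetaInfo empty = JobSplit.EMPTY_TASK_SPLIT;

    // Per-split meta info: preferred host locations, start offset of the
    // serialized split, and the split's input length (cf. writeOldSplits and
    // testResourceEstimator below). "host1"/"host2" are made-up hosts.
    JobSplit.TaskSplitMetaInfo meta =
        new JobSplit.TaskSplitMetaInfo(new String[] {"host1", "host2"}, 0L, 128L);
    System.out.println("locations = " + meta.getLocations().length
        + ", input length = " + meta.getInputDataLength());

    // Index into the serialized split file: which file to read and at what
    // offset (cf. the main() example below that calls readFields() on it).
    JobSplit.TaskSplitIndex index = empty.getSplitIndex();
    System.out.println("split file = '" + index.getSplitLocation()
        + "', offset = " + index.getStartOffset());
  }
}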
Example 1: FakeTaskInProgress
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
FakeTaskInProgress(JobID jId, JobConf jobConf, Task t,
    boolean isMap, FakeJobInProgress job) {
  super(jId, "", JobSplit.EMPTY_TASK_SPLIT, job.jobtracker, jobConf, job,
      0, 1);
  this.isMap = isMap;
  this.fakeJob = job;
  activeTasks = new TreeMap<TaskAttemptID, String>();
  activeTasks.put(t.getTaskID(), "tt");
  // create a fake status for a task that is running for a bit
  this.taskStatus = TaskStatus.createTaskStatus(isMap);
  taskStatus.setProgress(0.5f);
  taskStatus.setRunState(TaskStatus.State.RUNNING);
  if (jobConf.getMapSpeculativeExecution()) {
    // resetting of the hasSpeculativeMap is done
    // when speculative map is scheduled by the job.
    hasSpeculativeMap = true;
  }
  if (jobConf.getReduceSpeculativeExecution()) {
    // resetting of the hasSpeculativeReduce is done
    // when speculative reduce is scheduled by the job.
    hasSpeculativeReduce = true;
  }
}
Example 2: obtainNewMapTask
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
@Override
public Task obtainNewMapTask(final TaskTrackerStatus tts, int clusterSize,
    int numUniqueHosts, int localityLevel) throws IOException {
  for (int map = 0; map < maps.length; map++) {
    FakeTaskInProgress tip = (FakeTaskInProgress) maps[map];
    if (!tip.isRunning() && !tip.isComplete() &&
        getLocalityLevel(tip, tts) < localityLevel) {
      TaskAttemptID attemptId = getTaskAttemptID(tip);
      JobSplit.TaskSplitMetaInfo split = JobSplit.EMPTY_TASK_SPLIT;
      Task task = new MapTask("", attemptId, 0, split.getSplitIndex(), 1) {
        @Override
        public String toString() {
          return String.format("%s on %s", getTaskID(), tts.getTrackerName());
        }
      };
      runningMapTasks++;
      tip.createTaskAttempt(task, tts.getTrackerName());
      nonLocalRunningMaps.add(tip);
      taskTrackerManager.startTask(tts.getTrackerName(), task, tip);
      return task;
    }
  }
  return null;
}
Example 3: writeOldSplits
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
private static SplitMetaInfo[] writeOldSplits(org.apache.hadoop.mapred.InputSplit[] splits, FSDataOutputStream out,
    Configuration conf) throws IOException {
  SplitMetaInfo[] info = new SplitMetaInfo[splits.length];
  if (splits.length != 0) {
    int i = 0;
    long offset = out.getPos();
    for (org.apache.hadoop.mapred.InputSplit split : splits) {
      long prevLen = out.getPos();
      Text.writeString(out, split.getClass().getName());
      split.write(out);
      long currLen = out.getPos();
      String[] locations = split.getLocations();
      final int max_loc = conf.getInt(MAX_SPLIT_LOCATIONS, 10);
      if (locations.length > max_loc) {
        LOG.warn("Max block location exceeded for split: " + split + " splitsize: " + locations.length
            + " maxsize: " + max_loc);
        locations = Arrays.copyOf(locations, max_loc);
      }
      info[i++] = new JobSplit.SplitMetaInfo(locations, offset, split.getLength());
      offset += currLen - prevLen;
    }
  }
  return info;
}
Example 4: writeJobSplitMetaInfo
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
private static void writeJobSplitMetaInfo(FileSystem fs, Path filename, FsPermission p, int splitMetaInfoVersion,
    JobSplit.SplitMetaInfo[] allSplitMetaInfo) throws IOException {
  // write the splits meta-info to a file for the job tracker
  FSDataOutputStream out = null;
  try {
    out = FileSystem.create(fs, filename, p);
    out.write(META_SPLIT_FILE_HEADER);
    WritableUtils.writeVInt(out, splitMetaInfoVersion);
    WritableUtils.writeVInt(out, allSplitMetaInfo.length);
    for (JobSplit.SplitMetaInfo splitMetaInfo : allSplitMetaInfo) {
      splitMetaInfo.write(out);
    }
  } finally {
    IOUtils.closeStream(out);
  }
}
Example 5: obtainNewMapTask
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
public Task obtainNewMapTask(final TaskTrackerStatus tts, int clusterSize,
    int numUniqueHosts, int localityLevel) throws IOException {
  for (int map = 0; map < maps.length; map++) {
    FakeTaskInProgress tip = (FakeTaskInProgress) maps[map];
    if (!tip.isRunning() && !tip.isComplete() &&
        getLocalityLevel(tip, tts) < localityLevel) {
      TaskAttemptID attemptId = getTaskAttemptID(tip);
      JobSplit.TaskSplitMetaInfo split = JobSplit.EMPTY_TASK_SPLIT;
      Task task = new MapTask("", attemptId, 0, split.getSplitIndex(), 1) {
        @Override
        public String toString() {
          return String.format("%s on %s", getTaskID(), tts.getTrackerName());
        }
      };
      runningMapTasks++;
      tip.createTaskAttempt(task, tts.getTrackerName());
      nonLocalRunningMaps.add(tip);
      taskTrackerManager.startTask(tts.getTrackerName(), task, tip);
      return task;
    }
  }
  return null;
}
Example 6: obtainNewMapTask
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
public Task obtainNewMapTask(final TaskTrackerStatus tts, int clusterSize,
    int numUniqueHosts, int localityLevel) throws IOException {
  for (int map = 0; map < maps.length; map++) {
    HFSPFakeTaskInProgress tip = (HFSPFakeTaskInProgress) maps[map];
    if (!tip.isRunning() && !tip.isComplete()
        && getLocalityLevel(tip, tts) < localityLevel) {
      TaskAttemptID attemptId = getTaskAttemptID(tip);
      JobSplit.TaskSplitMetaInfo split = JobSplit.EMPTY_TASK_SPLIT;
      Task task = new MapTask("", attemptId, 0, split.getSplitIndex(), 1) {
        @Override
        public String toString() {
          return String.format("%s on %s", getTaskID(), tts.getTrackerName());
        }
      };
      runningMapTasks++;
      tip.createTaskAttempt(task, tts.getTrackerName());
      nonLocalRunningMaps.add(tip);
      taskTrackerManager.startTask(tts.getTrackerName(), task, tip);
      return task;
    }
  }
  return null;
}
Example 7: FakeTaskInProgress
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
FakeTaskInProgress(
    JobID jId, JobConf jobConf, Task t,
    boolean isMap, FakeJobInProgress job,
    JobSplit.TaskSplitMetaInfo split) {
  super(jId, "", split, null, jobConf, job, 0, 1);
  this.isMap = isMap;
  this.fakeJob = job;
  activeTasks = new TreeMap<TaskAttemptID, String>();
  activeTasks.put(t.getTaskID(), "tt");
  // create a fake status for a task that is running for a bit
  this.taskStatus = TaskStatus.createTaskStatus(isMap);
  taskStatus.setProgress(0.5f);
  taskStatus.setRunState(TaskStatus.State.RUNNING);
  if (jobConf.getMapSpeculativeExecution()) {
    // resetting of the hasSpeculativeMap is done
    // when speculative map is scheduled by the job.
    hasSpeculativeMap = true;
  }
  if (jobConf.getReduceSpeculativeExecution()) {
    // resetting of the hasSpeculativeReduce is done
    // when speculative reduce is scheduled by the job.
    hasSpeculativeReduce = true;
  }
}
Example 8: verifyLocationHints
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
private void verifyLocationHints(Path inputSplitsDir,
    List<TaskLocationHint> actual) throws Exception {
  JobID jobId = new JobID("dummy", 1);
  JobSplit.TaskSplitMetaInfo[] splitsInfo =
      SplitMetaInfoReader.readSplitMetaInfo(jobId, remoteFs,
          conf, inputSplitsDir);
  int splitsCount = splitsInfo.length;
  List<TaskLocationHint> locationHints =
      new ArrayList<TaskLocationHint>(splitsCount);
  for (int i = 0; i < splitsCount; ++i) {
    locationHints.add(
        TaskLocationHint.createTaskLocationHint(new HashSet<String>(
            Arrays.asList(splitsInfo[i].getLocations())), null));
  }
  Assert.assertEquals(locationHints, actual);
}
Example 9: main
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
public static void main(String... args) throws IOException {
  String taskSplitFile = args[0];
  Configuration conf = new Configuration();
  DataInputStream is =
      new DataInputStream(new FileInputStream(taskSplitFile));
  JobSplit.TaskSplitIndex taskSplitIndex = new JobSplit.TaskSplitIndex();
  taskSplitIndex.readFields(is);
  is.close();
  Object split = getSplitDetails(conf,
      new Path(taskSplitIndex.getSplitLocation()),
      taskSplitIndex.getStartOffset());
  System.out.println("InputSplit instance class = " + split.getClass().getName());
  System.out.println("ToString on split = " + split);
  System.out.println("Reflection fields = " + ToStringBuilder
      .reflectionToString(split, ToStringStyle.SHORT_PREFIX_STYLE));
}
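As a complementary sketch of the write side, the snippet below produces a file that a main() like the one above could read back. The two-argument TaskSplitIndex constructor and its write(DataOutput) counterpart to readFields() are assumed here from the class's Writable-style usage; the paths and the class name WriteTaskSplitIndex are purely illustrative.

import java.io.DataOutputStream;
import java.io.FileOutputStream;
import org.apache.hadoop.mapreduce.split.JobSplit;

public class WriteTaskSplitIndex {
  public static void main(String[] args) throws Exception {
    // Illustrative only: point the index at a hypothetical serialized split
    // file and offset, then serialize it the way readFields() above expects.
    JobSplit.TaskSplitIndex index =
        new JobSplit.TaskSplitIndex("hdfs:///tmp/job.split", 0L);
    try (DataOutputStream out =
        new DataOutputStream(new FileOutputStream("task.splitindex"))) {
      index.write(out);
    }
  }
}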
Example 10: testResourceEstimator
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
public void testResourceEstimator() throws Exception {
  final int maps = 100;
  final int reduces = 2;
  final int singleMapOutputSize = 1000;
  JobConf jc = new JobConf();
  JobID jid = new JobID("testJT", 0);
  jc.setNumMapTasks(maps);
  jc.setNumReduceTasks(reduces);
  JobInProgress jip = new JobInProgress(jid, jc,
      UtilsForTests.getJobTracker());
  // unfortunately, we can't set job input size from here.
  ResourceEstimator re = new ResourceEstimator(jip);
  for (int i = 0; i < maps; ++i) {
    if (i < maps / 10) {
      // re.thresholdToUse is maps / 10
      long estOutSize = re.getEstimatedMapOutputSize();
      System.out.println(estOutSize);
      assertEquals(0, estOutSize);
    }
    TaskStatus ts = new MapTaskStatus();
    ts.setOutputSize(singleMapOutputSize);
    JobSplit.TaskSplitMetaInfo split =
        new JobSplit.TaskSplitMetaInfo(new String[0], 0, 0);
    TaskInProgress tip =
        new TaskInProgress(jid, "", split, jip.jobtracker, jc, jip, 0, 1);
    re.updateWithCompletedTask(ts, tip);
  }
  assertEquals(2 * singleMapOutputSize, re.getEstimatedMapOutputSize());
  assertEquals(2 * singleMapOutputSize * maps / reduces, re.getEstimatedReduceInputSize());
}
Example 11: obtainNewMapTask
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
@Override
public Task obtainNewMapTask(final TaskTrackerStatus tts, int clusterSize,
    int ignored) throws IOException {
  TaskAttemptID attemptId = getTaskAttemptID(true);
  Task task = new MapTask("", attemptId, 0, new JobSplit.TaskSplitIndex(),
      1) {
    @Override
    public String toString() {
      return String.format("%s on %s", getTaskID(), tts.getTrackerName());
    }
  };
  taskTrackerManager.update(tts.getTrackerName(), task);
  runningMapTasks++;
  return task;
}
Example 12: createAndAddTIP
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
private TaskInProgress createAndAddTIP(JobTracker jobtracker,
    JobInProgress jip, TaskType type) {
  JobConf conf = jip.getJobConf();
  JobID id = jip.getJobID();
  // now create a fake tip for this fake job
  TaskInProgress tip = null;
  if (type == TaskType.MAP) {
    tip = new TaskInProgress(id, "dummy", JobSplit.EMPTY_TASK_SPLIT,
        jobtracker, conf, jip, 0, 1);
    jip.maps = new TaskInProgress[] {tip};
  } else if (type == TaskType.REDUCE) {
    tip = new TaskInProgress(id, "dummy", jip.desiredMaps(), 0,
        jobtracker, conf, jip, 1);
    jip.reduces = new TaskInProgress[] {tip};
  } else if (type == TaskType.JOB_SETUP) {
    tip = new TaskInProgress(id, "dummy", JobSplit.EMPTY_TASK_SPLIT,
        jobtracker, conf, jip, 0, 1);
    jip.setup = new TaskInProgress[] {tip};
  } else if (type == TaskType.JOB_CLEANUP) {
    tip = new TaskInProgress(id, "dummy", JobSplit.EMPTY_TASK_SPLIT,
        jobtracker, conf, jip, 0, 1);
    jip.cleanup = new TaskInProgress[] {tip};
  }
  return tip;
}
Example 13: FakeTaskInProgress
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
FakeTaskInProgress(JobID jId, int id, JobConf jobConf,
    FakeJobInProgress job, String[] inputLocations,
    JobSplit.TaskSplitMetaInfo split) {
  super(jId, "", split, job.jobtracker, jobConf, job, id, 1);
  this.isMap = true;
  this.fakeJob = job;
  this.inputLocations = inputLocations;
  activeTasks = new TreeMap<TaskAttemptID, String>();
  taskStatus = TaskStatus.createTaskStatus(isMap);
  taskStatus.setRunState(TaskStatus.State.UNASSIGNED);
}
Example 14: writeNewSplits
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
@SuppressWarnings("unchecked")
private static <T extends InputSplit> SplitMetaInfo[] writeNewSplits(Configuration conf, T[] array,
    FSDataOutputStream out) throws IOException, InterruptedException {
  SplitMetaInfo[] info = new SplitMetaInfo[array.length];
  if (array.length != 0) {
    SerializationFactory factory = new SerializationFactory(conf);
    int i = 0;
    long offset = out.getPos();
    for (T split : array) {
      long prevCount = out.getPos();
      Text.writeString(out, split.getClass().getName());
      Serializer<T> serializer = factory.getSerializer((Class<T>) split.getClass());
      serializer.open(out);
      serializer.serialize(split);
      long currCount = out.getPos();
      String[] locations = split.getLocations();
      final int max_loc = conf.getInt(MAX_SPLIT_LOCATIONS, 10);
      if (locations.length > max_loc) {
        LOG.warn("Max block location exceeded for split: " + split + " splitsize: " + locations.length
            + " maxsize: " + max_loc);
        locations = Arrays.copyOf(locations, max_loc);
      }
      info[i++] = new JobSplit.SplitMetaInfo(locations, offset, split.getLength());
      offset += currCount - prevCount;
    }
  }
  return info;
}
Example 15: testResourceEstimator
import org.apache.hadoop.mapreduce.split.JobSplit; // import the required package/class
public void testResourceEstimator() throws Exception {
  final int maps = 100;
  final int reduces = 2;
  final int singleMapOutputSize = 1000;
  JobConf jc = new JobConf();
  JobID jid = new JobID("testJT", 0);
  jc.setNumMapTasks(maps);
  jc.setNumReduceTasks(reduces);
  JobInProgress jip = new JobInProgress(jid, jc,
      UtilsForTests.getJobTracker());
  // unfortunately, we can't set job input size from here.
  ResourceEstimator re = new ResourceEstimator(jip);
  for (int i = 0; i < maps / 10; ++i) {
    long estOutSize = re.getEstimatedMapOutputSize();
    System.out.println(estOutSize);
    assertEquals(0, estOutSize);
    TaskStatus ts = new MapTaskStatus();
    ts.setOutputSize(singleMapOutputSize);
    JobSplit.TaskSplitMetaInfo split =
        new JobSplit.TaskSplitMetaInfo(new String[0], 0, 0);
    TaskInProgress tip =
        new TaskInProgress(jid, "", split, jip.jobtracker, jc, jip, 0, 1);
    re.updateWithCompletedTask(ts, tip);
  }
  assertEquals(2 * singleMapOutputSize, re.getEstimatedMapOutputSize());
  assertEquals(2 * singleMapOutputSize * maps / reduces, re.getEstimatedReduceInputSize());
}