This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.JobContext. If you are wondering what the JobContext class is used for, or are looking for concrete examples of how to use it, the curated code samples below may help.
The JobContext class belongs to the org.apache.hadoop.mapreduce package. Fifteen code examples of the class are shown below, sorted by popularity by default.
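Before diving into the examples, here is a minimal sketch (not taken from any of the projects below; the class name JobContextDemo and the printed message are illustrative only) of the pattern most of them follow: an InputFormat or OutputFormat receives a JobContext and reads job-wide settings from its wrapped Configuration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class JobContextDemo {
  // Hypothetical helper: pulls the settings a custom InputFormat typically
  // needs out of the JobContext handed to getSplits().
  static int plannedMaps(JobContext context) {
    // Job name and ID are available directly on the context.
    System.out.println("Planning splits for " + context.getJobName()
        + " (" + context.getJobID() + ")");
    // Everything else comes from the wrapped Configuration.
    Configuration conf = context.getConfiguration();
    return conf.getInt(MRJobConfig.NUM_MAPS, 1);
  }
}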
Example 1: getSplits
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
final JobClient client =
new JobClient(new JobConf(jobCtxt.getConfiguration()));
ClusterStatus stat = client.getClusterStatus(true);
final long toGen =
jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
if (toGen < 0) {
throw new IOException("Invalid/missing generation bytes: " + toGen);
}
final int nTrackers = stat.getTaskTrackers();
final long bytesPerTracker = toGen / nTrackers;
final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
final Matcher m = trackerPattern.matcher("");
for (String tracker : stat.getActiveTrackerNames()) {
m.reset(tracker);
if (!m.find()) {
System.err.println("Skipping node: " + tracker);
continue;
}
final String name = m.group(1);
splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
}
return splits;
}
Example 2: createSplits
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
private List<InputSplit> createSplits(JobContext jobContext, List<DynamicInputChunk> chunks) throws IOException {
int numMaps = getNumMapTasks(jobContext.getConfiguration());
final int nSplits = Math.min(numMaps, chunks.size());
List<InputSplit> splits = new ArrayList<>(nSplits);
for (int i = 0; i < nSplits; ++i) {
TaskID taskId = new TaskID(jobContext.getJobID(), TaskType.MAP, i);
chunks.get(i).assignTo(taskId);
splits.add(new FileSplit(chunks.get(i).getPath(), 0,
// Setting non-zero length for FileSplit size, to avoid a possible
// future when 0-sized file-splits are considered "empty" and skipped
// over.
getMinRecordsPerChunk(jobContext.getConfiguration()), null));
}
ConfigurationUtil.publish(jobContext.getConfiguration(), CONF_LABEL_NUM_SPLITS, splits.size());
return splits;
}
Example 3: getSplits
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException {
List<InputSplit> allSplits = new ArrayList<InputSplit>();
Scan originalScan = getScan();
Scan[] scans = rowKeyDistributor.getDistributedScans(originalScan);
for (Scan scan : scans) {
// Internally super.getSplits(...) uses the scan object stored in a private field,
// so to reuse the superclass code we temporarily swap in each distributed scan.
setScan(scan);
List<InputSplit> splits = super.getSplits(context);
allSplits.addAll(splits);
}
// Setting original scan back
setScan(originalScan);
return allSplits;
}
Example 4: getDesiredNumberOfMappers
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
private int getDesiredNumberOfMappers(JobContext jobContext) {
int desiredNumberOfMappers =
jobContext.getConfiguration().getInt(
OraOopConstants.ORAOOP_DESIRED_NUMBER_OF_MAPPERS, -1);
int minMappersAcceptedByOraOop =
OraOopUtilities.getMinNumberOfImportMappersAcceptedByOraOop(jobContext
.getConfiguration());
if (desiredNumberOfMappers < minMappersAcceptedByOraOop) {
LOG.warn(String.format("%s should not be used to perform a Sqoop import "
+ "when the number of mappers is %d\n "
+ "i.e. OraOopManagerFactory.accept() should only accept jobs "
+ "where the number of mappers is at least %d",
OraOopConstants.ORAOOP_PRODUCT_NAME, desiredNumberOfMappers,
minMappersAcceptedByOraOop));
}
return desiredNumberOfMappers;
}
Example 5: checkOutputSpecs
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
public void checkOutputSpecs(JobContext job)
throws FileAlreadyExistsException, IOException {
// Ensure that the output directory is set and not already there
Path outDir = getOutputPath(job);
if (outDir == null) {
throw new InvalidJobConfException("Output directory not set.");
}
// get delegation token for outDir's file system
TokenCache.obtainTokensForNamenodes(job.getCredentials(),
new Path[] { outDir }, job.getConfiguration());
if (outDir.getFileSystem(job.getConfiguration()).exists(outDir)) {
throw new FileAlreadyExistsException("Output directory " + outDir +
" already exists");
}
}
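For context, the output directory that this check validates is set on the driver side; a minimal sketch assuming the standard FileOutputFormat API (the job name and path below are placeholders):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class OutputSpecDemo {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "output-spec-demo");
    job.setOutputFormatClass(TextOutputFormat.class);
    // checkOutputSpecs() above fails if this path is unset or already exists.
    FileOutputFormat.setOutputPath(job, new Path("/tmp/output-spec-demo"));
  }
}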
Example 6: checkOutputSpecs
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
@Override
/** {@inheritDoc} */
public void checkOutputSpecs(JobContext context)
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
DBConfiguration dbConf = new DBConfiguration(conf);
// Sanity check all the configuration values we need.
if (null == conf.get(DBConfiguration.URL_PROPERTY)) {
throw new IOException("Database connection URL is not set.");
} else if (null == dbConf.getOutputTableName()) {
throw new IOException("Table name is not set for export.");
} else if (null == dbConf.getOutputFieldNames()) {
throw new IOException(
"Output field names are null.");
} else if (null == conf.get(ExportJobBase.SQOOP_EXPORT_UPDATE_COL_KEY)) {
throw new IOException("Update key column is not set for export.");
}
}
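The values this method checks are normally written into the configuration by the export driver. As a rough analogue using the stock Hadoop lib.db API (the Sqoop code above uses its own DBConfiguration wrapper; the driver class, URL, credentials, table, and columns here are placeholders), the setup might look like this:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;

public class DbExportConfigDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Stores the JDBC driver, connection URL and credentials in the conf.
    DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
        "jdbc:mysql://localhost/demo", "user", "secret");
    Job job = Job.getInstance(conf, "db-export-demo");
    // Stores the output table name and field names that checkOutputSpecs() expects.
    DBOutputFormat.setOutput(job, "demo_table", "id", "name");
  }
}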
Example 7: checkOutputSpecs
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
@Override
/** {@inheritDoc} */
public void checkOutputSpecs(JobContext context)
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
DBConfiguration dbConf = new DBConfiguration(conf);
// Sanity check all the configuration values we need.
if (null == conf.get(DBConfiguration.URL_PROPERTY)) {
throw new IOException("Database connection URL is not set.");
} else if (null == dbConf.getOutputTableName()) {
throw new IOException("Procedure name is not set for export");
} else if (null == dbConf.getOutputFieldNames()
&& 0 == dbConf.getOutputFieldCount()) {
throw new IOException(
"Output field names are null and zero output field count set.");
}
}
Example 8: getSplits
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
/** {@inheritDoc} */
public List<InputSplit> getSplits(JobContext context) {
//get the property values
final int startDigit = context.getConfiguration().getInt(
DIGIT_START_PROPERTY, 1);
final int nDigits = context.getConfiguration().getInt(
DIGIT_SIZE_PROPERTY, 100);
final int nMaps = context.getConfiguration().getInt(
DIGIT_PARTS_PROPERTY, 1);
//create splits
final List<InputSplit> splits = new ArrayList<InputSplit>(nMaps);
final int[] parts = partition(startDigit - 1, nDigits, nMaps);
for (int i = 0; i < parts.length; ++i) {
final int k = i < parts.length - 1 ? parts[i+1]: nDigits+startDigit-1;
splits.add(new BbpSplit(i, parts[i], k - parts[i]));
}
return splits;
}
Example 9: checkOutputSpecs
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
@Override
/** {@inheritDoc} */
public void checkOutputSpecs(JobContext context)
throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
DBConfiguration dbConf = new DBConfiguration(conf);
// Sanity check all the configuration values we need.
if (null == conf.get(DBConfiguration.URL_PROPERTY)) {
throw new IOException("Database connection URL is not set.");
} else if (null == dbConf.getOutputTableName()) {
throw new IOException("Table name is not set for export");
} else if (null == dbConf.getOutputFieldNames()
&& 0 == dbConf.getOutputFieldCount()) {
throw new IOException(
"Output field names are null and zero output field count set.");
}
}
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 19, Source: SQLServerResilientExportOutputFormat.java
Example 10: cleanupJob
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
@Override
@Deprecated
public void cleanupJob(JobContext context) throws IOException {
if (hasOutputPath()) {
Path pendingJobAttemptsPath = getPendingJobAttemptsPath();
FileSystem fs = pendingJobAttemptsPath
.getFileSystem(context.getConfiguration());
fs.delete(pendingJobAttemptsPath, true);
} else {
LOG.warn("Output Path is null in cleanupJob()");
}
}
Example 11: getOutputIndexFileForWriteInVolume
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
/**
* Create a local map output index file name on the same volume.
*/
public Path getOutputIndexFileForWriteInVolume(Path existing) {
Path outputDir = new Path(existing.getParent(), JOB_OUTPUT_DIR);
Path attemptOutputDir = new Path(outputDir,
conf.get(JobContext.TASK_ATTEMPT_ID));
return new Path(attemptOutputDir, MAP_OUTPUT_FILENAME_STRING +
MAP_OUTPUT_INDEX_SUFFIX_STRING);
}
Example 12: commitJob
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
/**
* The job has completed so move all committed tasks to the final output dir.
* Delete the temporary directory, including all of the work directories.
* Create a _SUCCESS file to mark the job as successful.
* @param context the job's context
*/
public void commitJob(JobContext context) throws IOException {
if (hasOutputPath()) {
Path finalOutput = getOutputPath();
FileSystem fs = finalOutput.getFileSystem(context.getConfiguration());
if (algorithmVersion == 1) {
for (FileStatus stat: getAllCommittedTaskPaths(context)) {
mergePaths(fs, stat, finalOutput);
}
}
// delete the _temporary folder and create a _done file in the o/p folder
cleanupJob(context);
// Write the _SUCCESS marker only if the job wants the output dir marked
// on success; this is enabled by default.
if (context.getConfiguration().getBoolean(SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, true)) {
Path markerPath = new Path(outputPath, SUCCEEDED_FILE_NAME);
fs.create(markerPath).close();
}
} else {
LOG.warn("Output Path is null in commitJob()");
}
}
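The _SUCCESS marker written at the end of commitJob() can be switched off per job; a small sketch, assuming the standard FileOutputCommitter property key:

import org.apache.hadoop.conf.Configuration;

public class SuccessMarkerDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Skip the _SUCCESS marker file in the output directory (defaults to true).
    conf.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
  }
}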
Example 13: getSplits
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
/**
* Returns a split for each store-file directory, using the block locations
* of each file as the locality reference.
*/
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
List<InputSplit> splits = new ArrayList<InputSplit>();
List<FileStatus> files = listStatus(job);
Text key = new Text();
for (FileStatus file: files) {
Path path = file.getPath();
FileSystem fs = path.getFileSystem(job.getConfiguration());
LineReader reader = new LineReader(fs.open(path));
long pos = 0;
int n;
try {
while ((n = reader.readLine(key)) > 0) {
String[] hosts = getStoreDirHosts(fs, path);
splits.add(new FileSplit(path, pos, n, hosts));
pos += n;
}
} finally {
reader.close();
}
}
return splits;
}
Example 14: testTransitionsAtFailed
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
@Test
public void testTransitionsAtFailed() throws IOException {
Configuration conf = new Configuration();
AsyncDispatcher dispatcher = new AsyncDispatcher();
dispatcher.init(conf);
dispatcher.start();
OutputCommitter committer = mock(OutputCommitter.class);
doThrow(new IOException("forcefail"))
.when(committer).setupJob(any(JobContext.class));
CommitterEventHandler commitHandler =
createCommitterEventHandler(dispatcher, committer);
commitHandler.init(conf);
commitHandler.start();
AppContext mockContext = mock(AppContext.class);
when(mockContext.hasSuccessfullyUnregistered()).thenReturn(false);
JobImpl job = createStubbedJob(conf, dispatcher, 2, mockContext);
JobId jobId = job.getID();
job.handle(new JobEvent(jobId, JobEventType.JOB_INIT));
assertJobState(job, JobStateInternal.INITED);
job.handle(new JobStartEvent(jobId));
assertJobState(job, JobStateInternal.FAILED);
job.handle(new JobEvent(jobId, JobEventType.JOB_TASK_COMPLETED));
assertJobState(job, JobStateInternal.FAILED);
job.handle(new JobEvent(jobId, JobEventType.JOB_TASK_ATTEMPT_COMPLETED));
assertJobState(job, JobStateInternal.FAILED);
job.handle(new JobEvent(jobId, JobEventType.JOB_MAP_TASK_RESCHEDULED));
assertJobState(job, JobStateInternal.FAILED);
job.handle(new JobEvent(jobId, JobEventType.JOB_TASK_ATTEMPT_FETCH_FAILURE));
assertJobState(job, JobStateInternal.FAILED);
Assert.assertEquals(JobState.RUNNING, job.getState());
when(mockContext.hasSuccessfullyUnregistered()).thenReturn(true);
Assert.assertEquals(JobState.FAILED, job.getState());
dispatcher.stop();
commitHandler.stop();
}
Example 15: getSplits
import org.apache.hadoop.mapreduce.JobContext; // import the required package/class
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
// get the total data to be generated
long toGen =
jobCtxt.getConfiguration().getLong(GenerateData.GRIDMIX_GEN_BYTES, -1);
if (toGen < 0) {
throw new IOException("Invalid/missing generation bytes: " + toGen);
}
// get the total number of mappers configured
int totalMappersConfigured =
jobCtxt.getConfiguration().getInt(MRJobConfig.NUM_MAPS, -1);
if (totalMappersConfigured < 0) {
throw new IOException("Invalid/missing num mappers: "
+ totalMappersConfigured);
}
final long bytesPerTracker = toGen / totalMappersConfigured;
final ArrayList<InputSplit> splits =
new ArrayList<InputSplit>(totalMappersConfigured);
for (int i = 0; i < totalMappersConfigured; ++i) {
splits.add(new GenSplit(bytesPerTracker,
new String[] { "tracker_local" }));
}
return splits;
}