本文整理汇总了Java中org.apache.hadoop.mapreduce.JobContext.getConfiguration方法的典型用法代码示例。如果您正苦于以下问题：Java JobContext.getConfiguration方法的具体用法？Java JobContext.getConfiguration怎么用？Java JobContext.getConfiguration使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.mapreduce.JobContext的用法示例。
在下文中一共展示了JobContext.getConfiguration方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: getSplits
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Builds the input splits for the job from the files returned by
 * {@code listStatus(job)}.
 *
 * <p>The maximum split size is read from
 * {@code mapreduce.input.fileinputformat.split.maxsize} and defaults to 0
 * when the property is absent.
 *
 * @param job the job context supplying the configuration and input listing
 * @return the splits produced by {@code getMoreSplits}; an empty list when
 *         no input files were listed
 * @throws IOException if one of the invoked helpers reports an I/O failure
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  final Configuration conf = job.getConfiguration();
  final long maxSize =
      conf.getLong("mapreduce.input.fileinputformat.split.maxsize", 0);

  // Every file in the input set.
  final List<FileStatus> stats = listStatus(job);
  final List<InputSplit> splits = new ArrayList<>();
  if (!stats.isEmpty()) {
    // The helper appends its results to the list passed in.
    getMoreSplits(conf, stats, maxSize, 0, 0, splits);
  }
  return splits;
}
示例2: getSplits
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Creates one {@code GenSplit} per active tracker whose name matches
 * {@code tracker_<name>:...}, each sized to an equal share of the bytes
 * configured under {@code GRIDMIX_GEN_BYTES}.
 *
 * <p>The share is computed as the configured byte count divided by the
 * number of task trackers reported by the cluster status. Trackers whose
 * name does not match the pattern are reported on stderr and skipped.
 *
 * @param jobCtxt the job context supplying the configuration
 * @return one split per recognised active tracker
 * @throws IOException if the generation byte count is missing or negative,
 *         if the cluster reports no task trackers, or if querying the
 *         cluster fails
 */
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobClient client =
      new JobClient(new JobConf(jobCtxt.getConfiguration()));
  ClusterStatus stat = client.getClusterStatus(true);
  final long toGen =
      jobCtxt.getConfiguration().getLong(GRIDMIX_GEN_BYTES, -1);
  if (toGen < 0) {
    throw new IOException("Invalid/missing generation bytes: " + toGen);
  }
  final int nTrackers = stat.getTaskTrackers();
  // Guard the division below: with zero trackers it would otherwise fail
  // with an unchecked ArithmeticException instead of a descriptive error.
  if (nTrackers <= 0) {
    throw new IOException("No task trackers available: " + nTrackers);
  }
  final long bytesPerTracker = toGen / nTrackers;
  final ArrayList<InputSplit> splits = new ArrayList<InputSplit>(nTrackers);
  final Pattern trackerPattern = Pattern.compile("tracker_([^:]*):.*");
  // A single matcher is reset for each tracker name rather than re-created.
  final Matcher m = trackerPattern.matcher("");
  for (String tracker : stat.getActiveTrackerNames()) {
    m.reset(tracker);
    if (!m.find()) {
      System.err.println("Skipping node: " + tracker);
      continue;
    }
    // Group 1 is the text between "tracker_" and the first ':'.
    final String name = m.group(1);
    splits.add(new GenSplit(bytesPerTracker, new String[] { name }));
  }
  return splits;
}
示例3: checkOutputSpecs
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/** {@inheritDoc} */
@Override
public void checkOutputSpecs(JobContext context)
    throws IOException, InterruptedException {
  final Configuration jobConf = context.getConfiguration();
  final DBConfiguration dbSettings = new DBConfiguration(jobConf);

  // Validate the required settings in order; the first one found missing
  // aborts the check with an IOException.
  if (jobConf.get(DBConfiguration.URL_PROPERTY) == null) {
    throw new IOException("Database connection URL is not set.");
  }
  if (dbSettings.getOutputTableName() == null) {
    throw new IOException("Procedure name is not set for export");
  }
  // The field count is only consulted when no field names are configured.
  if (dbSettings.getOutputFieldNames() == null
      && dbSettings.getOutputFieldCount() == 0) {
    throw new IOException(
        "Output field names are null and zero output field count set.");
  }
}
示例4: checkOutputSpecs
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/** {@inheritDoc} */
@Override
public void checkOutputSpecs(JobContext context)
    throws IOException, InterruptedException {
  final Configuration jobConf = context.getConfiguration();
  final DBConfiguration dbSettings = new DBConfiguration(jobConf);

  // Each required setting is checked in turn; the first missing one is
  // reported through an IOException.
  if (jobConf.get(DBConfiguration.URL_PROPERTY) == null) {
    throw new IOException("Database connection URL is not set.");
  }
  if (dbSettings.getOutputTableName() == null) {
    throw new IOException("Table name is not set for export.");
  }
  if (dbSettings.getOutputFieldNames() == null) {
    throw new IOException(
        "Output field names are null.");
  }
  if (jobConf.get(ExportJobBase.SQOOP_EXPORT_UPDATE_COL_KEY) == null) {
    throw new IOException("Update key column is not set for export.");
  }
}
开发者ID:aliyun,项目名称:aliyun-maxcompute-data-collectors,代码行数:20,代码来源:SQLServerResilientUpdateOutputFormat.java
示例5: checkOutputSpecs
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/** {@inheritDoc} */
@Override
public void checkOutputSpecs(JobContext context)
    throws IOException, InterruptedException {
  final Configuration jobConf = context.getConfiguration();
  final DBConfiguration dbSettings = new DBConfiguration(jobConf);

  // Fail on the first required setting that is absent.
  if (jobConf.get(DBConfiguration.URL_PROPERTY) == null) {
    throw new IOException("Database connection URL is not set.");
  }
  if (dbSettings.getOutputTableName() == null) {
    throw new IOException("Table name is not set for export");
  }
  // The field count is only consulted when no field names are configured.
  if (dbSettings.getOutputFieldNames() == null
      && dbSettings.getOutputFieldCount() == 0) {
    throw new IOException(
        "Output field names are null and zero output field count set.");
  }
}
示例6: getSplits
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Groups the snapshot files into balanced sets and wraps each set in an
 * {@code ExportSnapshotInputSplit}.
 *
 * <p>When {@code CONF_NUM_SPLITS} is unset (0) and there is at least one
 * snapshot file, the mapper count is derived as
 * {@code 1 + files / CONF_MAP_GROUP} (group size defaults to 10), capped at
 * the number of files, and written back to the configuration under both
 * {@code CONF_NUM_SPLITS} and {@code MR_NUM_MAPS}.
 *
 * @param context the job context supplying the configuration
 * @return one split per group returned by {@code getBalancedSplits}
 * @throws IOException if accessing the file system or listing snapshot files fails
 * @throws InterruptedException declared by the overridden method
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  Path snapshotDir = new Path(conf.get(CONF_SNAPSHOT_DIR));
  FileSystem fs = FileSystem.get(snapshotDir.toUri(), conf);

  List<Pair<SnapshotFileInfo, Long>> snapshotFiles = getSnapshotFiles(conf, fs, snapshotDir);
  int mappers = conf.getInt(CONF_NUM_SPLITS, 0);
  if (mappers == 0 && snapshotFiles.size() > 0) {
    mappers = 1 + (snapshotFiles.size() / conf.getInt(CONF_MAP_GROUP, 10));
    mappers = Math.min(mappers, snapshotFiles.size());
    // Persist the derived value so later readers of the configuration see it.
    conf.setInt(CONF_NUM_SPLITS, mappers);
    conf.setInt(MR_NUM_MAPS, mappers);
  }

  List<List<Pair<SnapshotFileInfo, Long>>> groups = getBalancedSplits(snapshotFiles, mappers);
  // Parameterized (the original used a raw ArrayList) to avoid an unchecked conversion.
  List<InputSplit> splits = new ArrayList<InputSplit>(groups.size());
  for (List<Pair<SnapshotFileInfo, Long>> files: groups) {
    splits.add(new ExportSnapshotInputSplit(files));
  }
  return splits;
}
示例7: getOutputCompressorClass
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Resolves the {@link CompressionCodec} class configured for compressing
 * the job outputs.
 *
 * @param job the job context whose configuration is consulted
 * @param defaultValue the {@link CompressionCodec} class returned when no
 *        codec name is configured
 * @return the configured {@link CompressionCodec} class, or
 *         {@code defaultValue} when none is set
 * @throws IllegalArgumentException if a codec class name is configured but
 *         the class cannot be found
 */
public static Class<? extends CompressionCodec>
    getOutputCompressorClass(JobContext job,
        Class<? extends CompressionCodec> defaultValue) {
  final Configuration jobConf = job.getConfiguration();
  final String codecName = jobConf.get(FileOutputFormat.COMPRESS_CODEC);
  if (codecName == null) {
    // Nothing configured: fall back to the caller's default.
    return defaultValue;
  }
  try {
    return jobConf.getClassByName(codecName).asSubclass(CompressionCodec.class);
  } catch (ClassNotFoundException e) {
    throw new IllegalArgumentException("Compression codec " + codecName +
        " was not found.", e);
  }
}
示例8: getSplits
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Implementation shared with the deprecated HLogInputFormat: produces one
 * {@code WALSplit} per file returned by {@code getFiles} for the configured
 * input directory.
 *
 * @param context the job context supplying the configuration
 * @param startKey configuration key holding the start time; defaults to
 *        {@link Long#MIN_VALUE} when unset
 * @param endKey configuration key holding the end time; defaults to
 *        {@link Long#MAX_VALUE} when unset
 * @return one split per selected file
 * @throws IOException if file system access fails
 * @throws InterruptedException declared for callers; not thrown directly here
 */
List<InputSplit> getSplits(final JobContext context, final String startKey, final String endKey)
    throws IOException, InterruptedException {
  final Configuration jobConf = context.getConfiguration();
  final Path walDir = new Path(jobConf.get("mapreduce.input.fileinputformat.inputdir"));

  final long fromTime = jobConf.getLong(startKey, Long.MIN_VALUE);
  final long toTime = jobConf.getLong(endKey, Long.MAX_VALUE);

  final FileSystem fileSystem = walDir.getFileSystem(jobConf);
  final List<FileStatus> selected = getFiles(fileSystem, walDir, fromTime, toTime);

  final List<InputSplit> result = new ArrayList<InputSplit>(selected.size());
  for (FileStatus status : selected) {
    final String location = status.getPath().toString();
    result.add(new WALSplit(location, status.getLen(), fromTime, toTime));
  }
  return result;
}
示例9: commitJob
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * {@inheritDoc}
 *
 * <p>After the superclass commit returns, the task status is set to
 * "Commit Successful" and {@code cleanup} is invoked with the job
 * configuration, even if setting the status throws.
 */
@Override
public void commitJob(JobContext jobContext) throws IOException {
  // Captured before delegating so the same configuration is used for cleanup.
  final Configuration jobConfiguration = jobContext.getConfiguration();
  super.commitJob(jobContext);
  try {
    taskAttemptContext.setStatus("Commit Successful");
  } finally {
    cleanup(jobConfiguration);
  }
}
示例10: getSplits
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Implementation of InputFormat::getSplits(). Returns a list of InputSplits
 * such that the number of bytes to be copied is approximately equal across
 * the splits.
 *
 * <p>The split count is read from {@code MRJobConfig.NUM_MAPS}; a value of
 * 0 yields an empty list without reading the total byte count.
 *
 * @param context JobContext for the job.
 * @return The list of uniformly-distributed input splits.
 * @throws IOException on failure.
 * @throws InterruptedException declared by the overridden method.
 */
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
  final Configuration jobConf = context.getConfiguration();
  final int splitCount = ConfigurationUtil.getInt(jobConf, MRJobConfig.NUM_MAPS);
  if (splitCount == 0) {
    return new ArrayList<>();
  }

  final long totalBytes =
      ConfigurationUtil.getLong(jobConf, S3MapReduceCpConstants.CONF_LABEL_TOTAL_BYTES_TO_BE_COPIED);
  return getSplits(jobConf, splitCount, totalBytes);
}
示例11: checkOutputSpecs
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Verifies that an output directory is configured and that it does not
 * already hold output.
 *
 * <p>An existing directory is tolerated only when the simple partitioner is
 * not in use and its sole entry is a non-directory named
 * {@code TeraInputFormat.PARTITION_FILENAME}.
 *
 * @param job the job context supplying configuration and credentials
 * @throws InvalidJobConfException if no output directory is set
 * @throws IOException if the output directory already exists with other
 *         content (as {@code FileAlreadyExistsException}), or if file
 *         system access fails
 */
@Override
public void checkOutputSpecs(JobContext job
                             ) throws InvalidJobConfException, IOException {
  // The output directory must be configured.
  final Path outDir = getOutputPath(job);
  if (outDir == null) {
    throw new InvalidJobConfException("Output directory not set in JobConf.");
  }

  final Configuration jobConf = job.getConfiguration();

  // Obtain a delegation token for the output directory's file system.
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outDir }, jobConf);

  final FileSystem fs = outDir.getFileSystem(jobConf);
  if (!fs.exists(outDir)) {
    return;
  }

  // An existing output dir counts as empty iff its only content is the
  // partition file.
  final FileStatus[] children = fs.listStatus(outDir);
  boolean onlyPartitionFile = false;
  if (children != null && children.length == 1) {
    final FileStatus child = children[0];
    final String childName = child.getPath().getName();
    onlyPartitionFile =
        !child.isDirectory() && TeraInputFormat.PARTITION_FILENAME.equals(childName);
  }

  if (TeraSort.getUseSimplePartitioner(job) || !onlyPartitionFile) {
    throw new FileAlreadyExistsException("Output directory " + outDir
        + " already exists");
  }
}
示例12: checkOutputSpecs
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Runs the superclass output checks, then enables debug logging for this
 * JVM if the job configuration requests it.
 *
 * @param context the job context supplying the configuration
 * @throws IOException if the superclass check fails
 * @throws InterruptedException if the superclass check is interrupted
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException,
    InterruptedException {

  super.checkOutputSpecs(context);

  // Per the original author's note, this code runs on a Datanode in the
  // Hadoop cluster, so debug logging must be switched on in this JVM too.
  OraOopUtilities.enableDebugLoggingIfRequired(context.getConfiguration());
}
示例13: getSplits
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Reads the summation stored in the job configuration and wraps it in a
 * single split.
 *
 * @param context the job context supplying the configuration
 * @return a list containing a single split of summation
 */
@Override
public List<InputSplit> getSplits(JobContext context) {
  // Read sigma from the configuration.
  final Summation sigma =
      SummationWritable.read(DistSum.class, context.getConfiguration());

  // Exactly one split is produced, so size the list accordingly.
  final List<InputSplit> result = new ArrayList<InputSplit>(1);
  result.add(new SummationSplit(sigma));
  return result;
}
示例14: getSplits
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Distributes the datasets retrieved for the configured mainframe dataset
 * name across input splits in round-robin order.
 *
 * <p>The number of splits is the smaller of the dataset count and the
 * job's configured map count, but never less than one. Without the lower
 * bound, a non-positive map count left the split list empty and the
 * distribution loop never advanced.
 *
 * @param job the job context supplying the configuration and map count
 * @return the populated splits
 * @throws IOException if no datasets are retrieved, or if retrieval fails
 */
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
  Configuration conf = job.getConfiguration();
  String dsName
      = conf.get(MainframeConfiguration.MAINFRAME_INPUT_DATASET_NAME);
  LOG.info("Datasets to transfer from: " + dsName);
  List<String> datasets = retrieveDatasets(dsName, conf);
  if (datasets.isEmpty()) {
    throw new IOException ("No sequential datasets retrieved from " + dsName);
  }

  int count = datasets.size();
  // Clamp to at least one split so every dataset has somewhere to go even
  // when the configured map count is zero or negative.
  int chunks = Math.max(1,
      Math.min(count, ConfigurationHelper.getJobNumMaps(job)));

  List<MainframeDatasetInputSplit> buckets =
      new ArrayList<MainframeDatasetInputSplit>(chunks);
  for (int i = 0; i < chunks; i++) {
    buckets.add(new MainframeDatasetInputSplit());
  }

  // Round-robin: dataset j goes to split (j mod chunks), which matches the
  // assignment order of the original nested loops.
  for (int j = 0; j < count; j++) {
    buckets.get(j % chunks).addDataset(datasets.get(j));
  }

  return new ArrayList<InputSplit>(buckets);
}
示例15: getInputPathFilter
import org.apache.hadoop.mapreduce.JobContext; //导入方法依赖的package包/类
/**
 * Instantiates the {@code PathFilter} class configured for the input paths.
 *
 * @param context the job context whose configuration is consulted
 * @return a new PathFilter instance of the configured class, or
 *         {@code null} if no filter class has been set
 */
public static PathFilter getInputPathFilter(JobContext context) {
  final Configuration jobConf = context.getConfiguration();
  final Class<?> configuredFilter =
      jobConf.getClass(PATHFILTER_CLASS, null, PathFilter.class);
  if (configuredFilter == null) {
    return null;
  }
  return (PathFilter) ReflectionUtils.newInstance(configuredFilter, jobConf);
}