This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.Job.getConfiguration. If you are wondering what Job.getConfiguration does, how to call it, or want to see it in real code, the curated examples below may help. You can also read more about its enclosing class, org.apache.hadoop.mapreduce.Job.
The following lists 15 code examples of Job.getConfiguration, ordered by popularity.
Example 1: configurePartitioner
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
 * <code>splitPoints</code>. Cleans up the partitions file after the job exits.
 */
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
    throws IOException {
  Configuration conf = job.getConfiguration();
  // create the partitions file
  FileSystem fs = FileSystem.get(conf);
  String hbaseTmpFsDir =
      conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
        HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
  Path partitionsPath = new Path(hbaseTmpFsDir, "partitions_" + UUID.randomUUID());
  fs.makeQualified(partitionsPath);
  writePartitions(conf, partitionsPath, splitPoints);
  fs.deleteOnExit(partitionsPath);
  // configure job to use it
  job.setPartitionerClass(TotalOrderPartitioner.class);
  TotalOrderPartitioner.setPartitionFile(conf, partitionsPath);
}
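For context, here is a minimal, self-contained sketch (not taken from the example above) of pointing a job at an already-written partition file through the same public TotalOrderPartitioner API; the /tmp path is a hypothetical placeholder.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderPartitionerSetup {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "total-order-example");
    Configuration conf = job.getConfiguration();
    // Assumes a partition file has already been written (e.g. by InputSampler);
    // "/tmp/partitions_example" is a hypothetical placeholder path.
    TotalOrderPartitioner.setPartitionFile(conf, new Path("/tmp/partitions_example"));
    job.setPartitionerClass(TotalOrderPartitioner.class);
  }
}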
Example 2: testNewCounterB
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@Test
public void testNewCounterB() throws Exception {
  final Job job = createJob();
  final Configuration conf = job.getConfiguration();
  conf.setInt(JobContext.IO_SORT_FACTOR, 2);
  createWordsFile(inFiles[3], conf);
  removeWordsFile(inFiles[4], conf);
  long inputSize = 0;
  inputSize += getFileSize(inFiles[0]);
  inputSize += getFileSize(inFiles[1]);
  inputSize += getFileSize(inFiles[2]);
  inputSize += getFileSize(inFiles[3]);
  org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(
      job, IN_DIR);
  org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(
      job, new Path(OUT_DIR, "outputN1"));
  assertTrue(job.waitForCompletion(true));
  final Counters c1 = Counters.downgrade(job.getCounters());
  validateCounters(c1, 98304, 20480, 81920);
  validateFileCounters(c1, inputSize, 0, 0, 0);
}
Example 3: readSplit
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
private static List<Text> readSplit(KeyValueTextInputFormat format,
    InputSplit split, Job job) throws IOException, InterruptedException {
  List<Text> result = new ArrayList<Text>();
  Configuration conf = job.getConfiguration();
  TaskAttemptContext context = MapReduceTestUtil.
      createDummyMapTaskAttemptContext(conf);
  RecordReader<Text, Text> reader = format.createRecordReader(split,
      MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
  MapContext<Text, Text, Text, Text> mcontext =
      new MapContextImpl<Text, Text, Text, Text>(conf,
          context.getTaskAttemptID(), reader, null, null,
          MapReduceTestUtil.createDummyReporter(),
          split);
  reader.initialize(split, mcontext);
  while (reader.nextKeyValue()) {
    result.add(new Text(reader.getCurrentValue()));
  }
  reader.close();
  return result;
}
Example 4: testEmptyOutput
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public void testEmptyOutput() throws Exception {
  Job job = Job.getInstance();
  FileOutputFormat.setOutputPath(job, outDir);
  Configuration conf = job.getConfiguration();
  conf.set(MRJobConfig.TASK_ATTEMPT_ID, attempt);
  JobContext jContext = new JobContextImpl(conf, taskID.getJobID());
  TaskAttemptContext tContext = new TaskAttemptContextImpl(conf, taskID);
  FileOutputCommitter committer = new FileOutputCommitter(outDir, tContext);
  // setup
  committer.setupJob(jContext);
  committer.setupTask(tContext);
  // Do not write any output
  // do commit
  committer.commitTask(tContext);
  committer.commitJob(jContext);
  FileUtil.fullyDelete(new File(outDir.toString()));
}
Example 5: readSplit
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
private static List<Text> readSplit(InputFormat<LongWritable,Text> format,
    InputSplit split, Job job) throws IOException, InterruptedException {
  List<Text> result = new ArrayList<Text>();
  Configuration conf = job.getConfiguration();
  TaskAttemptContext context = MapReduceTestUtil.
      createDummyMapTaskAttemptContext(conf);
  RecordReader<LongWritable, Text> reader = format.createRecordReader(split,
      MapReduceTestUtil.createDummyMapTaskAttemptContext(conf));
  MapContext<LongWritable,Text,LongWritable,Text> mcontext =
      new MapContextImpl<LongWritable,Text,LongWritable,Text>(conf,
          context.getTaskAttemptID(), reader, null, null,
          MapReduceTestUtil.createDummyReporter(),
          split);
  reader.initialize(split, mcontext);
  while (reader.nextKeyValue()) {
    result.add(new Text(reader.getCurrentValue()));
  }
  return result;
}
Example 6: propagateOptionsToJob
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@Override
protected void propagateOptionsToJob(Job job) {
  super.propagateOptionsToJob(job);
  Configuration conf = job.getConfiguration();
  // This is an export job, where retrying a failed mapper mostly doesn't make sense.
  // By default we force MR to run only one attempt per mapper. A user or connector
  // developer can override this behavior by setting SQOOP_EXPORT_MAP_TASK_MAX_ATTEMTPS:
  //
  // * Positive number - allow the specified number of attempts
  // * Negative number - fall back to Hadoop's default number of attempts
  //
  // This is important for most connectors, as they commit data directly to the
  // final table; re-running one mapper would therefore lead to misleading errors
  // about inserting duplicate rows.
  int sqoopMaxAttempts = conf.getInt(SQOOP_EXPORT_MAP_TASK_MAX_ATTEMTPS, 1);
  if (sqoopMaxAttempts > 1) {
    conf.setInt(HADOOP_MAP_TASK_MAX_ATTEMTPS, sqoopMaxAttempts);
  }
}
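For readers outside Sqoop, a minimal sketch of the same effect on a plain MapReduce job is shown below; it assumes the Hadoop 2.x property name "mapreduce.map.maxattempts", which is presumably what the HADOOP_MAP_TASK_MAX_ATTEMTPS constant resolves to.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class SingleAttemptExportJob {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "export-like-job");
    // One attempt per map task: a failed mapper is not retried, so a partial
    // export cannot be replayed and insert duplicate rows.
    job.getConfiguration().setInt("mapreduce.map.maxattempts", 1);
  }
}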
Example 7: initScans
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Initialize the collection of scans.
 *
 * @param job
 * @return
 */
private List<Scan> initScans(Job job) {
  Configuration conf = job.getConfiguration();
  // Get the run date: yyyy-MM-dd
  String date = conf.get(GlobalConstants.RUNNING_DATE_PARAMES);
  long startDate = TimeUtil.parseString2Long(date);
  long endDate = startDate + GlobalConstants.DAY_OF_MILLISECONDS;
  Scan scan = new Scan();
  // Define the start and stop row keys for the HBase scan
  scan.setStartRow(Bytes.toBytes("" + startDate));
  scan.setStopRow(Bytes.toBytes("" + endDate));
  FilterList filterList = new FilterList();
  // Define the column names the mapper needs to read
  String[] columns = new String[] { EventLogConstants.LOG_COLUMN_NAME_UUID, // user id
      EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME, // server time
      EventLogConstants.LOG_COLUMN_NAME_PLATFORM, // platform name
      EventLogConstants.LOG_COLUMN_NAME_BROWSER_NAME, // browser name
      EventLogConstants.LOG_COLUMN_NAME_BROWSER_VERSION // browser version
  };
  filterList.addFilter(this.getColumnFilter(columns));
  scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(EventLogConstants.HBASE_NAME_EVENT_LOGS));
  scan.setFilter(filterList);
  return Lists.newArrayList(scan);
}
Example 8: initScans
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Initialize the collection of scans.
 *
 * @param job
 * @return
 */
private List<Scan> initScans(Job job) {
  // timestamp + ....
  Configuration conf = job.getConfiguration();
  // Get the run date: yyyy-MM-dd
  String date = conf.get(GlobalConstants.RUNNING_DATE_PARAMES);
  long startDate = TimeUtil.parseString2Long(date);
  long endDate = startDate + GlobalConstants.DAY_OF_MILLISECONDS;
  Scan scan = new Scan();
  // Define the start and stop row keys for the HBase scan
  scan.setStartRow(Bytes.toBytes(Long.toString(startDate)));
  scan.setStopRow(Bytes.toBytes(Long.toString(endDate)));
  FilterList filterList = new FilterList();
  // Filter the data: only analyze launch events
  filterList.addFilter(new SingleColumnValueFilter(Bytes.toBytes(EventLogConstants.EVENT_LOGS_FAMILY_NAME),
      Bytes.toBytes(EventLogConstants.LOG_COLUMN_NAME_EVENT_NAME), CompareOp.EQUAL,
      Bytes.toBytes(EventEnum.LAUNCH.alias)));
  // Define the column names the mapper needs to read
  String[] columns = new String[] { EventLogConstants.LOG_COLUMN_NAME_EVENT_NAME,
      EventLogConstants.LOG_COLUMN_NAME_UUID, EventLogConstants.LOG_COLUMN_NAME_SERVER_TIME,
      EventLogConstants.LOG_COLUMN_NAME_PLATFORM, EventLogConstants.LOG_COLUMN_NAME_BROWSER_NAME,
      EventLogConstants.LOG_COLUMN_NAME_BROWSER_VERSION };
  // scan.addColumn(family, qualifier)
  filterList.addFilter(this.getColumnFilter(columns));
  scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(EventLogConstants.HBASE_NAME_EVENT_LOGS));
  scan.setFilter(filterList);
  return Lists.newArrayList(scan);
}
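A hedged sketch of how a scan list like the one returned above is typically wired into a job via TableMapReduceUtil's multi-scan initTableMapperJob overload; EventMapper and the "event_logs" table name are illustrative placeholders, not part of the original code.
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;

public class MultiScanJobSetup {
  // Hypothetical mapper that simply forwards row keys.
  public static class EventMapper extends TableMapper<ImmutableBytesWritable, NullWritable> {
    @Override
    protected void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
      context.write(key, NullWritable.get());
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(HBaseConfiguration.create(), "multi-scan-example");
    Scan scan = new Scan();
    // In the multi-scan variant each Scan carries its table name as an attribute.
    scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes("event_logs"));
    List<Scan> scans = Collections.singletonList(scan);
    TableMapReduceUtil.initTableMapperJob(scans, EventMapper.class,
        ImmutableBytesWritable.class, NullWritable.class, job);
  }
}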
Example 9: completeImport
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Perform the loading of HFiles.
 */
@Override
protected void completeImport(Job job) throws IOException, ImportException {
  super.completeImport(job);
  FileSystem fileSystem = FileSystem.get(job.getConfiguration());
  // Make the bulk load files source directory accessible to the world
  // so that the hbase user can deal with it
  Path bulkLoadDir = getContext().getDestination();
  setPermission(fileSystem, fileSystem.getFileStatus(bulkLoadDir),
      FsPermission.createImmutable((short) 00777));
  HTable hTable = new HTable(job.getConfiguration(), options.getHBaseTable());
  // Load generated HFiles into table
  try {
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(
        job.getConfiguration());
    loader.doBulkLoad(bulkLoadDir, hTable);
  } catch (Exception e) {
    String errorMessage = String.format("Unrecoverable error while " +
        "performing the bulk load of files in [%s]",
        bulkLoadDir.toString());
    throw new ImportException(errorMessage, e);
  }
}
Example 10: testNegativeRecordLength
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Test with record length set to a negative value
 */
@Test (timeout=5000)
public void testNegativeRecordLength() throws Exception {
  localFs.delete(workDir, true);
  Path file = new Path(workDir, new String("testFormat.txt"));
  createFile(file, null, 10, 10);
  // Set the fixed length record length config property
  Job job = Job.getInstance(defaultConf);
  FixedLengthInputFormat format = new FixedLengthInputFormat();
  format.setRecordLength(job.getConfiguration(), -10);
  FileInputFormat.setInputPaths(job, workDir);
  List<InputSplit> splits = format.getSplits(job);
  boolean exceptionThrown = false;
  for (InputSplit split : splits) {
    try {
      TaskAttemptContext context = MapReduceTestUtil.
          createDummyMapTaskAttemptContext(job.getConfiguration());
      RecordReader<LongWritable, BytesWritable> reader =
          format.createRecordReader(split, context);
      MapContext<LongWritable, BytesWritable, LongWritable, BytesWritable>
          mcontext =
          new MapContextImpl<LongWritable, BytesWritable, LongWritable,
              BytesWritable>(job.getConfiguration(), context.getTaskAttemptID(),
              reader, null, null, MapReduceTestUtil.createDummyReporter(), split);
      reader.initialize(split, mcontext);
    } catch(IOException ioe) {
      exceptionThrown = true;
      LOG.info("Exception message:" + ioe.getMessage());
    }
  }
  assertTrue("Exception for negative record length:", exceptionThrown);
}
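By way of contrast with the failure case above, a minimal sketch of a valid FixedLengthInputFormat configuration might look like this; the input path is a placeholder.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat;

public class FixedLengthSetup {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "fixed-length-example");
    // Every record is exactly 10 bytes; a value <= 0 is rejected when the
    // record reader is created, which is what the test above asserts.
    FixedLengthInputFormat.setRecordLength(job.getConfiguration(), 10);
    job.setInputFormatClass(FixedLengthInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path("/tmp/fixed-length-input"));
  }
}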
Example 11: addInputPath
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Add a {@link Path} with a custom {@link InputFormat} to the list of
 * inputs for the map-reduce job.
 *
 * @param job The {@link Job}
 * @param path {@link Path} to be added to the list of inputs for the job
 * @param inputFormatClass {@link InputFormat} class to use for this path
 */
@SuppressWarnings("unchecked")
public static void addInputPath(Job job, Path path,
    Class<? extends InputFormat> inputFormatClass) {
  String inputFormatMapping = path.toString() + ";"
      + inputFormatClass.getName();
  Configuration conf = job.getConfiguration();
  String inputFormats = conf.get(DIR_FORMATS);
  conf.set(DIR_FORMATS,
      inputFormats == null ? inputFormatMapping : inputFormats + ","
          + inputFormatMapping);
  job.setInputFormatClass(DelegatingInputFormat.class);
}
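This helper mirrors what the public MultipleInputs API does; a hedged usage sketch of that public entry point (with placeholder paths) follows.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class MixedInputJobSetup {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "mixed-input-example");
    // Each directory is read with its own InputFormat via DelegatingInputFormat.
    MultipleInputs.addInputPath(job, new Path("/data/plain"), TextInputFormat.class);
    MultipleInputs.addInputPath(job, new Path("/data/sequence"), SequenceFileInputFormat.class);
  }
}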
Example 12: getSample
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * From each split sampled, take the first numSamples / numSplits records.
 */
@SuppressWarnings("unchecked") // ArrayList::toArray doesn't preserve type
public K[] getSample(InputFormat<K,V> inf, Job job)
    throws IOException, InterruptedException {
  List<InputSplit> splits = inf.getSplits(job);
  ArrayList<K> samples = new ArrayList<K>(numSamples);
  int splitsToSample = Math.min(maxSplitsSampled, splits.size());
  int samplesPerSplit = numSamples / splitsToSample;
  long records = 0;
  for (int i = 0; i < splitsToSample; ++i) {
    TaskAttemptContext samplingContext = new TaskAttemptContextImpl(
        job.getConfiguration(), new TaskAttemptID());
    RecordReader<K,V> reader = inf.createRecordReader(
        splits.get(i), samplingContext);
    reader.initialize(splits.get(i), samplingContext);
    while (reader.nextKeyValue()) {
      samples.add(ReflectionUtils.copy(job.getConfiguration(),
          reader.getCurrentKey(), null));
      ++records;
      if ((i+1) * samplesPerSplit <= records) {
        break;
      }
    }
    reader.close();
  }
  return (K[])samples.toArray();
}
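A hedged sketch of how a sampler like this is commonly driven: write the sampled partition file with InputSampler and hand it to TotalOrderPartitioner. The paths and the Text key type are illustrative assumptions.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class SamplerDrivenPartitioning {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "sampled-total-order");
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    FileInputFormat.setInputPaths(job, new Path("/data/sorted-input"));
    // Placeholder location for the partition file produced by the sampler.
    Path partitionFile = new Path("/tmp/partitions_example");
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    // Take roughly 1000 samples, reading from at most 10 splits.
    InputSampler.Sampler<Text, Text> sampler =
        new InputSampler.SplitSampler<Text, Text>(1000, 10);
    InputSampler.writePartitionFile(job, sampler);
  }
}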
Example 13: addInputPath
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Add a {@link Path} to the list of inputs for the map-reduce job.
 *
 * @param job The {@link Job} to modify
 * @param path {@link Path} to be added to the list of inputs for
 *             the map-reduce job.
 */
public static void addInputPath(Job job,
    Path path) throws IOException {
  Configuration conf = job.getConfiguration();
  path = path.getFileSystem(conf).makeQualified(path);
  String dirStr = StringUtils.escapeString(path.toString());
  String dirs = conf.get(INPUT_DIR);
  conf.set(INPUT_DIR, dirs == null ? dirStr : dirs + "," + dirStr);
}
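A minimal caller-side sketch: each call appends one more escaped directory to the INPUT_DIR property. The log paths are placeholders.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class MultiplePathsSetup {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "multi-path-example");
    // Each call appends one more (escaped) directory to the job's input list.
    FileInputFormat.addInputPath(job, new Path("/logs/2023-01-01"));
    FileInputFormat.addInputPath(job, new Path("/logs/2023-01-02"));
  }
}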
Example 14: testFormat
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
@Test(timeout=10000)
public void testFormat() throws Exception {
  Job job = Job.getInstance(new Configuration(defaultConf));
  Random random = new Random();
  long seed = random.nextLong();
  LOG.info("seed = " + seed);
  random.setSeed(seed);
  localFs.delete(workDir, true);
  FileInputFormat.setInputPaths(job, workDir);
  final int length = 10000;
  final int numFiles = 10;
  // create files with various lengths
  createFiles(length, numFiles, random);
  // create a combined split for the files
  CombineTextInputFormat format = new CombineTextInputFormat();
  for (int i = 0; i < 3; i++) {
    int numSplits = random.nextInt(length/20) + 1;
    LOG.info("splitting: requesting = " + numSplits);
    List<InputSplit> splits = format.getSplits(job);
    LOG.info("splitting: got = " + splits.size());
    // we should have a single split as the length is comfortably smaller than
    // the block size
    assertEquals("We got more than one splits!", 1, splits.size());
    InputSplit split = splits.get(0);
    assertEquals("It should be CombineFileSplit",
        CombineFileSplit.class, split.getClass());
    // check the split
    BitSet bits = new BitSet(length);
    LOG.debug("split= " + split);
    TaskAttemptContext context = MapReduceTestUtil.
        createDummyMapTaskAttemptContext(job.getConfiguration());
    RecordReader<LongWritable, Text> reader =
        format.createRecordReader(split, context);
    assertEquals("reader class is CombineFileRecordReader.",
        CombineFileRecordReader.class, reader.getClass());
    MapContext<LongWritable,Text,LongWritable,Text> mcontext =
        new MapContextImpl<LongWritable,Text,LongWritable,Text>(job.getConfiguration(),
            context.getTaskAttemptID(), reader, null, null,
            MapReduceTestUtil.createDummyReporter(), split);
    reader.initialize(split, mcontext);
    try {
      int count = 0;
      while (reader.nextKeyValue()) {
        LongWritable key = reader.getCurrentKey();
        assertNotNull("Key should not be null.", key);
        Text value = reader.getCurrentValue();
        final int v = Integer.parseInt(value.toString());
        LOG.debug("read " + v);
        assertFalse("Key in multiple partitions.", bits.get(v));
        bits.set(v);
        count++;
      }
      LOG.debug("split=" + split + " count=" + count);
    } finally {
      reader.close();
    }
    assertEquals("Some keys in no partition.", length, bits.cardinality());
  }
}
Example 15: configureIncrementalLoad
import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
static void configureIncrementalLoad(Job job, HTableDescriptor tableDescriptor,
    RegionLocator regionLocator, Class<? extends OutputFormat<?, ?>> cls) throws IOException,
    UnsupportedEncodingException {
  Configuration conf = job.getConfiguration();
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(KeyValue.class);
  job.setOutputFormatClass(cls);
  // Based on the configured map output class, set the correct reducer to properly
  // sort the incoming values.
  // TODO it would be nice to pick one or the other of these formats.
  if (KeyValue.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(KeyValueSortReducer.class);
  } else if (Put.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(PutSortReducer.class);
  } else if (Text.class.equals(job.getMapOutputValueClass())) {
    job.setReducerClass(TextSortReducer.class);
  } else {
    LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
  }
  conf.setStrings("io.serializations", conf.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName(),
      KeyValueSerialization.class.getName());
  // Use table's region boundaries for TOP split points.
  LOG.info("Looking up current regions for table " + tableDescriptor.getTableName());
  List<ImmutableBytesWritable> startKeys = getRegionStartKeys(regionLocator);
  LOG.info("Configuring " + startKeys.size() + " reduce partitions " +
      "to match current region count");
  job.setNumReduceTasks(startKeys.size());
  configurePartitioner(job, startKeys);
  // Set compression algorithms based on column families
  configureCompression(conf, tableDescriptor);
  configureBloomType(tableDescriptor, conf);
  configureBlockSize(tableDescriptor, conf);
  configureDataBlockEncoding(tableDescriptor, conf);
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.initCredentials(job);
  LOG.info("Incremental table " + regionLocator.getName() + " output configured.");
}
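For completeness, a hedged sketch of the public HBase 1.x-style entry point that delegates to helpers like the one above; the table name and output path are placeholders, and the exact configureIncrementalLoad overload varies by HBase version.
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BulkLoadJobSetup {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(HBaseConfiguration.create(), "hfile-bulk-load");
    FileOutputFormat.setOutputPath(job, new Path("/tmp/hfiles-out"));
    try (Connection conn = ConnectionFactory.createConnection(job.getConfiguration())) {
      TableName name = TableName.valueOf("example_table");
      Table table = conn.getTable(name);
      RegionLocator locator = conn.getRegionLocator(name);
      // Sets the output format, reducer, partitioner and reduce-task count
      // from the table's current region boundaries.
      HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
    }
  }
}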