This article collects typical usage examples of the Java class org.apache.hadoop.mapred.JobConf. If you are unsure what JobConf is for, or how to use it in practice, the curated examples below should help.
The JobConf class belongs to the org.apache.hadoop.mapred package. Fifteen code examples of the class are shown below, ordered roughly by popularity.
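Before diving into the individual examples, here is a minimal, self-contained sketch of the classic (mapred) job-submission flow that most of them build on. It uses the pass-through IdentityMapper/IdentityReducer bundled with Hadoop and takes placeholder input/output paths from the command line, so treat it as an illustration of the API shape rather than a production driver.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class IdentityJobDriver {
  public static void main(String[] args) throws Exception {
    // JobConf(Class) locates the job jar from the given driver class.
    JobConf conf = new JobConf(IdentityJobDriver.class);
    conf.setJobName("identity-passthrough");
    // Pass-through mapper/reducer shipped with the classic mapred API.
    conf.setMapperClass(IdentityMapper.class);
    conf.setReducerClass(IdentityReducer.class);
    // With the default TextInputFormat, keys are byte offsets and values are lines.
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    JobClient.runJob(conf); // submits the job and blocks until it finishes
  }
}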
Example 1: setup
import org.apache.hadoop.mapred.JobConf; // import the required package/class
@Before
@SuppressWarnings("unchecked") // mocked generics
public void setup() {
  LOG.info(">>>> " + name.getMethodName());
  job = new JobConf();
  job.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, false);
  jobWithRetry = new JobConf();
  jobWithRetry.setBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, true);
  id = TaskAttemptID.forName("attempt_0_1_r_1_1");
  ss = mock(ShuffleSchedulerImpl.class);
  mm = mock(MergeManagerImpl.class);
  r = mock(Reporter.class);
  metrics = mock(ShuffleClientMetrics.class);
  except = mock(ExceptionReporter.class);
  key = JobTokenSecretManager.createSecretKey(new byte[]{0,0,0,0});
  connection = mock(HttpURLConnection.class);
  allErrs = mock(Counters.Counter.class);
  when(r.getCounter(anyString(), anyString())).thenReturn(allErrs);
  ArrayList<TaskAttemptID> maps = new ArrayList<TaskAttemptID>(1);
  maps.add(map1ID);
  maps.add(map2ID);
  when(ss.getMapsForHost(host)).thenReturn(maps);
}
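As a hedged illustration of what the two configurations above differ in, a test in the same class could read the flag back through the same MRJobConfig key (the second argument to getBoolean is only the default used when the key is absent); the test name is hypothetical:

@Test
public void retryFlagIsConfiguredAsExpected() {
  // job was created with fetch retries disabled, jobWithRetry with them enabled.
  assertFalse(job.getBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, true));
  assertTrue(jobWithRetry.getBoolean(MRJobConfig.SHUFFLE_FETCH_RETRY_ENABLED, false));
}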
Example 2: testInputFormat
import org.apache.hadoop.mapred.JobConf; // import the required package/class
void testInputFormat(Class<? extends InputFormat> clazz) throws IOException {
  final JobConf job = MapreduceTestingShim.getJobConf(mrCluster);
  job.setInputFormat(clazz);
  job.setOutputFormat(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);
  LOG.debug("submitting job.");
  final RunningJob run = JobClient.runJob(job);
  assertTrue("job failed!", run.isSuccessful());
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getCounter());
  assertEquals("Saw any instances of the filtered out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getCounter());
  assertEquals("Saw the wrong number of instances of columnA.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getCounter());
  assertEquals("Saw the wrong number of instances of columnB.", 1, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getCounter());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, run.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getCounter());
}
Example 3: shouldCreateAndRunSubmittableJob
import org.apache.hadoop.mapred.JobConf; // import the required package/class
@Test
@SuppressWarnings({ "deprecation" })
public void shouldCreateAndRunSubmittableJob() throws Exception {
  RowCounter rCounter = new RowCounter();
  rCounter.setConf(HBaseConfiguration.create());
  String[] args = new String[] { "\temp", "tableA", "column1", "column2",
      "column3" };
  JobConf jobConfig = rCounter.createSubmittableJob(args);
  assertNotNull(jobConfig);
  assertEquals(0, jobConfig.getNumReduceTasks());
  assertEquals("rowcounter", jobConfig.getJobName());
  assertEquals(jobConfig.getMapOutputValueClass(), Result.class);
  assertEquals(jobConfig.getMapperClass(), RowCounterMapper.class);
  assertEquals(jobConfig.get(TableInputFormat.COLUMN_LIST), Joiner.on(' ')
      .join("column1", "column2", "column3"));
  assertEquals(jobConfig.getMapOutputKeyClass(), ImmutableBytesWritable.class);
}
Example 4: configure
import org.apache.hadoop.mapred.JobConf; // import the required package/class
/** Mapper configuration.
* Extracts source and destination file system, as well as
* top-level paths on source and destination directories.
* Gets the named file systems, to be used later in map.
*/
public void configure(JobConf job)
{
  destPath = new Path(job.get(DST_DIR_LABEL, "/"));
  try {
    destFileSys = destPath.getFileSystem(job);
  } catch (IOException ex) {
    throw new RuntimeException("Unable to get the named file system.", ex);
  }
  sizeBuf = job.getInt("copy.buf.size", 128 * 1024);
  buffer = new byte[sizeBuf];
  ignoreReadFailures = job.getBoolean(Options.IGNORE_READ_FAILURES.propertyname, false);
  preserve_status = job.getBoolean(Options.PRESERVE_STATUS.propertyname, false);
  if (preserve_status) {
    preseved = FileAttribute.parse(job.get(PRESERVE_STATUS_LABEL));
  }
  update = job.getBoolean(Options.UPDATE.propertyname, false);
  overwrite = !update && job.getBoolean(Options.OVERWRITE.propertyname, false);
  skipCRCCheck = job.getBoolean(Options.SKIPCRC.propertyname, false);
  this.job = job;
}
Example 5: shouldNumberOfReduceTaskNotExceedNumberOfRegionsForGivenTable
import org.apache.hadoop.mapred.JobConf; // import the required package/class
/**
 * Check that the given number of reduce tasks for the given job configuration
 * does not exceed the number of regions for the given table.
 */
@Test
public void shouldNumberOfReduceTaskNotExceedNumberOfRegionsForGivenTable()
    throws IOException {
  Assert.assertNotNull(presidentsTable);
  Configuration cfg = UTIL.getConfiguration();
  JobConf jobConf = new JobConf(cfg);
  TableMapReduceUtil.setNumReduceTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.setScannerCaching(jobConf, 100);
  assertEquals(1, jobConf.getNumReduceTasks());
  assertEquals(100, jobConf.getInt("hbase.client.scanner.caching", 0));
  jobConf.setNumReduceTasks(10);
  TableMapReduceUtil.setNumMapTasks(TABLE_NAME, jobConf);
  TableMapReduceUtil.limitNumReduceTasks(TABLE_NAME, jobConf);
  assertEquals(1, jobConf.getNumReduceTasks());
}
Example 6: writePasswordFile
import org.apache.hadoop.mapred.JobConf; // import the required package/class
/**
* Writes the user's password to a tmp file with 0600 permissions.
* @return the filename used.
*/
public static String writePasswordFile(Configuration conf)
    throws IOException {
  // Create the temp file to hold the user's password.
  String tmpDir = conf.get(
      ConfigurationConstants.PROP_JOB_LOCAL_DIRECTORY, "/tmp/");
  File tempFile = File.createTempFile("mysql-cnf", ".cnf", new File(tmpDir));
  // Make the password file only private readable.
  DirectImportUtils.setFilePermissions(tempFile, "0600");
  // If we're here, the password file is believed to be ours alone. The
  // inability to set chmod 0600 inside Java is troublesome. We have to
  // trust that the external 'chmod' program in the path does the right
  // thing, and returns the correct exit status. But given our inability to
  // re-read the permissions associated with a file, we'll have to make do
  // with this.
  String password = DBConfiguration.getPassword((JobConf) conf);
  BufferedWriter w = new BufferedWriter(new OutputStreamWriter(
      new FileOutputStream(tempFile)));
  w.write("[client]\n");
  w.write("password=" + password + "\n");
  w.close();
  return tempFile.toString();
}
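One detail worth noting: although the parameter type is Configuration, the method casts it to JobConf when reading the password, so callers must actually pass a JobConf. Below is a hedged usage sketch; the helper method name is hypothetical, and it assumes it sits next to writePasswordFile with the same imports available.

public static void demoWritePasswordFile() throws IOException {
  // Must be a JobConf, or the (JobConf) cast inside writePasswordFile fails.
  JobConf job = new JobConf();
  job.set(ConfigurationConstants.PROP_JOB_LOCAL_DIRECTORY, "/tmp/");
  // The password itself is assumed to have been stored in the JobConf
  // earlier, e.g. by DBConfiguration when the import job was set up.
  String optionsFile = writePasswordFile(job);
  System.out.println("mysql options file written to " + optionsFile);
}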
Example 7: initTableReduceJob
import org.apache.hadoop.mapred.JobConf; // import the required package/class
/**
* Use this before submitting a TableReduce job. It will
* appropriately set up the JobConf.
*
* @param table The output table.
* @param reducer The reducer class to use.
* @param job The current job configuration to adjust.
* @param partitioner Partitioner to use. Pass <code>null</code> to use
* default partitioner.
* @param addDependencyJars upload HBase jars and jars for any of the configured
* job classes via the distributed cache (tmpjars).
* @throws IOException When determining the region count fails.
*/
public static void initTableReduceJob(String table,
    Class<? extends TableReduce> reducer, JobConf job, Class partitioner,
    boolean addDependencyJars) throws IOException {
  job.setOutputFormat(TableOutputFormat.class);
  job.setReducerClass(reducer);
  job.set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Put.class);
  job.setStrings("io.serializations", job.get("io.serializations"),
      MutationSerialization.class.getName(), ResultSerialization.class.getName());
  if (partitioner == HRegionPartitioner.class) {
    job.setPartitionerClass(HRegionPartitioner.class);
    int regions =
        MetaTableAccessor.getRegionCount(HBaseConfiguration.create(job), TableName.valueOf(table));
    if (job.getNumReduceTasks() > regions) {
      job.setNumReduceTasks(regions);
    }
  } else if (partitioner != null) {
    job.setPartitionerClass(partitioner);
  }
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  initCredentials(job);
}
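A hedged usage sketch of this helper, wiring a reduce-only stage that writes Puts into an HBase table. The table name "scores" is a placeholder, IdentityTableReduce is the pass-through reducer from the old org.apache.hadoop.hbase.mapred API (substitute your own TableReduce as needed), and the usual HBase mapred imports are assumed.

public static JobConf buildTableReduceJob() throws IOException {
  JobConf job = new JobConf(HBaseConfiguration.create());
  job.setJobName("write-to-hbase");
  TableMapReduceUtil.initTableReduceJob(
      "scores",                   // output table (placeholder)
      IdentityTableReduce.class,  // pass-through reducer
      job,
      HRegionPartitioner.class,   // caps reduces at the table's region count
      true);                      // ship HBase jars via the distributed cache
  return job;
}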
Example 8: testMapredSplitSampler
import org.apache.hadoop.mapred.JobConf; // import the required package/class
/**
* Verify SplitSampler contract in mapred.lib.InputSampler, which is added
* back for binary compatibility of M/R 1.x
*/
@Test (timeout = 30000)
@SuppressWarnings("unchecked") // IntWritable comparator not typesafe
public void testMapredSplitSampler() throws Exception {
  final int TOT_SPLITS = 15;
  final int NUM_SPLITS = 5;
  final int STEP_SAMPLE = 5;
  final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE;
  org.apache.hadoop.mapred.lib.InputSampler.Sampler<IntWritable,NullWritable>
      sampler = new org.apache.hadoop.mapred.lib.InputSampler.SplitSampler
          <IntWritable,NullWritable>(NUM_SAMPLES, NUM_SPLITS);
  int[] inits = new int[TOT_SPLITS];
  for (int i = 0; i < TOT_SPLITS; ++i) {
    inits[i] = i * STEP_SAMPLE;
  }
  Object[] samples = sampler.getSample(
      new TestMapredInputSamplerIF(100000, TOT_SPLITS, inits),
      new JobConf());
  assertEquals(NUM_SAMPLES, samples.length);
  Arrays.sort(samples, new IntWritable.Comparator());
  for (int i = 0; i < NUM_SAMPLES; ++i) {
    // mapred.lib.InputSampler.SplitSampler has a sampling step
    assertEquals(i % STEP_SAMPLE + TOT_SPLITS * (i / STEP_SAMPLE),
        ((IntWritable)samples[i]).get());
  }
}
Example 9: runJob
import org.apache.hadoop.mapred.JobConf; // import the required package/class
/**
 * Submit a map/reduce job and poll its status until it completes.
 *
 * @param job the job configuration to submit
 * @return true if the job completed successfully
 * @throws IOException if submission or status polling fails
 */
public static boolean runJob(JobConf job) throws IOException {
  JobClient jc = new JobClient(job);
  boolean success = true;
  RunningJob running = null;
  try {
    running = jc.submitJob(job);
    JobID jobId = running.getID();
    System.out.println("Job " + jobId + " is submitted");
    while (!running.isComplete()) {
      System.out.println("Job " + jobId + " is still running.");
      try {
        Thread.sleep(60000);
      } catch (InterruptedException e) {
        // ignored; fall through and poll the job status again
      }
      running = jc.getJob(jobId);
    }
    success = running.isSuccessful();
  } finally {
    if (!success && (running != null)) {
      running.killJob();
    }
    jc.close();
  }
  return success;
}
Example 10: LocalFetcher
import org.apache.hadoop.mapred.JobConf; // import the required package/class
public LocalFetcher(JobConf job, TaskAttemptID reduceId,
    ShuffleSchedulerImpl<K, V> scheduler,
    MergeManager<K,V> merger,
    Reporter reporter, ShuffleClientMetrics metrics,
    ExceptionReporter exceptionReporter,
    SecretKey shuffleKey,
    Map<TaskAttemptID, MapOutputFile> localMapFiles) {
  super(job, reduceId, scheduler, merger, reporter, metrics,
      exceptionReporter, shuffleKey);
  this.job = job;
  this.localMapFiles = localMapFiles;
  setName("localfetcher#" + id);
  setDaemon(true);
}
Example 11: StreamXmlRecordReader
import org.apache.hadoop.mapred.JobConf; // import the required package/class
public StreamXmlRecordReader(FSDataInputStream in, FileSplit split, Reporter reporter,
    JobConf job, FileSystem fs) throws IOException {
  super(in, split, reporter, job, fs);
  beginMark_ = checkJobGet(CONF_NS + "begin");
  endMark_ = checkJobGet(CONF_NS + "end");
  maxRecSize_ = job_.getInt(CONF_NS + "maxrec", 50 * 1000);
  lookAhead_ = job_.getInt(CONF_NS + "lookahead", 2 * maxRecSize_);
  synched_ = false;
  slowMatch_ = job_.getBoolean(CONF_NS + "slowmatch", false);
  if (slowMatch_) {
    beginPat_ = makePatternCDataOrMark(beginMark_);
    endPat_ = makePatternCDataOrMark(endMark_);
  }
  init();
}
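For context, the keys read above are normally set on the JobConf before the streaming job starts. The sketch below assumes CONF_NS resolves to the "stream.recordreader." prefix commonly used by Hadoop Streaming's record readers; verify the prefix against your Hadoop version before relying on it, and treat the begin/end tags as placeholders.

static JobConf configureXmlReader() {
  JobConf job = new JobConf();
  // Delimiters of one XML record, required by StreamXmlRecordReader.
  job.set("stream.recordreader.begin", "<page>");
  job.set("stream.recordreader.end", "</page>");
  // Optional knobs, shown here with the defaults from the constructor above.
  job.setInt("stream.recordreader.maxrec", 50 * 1000);
  job.setBoolean("stream.recordreader.slowmatch", false);
  return job;
}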
Example 12: getSplits
import org.apache.hadoop.mapred.JobConf; // import the required package/class
@Override
public List<InputSplit> getSplits(JobContext jobCtxt) throws IOException {
  final JobConf jobConf = new JobConf(jobCtxt.getConfiguration());
  final JobClient client = new JobClient(jobConf);
  ClusterStatus stat = client.getClusterStatus(true);
  int numTrackers = stat.getTaskTrackers();
  final int fileCount = jobConf.getInt(GRIDMIX_DISTCACHE_FILE_COUNT, -1);
  // Total size of distributed cache files to be generated
  final long totalSize = jobConf.getLong(GRIDMIX_DISTCACHE_BYTE_COUNT, -1);
  // Get the path of the special file
  String distCacheFileList = jobConf.get(GRIDMIX_DISTCACHE_FILE_LIST);
  if (fileCount < 0 || totalSize < 0 || distCacheFileList == null) {
    throw new RuntimeException("Invalid metadata: #files (" + fileCount
        + "), total_size (" + totalSize + "), filelisturi ("
        + distCacheFileList + ")");
  }
  Path sequenceFile = new Path(distCacheFileList);
  FileSystem fs = sequenceFile.getFileSystem(jobConf);
  FileStatus srcst = fs.getFileStatus(sequenceFile);
  // Consider the number of TTs * mapSlotsPerTracker as number of mappers.
  int numMapSlotsPerTracker = jobConf.getInt(TTConfig.TT_MAP_SLOTS, 2);
  int numSplits = numTrackers * numMapSlotsPerTracker;
  List<InputSplit> splits = new ArrayList<InputSplit>(numSplits);
  LongWritable key = new LongWritable();
  BytesWritable value = new BytesWritable();
  // Average size of data to be generated by each map task
  final long targetSize = Math.max(totalSize / numSplits,
      DistributedCacheEmulator.AVG_BYTES_PER_MAP);
  long splitStartPosition = 0L;
  long splitEndPosition = 0L;
  long acc = 0L;
  long bytesRemaining = srcst.getLen();
  SequenceFile.Reader reader = null;
  try {
    reader = new SequenceFile.Reader(fs, sequenceFile, jobConf);
    while (reader.next(key, value)) {
      // If adding this file would put this split past the target size,
      // cut the last split and put this file in the next split.
      if (acc + key.get() > targetSize && acc != 0) {
        long splitSize = splitEndPosition - splitStartPosition;
        splits.add(new FileSplit(
            sequenceFile, splitStartPosition, splitSize, (String[])null));
        bytesRemaining -= splitSize;
        splitStartPosition = splitEndPosition;
        acc = 0L;
      }
      acc += key.get();
      splitEndPosition = reader.getPosition();
    }
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
  if (bytesRemaining != 0) {
    splits.add(new FileSplit(
        sequenceFile, splitStartPosition, bytesRemaining, (String[])null));
  }
  return splits;
}
Example 13: configure
import org.apache.hadoop.mapred.JobConf; // import the required package/class
@Override
public void configure(JobConf job) {
  try {
    HTable exampleTable = new HTable(HBaseConfiguration.create(job),
        Bytes.toBytes("exampleDeprecatedTable"));
    // mandatory
    setHTable(exampleTable);
    byte[][] inputColumns = new byte [][] { Bytes.toBytes("columnA"),
        Bytes.toBytes("columnB") };
    // mandatory
    setInputColumns(inputColumns);
    Filter exampleFilter = new RowFilter(CompareOp.EQUAL, new RegexStringComparator("aa.*"));
    // optional
    setRowFilter(exampleFilter);
  } catch (IOException exception) {
    throw new RuntimeException("Failed to configure for job.", exception);
  }
}
Example 14: setConf
import org.apache.hadoop.mapred.JobConf; // import the required package/class
public void setConf(Configuration conf) {
  if (conf instanceof JobConf) {
    this.conf = (JobConf) conf;
  } else {
    this.conf = new JobConf(conf);
  }
  // Initialize the specification for *comparison*
  String sortColumns = this.conf.get(SORT_COLUMNS, null);
  if (sortColumns != null) {
    sortSpec = sortColumns.split(",");
  }
  // Column-separator
  columnSeparator = this.conf.get(COLUMN_SEPARATOR, "");
}
Example 15: obtainAuthTokenForJob
import org.apache.hadoop.mapred.JobConf; // import the required package/class
@Override
public void obtainAuthTokenForJob(JobConf job)
    throws IOException, InterruptedException {
  try {
    Class<?> c = Class.forName(
        "org.apache.hadoop.hbase.security.token.TokenUtil");
    Methods.call(c, null, "obtainTokenForJob",
        new Class[]{JobConf.class, UserGroupInformation.class},
        new Object[]{job, ugi});
  } catch (ClassNotFoundException cnfe) {
    throw new RuntimeException("Failure loading TokenUtil class, "
        + "is secure RPC available?", cnfe);
  } catch (IOException ioe) {
    throw ioe;
  } catch (InterruptedException ie) {
    throw ie;
  } catch (RuntimeException re) {
    throw re;
  } catch (Exception e) {
    throw new UndeclaredThrowableException(e,
        "Unexpected error calling TokenUtil.obtainAndCacheToken()");
  }
}