This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.NLineInputFormat.setNumLinesPerSplit. If you are wondering what NLineInputFormat.setNumLinesPerSplit does and how to call it, the curated code samples below may help; they also illustrate the enclosing class, org.apache.hadoop.mapreduce.lib.input.NLineInputFormat.
A total of 8 code examples of NLineInputFormat.setNumLinesPerSplit are shown below, sorted by popularity by default.
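Before the project examples, here is a minimal, self-contained sketch of how setNumLinesPerSplit is typically wired into a job driver. The driver class name NLineDemoDriver, the identity (default) mapper, and the input/output paths taken from args are placeholders invented for this illustration; only the NLineInputFormat calls themselves are the API under discussion.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class NLineDemoDriver {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "nline-demo");
    job.setJarByClass(NLineDemoDriver.class);
    // Split the input file into chunks of 10 lines, so each map task
    // receives exactly 10 input lines (the last split may be shorter).
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, new Path(args[0]));
    NLineInputFormat.setNumLinesPerSplit(job, 10);
    // No mapper or reducer is set: the default identity Mapper passes each
    // (byte offset, line) pair straight through in this map-only job.
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Equivalently, the line count can be set through the configuration key mapreduce.input.lineinputformat.linespermap, which is what setNumLinesPerSplit writes in recent Hadoop releases.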
Example 1: setupJob
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
public static void setupJob(Job job, int minFeaturesPerSplit, long featureCount)
{
  if (minFeaturesPerSplit > 0)
  {
    if (featureCount < 0)
    {
      throw new IllegalArgumentException("Expected a feature count");
    }
    int maxMapTasks = job.getConfiguration().getInt("mapred.tasktracker.map.tasks.maximum", -1);
    if (maxMapTasks > 0)
    {
      int featuresPerSplit = (int) (featureCount / maxMapTasks);
      if (featuresPerSplit < minFeaturesPerSplit)
      {
        featuresPerSplit = minFeaturesPerSplit;
      }
      job.getConfiguration().setBoolean(USE_NLINE_FORMAT, true);
      NLineInputFormat.setNumLinesPerSplit(job, featuresPerSplit);
    }
  }
}
Example 2: setNumLinesPerSplit
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
/**
 * Java wrapper for {@link NLineInputFormat#setNumLinesPerSplit(org.apache.hadoop.mapreduce.Job, int)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function called (unused)
 */
@JSStaticFunction
public static void setNumLinesPerSplit(final Context ctx, final Scriptable thisObj, final Object[] args,
                                       final Function func) {
    final Object arg0 = args.length >= 1 ? args[0] : Undefined.instance;
    final Object arg1 = args.length >= 2 ? args[1] : Undefined.instance;

    if (args.length < 2) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.TWO_ARGS_EXPECTED);
    } else if (!JavaScriptUtils.isDefined(arg0)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_REQUIRED);
    } else if (!JavaScriptUtils.isDefined(arg1)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.SECOND_ARG_REQUIRED);
    } else if (!(arg0 instanceof JobWrap)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_MUST_BE_JOB);
    } else if (!(arg1 instanceof Number)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.SECOND_ARG_ARG_MUST_BE_NUM);
    }

    NLineInputFormat.setNumLinesPerSplit(((JobWrap)arg0).getJob(), JavaScriptUtils.fromNumber(arg1).intValue());
}
Example 3: doMapReduce
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf, opts);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());

  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.setInputPaths(job, inputDir);
  // this is default, but be explicit about it just in case.
  NLineInputFormat.setNumLinesPerSplit(job, 1);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      DescriptiveStatistics.class, // commons-math
      ObjectMapper.class);         // jackson-mapper-asl
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Example 4: doMapReduce
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf, opts);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());

  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.setInputPaths(job, inputDir);
  // this is default, but be explicit about it just in case.
  NLineInputFormat.setNumLinesPerSplit(job, 1);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      Histogram.class,     // yammer metrics
      ObjectMapper.class); // jackson-mapper-asl
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Example 5: run
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkupdate [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] <input_file_with_SPARQL_queries> <output_path> <table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(),
        HalyardExport.class,
        NTriplesUtil.class,
        Rio.class,
        AbstractRDFHandler.class,
        RDFFormat.class,
        RDFParser.class,
        HTable.class,
        HBaseConfiguration.class,
        AuthenticationProtos.class,
        Trace.class,
        Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setStrings(TABLE_NAME_PROPERTY, args[2]);
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    Job job = Job.getInstance(getConf(), "HalyardBulkUpdate -> " + args[1] + " -> " + args[2]);
    NLineInputFormat.setNumLinesPerSplit(job, 1);
    job.setJarByClass(HalyardBulkUpdate.class);
    job.setMapperClass(SPARQLMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], false, 0)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Update Completed..");
            return 0;
        }
    }
    return -1;
}
Example 6: prepareHadoopJob
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
/**
 * Prepare the Hadoop MR job, including configuring the job and setting up the input/output paths.
 */
private Path prepareHadoopJob(List<WorkUnit> workUnits) throws IOException {
  TimingEvent mrJobSetupTimer = this.eventSubmitter.getTimingEvent(TimingEventNames.RunJobTimings.MR_JOB_SETUP);

  this.job.setJarByClass(MRJobLauncher.class);
  this.job.setMapperClass(TaskRunner.class);

  // The job is mapper-only
  this.job.setNumReduceTasks(0);

  this.job.setInputFormatClass(NLineInputFormat.class);
  this.job.setOutputFormatClass(GobblinOutputFormat.class);
  this.job.setMapOutputKeyClass(NullWritable.class);
  this.job.setMapOutputValueClass(NullWritable.class);

  // Turn off speculative execution
  this.job.setSpeculativeExecution(false);

  // Job input path is where input work unit files are stored
  Path jobInputPath = new Path(this.mrJobDir, INPUT_DIR_NAME);

  // Prepare job input
  Path jobInputFile = prepareJobInput(jobInputPath, workUnits);
  NLineInputFormat.addInputPath(this.job, jobInputFile);

  // Job output path is where serialized task states are stored
  Path jobOutputPath = new Path(this.mrJobDir, OUTPUT_DIR_NAME);
  SequenceFileOutputFormat.setOutputPath(this.job, jobOutputPath);

  // Serialize source state to a file which will be picked up by the mappers
  Path jobStateFilePath = new Path(this.mrJobDir, JOB_STATE_FILE_NAME);
  SerializationUtils.serializeState(this.fs, jobStateFilePath, this.jobContext.getJobState());
  job.getConfiguration().set(ConfigurationKeys.JOB_STATE_FILE_PATH_KEY, jobStateFilePath.toString());

  if (this.jobProps.containsKey(ConfigurationKeys.MR_JOB_MAX_MAPPERS_KEY)) {
    // When there is a limit on the number of mappers, each mapper may run
    // multiple tasks if the total number of tasks is larger than the limit.
    int maxMappers = Integer.parseInt(this.jobProps.getProperty(ConfigurationKeys.MR_JOB_MAX_MAPPERS_KEY));
    if (workUnits.size() > maxMappers) {
      int numTasksPerMapper =
          workUnits.size() % maxMappers == 0 ? workUnits.size() / maxMappers : workUnits.size() / maxMappers + 1;
      NLineInputFormat.setNumLinesPerSplit(this.job, numTasksPerMapper);
    }
  }

  mrJobSetupTimer.stop();
  return jobOutputPath;
}
Example 7: randomizeManyInputFiles
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
/**
 * To uniformly spread load across all mappers we randomize fullInputList
 * with a separate small Mapper & Reducer preprocessing step. This way
 * each input line ends up on a random position in the output file list.
 * Each mapper indexes a disjoint consecutive set of files such that each
 * set has roughly the same size, at least from a probabilistic
 * perspective.
 *
 * For example an input file with the following input list of URLs:
 *
 * A
 * B
 * C
 * D
 *
 * might be randomized into the following output list of URLs:
 *
 * C
 * A
 * D
 * B
 *
 * The implementation sorts the list of lines by randomly generated numbers.
 */
private Job randomizeManyInputFiles(Configuration baseConfig, Path fullInputList, Path outputStep2Dir, int numLinesPerSplit)
    throws IOException {
  Job job2 = Job.getInstance(baseConfig);
  job2.setJarByClass(getClass());
  job2.setJobName(getClass().getName() + "/" + Utils.getShortClassName(LineRandomizerMapper.class));
  job2.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.addInputPath(job2, fullInputList);
  NLineInputFormat.setNumLinesPerSplit(job2, numLinesPerSplit);
  job2.setMapperClass(LineRandomizerMapper.class);
  job2.setReducerClass(LineRandomizerReducer.class);
  job2.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job2, outputStep2Dir);
  job2.setNumReduceTasks(1);
  job2.setOutputKeyClass(LongWritable.class);
  job2.setOutputValueClass(Text.class);
  return job2;
}
Example 8: setupJob
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
@Override
public void setupJob(Job job) {
  NLineInputFormat.setNumLinesPerSplit(job, 1);
  job.setMapperClass(ImputationMapperMinimac3.class);
  job.setInputFormatClass(NLineInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setNumReduceTasks(0);
}