This article collects typical usage examples of the Java method org.apache.hadoop.mapreduce.lib.input.NLineInputFormat.setNumLinesPerSplit. If you are wondering what NLineInputFormat.setNumLinesPerSplit does and how to call it, the curated code samples below may help; they also illustrate the enclosing class, org.apache.hadoop.mapreduce.lib.input.NLineInputFormat.
A total of 8 code examples of NLineInputFormat.setNumLinesPerSplit are shown below, sorted by popularity by default.
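Before the project examples, here is a minimal, self-contained sketch of how setNumLinesPerSplit is typically wired into a job driver. The driver class name NLineDemoDriver, the identity (default) mapper, and the input/output paths taken from args are placeholders invented for this illustration; only the NLineInputFormat calls themselves are the API under discussion.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class NLineDemoDriver {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "nline-demo");
    job.setJarByClass(NLineDemoDriver.class);
    // Split the input file into chunks of 10 lines, so each map task
    // receives exactly 10 input lines (the last split may be shorter).
    job.setInputFormatClass(NLineInputFormat.class);
    NLineInputFormat.addInputPath(job, new Path(args[0]));
    NLineInputFormat.setNumLinesPerSplit(job, 10);
    // No mapper or reducer is set: the default identity Mapper passes each
    // (byte offset, line) pair straight through in this map-only job.
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

Equivalently, the line count can be set through the configuration key mapreduce.input.lineinputformat.linespermap, which is what setNumLinesPerSplit writes in recent Hadoop releases.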
Example 1: setupJob
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
public static void setupJob(Job job, int minFeaturesPerSplit, long featureCount)
{
  if (minFeaturesPerSplit > 0)
  {
    if (featureCount < 0)
    {
      throw new IllegalArgumentException("Expected a feature count");
    }
    int maxMapTasks = job.getConfiguration().getInt("mapred.tasktracker.map.tasks.maximum", -1);
    if (maxMapTasks > 0)
    {
      int featuresPerSplit = (int) (featureCount / maxMapTasks);
      if (featuresPerSplit < minFeaturesPerSplit)
      {
        featuresPerSplit = minFeaturesPerSplit;
      }
      job.getConfiguration().setBoolean(USE_NLINE_FORMAT, true);
      NLineInputFormat.setNumLinesPerSplit(job, featuresPerSplit);
    }
  }
}
Example 2: setNumLinesPerSplit
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
/**
 * Java wrapper for {@link NLineInputFormat#setNumLinesPerSplit(org.apache.hadoop.mapreduce.Job, int)}.
 *
 * @param ctx the JavaScript context
 * @param thisObj the 'this' object
 * @param args the function arguments
 * @param func the function called (unused)
 */
@JSStaticFunction
public static void setNumLinesPerSplit(final Context ctx, final Scriptable thisObj, final Object[] args,
                                       final Function func) {
    final Object arg0 = args.length >= 1 ? args[0] : Undefined.instance;
    final Object arg1 = args.length >= 2 ? args[1] : Undefined.instance;

    if (args.length < 2) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.TWO_ARGS_EXPECTED);
    } else if (!JavaScriptUtils.isDefined(arg0)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_REQUIRED);
    } else if (!JavaScriptUtils.isDefined(arg1)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.SECOND_ARG_REQUIRED);
    } else if (!(arg0 instanceof JobWrap)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.FIRST_ARG_MUST_BE_JOB);
    } else if (!(arg1 instanceof Number)) {
        throw Utils.makeError(ctx, thisObj, LembosMessages.SECOND_ARG_ARG_MUST_BE_NUM);
    }

    NLineInputFormat.setNumLinesPerSplit(((JobWrap)arg0).getJob(), JavaScriptUtils.fromNumber(arg1).intValue());
}
Example 3: doMapReduce
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf, opts);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());

  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.setInputPaths(job, inputDir);
  // this is default, but be explicit about it just in case.
  NLineInputFormat.setNumLinesPerSplit(job, 1);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      DescriptiveStatistics.class, // commons-math
      ObjectMapper.class);         // jackson-mapper-asl
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Example 4: doMapReduce
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
private void doMapReduce(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
    InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  Path inputDir = writeInputFile(conf, opts);
  conf.set(EvaluationMapTask.CMD_KEY, cmd.getName());
  conf.set(EvaluationMapTask.PE_KEY, getClass().getName());

  Job job = new Job(conf);
  job.setJarByClass(PerformanceEvaluation.class);
  job.setJobName("HBase Performance Evaluation");

  job.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.setInputPaths(job, inputDir);
  // this is default, but be explicit about it just in case.
  NLineInputFormat.setNumLinesPerSplit(job, 1);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(LongWritable.class);
  job.setMapperClass(EvaluationMapTask.class);
  job.setReducerClass(LongSumReducer.class);
  job.setNumReduceTasks(1);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(inputDir.getParent(), "outputs"));

  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
      Histogram.class,     // yammer metrics
      ObjectMapper.class); // jackson-mapper-asl
  TableMapReduceUtil.initCredentials(job);
  job.waitForCompletion(true);
}
Example 5: run
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkupdate [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] <input_file_with_SPARQL_queries> <output_path> <table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(),
        HalyardExport.class,
        NTriplesUtil.class,
        Rio.class,
        AbstractRDFHandler.class,
        RDFFormat.class,
        RDFParser.class,
        HTable.class,
        HBaseConfiguration.class,
        AuthenticationProtos.class,
        Trace.class,
        Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setStrings(TABLE_NAME_PROPERTY, args[2]);
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    Job job = Job.getInstance(getConf(), "HalyardBulkUpdate -> " + args[1] + " -> " + args[2]);
    NLineInputFormat.setNumLinesPerSplit(job, 1);
    job.setJarByClass(HalyardBulkUpdate.class);
    job.setMapperClass(SPARQLMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], false, 0)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Update Completed..");
            return 0;
        }
    }
    return -1;
}
Example 6: prepareHadoopJob
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
/**
 * Prepare the Hadoop MR job, including configuring the job and setting up the input/output paths.
 */
private Path prepareHadoopJob(List<WorkUnit> workUnits) throws IOException {
  TimingEvent mrJobSetupTimer = this.eventSubmitter.getTimingEvent(TimingEventNames.RunJobTimings.MR_JOB_SETUP);

  this.job.setJarByClass(MRJobLauncher.class);
  this.job.setMapperClass(TaskRunner.class);

  // The job is mapper-only
  this.job.setNumReduceTasks(0);

  this.job.setInputFormatClass(NLineInputFormat.class);
  this.job.setOutputFormatClass(GobblinOutputFormat.class);
  this.job.setMapOutputKeyClass(NullWritable.class);
  this.job.setMapOutputValueClass(NullWritable.class);

  // Turn off speculative execution
  this.job.setSpeculativeExecution(false);

  // Job input path is where input work unit files are stored
  Path jobInputPath = new Path(this.mrJobDir, INPUT_DIR_NAME);

  // Prepare job input
  Path jobInputFile = prepareJobInput(jobInputPath, workUnits);
  NLineInputFormat.addInputPath(this.job, jobInputFile);

  // Job output path is where serialized task states are stored
  Path jobOutputPath = new Path(this.mrJobDir, OUTPUT_DIR_NAME);
  SequenceFileOutputFormat.setOutputPath(this.job, jobOutputPath);

  // Serialize source state to a file which will be picked up by the mappers
  Path jobStateFilePath = new Path(this.mrJobDir, JOB_STATE_FILE_NAME);
  SerializationUtils.serializeState(this.fs, jobStateFilePath, this.jobContext.getJobState());
  job.getConfiguration().set(ConfigurationKeys.JOB_STATE_FILE_PATH_KEY, jobStateFilePath.toString());

  if (this.jobProps.containsKey(ConfigurationKeys.MR_JOB_MAX_MAPPERS_KEY)) {
    // When there is a limit on the number of mappers, each mapper may run
    // multiple tasks if the total number of tasks is larger than the limit.
    int maxMappers = Integer.parseInt(this.jobProps.getProperty(ConfigurationKeys.MR_JOB_MAX_MAPPERS_KEY));
    if (workUnits.size() > maxMappers) {
      int numTasksPerMapper =
          workUnits.size() % maxMappers == 0 ? workUnits.size() / maxMappers : workUnits.size() / maxMappers + 1;
      NLineInputFormat.setNumLinesPerSplit(this.job, numTasksPerMapper);
    }
  }

  mrJobSetupTimer.stop();
  return jobOutputPath;
}
Example 7: randomizeManyInputFiles
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
/**
 * To uniformly spread load across all mappers we randomize fullInputList
 * with a separate small Mapper & Reducer preprocessing step. This way
 * each input line ends up on a random position in the output file list.
 * Each mapper indexes a disjoint consecutive set of files such that each
 * set has roughly the same size, at least from a probabilistic
 * perspective.
 *
 * For example an input file with the following input list of URLs:
 *
 * A
 * B
 * C
 * D
 *
 * might be randomized into the following output list of URLs:
 *
 * C
 * A
 * D
 * B
 *
 * The implementation sorts the list of lines by randomly generated numbers.
 */
private Job randomizeManyInputFiles(Configuration baseConfig, Path fullInputList, Path outputStep2Dir, int numLinesPerSplit)
    throws IOException {
  Job job2 = Job.getInstance(baseConfig);
  job2.setJarByClass(getClass());
  job2.setJobName(getClass().getName() + "/" + Utils.getShortClassName(LineRandomizerMapper.class));
  job2.setInputFormatClass(NLineInputFormat.class);
  NLineInputFormat.addInputPath(job2, fullInputList);
  NLineInputFormat.setNumLinesPerSplit(job2, numLinesPerSplit);
  job2.setMapperClass(LineRandomizerMapper.class);
  job2.setReducerClass(LineRandomizerReducer.class);
  job2.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job2, outputStep2Dir);
  job2.setNumReduceTasks(1);
  job2.setOutputKeyClass(LongWritable.class);
  job2.setOutputValueClass(Text.class);
  return job2;
}
Example 8: setupJob
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat; // import the package/class that the method depends on
@Override
public void setupJob(Job job) {
  NLineInputFormat.setNumLinesPerSplit(job, 1);
  job.setMapperClass(ImputationMapperMinimac3.class);
  job.setInputFormatClass(NLineInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputKeyClass(Text.class);
  job.setNumReduceTasks(0);
}