

Java Job.setInputFormatClass Method Code Examples

This article collects typical code examples of the Java method org.apache.hadoop.mapreduce.Job.setInputFormatClass. If you are unsure what Job.setInputFormatClass does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also browse further usage examples of the enclosing class, org.apache.hadoop.mapreduce.Job.


A total of 15 code examples of Job.setInputFormatClass are shown below, sorted by popularity.
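Before the collected examples, here is a minimal, self-contained driver sketch (written for this article, not taken from any of the projects below) showing where setInputFormatClass fits in a typical job setup; the class passed to it decides how the input files are split and turned into key/value pairs for the mappers.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class MinimalInputFormatExample {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "minimal-input-format-example");
    job.setJarByClass(MinimalInputFormatExample.class);
    job.setMapperClass(Mapper.class);   // identity mapper, map-only job
    job.setNumReduceTasks(0);
    // TextInputFormat splits the input files and hands each line to the
    // mapper as a (byte offset, line text) pair.
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}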

Example 1: configureJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
  String tableName = args[0];
  String columnFamily = args[1];
  System.out.println("****" + tableName);
  conf.set(TableInputFormat.SCAN, TableMapReduceUtil.convertScanToString(new Scan()));
  conf.set(TableInputFormat.INPUT_TABLE, tableName);
  conf.set("index.tablename", tableName);
  conf.set("index.familyname", columnFamily);
  String[] fields = new String[args.length - 2];
  System.arraycopy(args, 2, fields, 0, fields.length);
  conf.setStrings("index.fields", fields);
  Job job = new Job(conf, tableName);
  job.setJarByClass(IndexBuilder.class);
  job.setMapperClass(Map.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(TableInputFormat.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);
  return job;
}
 
Author: fengchen8086 | Project: ditb | Lines: 24 | Source: IndexBuilder.java

Example 2: doLoad

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
protected Job doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "load-output");
  LOG.info("Load output dir: " + outputDir);

  NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
  conf.set(TABLE_NAME_KEY, htd.getTableName().getNameAsString());

  Job job = Job.getInstance(conf);
  job.setJobName(TEST_NAME + " Load for " + htd.getTableName());
  job.setJarByClass(this.getClass());
  setMapperClass(job);
  job.setInputFormatClass(NMapInputFormat.class);
  job.setNumReduceTasks(0);
  setJobScannerConf(job);
  FileOutputFormat.setOutputPath(job, outputDir);

  TableMapReduceUtil.addDependencyJars(job);

  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);
  assertTrue(job.waitForCompletion(true));
  return job;
}
 
Author: fengchen8086 | Project: ditb | Lines: 24 | Source: IntegrationTestLoadAndVerify.java

Example 3: run

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * @param args the cli arguments
 */
public int run(String[] args) 
    throws IOException, InterruptedException, ClassNotFoundException {
  Job job = Job.getInstance(getConf());
  if (args.length != 2) {
    usage();
    return 2;
  }
  setNumberOfRows(job, parseHumanLong(args[0]));
  Path outputDir = new Path(args[1]);
  FileOutputFormat.setOutputPath(job, outputDir);
  job.setJobName("TeraGen");
  job.setJarByClass(TeraGen.class);
  job.setMapperClass(SortGenMapper.class);
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(RangeInputFormat.class);
  job.setOutputFormatClass(TeraOutputFormat.class);
  return job.waitForCompletion(true) ? 0 : 1;
}
 
Author: naver | Project: hadoop | Lines: 24 | Source: TeraGen.java

Example 4: jobRecommendFriends

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
private Job jobRecommendFriends(String inputPath, String outputPath) throws IOException, InterruptedException, ClassNotFoundException{     
    Job job1 = new Job();
    job1.setJarByClass(WordCount.class);
    job1.setOutputKeyClass(Text.class);
    job1.setOutputValueClass(Text.class);
   
    job1.setMapperClass(MapRecommendation.class);
    job1.setReducerClass(ReduceRecommendation.class);
   
    job1.setOutputFormatClass(TextOutputFormat.class);
    job1.setInputFormatClass(KeyValueTextInputFormat.class);

    FileInputFormat.addInputPath(job1, new Path(inputPath));
    FileOutputFormat.setOutputPath(job1, new Path(outputPath));

    job1.waitForCompletion(true);

    return job1;
   
}
 
Author: dhruvmalik007 | Project: Deep_learning_using_Java | Lines: 21 | Source: Recommendation_program.java

Example 5: main

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
	Configuration conf = new Configuration();
	conf.set("xmlinput.start", "<page>");
	conf.set("xmlinput.end", "</page>");
	
	Job job =Job.getInstance(conf);
	job.setJobName("TermFrequencyCount");
	job.setJarByClass(TF.class);
	
	job.setMapOutputKeyClass(Text.class);
	job.setMapOutputValueClass(IntArrayWritable.class);
	
	job.setOutputKeyClass(Text.class);
	job.setOutputValueClass(DoubleWritable.class);
	
	job.setMapperClass(TFMap.class);
	job.setReducerClass(TFReduce.class);
	
	job.setInputFormatClass(XmlInputFormat.class);
	job.setOutputFormatClass(TextOutputFormat.class);
	
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));
	boolean wait = job.waitForCompletion(true);
	System.exit(wait ? 0 : 1);
}
 
Author: lzmhhh123 | Project: Wikipedia-Index | Lines: 27 | Source: TF.java

Example 6: configureJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String [] args)
throws IOException {
  Path inputPath = new Path(args[0]);
  String tableName = args[1];
  Job job = new Job(conf, NAME + "_" + tableName);
  job.setJarByClass(Uploader.class);
  FileInputFormat.setInputPaths(job, inputPath);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setMapperClass(Uploader.class);
  // No reducers.  Just write straight to table.  Call initTableReducerJob
  // because it sets up the TableOutputFormat.
  TableMapReduceUtil.initTableReducerJob(tableName, null, job);
  job.setNumReduceTasks(0);
  return job;
}
 
Author: fengchen8086 | Project: ditb | Lines: 19 | Source: SampleUploader.java

Example 7: testInputFormat

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
void testInputFormat(Class<? extends InputFormat> clazz)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Job job = MapreduceTestingShim.createJob(UTIL.getConfiguration());
  job.setInputFormatClass(clazz);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapperClass(ExampleVerifier.class);
  job.setNumReduceTasks(0);

  LOG.debug("submitting job.");
  assertTrue("job failed!", job.waitForCompletion(true));
  assertEquals("Saw the wrong number of instances of the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "aaa").getValue());
  assertEquals("Saw any instances of the filtered out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":row", "bbb").getValue());
  assertEquals("Saw the wrong number of instances of columnA.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnA").getValue());
  assertEquals("Saw the wrong number of instances of columnB.", 1, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":family", "columnB").getValue());
  assertEquals("Saw the wrong count of values for the filtered-for row.", 2, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value aaa").getValue());
  assertEquals("Saw the wrong count of values for the filtered-out row.", 0, job.getCounters()
      .findCounter(TestTableInputFormat.class.getName() + ":value", "value bbb").getValue());
}
 
Author: fengchen8086 | Project: ditb | Lines: 24 | Source: TestTableInputFormat.java

Example 8: setInput

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/** Note that the "orderBy" column is called the "splitBy" in this version.
  * We reuse the same field, but it's not strictly ordering it
  * -- just partitioning the results.
  */
public static void setInput(Job job,
    Class<? extends DBWritable> inputClass,
    String tableName, String conditions,
    String splitBy, String... fieldNames) {
  DBInputFormat.setInput(job, inputClass, tableName, conditions,
      splitBy, fieldNames);
  job.setInputFormatClass(DataDrivenDBInputFormat.class);
}
 
Author: aliyun | Project: aliyun-maxcompute-data-collectors | Lines: 13 | Source: DataDrivenDBInputFormat.java
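For reference, a hypothetical call into the wrapper above might look as follows; MyRecord, the table name, and the column names are illustrative assumptions, not part of the original project.

// Hypothetical usage of the setInput wrapper shown in Example 8; MyRecord is
// an assumed DBWritable implementation and all names are illustrative only.
Job job = Job.getInstance(conf);
DataDrivenDBInputFormat.setInput(job, MyRecord.class,
    "employees",             // table name
    "active = 1",            // WHERE-clause conditions
    "id",                    // "splitBy" column: partitions the rows, does not order them
    "id", "name", "salary"); // columns to read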

Example 9: testScanFromConfiguration

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Tests an MR Scan initialized from properties set in the Configuration.
 * 
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
protected void testScanFromConfiguration(String start, String stop, String last)
throws IOException, InterruptedException, ClassNotFoundException {
  String jobName = "ScanFromConfig" + (start != null ? start.toUpperCase() : "Empty") +
    "To" + (stop != null ? stop.toUpperCase() : "Empty");
  Configuration c = new Configuration(TEST_UTIL.getConfiguration());
  c.set(TableInputFormat.INPUT_TABLE, Bytes.toString(TABLE_NAME));
  c.set(TableInputFormat.SCAN_COLUMN_FAMILY, Bytes.toString(INPUT_FAMILY));
  c.set(KEY_STARTROW, start != null ? start : "");
  c.set(KEY_LASTROW, last != null ? last : "");

  if (start != null) {
    c.set(TableInputFormat.SCAN_ROW_START, start);
  }

  if (stop != null) {
    c.set(TableInputFormat.SCAN_ROW_STOP, stop);
  }

  Job job = new Job(c, jobName);
  job.setMapperClass(ScanMapper.class);
  job.setReducerClass(ScanReducer.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(ImmutableBytesWritable.class);
  job.setInputFormatClass(TableInputFormat.class);
  job.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
  TableMapReduceUtil.addDependencyJars(job);
  assertTrue(job.waitForCompletion(true));
}
 
Author: fengchen8086 | Project: ditb | Lines: 37 | Source: TestTableInputFormatScanBase.java

Example 10: runGenerator

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public int runGenerator(int numMappers, long numNodes, Path tmpOutput,
    Integer width, Integer wrapMuplitplier) throws Exception {
  LOG.info("Running Generator with numMappers=" + numMappers +", numNodes=" + numNodes);
  createSchema();
  Job job = Job.getInstance(getConf());

  job.setJobName("Link Generator");
  job.setNumReduceTasks(0);
  job.setJarByClass(getClass());

  FileInputFormat.setInputPaths(job, tmpOutput);
  job.setInputFormatClass(OneFilePerMapperSFIF.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);

  setJobConf(job, numMappers, numNodes, width, wrapMuplitplier);

  setMapperForGenerator(job);

  job.setOutputFormatClass(NullOutputFormat.class);

  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  TableMapReduceUtil.addDependencyJars(job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  TableMapReduceUtil.initCredentials(job);

  boolean success = jobCompletion(job);

  return success ? 0 : 1;
}
 
Author: fengchen8086 | Project: ditb | Lines: 31 | Source: IntegrationTestBigLinkedList.java

Example 11: main

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) {
    if (args.length != 2) {
        System.err.println("Usage: Month Traffic Statistics <input path> <output path>");
        System.exit(-1);
    }
    String nginxLogInput = args[0];
    String nginxLogOutput = args[1];

    Configuration configuration = new Configuration();
    try {
        Job job = Job.getInstance(configuration);
        job.setJobName("MonthTrafficStatistics");

        job.setJarByClass(MonthTrafficStatisticsMapReduce.class);

        FileInputFormat.addInputPath(job, new Path(nginxLogInput));
        FileOutputFormat.setOutputPath(job, new Path(nginxLogOutput));

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setMapperClass(MonthTrafficStatisticsMapper.class);
        job.setReducerClass(MonthTrafficStatisticsReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        e.printStackTrace();
    }
}
 
Author: mumuhadoop | Project: mumu-mapreduce | Lines: 35 | Source: MonthTrafficStatisticsMapReduce.java

Example 12: main

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public static void main(String[] args) {
    if (args.length != 2) {
        System.err.println("Usage: Year Traffic Statistics <input path> <output path>");
        System.exit(-1);
    }
    String nginxLogInput = args[0];
    String nginxLogOutput = args[1];

    Configuration configuration = new Configuration();
    try {
        Job job = Job.getInstance(configuration);
        job.setJobName("YearTrafficStatistics");

        job.setJarByClass(YearTrafficStatisticsMapReduce.class);

        FileInputFormat.addInputPath(job, new Path(nginxLogInput));
        FileOutputFormat.setOutputPath(job, new Path(nginxLogOutput));

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setMapperClass(YearTrafficStatisticsMapper.class);
        job.setReducerClass(YearTrafficStatisticsReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        e.printStackTrace();
    }
}
 
Author: mumuhadoop | Project: mumu-mapreduce | Lines: 35 | Source: YearTrafficStatisticsMapReduce.java

Example 13: loadHCatTable

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
public List<HCatRecord> loadHCatTable(String dbName,
  String tableName, Map<String, String> partKeyMap,
  HCatSchema tblSchema, List<HCatRecord> records)
  throws Exception {

  Job job = new Job(conf, "HCat load job");

  job.setJarByClass(this.getClass());
  job.setMapperClass(HCatWriterMapper.class);


  // Write one input line per record, just to drive the mapper
  Path path = new Path(fs.getWorkingDirectory(),
    "mapreduce/HCatTableIndexInput");

  job.getConfiguration()
    .setInt(ConfigurationConstants.PROP_MAPRED_MAP_TASKS, 1);
  int writeCount = records.size();
  recsToLoad.clear();
  recsToLoad.addAll(records);
  createInputFile(path, writeCount);
  // input/output settings
  HCatWriterMapper.setWrittenRecordCount(0);

  FileInputFormat.setInputPaths(job, path);
  job.setInputFormatClass(TextInputFormat.class);
  job.setOutputFormatClass(HCatOutputFormat.class);
  OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName,
    partKeyMap);

  HCatOutputFormat.setOutput(job, outputJobInfo);
  HCatOutputFormat.setSchema(job, tblSchema);
  job.setMapOutputKeyClass(BytesWritable.class);
  job.setMapOutputValueClass(DefaultHCatRecord.class);

  job.setNumReduceTasks(0);
  SqoopHCatUtilities.addJars(job, new SqoopOptions());
  boolean success = job.waitForCompletion(true);

  if (!success) {
    throw new IOException("Loading HCatalog table with test records failed");
  }
  utils.invokeOutputCommitterForLocalMode(job);
  LOG.info("Loaded " + HCatWriterMapper.writtenRecordCount + " records");
  return recsToLoad;
}
 
Author: aliyun | Project: aliyun-maxcompute-data-collectors | Lines: 47 | Source: HCatalogTestUtils.java

Example 14: runCopyJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Run Map-Reduce Job to perform the files copy.
 */
private void runCopyJob(final Path inputRoot, final Path outputRoot,
    final String snapshotName, final Path snapshotDir, final boolean verifyChecksum,
    final String filesUser, final String filesGroup, final int filesMode,
    final int mappers, final int bandwidthMB)
        throws IOException, InterruptedException, ClassNotFoundException {
  Configuration conf = getConf();
  if (filesGroup != null) conf.set(CONF_FILES_GROUP, filesGroup);
  if (filesUser != null) conf.set(CONF_FILES_USER, filesUser);
  if (mappers > 0) {
    conf.setInt(CONF_NUM_SPLITS, mappers);
    conf.setInt(MR_NUM_MAPS, mappers);
  }
  conf.setInt(CONF_FILES_MODE, filesMode);
  conf.setBoolean(CONF_CHECKSUM_VERIFY, verifyChecksum);
  conf.set(CONF_OUTPUT_ROOT, outputRoot.toString());
  conf.set(CONF_INPUT_ROOT, inputRoot.toString());
  conf.setInt(CONF_BANDWIDTH_MB, bandwidthMB);
  conf.set(CONF_SNAPSHOT_NAME, snapshotName);
  conf.set(CONF_SNAPSHOT_DIR, snapshotDir.toString());

  Job job = new Job(conf);
  job.setJobName("ExportSnapshot-" + snapshotName);
  job.setJarByClass(ExportSnapshot.class);
  TableMapReduceUtil.addDependencyJars(job);
  job.setMapperClass(ExportMapper.class);
  job.setInputFormatClass(ExportSnapshotInputFormat.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setMapSpeculativeExecution(false);
  job.setNumReduceTasks(0);

  // Acquire the delegation Tokens
  Configuration srcConf = HBaseConfiguration.createClusterConf(conf, null, CONF_SOURCE_PREFIX);
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
    new Path[] { inputRoot }, srcConf);
  Configuration destConf = HBaseConfiguration.createClusterConf(conf, null, CONF_DEST_PREFIX);
  TokenCache.obtainTokensForNamenodes(job.getCredentials(),
      new Path[] { outputRoot }, destConf);

  // Run the MR Job
  if (!job.waitForCompletion(true)) {
    // TODO: Replace the fixed string with job.getStatus().getFailureInfo()
    // when it will be available on all the supported versions.
    throw new ExportSnapshotException("Copy Files Map-Reduce Job failed");
  }
}
 
Author: fengchen8086 | Project: ditb | Lines: 49 | Source: ExportSnapshot.java

Example 15: createSubmittableJob

import org.apache.hadoop.mapreduce.Job; // import the package/class this method depends on
/**
 * Sets up the actual job.
 *
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public Job createSubmittableJob(String[] args)
throws IOException {
  Configuration conf = getConf();
  setupTime(conf, HLogInputFormat.START_TIME_KEY);
  setupTime(conf, HLogInputFormat.END_TIME_KEY);
  Path inputDir = new Path(args[0]);
  String[] tables = args[1].split(",");
  String[] tableMap;
  if (args.length > 2) {
    tableMap = args[2].split(",");
    if (tableMap.length != tables.length) {
      throw new IOException("The same number of tables and mapping must be provided.");
    }
  } else {
    // if not mapping is specified map each table to itself
    tableMap = tables;
  }
  conf.setStrings(TABLES_KEY, tables);
  conf.setStrings(TABLE_MAP_KEY, tableMap);
  Job job = new Job(conf, NAME + "_" + inputDir);
  job.setJarByClass(WALPlayer.class);
  FileInputFormat.setInputPaths(job, inputDir);
  job.setInputFormatClass(WALInputFormat.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
  if (hfileOutPath != null) {
    // the bulk HFile case
    if (tables.length != 1) {
      throw new IOException("Exactly one table must be specified for the bulk export option");
    }
    TableName tableName = TableName.valueOf(tables[0]);
    job.setMapperClass(WALKeyValueMapper.class);
    job.setReducerClass(KeyValueSortReducer.class);
    Path outputDir = new Path(hfileOutPath);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setMapOutputValueClass(KeyValue.class);
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Table table = conn.getTable(tableName);
        RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
      HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
    }
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
        com.google.common.base.Preconditions.class);
  } else {
    // output to live cluster
    job.setMapperClass(WALMapper.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    // No reducers.
    job.setNumReduceTasks(0);
  }
  return job;
}
 
Author: fengchen8086 | Project: ditb | Lines: 62 | Source: WALPlayer.java


Note: The org.apache.hadoop.mapreduce.Job.setInputFormatClass examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective authors; copyright remains with the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.