當前位置: 首頁>>代碼示例>>Java>>正文


Java FileInputFormat.setInputDirRecursive方法代碼示例

本文整理匯總了Java中org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputDirRecursive方法的典型用法代碼示例。如果您正苦於以下問題:Java FileInputFormat.setInputDirRecursive方法的具體用法?Java FileInputFormat.setInputDirRecursive怎麼用?Java FileInputFormat.setInputDirRecursive使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類org.apache.hadoop.mapreduce.lib.input.FileInputFormat的用法示例。


在下文中一共展示了FileInputFormat.setInputDirRecursive方法的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: run

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //導入方法依賴的package包/類
/**
 * Entry point of the bulk-load tool.
 *
 * <p>Expects exactly three arguments. It configures a map-reduce job that reads the input
 * path(s) recursively and writes to the output path, then waits for the job. On success it
 * optionally truncates the target table (when {@code TRUNCATE_PROPERTY} is set to true) and
 * bulk-loads the job output into that table.
 *
 * @param args {@code <input_path(s)> <output_path> <table_name>}
 * @return {@code 0} when the job and the bulk load complete; {@code -1} when the argument
 *         count is wrong or the job does not complete successfully
 * @throws Exception if job configuration, job execution or the bulk load fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkload [-D" + MRJobConfig.QUEUE_NAME
                + "=proofofconcepts] [-D" + SKIP_INVALID_PROPERTY
                + "=true] [-D" + SPLIT_BITS_PROPERTY
                + "=8] [-D" + DEFAULT_CONTEXT_PROPERTY
                + "=http://new_context] [-D" + OVERRIDE_CONTEXT_PROPERTY
                + "=true] <input_path(s)> <output_path> <table_name>");
        return -1;
    }
    final String inputPaths = args[0];
    final String outputDir = args[1];
    final String tableName = args[2];

    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class, Rio.class, AbstractRDFHandler.class, RDFFormat.class, RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());

    // Keep an explicitly configured default timestamp; otherwise pin it to the current time.
    final long defaultTimestamp = getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, defaultTimestamp);

    final Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + outputDir + " -> " + tableName);
    job.setJarByClass(HalyardBulkLoad.class);
    job.setMapperClass(RDFMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    final int splitBits = getConf().getInt(SPLIT_BITS_PROPERTY, 3);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), tableName, true, splitBits)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, inputPaths);
        final Path jobOutput = new Path(outputDir);
        FileOutputFormat.setOutputPath(job, jobOutput);
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);

        if (!job.waitForCompletion(true)) {
            return -1;
        }

        final boolean truncateBeforeLoad = getConf().getBoolean(TRUNCATE_PROPERTY, false);
        if (truncateBeforeLoad) {
            HalyardTableUtils.truncateTable(hTable).close();
        }
        new LoadIncrementalHFiles(getConf()).doBulkLoad(jobOutput, hTable);
        LOG.info("Bulk Load Completed..");
        return 0;
    }
}
 
開發者ID:Merck,項目名稱:Halyard,代碼行數:41,代碼來源:HalyardBulkLoad.java

示例2: run

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //導入方法依賴的package包/類
/**
 * Entry point of the Hive-load tool.
 *
 * <p>Expects exactly three arguments. The first one is a Hive table name, optionally
 * prefixed with a database name and a dot. The method configures a map-reduce job that
 * reads from that table through {@code HCatInputFormat}, waits for the job and, on success,
 * bulk-loads the job output into the given HBase table.
 *
 * @param args {@code <hive_table_name> <output_path> <hbase_table_name>}
 * @return {@code 0} when the job and the bulk load complete; {@code -1} when the argument
 *         count is wrong or the job does not complete successfully
 * @throws Exception if job configuration, job execution or the bulk load fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: hiveload -D" + RDF_MIME_TYPE_PROPERTY
                + "='application/ld+json' [-D" + MRJobConfig.QUEUE_NAME
                + "=proofofconcepts] [-D" + HIVE_DATA_COLUMN_INDEX_PROPERTY
                + "=3] [-D" + BASE_URI_PROPERTY
                + "='http://my_base_uri/'] [-D" + HalyardBulkLoad.SPLIT_BITS_PROPERTY
                + "=8] [-D" + HalyardBulkLoad.DEFAULT_CONTEXT_PROPERTY
                + "=http://new_context] [-D" + HalyardBulkLoad.OVERRIDE_CONTEXT_PROPERTY
                + "=true] <hive_table_name> <output_path> <hbase_table_name>");
        return -1;
    }
    final String hiveTable = args[0];
    final String outputDir = args[1];
    final String hbaseTable = args[2];

    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class, Rio.class, AbstractRDFHandler.class, RDFFormat.class, RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());

    // Keep an explicitly configured default timestamp; otherwise pin it to the current time.
    final long defaultTimestamp = getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, defaultTimestamp);

    final Job job = Job.getInstance(getConf(), "HalyardHiveLoad -> " + outputDir + " -> " + hbaseTable);

    // Split "database.table"; without a database prefix the database argument is null.
    final int dot = hiveTable.indexOf('.');
    final String database = dot > 0 ? hiveTable.substring(0, dot) : null;
    final String table = hiveTable.substring(dot + 1);
    HCatInputFormat.setInput(job, database, table);

    job.setJarByClass(HalyardHiveLoad.class);
    job.setMapperClass(HiveMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    final int splitBits = getConf().getInt(HalyardBulkLoad.SPLIT_BITS_PROPERTY, 3);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), hbaseTable, true, splitBits)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        // NOTE(review): the Hive table name is also registered as a file input path here;
        // confirm this is intended given that the input format is HCatInputFormat.
        FileInputFormat.setInputPaths(job, hiveTable);
        final Path jobOutput = new Path(outputDir);
        FileOutputFormat.setOutputPath(job, jobOutput);
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);

        if (!job.waitForCompletion(true)) {
            return -1;
        }

        new LoadIncrementalHFiles(getConf()).doBulkLoad(jobOutput, hTable);
        LOG.info("Bulk Load Completed..");
        return 0;
    }
}
 
開發者ID:Merck,項目名稱:Halyard,代碼行數:40,代碼來源:HalyardHiveLoad.java

示例3: runHdfsCopyJob

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //導入方法依賴的package包/類
/**
 * Configures and runs the "Stage 2: HDFS Copy Job" map-reduce job.
 *
 * <p>The job reads text input recursively from {@code input}, writes gzip-compressed output
 * to {@code output}, and blocks until it finishes. On success a message containing the
 * rendered {@code STEP2_HQL_TEMPLATE} is logged.
 *
 * @param input path the job reads from
 * @param output path the job writes to
 * @return {@code 0} if the job succeeded, {@code 1} otherwise
 * @throws IOException propagated from job setup or execution
 * @throws InterruptedException propagated from waiting on the job
 * @throws ClassNotFoundException propagated from waiting on the job
 * @throws TemplateRenderException propagated from rendering the template
 */
private int runHdfsCopyJob(Path input, Path output)
  throws IOException, InterruptedException, ClassNotFoundException, TemplateRenderException {

  LOG.info("Starting job for step 2...");

  final Job copyJob = Job.getInstance(this.getConf(), "Stage 2: HDFS Copy Job");
  copyJob.setJarByClass(this.getClass());

  // Mapper / reducer wiring and their output types.
  copyJob.setInputFormatClass(TextInputFormat.class);
  copyJob.setMapperClass(Stage2DirectoryCopyMapper.class);
  copyJob.setReducerClass(Stage2DirectoryCopyReducer.class);
  copyJob.setOutputKeyClass(LongWritable.class);
  copyJob.setOutputValueClass(Text.class);

  // Input: walk directories recursively; split size falls back to 60000 when unset.
  final long maxSplitSize = this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L);
  FileInputFormat.setInputPaths(copyJob, input);
  FileInputFormat.setInputDirRecursive(copyJob, true);
  FileInputFormat.setMaxInputSplitSize(copyJob, maxSplitSize);

  // Output: gzip-compressed files under the given path.
  FileOutputFormat.setOutputPath(copyJob, output);
  FileOutputFormat.setOutputCompressorClass(copyJob, GzipCodec.class);

  final int reducers = getConf().getInt(ConfigurationKeys.BATCH_JOB_COPY_PARALLELISM, 150);
  copyJob.setNumReduceTasks(reducers);

  if (!copyJob.waitForCompletion(true)) {
    return 1;
  }

  LOG.info("Job for step 2 finished successfully! To view logging data, run the following "
      + "commands in Hive: \n\n"
      + VelocityUtils.renderTemplate(STEP2_HQL_TEMPLATE, velocityContext)
      + "\n");
  return 0;
}
 
開發者ID:airbnb,項目名稱:reair,代碼行數:39,代碼來源:MetastoreReplicationJob.java

示例4: runCommitChangeJob

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //導入方法依賴的package包/類
/**
 * Configures and runs the "Stage3: Commit Change Job" map-reduce job.
 *
 * <p>The job reads text input recursively from {@code input}, writes gzip-compressed output
 * to {@code output}, and blocks until it finishes. On success a message containing the
 * rendered {@code STEP3_HQL_TEMPLATE} is logged.
 *
 * @param input path the job reads from
 * @param output path the job writes to
 * @return {@code 0} if the job succeeded, {@code 1} otherwise
 * @throws IOException propagated from job setup or execution
 * @throws InterruptedException propagated from waiting on the job
 * @throws ClassNotFoundException propagated from waiting on the job
 * @throws TemplateRenderException propagated from rendering the template
 */
private int runCommitChangeJob(Path input, Path output)
  throws IOException, InterruptedException, ClassNotFoundException, TemplateRenderException {

  LOG.info("Starting job for step 3...");

  final Job commitJob = Job.getInstance(this.getConf(), "Stage3: Commit Change Job");
  commitJob.setJarByClass(this.getClass());

  commitJob.setInputFormatClass(TextInputFormat.class);
  commitJob.setMapperClass(Stage3CommitChangeMapper.class);
  // NOTE(review): this zero is overwritten by the setNumReduceTasks call further down;
  // confirm which of the two values is actually intended.
  commitJob.setNumReduceTasks(0);
  commitJob.setOutputKeyClass(Text.class);
  commitJob.setOutputValueClass(Text.class);

  // Input: walk directories recursively; split size falls back to 60000 when unset.
  final long maxSplitSize = this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L);
  FileInputFormat.setInputPaths(commitJob, input);
  FileInputFormat.setInputDirRecursive(commitJob, true);
  FileInputFormat.setMaxInputSplitSize(commitJob, maxSplitSize);

  // Output: gzip-compressed files under the given path.
  FileOutputFormat.setOutputPath(commitJob, output);
  FileOutputFormat.setOutputCompressorClass(commitJob, GzipCodec.class);

  final int reducers = getConf().getInt(ConfigurationKeys.BATCH_JOB_METASTORE_PARALLELISM, 150);
  commitJob.setNumReduceTasks(reducers);

  if (!commitJob.waitForCompletion(true)) {
    return 1;
  }

  LOG.info("Job for step 3 finished successfully! To view logging data, run the following "
      + "commands in Hive: \n\n"
      + VelocityUtils.renderTemplate(STEP3_HQL_TEMPLATE, velocityContext));
  return 0;
}
 
開發者ID:airbnb,項目名稱:reair,代碼行數:37,代碼來源:MetastoreReplicationJob.java

示例5: runSyncJob

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //導入方法依賴的package包/類
/**
 * Configures and runs the "HDFS Sync job" map-reduce job.
 *
 * <p>The source, destination and temporary paths are passed to the tasks through the job
 * configuration under {@code SRC_PATH_CONF}, {@code DST_PATH_CONF} and
 * {@code TMP_PATH_CONF}. The job reads text input recursively from {@code input}, writes
 * gzip-compressed output to {@code output}, and this method blocks until it finishes.
 *
 * @param source path stored in the job configuration under {@code SRC_PATH_CONF}
 * @param destination path stored in the job configuration under {@code DST_PATH_CONF}
 * @param tmpDir path stored in the job configuration under {@code TMP_PATH_CONF}
 * @param input path the job reads from
 * @param output path the job writes to
 * @return {@code 0} if the job succeeded, {@code 1} otherwise
 * @throws IOException propagated from job setup or execution
 * @throws InterruptedException propagated from waiting on the job
 * @throws ClassNotFoundException propagated from waiting on the job
 */
private int runSyncJob(Path source, Path destination, Path tmpDir, Path input,
                       Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
  // Use the factory method instead of the deprecated Job(Configuration, String) constructor.
  Job job = Job.getInstance(getConf(), "HDFS Sync job");
  job.setJarByClass(getClass());

  job.setInputFormatClass(TextInputFormat.class);
  job.setMapperClass(HdfsSyncMapper.class);
  job.setReducerClass(HdfsSyncReducer.class);

  job.setOutputKeyClass(LongWritable.class);
  job.setOutputValueClass(Text.class);

  // Hand the three directories over to the tasks via the job configuration.
  job.getConfiguration().set(SRC_PATH_CONF, source.toString());
  job.getConfiguration().set(DST_PATH_CONF, destination.toString());
  job.getConfiguration().set(TMP_PATH_CONF, tmpDir.toString());

  // Input: walk directories recursively; split size falls back to 60000 when unset.
  FileInputFormat.setInputPaths(job, input);
  FileInputFormat.setInputDirRecursive(job, true);
  FileInputFormat.setMaxInputSplitSize(job,
      this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

  // Output: gzip-compressed files under the given path.
  FileOutputFormat.setOutputPath(job, new Path(output.toString()));
  FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

  boolean success = job.waitForCompletion(true);

  return success ? 0 : 1;
}
 
開發者ID:airbnb,項目名稱:reair,代碼行數:29,代碼來源:ReplicationJob.java

示例6: run

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //導入方法依賴的package包/類
/**
 * Entry point of the pre-split tool.
 *
 * <p>Expects exactly two arguments. It configures a map-reduce job that reads the input
 * path(s) recursively, uses a single reducer and {@code NullOutputFormat}, and stores the
 * table name in the job configuration under {@code TABLE_PROPERTY}. The method blocks until
 * the job finishes.
 *
 * @param args {@code <input_path(s)> <table_name>}
 * @return {@code 0} when the job completes successfully; {@code -1} when the argument count
 *         is wrong or the job does not complete successfully
 * @throws Exception if job configuration or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: presplit [-D" + MRJobConfig.QUEUE_NAME
                + "=proofofconcepts] [-D" + SKIP_INVALID_PROPERTY
                + "=true] [-D" + DEFAULT_CONTEXT_PROPERTY
                + "=http://new_context] [-D" + OVERRIDE_CONTEXT_PROPERTY
                + "=true] <input_path(s)> <table_name>");
        return -1;
    }
    final String inputPaths = args[0];
    final String tableName = args[1];

    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class, Rio.class, AbstractRDFHandler.class, RDFFormat.class, RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());

    // Keep an explicitly configured default timestamp; otherwise pin it to the current time.
    final long defaultTimestamp = getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, defaultTimestamp);

    final Job job = Job.getInstance(getConf(), "HalyardPreSplit -> " + tableName);
    job.getConfiguration().set(TABLE_PROPERTY, tableName);
    job.setJarByClass(HalyardPreSplit.class);

    // Map side.
    job.setMapperClass(RDFDecimatingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, inputPaths);

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);

    // Reduce side: a single reducer, and the job uses NullOutputFormat.
    job.setReducerClass(PreSplitReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(NullOutputFormat.class);

    if (!job.waitForCompletion(true)) {
        return -1;
    }
    LOG.info("PreSplit Calculation Completed..");
    return 0;
}
 
開發者ID:Merck,項目名稱:Halyard,代碼行數:35,代碼來源:HalyardPreSplit.java


注:本文中的org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputDirRecursive方法示例由純淨天空整理自GitHub/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。