本文整理汇总了Java中org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputDirRecursive方法的典型用法代码示例。如果您正苦于以下问题：Java FileInputFormat.setInputDirRecursive方法的具体用法？Java FileInputFormat.setInputDirRecursive怎么用？Java FileInputFormat.setInputDirRecursive使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.mapreduce.lib.input.FileInputFormat的用法示例。
在下文中一共展示了FileInputFormat.setInputDirRecursive方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: run
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //导入方法依赖的package包/类
/**
 * Configures and runs the "HalyardBulkLoad" MapReduce job and, when the job
 * succeeds, bulk-loads the generated HFiles into the target table.
 *
 * <p>If {@code TRUNCATE_PROPERTY} is set to {@code true} in the configuration,
 * the table is truncated after the job completes and before the HFiles are loaded.
 *
 * @param args exactly three values: input path(s), output path, table name
 * @return {@code 0} when the job completed and the HFiles were loaded;
 *         {@code -1} on a wrong argument count or when the job did not complete
 * @throws Exception whatever the job setup, the job itself or the bulk load throws
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkload [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D" + SKIP_INVALID_PROPERTY + "=true] [-D" + SPLIT_BITS_PROPERTY + "=8] [-D" + DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + OVERRIDE_CONTEXT_PROPERTY + "=true] <input_path(s)> <output_path> <table_name>");
        return -1;
    }
    final String inputPaths = args[0];
    final String outputDir = args[1];
    final String tableName = args[2];

    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());

    // Pin the default timestamp: keep a configured value, otherwise use "now".
    final long defaultTimestamp = getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, defaultTimestamp);

    final Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + outputDir + " -> " + tableName);
    job.setJarByClass(HalyardBulkLoad.class);
    job.setMapperClass(RDFMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    final int splitBits = getConf().getInt(SPLIT_BITS_PROPERTY, 3);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), tableName, true, splitBits)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        // Descend into sub-directories of the given input path(s).
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, inputPaths);
        final Path hfileDir = new Path(outputDir);
        FileOutputFormat.setOutputPath(job, hfileDir);
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);

        if (!job.waitForCompletion(true)) {
            return -1;
        }
        if (getConf().getBoolean(TRUNCATE_PROPERTY, false)) {
            HalyardTableUtils.truncateTable(hTable).close();
        }
        new LoadIncrementalHFiles(getConf()).doBulkLoad(hfileDir, hTable);
        LOG.info("Bulk Load Completed..");
        return 0;
    }
}
示例2: run
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //导入方法依赖的package包/类
/**
 * Configures and runs the "HalyardHiveLoad" MapReduce job, which reads a Hive
 * table through {@code HCatInputFormat}, and bulk-loads the generated HFiles
 * into the target HBase table when the job succeeds.
 *
 * @param args exactly three values: Hive table name (optionally prefixed with
 *             {@code database.}), output path, HBase table name
 * @return {@code 0} when the job completed and the HFiles were loaded;
 *         {@code -1} on a wrong argument count or when the job did not complete
 * @throws Exception whatever the job setup, the job itself or the bulk load throws
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: hiveload -D" + RDF_MIME_TYPE_PROPERTY + "='application/ld+json' [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D" + HIVE_DATA_COLUMN_INDEX_PROPERTY + "=3] [-D" + BASE_URI_PROPERTY + "='http://my_base_uri/'] [-D" + HalyardBulkLoad.SPLIT_BITS_PROPERTY + "=8] [-D" + HalyardBulkLoad.DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + HalyardBulkLoad.OVERRIDE_CONTEXT_PROPERTY + "=true] <hive_table_name> <output_path> <hbase_table_name>");
        return -1;
    }
    final String hiveTable = args[0];
    final String outputDir = args[1];
    final String hbaseTable = args[2];

    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());

    // Pin the default timestamp: keep a configured value, otherwise use "now".
    final long defaultTimestamp = getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, defaultTimestamp);

    final Job job = Job.getInstance(getConf(), "HalyardHiveLoad -> " + outputDir + " -> " + hbaseTable);

    // Split "database.table"; with no dot (or a leading dot) the database is passed as null.
    final int dot = hiveTable.indexOf('.');
    final String database = dot > 0 ? hiveTable.substring(0, dot) : null;
    final String table = hiveTable.substring(dot + 1);
    HCatInputFormat.setInput(job, database, table);

    job.setJarByClass(HalyardHiveLoad.class);
    job.setMapperClass(HiveMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);

    final int splitBits = getConf().getInt(HalyardBulkLoad.SPLIT_BITS_PROPERTY, 3);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), hbaseTable, true, splitBits)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        // NOTE(review): this passes the Hive table name as a file input path although the
        // input format is HCatInputFormat - confirm whether this call is needed at all.
        FileInputFormat.setInputPaths(job, hiveTable);
        final Path hfileDir = new Path(outputDir);
        FileOutputFormat.setOutputPath(job, hfileDir);
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);

        if (!job.waitForCompletion(true)) {
            return -1;
        }
        new LoadIncrementalHFiles(getConf()).doBulkLoad(hfileDir, hTable);
        LOG.info("Bulk Load Completed..");
        return 0;
    }
}
示例3: runHdfsCopyJob
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //导入方法依赖的package包/类
/**
 * Runs the "Stage 2: HDFS Copy Job" MapReduce job over the given input and,
 * on success, logs the rendered Hive commands for inspecting the job's output.
 *
 * @param input  job input path; sub-directories are read recursively
 * @param output job output path; output is gzip-compressed
 * @return {@code 0} if the job succeeded, {@code 1} otherwise
 * @throws IOException             propagated from job setup or execution
 * @throws InterruptedException    if waiting for the job is interrupted
 * @throws ClassNotFoundException  propagated from job execution
 * @throws TemplateRenderException if rendering the HQL template fails
 */
private int runHdfsCopyJob(Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException, TemplateRenderException {
    LOG.info("Starting job for step 2...");

    final Job copyJob = Job.getInstance(this.getConf(), "Stage 2: HDFS Copy Job");
    copyJob.setJarByClass(this.getClass());

    // Input side: text lines, read recursively, with a capped split size.
    copyJob.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.setInputPaths(copyJob, input);
    FileInputFormat.setInputDirRecursive(copyJob, true);
    final long maxSplitSize = this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L);
    FileInputFormat.setMaxInputSplitSize(copyJob, maxSplitSize);

    // Processing: reducer count comes from the copy-parallelism setting (default 150).
    copyJob.setMapperClass(Stage2DirectoryCopyMapper.class);
    copyJob.setReducerClass(Stage2DirectoryCopyReducer.class);
    final int reducers = getConf().getInt(ConfigurationKeys.BATCH_JOB_COPY_PARALLELISM, 150);
    copyJob.setNumReduceTasks(reducers);

    // Output side.
    copyJob.setOutputKeyClass(LongWritable.class);
    copyJob.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(copyJob, output);
    FileOutputFormat.setOutputCompressorClass(copyJob, GzipCodec.class);

    if (!copyJob.waitForCompletion(true)) {
        return 1;
    }

    final String hiveCommands = VelocityUtils.renderTemplate(STEP2_HQL_TEMPLATE, velocityContext);
    LOG.info("Job for step 2 finished successfully! To view logging data, run the following "
        + "commands in Hive: \n\n"
        + hiveCommands
        + "\n");
    return 0;
}
示例4: runCommitChangeJob
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //导入方法依赖的package包/类
/**
 * Runs the "Stage3: Commit Change Job" MapReduce job over the given input and,
 * on success, logs the rendered Hive commands for inspecting the job's output.
 *
 * @param input  job input path; sub-directories are read recursively
 * @param output job output path; output is gzip-compressed
 * @return {@code 0} if the job succeeded, {@code 1} otherwise
 * @throws IOException             propagated from job setup or execution
 * @throws InterruptedException    if waiting for the job is interrupted
 * @throws ClassNotFoundException  propagated from job execution
 * @throws TemplateRenderException if rendering the HQL template fails
 */
private int runCommitChangeJob(Path input, Path output)
    throws IOException, InterruptedException, ClassNotFoundException, TemplateRenderException {
    LOG.info("Starting job for step 3...");

    Job job = Job.getInstance(this.getConf(), "Stage3: Commit Change Job");
    job.setJarByClass(this.getClass());

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(Stage3CommitChangeMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, input);
    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setMaxInputSplitSize(job,
        this.getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    // The reducer count is set exactly once. An earlier setNumReduceTasks(0) was
    // always overwritten by this call and has been removed as dead code.
    // NOTE(review): no reducer class is set here - confirm whether a map-only job
    // (0 reducers) was actually intended instead of this parallelism setting.
    job.setNumReduceTasks(getConf().getInt(
        ConfigurationKeys.BATCH_JOB_METASTORE_PARALLELISM,
        150));

    boolean success = job.waitForCompletion(true);
    if (success) {
        LOG.info("Job for step 3 finished successfully! To view logging data, run the following "
            + "commands in Hive: \n\n"
            + VelocityUtils.renderTemplate(STEP3_HQL_TEMPLATE, velocityContext));
    }
    return success ? 0 : 1;
}
示例5: runSyncJob
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //导入方法依赖的package包/类
/**
 * Runs the "HDFS Sync job" MapReduce job.
 *
 * <p>The source, destination and temporary directories are handed to the tasks
 * through the job configuration under {@code SRC_PATH_CONF}, {@code DST_PATH_CONF}
 * and {@code TMP_PATH_CONF}.
 *
 * @param source      source path, stored in the job configuration
 * @param destination destination path, stored in the job configuration
 * @param tmpDir      temporary directory, stored in the job configuration
 * @param input       job input path; sub-directories are read recursively
 * @param output      job output path; output is gzip-compressed
 * @return {@code 0} if the job succeeded, {@code 1} otherwise
 * @throws IOException            propagated from job setup or execution
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException propagated from job execution
 */
private int runSyncJob(Path source, Path destination, Path tmpDir, Path input,
    Path output)
    throws IOException, InterruptedException, ClassNotFoundException {
    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor
    // and matches how the other jobs in this file are created.
    Job job = Job.getInstance(getConf(), "HDFS Sync job");
    job.setJarByClass(getClass());

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(HdfsSyncMapper.class);
    job.setReducerClass(HdfsSyncReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    job.getConfiguration().set(SRC_PATH_CONF, source.toString());
    job.getConfiguration().set(DST_PATH_CONF, destination.toString());
    job.getConfiguration().set(TMP_PATH_CONF, tmpDir.toString());

    FileInputFormat.setInputPaths(job, input);
    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setMaxInputSplitSize(job,
        getConf().getLong(FileInputFormat.SPLIT_MAXSIZE, 60000L));

    // Use the given Path directly instead of round-tripping it through a String.
    FileOutputFormat.setOutputPath(job, output);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
示例6: run
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; //导入方法依赖的package包/类
/**
 * Configures and runs the "HalyardPreSplit" MapReduce job over the given input
 * for the named table. The job uses a single reducer and produces no file
 * output ({@code NullOutputFormat}).
 *
 * @param args exactly two values: input path(s), table name
 * @return {@code 0} when the job completed; {@code -1} on a wrong argument
 *         count or when the job did not complete
 * @throws Exception whatever the job setup or the job itself throws
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: presplit [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D" + SKIP_INVALID_PROPERTY + "=true] [-D" + DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + OVERRIDE_CONTEXT_PROPERTY + "=true] <input_path(s)> <table_name>");
        return -1;
    }
    final String inputPaths = args[0];
    final String tableName = args[1];

    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());

    // Pin the default timestamp: keep a configured value, otherwise use "now".
    final long defaultTimestamp = getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, defaultTimestamp);

    final Job job = Job.getInstance(getConf(), "HalyardPreSplit -> " + tableName);
    // The table name is passed to the tasks through the job configuration.
    job.getConfiguration().set(TABLE_PROPERTY, tableName);
    job.setJarByClass(HalyardPreSplit.class);
    job.setMapperClass(RDFDecimatingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    // Descend into sub-directories of the given input path(s).
    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, inputPaths);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    job.setReducerClass(PreSplitReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(NullOutputFormat.class);

    final boolean completed = job.waitForCompletion(true);
    if (!completed) {
        return -1;
    }
    LOG.info("PreSplit Calculation Completed..");
    return 0;
}