This page collects typical usage examples of the Java class org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2. If you are unsure what HFileOutputFormat2 is for or how to use it, the selected examples below should help.
HFileOutputFormat2 belongs to the org.apache.hadoop.hbase.mapreduce package. Twelve code examples of the class are shown below, sorted by popularity.
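Before the examples, here is a minimal end-to-end sketch of the pattern most of them follow: configure a MapReduce job that emits ImmutableBytesWritable/Put pairs, let HFileOutputFormat2.configureIncrementalLoad wire up the sorting, partitioning and output format against the target table's region boundaries, then move the resulting HFiles into the table with LoadIncrementalHFiles. This sketch assumes HBase 1.x-style client APIs, an already-created table "my_table" with column family "cf", and CSV input of the form "rowkey,value"; the class, table, column-family and qualifier names are illustrative assumptions, not taken from the examples below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BulkLoadSketch {

    /** Turns "rowkey,value" lines into Puts keyed by row (hypothetical input layout). */
    public static class CsvToPutMapper
            extends Mapper<LongWritable, Text, ImmutableBytesWritable, Put> {
        @Override
        protected void map(LongWritable key, Text line, Context context)
                throws IOException, InterruptedException {
            String[] parts = line.toString().split(",", 2);
            if (parts.length < 2) {
                return; // skip malformed lines
            }
            byte[] row = Bytes.toBytes(parts[0]);
            Put put = new Put(row);
            put.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes(parts[1]));
            context.write(new ImmutableBytesWritable(row), put);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        TableName tableName = TableName.valueOf("my_table"); // hypothetical, pre-created table
        Path input = new Path(args[0]);
        Path hfileOutput = new Path(args[1]);                // staging directory for generated HFiles

        Job job = Job.getInstance(conf, "bulkload-sketch");
        job.setJarByClass(BulkLoadSketch.class);
        job.setMapperClass(CsvToPutMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, hfileOutput);

        try (Connection conn = ConnectionFactory.createConnection(conf);
             Table table = conn.getTable(tableName);
             RegionLocator locator = conn.getRegionLocator(tableName);
             Admin admin = conn.getAdmin()) {
            // Sets the reducer (PutSortReducer for Put values), the total-order partitioner
            // and the HFile output format so the files line up with the table's regions.
            HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
            if (!job.waitForCompletion(true)) {
                System.exit(1);
            }
            // Move the generated HFiles into the table's regions.
            new LoadIncrementalHFiles(conf).doBulkLoad(hfileOutput, admin, table, locator);
        }
    }
}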
Example 1: setupOutput
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2; // import the required package/class
protected void setupOutput(final Job job, final AddElementsFromHdfs operation, final HBaseStore store) throws IOException {
    FileOutputFormat.setOutputPath(job, new Path(operation.getOutputPath()));
    final String stagingDir = operation.getOption(HBaseStoreConstants.OPERATION_HDFS_STAGING_PATH);
    if (null != stagingDir && !stagingDir.isEmpty()) {
        job.getConfiguration().set(HConstants.TEMPORARY_FS_DIRECTORY_KEY, stagingDir);
    }
    try {
        HFileOutputFormat2.configureIncrementalLoad(
                job,
                store.getTable(),
                store.getConnection().getRegionLocator(store.getTableName())
        );
    } catch (final StoreException e) {
        throw new RuntimeException(e);
    }
}
Example 2: doBulkLoadSinglePut
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2; // import the required package/class
public void doBulkLoadSinglePut(boolean verbose, HTable table) throws IOException, ClassNotFoundException, InterruptedException, Exception {
    ClassTools.preLoad(LoadIncrementalHFiles.class);
    // setup the bulkload temp folder
    HDFSPath bulkLoadPath = new HDFSPath(
            getConfiguration(),
            "/tmp/" + UUID.randomUUID().toString());
    if (bulkLoadPath.existsDir()) {
        bulkLoadPath.trash();
    }
    // setup the job
    setMapOutputKeyClass(ImmutableBytesWritable.class);
    setMapOutputValueClass(Put.class);
    HFileOutputFormat2.configureIncrementalLoad(this, table);
    HFileOutputFormat2.setOutputPath(this, bulkLoadPath);
    if (waitForCompletion(verbose)) {
        // Load generated HFiles into table
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        loader.doBulkLoad(bulkLoadPath, table);
    } else {
        log.info("loading failed.");
    }
}
Example 3: init
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2; // import the required package/class
@Override
public void init() throws IOException {
    super.init();
    Configuration taskConf = new Configuration();
    Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);
    taskConf.set(FileOutputFormat.OUTDIR, stagingResultDir.toString());
    ExecutionBlockId ebId = taskAttemptId.getTaskId().getExecutionBlockId();
    writerContext = new TaskAttemptContextImpl(taskConf,
            new TaskAttemptID(ebId.getQueryId().toString(), ebId.getId(), TaskType.MAP,
                    taskAttemptId.getTaskId().getId(), taskAttemptId.getId()));
    HFileOutputFormat2 hFileOutputFormat2 = new HFileOutputFormat2();
    try {
        writer = hFileOutputFormat2.getRecordWriter(writerContext);
        committer = new FileOutputCommitter(FileOutputFormat.getOutputPath(writerContext), writerContext);
        workingFilePath = committer.getWorkPath();
    } catch (InterruptedException e) {
        throw new IOException(e.getMessage(), e);
    }
    LOG.info("Created hbase file writer: " + workingFilePath);
}
Example 4: run
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2; // import the required package/class
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkload [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D" + SKIP_INVALID_PROPERTY + "=true] [-D" + SPLIT_BITS_PROPERTY + "=8] [-D" + DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + OVERRIDE_CONTEXT_PROPERTY + "=true] <input_path(s)> <output_path> <table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + args[1] + " -> " + args[2]);
    job.setJarByClass(HalyardBulkLoad.class);
    job.setMapperClass(RDFMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], true, getConf().getInt(SPLIT_BITS_PROPERTY, 3))) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            if (getConf().getBoolean(TRUNCATE_PROPERTY, false)) {
                HalyardTableUtils.truncateTable(hTable).close();
            }
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}
Example 5: run
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2; // import the required package/class
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: hiveload -D" + RDF_MIME_TYPE_PROPERTY + "='application/ld+json' [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] [-D" + HIVE_DATA_COLUMN_INDEX_PROPERTY + "=3] [-D" + BASE_URI_PROPERTY + "='http://my_base_uri/'] [-D" + HalyardBulkLoad.SPLIT_BITS_PROPERTY + "=8] [-D" + HalyardBulkLoad.DEFAULT_CONTEXT_PROPERTY + "=http://new_context] [-D" + HalyardBulkLoad.OVERRIDE_CONTEXT_PROPERTY + "=true] <hive_table_name> <output_path> <hbase_table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(),
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    Job job = Job.getInstance(getConf(), "HalyardHiveLoad -> " + args[1] + " -> " + args[2]);
    int i = args[0].indexOf('.');
    HCatInputFormat.setInput(job, i > 0 ? args[0].substring(0, i) : null, args[0].substring(i + 1));
    job.setJarByClass(HalyardHiveLoad.class);
    job.setMapperClass(HiveMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], true, getConf().getInt(HalyardBulkLoad.SPLIT_BITS_PROPERTY, 3))) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}
Example 6: configureRunnableJobUsingBulkLoad
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2; // import the required package/class
/**
 * Submits the job, waits for completion and bulk-loads the generated HFiles.
 * @param job job to run
 * @param outputPath path of the generated HFiles
 * @param outputTableName name of the table the HFiles are loaded into
 * @param skipDependencyJars whether to clear the tmpjars setting before running
 * @throws Exception if the job fails
 */
private void configureRunnableJobUsingBulkLoad(Job job, Path outputPath, TableName outputTableName,
        boolean skipDependencyJars) throws Exception {
    job.setMapperClass(getBulkMapperClass());
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    final Configuration configuration = job.getConfiguration();
    try (Connection conn = ConnectionFactory.createConnection(configuration);
            Admin admin = conn.getAdmin();
            Table table = conn.getTable(outputTableName);
            RegionLocator regionLocator = conn.getRegionLocator(outputTableName)) {
        HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator);
        if (skipDependencyJars) {
            job.getConfiguration().unset("tmpjars");
        }
        boolean status = job.waitForCompletion(true);
        if (!status) {
            LOG.error("IndexTool job failed!");
            throw new Exception("IndexTool job failed: " + job.toString());
        }
        LOG.info("Loading HFiles from {}", outputPath);
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(configuration);
        loader.doBulkLoad(outputPath, admin, table, regionLocator);
    }
    FileSystem.get(configuration).delete(outputPath, true);
}
Example 7: createSubmittableJob
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2; // import the required package/class
/**
 * Sets up the actual job.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public Job createSubmittableJob(String[] args) throws IOException {
    Configuration conf = getConf();
    String inputDirs = args[0];
    String tabName = args[1];
    conf.setStrings(TABLES_KEY, tabName);
    conf.set(FileInputFormat.INPUT_DIR, inputDirs);
    Job job = Job.getInstance(conf,
            conf.get(JOB_NAME_CONF_KEY, NAME + "_" + EnvironmentEdgeManager.currentTime()));
    job.setJarByClass(MapReduceHFileSplitterJob.class);
    job.setInputFormatClass(HFileInputFormat.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        LOG.debug("add incremental job :" + hfileOutPath + " from " + inputDirs);
        TableName tableName = TableName.valueOf(tabName);
        job.setMapperClass(HFileCellMapper.class);
        job.setReducerClass(CellSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputValueClass(MapReduceExtendedCell.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
                Table table = conn.getTable(tableName);
                RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            HFileOutputFormat2.configureIncrementalLoad(job, table.getDescriptor(), regionLocator);
        }
        LOG.debug("success configuring load incremental job");
        TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                org.apache.hbase.thirdparty.com.google.common.base.Preconditions.class);
    } else {
        throw new IOException("No bulk output directory specified");
    }
    return job;
}
Example 8: run
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2; // import the required package/class
@Override
public int run(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] remainArgs = remainArgs(args, conf);
    options = new LoadVCFToHBaseOptions();
    options.parse(remainArgs);
    options.setHadoopConf(remainArgs, conf);
    conf.addResource(new Path(options.getConfig() + "hbase-site.xml"));
    conf.addResource(new Path(options.getConfig() + "core-site.xml"));
    conf.set("vcfHeader", options.getHeaderOutput());
    Job job = new Job(conf);
    createTable(conf, options.getTableName());
    MultipleVCFHeader vcfHeaders = new MultipleVCFHeader();
    vcfHeaders.mergeHeader(new Path(options.getInput()), options.getHeaderOutput(), job, false);
    job.setJobName("vcf to hbase");
    job.setNumReduceTasks(options.getReducerNumber());
    job.setInputFormatClass(VCFMultipleInputFormat.class);
    job.setJarByClass(LoadVCFToHBase.class);
    job.setMapperClass(VCFToHBaseMapper.class);
    job.setReducerClass(PutSortReducer.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    FileInputFormat.setInputPaths(job, new Path(options.getInput()));
    FileOutputFormat.setOutputPath(job, new Path(options.getHFileOutput()));
    HFileOutputFormat2.configureIncrementalLoad(job, new HTable(conf, options.getTableName()));
    if (job.waitForCompletion(true)) {
        LoadHFile2HBase(conf, options.getTableName(), options.getHFileOutput());
        return 0;
    } else {
        return 1;
    }
}
Example 9: run
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2; // import the required package/class
@Override
public int run(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    String[] remainArgs = remainArgs(args, conf);
    options = new DBNSFPToHbaseOptions();
    options.parse(remainArgs);
    options.setHadoopConf(remainArgs, conf);
    tableName = TableName.valueOf(options.getTableName());
    conf.set("DEFAULT_COLUMN_FAMILY", "data");
    conf.addResource(new Path(options.getConfig() + "hbase-site.xml"));
    conf.addResource(new Path(options.getConfig() + "core-site.xml"));
    conn = ConnectionFactory.createConnection(conf);
    setHeader(new Path(options.getInput()), conf);
    long reduceThreshMem = (long) (1 << 28);
    conf.setLong("putsortreducer.row.threshold", reduceThreshMem);
    Job job = Job.getInstance(conf, "dbNSFPtoHbase");
    createTable(tableName);
    job.setJarByClass(org.bgi.flexlab.gaea.tools.mapreduce.annotator.databaseload.DBNSFPToHbase.class);
    job.setMapperClass(DBNSFPToHbaseMapper.class);
    job.setReducerClass(PutSortReducer.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Put.class);
    FileInputFormat.setInputPaths(job, new Path(options.getInput()));
    FileOutputFormat.setOutputPath(job, new Path(options.getHFileOutput()));
    // HFileOutputFormat2.configureIncrementalLoad(job, new HTable(conf, options.getTableName()));
    HFileOutputFormat2.configureIncrementalLoad(job, conn.getTable(tableName), conn.getRegionLocator(tableName));
    if (job.waitForCompletion(true)) {
        LoadHFile2HBase(conf, tableName, options.getHFileOutput());
        conn.close();
        return 0;
    } else {
        conn.close();
        return 1;
    }
}
Example 10: run
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2; // import the required package/class
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: bulkupdate [-D" + MRJobConfig.QUEUE_NAME + "=proofofconcepts] <input_file_with_SPARQL_queries> <output_path> <table_name>");
        return -1;
    }
    TableMapReduceUtil.addDependencyJars(getConf(),
            HalyardExport.class,
            NTriplesUtil.class,
            Rio.class,
            AbstractRDFHandler.class,
            RDFFormat.class,
            RDFParser.class,
            HTable.class,
            HBaseConfiguration.class,
            AuthenticationProtos.class,
            Trace.class,
            Gauge.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setStrings(TABLE_NAME_PROPERTY, args[2]);
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    Job job = Job.getInstance(getConf(), "HalyardBulkUpdate -> " + args[1] + " -> " + args[2]);
    NLineInputFormat.setNumLinesPerSplit(job, 1);
    job.setJarByClass(HalyardBulkUpdate.class);
    job.setMapperClass(SPARQLMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), args[2], false, 0)) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(args[1]), hTable);
            LOG.info("Bulk Update Completed..");
            return 0;
        }
    }
    return -1;
}
Example 11: shouldSetupJob
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2; // import the required package/class
@Test
public void shouldSetupJob() throws IOException, StoreException {
    // Given
    final JobConf localConf = createLocalConf();
    final FileSystem fs = FileSystem.getLocal(localConf);
    fs.mkdirs(new Path(outputDir));
    final HBaseAddElementsFromHdfsJobFactory factory = new HBaseAddElementsFromHdfsJobFactory();
    final Job job = mock(Job.class);
    final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
            .addInputMapperPair(new Path(inputDir).toString(), TextMapperGeneratorImpl.class.getName())
            .outputPath(outputDir)
            .failurePath(failureDir)
            .jobInitialiser(new TextJobInitialiser())
            .option(HBaseStoreConstants.OPERATION_HDFS_STAGING_PATH, stagingDir)
            .build();
    final HBaseStore store = new SingleUseMiniHBaseStore();
    final Schema schema = Schema.fromJson(StreamUtil.schemas(getClass()));
    final HBaseProperties properties = HBaseProperties.loadStoreProperties(StreamUtil.storeProps(getClass()));
    store.initialise("graphId", schema, properties);
    given(job.getConfiguration()).willReturn(localConf);

    // When
    factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);

    // Then
    verify(job).setJarByClass(factory.getClass());
    verify(job).setJobName("Ingest HDFS data: Generator=" + TextMapperGeneratorImpl.class.getName() + ", output=" + outputDir);
    verify(job).setMapperClass(AddElementsFromHdfsMapper.class);
    verify(job).setMapOutputKeyClass(ImmutableBytesWritable.class);
    verify(job).setMapOutputValueClass(KeyValue.class);
    verify(job).setReducerClass(AddElementsFromHdfsReducer.class);
    verify(job).setOutputKeyClass(ImmutableBytesWritable.class);
    verify(job).setOutputValueClass(KeyValue.class);
    verify(job).setOutputFormatClass(HFileOutputFormat2.class);
    assertEquals(fs.makeQualified(new Path(outputDir)).toString(), job.getConfiguration().get("mapreduce.output.fileoutputformat.outputdir"));
    verify(job).setNumReduceTasks(1);
}
Example 12: createSubmittableJob
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2; // import the required package/class
/**
 * Sets up the actual job.
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
        throws IOException {
    TableName tableName = TableName.valueOf(args[0]);
    conf.set(TABLE_NAME, tableName.getNameAsString());
    Path inputDir = new Path(args[1]);
    Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName));
    job.setJarByClass(Importer.class);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    // make sure we get the filter in the jars
    try {
        Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
        if (filter != null) {
            TableMapReduceUtil.addDependencyJars(conf, filter);
        }
    } catch (Exception e) {
        throw new IOException(e);
    }
    if (hfileOutPath != null) {
        job.setMapperClass(KeyValueImporter.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
                Table table = conn.getTable(tableName);
                RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            job.setReducerClass(KeyValueSortReducer.class);
            Path outputDir = new Path(hfileOutPath);
            FileOutputFormat.setOutputPath(job, outputDir);
            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            job.setMapOutputValueClass(KeyValue.class);
            HFileOutputFormat2.configureIncrementalLoad(job, table, regionLocator);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                    com.google.common.base.Preconditions.class);
        }
    } else {
        // No reducers. Just write straight to table. Call initTableReducerJob
        // because it sets up the TableOutputFormat.
        job.setMapperClass(Importer.class);
        TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
        job.setNumReduceTasks(0);
    }
    return job;
}