本文整理汇总了Java中org.apache.hadoop.hbase.mapreduce.TableOutputFormat类的典型用法代码示例。如果您正苦于以下问题:Java TableOutputFormat类的具体用法?Java TableOutputFormat怎么用?Java TableOutputFormat使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
TableOutputFormat类属于org.apache.hadoop.hbase.mapreduce包,在下文中一共展示了TableOutputFormat类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: process
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Configures the given job to write its output to the HBase table named by
 * the {@code TableOutput} annotation.
 *
 * @param annotation the annotation being processed; it is cast to {@code TableOutput}
 * @param job the job whose configuration and output format are set
 * @param target the annotated object (not referenced by this method)
 * @throws ToolException if adding the dependency jars fails with an {@link IOException}
 */
@Override
public void process(Annotation annotation, Job job, Object target)
    throws ToolException {
  final TableOutput outputAnnotation = (TableOutput) annotation;

  // Merge the HBase configuration into the job's configuration.
  final Configuration jobConf = job.getConfiguration();
  HBaseConfiguration.merge(jobConf, HBaseConfiguration.create(jobConf));

  // Register the dependency jars with the job; I/O failures are wrapped.
  try {
    TableMapReduceUtil.addDependencyJars(job);
  } catch (IOException ioe) {
    throw new ToolException(ioe);
  }

  // Output goes through the HBase table output format.
  job.setOutputFormatClass(TableOutputFormat.class);

  // The annotation value is an expression that evaluates to the table name.
  final String outputTable = (String) this.evaluateExpression(outputAnnotation.value());
  job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, outputTable);
}
示例2: main
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Main entry point. Parses the command line, then runs a job with zero
 * reduce tasks that reads the input path and writes to the given HBase table.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
  final Configuration conf = HBaseConfiguration.create();
  final String[] remainingArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  final CommandLine commandLine = parseArgs(remainingArgs);

  // Propagate the debug flag through the configuration.
  if (commandLine.hasOption("d")) {
    conf.set("conf.debug", "true");
  }

  // Target table and input file come from the command line.
  final String tableName = commandLine.getOptionValue("t");
  final String inputFile = commandLine.getOptionValue("i");

  // Build the job and wire up its classes.
  final String jobName = "Import from file " + inputFile + " into table " + tableName;
  final Job importJob = Job.getInstance(conf, jobName);
  importJob.setJarByClass(ImportJsonFromFile.class);
  importJob.setMapperClass(ImportMapper.class);
  importJob.setOutputFormatClass(TableOutputFormat.class);
  importJob.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, tableName);
  importJob.setOutputKeyClass(ImmutableBytesWritable.class);
  importJob.setOutputValueClass(Writable.class);
  importJob.setNumReduceTasks(0);
  FileInputFormat.addInputPath(importJob, new Path(inputFile));

  // Exit code 0 on success, 1 on failure.
  final int exitCode = importJob.waitForCompletion(true) ? 0 : 1;
  System.exit(exitCode);
}
示例3: setStoreLocation
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Stores the output table name in the job configuration, restores the
 * serialized schema if one was saved, and prepares the local job
 * configuration used by this storer.
 *
 * @param location the store location; a leading {@code hbase://} is stripped
 * @param job the job being configured
 * @throws IOException declared by the interface; may be raised by the helpers called here
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
  // Only the part after the optional "hbase://" prefix is the table name.
  final String tableName =
      location.startsWith("hbase://") ? location.substring(8) : location;
  job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, tableName);

  final String storedSchema =
      getUDFProperties().getProperty(contextSignature + "_schema");
  if (storedSchema != null) {
    schema_ = (ResourceSchema) ObjectSerializer.deserialize(storedSchema);
  }

  initialiseHBaseClassLoaderResources(job);
  m_conf = initializeLocalJobConfig(job);

  // Unlike setLocation, no udf property is used to fetch the hbase
  // delegation token only once: setStoreLocation receives a different Job
  // object on each call and the last one passed is the one that is
  // launched. So we end up getting multiple hbase delegation tokens.
  addHBaseDelegationToken(m_conf, job);
}
示例4: writeEdges
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Turns the runtime edges of the collection into persistent edges (each
 * carrying its source and target vertex data) and writes them to the HBase
 * edge table.
 *
 * @param collection Graph collection
 * @throws IOException e.g. when the Hadoop {@code Job} instance cannot be created
 */
private void writeEdges(final GraphCollection collection) throws IOException {
  DataSet<PersistentEdge<Vertex>> persistentEdges = collection
      .getVertices()
      // first join: vertex id == edge source id
      .join(collection.getEdges())
      .where(new Id<>())
      .equalTo(new SourceId<>())
      // second join: edge target id == vertex id
      .join(collection.getVertices())
      .where("f1.targetId")
      .equalTo(new Id<>())
      // ((source-vertex-data, edge-data), target-vertex-data) -> persistent edge
      .with(new BuildPersistentEdge<>(getHBaseConfig().getPersistentEdgeFactory()));

  // The Hadoop job only carries the configuration naming the target table.
  Job hadoopJob = Job.getInstance();
  String edgeTable = getHBaseConfig().getEdgeTableName();
  hadoopJob.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, edgeTable);

  // Map each persistent edge to a mutation and emit it via TableOutputFormat.
  persistentEdges
      .map(new BuildEdgeMutation<>(getHBaseConfig().getEdgeHandler()))
      .output(new HadoopOutputFormat<>(new TableOutputFormat<>(), hadoopJob));
}
示例5: setStoreLocation
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Stores the output table name in the job configuration, restores the
 * serialized schema if one was saved, and builds the local job
 * configuration used by this storer.
 *
 * @param location the store location; a leading {@code hbase://} is stripped
 * @param job the job being configured
 * @throws IOException declared by the interface; may be raised by the helpers called here
 */
@Override
public void setStoreLocation(String location, Job job) throws IOException {
  // Only the part after the optional "hbase://" prefix is the table name.
  final String tableName =
      location.startsWith("hbase://") ? location.substring(8) : location;
  job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, tableName);

  final String storedSchema =
      getUDFProperties().getProperty(contextSignature + "_schema");
  if (storedSchema != null) {
    schema_ = (ResourceSchema) ObjectSerializer.deserialize(storedSchema);
  }

  m_conf = initializeLocalJobConfig(job);

  // Unlike setLocation, no udf property is used to fetch the hbase
  // delegation token only once: setStoreLocation receives a different Job
  // object on each call and the last one passed is the one that is
  // launched. So we end up getting multiple hbase delegation tokens.
  addHBaseDelegationToken(m_conf, job);
}
示例6: run
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Configures and runs the MapReduce job that writes to the HBase table named
 * by {@code Constants.hbase_user_item_pref_table}, then logs whether it
 * succeeded and, on success, how long it took.
 *
 * @throws Exception if the job cannot be created or waiting for it fails
 */
public void run() throws Exception{
  long startTime = System.currentTimeMillis();

  Configuration conf = new Configuration();
  conf.set(TableOutputFormat.OUTPUT_TABLE, Constants.hbase_user_item_pref_table);

  Job job = Job.getInstance(conf, "hbasewriter"+System.currentTimeMillis());
  job.setJarByClass(UpdateCFJob.class);
  job.setMapperClass(TokenizerMapper.class);
  job.setReducerClass(HBaseWriteReducer.class);
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setOutputFormatClass(TableOutputFormat.class);
  FileInputFormat.addInputPath(job, new Path(input));

  boolean isFinish = job.waitForCompletion(true);
  // Take the end timestamp only after the job has completed, so the logged
  // duration covers the job run itself and not just its configuration.
  long endTime = System.currentTimeMillis();

  if(isFinish){
    logger.info("UpdateCFJob job ["+job.getJobName()+"] run finish.it costs"+ (endTime - startTime) / 1000 +"s.");
  } else {
    logger.error("UpdateCFJob job ["+job.getJobName()+"] run failed.");
  }
}
示例7: configureSubmittableJobUsingDirectApi
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Uses the HBase Front Door Api to write to index table. Submits the job and either returns or
 * waits for the job completion based on runForeground parameter.
 *
 * @param job job to configure and submit
 * @param outputPath output path; deleted recursively after a successful foreground run
 * @param outputTableName table whose name is stored as the {@code TableOutputFormat} output table
 * @param skipDependencyJars if true, the dependency jars are not added to the job
 * @param runForeground - if true, waits for job completion, else submits and returns
 *          immediately.
 * @throws Exception if the foreground job reports failure, or if configuring,
 *           submitting or cleaning up fails
 */
private void configureSubmittableJobUsingDirectApi(Job job, Path outputPath, TableName outputTableName,
    boolean skipDependencyJars, boolean runForeground)
    throws Exception {
  job.setMapperClass(getDirectMapperClass());
  job.setReducerClass(getDirectReducerClass());

  final Configuration jobConf = job.getConfiguration();
  HBaseConfiguration.merge(jobConf, HBaseConfiguration.create(jobConf));
  jobConf.set(TableOutputFormat.OUTPUT_TABLE, outputTableName.getNameAsString());

  // Output classes
  job.setMapOutputValueClass(IntWritable.class);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);

  final boolean shipDependencyJars = !skipDependencyJars;
  if (shipDependencyJars) {
    TableMapReduceUtil.addDependencyJars(job);
  }
  job.setNumReduceTasks(1);

  if (runForeground) {
    LOG.info("Running Index Build in Foreground. Waits for the build to complete. This may take a long time!.");
    if (!job.waitForCompletion(true)) {
      LOG.error("IndexTool job failed!");
      throw new Exception("IndexTool job failed: " + job.toString());
    }
    // Reached only after a successful run: remove the output path.
    FileSystem.get(jobConf).delete(outputPath, true);
  } else {
    LOG.info("Running Index Build in Background - Submit async and exit");
    job.submit();
  }
}
示例8: main
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Main entry point. Runs a job with zero reduce tasks that imports a file
 * into an HBase table. The table ({@code -t}), input path ({@code -i}) and
 * column ({@code -c}) fall back to built-in defaults when the corresponding
 * option is not given.
 *
 * @param args
 *          The command line parameters.
 * @throws Exception
 *           When running the job fails.
 */
public static void main(String[] args) throws Exception {
  Configuration conf = AppConfig.getConfiguration();

  // Defaults, used for any option missing from the command line.
  String table = "jsontable";
  String input = "/user/lhfei/test-data.txt";
  String column = "data:json";

  if (null != args) {
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    CommandLine cmd = parseArgs(otherArgs);
    // check debug flag and other options
    if (cmd.hasOption("d")) {
      conf.set("conf.debug", "true");
    }
    // getOptionValue(opt) returns null for an absent option, which would
    // discard the defaults above and later hand null to conf.set(..).
    // The two-argument form keeps the default instead.
    table = cmd.getOptionValue("t", table);
    input = cmd.getOptionValue("i", input);
    column = cmd.getOptionValue("c", column);
  }

  conf.set("conf.column", column);

  Job job = Job.getInstance(conf, "Import from file " + input + " into table " + table);
  job.setJarByClass(ImportFromFile.class);
  job.setMapperClass(ImportMapper.class);
  job.setOutputFormatClass(TableOutputFormat.class);
  job.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, table);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Writable.class);
  job.setNumReduceTasks(0);
  FileInputFormat.addInputPath(job, new Path(input));

  System.exit(job.waitForCompletion(true) ? 0 : 1);
}
示例9: main
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Main entry point. Runs a job with zero reduce tasks that imports a file
 * into an HBase table, after adding the dependency jars to the job.
 *
 * @param args The command line parameters.
 * @throws Exception When running the job fails.
 */
public static void main(String[] args) throws Exception {
  final Configuration conf = HBaseConfiguration.create();
  final String[] remainingArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
  final CommandLine commandLine = parseArgs(remainingArgs);

  // Propagate the debug flag through the configuration.
  if (commandLine.hasOption("d")) {
    conf.set("conf.debug", "true");
  }

  // Table, input file and column come from the command line.
  final String tableName = commandLine.getOptionValue("t");
  final String inputFile = commandLine.getOptionValue("i");
  final String columnSpec = commandLine.getOptionValue("c");
  conf.set("conf.column", columnSpec);

  final String jobName = "Import from file " + inputFile + " into table " + tableName;
  final Job importJob = Job.getInstance(conf, jobName);
  importJob.setJarByClass(ImportFromFile2.class);
  importJob.setMapperClass(ImportMapper.class);
  importJob.setOutputFormatClass(TableOutputFormat.class);
  importJob.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, tableName);
  importJob.setOutputKeyClass(ImmutableBytesWritable.class);
  importJob.setOutputValueClass(Writable.class);
  importJob.setNumReduceTasks(0);
  FileInputFormat.addInputPath(importJob, new Path(inputFile));

  // Add dependencies to the configuration.
  TableMapReduceUtil.addDependencyJars(importJob);

  // Exit code 0 on success, 1 on failure.
  final int exitCode = importJob.waitForCompletion(true) ? 0 : 1;
  System.exit(exitCode);
}
示例10: initTableReducerJob
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Sets up a job that reduces into an HBase table, using a caller-supplied
 * output format class.
 *
 * @param table The output table.
 * @param reducer The reducer class to use; ignored when <code>null</code>.
 * @param job The current job to adjust.
 * @param partitioner Partitioner to use; <code>null</code> leaves the job's
 *          partitioner untouched. With {@code HRegionPartitioner} the number
 *          of reduce tasks is capped at the output table's region count.
 * @param quorumAddress Cluster key stored as {@code TableOutputFormat.QUORUM_ADDRESS};
 *          validated first. <code>null</code> to skip.
 * @param serverClass stored as {@code TableOutputFormat.REGION_SERVER_CLASS};
 *          only applied when serverImpl is also non-null
 * @param serverImpl stored as {@code TableOutputFormat.REGION_SERVER_IMPL};
 *          only applied when serverClass is also non-null
 * @param addDependencyJars whether to add the dependency jars to the job
 * @param outputFormatClass the output format class to set on the job
 * @throws IOException When determining the region count fails.
 */
public static void initTableReducerJob(String table, Class<? extends TableReducer> reducer,
    Job job, Class partitioner, String quorumAddress, String serverClass, String serverImpl,
    boolean addDependencyJars, Class<? extends OutputFormat> outputFormatClass) throws IOException {
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  job.setOutputFormatClass(outputFormatClass);
  if (reducer != null) job.setReducerClass(reducer);
  conf.set(TableOutputFormat.OUTPUT_TABLE, table);
  // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
  if (quorumAddress != null) {
    // Calling this will validate the format
    ZKUtil.transformClusterKey(quorumAddress);
    conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
  }
  if (serverClass != null && serverImpl != null) {
    conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
    conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
  }
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Writable.class);
  if (partitioner == HRegionPartitioner.class) {
    job.setPartitionerClass(HRegionPartitioner.class);
    HTable outputTable = new HTable(conf, table);
    try {
      // Look the region count up once and reuse it for the cap.
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } finally {
      // The table is only needed for the region count; release it so the
      // handle is not leaked.
      outputTable.close();
    }
  } else if (partitioner != null) {
    job.setPartitionerClass(partitioner);
  }
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  TableMapReduceUtil.initCredentials(job);
}
示例11: initTableReducerJob
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table The output table.
 * @param reducer The reducer class to use.
 * @param job The current job to adjust. Make sure the passed job is
 * carrying all necessary HBase configuration.
 * @param partitioner Partitioner to use. Pass <code>null</code> to use
 * default partitioner. With {@code HRegionPartitioner} the number of reduce
 * tasks is capped at the output table's region count.
 * @param quorumAddress Distant cluster to write to; default is null for
 * output to the cluster that is designated in <code>hbase-site.xml</code>.
 * Set this String to the zookeeper ensemble of an alternate remote cluster
 * when you would have the reduce write a cluster that is other than the
 * default; e.g. copying tables between clusters, the source would be
 * designated by <code>hbase-site.xml</code> and this param would have the
 * ensemble address of the remote cluster. The format to pass is particular.
 * Pass {@code <hbase.zookeeper.quorum>:<hbase.zookeeper.client.port>:<zookeeper.znode.parent>}
 * such as <code>server,server2,server3:2181:/hbase</code>.
 * @param serverClass redefined hbase.regionserver.class
 * @param serverImpl redefined hbase.regionserver.impl
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 * job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 */
public static void initTableReducerJob(String table,
    Class<? extends TableReducer> reducer, Job job,
    Class partitioner, String quorumAddress, String serverClass,
    String serverImpl, boolean addDependencyJars) throws IOException {
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  job.setOutputFormatClass(TableOutputFormat.class);
  if (reducer != null) job.setReducerClass(reducer);
  conf.set(TableOutputFormat.OUTPUT_TABLE, table);
  // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
  if (quorumAddress != null) {
    // Calling this will validate the format
    ZKUtil.transformClusterKey(quorumAddress);
    conf.set(TableOutputFormat.QUORUM_ADDRESS,quorumAddress);
  }
  if (serverClass != null && serverImpl != null) {
    conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
    conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);
  }
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Writable.class);
  if (partitioner == HRegionPartitioner.class) {
    job.setPartitionerClass(HRegionPartitioner.class);
    HTable outputTable = new HTable(conf, table);
    try {
      // Look the region count up once and reuse it for the cap.
      int regions = outputTable.getRegionsInfo().size();
      if (job.getNumReduceTasks() > regions) {
        job.setNumReduceTasks(regions);
      }
    } finally {
      // The table is only needed for the region count; release it so the
      // handle is not leaked.
      outputTable.close();
    }
  } else if (partitioner != null) {
    job.setPartitionerClass(partitioner);
  }
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  initCredentials(job);
}
示例12: getOutputFormat
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Returns the output format, creating a {@code TableOutputFormat}
 * configured with {@code m_conf} on first use.
 *
 * @return the cached output format instance
 * @throws IllegalStateException if no output format exists yet and
 *           {@code m_conf} is still null
 * @throws IOException declared by the overridden method
 */
@Override
public OutputFormat getOutputFormat() throws IOException {
  // Already created on an earlier call: reuse it.
  if (outputFormat != null) {
    return outputFormat;
  }
  // Without a configuration the format cannot be set up.
  if (m_conf == null) {
    throw new IllegalStateException("setStoreLocation has not been called");
  }
  this.outputFormat = new TableOutputFormat();
  this.outputFormat.setConf(m_conf);
  return outputFormat;
}
示例13: writeGraphHeads
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Builds persistent graph heads (graph data together with the identifiers
 * of the graph's vertices and edges) and writes them to the HBase graph
 * table.
 *
 * @param collection Graph collection
 * @throws IOException e.g. when the Hadoop {@code Job} instance cannot be created
 */
private void writeGraphHeads(final GraphCollection collection)
    throws IOException {
  // (graph-id, vertex-id) tuples, derived from the vertices
  DataSet<Tuple2<GradoopId, GradoopId>> vertexIdsByGraph = collection.getVertices()
      .flatMap(new PairGraphIdWithElementId<>());

  // (graph-id, edge-id) tuples, derived from the edges
  DataSet<Tuple2<GradoopId, GradoopId>> edgeIdsByGraph = collection.getEdges()
      .flatMap(new PairGraphIdWithElementId<>());

  // co-group both tuple sets on graph-id into
  // (graph-id, {vertex-id}, {edge-id}) triples
  DataSet<Tuple3<GradoopId, GradoopIdSet, GradoopIdSet>> graphTransactions =
      vertexIdsByGraph
          .coGroup(edgeIdsByGraph)
          .where(0)
          .equalTo(0)
          .with(new BuildGraphTransactions());

  // join the triples with the graph heads on graph-id and build the
  // persistent graph heads
  DataSet<PersistentGraphHead> persistentGraphHeads = graphTransactions
      .join(collection.getGraphHeads())
      .where(0).equalTo(new Id<>())
      .with(new BuildPersistentGraphHead<>(getHBaseConfig().getPersistentGraphHeadFactory()));

  // The Hadoop job only carries the configuration naming the target table.
  Job hadoopJob = Job.getInstance();
  String graphTable = getHBaseConfig().getGraphTableName();
  hadoopJob.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, graphTable);

  persistentGraphHeads
      // FIXME remove forced cast...
      .map(new BuildGraphHeadMutation((GraphHeadHandler<PersistentGraphHead>)
          ((Object) getHBaseConfig().getGraphHeadHandler())))
      .output(new HadoopOutputFormat<>(new TableOutputFormat<>(), hadoopJob));
}
示例14: writeVertices
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Turns the runtime vertices of the collection into persistent vertices
 * (each carrying its incoming and outgoing edges) and writes them to the
 * HBase vertex table.
 *
 * @param collection Graph collection
 * @throws IOException e.g. when the Hadoop {@code Job} instance cannot be created
 */
private void writeVertices(final GraphCollection collection) throws IOException {
  // (vertex-id, [out-edge]): edges grouped by their source vertex id
  DataSet<Tuple2<GradoopId, Set<Edge>>> outgoingByVertex = collection.getEdges()
      .groupBy(new SourceId<>())
      .reduceGroup(new EdgeSetBySourceId<>());

  // (vertex-id, [in-edge]): edges grouped by their target vertex id
  DataSet<Tuple2<GradoopId, Set<Edge>>> incomingByVertex = collection.getEdges()
      .groupBy(new TargetId<>())
      .reduceGroup(new EdgeSetByTargetId<>());

  // co-group (vertex-data) with (vertex-id, [out-edge])
  DataSet<Tuple2<Vertex, Set<Edge>>> verticesWithOutgoing = collection
      .getVertices()
      .coGroup(outgoingByVertex)
      .where(new Id<Vertex>()).equalTo(0)
      .with(new BuildVertexDataWithEdges<>());

  // co-group (vertex, [out-edge]) with (vertex-id, [in-edge]) to obtain
  // the persistent vertices
  DataSet<PersistentVertex<Edge>> persistentVertices = verticesWithOutgoing
      .coGroup(incomingByVertex)
      .where("f0.id").equalTo(0)
      .with(new BuildPersistentVertex<>(getHBaseConfig().getPersistentVertexFactory()));

  // The Hadoop job only carries the configuration naming the target table.
  Job hadoopJob = Job.getInstance();
  String vertexTable = getHBaseConfig().getVertexTableName();
  hadoopJob.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, vertexTable);

  // Map each persistent vertex to a mutation and emit it via TableOutputFormat.
  persistentVertices
      .map(new BuildVertexMutation<>(getHBaseConfig().getVertexHandler()))
      .output(new HadoopOutputFormat<>(new TableOutputFormat<>(), hadoopJob));
}
示例15: getOutputFormat
import org.apache.hadoop.hbase.mapreduce.TableOutputFormat; //导入依赖的package包/类
/**
 * Returns the output format, creating a {@code TableOutputFormat} on first
 * use and configuring it with the result of
 * {@code initialiseHBaseConfig(m_conf)}, which also replaces {@code m_conf}.
 *
 * @return the cached output format instance
 * @throws IOException declared by the overridden method
 */
@Override
public OutputFormat getOutputFormat() throws IOException {
  // Already created on an earlier call: reuse it.
  if (outputFormat != null) {
    return outputFormat;
  }
  this.outputFormat = new TableOutputFormat();
  m_conf = initialiseHBaseConfig(m_conf);
  this.outputFormat.setConf(m_conf);
  return outputFormat;
}