This page collects typical usage examples of the Java method org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.initTableSnapshotMapperJob. If you are wondering what TableMapReduceUtil.initTableSnapshotMapperJob does, how to call it, or how it is used in practice, the curated examples below should help; they also illustrate the enclosing class org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil more broadly.
Three code examples of TableMapReduceUtil.initTableSnapshotMapperJob are shown below, ordered roughly by popularity.
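All three examples call the same overload, whose shape is sketched below for orientation (the parameter comments are added here and are not part of the HBase source; see the TableMapReduceUtil javadoc for the authoritative description). The method restores the named snapshot into the given temporary directory and configures the job to read the snapshot's store files directly through TableSnapshotInputFormat, bypassing the region servers.

public static void initTableSnapshotMapperJob(
    String snapshotName,            // name of an existing snapshot to read from
    Scan scan,                      // scan limiting the families/columns/rows that reach the mapper
    Class<? extends TableMapper> mapper,
    Class<?> outputKeyClass,        // mapper output key class
    Class<?> outputValueClass,      // mapper output value class
    Job job,                        // the MapReduce job to configure
    boolean addDependencyJars,      // if true, also ship the HBase dependency jars with the job
    Path tmpRestoreDir)             // writable temporary directory the snapshot is restored into
    throws IOException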
Example 1: main
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the class that provides the method
/**
* Main entry point.
*
* @param args The command line parameters.
* @throws Exception When running the job fails.
*/
public static void main(String[] args) throws Exception {
  // vv AnalyzeSnapshotData
  Configuration conf = HBaseConfiguration.create();
  String[] otherArgs =
    new GenericOptionsParser(conf, args).getRemainingArgs();
  CommandLine cmd = parseArgs(otherArgs);
  if (cmd.hasOption("d")) conf.set("conf.debug", "true");
  String table = cmd.getOptionValue("t");
  long time = System.currentTimeMillis();
  String tmpName = "snapshot-" + table + "-" + time; // co AnalyzeSnapshotData-1-TmpName Compute a name for the snapshot and restore directory, if not specified otherwise.
  String snapshot = cmd.getOptionValue("s", tmpName);
  Path restoreDir = new Path(cmd.getOptionValue("b", "/tmp/" + tmpName));
  String column = cmd.getOptionValue("c");
  String output = cmd.getOptionValue("o");
  boolean cleanup = Boolean.valueOf(cmd.getOptionValue("x"));
  /*...*/
  // ^^ AnalyzeSnapshotData
  Scan scan = new Scan();
  if (column != null) {
    byte[][] colkey = KeyValue.parseColumn(Bytes.toBytes(column));
    if (colkey.length > 1) {
      scan.addColumn(colkey[0], colkey[1]);
    } else {
      scan.addFamily(colkey[0]);
    }
  }
  // vv AnalyzeSnapshotData
  Connection connection = ConnectionFactory.createConnection(conf);
  Admin admin = connection.getAdmin();
  LOG.info("Performing snapshot of table " + table + " as " + snapshot);
  admin.snapshot(snapshot, TableName.valueOf(table)); // co AnalyzeSnapshotData-2-Snap Create a snapshot of the table.
  LOG.info("Setting up job");
  Job job = Job.getInstance(conf, "Analyze data in snapshot " + table);
  job.setJarByClass(AnalyzeSnapshotData.class);
  TableMapReduceUtil.initTableSnapshotMapperJob(snapshot, scan,
    AnalyzeMapper.class, Text.class, IntWritable.class, job, true,
    restoreDir); // co AnalyzeSnapshotData-2-Util Set up the snapshot mapper phase using the supplied utility.
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
    JSONParser.class);
  job.setReducerClass(AnalyzeReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(IntWritable.class);
  job.setNumReduceTasks(1);
  FileOutputFormat.setOutputPath(job, new Path(output));
  boolean success = job.waitForCompletion(true); // run the job first, so the optional cleanup below can still execute
  if (cleanup) {
    LOG.info("Cleaning up snapshot and restore directory");
    admin.deleteSnapshot(snapshot); // co AnalyzeSnapshotData-3-Cleanup Optionally clean up after the job is complete.
    restoreDir.getFileSystem(conf).delete(restoreDir, true);
  }
  admin.close();
  connection.close();
  System.exit(success ? 0 : 1);
// ^^ AnalyzeSnapshotData
}
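The AnalyzeMapper and AnalyzeReducer classes referenced in Example 1 are not shown on this page. A minimal, hypothetical mapper skeleton compatible with the job setup above (a TableMapper emitting Text keys and IntWritable counts) could look like the following; the qualifier-counting logic is only a placeholder, not the book's actual implementation.

import java.io.IOException;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

// Hypothetical skeleton, declared as a static nested class inside the AnalyzeSnapshotData driver.
static class AnalyzeMapper extends TableMapper<Text, IntWritable> {
  private static final IntWritable ONE = new IntWritable(1);

  @Override
  protected void map(ImmutableBytesWritable row, Result result, Context context)
      throws IOException, InterruptedException {
    // Each call receives one row, served directly from the restored snapshot files.
    for (Cell cell : result.rawCells()) {
      // Emit the column qualifier as the key; the reducer sums the counts per qualifier.
      context.write(new Text(Bytes.toString(CellUtil.cloneQualifier(cell))), ONE);
    }
  }
}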
Example 2: testSnapshotScanMapReduce
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the class that provides the method
public void testSnapshotScanMapReduce() throws IOException, InterruptedException, ClassNotFoundException {
  Stopwatch scanOpenTimer = new Stopwatch();
  Stopwatch scanTimer = new Stopwatch();
  Scan scan = getScan();
  String jobName = "testSnapshotScanMapReduce";
  Job job = new Job(conf);
  job.setJobName(jobName);
  job.setJarByClass(getClass());
  TableMapReduceUtil.initTableSnapshotMapperJob(
      this.snapshotName,
      scan,
      MyMapper.class,
      NullWritable.class,
      NullWritable.class,
      job,
      true,
      new Path(restoreDir)
  );
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  scanTimer.start();
  job.waitForCompletion(true);
  scanTimer.stop();
  Counters counters = job.getCounters();
  long numRows = counters.findCounter(ScanCounter.NUM_ROWS).getValue();
  long numCells = counters.findCounter(ScanCounter.NUM_CELLS).getValue();
  long totalBytes = counters.findCounter(HBASE_COUNTER_GROUP_NAME, "BYTES_IN_RESULTS").getValue();
  double throughput = (double)totalBytes / scanTimer.elapsedTime(TimeUnit.SECONDS);
  double throughputRows = (double)numRows / scanTimer.elapsedTime(TimeUnit.SECONDS);
  double throughputCells = (double)numCells / scanTimer.elapsedTime(TimeUnit.SECONDS);
  System.out.println("HBase scan mapreduce: ");
  System.out.println("total time to open scanner: " + scanOpenTimer.elapsedMillis() + " ms");
  System.out.println("total time to scan: " + scanTimer.elapsedMillis() + " ms");
  System.out.println("total bytes: " + totalBytes + " bytes ("
      + StringUtils.humanReadableInt(totalBytes) + ")");
  System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
  System.out.println("total rows : " + numRows);
  System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
  System.out.println("total cells : " + numCells);
  System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
}
Example 3: testSnapshotScanMapReduce
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the class that provides the method
public void testSnapshotScanMapReduce() throws IOException, InterruptedException, ClassNotFoundException {
  Stopwatch scanOpenTimer = Stopwatch.createUnstarted();
  Stopwatch scanTimer = Stopwatch.createUnstarted();
  Scan scan = getScan();
  String jobName = "testSnapshotScanMapReduce";
  Job job = new Job(conf);
  job.setJobName(jobName);
  job.setJarByClass(getClass());
  TableMapReduceUtil.initTableSnapshotMapperJob(
      this.snapshotName,
      scan,
      MyMapper.class,
      NullWritable.class,
      NullWritable.class,
      job,
      true,
      new Path(restoreDir)
  );
  job.setNumReduceTasks(0);
  job.setOutputKeyClass(NullWritable.class);
  job.setOutputValueClass(NullWritable.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  scanTimer.start();
  job.waitForCompletion(true);
  scanTimer.stop();
  Counters counters = job.getCounters();
  long numRows = counters.findCounter(ScanCounter.NUM_ROWS).getValue();
  long numCells = counters.findCounter(ScanCounter.NUM_CELLS).getValue();
  long totalBytes = counters.findCounter(HBASE_COUNTER_GROUP_NAME, "BYTES_IN_RESULTS").getValue();
  double throughput = (double)totalBytes / scanTimer.elapsed(TimeUnit.SECONDS);
  double throughputRows = (double)numRows / scanTimer.elapsed(TimeUnit.SECONDS);
  double throughputCells = (double)numCells / scanTimer.elapsed(TimeUnit.SECONDS);
  System.out.println("HBase scan mapreduce: ");
  System.out.println("total time to open scanner: " +
      scanOpenTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
  System.out.println("total time to scan: " + scanTimer.elapsed(TimeUnit.MILLISECONDS) + " ms");
  System.out.println("total bytes: " + totalBytes + " bytes ("
      + StringUtils.humanReadableInt(totalBytes) + ")");
  System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughput) + "B/s");
  System.out.println("total rows : " + numRows);
  System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputRows) + " rows/s");
  System.out.println("total cells : " + numCells);
  System.out.println("throughput : " + StringUtils.humanReadableInt((long)throughputCells) + " cells/s");
}
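Examples 2 and 3 appear to be the same benchmark method taken from different HBase versions: the setup around initTableSnapshotMapperJob is identical, and the only difference is the Guava Stopwatch API in use (the older public constructor with elapsedMillis()/elapsedTime() versus the newer Stopwatch.createUnstarted() with elapsed()). Note also that in both variants scanOpenTimer is never started, so the "total time to open scanner" line always reports 0 ms; only scanTimer, which brackets job.waitForCompletion(), measures anything.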