This article collects typical usage examples of the Java method org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.initTableMapperJob. If you have been wondering what TableMapReduceUtil.initTableMapperJob does, how to call it, or what working examples look like, the curated code samples below may help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil.
The following shows 15 code examples of the TableMapReduceUtil.initTableMapperJob method, sorted by popularity by default.
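Before the individual examples, here is a minimal end-to-end sketch of the typical call pattern, assuming the HBase client and MapReduce jars are on the classpath. It is an illustration only and is not taken from any of the projects below: the table name "my_table", the RowCountSketch/RowCountMapper classes, and the counter names are hypothetical. initTableMapperJob wires the table name, the Scan, the mapper class, and the mapper output key/value types into the job, and configures TableInputFormat as the input format.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;

public class RowCountSketch {

  // Hypothetical mapper: counts scanned rows via a job counter and emits nothing.
  static class RowCountMapper extends TableMapper<NullWritable, NullWritable> {
    @Override
    protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
        throws IOException, InterruptedException {
      context.getCounter("sketch", "rows").increment(1);
    }
  }

  public static void main(String[] args) throws Exception {
    Configuration conf = HBaseConfiguration.create();
    Job job = Job.getInstance(conf, "row-count-sketch");
    job.setJarByClass(RowCountSketch.class);

    Scan scan = new Scan();
    scan.setCaching(500);       // the Scan default of 1 row per RPC is far too slow for MR
    scan.setCacheBlocks(false); // a one-pass scan should not evict hotter data from the block cache

    // Configure the map phase: table, Scan, mapper class, mapper output key/value types.
    TableMapReduceUtil.initTableMapperJob(
        "my_table",            // hypothetical source table
        scan,
        RowCountMapper.class,
        NullWritable.class,
        NullWritable.class,
        job);

    job.setNumReduceTasks(0);                          // map-only job
    job.setOutputFormatClass(NullOutputFormat.class);  // nothing to write; the counter is the result
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}

The Scan tuning shown here (setCaching above the default of 1, setCacheBlocks(false)) recurs in several of the examples below, since a full-table MapReduce scan with row caching of 1 is extremely slow and would otherwise pollute the region servers' block cache.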
Example 1: doVerify
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
private Job doVerify(Configuration conf, HTableDescriptor htd, String... auths)
    throws IOException, InterruptedException, ClassNotFoundException {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  scan.setAuthorizations(new Authorizations(auths));
  TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan,
      VerifyMapper.class, NullWritable.class, NullWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setNumReduceTasks(0);
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  return job;
}
Example 2: doVerify
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  LOG.info("Verify output dir: " + outputDir);
  Job job = Job.getInstance(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  TableMapReduceUtil.initTableMapperJob(
      htd.getTableName().getNameAsString(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}
Example 3: runTestOnTable
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
private void runTestOnTable() throws InterruptedException, ClassNotFoundException {
  Job job = null;
  try {
    Configuration conf = graph.configuration().toHBaseConfiguration();
    job = Job.getInstance(conf, "test123");
    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);
    Scan scan = new Scan();
    scan.addColumn(FAMILY_NAME, COLUMN_NAME);
    scan.setTimeRange(MINSTAMP, MAXSTAMP);
    scan.setMaxVersions();
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME.getNameAsString(),
        scan, ProcessTimeRangeMapper.class, Text.class, Text.class, job,
        true, TableInputFormat.class);
    job.waitForCompletion(true);
  } catch (IOException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  } finally {
    if (job != null) {
      FileUtil.fullyDelete(
          new File(job.getConfiguration().get("hadoop.tmp.dir")));
    }
  }
}
Example 4: doVerify
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
private void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getNameAsString());
  Scan scan = new Scan();
  TableMapReduceUtil.initTableMapperJob(
      htd.getNameAsString(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}
Example 5: doVerify
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  Job job = new Job(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  TableMapReduceUtil.initTableMapperJob(
      htd.getTableName().getNameAsString(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}
Example 6: setupHBaseJob
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
public static void setupHBaseJob(Job job, String sourceTable, String cf) throws IOException {
  Scan scan = new Scan();
  if (cf != null) {
    scan.addFamily(Bytes.toBytes(cf));
  }
  scan.setCaching(500);       // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  // set other scan attrs
  TableMapReduceUtil.initTableMapperJob(
      sourceTable,        // input table
      scan,               // Scan instance to control CF and attribute selection
      PrunerMapper.class, // mapper class
      null,               // mapper output key
      null,               // mapper output value
      job);
  TableMapReduceUtil.initTableReducerJob(
      sourceTable, // output table
      null,        // reducer class
      job);
}
Example 7: prepareJob
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
/**
 * Prepares a map reduce job.
 * @param tn The current table name.
 * @param familyName The current family name.
 * @param scan The current scan.
 * @param conf The current configuration.
 * @return A map reduce job.
 * @throws IOException
 */
private Job prepareJob(TableName tn, String familyName, Scan scan, Configuration conf)
    throws IOException {
  Job job = Job.getInstance(conf);
  job.setJarByClass(SweepMapper.class);
  TableMapReduceUtil.initTableMapperJob(tn.getNameAsString(), scan,
      SweepMapper.class, Text.class, Writable.class, job);
  job.setInputFormatClass(TableInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(KeyValue.class);
  job.setReducerClass(SweepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  String jobName = getCustomJobName(this.getClass().getSimpleName(), tn.getNameAsString(),
      familyName);
  job.setJobName(jobName);
  if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
    String fileLoc = conf.get(CREDENTIALS_LOCATION);
    Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
    job.getCredentials().addAll(cred);
  }
  return job;
}
Example 8: doVerify
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
  Path outputDir = getTestDir(TEST_NAME, "verify-output");
  LOG.info("Verify output dir: " + outputDir);
  Job job = Job.getInstance(conf);
  job.setJarByClass(this.getClass());
  job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
  setJobScannerConf(job);
  Scan scan = new Scan();
  TableMapReduceUtil.initTableMapperJob(
      htd.getTableName().getNameAsString(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJarsForClasses(job.getConfiguration(), AbstractHBaseTool.class);
  int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
  TableMapReduceUtil.setScannerCaching(job, scannerCaching);
  job.setReducerClass(VerifyReducer.class);
  job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
  FileOutputFormat.setOutputPath(job, outputDir);
  assertTrue(job.waitForCompletion(true));
  long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
  assertEquals(0, numOutputRecords);
}
Example 9: main
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
/**
 * @param args the main method accepts an array with a single element: the name
 *        of the table to scan
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
  Configuration conf = HBaseConfiguration.create();
  Job job = new Job(conf, "HBase Filtering");
  job.setJarByClass(MapredFiltering.class);
  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.addFamily(MapperBulkLoadRadiomap.SRV_COL_FAM);
  //scan.addColumn(MapperBulkLoadRadiomap.SRV_COL_FAM, TwitsDAO.TWIT_COL);
  TableMapReduceUtil.initTableMapperJob(
      args[0], scan, Map.class, ImmutableBytesWritable.class, Result.class, job);
  job.setOutputFormatClass(NullOutputFormat.class);
  job.setNumReduceTasks(0);
  boolean b = job.waitForCompletion(true);
  if (!b) {
    System.err.println("Job has not been completed. Abnormal exit.");
    System.exit(1);
  }
}
Example 10: execute
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
@Override
public boolean execute() throws Exception
{
  Configuration conf = getConf();
  conf.set(TableInputFormat.SCAN_COLUMN_FAMILY, HBaseTableConstants.COLLECTION_TABLE_COLUMN_INTR);
  Job job = new Job(conf);
  job.setJobName("Prepare recommender: <" + getInputTable() + ">");
  // mapper
  TableMapReduceUtil.initTableMapperJob(getInputTable(), getScanner(), CollectionExportMap.class,
      ImmutableBytesWritable.class, Text.class, job);
  // reducer:
  job.setReducerClass(CollectionExportReduce.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(Text.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  FileOutputFormat.setOutputPath(job, new Path(outputFile));
  return task.setCurrentJob(job).waitForCompletion(LOG.isDebugEnabled());
}
Example 11: run
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  // initialize arguments
  this.processArgs(conf, args);
  // create the job
  Job job = Job.getInstance(conf, "active_user");
  // set job-level configuration parameters
  job.setJarByClass(ActiveUserRunner.class);
  // HBase input mapper parameters
  // 1. run locally
  TableMapReduceUtil.initTableMapperJob(this.initScans(job), ActiveUserMapper.class, StatsUserDimension.class,
      TimeOutputValue.class, job, false);
  // 2. run on the cluster
  // TableMapReduceUtil.initTableMapperJob(null, ActiveUserMapper.class,
  //     StatsUserDimension.class, TimeOutputValue.class, job);
  // set reducer parameters
  job.setReducerClass(ActiveUserReducer.class);
  job.setOutputKeyClass(StatsUserDimension.class);
  job.setOutputValueClass(MapWritableValue.class);
  // set output parameters
  job.setOutputFormatClass(TransformerOutputFormat.class);
  // start time in milliseconds
  long startTime = System.currentTimeMillis();
  try {
    return job.waitForCompletion(true) ? 0 : -1;
  } finally {
    // end time in milliseconds
    long endTime = System.currentTimeMillis();
    logger.info("Job<" + job.getJobName() + "> succeeded: " + job.isSuccessful() + "; start: " + startTime
        + "; end: " + endTime + "; elapsed: " + (endTime - startTime) + "ms");
  }
}
Example 12: run
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
@Override
public int run(String[] args) throws Exception {
  Configuration conf = this.getConf();
  // process arguments
  this.processArgs(conf, args);
  Job job = Job.getInstance(conf, "new_install_user");
  job.setJarByClass(NewInstallUserRunner.class);
  // run locally
  TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class,
      TimeOutputValue.class, job, false);
  // run on the cluster: submit locally or as a packaged jar
  // TableMapReduceUtil.initTableMapperJob(null,
  //     NewInstallUserMapper.class, StatsUserDimension.class,
  //     TimeOutputValue.class, job);
  job.setReducerClass(NewInstallUserReducer.class);
  job.setOutputKeyClass(StatsUserDimension.class);
  job.setOutputValueClass(MapWritableValue.class);
  // job.setInputFormatClass(KeyValueTextInputFormat.class);
  job.setOutputFormatClass(TransformerOutputFormat.class);
  if (job.waitForCompletion(true)) {
    // on success, compute the total user count
    // this.calculateTotalUsers(conf);
    return 0;
  } else {
    return -1;
  }
}
Example 13: run
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
@Override
public int run(String[] arg0) throws Exception {
  List<Scan> mainSiteScan = new ArrayList<Scan>();
  Scan siteScan = new Scan();
  siteScan.setAttribute("scan.attributes.table.name", Bytes.toBytes("myTest07WebSite"));
  System.out.println(siteScan.getAttribute("scan.attributes.table.name"));
  mainSiteScan.add(siteScan);
  Scan webSitehitScan = new Scan();
  // look up the table we created earlier that holds the site-hit data
  webSitehitScan.setAttribute("scan.attributes.table.name", Bytes.toBytes("myTest07SiteHits"));
  System.out.println(webSitehitScan.getAttribute("scan.attributes.table.name"));
  mainSiteScan.add(webSitehitScan);
  Configuration conf = new Configuration();
  Job job = new Job(conf);
  // will get the server details of Hbase/hadoop
  job.setJarByClass(TableWebsiteJob.class);
  // setting the class name to the job
  TableMapReduceUtil.initTableMapperJob(
      mainSiteScan, // tables to read from
      TableWebsiteMapper.class,
      Text.class,
      IntWritable.class,
      job);
  // the third table, which will receive the combined data
  TableMapReduceUtil.initTableReducerJob(
      "myTest07SiteHitsPlusWebSite",
      TableWebsiteReducer.class,
      job);
  job.waitForCompletion(true);
  return 0;
}
Example 14: doVerify
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
private int doVerify(Path outputDir, int numReducers) throws IOException, InterruptedException,
    ClassNotFoundException {
  job = new Job(getConf());
  job.setJobName("Link Verifier");
  job.setNumReduceTasks(numReducers);
  job.setJarByClass(getClass());
  setJobScannerConf(job);
  Scan scan = new Scan();
  scan.addColumn(FAMILY_NAME, COLUMN_PREV);
  scan.setCaching(10000);
  scan.setCacheBlocks(false);
  String[] split = labels.split(COMMA);
  scan.setAuthorizations(new Authorizations(split[this.labelIndex * 2],
      split[(this.labelIndex * 2) + 1]));
  TableMapReduceUtil.initTableMapperJob(tableName.getName(), scan, VerifyMapper.class,
      BytesWritable.class, BytesWritable.class, job);
  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
  job.getConfiguration().setBoolean("mapreduce.map.speculative", false);
  job.setReducerClass(VerifyReducer.class);
  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outputDir);
  boolean success = job.waitForCompletion(true);
  return success ? 0 : 1;
}
Example 15: createSubmittableJob
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; // import the package/class this method depends on
Job createSubmittableJob(final String[] args) throws IOException {
  Configuration configFromArgs = parseArguments(args);
  if (configFromArgs == null || sourceTableNameString == null) {
    return null;
  }
  getConf().addResource(configFromArgs);
  getConf().setBoolean(Repository.MAP_SPECULATIVE_CONF_KEY, true); // no redundant processing
  Job job = Job.getInstance(
      getConf(), getConf().get(Repository.JOB_NAME_CONF_KEY, sourceTableNameString));
  TableMapReduceUtil.addDependencyJars(job);
  Scan scan = new Scan();
  // note that user can override scan row-caching by setting TableInputFormat.SCAN_CACHEDROWS
  scan.setCaching(getConf().getInt(TableInputFormat.SCAN_CACHEDROWS, 500));
  scan.setCacheBlocks(false); // should be false for MapReduce jobs
  if (!verboseReport && !reportType.equals(ReportType.VALUE)) {
    scan.setFilter(new KeyOnlyFilter(true));
  }
  if (includeAllCells) {
    scan.setMaxVersions();
  }
  if (sourceColFamily != null) {
    scan.addFamily(sourceColFamily);
  }
  TableMapReduceUtil.initTableMapperJob(sourceTableNameString,
      scan,
      ColumnInvalidityReportMapper.class,
      null, // mapper output key is null
      null, // mapper output value is null
      job);
  job.setOutputFormatClass(NullOutputFormat.class); // no Mapper output, no Reducer
  return job;
}