This article collects typical usage examples of the Java class org.apache.hadoop.hbase.mapreduce.TableInputFormat. If you are wondering what exactly TableInputFormat is for, how to use it, or what working examples look like, the curated class code examples below may help.
TableInputFormat belongs to the org.apache.hadoop.hbase.mapreduce package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
Example 1: run
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
public void run() throws Exception {
  String tableName = "contacts";
  Configuration config = HBaseConfiguration.create();
  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  config.set(TableInputFormat.SCAN, convertScanToString(scan));
  config.set(TableInputFormat.INPUT_TABLE, tableName);
  Job job = new Job(config, "index builder");
  job.setJarByClass(JobSubmitter.class);
  job.setMapperClass(IndexMapper.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(TableInputFormat.class);
  job.setOutputFormatClass(MultiTableOutputFormat.class);
  boolean b = job.waitForCompletion(true);
  if (!b) {
    throw new IOException("error with job!");
  }
}
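The convertScanToString helper used above is not part of this snippet. A minimal sketch of such a helper, assuming the HBase 1.x client and protobuf utilities, would serialize the Scan into the Base64-encoded string that TableInputFormat.SCAN expects:

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.util.Base64;
import java.io.IOException;

// Hypothetical helper (HBase 1.x APIs assumed): serialize a Scan to the
// Base64-encoded protobuf form read by TableInputFormat.
static String convertScanToString(Scan scan) throws IOException {
  return Base64.encodeBytes(ProtobufUtil.toScan(scan).toByteArray());
}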
Example 2: evaluate
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Override
public JavaRDD<ObjectValue> evaluate(Transformation transformation) {
  final SparkTransformationEvaluator evaluator = new SparkTransformationEvaluator(transformation);
  JavaSparkContext sc = NotaQL.SparkFactory.getSparkContext();
  final Configuration conf = createConf();
  conf.set(TableInputFormat.INPUT_TABLE, tableId);
  final JavaPairRDD<ImmutableBytesWritable, Result> inputRDD =
      sc.newAPIHadoopRDD(conf, TableInputFormat.class, ImmutableBytesWritable.class, org.apache.hadoop.hbase.client.Result.class);
  // convert all rows in rdd to inner format
  final JavaRDD<Value> converted = inputRDD.map(t -> ValueConverter.convertToNotaQL(t._2));
  // filter the ones not fulfilling the input filter
  final JavaRDD<Value> filtered = converted.filter(v -> transformation.satisfiesInPredicate((ObjectValue) v));
  // process all input
  return evaluator.process(filtered);
}
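The createConf() call above is project-specific and not shown. A minimal sketch of what such a helper might look like, assuming a plain HBaseConfiguration plus a ZooKeeper quorum setting (the quorum value here is a placeholder, not taken from the original project):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

// Hypothetical helper: build the HBase configuration handed to the Spark job.
private Configuration createConf() {
  Configuration conf = HBaseConfiguration.create();
  conf.set("hbase.zookeeper.quorum", "localhost"); // assumption: replace with your cluster's quorum
  return conf;
}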
Example 3: process
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Override
public void process(Annotation annotation, Job job, Object target)
    throws ToolException {
  TableInput tableInput = (TableInput) annotation;
  // Base setup of the table mapper job
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  try {
    // Add dependencies
    TableMapReduceUtil.addDependencyJars(job);
    String tableName = getTableName(tableInput);
    Scan scan = getScan(tableInput);
    job.setInputFormatClass(TableInputFormat.class);
    conf.set(TableInputFormat.INPUT_TABLE, tableName);
    conf.set(TableInputFormat.SCAN, convertScanToString(scan));
  } catch (IOException e) {
    throw new ToolException(e);
  }
}
Example 4: testProcessDefaults
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Test
public void testProcessDefaults() {
  try {
    Annotation annotation = setupDriver(new TableDriverDefaults());
    handler.process(annotation, job, null);
    verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo(TEST_INPUT));
    assertThat(conf.get(TableInputFormat.SCAN), equalTo("AgAAAAAAAf//////////AQAAAAAAAAAAAH//////////AQAAAAAAAAAA"));
  } catch (ToolException | NoSuchFieldException | SecurityException e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
Example 5: testProcessExplicitTable
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Test
public void testProcessExplicitTable() {
  try {
    Annotation annotation = setupDriver(new TableDriverExplicitTable());
    handler.process(annotation, job, null);
    verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("my_table"));
    assertThat(conf.get(TableInputFormat.SCAN), equalTo("AgAAAAAAAf//////////AQAAAAAAAAAAAH//////////AQAAAAAAAAAA"));
  } catch (ToolException | NoSuchFieldException | SecurityException e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
Example 6: testProcessCustomScan
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Test
public void testProcessCustomScan() {
  try {
    Annotation annotation = setupDriver(new TableDriverWithScan());
    handler.process(annotation, job, null);
    verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("test.my_table"));
    assertThat(conf.get(TableInputFormat.SCAN), equalTo("AgAAAAAAAv//////////AQAAAAAAAAAAAH//////////AQAAAAAAAAAA"));
  } catch (ToolException | NoSuchFieldException | SecurityException e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
Example 7: testConditionalName
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Test
public void testConditionalName() {
  try {
    Annotation annotation = setupDriver(new TableDriverNameExpr());
    handler.process(annotation, job, null);
    verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("myTable"));
    TableDriverNameExpr.PREFIX = "test";
    handler.process(annotation, job, null);
    assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("test.myTable"));
  } catch (ToolException | NoSuchFieldException | SecurityException e) {
    e.printStackTrace();
    fail(e.getMessage());
  }
}
Example 8: setup
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
/**
 * Handles initializing this class with objects specific to it (i.e., the parser). Common
 * initialization that might be leveraged by a subclass is done in <code>doSetup</code>. Hence a
 * subclass may choose to override this method and call <code>doSetup</code> as well before
 * handling its own custom params.
 * @param context
 */
@Override
protected void setup(Context context) throws IOException {
  doSetup(context);
  Configuration conf = context.getConfiguration();
  parser = new TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY), separator);
  if (parser.getRowKeyColumnIndex() == -1) {
    throw new RuntimeException("No row key column specified");
  }
  String tableName = conf.get(TableInputFormat.INPUT_TABLE);
  HTable hTable = null;
  try {
    hTable = new HTable(conf, tableName);
    this.startKeys = hTable.getStartKeys();
    byte[] indexBytes = hTable.getTableDescriptor().getValue(Constants.INDEX_SPEC_KEY);
    if (indexBytes != null) {
      TableIndices tableIndices = new TableIndices();
      tableIndices.readFields(indexBytes);
      this.indices = tableIndices.getIndices();
    }
  } finally {
    if (hTable != null) hTable.close();
  }
}
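Note that the HTable(conf, tableName) constructor used above is deprecated and removed in HBase 2.x. As a hedged alternative (not part of the original example), the same region start keys can be fetched through the Connection API:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;

// Sketch: resolve region start keys without the deprecated HTable constructor (HBase 1.1+ API).
static byte[][] fetchStartKeys(Configuration conf, String tableName) throws IOException {
  try (Connection connection = ConnectionFactory.createConnection(conf);
       RegionLocator locator = connection.getRegionLocator(TableName.valueOf(tableName))) {
    return locator.getStartKeys();
  }
}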
Example 9: initTableMapperJob
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
public static void initTableMapperJob(String table, Scan scan,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job,
    boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
    throws IOException {
  job.setInputFormatClass(inputFormatClass);
  if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
  if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
  job.setMapperClass(mapper);
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  conf.set(TableInputFormat.INPUT_TABLE, table);
  conf.set(TableInputFormat.SCAN, convertScanToString(scan));
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  TableMapReduceUtil.initCredentials(job);
}
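A hedged usage sketch of the helper above; the table name, the MyTableMapper class, and the Text/IntWritable output types are placeholders, not values from the original project:

// Hypothetical caller: wire a TableMapper subclass to a scan over "my_table".
Scan scan = new Scan();
scan.setCaching(500);
scan.setCacheBlocks(false);
Job job = Job.getInstance(HBaseConfiguration.create(), "table-mapper-example");
initTableMapperJob("my_table", scan, MyTableMapper.class,
    Text.class, IntWritable.class, job, true, TableInputFormat.class);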
Example 10: getConfiguredScanForJob
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
private static Scan getConfiguredScanForJob(Configuration conf, String[] args) throws IOException {
  Scan s = new Scan();
  // Set Scan Versions
  s.setMaxVersions(Integer.MAX_VALUE);
  s.setCacheBlocks(false);
  // Set Scan Column Family
  if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
    s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
  }
  // Set RowFilter or Prefix Filter if applicable.
  Filter rowFilter = getRowFilter(args);
  if (rowFilter != null) {
    LOG.info("Setting Row Filter for counter.");
    s.setFilter(rowFilter);
  }
  // Set TimeRange if defined
  long[] timeRange = getTimeRange(args);
  if (timeRange != null) {
    LOG.info("Setting TimeRange for counter.");
    s.setTimeRange(timeRange[0], timeRange[1]);
  }
  LOG.warn("Got the Scan: " + s);
  return s;
}
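The getRowFilter helper referenced above is not included in this snippet. A sketch of how such a helper is commonly written in CellCounter-style tools; the argument position and the "^" regex convention are assumptions inferred from the usage text in Example 11:

import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.util.Bytes;

// Sketch: build a row filter from an optional CLI argument.
// "^pattern" selects rows by regex; anything else is treated as a row-key prefix.
private static Filter getRowFilter(String[] args) {
  String criteria = (args.length > 3) ? args[3] : null; // assumption: filter is the fourth argument
  if (criteria == null) {
    return null;
  }
  if (criteria.startsWith("^")) {
    return new RowFilter(CompareFilter.CompareOp.EQUAL,
        new RegexStringComparator(criteria.substring(1)));
  }
  return new PrefixFilter(Bytes.toBytesBinary(criteria));
}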
Example 11: run
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Override
public int run(String[] args) throws Exception {
  String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("ERROR: Wrong number of parameters: " + args.length);
    System.err.println("Usage: CellCounter ");
    System.err.println(" <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +
        "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
    System.err.println(" Note: -D properties will be applied to the conf used. ");
    System.err.println(" Additionally, the following SCAN properties can be specified");
    System.err.println(" to get fine grained control on what is counted..");
    System.err.println(" -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
    System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +
        "string : used to separate the rowId/column family name and qualifier name.");
    System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +
        "operation to a limited subset of rows from the table based on regex or prefix pattern.");
    return -1;
  }
  Job job = createSubmittableJob(getConf(), otherArgs);
  return (job.waitForCompletion(true) ? 0 : 1);
}
Example 12: initializeHBaseConfig
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
private static Configuration initializeHBaseConfig() {
  Configuration hbaseConfig = HBaseConfiguration.create();
  hbaseConfig.set(TableInputFormat.INPUT_TABLE, Consts.TARGET_TABLE);
  hbaseConfig.set(TableInputFormat.SCAN_BATCHSIZE, "5000");
  hbaseConfig.set(TableInputFormat.SCAN_CACHEDROWS, "10000");
  hbaseConfig.set(TableInputFormat.SCAN_MAXVERSIONS, "1");
  hbaseConfig.set(TableInputFormat.SCAN_COLUMNS, "base:pCol");
  hbaseConfig.set("hbase.distributed.cluster", "true");
  hbaseConfig.set("hbase.zookeeper.quorum", Consts.ZOOKKEEPER_QUORUM);
  hbaseConfig.set("mapreduce.job.maps", "4");
  hbaseConfig.set("mapred.map.tasks", "4");
  hbaseConfig.set("hbase.mapreduce.splitsPerRegion", "4");
  return hbaseConfig;
}
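How such a configuration is consumed is not shown in the original project. One common pattern, sketched under the assumption that the config feeds a Spark job (the JavaSparkContext named sc is a placeholder), is to hand it to newAPIHadoopRDD:

// Sketch: read the configured table as an RDD of (row key, Result) pairs.
JavaPairRDD<ImmutableBytesWritable, Result> rows = sc.newAPIHadoopRDD(
    initializeHBaseConfig(),
    TableInputFormat.class,
    ImmutableBytesWritable.class,
    Result.class);
long rowCount = rows.count();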
Example 13: initializeHBaseConfig
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
private static Configuration initializeHBaseConfig() {
  Configuration hbaseConfig = HBaseConfiguration.create();
  hbaseConfig.set(TableInputFormat.INPUT_TABLE, TARGET_TABLE);
  hbaseConfig.set(TableInputFormat.SCAN_BATCHSIZE, "5000");
  hbaseConfig.set(TableInputFormat.SCAN_CACHEDROWS, "10000");
  hbaseConfig.set(TableInputFormat.SCAN_MAXVERSIONS, "1");
  hbaseConfig.set(TableInputFormat.SCAN_COLUMNS, "base:pCol");
  hbaseConfig.set("hbase.distributed.cluster", "true");
  hbaseConfig.set("hbase.zookeeper.quorum", ZOOKKEEPER_QUORUM);
  hbaseConfig.set("mapreduce.job.maps", "4");
  hbaseConfig.set("mapred.map.tasks", "4");
  hbaseConfig.set("hbase.mapreduce.splitsPerRegion", "4");
  return hbaseConfig;
}
Example 14: prepareJob
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
/**
 * Prepares a map reduce job.
 * @param tn The current table name.
 * @param familyName The current family name.
 * @param scan The current scan.
 * @param conf The current configuration.
 * @return A map reduce job.
 * @throws IOException
 */
private Job prepareJob(TableName tn, String familyName, Scan scan, Configuration conf)
    throws IOException {
  Job job = Job.getInstance(conf);
  job.setJarByClass(SweepMapper.class);
  TableMapReduceUtil.initTableMapperJob(tn.getNameAsString(), scan,
      SweepMapper.class, Text.class, Writable.class, job);
  job.setInputFormatClass(TableInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(KeyValue.class);
  job.setReducerClass(SweepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  String jobName = getCustomJobName(this.getClass().getSimpleName(), tn.getNameAsString(),
      familyName);
  job.setJobName(jobName);
  if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
    String fileLoc = conf.get(CREDENTIALS_LOCATION);
    Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
    job.getCredentials().addAll(cred);
  }
  return job;
}
Example 15: initTableMapperJob
import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table The Splice table name to read from.
 * @param scan The scan instance with the columns, time range etc.
 * @param mapper The mapper class to use.
 * @param outputKeyClass The class of the output key.
 * @param outputValueClass The class of the output value.
 * @param job The current job to adjust. Make sure the passed job is
 *   carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *   job classes via the distributed cache (tmpjars).
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(String table, Scan scan,
    Class<? extends Mapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Object> outputValueClass, Job job,
    boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
    throws IOException {
  job.setInputFormatClass(inputFormatClass);
  if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
  if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
  if (mapper != null) job.setMapperClass(mapper);
  job.getConfiguration().set(MRConstants.SPLICE_INPUT_TABLE_NAME, table);
  job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));
  if (addDependencyJars) {
    addDependencyJars(job);
  }
}
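A hedged usage sketch of the Splice variant above; the table name, the MyMapper class, and the choice of TableInputFormat as the input format are placeholders rather than values taken from the original project:

// Hypothetical caller: configure a job that maps over a Splice-backed table.
Scan scan = new Scan();
Job job = Job.getInstance(HBaseConfiguration.create(), "splice-table-mapper");
initTableMapperJob("MY_SPLICE_TABLE", scan, MyMapper.class,
    ImmutableBytesWritable.class, Result.class, job, true, TableInputFormat.class);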