

Java TableInputFormat Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.hbase.mapreduce.TableInputFormat. If you are wondering what TableInputFormat is for or how to use it, the curated examples below should help.


The TableInputFormat class belongs to the org.apache.hadoop.hbase.mapreduce package. The sections below show 15 code examples of the class, sorted by popularity.
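As context for the examples: TableInputFormat splits an HBase table by region and hands each map task one row at a time, as an ImmutableBytesWritable row key paired with a client Result. The sketch below shows a minimal mapper consuming that input; the "info:name" column it reads is a hypothetical placeholder, not taken from any example on this page.

import java.io.IOException;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

// Minimal sketch: TableInputFormat delivers (row key, Result) pairs;
// the "info:name" column read here is a hypothetical example.
public class NameExtractMapper extends TableMapper<Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);

    @Override
    protected void map(ImmutableBytesWritable rowKey, Result result, Context context)
            throws IOException, InterruptedException {
        byte[] value = result.getValue(Bytes.toBytes("info"), Bytes.toBytes("name"));
        if (value != null) {
            context.write(new Text(Bytes.toString(value)), ONE);
        }
    }
}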

Example 1: run

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
public void run() throws Exception {
    String tableName = "contacts";

    Configuration config = HBaseConfiguration.create();

    Scan scan = new Scan();
    scan.setCaching(500);        // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false);  // don't set to true for MR jobs

    config.set(TableInputFormat.SCAN, convertScanToString(scan));
    config.set(TableInputFormat.INPUT_TABLE, tableName);

    Job job = Job.getInstance(config, "index builder");
    job.setJarByClass(JobSubmitter.class);
    job.setMapperClass(IndexMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TableInputFormat.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);

    boolean b = job.waitForCompletion(true);
    if (!b) {
        throw new IOException("error with job!");
    }
}
 
Author: at15 | Project: cs433 | Lines: 25 | Source: JobSubmitter.java
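Example 1 calls a convertScanToString helper that is not shown in the snippet. Recent HBase versions expose an equivalent TableMapReduceUtil.convertScanToString; a minimal sketch of the helper under that assumption (HBase 1.x API):

import java.io.IOException;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.util.Base64;

// Sketch: protobuf-serialize the Scan and Base64-encode it, producing
// the string format that TableInputFormat.SCAN expects.
private static String convertScanToString(Scan scan) throws IOException {
    return Base64.encodeBytes(ProtobufUtil.toScan(scan).toByteArray());
}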

Example 2: evaluate

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Override
public JavaRDD<ObjectValue> evaluate(Transformation transformation) {
    final SparkTransformationEvaluator evaluator = new SparkTransformationEvaluator(transformation);
    JavaSparkContext sc = NotaQL.SparkFactory.getSparkContext();

    final Configuration conf = createConf();
    conf.set(TableInputFormat.INPUT_TABLE, tableId);

    final JavaPairRDD<ImmutableBytesWritable, Result> inputRDD =
            sc.newAPIHadoopRDD(conf, TableInputFormat.class, ImmutableBytesWritable.class, org.apache.hadoop.hbase.client.Result.class);

    // convert all rows in rdd to inner format
    final JavaRDD<Value> converted = inputRDD.map(t -> ValueConverter.convertToNotaQL(t._2));
    // filter the ones not fulfilling the input filter
    final JavaRDD<Value> filtered = converted.filter(v -> transformation.satisfiesInPredicate((ObjectValue) v));

    // process all input
    return evaluator.process(filtered);
}
 
Author: notaql | Project: notaql | Lines: 20 | Source: HBaseEngineEvaluator.java
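The same newAPIHadoopRDD pattern works outside this engine as well. A self-contained sketch (the table name "contacts" and the app name are assumptions) that reads a table through TableInputFormat and counts its rows:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class HBaseRowCount {
    public static void main(String[] args) {
        // Point TableInputFormat at the (assumed) table "contacts".
        Configuration conf = HBaseConfiguration.create();
        conf.set(TableInputFormat.INPUT_TABLE, "contacts");

        JavaSparkContext sc = new JavaSparkContext(new SparkConf().setAppName("hbase-row-count"));
        // Each RDD element is one HBase row: (row key, Result).
        JavaPairRDD<ImmutableBytesWritable, Result> rows =
                sc.newAPIHadoopRDD(conf, TableInputFormat.class,
                        ImmutableBytesWritable.class, Result.class);
        System.out.println("row count: " + rows.count());
        sc.stop();
    }
}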

Example 3: process

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Override
public void process(Annotation annotation, Job job, Object target)
		throws ToolException {

	TableInput tableInput = (TableInput)annotation;

	// Base setup of the table mapper job
	Configuration conf = job.getConfiguration();
	HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));

	try {
		// Add dependencies
		TableMapReduceUtil.addDependencyJars(job);

		String tableName = getTableName(tableInput);
		Scan scan = getScan(tableInput);

		job.setInputFormatClass(TableInputFormat.class);
		conf.set(TableInputFormat.INPUT_TABLE, tableName);
		conf.set(TableInputFormat.SCAN, convertScanToString(scan));

	} catch (IOException e) {
		throw new ToolException(e);
	}
}
 
Author: conversant | Project: mara | Lines: 26 | Source: TableInputAnnotationHandler.java

Example 4: testProcessDefaults

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Test
public void testProcessDefaults() {

	try {
		Annotation annotation = setupDriver(new TableDriverDefaults());

		handler.process(annotation, job, null);

		verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
		assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo(TEST_INPUT));
		assertThat(conf.get(TableInputFormat.SCAN), equalTo("AgAAAAAAAf//////////AQAAAAAAAAAAAH//////////AQAAAAAAAAAA"));

	} catch (ToolException | NoSuchFieldException | SecurityException e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Author: conversant | Project: mara | Lines: 18 | Source: TableInputAnnotationHandlerTest.java
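The Base64 literal asserted above is just a protobuf-serialized Scan. HBase's TableMapReduceUtil can decode it back, which is handy when a test like this fails and you want to see what was actually configured; a small sketch:

import java.io.IOException;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;

// Decode the string stored under TableInputFormat.SCAN back into a
// Scan object for inspection.
public static Scan decodeScan(String base64Scan) throws IOException {
    return TableMapReduceUtil.convertStringToScan(base64Scan);
}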

Example 5: testProcessExplicitTable

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Test
public void testProcessExplicitTable() {

	try {
		Annotation annotation = setupDriver(new TableDriverExplicitTable());
		handler.process(annotation, job, null);

		verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
		assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("my_table"));
		assertThat(conf.get(TableInputFormat.SCAN), equalTo("AgAAAAAAAf//////////AQAAAAAAAAAAAH//////////AQAAAAAAAAAA"));

	} catch (ToolException | NoSuchFieldException | SecurityException e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Author: conversant | Project: mara | Lines: 17 | Source: TableInputAnnotationHandlerTest.java

Example 6: testProcessCustomScan

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Test
public void testProcessCustomScan() {

	try {
		Annotation annotation = setupDriver(new TableDriverWithScan());

		handler.process(annotation, job, null);

		verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
		assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("test.my_table"));
		assertThat(conf.get(TableInputFormat.SCAN), equalTo("AgAAAAAAAv//////////AQAAAAAAAAAAAH//////////AQAAAAAAAAAA"));

	} catch (ToolException | NoSuchFieldException | SecurityException e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Author: conversant | Project: mara | Lines: 18 | Source: TableInputAnnotationHandlerTest.java

Example 7: testConditionalName

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Test
public void testConditionalName() {
	try {
		Annotation annotation = setupDriver(new TableDriverNameExpr());

		handler.process(annotation, job, null);

		verify(job, times(1)).setInputFormatClass(TableInputFormat.class);
		assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("myTable"));

		TableDriverNameExpr.PREFIX = "test";
		handler.process(annotation, job, null);
		assertThat(conf.get(TableInputFormat.INPUT_TABLE), equalTo("test.myTable"));

	} catch (ToolException | NoSuchFieldException | SecurityException e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Author: conversant | Project: mara | Lines: 20 | Source: TableInputAnnotationHandlerTest.java

Example 8: setup

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
/**
 * Handles initializing this class with objects specific to it (i.e., the parser). Common
 * initialization that might be leveraged by a subclass is done in <code>doSetup</code>. Hence a
 * subclass may choose to override this method and call <code>doSetup</code> as well before
 * handling its own custom params.
 * @param context
 */
@Override
protected void setup(Context context) throws IOException {
  doSetup(context);

  Configuration conf = context.getConfiguration();

  parser = new TsvParser(conf.get(ImportTsv.COLUMNS_CONF_KEY), separator);
  if (parser.getRowKeyColumnIndex() == -1) {
    throw new RuntimeException("No row key column specified");
  }
  String tableName = conf.get(TableInputFormat.INPUT_TABLE);
  HTable hTable = null;
  try {
    hTable = new HTable(conf, tableName);
    this.startKeys = hTable.getStartKeys();
    byte[] indexBytes = hTable.getTableDescriptor().getValue(Constants.INDEX_SPEC_KEY);
    if (indexBytes != null) {
      TableIndices tableIndices = new TableIndices();
      tableIndices.readFields(indexBytes);
      this.indices = tableIndices.getIndices();
    }
  } finally {
    if (hTable != null) hTable.close();
  }
}
 
Author: tenggyut | Project: HIndex | Lines: 33 | Source: IndexTsvImporterMapper.java

Example 9: initTableMapperJob

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
public static void initTableMapperJob(String table, Scan scan,
    Class<? extends TableMapper> mapper,
    Class<? extends WritableComparable> outputKeyClass,
    Class<? extends Writable> outputValueClass, Job job,
    boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
throws IOException {
  job.setInputFormatClass(inputFormatClass);
  if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
  if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
  job.setMapperClass(mapper);
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
  conf.set(TableInputFormat.INPUT_TABLE, table);
  conf.set(TableInputFormat.SCAN, convertScanToString(scan));
  if (addDependencyJars) {
    addDependencyJars(job);
  }
  TableMapReduceUtil.initCredentials(job);
}
 
Author: XiaoMi | Project: themis | Lines: 20 | Source: ThemisTableMapReduceUtil.java
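A hypothetical driver for the helper above; the table name "my_table" and MyMapper (an assumed TableMapper<Text, IntWritable> subclass) are placeholders for illustration:

// Sketch: configure a map-only scan job via the Themis utility.
public static Job createScanJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, "themis table scan");
    Scan scan = new Scan();
    scan.setCaching(500); // avoid the Scan default of 1 row per RPC
    ThemisTableMapReduceUtil.initTableMapperJob("my_table", scan,
        MyMapper.class, Text.class, IntWritable.class, job,
        true, TableInputFormat.class);
    return job;
}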

Example 10: getConfiguredScanForJob

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
private static Scan getConfiguredScanForJob(Configuration conf, String[] args) throws IOException {
  Scan s = new Scan();
  // Set Scan Versions
  s.setMaxVersions(Integer.MAX_VALUE);
  s.setCacheBlocks(false);
  // Set Scan Column Family
  if (conf.get(TableInputFormat.SCAN_COLUMN_FAMILY) != null) {
    s.addFamily(Bytes.toBytes(conf.get(TableInputFormat.SCAN_COLUMN_FAMILY)));
  }
  // Set RowFilter or Prefix Filter if applicable.
  Filter rowFilter = getRowFilter(args);
  if (rowFilter != null) {
    LOG.info("Setting Row Filter for counter.");
    s.setFilter(rowFilter);
  }
  // Set TimeRange if defined
  long[] timeRange = getTimeRange(args);
  if (timeRange != null) {
    LOG.info("Setting TimeRange for counter.");
    s.setTimeRange(timeRange[0], timeRange[1]);
  }
  LOG.warn("Got the Scan: " + s);
  return s;
}
 
Author: GoogleCloudPlatform | Project: cloud-bigtable-examples | Lines: 25 | Source: CellCounter.java

Example 11: run

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
@Override
public int run(String[] args) throws Exception {
  String[] otherArgs = new GenericOptionsParser(getConf(), args).getRemainingArgs();
  if (otherArgs.length < 2) {
    System.err.println("ERROR: Wrong number of parameters: " + args.length);
    System.err.println("Usage: CellCounter ");
    System.err.println("       <tablename> <outputDir> <reportSeparator> [^[regex pattern] or " +
      "[Prefix] for row filter]] --starttime=[starttime] --endtime=[endtime]");
    System.err.println("  Note: -D properties will be applied to the conf used. ");
    System.err.println("  Additionally, the following SCAN properties can be specified");
    System.err.println("  to get fine grained control on what is counted..");
    System.err.println("   -D " + TableInputFormat.SCAN_COLUMN_FAMILY + "=<familyName>");
    System.err.println(" <reportSeparator> parameter can be used to override the default report separator " +
        "string : used to separate the rowId/column family name and qualifier name.");
    System.err.println(" [^[regex pattern] or [Prefix] parameter can be used to limit the cell counter count " +
        "operation to a limited subset of rows from the table based on regex or prefix pattern.");
    return -1;
  }
  Job job = createSubmittableJob(getConf(), otherArgs);
  return (job.waitForCompletion(true) ? 0 : 1);
}
 
Author: GoogleCloudPlatform | Project: cloud-bigtable-examples | Lines: 22 | Source: CellCounter.java

Example 12: initializeHBaseConfig

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
private static Configuration initializeHBaseConfig() {

		Configuration hbaseConfig = HBaseConfiguration.create();

		hbaseConfig.set(TableInputFormat.INPUT_TABLE, Consts.TARGET_TABLE);
		hbaseConfig.set(TableInputFormat.SCAN_BATCHSIZE, "5000");
		hbaseConfig.set(TableInputFormat.SCAN_CACHEDROWS, "10000");
		hbaseConfig.set(TableInputFormat.SCAN_MAXVERSIONS, "1");
		hbaseConfig.set(TableInputFormat.SCAN_COLUMNS, "base:pCol");

		hbaseConfig.set("hbase.distributed.cluster", "true");
		hbaseConfig.set("hbase.zookeeper.quorum", Consts.ZOOKKEEPER_QUORUM);

		hbaseConfig.set("mapreduce.job.maps", "4");
		hbaseConfig.set("mapred.map.tasks", "4");
		hbaseConfig.set("hbase.mapreduce.splitsPerRegion", "4");

		return hbaseConfig;
	}
 
Author: atulsm | Project: Test_Projects | Lines: 20 | Source: HbaseReadTimeSeries.java

Example 13: initializeHBaseConfig

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
private static Configuration initializeHBaseConfig() {

		Configuration hbaseConfig = HBaseConfiguration.create();

		hbaseConfig.set(TableInputFormat.INPUT_TABLE, TARGET_TABLE);
		hbaseConfig.set(TableInputFormat.SCAN_BATCHSIZE, "5000");
		hbaseConfig.set(TableInputFormat.SCAN_CACHEDROWS, "10000");
		hbaseConfig.set(TableInputFormat.SCAN_MAXVERSIONS, "1");
		hbaseConfig.set(TableInputFormat.SCAN_COLUMNS, "base:pCol");

		hbaseConfig.set("hbase.distributed.cluster", "true");
		hbaseConfig.set("hbase.zookeeper.quorum", ZOOKKEEPER_QUORUM);

		hbaseConfig.set("mapreduce.job.maps", "4");
		hbaseConfig.set("mapred.map.tasks", "4");
		hbaseConfig.set("hbase.mapreduce.splitsPerRegion", "4");

		return hbaseConfig;
	}
 
Author: atulsm | Project: Test_Projects | Lines: 20 | Source: StreamingKafkaHbaseWrite.java

Example 14: prepareJob

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
/**
 * Prepares a map reduce job.
 * @param tn The current table name.
 * @param familyName The current family name.
 * @param scan The current scan.
 * @param conf The current configuration.
 * @return A map reduce job.
 * @throws IOException
 */
private Job prepareJob(TableName tn, String familyName, Scan scan, Configuration conf)
    throws IOException {
  Job job = Job.getInstance(conf);
  job.setJarByClass(SweepMapper.class);
  TableMapReduceUtil.initTableMapperJob(tn.getNameAsString(), scan,
      SweepMapper.class, Text.class, Writable.class, job);

  job.setInputFormatClass(TableInputFormat.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(KeyValue.class);
  job.setReducerClass(SweepReducer.class);
  job.setOutputFormatClass(NullOutputFormat.class);
  String jobName = getCustomJobName(this.getClass().getSimpleName(), tn.getNameAsString(),
      familyName);
  job.setJobName(jobName);
  if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
    String fileLoc = conf.get(CREDENTIALS_LOCATION);
    Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
    job.getCredentials().addAll(cred);
  }
  return job;
}
 
Author: intel-hadoop | Project: HBase-LOB | Lines: 32 | Source: SweepJob.java

Example 15: initTableMapperJob

import org.apache.hadoop.hbase.mapreduce.TableInputFormat; // import the required package/class
/**
 * Use this before submitting a TableMap job. It will appropriately set up
 * the job.
 *
 * @param table  The Splice table name to read from.
 * @param scan  The scan instance with the columns, time range etc.
 * @param mapper  The mapper class to use.
 * @param outputKeyClass  The class of the output key.
 * @param outputValueClass  The class of the output value.
 * @param job  The current job to adjust.  Make sure the passed job is
 * carrying all necessary HBase configuration.
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *           job classes via the distributed cache (tmpjars).
 * @throws IOException When setting up the details fails.
 */
public static void initTableMapperJob(String table, Scan scan,
                                      Class<? extends Mapper> mapper,
                                      Class<? extends WritableComparable> outputKeyClass,
                                      Class<? extends Object> outputValueClass, Job job,
                                      boolean addDependencyJars, Class<? extends InputFormat> inputFormatClass)
        throws IOException {
    job.setInputFormatClass(inputFormatClass);
    if (outputValueClass != null) job.setMapOutputValueClass(outputValueClass);
    if (outputKeyClass != null) job.setMapOutputKeyClass(outputKeyClass);
    if (mapper != null) job.setMapperClass(mapper);
    job.getConfiguration().set(MRConstants.SPLICE_INPUT_TABLE_NAME, table);
    job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));
    if (addDependencyJars) {
        addDependencyJars(job);
    }
}
 
Author: splicemachine | Project: spliceengine | Lines: 33 | Source: SpliceTableMapReduceUtil.java


Note: The org.apache.hadoop.hbase.mapreduce.TableInputFormat examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their original authors; copyright of the source code remains with those authors, and any use or distribution must follow the corresponding project's license. Do not repost without permission.