

Java HFileOutputFormat Class Code Examples

This article collects and summarizes typical usage examples of the Java class org.apache.hadoop.hbase.mapreduce.HFileOutputFormat. If you are wondering what the HFileOutputFormat class is for, how it is used, or what real-world usage looks like, the curated code examples below may help.


The HFileOutputFormat class belongs to the org.apache.hadoop.hbase.mapreduce package. A total of 15 code examples of the class are shown below, ordered by popularity.
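All of the examples on this page stop at generating HFiles; loading those files into the target table is a separate step. As orientation, here is a minimal sketch of that completion step, assuming the same pre-1.0 HBase client API used throughout these examples (HTable, LoadIncrementalHFiles); in newer HBase releases HFileOutputFormat has been superseded by HFileOutputFormat2 and the bulk-load tooling differs. The class and argument names below are placeholders, not taken from any of the projects shown here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class CompleteBulkLoad {
    public static void main(String[] args) throws Exception {
        // args[0]: directory of HFiles produced by a job configured with
        //          HFileOutputFormat.configureIncrementalLoad
        // args[1]: name of the target HBase table
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, args[1]);
        try {
            // Move the generated HFiles into the regions of the target table.
            LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
            loader.doBulkLoad(new Path(args[0]), table);
        } finally {
            table.close();
        }
    }
}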

Example 1: jobSetup

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
@Override
protected void jobSetup(Job job) throws IOException, ImportException {
  super.jobSetup(job);

  // we shouldn't have gotten here if the bulk load dir is not set,
  // so let's throw an ImportException
  if(getContext().getDestination() == null){
    throw new ImportException("Can't run HBaseBulkImportJob without a " +
        "valid destination directory.");
  }

  TableMapReduceUtil.addDependencyJars(job.getConfiguration(), Preconditions.class);
  FileOutputFormat.setOutputPath(job, getContext().getDestination());
  HTable hTable = new HTable(job.getConfiguration(), options.getHBaseTable());
  HFileOutputFormat.configureIncrementalLoad(job, hTable);
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 17, Source: HBaseBulkImportJob.java

Example 2: main

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
   Configuration conf = new Configuration();
   conf.set("hbase.table.name", args[2]);    
   
   Job job = new Job(conf, "createipas");
   
   job.setJarByClass(CreateIpAS.class);
   job.setOutputKeyClass(ImmutableBytesWritable.class);
   job.setOutputValueClass(KeyValue.class);
   
   job.setMapperClass(Map.class);
   job.setReducerClass(Reducer.class);
       
   job.setInputFormatClass(TextInputFormat.class);
   job.setOutputFormatClass(HFileOutputFormat.class);
   
   HTable hTable = new HTable(conf, args[2]);
   HFileOutputFormat.configureIncrementalLoad(job, hTable);
   
   FileInputFormat.addInputPath(job, new Path(args[0]));
   FileOutputFormat.setOutputPath(job, new Path(args[1]));
       
   job.waitForCompletion(true);
}
 
Developer: dsarlis, Project: datix, Lines: 25, Source: CreateIpAS.java
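The Map and Reduce classes referenced by drivers such as the one above are not reproduced on this page. For orientation, a mapper feeding HFileOutputFormat typically emits ImmutableBytesWritable row keys paired with KeyValue cells; the hypothetical sketch below illustrates that shape (the class name, the column family "d", the qualifier "value", and the tab-separated input layout are assumptions, not taken from the datix project). Note that HFileOutputFormat.configureIncrementalLoad installs KeyValueSortReducer when the map output value class is KeyValue, so the reducer named in the driver above is effectively replaced.

import java.io.IOException;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class HFileExampleMapper
        extends Mapper<LongWritable, Text, ImmutableBytesWritable, KeyValue> {

    // Hypothetical column family and qualifier; the real ones must match the target table.
    private static final byte[] FAMILY = Bytes.toBytes("d");
    private static final byte[] QUALIFIER = Bytes.toBytes("value");

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        // Assumed input layout: rowkey <TAB> value
        String[] fields = line.toString().split("\t", 2);
        if (fields.length < 2) {
            return; // skip malformed lines
        }
        byte[] row = Bytes.toBytes(fields[0]);
        KeyValue kv = new KeyValue(row, FAMILY, QUALIFIER, Bytes.toBytes(fields[1]));
        context.write(new ImmutableBytesWritable(row), kv);
    }
}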

Example 3: open

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
@Override
public void open(int taskNumber, int numTasks) throws IOException {
	this.hadoopConfig = getHadoopConfig(this.config);
	
	/**
	 * PLEASE NOTE:
	 * If you are an Eclipse+Maven Integration user and you see two (or more) warnings here, please
	 * close the pact-hbase project OR set the Maven profile to hadoop_yarn.
	 * 
	 * pact-hbase requires hadoop_yarn, but Eclipse is not able to parse Maven profiles properly. Therefore,
	 * it imports the pact-hbase project even if it is not included in the standard profile (hadoop_v1).
	 */
	final TaskAttemptID attemptId = new TaskAttemptID(this.jtID, this.jobId, TaskType.MAP, taskNumber - 1, 0);

	this.context = new org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl(this.hadoopConfig, attemptId);
	final HFileOutputFormat outFormat = new HFileOutputFormat();
	try {
		this.writer = outFormat.getRecordWriter(this.context);
	} catch (InterruptedException iex) {
		throw new IOException("Opening the writer was interrupted.", iex);
	}
}
 
Developer: citlab, Project: vs.msc.ws14, Lines: 23, Source: GenericTableOutputFormat.java

Example 4: startBulkLoad

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
@SuppressWarnings("rawtypes")
private Job startBulkLoad(Configuration conf, String inputTable,
		String tableName, Class<? extends TableMapper> clazz, Path outputDir)
		throws Exception {

	// Create our job to bulk load into HBase
	Job job = Job.getInstance(conf, "HBase Bulk Loader");
	job.setJarByClass(getClass());

	// Initialize our mapper by specifying the input table
	TableMapReduceUtil.initTableMapperJob(inputTable, new Scan(), clazz,
			ImmutableBytesWritable.class, KeyValue.class, job);

	HFileOutputFormat.configureIncrementalLoad(job, new HTable(conf,
			tableName));
	HFileOutputFormat.setOutputPath(job, outputDir);

	// launch the job
	job.waitForCompletion(true);
	return job;
}
 
Developer: Pivotal-Field-Engineering, Project: pmr-common, Lines: 22, Source: TwitterTableSplit.java

Example 5: main

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("hbase.table.name", TABLE_NAME);

    Job job = new Job(conf);

    job.setJarByClass(Q2Loader.class);
    /* set mapper and reducer keys and values */
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    /* set the output of the job to be in HFile format */
    HTable hTable = new HTable(conf, TABLE_NAME);
    HFileOutputFormat.configureIncrementalLoad(job, hTable);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);

}
 
Developer: dsarlis, Project: Cloud-Burst, Lines: 30, Source: Q2Loader.java

Example 6: main

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("hbase.table.name", TABLE_NAME);

    Job job = new Job(conf);

    job.setJarByClass(Q3Loader.class);
    // set mapper and reducer keys and values
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    // set the output of the job to be in HFile format
    HTable hTable = new HTable(conf, TABLE_NAME);
    HFileOutputFormat.configureIncrementalLoad(job, hTable);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);

}
 
Developer: dsarlis, Project: Cloud-Burst, Lines: 30, Source: Q3Loader.java

Example 7: main

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("hbase.table.name", TABLE_NAME);

    Job job = new Job(conf);

    job.setJarByClass(Q4Loader.class);
    /* set mapper and reducer keys and values */
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    /* set the output of the job to be in HFile format */
    HTable hTable = new HTable(conf, TABLE_NAME);
    HFileOutputFormat.configureIncrementalLoad(job, hTable);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.waitForCompletion(true);
}
 
Developer: dsarlis, Project: Cloud-Burst, Lines: 29, Source: Q4Loader.java

Example 8: main

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
//        conf.set("hbase.table.name", TABLE_NAME);

        Job job = new Job(conf);

        job.setJarByClass(HBaseLoader.class);
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(KeyValue.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);

        HTable hTable = new HTable(conf, TABLE_NAME);
        HFileOutputFormat.configureIncrementalLoad(job, hTable);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);

    }
 
Developer: dsarlis, Project: Cloud-Burst, Lines: 28, Source: HBaseLoader.java

Example 9: run

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_II_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        parseOptions(options, args);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));

        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        setJobClasspath(job);

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        FileOutputFormat.setOutputPath(job, output);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(IICreateHFileMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(KeyValue.class);

        String tableName = getOptionValue(OPTION_HTABLE_NAME);
        HTable htable = new HTable(HBaseConfiguration.create(getConf()), tableName);
        HFileOutputFormat.configureIncrementalLoad(job, htable);

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        throw e;
    }
}
 
Developer: KylinOLAP, Project: Kylin, Lines: 38, Source: IICreateHFileJob.java

Example 10: preJobLaunch

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
@Override
protected void preJobLaunch(CommandLine cmd, Job job) throws Exception {
	job.setJobName("Twitter HBase Bulk Load");
	htable = new HTable(getConf(), cmd.getOptionValue(HTABLE_OPT));

	HFileOutputFormat.configureIncrementalLoad(job, htable);
	HFileOutputFormat.setOutputPath(job, outputDir);
}
 
Developer: Pivotal-Field-Engineering, Project: pmr-common, Lines: 9, Source: TwitterBulkLoad.java

Example 11: createSubmittableJob

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
/**
 * Sets up the actual job.
 * 
 * @param conf
 *            The current configuration.
 * @param args
 *            The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *             When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
		throws IOException {

	String tableName = args[0];
	Path inputDir = new Path(args[1]);
	Job job = new Job(conf, "HBaseToHFileMapReduce");
	job.setJarByClass(HBaseToHFileMapReduce.class);
	FileInputFormat.setInputPaths(job, inputDir);
	job.setInputFormatClass(TextInputFormat.class);
	job.setMapperClass(HourlyImporter.class);

	if (args.length < 3) {
		// ++++ insert into table directly using TableOutputFormat ++++
		TableMapReduceUtil.initTableReducerJob(tableName, null, job);
		job.setNumReduceTasks(0);
	} else {
		// ++++ to generate HFile instead ++++
		HTable table = new HTable(conf, tableName);
		job.setReducerClass(PutSortReducer.class);
		Path outputDir = new Path(args[2]);
		FileOutputFormat.setOutputPath(job, outputDir);
		job.setMapOutputKeyClass(ImmutableBytesWritable.class);
		job.setMapOutputValueClass(Put.class);
		HFileOutputFormat.configureIncrementalLoad(job, table);			
	}		
	
	TableMapReduceUtil.addDependencyJars(job);
	return job;
}
 
Developer: willddy, Project: bigdata_pattern, Lines: 41, Source: HBaseToHFileMapReduce.java

Example 12: configureIncrementalLoad

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
  HFileOutputFormat.configureIncrementalLoad(job, table);
  // Override OutputFormatClass
  job.setOutputFormatClass(IndexHFileOutputFormat.class);
}
 
Developer: tenggyut, Project: HIndex, Lines: 6, Source: IndexHFileOutputFormat.java
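This short example shows a common extension pattern: delegate to the stock HFileOutputFormat.configureIncrementalLoad to get the partitioner, sort reducer, and output key/value settings, then swap in a custom output format class afterwards so only the file-writing behavior changes.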

Example 13: createSubmittableJob

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
public static Job createSubmittableJob(Configuration conf, String[] args)
  throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
  Path inputDir = new Path(args[0]);
  Path outputDir = new Path(args[1]);
  boolean createPartitionFile = Boolean.parseBoolean(args[2]);

  Job job = Job.getInstance(conf,
    "Import delicious RSS feed into Hush tables.");
  job.setJarByClass(BulkImportJobExample.class);

  job.setInputFormatClass(TextInputFormat.class);
  // conf.setLong("hbase.hregion.max.filesize", 64 * 1024);
  FileInputFormat.setInputPaths(job, inputDir);

  job.setMapperClass(BulkImportMapper.class);
  job.setMapOutputKeyClass(ImmutableBytesWritable.class);
  job.setMapOutputValueClass(Put.class);

  job.setPartitionerClass(TotalOrderPartitioner.class);

  job.setReducerClass(PutSortReducer.class);
  job.setOutputKeyClass(ImmutableBytesWritable.class);
  job.setOutputValueClass(KeyValue.class);

  job.setOutputFormatClass(HFileOutputFormat.class);
  HFileOutputFormat.setOutputPath(job, outputDir);

  HFileOutputFormat.setCompressOutput(job, true);
  HFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  job.getConfiguration().set("hfile.compression", "gz");

  //job.getConfiguration().setFloat("mapred.job.shuffle.input.buffer.percent", 0.5f);
  //job.setNumReduceTasks(30);

  Path partitionsPath = new Path(job.getWorkingDirectory(),
    "partitions_" + System.currentTimeMillis());
  TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);

  if (createPartitionFile) {
    VerboseInputSampler.Sampler<KeyValue, ImmutableBytesWritable> sampler =
      new VerboseInputSampler.VerboseRandomSampler<KeyValue, ImmutableBytesWritable>(0.05, 1000000, 30);       // use 0.1 for real sampling

    LOG.info("Sampling key space");
    VerboseInputSampler.writePartitionFile(job, sampler);
    LOG.info("Samping done");
  }

  URI cacheUri = new URI(partitionsPath.toString() + "#" +
    TotalOrderPartitioner.DEFAULT_PATH);
  DistributedCache.addCacheFile(cacheUri, job.getConfiguration());
  DistributedCache.createSymlink(job.getConfiguration());

  return job;
}
 
Developer: lhfei, Project: hbase-in-action, Lines: 55, Source: BulkImportJobExample.java
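Unlike the other examples, this job does not call HFileOutputFormat.configureIncrementalLoad at all: it wires up TotalOrderPartitioner itself and builds the partition file by sampling the input keys (the sampler arguments appear to mirror Hadoop's InputSampler.RandomSampler: a 5% sampling frequency, at most 1,000,000 samples, drawn from at most 30 input splits), whereas configureIncrementalLoad derives the partition boundaries from the start keys of the target table's existing regions.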

Example 14: createSubmittableJob

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
/**
 * Sets up the actual job.
 * 
 * @param conf
 *            The current configuration.
 * @param args
 *            The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *             When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args)
		throws IOException, ClassNotFoundException {

	// Support non-XML supported characters
	// by re-encoding the passed separator as a Base64 string.
	String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
	if (actualSeparator != null) {
		conf.set(SEPARATOR_CONF_KEY,
				new String(Base64.encodeBytes(actualSeparator.getBytes())));
	}

	// See if a non-default Mapper was set
	String mapperClassName = conf.get(MAPPER_CONF_KEY);
	Class mapperClass = mapperClassName != null ? Class
			.forName(mapperClassName) : DEFAULT_MAPPER;

	String tableName = args[0];
	Path inputDir = new Path(args[1]);
	Job job = new Job(conf, NAME + "_" + tableName);
	job.setJarByClass(mapperClass);
	FileInputFormat.setInputPaths(job, inputDir);

	String inputCodec = conf.get(INPUT_LZO_KEY);
	if (inputCodec == null) {
		FileInputFormat.setMaxInputSplitSize(job, 67108864L); // max split size = 64 MB
		job.setInputFormatClass(TextInputFormat.class);
	} else {
		if (inputCodec.equalsIgnoreCase("lzo"))
			job.setInputFormatClass(LzoTextInputFormat.class);
		else {
			usage("not supported compression codec!");
			System.exit(-1);
		}
	}

	job.setMapperClass(mapperClass);

	String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
	if (hfileOutPath != null) {
		HTable table = new HTable(conf, tableName);
		job.setReducerClass(PutSortReducer.class);
		Path outputDir = new Path(hfileOutPath);
		FileOutputFormat.setOutputPath(job, outputDir);
		job.setMapOutputKeyClass(ImmutableBytesWritable.class);
		job.setMapOutputValueClass(Put.class);
		HFileOutputFormat.configureIncrementalLoad(job, table);
	} else {
		// No reducers. Just write straight to table. Call
		// initTableReducerJob
		// to set up the TableOutputFormat.
		TableMapReduceUtil.initTableReducerJob(tableName, null, job);
		job.setNumReduceTasks(0);
	}

	TableMapReduceUtil.addDependencyJars(job);
	TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
			com.google.common.base.Function.class /* Guava used by TsvParser */);
	return job;
}
 
Developer: javachen, Project: learning-hadoop, Lines: 75, Source: ImportTsv.java

Example 15: run

import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat; // import the required package/class
public int run(String[] args) throws Exception {
    Options options = new Options();

    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_HTABLE_NAME);
        parseOptions(options, args);

        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase();

        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());

        CubeInstance cube = cubeMgr.getCube(cubeName);
        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));

        setJobClasspath(job);

        addInputDirs(getOptionValue(OPTION_INPUT_PATH), job);
        FileOutputFormat.setOutputPath(job, output);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapperClass(CubeHFileMapper.class);
        job.setReducerClass(KeyValueSortReducer.class);

        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        Configuration conf = HBaseConfiguration.create(getConf());
        // add metadata to distributed cache
        attachKylinPropsAndMetadata(cube, job.getConfiguration());

        String tableName = getOptionValue(OPTION_HTABLE_NAME).toUpperCase();
        HTable htable = new HTable(conf, tableName);

        //Automatic config !
        HFileOutputFormat.configureIncrementalLoad(job, htable);

        // set block replication to 3 for hfiles
        conf.set(DFSConfigKeys.DFS_REPLICATION_KEY, "3");

        this.deletePath(job.getConfiguration(), output);

        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CubeHFileJob", e);
        printUsage(options);
        throw e;
    }
}
 
Developer: KylinOLAP, Project: Kylin, Lines: 53, Source: CubeHFileJob.java


Note: The org.apache.hadoop.hbase.mapreduce.HFileOutputFormat class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective authors; copyright of the source code belongs to the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.