当前位置: 首页>>代码示例>>Java>>正文


Java AvroParquetOutputFormat.setSchema方法代码示例

本文整理汇总了Java中parquet.avro.AvroParquetOutputFormat.setSchema方法的典型用法代码示例。如果您正苦于以下问题:Java AvroParquetOutputFormat.setSchema方法的具体用法?Java AvroParquetOutputFormat.setSchema怎么用?Java AvroParquetOutputFormat.setSchema使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在parquet.avro.AvroParquetOutputFormat的用法示例。


在下文中一共展示了AvroParquetOutputFormat.setSchema方法的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: writeAvro

import parquet.avro.AvroParquetOutputFormat; //导入方法依赖的package包/类
public static void writeAvro(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
	// Set up the Hadoop Input Format
	Job job = Job.getInstance();

	// Set up Hadoop Output Format
	HadoopOutputFormat hadoopOutputFormat = new HadoopOutputFormat(new AvroParquetOutputFormat(), job);

	FileOutputFormat.setOutputPath(job, new Path(outputPath));

	AvroParquetOutputFormat.setSchema(job, Person.getClassSchema());
	ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
	ParquetOutputFormat.setEnableDictionary(job, true);

	// Output & Execute
	data.output(hadoopOutputFormat);
}
 
开发者ID:FelixNeutatz,项目名称:parquet-flinktacular,代码行数:17,代码来源:ParquetAvroExample.java

示例2: run

import parquet.avro.AvroParquetOutputFormat; //导入方法依赖的package包/类
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);
  job.setJarByClass(AvroParquetMapReduce.class);

  job.setInputFormatClass(AvroParquetInputFormat.class);
  AvroParquetInputFormat.setInputPaths(job, inputPath);

  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DoubleWritable.class);

  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);
  AvroParquetOutputFormat.setSchema(job, StockAvg.SCHEMA$);

  return job.waitForCompletion(true) ? 0 : 1;
}
 
开发者ID:Hanmourang,项目名称:hiped2,代码行数:40,代码来源:AvroParquetMapReduce.java

示例3: run

import parquet.avro.AvroParquetOutputFormat; //导入方法依赖的package包/类
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);
  job.setJarByClass(AvroGenericParquetMapReduce.class);

  job.setInputFormatClass(AvroParquetInputFormat.class);
  AvroParquetInputFormat.setInputPaths(job, inputPath);

  // force Avro to supply us GenericRecord objects in the mapper by mutating the
  // schema and changing the class name
  //
  Schema schema = Schema.createRecord("foobar",
      Stock.SCHEMA$.getDoc(), Stock.SCHEMA$.getNamespace(), false);
  List<Schema.Field> fields = Lists.newArrayList();
  for (Schema.Field field : Stock.SCHEMA$.getFields()) {
    fields.add(new Schema.Field(field.name(), field.schema(), field.doc(),
        field.defaultValue(), field.order()));
  }
  schema.setFields(fields);

  AvroParquetInputFormat.setAvroReadSchema(job, schema);

  job.setMapperClass(Map.class);

  job.setOutputKeyClass(Void.class);
  job.setOutputValueClass(GenericRecord.class);

  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);
  AvroParquetOutputFormat.setSchema(job, avroSchema);

  job.setNumReduceTasks(0);

  return job.waitForCompletion(true) ? 0 : 1;
}
 
开发者ID:Hanmourang,项目名称:hiped2,代码行数:55,代码来源:AvroGenericParquetMapReduce.java

示例4: run

import parquet.avro.AvroParquetOutputFormat; //导入方法依赖的package包/类
/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {

  Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
  int result = cli.runCmd();

  if (result != 0) {
    return result;
  }

  Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
  Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

  Configuration conf = super.getConf();

  Job job = new Job(conf);
  job.setJarByClass(AvroProjectionParquetMapReduce.class);

  job.setInputFormatClass(AvroParquetInputFormat.class);
  AvroParquetInputFormat.setInputPaths(job, inputPath);

  // predicate pushdown
  AvroParquetInputFormat.setUnboundRecordFilter(job, GoogleStockFilter.class);

  // projection pushdown
  Schema projection = Schema.createRecord(Stock.SCHEMA$.getName(),
      Stock.SCHEMA$.getDoc(), Stock.SCHEMA$.getNamespace(), false);
  List<Schema.Field> fields = Lists.newArrayList();
  for (Schema.Field field : Stock.SCHEMA$.getFields()) {
    if ("symbol".equals(field.name()) || "open".equals(field.name())) {
      fields.add(new Schema.Field(field.name(), field.schema(), field.doc(),
          field.defaultValue(), field.order()));
    }
  }
  projection.setFields(fields);
  AvroParquetInputFormat.setRequestedProjection(job, projection);


  job.setMapperClass(Map.class);
  job.setReducerClass(Reduce.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(DoubleWritable.class);

  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);
  AvroParquetOutputFormat.setSchema(job, StockAvg.SCHEMA$);

  return job.waitForCompletion(true) ? 0 : 1;
}
 
开发者ID:Hanmourang,项目名称:hiped2,代码行数:57,代码来源:AvroProjectionParquetMapReduce.java

示例5: main

import parquet.avro.AvroParquetOutputFormat; //导入方法依赖的package包/类
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
  if (args.length == 0) {
    System.out
        .println("ExportHBaseTableToParquet {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowkey.column.optional");
    return;
  }

  String table = args[0];
  String columnFamily = args[1];
  String outputPath = args[2];
  String compressionCodec = args[3];
  String schemaFilePath = args[4];

  String rowKeyColumn = "";
  if (args.length > 5) {
    rowKeyColumn = args[5];
  }

  Job job = Job.getInstance();
  job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
  job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

  HBaseConfiguration.addHbaseResources(job.getConfiguration());

  job.setJarByClass(ExportHBaseTableToParquet.class);
  job.setJobName("ExportHBaseTableToParquet ");

  Scan scan = new Scan();
  scan.setCaching(500); // 1 is the default in Scan, which will be bad for
                        // MapReduce jobs
  scan.setCacheBlocks(false); // don't set to true for MR jobs
  scan.addFamily(Bytes.toBytes(columnFamily));

  TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
      scan, // Scan instance to control CF and attribute selection
      MyMapper.class, // mapper
      null, // mapper output key
      null, // mapper output value
      job);
  job.setOutputFormatClass(AvroParquetOutputFormat.class);
  AvroParquetOutputFormat.setOutputPath(job, new Path(outputPath));

  Schema.Parser parser = new Schema.Parser();

  FileSystem fs = FileSystem.get(job.getConfiguration());
  AvroParquetOutputFormat.setSchema(job, parser.parse(fs.open(new Path(schemaFilePath))));

  if (compressionCodec.equals("snappy")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
  } else if (compressionCodec.equals("gzip")) {
    AvroParquetOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
  } else {
    // nothing
  }

  job.setNumReduceTasks(0);

  boolean b = job.waitForCompletion(true);
}
 
开发者ID:tmalaska,项目名称:HBase-ToHDFS,代码行数:60,代码来源:ExportHBaseTableToParquet.java


注:本文中的parquet.avro.AvroParquetOutputFormat.setSchema方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。