本文整理汇总了Java中parquet.avro.AvroParquetOutputFormat类的典型用法代码示例。如果您正苦于以下问题：Java AvroParquetOutputFormat类的具体用法？Java AvroParquetOutputFormat怎么用？Java AvroParquetOutputFormat使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
AvroParquetOutputFormat类属于parquet.avro包，在下文中一共展示了AvroParquetOutputFormat类的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: writeAvro
import parquet.avro.AvroParquetOutputFormat; //导入依赖的package包/类
/**
 * Registers a Parquet sink for the given data set, using the Avro schema of
 * {@code Person} and the Hadoop {@code AvroParquetOutputFormat}.
 *
 * <p>The output is configured with Snappy compression and dictionary encoding
 * enabled. This method only attaches the output format to {@code data}.
 *
 * @param data       the tuples to write
 * @param outputPath the location the Hadoop output format writes to
 * @throws IOException if the Hadoop job cannot be created or configured
 */
public static void writeAvro(DataSet<Tuple2<Void, Person>> data, String outputPath) throws IOException {
    // Hadoop job object that carries the output configuration.
    final Job hadoopJob = Job.getInstance();

    // Wrap the Hadoop output format around that job; the settings below are
    // applied to the same job object.
    final HadoopOutputFormat parquetSink =
            new HadoopOutputFormat(new AvroParquetOutputFormat(), hadoopJob);

    // Destination, record schema and Parquet encoding options.
    final Path destination = new Path(outputPath);
    FileOutputFormat.setOutputPath(hadoopJob, destination);
    AvroParquetOutputFormat.setSchema(hadoopJob, Person.getClassSchema());
    ParquetOutputFormat.setCompression(hadoopJob, CompressionCodecName.SNAPPY);
    ParquetOutputFormat.setEnableDictionary(hadoopJob, true);

    // Attach the configured output format to the data set.
    data.output(parquetSink);
}
示例2: run
import parquet.avro.AvroParquetOutputFormat; //导入依赖的package包/类
/**
 * The MapReduce driver - setup and launch the job.
 *
 * <p>Parses the command line, then configures a job that reads its input through
 * {@code AvroParquetInputFormat} and writes {@code StockAvg} records through
 * {@code AvroParquetOutputFormat}.
 *
 * @param args the command-line arguments
 * @return the non-zero result of the command-line parsing if it failed; otherwise
 *         {@code 0} when the job completes successfully and {@code 1} when it does not
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        // Argument handling did not succeed; hand its status back to the caller.
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();

    // Job.getInstance replaces the deprecated Job(Configuration) constructor.
    Job job = Job.getInstance(conf);
    job.setJarByClass(AvroParquetMapReduce.class);

    // Input side.
    job.setInputFormatClass(AvroParquetInputFormat.class);
    AvroParquetInputFormat.setInputPaths(job, inputPath);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    // Intermediate (map output) key/value types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    // Output side: the output format is given the StockAvg Avro schema.
    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    AvroParquetOutputFormat.setSchema(job, StockAvg.SCHEMA$);

    return job.waitForCompletion(true) ? 0 : 1;
}
示例3: run
import parquet.avro.AvroParquetOutputFormat; //导入依赖的package包/类
/**
 * The MapReduce driver - setup and launch the job.
 *
 * <p>Configures a map-only job (zero reduce tasks) that reads through
 * {@code AvroParquetInputFormat} with a renamed copy of the {@code Stock} schema
 * and writes through {@code AvroParquetOutputFormat}.
 *
 * @param args the command-line arguments
 * @return the non-zero result of the command-line parsing if it failed; otherwise
 *         {@code 0} when the job completes successfully and {@code 1} when it does not
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        // Argument handling did not succeed; hand its status back to the caller.
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();

    // Job.getInstance replaces the deprecated Job(Configuration) constructor.
    Job job = Job.getInstance(conf);
    job.setJarByClass(AvroGenericParquetMapReduce.class);

    job.setInputFormatClass(AvroParquetInputFormat.class);
    AvroParquetInputFormat.setInputPaths(job, inputPath);

    // force Avro to supply us GenericRecord objects in the mapper by mutating the
    // schema and changing the class name
    //
    Schema schema = Schema.createRecord("foobar",
            Stock.SCHEMA$.getDoc(), Stock.SCHEMA$.getNamespace(), false);
    // Copy every field of the Stock schema into the renamed record; each field is
    // rebuilt as a new Schema.Field instance rather than reused.
    List<Schema.Field> fields = Lists.newArrayList();
    for (Schema.Field field : Stock.SCHEMA$.getFields()) {
        fields.add(new Schema.Field(field.name(), field.schema(), field.doc(),
                field.defaultValue(), field.order()));
    }
    schema.setFields(fields);
    AvroParquetInputFormat.setAvroReadSchema(job, schema);

    job.setMapperClass(Map.class);

    job.setOutputKeyClass(Void.class);
    job.setOutputValueClass(GenericRecord.class);

    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    // NOTE(review): avroSchema is not declared in this method; presumably a member
    // of the enclosing class - confirm.
    AvroParquetOutputFormat.setSchema(job, avroSchema);

    // No reducers: mapper output goes straight to the output format.
    job.setNumReduceTasks(0);

    return job.waitForCompletion(true) ? 0 : 1;
}
示例4: run
import parquet.avro.AvroParquetOutputFormat; //导入依赖的package包/类
/**
 * The MapReduce driver - setup and launch the job.
 *
 * <p>Configures a job that reads through {@code AvroParquetInputFormat} with a
 * record filter and a projection limited to the {@code symbol} and {@code open}
 * fields of the {@code Stock} schema, and writes {@code StockAvg} records through
 * {@code AvroParquetOutputFormat}.
 *
 * @param args the command-line arguments
 * @return the non-zero result of the command-line parsing if it failed; otherwise
 *         {@code 0} when the job completes successfully and {@code 1} when it does not
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        // Argument handling did not succeed; hand its status back to the caller.
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();

    // Job.getInstance replaces the deprecated Job(Configuration) constructor.
    Job job = Job.getInstance(conf);
    job.setJarByClass(AvroProjectionParquetMapReduce.class);

    job.setInputFormatClass(AvroParquetInputFormat.class);
    AvroParquetInputFormat.setInputPaths(job, inputPath);

    // predicate pushdown
    AvroParquetInputFormat.setUnboundRecordFilter(job, GoogleStockFilter.class);

    // projection pushdown: same record name/doc/namespace as Stock, but only the
    // "symbol" and "open" fields are carried over
    Schema projection = Schema.createRecord(Stock.SCHEMA$.getName(),
            Stock.SCHEMA$.getDoc(), Stock.SCHEMA$.getNamespace(), false);
    List<Schema.Field> fields = Lists.newArrayList();
    for (Schema.Field field : Stock.SCHEMA$.getFields()) {
        if ("symbol".equals(field.name()) || "open".equals(field.name())) {
            fields.add(new Schema.Field(field.name(), field.schema(), field.doc(),
                    field.defaultValue(), field.order()));
        }
    }
    projection.setFields(fields);
    AvroParquetInputFormat.setRequestedProjection(job, projection);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    // Intermediate (map output) key/value types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    // Output side: the output format is given the StockAvg Avro schema.
    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    AvroParquetOutputFormat.setSchema(job, StockAvg.SCHEMA$);

    return job.waitForCompletion(true) ? 0 : 1;
}
示例5: main
import parquet.avro.AvroParquetOutputFormat; //导入依赖的package包/类
/**
 * Command-line entry point: scans one column family of an HBase table with a
 * map-only job and writes the result through {@code AvroParquetOutputFormat}.
 *
 * <p>Expected arguments, in order: table name, column family, output path,
 * compression codec ({@code snappy} or {@code gzip}), path of the Avro schema
 * file, and an optional row-key column name. When fewer than the five mandatory
 * arguments are supplied, the usage line is printed and the method returns
 * without starting a job.
 *
 * @param args the command-line arguments described above
 * @throws IOException            if the job cannot be set up, the schema file cannot
 *                                be read, or the job finishes unsuccessfully
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be found
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // args[0]..args[4] are read unconditionally below, so all five must be present;
    // only the sixth (row-key column) is optional.
    if (args.length < 5) {
        System.out
                .println("ExportHBaseTableToParquet {tableName} {ColumnFamily} {outputPath} {compressionCodec snappy,gzip} {schemaLocationOnHdfs} {rowkey.column.optional}");
        return;
    }

    String table = args[0];
    String columnFamily = args[1];
    String outputPath = args[2];
    String compressionCodec = args[3];
    String schemaFilePath = args[4];

    String rowKeyColumn = "";
    if (args.length > 5) {
        rowKeyColumn = args[5];
    }

    Job job = Job.getInstance();
    // Stored in the job configuration under keys defined outside this method.
    job.getConfiguration().set(ROW_KEY_COLUMN_CONF, rowKeyColumn);
    job.getConfiguration().set(SCHEMA_FILE_LOCATION_CONF, schemaFilePath);

    HBaseConfiguration.addHbaseResources(job.getConfiguration());

    job.setJarByClass(ExportHBaseTableToParquet.class);
    job.setJobName("ExportHBaseTableToParquet ");

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for
    // MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    scan.addFamily(Bytes.toBytes(columnFamily));

    TableMapReduceUtil.initTableMapperJob(table, // input HBase table name
            scan, // Scan instance to control CF and attribute selection
            MyMapper.class, // mapper
            null, // mapper output key
            null, // mapper output value
            job);

    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    AvroParquetOutputFormat.setOutputPath(job, new Path(outputPath));

    // Parse the Avro schema from the file system resolved via the job configuration.
    // The stream is closed here explicitly rather than relying on the parser to do it.
    Schema.Parser parser = new Schema.Parser();
    FileSystem fs = FileSystem.get(job.getConfiguration());
    java.io.InputStream schemaStream = fs.open(new Path(schemaFilePath));
    try {
        AvroParquetOutputFormat.setSchema(job, parser.parse(schemaStream));
    } finally {
        schemaStream.close();
    }

    if (compressionCodec.equals("snappy")) {
        AvroParquetOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);
    } else if (compressionCodec.equals("gzip")) {
        AvroParquetOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    } else {
        // any other value: no compressor class is configured here
    }

    // Map-only job.
    job.setNumReduceTasks(0);

    // Surface a failed job to the caller instead of silently discarding the result.
    if (!job.waitForCompletion(true)) {
        throw new IOException("ExportHBaseTableToParquet job failed for table " + table);
    }
}