本文整理汇总了Java中parquet.avro.AvroParquetInputFormat类的典型用法代码示例。如果您正苦于以下问题:Java AvroParquetInputFormat类的具体用法?Java AvroParquetInputFormat怎么用?Java AvroParquetInputFormat使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
AvroParquetInputFormat类属于parquet.avro包,在下文中一共展示了AvroParquetInputFormat类的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: run
import parquet.avro.AvroParquetInputFormat; //导入依赖的package包/类
/**
 * The MapReduce driver - sets up and launches the job.
 *
 * <p>Reads the input and output locations from the command line, configures
 * {@code AvroParquetInputFormat} as the input format and {@code AvroParquetOutputFormat}
 * (using {@code StockAvg.SCHEMA$}) as the output format, then waits for the job to finish.
 *
 * @param args the command-line arguments
 * @return the process exit code: the non-zero value returned by {@code cli.runCmd()} if any,
 *         otherwise 0 when the job completes successfully and 1 when it does not
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        // Stop early and hand the CLI's own exit code back to the caller.
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();

    // Job.getInstance replaces the deprecated Job(Configuration) constructor.
    Job job = Job.getInstance(conf);
    job.setJarByClass(AvroParquetMapReduce.class);

    // Input side.
    job.setInputFormatClass(AvroParquetInputFormat.class);
    AvroParquetInputFormat.setInputPaths(job, inputPath);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    // Output side: written through AvroParquetOutputFormat using the StockAvg schema.
    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    AvroParquetOutputFormat.setSchema(job, StockAvg.SCHEMA$);

    return job.waitForCompletion(true) ? 0 : 1;
}
示例2: run
import parquet.avro.AvroParquetInputFormat; //导入依赖的package包/类
/**
 * The MapReduce driver - sets up and launches the job.
 *
 * <p>Configures a map-only job (zero reduce tasks) that reads through
 * {@code AvroParquetInputFormat} using a renamed copy of the {@code Stock} schema as the Avro
 * read schema, and writes through {@code AvroParquetOutputFormat}.
 *
 * @param args the command-line arguments
 * @return the process exit code: the non-zero value returned by {@code cli.runCmd()} if any,
 *         otherwise 0 when the job completes successfully and 1 when it does not
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        // Stop early and hand the CLI's own exit code back to the caller.
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();

    // Job.getInstance replaces the deprecated Job(Configuration) constructor.
    Job job = Job.getInstance(conf);
    job.setJarByClass(AvroGenericParquetMapReduce.class);

    job.setInputFormatClass(AvroParquetInputFormat.class);
    AvroParquetInputFormat.setInputPaths(job, inputPath);

    // force Avro to supply us GenericRecord objects in the mapper by mutating the
    // schema and changing the class name
    //
    Schema schema = Schema.createRecord("foobar",
        Stock.SCHEMA$.getDoc(), Stock.SCHEMA$.getNamespace(), false);
    List<Schema.Field> fields = Lists.newArrayList();
    for (Schema.Field field : Stock.SCHEMA$.getFields()) {
        // Build a fresh Field for each original field rather than adding the original instance.
        fields.add(new Schema.Field(field.name(), field.schema(), field.doc(),
            field.defaultValue(), field.order()));
    }
    schema.setFields(fields);
    AvroParquetInputFormat.setAvroReadSchema(job, schema);

    job.setMapperClass(Map.class);

    job.setOutputKeyClass(Void.class);
    job.setOutputValueClass(GenericRecord.class);

    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    // NOTE(review): avroSchema is not declared in this method - presumably a field of the
    // enclosing class; confirm.
    AvroParquetOutputFormat.setSchema(job, avroSchema);

    // Map-only job: no reduce phase.
    job.setNumReduceTasks(0);

    return job.waitForCompletion(true) ? 0 : 1;
}
示例3: run
import parquet.avro.AvroParquetInputFormat; //导入依赖的package包/类
/**
 * The MapReduce driver - sets up and launches the job.
 *
 * <p>Configures {@code AvroParquetInputFormat} with an unbound record filter
 * ({@code GoogleStockFilter}) and a requested projection containing only the {@code symbol}
 * and {@code open} fields of the {@code Stock} schema, then runs a map/reduce job that writes
 * through {@code AvroParquetOutputFormat} using {@code StockAvg.SCHEMA$}.
 *
 * @param args the command-line arguments
 * @return the process exit code: the non-zero value returned by {@code cli.runCmd()} if any,
 *         otherwise 0 when the job completes successfully and 1 when it does not
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    Cli cli = Cli.builder().setArgs(args).addOptions(CliCommonOpts.MrIoOpts.values()).build();
    int result = cli.runCmd();
    if (result != 0) {
        // Stop early and hand the CLI's own exit code back to the caller.
        return result;
    }

    Path inputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.INPUT));
    Path outputPath = new Path(cli.getArgValueAsString(CliCommonOpts.MrIoOpts.OUTPUT));

    Configuration conf = super.getConf();

    // Job.getInstance replaces the deprecated Job(Configuration) constructor.
    Job job = Job.getInstance(conf);
    job.setJarByClass(AvroProjectionParquetMapReduce.class);

    job.setInputFormatClass(AvroParquetInputFormat.class);
    AvroParquetInputFormat.setInputPaths(job, inputPath);

    // predicate pushdown
    AvroParquetInputFormat.setUnboundRecordFilter(job, GoogleStockFilter.class);

    // projection pushdown: same record name/doc/namespace as Stock, but only a subset of fields
    Schema projection = Schema.createRecord(Stock.SCHEMA$.getName(),
        Stock.SCHEMA$.getDoc(), Stock.SCHEMA$.getNamespace(), false);
    List<Schema.Field> fields = Lists.newArrayList();
    for (Schema.Field field : Stock.SCHEMA$.getFields()) {
        // Keep only the two columns requested by the projection.
        if ("symbol".equals(field.name()) || "open".equals(field.name())) {
            fields.add(new Schema.Field(field.name(), field.schema(), field.doc(),
                field.defaultValue(), field.order()));
        }
    }
    projection.setFields(fields);
    AvroParquetInputFormat.setRequestedProjection(job, projection);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setOutputFormatClass(AvroParquetOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    AvroParquetOutputFormat.setSchema(job, StockAvg.SCHEMA$);

    return job.waitForCompletion(true) ? 0 : 1;
}
示例4: readAvro
import parquet.avro.AvroParquetInputFormat; //导入依赖的package包/类
/**
 * Creates a {@code DataSet} of {@code (Void, Person)} tuples from the given input path, read
 * through {@code AvroParquetInputFormat} wrapped in a {@code HadoopInputFormat}.
 *
 * <p>A requested projection ({@code name}, {@code id}, {@code email}, and {@code phone} with
 * only its {@code number} field) and a filter predicate ({@code name == "Felix"}) are
 * registered on the job before the input is created.
 *
 * @param env       the execution environment used to create the input
 * @param inputPath the path added to the job as input
 * @return the data set created from the configured input format
 * @throws IOException declared by the signature; presumably raised by job creation or
 *                     input-path registration
 */
public static DataSet<Tuple2<Void, Person>> readAvro(ExecutionEnvironment env, String inputPath)
        throws IOException {
    Job job = Job.getInstance();

    HadoopInputFormat parquetInput =
            new HadoopInputFormat(new AvroParquetInputFormat(), Void.class, Person.class, job);

    FileInputFormat.addInputPath(job, new Path(inputPath));

    // schema projection: don't read type of phonenumber
    Schema.Field numberField =
            new Schema.Field("number", Schema.create(Schema.Type.BYTES), null, null);
    Schema phoneRecord = Schema.createRecord("PhoneNumber", null, null, false);
    phoneRecord.setFields(Arrays.asList(numberField));
    Schema phoneList = Schema.createArray(phoneRecord);

    // email is a union of bytes and null
    Schema emailType = Schema.createUnion(
            Lists.newArrayList(Schema.create(Schema.Type.BYTES), Schema.create(Schema.Type.NULL)));

    Schema.Field nameField =
            new Schema.Field("name", Schema.create(Schema.Type.BYTES), null, null);
    Schema.Field idField =
            new Schema.Field("id", Schema.create(Schema.Type.INT), null, null);
    Schema.Field emailField = new Schema.Field("email", emailType, null, null);
    Schema.Field phoneField = new Schema.Field("phone", phoneList, null, null);

    Schema personProjection = Schema.createRecord("Person", null, null, false);
    personProjection.setFields(Arrays.asList(nameField, idField, emailField, phoneField));
    AvroParquetInputFormat.setRequestedProjection(job, personProjection);

    // push down predicates: get all persons with name = "Felix"
    BinaryColumn nameColumn = binaryColumn("name");
    FilterPredicate isFelix = eq(nameColumn, Binary.fromString("Felix"));
    ParquetInputFormat.setFilterPredicate(job.getConfiguration(), isFelix);

    return env.createInput(parquetInput);
}