This article collects typical usage examples of the Java class org.apache.parquet.avro.AvroParquetWriter. If you are unsure what AvroParquetWriter is for or how to use it, the curated examples below should help.
The AvroParquetWriter class belongs to the org.apache.parquet.avro package. Nine code examples are shown below, sorted by popularity by default.
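Before the individual examples, here is a minimal, self-contained sketch of the typical builder-based usage. The inline schema and output path are illustrative assumptions, not taken from the examples below:

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

public class AvroParquetWriterSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical schema with two string fields; replace with your own .avsc
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"Pair\",\"fields\":["
        + "{\"name\":\"left\",\"type\":\"string\"},"
        + "{\"name\":\"right\",\"type\":\"string\"}]}");
    // Output path is an assumption; on a cluster this could be an hdfs:// URI
    try (ParquetWriter<GenericRecord> writer = AvroParquetWriter
        .<GenericRecord>builder(new Path("/tmp/pair.parquet"))
        .withSchema(schema)
        .withCompressionCodec(CompressionCodecName.SNAPPY) // requires the Snappy codec on the classpath
        .build()) {
      GenericRecord record = new GenericData.Record(schema);
      record.put("left", "L");
      record.put("right", "R");
      writer.write(record);
    }
  }
}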
Example 1: initWriter
import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
private ParquetWriterWrapper initWriter(Path file, int recordLimit, final FileSystem fileSystem,
final Configuration conf, final CompressionCodecName compression, final int blockSize,
final int pageSize) throws IOException {
numRecords.set(0);
String[] columnNames = converterDescriptor.getColumnConverters().stream()
.map(columnConverterDescriptor -> columnConverterDescriptor.getColumnName())
.toArray(size -> new String[size]);
Schema[] columnTypes = converterDescriptor.getColumnConverters().stream()
.map(columnConverterDescriptor -> columnConverterDescriptor.getTypeDescriptor())
.toArray(size -> new Schema[size]);
avroRecord = ParquetUtils.createAvroRecordSchema(getTableName(), columnNames, columnTypes);
// TODO: confirm whether the writer can reach HDFS without an explicit FileSystem (the fileSystem parameter is currently unused)
writer = AvroParquetWriter.<GenericRecord>builder(file).withCompressionCodec(compression)
.withPageSize(pageSize).withConf(conf).withSchema(avroRecord).build();
return this;
}
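Regarding the TODO above: the builder does not take a FileSystem directly; the target filesystem is resolved from the scheme of the Path together with the Hadoop Configuration passed via withConf(conf). A hedged sketch (the namenode URI is illustrative):

// Assumption: writing to HDFS by fully qualifying the path
Path hdfsFile = new Path("hdfs://namenode:8020/user/example/data.parquet");
ParquetWriter<GenericRecord> hdfsWriter = AvroParquetWriter.<GenericRecord>builder(hdfsFile)
    .withConf(conf)            // conf carries fs.defaultFS and HDFS client settings
    .withSchema(avroRecord)
    .withCompressionCodec(compression)
    .withPageSize(pageSize)
    .build();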
Example 2: generateAvroPrimitiveTypes
import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
static File generateAvroPrimitiveTypes(File parentDir, String filename, int nrows, Date date) throws IOException {
File f = new File(parentDir, filename);
Schema schema = new Schema.Parser().parse(Resources.getResource("PrimitiveAvro.avsc").openStream());
AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(new Path(f.getPath()), schema);
try {
DateFormat format = new SimpleDateFormat("yy-MMM-dd:hh.mm.ss.SSS aaa");
for (int i = 0; i < nrows; i++) {
GenericData.Record record = new GenericRecordBuilder(schema)
.set("mynull", null)
.set("myboolean", i % 2 == 0)
.set("myint", 1 + i)
.set("mylong", 2L + i)
.set("myfloat", 3.1f + i)
.set("mydouble", 4.1 + i)
.set("mydate", format.format(new Date(date.getTime() - (i * 1000 * 3600))))
.set("myuuid", UUID.randomUUID())
.set("mystring", "hello world: " + i)
.set("myenum", i % 2 == 0 ? "a" : "b")
.build();
writer.write(record);
}
} finally {
writer.close();
}
return f;
}
Example 3: createDataFile
import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
private static Path createDataFile() throws IOException {
File parquetFile = File.createTempFile("test-", "." + FILE_EXTENSION);
readerSchema = new Schema.Parser().parse(
ParquetFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people.avsc"));
projectionSchema = new Schema.Parser().parse(
ParquetFileReaderTest.class.getResourceAsStream("/file/reader/schemas/people_projection.avsc"));
try (ParquetWriter<GenericRecord> writer = AvroParquetWriter.<GenericRecord>builder(new Path(parquetFile.toURI()))
.withConf(fs.getConf()).withWriteMode(ParquetFileWriter.Mode.OVERWRITE).withSchema(readerSchema).build()) {
IntStream.range(0, NUM_RECORDS).forEach(index -> {
GenericRecord datum = new GenericData.Record(readerSchema);
datum.put(FIELD_INDEX, index);
datum.put(FIELD_NAME, String.format("%d_name_%s", index, UUID.randomUUID()));
datum.put(FIELD_SURNAME, String.format("%d_surname_%s", index, UUID.randomUUID()));
try {
OFFSETS_BY_INDEX.put(index, Long.valueOf(index));
writer.write(datum);
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
});
}
Path path = new Path(new Path(fsUri), parquetFile.getName());
fs.moveFromLocalFile(new Path(parquetFile.getAbsolutePath()), path);
return path;
}
Example 4: getParquetFileStream
import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
public ParquetWriter<GenericRecord> getParquetFileStream() throws IOException {
Schema avroSchema = getAvroSchema();
Path file = new Path("/tmp/data/EmployeeData" + fileIndex++ + ".parquet");
// build a Parquet writer for the Avro schema
ParquetWriter<GenericRecord> parquetWriter =
AvroParquetWriter.<GenericRecord>builder(file).withSchema(avroSchema).build();
return parquetWriter;
}
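A possible use of the returned writer, assuming hypothetical field names since getAvroSchema() is not shown here:

ParquetWriter<GenericRecord> writer = getParquetFileStream();
try {
  GenericRecord employee = new GenericData.Record(getAvroSchema());
  employee.put("name", "Alice");   // hypothetical field
  employee.put("id", 1);           // hypothetical field
  writer.write(employee);
} finally {
  writer.close();                  // flushes row groups and writes the Parquet footer
}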
Example 5: write
import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
/**
 * Writes Avro-format data to a Parquet file.
 *
 * @param parquetPath path of the Parquet file to write
 */
public void write(String parquetPath) {
Schema.Parser parser = new Schema.Parser();
try {
Schema schema = parser.parse(AvroParquetOperation.class.getClassLoader().getResourceAsStream("StringPair.avsc"));
GenericRecord datum = new GenericData.Record(schema);
datum.put("left", "L");
datum.put("right", "R");
Path path = new Path(parquetPath);
System.out.println(path);
AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(path, schema);
writer.write(datum);
writer.close();
} catch (IOException e) {
e.printStackTrace();
}
}
Example 6: call
import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
@Override
public Job call() throws Exception {
// We're explicitly disabling speculative execution
conf.set("mapreduce.map.speculative", "false");
conf.set("mapreduce.map.maxattempts", "1");
MapreduceUtils.addJarsToJob(conf,
SemanticVersion.class,
ParquetWriter.class,
AvroParquetWriter.class,
FsInput.class,
CompressionCodec.class,
ParquetProperties.class,
BytesInput.class
);
Job job = Job.getInstance(conf);
// IO formats
job.setInputFormatClass(AvroParquetInputFormat.class);
job.setOutputFormatClass(NullOutputFormat.class);
// Mapper & job output
job.setMapperClass(AvroParquetConvertMapper.class);
job.setOutputKeyClass(NullWritable.class);
job.setOutputValueClass(NullWritable.class);
// It's map only job
job.setNumReduceTasks(0);
// General configuration
job.setJarByClass(getClass());
return job;
}
Example 7: AvroParquetFileWriter
import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
public AvroParquetFileWriter(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
Path path = new Path(logFilePath.getLogFilePath());
LOG.debug("Creating Brand new Writer for path {}", path);
CompressionCodecName codecName = CompressionCodecName
.fromCompressionCodec(codec != null ? codec.getClass() : null);
topic = logFilePath.getTopic();
// Not setting blockSize, pageSize, enableDictionary, or validation
writer = AvroParquetWriter.builder(path)
.withSchema(schemaRegistryClient.getSchema(topic))
.withCompressionCodec(codecName)
.build();
}
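If those options were needed, the builder inherited from ParquetWriter.Builder exposes them; a hedged sketch with illustrative values:

writer = AvroParquetWriter.builder(path)
    .withSchema(schemaRegistryClient.getSchema(topic))
    .withCompressionCodec(codecName)
    .withRowGroupSize(128 * 1024 * 1024)   // block (row group) size, illustrative
    .withPageSize(1024 * 1024)             // page size, illustrative
    .withDictionaryEncoding(true)
    .withValidation(false)
    .build();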
Example 8: run
import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
Preconditions.checkArgument(targets != null && targets.size() == 1,
"CSV path is required.");
if (header != null) {
// if a header is given on the command line, don't assume one is in the file
noHeader = true;
}
CSVProperties props = new CSVProperties.Builder()
.delimiter(delimiter)
.escape(escape)
.quote(quote)
.header(header)
.hasHeader(!noHeader)
.linesToSkip(linesToSkip)
.charset(charsetName)
.build();
String source = targets.get(0);
Schema csvSchema;
if (avroSchemaFile != null) {
csvSchema = Schemas.fromAvsc(open(avroSchemaFile));
} else {
Set<String> required = ImmutableSet.of();
if (requiredFields != null) {
required = ImmutableSet.copyOf(requiredFields);
}
String filename = new File(source).getName();
String recordName;
if (filename.contains(".")) {
recordName = filename.substring(0, filename.indexOf("."));
} else {
recordName = filename;
}
csvSchema = AvroCSV.inferNullableSchema(
recordName, open(source), props, required);
}
long count = 0;
try (AvroCSVReader<Record> reader = new AvroCSVReader<>(
open(source), props, csvSchema, Record.class, true)) {
CompressionCodecName codec = Codecs.parquetCodec(compressionCodecName);
try (ParquetWriter<Record> writer = AvroParquetWriter
.<Record>builder(qualifiedPath(outputPath))
.withWriterVersion(v2 ? PARQUET_2_0 : PARQUET_1_0)
.withWriteMode(overwrite ?
ParquetFileWriter.Mode.OVERWRITE : ParquetFileWriter.Mode.CREATE)
.withCompressionCodec(codec)
.withDictionaryEncoding(true)
.withDictionaryPageSize(dictionaryPageSize)
.withPageSize(pageSize)
.withRowGroupSize(rowGroupSize)
.withDataModel(GenericData.get())
.withConf(getConf())
.withSchema(csvSchema)
.build()) {
for (Record record : reader) {
writer.write(record);
}
} catch (RuntimeException e) {
throw new RuntimeException("Failed on record " + count, e);
}
}
return 0;
}
Example 9: run
import org.apache.parquet.avro.AvroParquetWriter; // import the required package/class
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
Preconditions.checkArgument(targets != null && targets.size() == 1,
"A data file is required.");
String source = targets.get(0);
CompressionCodecName codec = Codecs.parquetCodec(compressionCodecName);
Schema schema;
if (avroSchemaFile != null) {
schema = Schemas.fromAvsc(open(avroSchemaFile));
} else {
schema = getAvroSchema(source);
}
Schema projection = filterSchema(schema, columns);
Path outPath = qualifiedPath(outputPath);
FileSystem outFS = outPath.getFileSystem(getConf());
if (overwrite && outFS.exists(outPath)) {
console.debug("Deleting output file {} (already exists)", outPath);
outFS.delete(outPath);
}
Iterable<Record> reader = openDataFile(source, projection);
boolean threw = true;
long count = 0;
try {
try (ParquetWriter<Record> writer = AvroParquetWriter
.<Record>builder(qualifiedPath(outputPath))
.withWriterVersion(v2 ? PARQUET_2_0 : PARQUET_1_0)
.withConf(getConf())
.withCompressionCodec(codec)
.withRowGroupSize(rowGroupSize)
.withDictionaryPageSize(dictionaryPageSize < 64 ? 64 : dictionaryPageSize)
.withDictionaryEncoding(dictionaryPageSize != 0)
.withPageSize(pageSize)
.withDataModel(GenericData.get())
.withSchema(projection)
.build()) {
for (Record record : reader) {
writer.write(record);
count += 1;
}
}
threw = false;
} catch (RuntimeException e) {
throw new RuntimeException("Failed on record " + count, e);
} finally {
if (reader instanceof Closeable) {
Closeables.close((Closeable) reader, threw);
}
}
return 0;
}