This article collects typical usage examples of the Java method org.apache.avro.file.DataFileWriter.setCodec. If you have been wondering what DataFileWriter.setCodec does, how to call it, or what it looks like in real code, the curated samples below should help. You can also explore further usages of the enclosing class, org.apache.avro.file.DataFileWriter.
The following presents 15 code examples of DataFileWriter.setCodec, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code samples.
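Before the project samples, here is a minimal self-contained sketch of the basic pattern (not drawn from any of the projects below; the class name, one-field schema, and output path are invented for illustration): setCodec must be called before create(), and the chosen codec is applied to every data block of the container file.
import java.io.File;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class SetCodecSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical one-field schema, purely for illustration.
        Schema schema = new Schema.Parser().parse(
            "{\"type\":\"record\",\"name\":\"User\","
                + "\"fields\":[{\"name\":\"name\",\"type\":\"string\"}]}");
        try (DataFileWriter<GenericRecord> writer =
                 new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
            // Configure the codec before create(); an already-open writer
            // rejects setCodec with an AvroRuntimeException.
            writer.setCodec(CodecFactory.deflateCodec(6)); // or snappyCodec(), nullCodec(), ...
            writer.create(schema, new File("users.avro")); // placeholder output path
            GenericRecord user = new GenericData.Record(schema);
            user.put("name", "alice");
            writer.append(user); // compressed block by block at sync points and on close
        }
    }
}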
Example 1: writeToStream
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
/**
* Writes the given {@link DataEvent}s from the eventStream into an
* {@link OutputStream} using Avro's object container format (see
* {@link DataFileWriter}). Please note: as this method obtains the
* {@link OutputStream} from the {@link Supplier}, the {@link OutputStream}
* is also closed by this method.
*
* @param eventStream
* a stream of valid and normalized {@link DataEvent}s
* @param outSupplier
* a {@link Supplier} of an output stream
* @throws IOException
* if writing to the stream fails
*/
public static void writeToStream(Stream<DataEvent> eventStream, Supplier<OutputStream> outSupplier) throws IOException {
final OutputStream out = outSupplier.get();
@Cleanup
final DataFileWriter<DataEvent> writer = new DataFileWriter<>(DataEventSerializer.getWRITER());
writer.setSyncInterval(1024 * 1024);
writer.setCodec(CodecFactory.deflateCodec(9));
writer.setMeta("created_at", new Date().getTime());
writer.create(DataEvent.SCHEMA$, out);
eventStream.forEach(event -> {
try {
writer.append(event);
} catch (Exception e) {
throw new RuntimeException(e);
}
});
}
Example 2: close
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
@Override
public void close(TaskAttemptContext context) throws IOException {
// Create an Avro container file and a writer to it.
DataFileWriter<K> avroFileWriter;
avroFileWriter = new DataFileWriter<K>(new ReflectDatumWriter<K>(writerSchema));
avroFileWriter.setCodec(compressionCodec);
// Write the metadata.
avroFileWriter.setMeta(Constants.AVRO_NUMBER_OF_RECORDS, this.numberOfRecords);
// Write the header, then append the buffered records.
avroFileWriter.create(this.writerSchema, this.outputStream);
for (AvroKey<K> record : this.recordsList)
avroFileWriter.append(record.datum());
// Close the stream.
avroFileWriter.close();
}
Example 3: writeToAvro
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
public static void writeToAvro(File inputFile, OutputStream outputStream)
throws IOException {
DataFileWriter<Stock> writer =
new DataFileWriter<Stock>(
new SpecificDatumWriter<Stock>());
writer.setCodec(CodecFactory.snappyCodec());
writer.create(Stock.SCHEMA$, outputStream);
for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {
writer.append(stock);
}
IOUtils.closeStream(writer);
IOUtils.closeStream(outputStream);
}
Example 4: writeToAvro
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
public static void writeToAvro(File inputFile, OutputStream outputStream)
throws IOException {
DataFileWriter<GenericRecord> writer =
new DataFileWriter<GenericRecord>(
new GenericDatumWriter<GenericRecord>());
writer.setCodec(CodecFactory.snappyCodec());
writer.create(SCHEMA, outputStream);
for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {
AvroKeyValue<CharSequence, Stock> record
= new AvroKeyValue<CharSequence, Stock>(new GenericData.Record(SCHEMA));
record.setKey(stock.getSymbol());
record.setValue(stock);
writer.append(record.get());
}
IOUtils.closeStream(writer);
IOUtils.closeStream(outputStream);
}
Example 5: writeToAvro
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
public static void writeToAvro(File srcPath,
OutputStream outputStream)
throws IOException {
DataFileWriter<Object> writer =
new DataFileWriter<Object>(
new GenericDatumWriter<Object>())
.setSyncInterval(100);
writer.setCodec(CodecFactory.snappyCodec());
writer.create(SCHEMA, outputStream);
for (Object obj : FileUtils.listFiles(srcPath, null, false)) {
File file = (File) obj;
String filename = file.getAbsolutePath();
byte[] content = FileUtils.readFileToByteArray(file);
GenericRecord record = new GenericData.Record(SCHEMA);
record.put(FIELD_FILENAME, filename);
record.put(FIELD_CONTENTS, ByteBuffer.wrap(content));
writer.append(record);
System.out.println(
file.getAbsolutePath()
+ ": "
+ DigestUtils.md5Hex(content));
}
IOUtils.cleanup(null, writer);
IOUtils.cleanup(null, outputStream);
}
Example 6: configureDataFileWriter
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
static void configureDataFileWriter(DataFileWriter<GenericData.Record> writer,
JobConf job) throws UnsupportedEncodingException {
if (FileOutputFormat.getCompressOutput(job)) {
int level = job.getInt(DEFLATE_LEVEL_KEY,
DEFAULT_DEFLATE_LEVEL);
String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
CodecFactory factory = codecName.equals(DEFLATE_CODEC)
? CodecFactory.deflateCodec(level)
: CodecFactory.fromString(codecName);
writer.setCodec(factory);
}
writer.setSyncInterval(job.getInt(SYNC_INTERVAL_KEY,
DEFAULT_SYNC_INTERVAL));
// copy metadata from job
for (Map.Entry<String,String> e : job) {
if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()),
e.getValue());
if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
URLDecoder.decode(e.getValue(), "ISO-8859-1")
.getBytes("ISO-8859-1"));
}
}
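For reference, the configuration keys this method reads are set on the driver side when the job is defined. A minimal sketch of that setup using org.apache.hadoop.mapred.JobConf and FileOutputFormat (treat it as an assumption about a typical driver; the literal key strings match the constant values visible in Example 15):
JobConf job = new JobConf();
// Make FileOutputFormat.getCompressOutput(job) return true.
FileOutputFormat.setCompressOutput(job, true);
// AvroJob.OUTPUT_CODEC: "deflate" and "snappy" are typical values.
job.set("avro.output.codec", "deflate");
// DEFLATE_LEVEL_KEY: consulted only when the codec is "deflate".
job.setInt("avro.mapred.deflate.level", 7);
// SYNC_INTERVAL_KEY: approximate number of bytes between sync markers (the block size).
job.setInt("avro.mapred.sync.interval", 1 << 20);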
Example 7: getRecordWriter
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
@Override
public RecordWriter<NullWritable, Object> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
if (schema == null)
throw new IOException("Must provide a schema");
Configuration conf = context.getConfiguration();
DataFileWriter<Object> writer = new DataFileWriter<Object>(new PigAvroDatumWriter(schema));
if (FileOutputFormat.getCompressOutput(context)) {
int level = conf.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
String codecName = conf.get(OUTPUT_CODEC, DEFLATE_CODEC);
CodecFactory factory = codecName.equals(DEFLATE_CODEC)
? CodecFactory.deflateCodec(level)
: CodecFactory.fromString(codecName);
writer.setCodec(factory);
}
Path path = getDefaultWorkFile(context, EXT);
writer.create(schema, path.getFileSystem(conf).create(path));
return new PigAvroRecordWriter(writer);
}
Example 8: configureDataFileWriter
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
static <K> void configureDataFileWriter(DataFileWriter<K> writer,
JobConf job) throws UnsupportedEncodingException {
if (FileOutputFormat.getCompressOutput(job)) {
int level = job.getInt(org.apache.avro.mapred.AvroOutputFormat.DEFLATE_LEVEL_KEY,
org.apache.avro.mapred.AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
String codecName = job.get(AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
CodecFactory factory = codecName.equals(DEFLATE_CODEC) ?
CodecFactory.deflateCodec(level) : CodecFactory.fromString(codecName);
writer.setCodec(factory);
}
writer.setSyncInterval(job.getInt(org.apache.avro.mapred.AvroOutputFormat.SYNC_INTERVAL_KEY,
DEFAULT_SYNC_INTERVAL));
// copy metadata from job
for (Map.Entry<String,String> e : job) {
if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
URLDecoder.decode(e.getValue(), "ISO-8859-1")
.getBytes("ISO-8859-1"));
}
}
Example 9: writeToAvro
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
@SuppressWarnings("resource")
public static void writeToAvro(File inputFile, OutputStream outputStream)
throws IOException {
DataFileWriter<Stock> writer = //a writer to write Avro
new DataFileWriter<Stock>(new SpecificDatumWriter<Stock>()).setSyncInterval(100);
writer.setCodec(CodecFactory.snappyCodec());
writer.create(Stock.SCHEMA$, outputStream); //identify the schema
for (String line : FileUtils.readLines(inputFile)) {
writer.append(createStock(line)); //write to Avro file
}
IOUtils.closeStream(writer);
IOUtils.closeStream(outputStream);
}
Example 10: configureDataFileWriter
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
static <T> void configureDataFileWriter(DataFileWriter<T> writer,
TaskAttemptContext context) throws UnsupportedEncodingException {
if (FileOutputFormat.getCompressOutput(context)) {
int level = context.getConfiguration()
.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
String codecName = context.getConfiguration()
.get(org.apache.avro.mapred.AvroJob.OUTPUT_CODEC, DEFLATE_CODEC);
CodecFactory factory =
codecName.equals(DEFLATE_CODEC) ? CodecFactory.deflateCodec(level)
: CodecFactory.fromString(codecName);
writer.setCodec(factory);
}
writer.setSyncInterval(context.getConfiguration()
.getInt(SYNC_INTERVAL_KEY, DEFAULT_SYNC_INTERVAL));
// copy metadata from job
for (Map.Entry<String, String> e : context.getConfiguration()) {
if (e.getKey().startsWith(org.apache.avro.mapred.AvroJob.TEXT_PREFIX)) {
writer.setMeta(e.getKey()
.substring(org.apache.avro.mapred.AvroJob.TEXT_PREFIX.length()),
e.getValue());
}
if (e.getKey().startsWith(org.apache.avro.mapred.AvroJob.BINARY_PREFIX)) {
writer.setMeta(e.getKey()
.substring(org.apache.avro.mapred.AvroJob.BINARY_PREFIX.length()),
URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
}
}
}
Example 11: open
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
@Override
public void open(WritableByteChannel channel) throws IOException {
this.schema = new Schema.Parser().parse(getJsonSchema());
DataFileWriter<?> writer;
if (getRecordFormatter() == null) {
writer = reflectWriter = new DataFileWriter<>(new ReflectDatumWriter<ElementT>(schema));
} else {
writer =
genericWriter = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema));
}
writer.setCodec(getCodec().getCodec());
for (Map.Entry<String, Object> entry : getMetadata().entrySet()) {
Object v = entry.getValue();
if (v instanceof String) {
writer.setMeta(entry.getKey(), (String) v);
} else if (v instanceof Long) {
writer.setMeta(entry.getKey(), (Long) v);
} else if (v instanceof byte[]) {
writer.setMeta(entry.getKey(), (byte[]) v);
} else {
throw new IllegalStateException(
"Metadata value type must be one of String, Long, or byte[]. Found "
+ v.getClass().getSimpleName());
}
}
writer.create(schema, Channels.newOutputStream(channel));
}
Example 12: writeAVROFile
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
/**
* Writes an Avro file with the contents of listGenericRecord.
*
* @param avroFile File referencing the target file
* @param schema Schema with the Avro schema
* @param listGenericRecord List with the records to write
* @param codec CodecFactory with the compression codec to use
* @throws IOException if the file cannot be written
*/
public static void writeAVROFile(File avroFile, Schema schema, List<GenericRecord> listGenericRecord, CodecFactory codec) throws IOException {
DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
dataFileWriter.setCodec(codec);
dataFileWriter.create(schema, avroFile);
for (GenericRecord genericRecord : listGenericRecord) {
dataFileWriter.append(genericRecord);
}
dataFileWriter.close();
}
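A hypothetical call site for the method above (the schema string and record are invented for illustration; it needs java.util.Collections on top of the Avro imports already in use), showing that the compression codec is chosen by the caller rather than hard-coded in the writer:
Schema schema = new Schema.Parser().parse(
    "{\"type\":\"record\",\"name\":\"Point\","
        + "\"fields\":[{\"name\":\"x\",\"type\":\"int\"},{\"name\":\"y\",\"type\":\"int\"}]}");
GenericRecord point = new GenericData.Record(schema);
point.put("x", 1);
point.put("y", 2);
// The caller decides how the container file is compressed.
writeAVROFile(new File("points.avro"), schema,
    Collections.singletonList(point), CodecFactory.snappyCodec());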
Example 13: createDataFileWriter
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
/**
* Create a new {@link DataFileWriter} for writing Avro records.
*
* @param codecFactory a {@link CodecFactory} object for building the compression codec
* @return a {@link DataFileWriter} already opened on the staging file output stream
* @throws IOException if something goes wrong while creating the {@link DataFileWriter}
*/
private DataFileWriter<GenericRecord> createDataFileWriter(CodecFactory codecFactory) throws IOException {
@SuppressWarnings("resource")
DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(this.datumWriter);
writer.setCodec(codecFactory);
// Open the file and return the DataFileWriter
return writer.create(this.schema, this.stagingFileOutputStream);
}
Example 14: getRecordWriter
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
@Override
public RecordWriter<Text, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
Progressable progress) throws IOException {
if (schema == null) {
SchemaLoader loader = new SchemaLoader(job);
this.schema = loader.load(
job.get(SCHEMA_LITERAL), job.get(SCHEMA_URL), job.get(SCHEMA_TYPE_NAME));
this.converter = new JsonConverter(schema);
this.readKey = job.getBoolean(READ_KEY, true);
}
DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
new GenericDatumWriter<GenericRecord>(schema));
if (getCompressOutput(job)) {
int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
String codecName = job.get(AvroJob.CONF_OUTPUT_CODEC,
org.apache.avro.file.DataFileConstants.DEFLATE_CODEC);
CodecFactory codec = codecName.equals(DataFileConstants.DEFLATE_CODEC)
? CodecFactory.deflateCodec(level)
: CodecFactory.fromString(codecName);
writer.setCodec(codec);
}
writer.setSyncInterval(job.getInt(AvroOutputFormat.SYNC_INTERVAL_KEY,
DataFileConstants.DEFAULT_SYNC_INTERVAL));
Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
writer.create(schema, path.getFileSystem(job).create(path));
return new AvroAsJSONRecordWriter(writer, converter, readKey);
}
Example 15: createDataFileWriter
import org.apache.avro.file.DataFileWriter; // import the package/class this method depends on
private DataFileWriter<GenericRecord> createDataFileWriter(DataFileReader<GenericRecord> dataFileReader)
throws IllegalArgumentException, IOException {
Schema schema = dataFileReader.getSchema();
DatumWriter<GenericRecord> datumWriter =
new GenericDatumWriter<GenericRecord>(schema);
DataFileWriter<GenericRecord> writer =
new DataFileWriter<GenericRecord>(datumWriter);
// Get the codec of the reader
String codecStr = dataFileReader.getMetaString(DataFileConstants.CODEC);
int level = conf.getInt("avro.mapred.deflate.level", 1);
String codecName = conf.get("avro.output.codec", codecStr);
CodecFactory factory =
codecName.equals("deflate") ? CodecFactory.deflateCodec(level)
: CodecFactory.fromString(codecName);
// Set the codec of the writer
writer.setCodec(factory);
writer.setSyncInterval(conf.getInt("avro.mapred.sync.interval",
Math.max(conf.getInt("io.file.buffer.size",
16000), 16000)));
writer.create(schema,
new Path(tempFileName).getFileSystem(conf)
.create(new Path(tempFileName)));
return writer;
}
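Complementing Example 15: a reader never needs to know which codec the writer's setCodec chose, because DataFileReader decompresses transparently; the codec name travels in the container header under the "avro.codec" metadata key. A minimal sketch, assuming an existing container file at a placeholder path (class name invented):
import java.io.File;
import java.io.IOException;
import org.apache.avro.file.DataFileConstants;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class ReadCodecSketch {
    public static void main(String[] args) throws IOException {
        File avroFile = new File("users.avro"); // placeholder path
        try (DataFileReader<GenericRecord> reader =
                 new DataFileReader<>(avroFile, new GenericDatumReader<GenericRecord>())) {
            // DataFileConstants.CODEC is the "avro.codec" metadata key.
            System.out.println("codec: " + reader.getMetaString(DataFileConstants.CODEC));
            for (GenericRecord record : reader) {
                System.out.println(record);
            }
        }
    }
}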