This article collects typical usage examples of the Java class org.apache.avro.file.DataFileWriter. If you are wondering what DataFileWriter is, what it does, or how to use it, the curated examples below should help.
The DataFileWriter class belongs to the org.apache.avro.file package. The following 15 code examples show how it is used, sorted by popularity by default.
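Before the examples, here is a minimal sketch of the typical DataFileWriter lifecycle: create() writes the container header, append() adds records, and close() flushes the last block. The schema and file name below are hypothetical, for illustration only:

import java.io.File;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;

public class DataFileWriterLifecycle {
    public static void main(String[] args) throws IOException {
        // Hypothetical one-field schema, for illustration only.
        Schema schema = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"Example\",\"fields\":[{\"name\":\"id\",\"type\":\"int\"}]}");
        // try-with-resources closes the writer, which also closes the file.
        try (DataFileWriter<GenericRecord> writer =
                new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
            writer.create(schema, new File("example.avro"));
            GenericRecord record = new GenericData.Record(schema);
            record.put("id", 1);
            writer.append(record);
        }
    }
}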
Example 1: createAvroFile
import org.apache.avro.file.DataFileWriter; // import the required package/class
public File createAvroFile(String fileName, long recordCount, File parent) throws Exception {
final File target = FileTestUtil.file(testClass, fileName, parent);
try (DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
if (codecFactory != null) {
writer.setCodec(codecFactory);
}
writer.create(schema, target);
for (long i = 0; i < recordCount; i++) {
writer.append(recordCreatorFn.apply(schema, i));
}
}
return target;
}
Example 2: createDataFile
import org.apache.avro.file.DataFileWriter; // import the required package/class
private static Path createDataFile() throws IOException {
File avroFile = File.createTempFile("test-", "." + FILE_EXTENSION);
DatumWriter<GenericRecord> writer = new GenericDatumWriter<>(schema);
try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(writer)) {
dataFileWriter.setFlushOnEveryBlock(true);
dataFileWriter.setSyncInterval(32);
dataFileWriter.create(schema, avroFile);
IntStream.range(0, NUM_RECORDS).forEach(index -> {
GenericRecord datum = new GenericData.Record(schema);
datum.put(FIELD_INDEX, index);
datum.put(FIELD_NAME, String.format("%d_name_%s", index, UUID.randomUUID()));
datum.put(FIELD_SURNAME, String.format("%d_surname_%s", index, UUID.randomUUID()));
try {
// sync() returns the position just after the 16-byte Avro sync marker;
// subtracting 16 records the marker's start as the seekable offset.
OFFSETS_BY_INDEX.put(index, dataFileWriter.sync() - 16L);
dataFileWriter.append(datum);
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
});
}
Path path = new Path(new Path(fsUri), avroFile.getName());
fs.moveFromLocalFile(new Path(avroFile.getAbsolutePath()), path);
return path;
}
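The offsets recorded in OFFSETS_BY_INDEX only make sense together with a reader. Below is a hedged read-back sketch, assuming the path, fs, and OFFSETS_BY_INDEX fields from this example; FsInput (from avro-mapred) adapts an HDFS path to Avro's SeekableInput:

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.FsInput;

private static GenericRecord readFirstRecord() throws IOException {
    try (DataFileReader<GenericRecord> reader = new DataFileReader<>(
            new FsInput(path, fs.getConf()), new GenericDatumReader<GenericRecord>())) {
        reader.sync(OFFSETS_BY_INDEX.get(0)); // scan forward to the sync marker recorded above
        return reader.hasNext() ? reader.next() : null;
    }
}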
Example 3: configure
import org.apache.avro.file.DataFileWriter; // import the required package/class
@Override
public void configure(Context context) {
int syncIntervalBytes =
context.getInteger(SYNC_INTERVAL_BYTES, DEFAULT_SYNC_INTERVAL_BYTES);
String compressionCodec =
context.getString(COMPRESSION_CODEC, DEFAULT_COMPRESSION_CODEC);
writer = new ReflectDatumWriter<T>(getSchema());
dataFileWriter = new DataFileWriter<T>(writer);
dataFileWriter.setSyncInterval(syncIntervalBytes);
try {
CodecFactory codecFactory = CodecFactory.fromString(compressionCodec);
dataFileWriter.setCodec(codecFactory);
} catch (AvroRuntimeException e) {
logger.warn("Unable to instantiate avro codec with name (" +
compressionCodec + "). Compression disabled. Exception follows.", e);
}
}
Example 4: configure
import org.apache.avro.file.DataFileWriter; // import the required package/class
@Override
public void configure(Context context) {
String consumerKey = context.getString("consumerKey");
String consumerSecret = context.getString("consumerSecret");
String accessToken = context.getString("accessToken");
String accessTokenSecret = context.getString("accessTokenSecret");
twitterStream = new TwitterStreamFactory().getInstance();
twitterStream.setOAuthConsumer(consumerKey, consumerSecret);
twitterStream.setOAuthAccessToken(new AccessToken(accessToken,
accessTokenSecret));
twitterStream.addListener(this);
avroSchema = createAvroSchema();
dataFileWriter = new DataFileWriter<GenericRecord>(
new GenericDatumWriter<GenericRecord>(avroSchema));
maxBatchSize = context.getInteger("maxBatchSize", maxBatchSize);
maxBatchDurationMillis = context.getInteger("maxBatchDurationMillis",
maxBatchDurationMillis);
}
Example 5: writeToStream
import org.apache.avro.file.DataFileWriter; // import the required package/class
/**
* Writes the given {@link DataEvent}s from the eventStream into the
* {@link OutputStream} using Avro's object container format (see
* {@link DataFileWriter}). Please note: as this method obtains the
* {@link OutputStream} from the {@link Supplier}, the stream is also
* closed by this method.
*
* @param eventStream
* a stream of valid and normalized {@link DataEvent}s.
* @param outSupplier
* a {@link Supplier} of the output stream
* @throws IOException
* if writing to the output stream fails
*/
public static void writeToStream(Stream<DataEvent> eventStream, Supplier<OutputStream> outSupplier) throws IOException {
final OutputStream out = outSupplier.get();
@Cleanup // Lombok: closes the writer (and the underlying stream) when the method exits
final DataFileWriter<DataEvent> writer = new DataFileWriter<>(DataEventSerializer.getWRITER());
writer.setSyncInterval(1024 * 1024);
writer.setCodec(CodecFactory.deflateCodec(9));
writer.setMeta("created_at", new Date().getTime());
writer.create(DataEvent.SCHEMA$, out);
eventStream.forEach(event -> {
try {
writer.append(event);
} catch (Exception e) {
throw new RuntimeException(e);
}
});
}
Example 6: MemberInfoDynSer
import org.apache.avro.file.DataFileWriter; // import the required package/class
/**
* Dynamic serialization: parse the schema file at runtime, populate
* records against it, and serialize them.
*
* @throws IOException
*/
public void MemberInfoDynSer() throws IOException {
// 1. Parse the schema file
Parser parser = new Parser();
Schema mSchema = parser.parse(this.getClass().getResourceAsStream("/Members.avsc"));
// 2. Build the datum writer (GenericDatumWriter fits GenericRecord;
// SpecificDatumWriter is meant for generated classes)
DatumWriter<GenericRecord> mGr = new GenericDatumWriter<GenericRecord>(mSchema);
DataFileWriter<GenericRecord> mDfw = new DataFileWriter<GenericRecord>(mGr);
// 3. Create the serialization file
mDfw.create(mSchema, new File("/Users/a/Desktop/tmp/members.avro"));
// 4. Append the records
for (int i = 0; i < 20; i++) {
GenericRecord gr = new GenericData.Record(mSchema);
int r = i * new Random().nextInt(50);
gr.put("userName", "light-" + r);
gr.put("userPwd", "2016-" + r);
gr.put("realName", "滔滔" + r + "号");
mDfw.append(gr);
}
// 5. Close the data file writer
mDfw.close();
System.out.println("Dyn Builder Ser Start Complete.");
}
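A possible read-back for the file written above; since the container file embeds the schema, the .avsc is not needed on the reading side. A sketch, assuming the same output path:

import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

try (DataFileReader<GenericRecord> reader = new DataFileReader<>(
        new File("/Users/a/Desktop/tmp/members.avro"), new GenericDatumReader<GenericRecord>())) {
    for (GenericRecord member : reader) { // DataFileReader is Iterable
        System.out.println(member.get("userName") + " / " + member.get("realName"));
    }
}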
Example 7: close
import org.apache.avro.file.DataFileWriter; // import the required package/class
@Override
public void close(TaskAttemptContext context) throws IOException {
// Create an Avro container file and a writer to it.
DataFileWriter<K> avroFileWriter = new DataFileWriter<K>(new ReflectDatumWriter<K>(writerSchema));
avroFileWriter.setCodec(compressionCodec);
// Write the metadata.
avroFileWriter.setMeta(Constants.AVRO_NUMBER_OF_RECORDS, this.numberOfRecords);
// Write the file header, then the buffered records.
avroFileWriter.create(this.writerSchema, this.outputStream);
for (AvroKey<K> record : this.recordsList) {
avroFileWriter.append(record.datum());
}
// Close the stream.
avroFileWriter.close();
}
Example 8: putRecords
import org.apache.avro.file.DataFileWriter; // import the required package/class
public static byte[] putRecords(Collection<SinkRecord> records, AvroData avroData) throws IOException {
final DataFileWriter<Object> writer = new DataFileWriter<>(new GenericDatumWriter<>());
ByteArrayOutputStream out = new ByteArrayOutputStream();
Schema schema = null;
for (SinkRecord record : records) {
if (schema == null) {
schema = record.valueSchema();
org.apache.avro.Schema avroSchema = avroData.fromConnectSchema(schema);
writer.create(avroSchema, out);
}
Object value = avroData.fromConnectData(schema, record.value());
// AvroData wraps primitive types so their schema can be included. We need to unwrap
// NonRecordContainers to just their value to properly handle these types
if (value instanceof NonRecordContainer) {
value = ((NonRecordContainer) value).getValue();
}
writer.append(value);
}
writer.flush();
return out.toByteArray();
}
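To consume the byte[] produced by putRecords, the container can be parsed entirely in memory. A sketch using Avro's SeekableByteArrayInput; readRecords is a hypothetical helper, not part of the original class:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.generic.GenericDatumReader;

public static List<Object> readRecords(byte[] bytes) throws IOException {
    List<Object> results = new ArrayList<>();
    try (DataFileReader<Object> reader = new DataFileReader<>(
            new SeekableByteArrayInput(bytes), new GenericDatumReader<Object>())) {
        for (Object datum : reader) {
            results.add(datum);
        }
    }
    return results;
}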
Example 9: writeRowsHelper
import org.apache.avro.file.DataFileWriter; // import the required package/class
private void writeRowsHelper(List<TableRow> rows, Schema avroSchema,
String destinationPattern, int shard) throws IOException {
String filename = destinationPattern.replace("*", String.format("%012d", shard));
try (WritableByteChannel channel = FileSystems.create(
FileSystems.matchNewResource(filename, false /* isDirectory */), MimeTypes.BINARY);
DataFileWriter<GenericRecord> tableRowWriter =
new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(avroSchema))
.create(avroSchema, Channels.newOutputStream(channel))) {
for (Map<String, Object> record : rows) {
GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(avroSchema);
for (Map.Entry<String, Object> field : record.entrySet()) {
genericRecordBuilder.set(field.getKey(), field.getValue());
}
tableRowWriter.append(genericRecordBuilder.build());
}
} catch (IOException e) {
throw new IllegalStateException(
String.format("Could not create destination for extract job %s", filename), e);
}
}
Example 10: AvroKeyValueWriter
import org.apache.avro.file.DataFileWriter; // import the required package/class
AvroKeyValueWriter(Schema keySchema, Schema valueSchema,
CodecFactory compressionCodec, OutputStream outputStream,
int syncInterval) throws IOException {
// Create the generic record schema for the key/value pair.
mKeyValuePairSchema = AvroKeyValue
.getSchema(keySchema, valueSchema);
// Create an Avro container file and a writer to it.
DatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(
mKeyValuePairSchema);
mAvroFileWriter = new DataFileWriter<GenericRecord>(
genericDatumWriter);
mAvroFileWriter.setCodec(compressionCodec);
mAvroFileWriter.setSyncInterval(syncInterval);
mAvroFileWriter.create(mKeyValuePairSchema, outputStream);
// Create a reusable output record.
mOutputRecord = new AvroKeyValue<Object, Object>(
new GenericData.Record(mKeyValuePairSchema));
}
Example 11: writeToAvro
import org.apache.avro.file.DataFileWriter; // import the required package/class
public static void writeToAvro(File inputFile, OutputStream outputStream)
throws IOException {
DataFileWriter<Stock> writer =
new DataFileWriter<Stock>(
new SpecificDatumWriter<Stock>());
writer.setCodec(CodecFactory.snappyCodec());
writer.create(Stock.SCHEMA$, outputStream);
for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {
writer.append(stock);
}
IOUtils.closeStream(writer);
IOUtils.closeStream(outputStream);
}
Example 12: writeToAvro
import org.apache.avro.file.DataFileWriter; // import the required package/class
public static void writeToAvro(File inputFile, OutputStream outputStream)
throws IOException {
DataFileWriter<GenericRecord> writer =
new DataFileWriter<GenericRecord>(
new GenericDatumWriter<GenericRecord>());
writer.setCodec(CodecFactory.snappyCodec());
writer.create(SCHEMA, outputStream);
for (Stock stock : AvroStockUtils.fromCsvFile(inputFile)) {
AvroKeyValue<CharSequence, Stock> record
= new AvroKeyValue<CharSequence, Stock>(new GenericData.Record(SCHEMA));
record.setKey(stock.getSymbol());
record.setValue(stock);
writer.append(record.get());
}
IOUtils.closeStream(writer);
IOUtils.closeStream(outputStream);
}
Example 13: writeToAvro
import org.apache.avro.file.DataFileWriter; // import the required package/class
public static void writeToAvro(File srcPath,
OutputStream outputStream)
throws IOException {
DataFileWriter<Object> writer =
new DataFileWriter<Object>(
new GenericDatumWriter<Object>())
.setSyncInterval(100);
writer.setCodec(CodecFactory.snappyCodec());
writer.create(SCHEMA, outputStream);
for (Object obj : FileUtils.listFiles(srcPath, null, false)) {
File file = (File) obj;
String filename = file.getAbsolutePath();
byte[] content = FileUtils.readFileToByteArray(file);
GenericRecord record = new GenericData.Record(SCHEMA);
record.put(FIELD_FILENAME, filename);
record.put(FIELD_CONTENTS, ByteBuffer.wrap(content));
writer.append(record);
System.out.println(
file.getAbsolutePath()
+ ": "
+ DigestUtils.md5Hex(content));
}
IOUtils.cleanup(null, writer);
IOUtils.cleanup(null, outputStream);
}
Example 14: serializing
import org.apache.avro.file.DataFileWriter; // import the required package/class
/**
* Serialize our Users to disk.
*/
private void serializing(List<User> listUsers) {
long tiempoInicio = System.currentTimeMillis();
// We create a DatumWriter, which converts Java objects into an in-memory serialized format.
// The SpecificDatumWriter class is used with generated classes and extracts the schema from the specified generated type.
DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
// We create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call.
DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter);
try {
File file = createFile();
dataFileWriter.create(listUsers.get(0).getSchema(), file);
for (User user : listUsers) {
// We write our users to the file via calls to the dataFileWriter.append method.
dataFileWriter.append(user);
}
// When we are done writing, we close the data file.
dataFileWriter.close();
} catch (IOException e) {
e.printStackTrace();
}
terminaProceso("serializing", tiempoInicio);
}
Example 15: serialise
import org.apache.avro.file.DataFileWriter; // import the required package/class
@Override
public byte[] serialise(final Object object) throws SerialisationException {
Schema schema = ReflectData.get().getSchema(object.getClass());
DatumWriter<Object> datumWriter = new ReflectDatumWriter<>(schema);
DataFileWriter<Object> dataFileWriter = new DataFileWriter<>(datumWriter);
ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
try {
dataFileWriter.create(schema, byteOut);
dataFileWriter.append(object);
dataFileWriter.flush();
} catch (final IOException e) {
throw new SerialisationException("Unable to serialise given object of class: " + object.getClass().getName(), e);
} finally {
close(dataFileWriter);
}
return byteOut.toByteArray();
}
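A hedged counterpart to serialise() above; a deserialise method is not shown in the original source, so this is a sketch. DataFileStream reads the container header from a plain InputStream, and ReflectDatumReader maps records back onto the given class:

import java.io.ByteArrayInputStream;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.io.DatumReader;
import org.apache.avro.reflect.ReflectData;
import org.apache.avro.reflect.ReflectDatumReader;

public Object deserialise(final byte[] bytes, final Class<?> clazz) throws SerialisationException {
    Schema schema = ReflectData.get().getSchema(clazz);
    DatumReader<Object> datumReader = new ReflectDatumReader<>(schema);
    try (DataFileStream<Object> stream =
            new DataFileStream<>(new ByteArrayInputStream(bytes), datumReader)) {
        return stream.next(); // serialise() wrote exactly one object
    } catch (final IOException e) {
        throw new SerialisationException("Unable to deserialise object", e);
    }
}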