This article collects typical usage examples of the Java method org.apache.avro.file.DataFileStream.close. If you are unsure what DataFileStream.close does, how to call it, or what real-world usage looks like, the curated examples below may help. You can also explore further usage examples of the enclosing class, org.apache.avro.file.DataFileStream.
The following shows 15 code examples of the DataFileStream.close method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
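Before the examples, one general note: DataFileStream implements java.io.Closeable, so the explicit close() calls shown below can equally be written with try-with-resources, which closes the stream even when reading throws. A minimal sketch of that pattern, assuming a local container file named data.avro:

import java.io.FileInputStream;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class DataFileStreamCloseSketch {
  public static void main(String[] args) throws Exception {
    // try-with-resources calls stream.close() automatically on exit
    try (DataFileStream<GenericRecord> stream = new DataFileStream<>(
        new FileInputStream("data.avro"), new GenericDatumReader<GenericRecord>())) {
      while (stream.hasNext()) {
        System.out.println(stream.next()); // GenericRecord.toString() prints a JSON-like form
      }
    } // close() has run here, even if an exception was thrown while reading
  }
}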
Example 1: testCompressFile
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
@Test
public void testCompressFile() throws Exception {
  String avroCodec = "snappy";
  localProps.put(StorageSinkConnectorConfig.AVRO_CODEC_CONFIG, avroCodec);
  setUp();
  task = new S3SinkTask(connectorConfig, context, storage, partitioner, format, SYSTEM_TIME);

  List<SinkRecord> sinkRecords = createRecords(7);
  // Perform write
  task.put(sinkRecords);
  task.close(context.assignment());
  task.stop();

  List<S3ObjectSummary> summaries = listObjects(S3_TEST_BUCKET_NAME, "/", s3);
  for (S3ObjectSummary summary : summaries) {
    InputStream in = s3.getObject(summary.getBucketName(), summary.getKey()).getObjectContent();
    DatumReader<Object> reader = new GenericDatumReader<>();
    DataFileStream<Object> streamReader = new DataFileStream<>(in, reader);
    // make sure that produced Avro file has proper codec set
    Assert.assertEquals(avroCodec, streamReader.getMetaString(StorageSinkConnectorConfig.AVRO_CODEC_CONFIG));
    streamReader.close();
  }

  long[] validOffsets = {0, 3, 6};
  verify(sinkRecords, validOffsets);
}
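A note on the assertion above: Avro's DataFileWriter records the codec name in the container-file header under the reserved metadata key "avro.codec", so this check presumably works because StorageSinkConnectorConfig.AVRO_CODEC_CONFIG resolves to that same key string, which getMetaString then reads back.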
Example 2: getTimeColumnStatsCollector
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
private LongColumnPreIndexStatsCollector getTimeColumnStatsCollector(Schema schema, File localAvroFile)
    throws FileNotFoundException, IOException {
  String timeColumnName = schema.getTimeColumnName();
  FieldSpec spec = schema.getTimeFieldSpec();
  LOGGER.info("Spec for " + timeColumnName + " is " + spec);
  LongColumnPreIndexStatsCollector timeColumnStatisticsCollector =
      new LongColumnPreIndexStatsCollector(spec.getName(), new StatsCollectorConfig(schema, null));
  LOGGER.info("StatsCollector :" + timeColumnStatisticsCollector);
  DataFileStream<GenericRecord> dataStream =
      new DataFileStream<GenericRecord>(new FileInputStream(localAvroFile), new GenericDatumReader<GenericRecord>());
  while (dataStream.hasNext()) {
    GenericRecord next = dataStream.next();
    timeColumnStatisticsCollector.collect(next.get(timeColumnName));
  }
  dataStream.close();
  timeColumnStatisticsCollector.seal();
  return timeColumnStatisticsCollector;
}
Example 3: loadMemberCount
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
private Long loadMemberCount(Path path, String timestamp) throws IOException
{
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(path, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(path, timestamp + "/*.avro")))
  {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try
    {
      GenericRecord r = dataFileStream.next();
      Long count = (Long) ((GenericRecord) r.get("value")).get("count");
      Assert.assertNotNull(count);
      System.out.println("found count: " + count);
      return count;
    }
    finally
    {
      dataFileStream.close();
    }
  }
  throw new RuntimeException("found no data");
}
Example 4: getSchema
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
/**
 * This method is called by {@link #getAvroSchema}. The default implementation
 * returns the schema of an Avro file, or the schema of the last file in a
 * first-level directory (one that contains no sub-directories).
 *
 * @param path path of a file or first-level directory
 * @param fs file system
 * @return avro schema
 * @throws IOException
 */
public static Schema getSchema(Path path, FileSystem fs) throws IOException {
  /* get path of the last file */
  Path lastFile = AvroStorageUtils.getLast(path, fs);
  if (lastFile == null) {
    return null;
  }

  /* read in file and obtain schema */
  GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
  InputStream hdfsInputStream = fs.open(lastFile);
  DataFileStream<Object> avroDataStream = new DataFileStream<Object>(hdfsInputStream, avroReader);
  Schema ret = avroDataStream.getSchema();
  avroDataStream.close();

  return ret;
}
Example 5: deserialize
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
public List<GenericRecord> deserialize(byte[] avroMessage) throws IOException {
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  ByteArrayInputStream is = new ByteArrayInputStream(avroMessage);
  DataFileStream<GenericRecord> dataFileReader = new DataFileStream<>(is, datumReader);
  GenericRecord record = null;
  List<GenericRecord> readRecords = new ArrayList<>();
  while (dataFileReader.hasNext()) {
    readRecords.add(dataFileReader.next(record));
  }
  dataFileReader.close();
  return readRecords;
}
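A design note on this example: dataFileReader.next(record) accepts an object to reuse, but because record stays null here, Avro allocates a fresh GenericRecord on each call, which is what makes it safe to add every result to readRecords. Passing a single reused record instead would risk leaving the list full of references to one repeatedly mutated object.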
Example 6: verifyOutputAvroFiles
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
private void verifyOutputAvroFiles(FileSystem fs, Configuration conf, String dir, String prefix,
    List<String> bodies) throws IOException {
  int found = 0;
  int expected = bodies.size();
  for (String outputFile : getAllFiles(dir)) {
    String name = (new File(outputFile)).getName();
    if (name.startsWith(prefix)) {
      FSDataInputStream input = fs.open(new Path(outputFile));
      DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
      DataFileStream<GenericRecord> avroStream =
          new DataFileStream<GenericRecord>(input, reader);
      GenericRecord record = new GenericData.Record(avroStream.getSchema());
      while (avroStream.hasNext()) {
        avroStream.next(record);
        ByteBuffer body = (ByteBuffer) record.get("body");
        CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
        String bodyStr = decoder.decode(body).toString();
        LOG.debug("Removing event: {}", bodyStr);
        bodies.remove(bodyStr);
        found++;
      }
      avroStream.close();
      input.close();
    }
  }
  Assert.assertTrue("Found = " + found + ", Expected = " + expected
      + ", Left = " + bodies.size() + " " + bodies,
      bodies.size() == 0);
}
Example 7: testGzipDurabilityWithSerializer
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
@Test
public void testGzipDurabilityWithSerializer() throws Exception {
  Context context = new Context();
  context.put("serializer", "AVRO_EVENT");

  HDFSCompressedDataStream writer = new HDFSCompressedDataStream();
  writer.configure(context);
  writer.open(fileURI, factory.getCodec(new Path(fileURI)),
      SequenceFile.CompressionType.BLOCK);

  String[] bodies = { "yarf!", "yarfing!" };
  writeBodies(writer, bodies);

  int found = 0;
  int expected = bodies.length;
  List<String> expectedBodies = Lists.newArrayList(bodies);

  GZIPInputStream cmpIn = new GZIPInputStream(new FileInputStream(file));
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> avroStream =
      new DataFileStream<GenericRecord>(cmpIn, reader);
  GenericRecord record = new GenericData.Record(avroStream.getSchema());
  while (avroStream.hasNext()) {
    avroStream.next(record);
    CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
    String bodyStr = decoder.decode((ByteBuffer) record.get("body")).toString();
    expectedBodies.remove(bodyStr);
    found++;
  }
  avroStream.close();
  cmpIn.close();
  Assert.assertTrue("Found = " + found + ", Expected = " + expected
      + ", Left = " + expectedBodies.size() + " " + expectedBodies,
      expectedBodies.size() == 0);
}
Example 8: deserialize
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
public Employee deserialize(InputStream in) throws IOException
{
  GenericDatumReader<GenericData.Record> datum = new GenericDatumReader<GenericData.Record>(schema);
  DataFileStream<GenericData.Record> reader = new DataFileStream<GenericData.Record>(in, datum);
  Employee employee = null;
  try
  {
    GenericData.Record record = new GenericData.Record(schema);
    if (reader.hasNext())
    {
      employee = new Employee();
      reader.next(record);
      employee.setName(AvroUtils.get("name", record, "").toString());
      employee.setAge(Integer.parseInt(AvroUtils.get("yrs", record, "0").toString()));
      employee.setMails(AvroUtils.getArray(record, "emails", Object::toString));
    }
    return employee;
  }
  finally
  {
    reader.close();
  }
}
Example 9: processAvroMessage
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
/**
 * Processes a blob which contains one or more Avro messages and has the
 * schema embedded. This is the pattern when Avro objects are read from a
 * file (either local file system or HDFS). Every Avro object in the blob is
 * converted to JSON and then submitted to the output port.
 *
 * @param avroMessage
 *            The Blob that holds one or more Avro objects and the schema
 * @param outStream
 *            The stream to which the JSON string must be submitted
 * @param outTuple
 *            The tuple holding the JSON string
 * @throws Exception
 */
private void processAvroMessage(Blob avroMessage, StreamingOutput<OutputTuple> outStream, OutputTuple outTuple)
    throws Exception {
  ByteArrayInputStream is = new ByteArrayInputStream(avroMessage.getData());
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> dataFileReader = new DataFileStream<GenericRecord>(is, reader);
  GenericRecord consumedDatum = null;
  while (dataFileReader.hasNext()) {
    consumedDatum = dataFileReader.next(consumedDatum);
    if (LOGGER.isTraceEnabled())
      LOGGER.log(TraceLevel.TRACE, "JSON representation of Avro message: " + consumedDatum.toString());
    // Submit new tuple to output port 0
    outTuple.setString(outputJsonMessage, consumedDatum.toString());
    outStream.submit(outTuple);
  }
  is.close();
  dataFileReader.close();
}
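The JSON conversion here leans on the fact that GenericRecord.toString() renders the record in a JSON form, so no separate JSON serializer is needed; be aware, though, that for types such as bytes the textual rendering may not round-trip exactly.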
Example 10: init
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
@Override
public void init(Map<String, String> props, Schema indexingSchema, String kafkaTopicName) throws Exception {
  // Load Avro schema
  DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
  _avroSchema = reader.getSchema();
  reader.close();

  _rowGenerator = new AvroRecordToPinotRowGenerator(indexingSchema);
  _reader = new GenericDatumReader<GenericData.Record>(_avroSchema);
}
Example 11: execute
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
@Override
public boolean execute() throws IOException {
  // Create Kafka producer
  Properties properties = new Properties();
  properties.put("metadata.broker.list", _kafkaBrokerList);
  properties.put("serializer.class", "kafka.serializer.DefaultEncoder");
  properties.put("request.required.acks", "1");

  ProducerConfig producerConfig = new ProducerConfig(properties);
  Producer<String, byte[]> producer = new Producer<String, byte[]>(producerConfig);
  try {
    // Open the Avro file
    DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(new File(_avroFile));

    // Iterate over every record
    for (GenericRecord genericRecord : reader) {
      // Write the message to Kafka
      String recordJson = genericRecord.toString();
      byte[] bytes = recordJson.getBytes("utf-8");
      KeyedMessage<String, byte[]> data = new KeyedMessage<String, byte[]>(_kafkaTopic, bytes);
      producer.send(data);

      // Sleep for one second
      Uninterruptibles.sleepUninterruptibly(1000, TimeUnit.MILLISECONDS);
    }

    reader.close();
  } catch (Exception e) {
    e.printStackTrace();
    throw new RuntimeException(e);
  }

  savePID(System.getProperty("java.io.tmpdir") + File.separator + ".streamAvro.pid");
  return true;
}
Example 12: extractSchemaFromAvro
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
/**
 * Returns a basic Pinot schema object with the field type left unknown and
 * without determining whether each column is single-value or multi-value.
 * This is just a util method for testing.
 * @param avroFile
 * @return
 * @throws FileNotFoundException
 * @throws IOException
 */
public static Schema extractSchemaFromAvro(File avroFile) throws IOException {
  final Schema schema = new Schema();
  final DataFileStream<GenericRecord> dataStreamReader = getAvroReader(avroFile);
  final org.apache.avro.Schema avroSchema = dataStreamReader.getSchema();
  dataStreamReader.close();
  return getPinotSchemaFromAvroSchema(avroSchema, getDefaultFieldTypes(avroSchema), TimeUnit.DAYS);
}
Example 13: getAllColumnsInAvroFile
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
public static List<String> getAllColumnsInAvroFile(File avroFile) throws IOException {
  final List<String> ret = new ArrayList<String>();
  final DataFileStream<GenericRecord> reader = getAvroReader(avroFile);
  for (final Field f : reader.getSchema().getFields()) {
    ret.add(f.name());
  }
  reader.close();
  return ret;
}
Example 14: extractSchemaFromAvroWithoutTime
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
public static Schema extractSchemaFromAvroWithoutTime(File avroFile) throws FileNotFoundException, IOException {
  DataFileStream<GenericRecord> dataStream =
      new DataFileStream<GenericRecord>(new FileInputStream(avroFile), new GenericDatumReader<GenericRecord>());
  Schema schema = new Schema();
  for (final Field field : dataStream.getSchema().getFields()) {
    final String columnName = field.name();
    final String pinotType = field.getProp("pinotType");

    final FieldSpec fieldSpec;
    if (pinotType != null && "METRIC".equals(pinotType)) {
      fieldSpec = new MetricFieldSpec();
      fieldSpec.setFieldType(FieldType.METRIC);
    } else {
      fieldSpec = new DimensionFieldSpec();
      fieldSpec.setFieldType(FieldType.DIMENSION); // default
    }

    fieldSpec.setName(columnName);
    fieldSpec.setDataType(getColumnType(dataStream.getSchema().getField(columnName)));
    fieldSpec.setSingleValueField(isSingleValueField(dataStream.getSchema().getField(columnName)));
    fieldSpec.setDelimiter(",");
    schema.addSchema(columnName, fieldSpec);
  }
  dataStream.close();
  return schema;
}
Example 15: build
import org.apache.avro.file.DataFileStream; // import the package/class the method depends on
public <T> void build(String filePath, AvroConsumer<T> builder) throws IOException
{
  List<Path> paths = getPaths(filePath);
  for (Path path : paths)
  {
    DataFileStream<Object> stream = null;
    try
    {
      stream = getAvroDataStream(path);
      while (stream.hasNext())
      {
        builder.consume(stream.next());
      }
    }
    finally
    {
      if (stream != null)
      {
        stream.close();
      }
    }
  }
  builder.done();
}