This article collects typical usage examples of the Java class org.apache.avro.file.DataFileStream. If you are asking yourself what DataFileStream does, how to use it, or where to find usage examples, the curated class examples below may help.
The DataFileStream class belongs to the org.apache.avro.file package. Fifteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java examples.
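Before the examples, a minimal sketch of typical DataFileStream usage, not drawn from any of the projects below (the file name is a placeholder): open an Avro container file and iterate its records.
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class DataFileStreamQuickStart {
  public static void main(String[] args) throws Exception {
    // Placeholder path to any Avro container file.
    try (InputStream in = new FileInputStream("example.avro");
         DataFileStream<GenericRecord> stream =
             new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
      // The writer's schema is embedded in the file header.
      System.out.println("Schema: " + stream.getSchema());
      GenericRecord record = null;
      while (stream.hasNext()) {
        record = stream.next(record); // reuse one record instance to cut allocation
        System.out.println(record);
      }
    }
  }
}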
Example 1: testCompressFile
import org.apache.avro.file.DataFileStream; // import the required package/class
@Test
public void testCompressFile() throws Exception {
  String avroCodec = "snappy";
  localProps.put(StorageSinkConnectorConfig.AVRO_CODEC_CONFIG, avroCodec);
  setUp();
  task = new S3SinkTask(connectorConfig, context, storage, partitioner, format, SYSTEM_TIME);
  List<SinkRecord> sinkRecords = createRecords(7);
  // Perform write
  task.put(sinkRecords);
  task.close(context.assignment());
  task.stop();
  List<S3ObjectSummary> summaries = listObjects(S3_TEST_BUCKET_NAME, "/", s3);
  for (S3ObjectSummary summary : summaries) {
    InputStream in = s3.getObject(summary.getBucketName(), summary.getKey()).getObjectContent();
    DatumReader<Object> reader = new GenericDatumReader<>();
    DataFileStream<Object> streamReader = new DataFileStream<>(in, reader);
    // Make sure the produced Avro file has the proper codec set.
    Assert.assertEquals(avroCodec, streamReader.getMetaString(StorageSinkConnectorConfig.AVRO_CODEC_CONFIG));
    streamReader.close();
  }
  long[] validOffsets = {0, 3, 6};
  verify(sinkRecords, validOffsets);
}
Example 2: testCompressedWriteAndReadASingleFile
import org.apache.avro.file.DataFileStream; // import the required package/class
@Test
@SuppressWarnings("unchecked")
@Category(NeedsRunner.class)
public void testCompressedWriteAndReadASingleFile() throws Throwable {
  List<GenericClass> values =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File outputFile = tmpFolder.newFile("output.avro");
  writePipeline
      .apply(Create.of(values))
      .apply(
          AvroIO.write(GenericClass.class)
              .to(outputFile.getAbsolutePath())
              .withoutSharding()
              .withCodec(CodecFactory.deflateCodec(9)));
  writePipeline.run();
  PAssert.that(
          readPipeline.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath())))
      .containsInAnyOrder(values);
  readPipeline.run();
  try (DataFileStream dataFileStream =
      new DataFileStream(new FileInputStream(outputFile), new GenericDatumReader())) {
    assertEquals("deflate", dataFileStream.getMetaString("avro.codec"));
  }
}
Example 3: testWriteThenReadASingleFileWithNullCodec
import org.apache.avro.file.DataFileStream; // import the required package/class
@Test
@SuppressWarnings("unchecked")
@Category(NeedsRunner.class)
public void testWriteThenReadASingleFileWithNullCodec() throws Throwable {
  List<GenericClass> values =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File outputFile = tmpFolder.newFile("output.avro");
  writePipeline
      .apply(Create.of(values))
      .apply(
          AvroIO.write(GenericClass.class)
              .to(outputFile.getAbsolutePath())
              .withoutSharding()
              .withCodec(CodecFactory.nullCodec()));
  writePipeline.run();
  PAssert.that(
          readPipeline.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath())))
      .containsInAnyOrder(values);
  readPipeline.run();
  try (DataFileStream dataFileStream =
      new DataFileStream(new FileInputStream(outputFile), new GenericDatumReader())) {
    assertEquals("null", dataFileStream.getMetaString("avro.codec"));
  }
}
Example 4: deserialize
import org.apache.avro.file.DataFileStream; // import the required package/class
/**
 * Deserializes the bytes as an array of generic containers.
 *
 * <p>The bytes include a standard Avro header that contains a magic byte, the
 * record's Avro schema (and so on), followed by the byte representation of the records.
 *
 * <p>Implementation detail: this method uses Avro's {@code DataFileStream}.
 *
 * @param schema the schema associated with this container
 * @param container the serialized Avro container file bytes
 * @return an array of deserialized records, or {@code null} if {@code container} is null
 */
public GenericContainer[] deserialize(Schema schema, byte[] container) throws IOException {
  List<GenericContainer> retList = new ArrayList<>();
  if (container != null) {
    DatumReader<GenericContainer> datumReader = new GenericDatumReader<>(schema);
    ByteArrayInputStream in = new ByteArrayInputStream(container);
    try (DataFileStream<GenericContainer> reader = new DataFileStream<>(in, datumReader)) {
      while (reader.hasNext()) {
        // Do not pass a reuse instance here: reusing one record object would
        // leave every list entry pointing at the same, repeatedly overwritten record.
        retList.add(reader.next());
      }
    }
    return retList.toArray(new GenericContainer[retList.size()]);
  } else {
    return null;
  }
}
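The byte array that deserialize expects carries a full Avro container header, so it can be produced with Avro's DataFileWriter. A minimal sketch of such a counterpart (the serialize method itself is hypothetical, not part of the example above):
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericContainer;
import org.apache.avro.generic.GenericDatumWriter;

// Hypothetical counterpart to deserialize(): writes records into an in-memory
// Avro container file (magic byte, schema, and codec metadata included).
public static byte[] serialize(Schema schema, GenericContainer... records) throws IOException {
  ByteArrayOutputStream out = new ByteArrayOutputStream();
  GenericDatumWriter<GenericContainer> datumWriter = new GenericDatumWriter<>(schema);
  try (DataFileWriter<GenericContainer> writer = new DataFileWriter<>(datumWriter)) {
    writer.create(schema, out); // writes the container header
    for (GenericContainer record : records) {
      writer.append(record);
    }
  } // close() flushes the final block
  return out.toByteArray();
}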
Example 5: deserialize
import org.apache.avro.file.DataFileStream; // import the required package/class
/**
 * Deserializes the bytes as an array of generic containers.
 *
 * <p>The bytes include a standard Avro header that contains a magic byte, the
 * record's Avro schema (and so on), followed by the byte representation of the records.
 *
 * <p>Implementation detail: this method uses Avro's {@code DataFileStream}.
 *
 * @param schema the schema associated with this container
 * @param container the serialized Avro container file bytes
 * @return an array of deserialized records, or {@code null} if {@code container} is null
 */
public GenericContainer[] deserialize(Schema schema, byte[] container) throws IOException {
  List<GenericContainer> retList = new ArrayList<>();
  if (container != null) {
    DatumReader<GenericContainer> datumReader = new GenericDatumReader<>(schema);
    ByteArrayInputStream in = new ByteArrayInputStream(container);
    try (DataFileStream<GenericContainer> reader = new DataFileStream<GenericContainer>(in, datumReader)) {
      while (reader.hasNext()) {
        // Avoid passing a reuse instance; see the note in Example 4 above.
        retList.add(reader.next());
      }
    }
    return retList.toArray(new GenericContainer[retList.size()]);
  } else {
    return null;
  }
}
Example 6: init
import org.apache.avro.file.DataFileStream; // import the required package/class
@Override
public void init() throws Exception {
  final File file = new File(_fileName);
  if (!file.exists()) {
    throw new FileNotFoundException("File does not exist: " + _fileName);
  }
  //_schemaExtractor = FieldExtractorFactory.get(_dataReaderSpec);
  if (_fileName.endsWith("gz")) {
    _dataStream = new DataFileStream<GenericRecord>(
        new GZIPInputStream(new FileInputStream(file)),
        new GenericDatumReader<GenericRecord>());
  } else {
    _dataStream = new DataFileStream<GenericRecord>(
        new FileInputStream(file),
        new GenericDatumReader<GenericRecord>());
  }
  updateSchema(_schemaExtractor.getSchema());
}
Example 7: getRealtimeSegment
import org.apache.avro.file.DataFileStream; // import the required package/class
private IndexSegment getRealtimeSegment() throws IOException {
  RealtimeSegmentImpl realtimeSegmentImpl = new RealtimeSegmentImpl(PINOT_SCHEMA, 100000);
  try {
    DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(
        new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(AVRO_DATA))));
    while (avroReader.hasNext()) {
      GenericRecord avroRecord = avroReader.next();
      GenericRow genericRow = AVRO_RECORD_TRANSFORMER.transform(avroRecord);
      // System.out.println(genericRow);
      realtimeSegmentImpl.index(genericRow);
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
  System.out.println("Current raw events indexed: " + realtimeSegmentImpl.getRawDocumentCount()
      + ", totalDocs = " + realtimeSegmentImpl.getTotalDocs());
  realtimeSegmentImpl.setSegmentName("testTable_testTable");
  realtimeSegmentImpl.setSegmentMetadata(getRealtimeSegmentZKMetadata());
  return realtimeSegmentImpl;
}
Example 8: before
import org.apache.avro.file.DataFileStream; // import the required package/class
@BeforeClass
public static void before() throws Exception {
  final String filePath = TestUtils.getFileFromResourceUrl(BlocksTest.class.getClassLoader().getResource(AVRO_DATA));
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }
  System.out.println(INDEX_DIR.getAbsolutePath());
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  final SegmentGeneratorConfig config =
      SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR,
          "daysSinceEpoch", TimeUnit.DAYS, "test");
  config.setTimeColumnName("daysSinceEpoch");
  driver.init(config);
  driver.build();
  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  final org.apache.avro.Schema avroSchema = avroReader.getSchema();
  final String[] columns = new String[avroSchema.getFields().size()];
  int i = 0;
  for (final Field f : avroSchema.getFields()) {
    columns[i] = f.name();
    i++;
  }
}
Example 9: initializeSchema
import org.apache.avro.file.DataFileStream; // import the required package/class
@Override
public void initializeSchema(InputStream inputStream) {
  try {
    this.datumReader = new GenericDatumReader<GenericRecord>();
    this.dataFileStream = new DataFileStream<GenericRecord>(inputStream, datumReader);
    this.schema = dataFileStream.getSchema();
    this.instanceInformation = getHeader();
    this.isSparseData = isSparseData();
    if (classAttribute < 0) {
      this.instanceInformation.setClassIndex(this.instanceInformation.numAttributes() - 1);
    } else if (classAttribute > 0) {
      this.instanceInformation.setClassIndex(classAttribute - 1);
    }
  } catch (IOException ioException) {
    logger.error(AVRO_LOADER_SCHEMA_READ_ERROR + " : {}", ioException);
    // Preserve the original exception as the cause instead of string-concatenating it.
    throw new RuntimeException(AVRO_LOADER_SCHEMA_READ_ERROR, ioException);
  }
}
Example 10: AvroDataStreamParser
import org.apache.avro.file.DataFileStream; // import the required package/class
public AvroDataStreamParser(
    ProtoConfigurableEntity.Context context,
    Schema schema,
    String streamName,
    InputStream inputStream,
    long recordCount,
    int maxObjectLength
) throws IOException {
  this.context = context;
  avroSchema = schema;
  this.streamName = streamName;
  this.recordCount = recordCount;
  datumReader = new GenericDatumReader<>(avroSchema, avroSchema, GenericData.get()); // the reader schema argument is optional
  overrunInputStream = new OverrunInputStream(inputStream, maxObjectLength, true);
  dataFileStream = new DataFileStream<>(overrunInputStream, datumReader);
  seekToOffset();
}
Example 11: getSchema
import org.apache.avro.file.DataFileStream; // import the required package/class
/**
 * This method is called by {@link #getAvroSchema}. The default implementation
 * returns the schema of an Avro file, or the schema of the last file in a
 * first-level directory (one that contains no sub-directories).
 *
 * @param path path of a file or a first-level directory
 * @param fs file system
 * @return avro schema
 * @throws IOException
 */
public static Schema getSchema(Path path, FileSystem fs) throws IOException {
  /* get the path of the last file */
  Path lastFile = AvroStorageUtils.getLast(path, fs);
  if (lastFile == null) {
    return null;
  }
  /* read the file and obtain its schema */
  GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
  InputStream hdfsInputStream = fs.open(lastFile);
  DataFileStream<Object> avroDataStream = new DataFileStream<Object>(hdfsInputStream, avroReader);
  Schema ret = avroDataStream.getSchema();
  avroDataStream.close(); // also closes the underlying HDFS input stream
  return ret;
}
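A brief usage sketch for getSchema, assuming a Hadoop Configuration is available (the wrapper method and location string are placeholders, not code from the original project):
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical caller: prints the schema of the last Avro file under a location.
public static void printLastFileSchema(String location) throws IOException {
  Path input = new Path(location);
  FileSystem fs = input.getFileSystem(new Configuration());
  Schema schema = getSchema(input, fs); // the method shown above
  if (schema != null) {
    System.out.println(schema.toString(true)); // pretty-printed schema JSON
  }
}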
Example 12: getOutputFileDateRange
import org.apache.avro.file.DataFileStream; // import the required package/class
/**
 * Reads the date range from the metadata stored in an Avro file.
 *
 * @param fs file system to access path
 * @param path path to get date range for
 * @return date range
 * @throws IOException IOException
 */
public static DateRange getOutputFileDateRange(FileSystem fs, Path path) throws IOException {
  path = fs.listStatus(path, PathUtils.nonHiddenPathFilter)[0].getPath();
  FSDataInputStream dataInputStream = fs.open(path);
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(dataInputStream, reader);
  try {
    return new DateRange(
        new Date(Long.parseLong(dataFileStream.getMetaString(METADATA_DATE_START))),
        new Date(Long.parseLong(dataFileStream.getMetaString(METADATA_DATE_END))));
  } finally {
    dataFileStream.close();
    dataInputStream.close();
  }
}
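For that metadata to be present, it has to be set on the writer before the file header is created. A minimal sketch of the writing side, reusing the METADATA_DATE_START and METADATA_DATE_END keys from the example (the writer method itself is an assumption, not code from the original project):
import java.io.IOException;
import java.util.Date;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical writing side for getOutputFileDateRange().
public static void writeWithDateRange(FileSystem fs, Path path, Schema schema,
    Date start, Date end) throws IOException {
  DataFileWriter<GenericRecord> writer =
      new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema));
  // Metadata must be set before create(); it is written into the file header.
  writer.setMeta(METADATA_DATE_START, Long.toString(start.getTime()));
  writer.setMeta(METADATA_DATE_END, Long.toString(end.getTime()));
  writer.create(schema, fs.create(path));
  // ... append records here ...
  writer.close(); // also closes the underlying output stream
}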
Example 13: loadMemberCount
import org.apache.avro.file.DataFileStream; // import the required package/class
private Long loadMemberCount(Path path, String timestamp) throws IOException {
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(path, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(path, timestamp + "/*.avro"))) {
    _log.info(String.format("found: %s (%d bytes)", stat.getPath(), stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
    try {
      GenericRecord r = dataFileStream.next();
      Long count = (Long) ((GenericRecord) r.get("value")).get("count");
      Assert.assertNotNull(count);
      System.out.println("found count: " + count);
      return count;
    } finally {
      dataFileStream.close();
    }
  }
  throw new RuntimeException("found no data");
}
Example 14: main
import org.apache.avro.file.DataFileStream; // import the required package/class
public static void main(String[] args) throws Exception {
  if (args.length == 0) {
    System.out.println("Usage: ListFilesInAvroMain <pathToAvroFile>");
    return;
  }
  final String FIELD_FILENAME = "filename";
  Configuration config = new Configuration();
  FileSystem hdfs = FileSystem.get(config);
  Path destFile = new Path(args[0]);
  InputStream is = hdfs.open(destFile);
  DataFileStream<Object> reader =
      new DataFileStream<Object>(is, new GenericDatumReader<Object>());
  int counter = 0;
  for (Object o : reader) {
    GenericRecord r = (GenericRecord) o;
    System.out.println(counter++ + ":" + r.get(FIELD_FILENAME).toString());
  }
  // Close the reader before the underlying stream.
  IOUtils.cleanup(null, reader);
  IOUtils.cleanup(null, is);
}
Example 15: supportsFile
import org.apache.avro.file.DataFileStream; // import the required package/class
@Override
public boolean supportsFile(final URL file) {
  try (DataFileStream<AvroSimpleFeatureCollection> ds =
      new DataFileStream<AvroSimpleFeatureCollection>(
          file.openStream(),
          new SpecificDatumReader<AvroSimpleFeatureCollection>())) {
    if (ds.getHeader() != null) {
      return true;
    }
  } catch (final IOException e) {
    // Log at info level only, since the file may not have been intended to be
    // read as Avro vector data.
    LOGGER.info("Unable to read file as Avro vector data '" + file.getPath() + "'", e);
  }
  return false;
}