Java DataFileStream類代碼示例

本文整理匯總了Java中org.apache.avro.file.DataFileStream類的典型用法代碼示例。如果您正苦於以下問題：Java DataFileStream類的具體用法？Java DataFileStream怎麼用？Java DataFileStream使用的例子？那麼，這裡精選的類代碼示例或許可以為您提供幫助。

DataFileStream類屬於org.apache.avro.file包，在下文中一共展示了DataFileStream類的15個代碼示例，這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: testCompressFile

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Verifies that objects written by the sink task carry the configured Avro codec.
 *
 * <p>Configures the {@code snappy} codec, pushes seven records through an {@code S3SinkTask},
 * then opens every object listed in the test bucket as an Avro container and checks the codec
 * value stored in its metadata, before verifying the expected start offsets.
 *
 * @throws Exception if setup, the write, or reading back any object fails
 */
@Test
public void testCompressFile() throws Exception {
  String avroCodec = "snappy";
  localProps.put(StorageSinkConnectorConfig.AVRO_CODEC_CONFIG, avroCodec);
  setUp();
  task = new S3SinkTask(connectorConfig, context, storage, partitioner, format, SYSTEM_TIME);

  List<SinkRecord> sinkRecords = createRecords(7);
  // Perform write
  task.put(sinkRecords);
  task.close(context.assignment());
  task.stop();

  List<S3ObjectSummary> summaries = listObjects(S3_TEST_BUCKET_NAME, "/", s3);
  for (S3ObjectSummary summary : summaries) {
    DatumReader<Object> reader = new GenericDatumReader<>();
    // try-with-resources: the object stream is released even when the header cannot be parsed
    // or the assertion below fails.
    try (InputStream in =
            s3.getObject(summary.getBucketName(), summary.getKey()).getObjectContent();
        DataFileStream<Object> streamReader = new DataFileStream<>(in, reader)) {
      // make sure that produced Avro file has proper codec set
      Assert.assertEquals(
          avroCodec,
          streamReader.getMetaString(StorageSinkConnectorConfig.AVRO_CODEC_CONFIG));
    }
  }

  long[] validOffsets = {0, 3, 6};
  verify(sinkRecords, validOffsets);
}

開發者ID:confluentinc，項目名稱:kafka-connect-storage-cloud，代碼行數:27，代碼來源:DataWriterAvroTest.java

示例2: testCompressedWriteAndReadASingleFile

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Writes two values to a single unsharded Avro file with the deflate codec (level 9), reads
 * them back through the read pipeline, and finally checks that the file's {@code avro.codec}
 * metadata entry is {@code "deflate"}.
 *
 * @throws Throwable if either pipeline or the direct file inspection fails
 */
@Test
@SuppressWarnings("unchecked")
@Category(NeedsRunner.class)
public void testCompressedWriteAndReadASingleFile() throws Throwable {
  List<GenericClass> values =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File outputFile = tmpFolder.newFile("output.avro");

  writePipeline.apply(Create.of(values))
      .apply(
          AvroIO.write(GenericClass.class)
              .to(outputFile.getAbsolutePath())
              .withoutSharding()
              .withCodec(CodecFactory.deflateCodec(9)));
  writePipeline.run();

  PAssert.that(
          readPipeline.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath())))
      .containsInAnyOrder(values);
  readPipeline.run();

  // The file stream is its own resource so it is closed even if the Avro header cannot be
  // parsed; explicit type arguments replace the raw types used previously.
  try (FileInputStream fileIn = new FileInputStream(outputFile);
      DataFileStream<Object> dataFileStream =
          new DataFileStream<Object>(fileIn, new GenericDatumReader<Object>())) {
    assertEquals("deflate", dataFileStream.getMetaString("avro.codec"));
  }
}

開發者ID:apache，項目名稱:beam，代碼行數:27，代碼來源:AvroIOTest.java

示例3: testWriteThenReadASingleFileWithNullCodec

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Writes two values to a single unsharded Avro file with the null (no-compression) codec,
 * reads them back through the read pipeline, and finally checks that the file's
 * {@code avro.codec} metadata entry is {@code "null"}.
 *
 * @throws Throwable if either pipeline or the direct file inspection fails
 */
@Test
@SuppressWarnings("unchecked")
@Category(NeedsRunner.class)
public void testWriteThenReadASingleFileWithNullCodec() throws Throwable {
  List<GenericClass> values =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File outputFile = tmpFolder.newFile("output.avro");

  writePipeline.apply(Create.of(values))
      .apply(
          AvroIO.write(GenericClass.class)
              .to(outputFile.getAbsolutePath())
              .withoutSharding()
              .withCodec(CodecFactory.nullCodec()));
  writePipeline.run();

  PAssert.that(
          readPipeline.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath())))
      .containsInAnyOrder(values);
  readPipeline.run();

  // The file stream is its own resource so it is closed even if the Avro header cannot be
  // parsed; explicit type arguments replace the raw types used previously.
  try (FileInputStream fileIn = new FileInputStream(outputFile);
      DataFileStream<Object> dataFileStream =
          new DataFileStream<Object>(fileIn, new GenericDatumReader<Object>())) {
    assertEquals("null", dataFileStream.getMetaString("avro.codec"));
  }
}

開發者ID:apache，項目名稱:beam，代碼行數:27，代碼來源:AvroIOTest.java

示例4: deserialize

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Deserializes the bytes as an array of Generic containers.
 *
 * <p>The bytes include a standard Avro header that contains a magic byte, the
 * record's Avro schema (and so on), followed by the byte representation of the record.
 *
 * <p>Implementation detail:  This method uses Avro's {@code DataFileStream}.
 *
 * @param schema Schema associated with this container; passed to the datum reader
 * @param container Serialized Avro container file bytes, may be {@code null}
 * @return One element per record found in {@code container}, in stream order, or
 *     {@code null} when {@code container} is {@code null}
 * @throws IOException If the bytes cannot be read as an Avro container
 */
public GenericContainer[] deserialize(Schema schema, byte[] container) throws IOException {
  if (container == null) {
    // Kept for backward compatibility: callers receive null, not an empty array.
    return null;
  }
  List<GenericContainer> retList = new ArrayList<>();
  DatumReader<GenericContainer> datumReader = new GenericDatumReader<>(schema);
  try (DataFileStream<GenericContainer> reader =
      new DataFileStream<GenericContainer>(new ByteArrayInputStream(container), datumReader)) {
    while (reader.hasNext()) {
      // Use the no-reuse overload: next(reuse) is allowed to hand back the very object that
      // was passed in, which would leave every list element aliasing the last record read.
      retList.add(reader.next());
    }
  }
  return retList.toArray(new GenericContainer[retList.size()]);
}

開發者ID:confluentinc，項目名稱:support-metrics-client，代碼行數:28，代碼來源:AvroDeserializer.java

示例5: deserialize

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Deserializes the bytes as an array of Generic containers.
 *
 * <p>The bytes include a standard Avro header that contains a magic byte, the
 * record's Avro schema (and so on), followed by the byte representation of the record.
 *
 * <p>Implementation detail:  This method uses Avro's {@code DataFileStream}.
 *
 * @param schema Schema associated with this container; passed to the datum reader
 * @param container Serialized Avro container file bytes, may be {@code null}
 * @return One element per record found in {@code container}, in stream order, or
 *     {@code null} when {@code container} is {@code null}
 * @throws IOException If the bytes cannot be read as an Avro container
 */
public GenericContainer[] deserialize(Schema schema, byte[] container) throws IOException {
  if (container == null) {
    // Kept for backward compatibility: callers receive null, not an empty array.
    return null;
  }
  List<GenericContainer> retList = new ArrayList<>();
  DatumReader<GenericContainer> datumReader = new GenericDatumReader<>(schema);
  ByteArrayInputStream in = new ByteArrayInputStream(container);
  try (DataFileStream<GenericContainer> reader = new DataFileStream<GenericContainer>(
      in,
      datumReader
  )) {
    while (reader.hasNext()) {
      // Use the no-reuse overload: next(reuse) is allowed to hand back the very object that
      // was passed in, which would leave every list element aliasing the last record read.
      retList.add(reader.next());
    }
  }
  return retList.toArray(new GenericContainer[retList.size()]);
}

開發者ID:confluentinc，項目名稱:support-metrics-common，代碼行數:31，代碼來源:AvroDeserializer.java

示例6: init

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Opens the configured file as an Avro data stream and publishes its schema.
 *
 * <p>When the file name ends with {@code "gz"} the raw bytes are first unwrapped with a
 * {@link GZIPInputStream}; otherwise the file is read directly.
 *
 * @throws FileNotFoundException if the configured file does not exist
 * @throws Exception if the file cannot be opened or parsed as an Avro container
 */
@Override
public void init() throws Exception {
  final File file = new File(_fileName);
  if (!file.exists()) {
    throw new FileNotFoundException("File is not existed!");
  }

  final FileInputStream fileIn = new FileInputStream(file);
  boolean opened = false;
  try {
    if (_fileName.endsWith("gz")) {
      _dataStream =
          new DataFileStream<GenericRecord>(new GZIPInputStream(fileIn),
              new GenericDatumReader<GenericRecord>());
    } else {
      _dataStream =
          new DataFileStream<GenericRecord>(fileIn, new GenericDatumReader<GenericRecord>());
    }
    opened = true;
  } finally {
    // The GZIP and Avro constructors both read a header and may throw; in that case nothing
    // else owns the file handle, so release it here instead of leaking it.
    if (!opened) {
      fileIn.close();
    }
  }

  // NOTE(review): _schemaExtractor is not assigned in this method; it is presumably set up
  // elsewhere before init() is called — confirm.
  updateSchema(_schemaExtractor.getSchema());
}

開發者ID:Hanmourang，項目名稱:Pinot，代碼行數:19，代碼來源:AvroRecordReader.java

示例7: getRealtimeSegment

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Builds an in-memory realtime segment from the Avro test resource.
 *
 * <p>Each Avro record is transformed into a {@code GenericRow} and indexed. Read or transform
 * failures are printed and otherwise ignored, so the returned segment holds whatever rows
 * were indexed before the failure.
 *
 * @return the populated segment, with its name and metadata set
 * @throws IOException declared by the signature; failures raised while reading the Avro data
 *     are caught inside this method
 */
private IndexSegment getRealtimeSegment() throws IOException {
  RealtimeSegmentImpl realtimeSegmentImpl = new RealtimeSegmentImpl(PINOT_SCHEMA, 100000);

  // try-with-resources closes the Avro reader on every path; previously it was never closed.
  try (DataFileStream<GenericRecord> avroReader =
      AvroUtils.getAvroReader(new File(TestUtils.getFileFromResourceUrl(getClass().getClassLoader().getResource(
          AVRO_DATA))))) {
    while (avroReader.hasNext()) {
      GenericRecord avroRecord = avroReader.next();
      GenericRow genericRow = AVRO_RECORD_TRANSFORMER.transform(avroRecord);
      realtimeSegmentImpl.index(genericRow);
    }
  } catch (Exception e) {
    // Best effort, as in the original: report the problem and continue with a partial segment.
    e.printStackTrace();
  }
  System.out.println("Current raw events indexed: " + realtimeSegmentImpl.getRawDocumentCount() + ", totalDocs = "
      + realtimeSegmentImpl.getTotalDocs());
  realtimeSegmentImpl.setSegmentName("testTable_testTable");
  realtimeSegmentImpl.setSegmentMetadata(getRealtimeSegmentZKMetadata());
  return realtimeSegmentImpl;

}

開發者ID:Hanmourang，項目名稱:Pinot，代碼行數:24，代碼來源:RealtimeQueriesSentinelTest.java

示例8: before

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Builds the test segment once for the whole class.
 *
 * <p>Clears any previous index directory, runs the segment creation driver over the Avro test
 * resource with {@code daysSinceEpoch} as the time column, and then reads the Avro schema to
 * collect the column names.
 *
 * @throws Exception if segment generation or reading the Avro file fails
 */
@BeforeClass
public static void before() throws Exception {
  final String filePath = TestUtils.getFileFromResourceUrl(BlocksTest.class.getClassLoader().getResource(AVRO_DATA));
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }

  System.out.println(INDEX_DIR.getAbsolutePath());
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);

  final SegmentGeneratorConfig config =
      SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch",
          TimeUnit.DAYS, "test");
  config.setTimeColumnName("daysSinceEpoch");
  driver.init(config);
  driver.build();

  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  try {
    final org.apache.avro.Schema avroSchema = avroReader.getSchema();
    // NOTE(review): columns is filled in but not used further within this method.
    final String[] columns = new String[avroSchema.getFields().size()];
    int i = 0;
    for (final Field f : avroSchema.getFields()) {
      columns[i] = f.name();
      i++;
    }
  } finally {
    // The reader holds an open file handle; it was previously never closed.
    avroReader.close();
  }
}

開發者ID:Hanmourang，項目名稱:Pinot，代碼行數:27，代碼來源:BlocksTest.java

示例9: initializeSchema

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Opens {@code inputStream} as an Avro container, captures its schema, and derives the
 * instance header from it.
 *
 * <p>The class index is then chosen from {@code classAttribute}: a negative value selects the
 * last attribute, a positive value is treated as a 1-based attribute position, and zero leaves
 * the class index untouched.
 *
 * @param inputStream stream positioned at the start of an Avro container file
 * @throws RuntimeException wrapping the {@link IOException} if the Avro header cannot be read
 */
@Override
public void initializeSchema(InputStream inputStream)
{
  try {
    this.datumReader = new GenericDatumReader<GenericRecord>();
    this.dataFileStream = new DataFileStream<GenericRecord>(inputStream, datumReader);
    this.schema = dataFileStream.getSchema();

    this.instanceInformation = getHeader();
    this.isSparseData = isSparseData();

    if (classAttribute < 0) {
      this.instanceInformation.setClassIndex(this.instanceInformation.numAttributes() - 1);
    } else if (classAttribute > 0) {
      this.instanceInformation.setClassIndex(classAttribute - 1);
    }

  } catch (IOException ioException) {
    logger.error(AVRO_LOADER_SCHEMA_READ_ERROR + " : {}", ioException);
    // Same message as before, but keep the original exception as the cause so its stack
    // trace is not lost to callers.
    throw new RuntimeException(AVRO_LOADER_SCHEMA_READ_ERROR + " : " + ioException, ioException);
  }
}

開發者ID:apache，項目名稱:incubator-samoa，代碼行數:23，代碼來源:AvroBinaryLoader.java

示例10: AvroDataStreamParser

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Creates a parser that reads Avro records from {@code inputStream}.
 *
 * <p>The input is wrapped in an {@code OverrunInputStream} bounded by {@code maxObjectLength}
 * before being handed to the Avro {@code DataFileStream}; construction ends by calling
 * {@code seekToOffset()}.
 *
 * @param context context stored on the parser
 * @param schema Avro schema given to the datum reader
 * @param streamName name stored on the parser
 * @param inputStream source of the Avro container bytes
 * @param recordCount record count stored on the parser
 * @param maxObjectLength limit passed to the overrun-guarding stream wrapper
 * @throws IOException if the Avro stream cannot be opened or the initial seek fails
 */
public AvroDataStreamParser(
    ProtoConfigurableEntity.Context context,
    Schema schema,
    String streamName,
    InputStream inputStream,
    long recordCount,
    int maxObjectLength
) throws IOException {
  this.context = context;
  this.streamName = streamName;
  this.recordCount = recordCount;
  this.avroSchema = schema;

  // The same schema is supplied for both schema arguments; the reader schema is optional.
  this.datumReader = new GenericDatumReader<>(schema, schema, GenericData.get());
  this.overrunInputStream = new OverrunInputStream(inputStream, maxObjectLength, true);
  this.dataFileStream = new DataFileStream<>(this.overrunInputStream, this.datumReader);

  seekToOffset();
}

開發者ID:streamsets，項目名稱:datacollector，代碼行數:18，代碼來源:AvroDataStreamParser.java

示例11: getSchema

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * This method is called by {@link #getAvroSchema}. The default implementation
 * returns the schema of an avro file; or the schema of the last file in a first-level
 * directory (it does not contain sub-directories).
 *
 * @param path  path of a file or first level directory
 * @param fs  file system
 * @return avro schema, or {@code null} when no last file can be determined for {@code path}
 * @throws IOException if the file cannot be opened or is not a readable avro container
 */
public static Schema getSchema(Path path, FileSystem fs) throws IOException {
    /* get path of the last file */
    Path lastFile = AvroStorageUtils.getLast(path, fs);
    if (lastFile == null) {
        return null;
    }

    /* read in file and obtain schema */
    GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
    InputStream hdfsInputStream = fs.open(lastFile);
    try {
        DataFileStream<Object> avroDataStream = new DataFileStream<Object>(hdfsInputStream, avroReader);
        try {
            return avroDataStream.getSchema();
        } finally {
            avroDataStream.close();
        }
    } finally {
        /* also covers the case where the DataFileStream constructor itself throws */
        hdfsInputStream.close();
    }
}

開發者ID:linkedin，項目名稱:Cubert，代碼行數:27，代碼來源:AvroStorageUtils.java

示例12: getOutputFileDateRange

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Reads the date range from the metadata stored in an Avro file.
 *
 * <p>The first entry returned by listing {@code path} with the non-hidden path filter is
 * opened, and its start and end metadata values are parsed as epoch milliseconds.
 * 
 * @param fs file system to access path
 * @param path path to get date range for
 * @return date range
 * @throws IOException IOException
 */
public static DateRange getOutputFileDateRange(FileSystem fs, Path path) throws IOException
{
  path = fs.listStatus(path, PathUtils.nonHiddenPathFilter)[0].getPath();
  FSDataInputStream dataInputStream = fs.open(path);
  
  try
  {
    // Constructed inside the try so that a failure while parsing the Avro header
    // still closes the underlying input stream.
    DatumReader <GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(dataInputStream, reader);
    try
    {
      return new DateRange(new Date(Long.parseLong(dataFileStream.getMetaString(METADATA_DATE_START))),
                           new Date(Long.parseLong(dataFileStream.getMetaString(METADATA_DATE_END))));
    }
    finally
    {
      dataFileStream.close();
    }
  }
  finally
  {
    dataInputStream.close();
  }
}

開發者ID:apache，項目名稱:incubator-datafu，代碼行數:27，代碼來源:AvroDateRangeMetadata.java

示例13: loadMemberCount

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Loads the member count from the first Avro file found under {@code path/timestamp}.
 *
 * <p>Only the first record of the first matching {@code *.avro} file is read; its
 * {@code value.count} field is asserted to be non-null and returned.
 *
 * @param path parent output path
 * @param timestamp name of the sub-directory to look in
 * @return the count stored in the first record
 * @throws IOException if the file system cannot be accessed or the file cannot be read
 * @throws RuntimeException if no Avro file matches
 */
private Long loadMemberCount(Path path, String timestamp) throws IOException
{
  FileSystem fs = getFileSystem();
  Assert.assertTrue(fs.exists(new Path(path, timestamp)));
  for (FileStatus stat : fs.globStatus(new Path(path,timestamp + "/*.avro")))
  {
    _log.info(String.format("found: %s (%d bytes)",stat.getPath(),stat.getLen()));
    FSDataInputStream is = fs.open(stat.getPath());
    
    try
    {
      // Constructed inside the try so that a failure while parsing the Avro header
      // still closes the underlying input stream.
      DatumReader <GenericRecord> reader = new GenericDatumReader<GenericRecord>();
      DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(is, reader);
      try
      {
        GenericRecord r = dataFileStream.next();
        Long count = (Long)((GenericRecord)r.get("value")).get("count");   
        Assert.assertNotNull(count);       
        System.out.println("found count: " + count);
        return count;
      }
      finally
      {
        dataFileStream.close();
      }
    }
    finally
    {
      is.close();
    }
  }
  throw new RuntimeException("found no data");
}

開發者ID:apache，項目名稱:incubator-datafu，代碼行數:27，代碼來源:Examples.java

示例14: main

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Prints the {@code filename} field of every record in an Avro file, prefixed with a running
 * index.
 *
 * @param args a single argument: the path of the Avro file to list; usage is printed when
 *     no argument is given
 * @throws Exception if the file system or the Avro file cannot be read
 */
public static void main(String[] args) throws Exception {

  if (args.length == 0) {
    System.out.println("ListFilesInAvroMain <pathToAvroFile>");
    return;
  }

  final String FIELD_FILENAME = "filename";

  Configuration config = new Configuration();
  FileSystem hdfs = FileSystem.get(config);
  Path destFile = new Path(args[0]);
  InputStream is = hdfs.open(destFile);

  DataFileStream<Object> reader = null;
  try {
    reader = new DataFileStream<Object>(is,
        new GenericDatumReader<Object>());

    int counter = 0;
    for (Object o : reader) {
      GenericRecord r = (GenericRecord) o;
      System.out.println(counter++ + ":" + r.get(FIELD_FILENAME).toString());
    }
  } finally {
    // Run the cleanup on every path, including a failed header parse or a bad record.
    // NOTE(review): assumes IOUtils.cleanup skips null closeables, as Hadoop's IOUtils does
    // (reader is still null if its constructor threw) — confirm for the IOUtils in use.
    IOUtils.cleanup(null, is);
    IOUtils.cleanup(null, reader);
  }
}

開發者ID:tmalaska，項目名稱:FileIngestor，代碼行數:26，代碼來源:ListFilesInAvroMain.java

示例15: supportsFile

import org.apache.avro.file.DataFileStream; //導入依賴的package包/類
/**
 * Reports whether {@code file} can be opened as an Avro container with a readable header.
 *
 * @param file location of the candidate file
 * @return {@code true} if the Avro header was read successfully; {@code false} if the header
 *         is absent or the file could not be read as Avro
 */
@Override
public boolean supportsFile(
		final URL file ) {

	// The URL stream is declared as its own resource: the DataFileStream constructor throws
	// for non-Avro input (the expected negative case here), and the opened stream would
	// otherwise never be closed.
	try (java.io.InputStream in = file.openStream();
			DataFileStream<AvroSimpleFeatureCollection> ds = new DataFileStream<AvroSimpleFeatureCollection>(
					in,
					new SpecificDatumReader<AvroSimpleFeatureCollection>())) {
		if (ds.getHeader() != null) {
			return true;
		}
	}
	catch (final IOException e) {
		// just log as info as this may not have been intended to be read as
		// avro vector data
		LOGGER.info(
				"Unable to read file as Avro vector data '" + file.getPath() + "'",
				e);
	}

	return false;
}

開發者ID:locationtech，項目名稱:geowave，代碼行數:22，代碼來源:AvroIngestPlugin.java

注：本文中的org.apache.avro.file.DataFileStream類示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台，相關代碼片段篩選自各路編程大神貢獻的開源項目，源碼版權歸原作者所有，傳播和使用請參考對應項目的License；未經允許，請勿轉載。