

Java DataFileStream.getSchema Method Code Examples

This article collects typical usage examples of the Java method org.apache.avro.file.DataFileStream.getSchema. If you are wondering what DataFileStream.getSchema does, how to call it, or where to find it used in practice, the curated code examples below may help. You can also explore further usage examples of the enclosing class, org.apache.avro.file.DataFileStream.


The sections below present 15 code examples of the DataFileStream.getSchema method, sorted by popularity by default.
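Before the project-specific examples, here is a minimal, self-contained sketch of the pattern they all share: wrap an InputStream in a DataFileStream and read the writer's schema from the container file's metadata header. The class name GetSchemaExample and its readSchema helper are hypothetical, introduced only for illustration.

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class GetSchemaExample {
  // Hypothetical helper: returns the writer schema stored in an Avro container file.
  public static Schema readSchema(File avroFile) throws IOException {
    try (InputStream in = new FileInputStream(avroFile);
         DataFileStream<GenericRecord> stream =
             new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
      // getSchema() reads the schema from the file header, not from the records.
      return stream.getSchema();
    }
  }
}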

Example 1: before

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
@BeforeClass
public static void before() throws Exception {
  final String filePath = TestUtils.getFileFromResourceUrl(BlocksTest.class.getClassLoader().getResource(AVRO_DATA));
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }

  System.out.println(INDEX_DIR.getAbsolutePath());
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);

  final SegmentGeneratorConfig config =
      SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch",
          TimeUnit.DAYS, "test");
  config.setTimeColumnName("daysSinceEpoch");
  driver.init(config);
  driver.build();

  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  final org.apache.avro.Schema avroSchema = avroReader.getSchema();
  final String[] columns = new String[avroSchema.getFields().size()];
  int i = 0;
  for (final Field f : avroSchema.getFields()) {
    columns[i] = f.name();
    i++;
  }
}
 
Developer ID: Hanmourang, Project: Pinot, Lines: 27, Source: BlocksTest.java

Example 2: getSchema

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
/**
 * This method is called by {@link #getAvroSchema}. The default implementation
 * returns the schema of an Avro file, or the schema of the last file in a
 * first-level directory (one that contains no sub-directories).
 *
 * @param path  path of a file or first-level directory
 * @param fs  file system
 * @return avro schema
 * @throws IOException
 */
public static Schema getSchema(Path path, FileSystem fs) throws IOException {
    /* get path of the last file */
    Path lastFile = AvroStorageUtils.getLast(path, fs);
    if (lastFile == null) {
        return null;
    }

    /* read in file and obtain schema */
    GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
    InputStream hdfsInputStream = fs.open(lastFile);
    DataFileStream<Object> avroDataStream = new DataFileStream<Object>(hdfsInputStream, avroReader);
    Schema ret = avroDataStream.getSchema();
    avroDataStream.close();

    return ret;
}
 
Developer ID: linkedin, Project: Cubert, Lines: 27, Source: AvroStorageUtils.java

Example 3: before

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
@BeforeClass
  public static void before() throws Exception {
    final String filePath = TestUtils.getFileFromResourceUrl(BlocksTest.class.getClassLoader().getResource(AVRO_DATA));
    if (INDEX_DIR.exists()) {
      FileUtils.deleteQuietly(INDEX_DIR);
    }

//    System.out.println(INDEX_DIR.getAbsolutePath());
    final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);

    final SegmentGeneratorConfig config =
        SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch",
            TimeUnit.DAYS, "test");
    config.setTimeColumnName("daysSinceEpoch");
    driver.init(config);
    driver.build();

    final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
    final org.apache.avro.Schema avroSchema = avroReader.getSchema();
    final String[] columns = new String[avroSchema.getFields().size()];
    int i = 0;
    for (final Field f : avroSchema.getFields()) {
      columns[i] = f.name();
      i++;
    }
  }
 
Developer ID: linkedin, Project: pinot, Lines: 27, Source: BlocksTest.java

Example 4: verifyOutputAvroFiles

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
private void verifyOutputAvroFiles(FileSystem fs, Configuration conf, String dir, String prefix,
                                   List<String> bodies) throws IOException {
  int found = 0;
  int expected = bodies.size();
  for (String outputFile : getAllFiles(dir)) {
    String name = (new File(outputFile)).getName();
    if (name.startsWith(prefix)) {
      FSDataInputStream input = fs.open(new Path(outputFile));
      DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
      DataFileStream<GenericRecord> avroStream =
          new DataFileStream<GenericRecord>(input, reader);
      GenericRecord record = new GenericData.Record(avroStream.getSchema());
      while (avroStream.hasNext()) {
        avroStream.next(record);
        ByteBuffer body = (ByteBuffer) record.get("body");
        CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
        String bodyStr = decoder.decode(body).toString();
        LOG.debug("Removing event: {}", bodyStr);
        bodies.remove(bodyStr);
        found++;
      }
      avroStream.close();
      input.close();
    }
  }
  Assert.assertTrue("Found = " + found + ", Expected = "  +
      expected + ", Left = " + bodies.size() + " " + bodies,
          bodies.size() == 0);
}
 
Developer ID: moueimei, Project: flume-release-1.7.0, Lines: 30, Source: TestHDFSEventSink.java

Example 5: testGzipDurabilityWithSerializer

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
@Test
public void testGzipDurabilityWithSerializer() throws Exception {
  Context context = new Context();
  context.put("serializer", "AVRO_EVENT");

  HDFSCompressedDataStream writer = new HDFSCompressedDataStream();
  writer.configure(context);

  writer.open(fileURI, factory.getCodec(new Path(fileURI)),
      SequenceFile.CompressionType.BLOCK);

  String[] bodies = { "yarf!", "yarfing!" };
  writeBodies(writer, bodies);

  int found = 0;
  int expected = bodies.length;
  List<String> expectedBodies = Lists.newArrayList(bodies);

  GZIPInputStream cmpIn = new GZIPInputStream(new FileInputStream(file));
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> avroStream =
      new DataFileStream<GenericRecord>(cmpIn, reader);
  GenericRecord record = new GenericData.Record(avroStream.getSchema());
  while (avroStream.hasNext()) {
    avroStream.next(record);
    CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
    String bodyStr = decoder.decode((ByteBuffer) record.get("body"))
        .toString();
    expectedBodies.remove(bodyStr);
    found++;
  }
  avroStream.close();
  cmpIn.close();

  Assert.assertTrue("Found = " + found + ", Expected = " + expected
      + ", Left = " + expectedBodies.size() + " " + expectedBodies,
      expectedBodies.size() == 0);
}
 
Developer ID: moueimei, Project: flume-release-1.7.0, Lines: 39, Source: TestHDFSCompressedDataStream.java

Example 6: init

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
@Override
public void init(Map<String, String> props, Schema indexingSchema, String kafkaTopicName) throws Exception {
  // Load Avro schema
  DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
  _avroSchema = reader.getSchema();
  reader.close();
  _rowGenerator = new AvroRecordToPinotRowGenerator(indexingSchema);
  _reader = new GenericDatumReader<GenericData.Record>(_avroSchema);
}
 
Developer ID: Hanmourang, Project: Pinot, Lines: 10, Source: ClusterTest.java

Example 7: extractSchemaFromAvro

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
/**
 * Gives back a basic Pinot schema object with field types set to unknown and
 * no awareness of whether columns are single-value (SV) or multi-value (MV);
 * this is just a util method for testing.
 * @param avroFile
 * @return
 * @throws FileNotFoundException
 * @throws IOException
 */
public static Schema extractSchemaFromAvro(File avroFile) throws IOException {

  final Schema schema = new Schema();
  final DataFileStream<GenericRecord> dataStreamReader = getAvroReader(avroFile);
  final org.apache.avro.Schema avroSchema = dataStreamReader.getSchema();
  dataStreamReader.close();

  return getPinotSchemaFromAvroSchema(avroSchema, getDefaultFieldTypes(avroSchema), TimeUnit.DAYS);
}
 
Developer ID: Hanmourang, Project: Pinot, Lines: 18, Source: AvroUtils.java

Example 8: before

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
@BeforeClass
public static void before() throws Exception {
  final String filePath =
      TestUtils.getFileFromResourceUrl(DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }

  System.out.println(INDEX_DIR.getAbsolutePath());
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);

  final SegmentGeneratorConfig config =
      SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "weeksSinceEpochSunday",
          TimeUnit.DAYS, "test");
  config.setTimeColumnName("weeksSinceEpochSunday");
  driver.init(config);
  driver.build();

  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  final org.apache.avro.Schema avroSchema = avroReader.getSchema();
  final String[] columns = new String[avroSchema.getFields().size()];
  int i = 0;
  for (final Field f : avroSchema.getFields()) {
    columns[i] = f.name();
    i++;
  }
}
 
Developer ID: Hanmourang, Project: Pinot, Lines: 28, Source: IntArraysTest.java

Example 9: getSchemaFromFile

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
/**
 * Loads the schema from an Avro data file.
 * 
 * @param conf The JobConf.
 * @param path The path to the data file.
 * @return The schema read from the data file's metadata.
 * @throws IOException
 */
public static Schema getSchemaFromFile(JobConf conf, Path path) throws IOException
{
  FileSystem fs = path.getFileSystem(new Configuration());
  FSDataInputStream dataInputStream = fs.open(path);
  DatumReader <GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(dataInputStream, reader);
  return dataFileStream.getSchema();
}
 
Developer ID: linkedin, Project: ml-ease, Lines: 17, Source: AvroUtils.java

Example 10: getSchemaFromFile

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
/**
 * Gets the schema from a given Avro data file.
 * 
 * @param fs the filesystem
 * @param path path to get schema from
 * @return The schema read from the data file's metadata.
 * @throws IOException IOException
 */
public static Schema getSchemaFromFile(FileSystem fs, Path path) throws IOException
{
  FSDataInputStream dataInputStream = fs.open(path);
  DatumReader <GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(dataInputStream, reader);
  try
  {
    return dataFileStream.getSchema();
  }
  finally
  {
    dataFileStream.close();
  }
}
 
Developer ID: apache, Project: incubator-datafu, Lines: 23, Source: PathUtils.java
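Example 10 is the only variant above that guards the stream with try/finally. Since DataFileStream implements java.io.Closeable, the same guarantee can be written more compactly with try-with-resources on Java 7+; the rewrite below is a sketch of that idiom, not code from the incubator-datafu project:

public static Schema getSchemaFromFile(FileSystem fs, Path path) throws IOException
{
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  // try-with-resources closes both streams even if getSchema() throws
  try (FSDataInputStream dataInputStream = fs.open(path);
       DataFileStream<GenericRecord> dataFileStream = new DataFileStream<>(dataInputStream, reader))
  {
    return dataFileStream.getSchema();
  }
}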

Example 11: getAvroSchema

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
/**
 * Reads the avro schema at the specified location.
 * @param p Location of file
 * @param job Hadoop job object
 * @return an Avro Schema object derived from the specified file
 * @throws IOException
 *
 */
public Schema getAvroSchema(final Path p, final Job job)
    throws IOException {
  GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
  FileSystem fs = FileSystem.get(p.toUri(), job.getConfiguration());
  FileStatus[] statusArray = fs.globStatus(p);

  if (statusArray == null) {
    throw new IOException("Path " + p.toString() + " does not exist.");
  }

  if (statusArray.length == 0) {
    throw new IOException("No path matches pattern " + p.toString());
  }

  Path filePath = depthFirstSearchForFile(statusArray, fs);

  if (filePath == null) {
    throw new IOException("No path matches pattern " + p.toString());
  }

  InputStream hdfsInputStream = fs.open(filePath);
  DataFileStream<Object> avroDataStream =
      new DataFileStream<Object>(hdfsInputStream, avroReader);
  Schema s = avroDataStream.getSchema();
  avroDataStream.close();
  return s;
}
 
Developer ID: sigmoidanalytics, Project: spork-streaming, Lines: 36, Source: AvroStorage.java

Example 12: getSchema

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
/**
 * This method is called by {@link #getAvroSchema}. The default implementation
 * returns the schema of an Avro file, or the schema of the last file in a
 * first-level directory (one that contains no sub-directories).
 *
 * @param path  path of a file or first-level directory
 * @param fs  file system
 * @return avro schema
 * @throws IOException
 */
public static Schema getSchema(Path path, FileSystem fs) throws IOException {
    /* get path of the last file */
    Path lastFile = AvroStorageUtils.getLast(path, fs);

    /* read in file and obtain schema */
    GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
    InputStream hdfsInputStream = fs.open(lastFile);
    DataFileStream<Object> avroDataStream = new DataFileStream<Object>(hdfsInputStream, avroReader);
    Schema ret = avroDataStream.getSchema();
    avroDataStream.close();

    return ret;
}
 
Developer ID: sigmoidanalytics, Project: spork-streaming, Lines: 24, Source: AvroStorageUtils.java

Example 13: fromAvro

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
public static Schema fromAvro(InputStream in) throws IOException {
  GenericDatumReader<GenericRecord> datumReader =
      new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> stream = null;
  boolean threw = true;

  try {
    stream = new DataFileStream<>(in, datumReader);
    Schema schema = stream.getSchema();
    threw = false;
    return schema;
  } finally {
    Closeables.close(stream, threw);
  }
}
 
Developer ID: apache, Project: parquet-mr, Lines: 16, Source: Schemas.java

Example 14: getAvroSchema

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
/**
 * Reads the avro schema at the specified locations.
 * @param p Locations of files
 * @param job Hadoop job object
 * @return an Avro Schema object derived from the first matching file
 * @throws IOException
 *
 */
public Schema getAvroSchema(final Path[] p, final Job job) throws IOException {
  GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
  ArrayList<FileStatus> statusList = new ArrayList<FileStatus>();
  FileSystem fs = FileSystem.get(p[0].toUri(), job.getConfiguration());
  for (Path temp : p) {
    for (FileStatus tempf : fs.globStatus(temp)) {
      statusList.add(tempf);
    }
  }
  FileStatus[] statusArray = (FileStatus[]) statusList
      .toArray(new FileStatus[statusList.size()]);

  if (statusArray == null) {
    throw new IOException("Path " + p.toString() + " does not exist.");
  }

  if (statusArray.length == 0) {
    throw new IOException("No path matches pattern " + p.toString());
  }

  Path filePath = Utils.depthFirstSearchForFile(statusArray, fs);

  if (filePath == null) {
    throw new IOException("No path matches pattern " + p.toString());
  }

  InputStream hdfsInputStream = fs.open(filePath);
  DataFileStream<Object> avroDataStream = new DataFileStream<Object>(
      hdfsInputStream, avroReader);
  Schema s = avroDataStream.getSchema();
  avroDataStream.close();
  return s;
}
 
Developer ID: sigmoidanalytics, Project: spork, Lines: 42, Source: AvroStorage.java

Example 15: init

import org.apache.avro.file.DataFileStream; // import the package/class this method depends on
@Override
public void init(Map<String, String> props, Schema indexingSchema, String kafkaTopicName) throws Exception {
  // Load Avro schema
  DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
  _avroSchema = reader.getSchema();
  reader.close();
  _rowGenerator = new AvroRecordToPinotRowGenerator(indexingSchema);
  _reader = new GenericDatumReader<>(_avroSchema);
}
 
Developer ID: linkedin, Project: pinot, Lines: 10, Source: ClusterTest.java


Note: The org.apache.avro.file.DataFileStream.getSchema method examples in this article were compiled by 純淨天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors. Please refer to each project's license before distributing or reusing the code, and do not republish without permission.