

Java DataFileReader.getSchema Method Code Examples

This article collects typical usage examples of the Java method org.apache.avro.file.DataFileReader.getSchema. If you are wondering what DataFileReader.getSchema does, how to call it, or what it looks like in real code, the curated examples below should help. You can also browse further usage examples of the enclosing class, org.apache.avro.file.DataFileReader.


The sections below present 15 code examples of the DataFileReader.getSchema method, ordered by popularity.
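Before diving into the project-specific examples, here is a minimal, self-contained sketch of the typical pattern: open an Avro container file with DataFileReader, call getSchema() to obtain the writer schema stored in the file header, and then iterate over the records. The input file name users.avro is a hypothetical placeholder and is not taken from any of the projects below.

import java.io.File;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

public class GetSchemaExample {
    public static void main(String[] args) throws IOException {
        File file = new File("users.avro"); // hypothetical input file
        DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
        DataFileReader<GenericRecord> reader =
                new DataFileReader<GenericRecord>(file, datumReader);
        try {
            // getSchema() returns the schema the file was written with
            Schema schema = reader.getSchema();
            System.out.println(schema.toString(true)); // pretty-print the schema as JSON

            // iterate over the records, reusing one GenericRecord instance
            GenericRecord record = null;
            while (reader.hasNext()) {
                record = reader.next(record);
                System.out.println(record);
            }
        } finally {
            reader.close();
        }
    }
}

The examples that follow show the same getSchema() call in more specialized contexts: reading records into a reusable GenericData.Record, padding nullable union fields with null defaults, validating imports and serializers in tests, and copying the schema (and codec) of a reader over to a DataFileWriter.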

Example 1: readAvroFile

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
/**
 * Reads in binary Avro-encoded entities using the schema stored in the file
 * and prints them out.
 */
public static void readAvroFile(File file) throws IOException {
	GenericDatumReader<GenericData.Record> datum = new GenericDatumReader<GenericData.Record>();
	DataFileReader<GenericData.Record> reader = new DataFileReader<GenericData.Record>(file, datum);

	GenericData.Record record = new GenericData.Record(reader.getSchema());
	while (reader.hasNext()) {
		reader.next(record);
		System.out.println("Name " + record.get("name") + " on "
				+ record.get("Meetup_date") + " attending "
				+ record.get("going") + " organized by  "
				+ record.get("organizer") + " on  " + record.get("topics"));
	}

	reader.close();
}
 
Developer: airisdata, Project: avroparquet, Lines: 20, Source: StorageFormatUtils.java

Example 2: getSchema

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
public static Schema getSchema(SeekableInput input) throws IOException
{
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader =
            new DataFileReader<GenericRecord>(input, datumReader);
    Schema schema = dataFileReader.getSchema();

    if (PadDefaultNullsToSchema)
    {
        // a list of "cloned" fields, with optional default value set to null
        ArrayList<Field> paddedFields = new ArrayList<Field>();

        for (Field field: schema.getFields())
        {
            // should this field be padded?
            boolean needsNullPadding = (field.schema() != null) // the field has nested schema
                && (field.schema().getType().equals(Type.UNION)) // the nested schema is UNION
                && (field.schema().getTypes().get(0).getType().equals(Type.NULL)); // the first element of union is NULL type

            JsonNode defValue = needsNullPadding ? NullNode.getInstance() : field.defaultValue();

            Field f = new Field(field.name(), field.schema(), field.doc(), defValue);
            paddedFields.add(f);
        }

        schema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError());
        schema.setFields(paddedFields);
    }

    return schema;
}
 
Developer: linkedin, Project: Cubert, Lines: 32, Source: AvroUtils.java

Example 3: testFirstUnderscoreInColumnName

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
public void testFirstUnderscoreInColumnName() throws IOException {
  String [] names = { "_NAME" };
  String [] types = { "INT" };
  String [] vals = { "1987" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "__NAME", Type.INT);

  GenericRecord record1 = reader.next();
  assertEquals("__NAME", 1987, record1.get("__NAME"));
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 21, Source: TestAvroImport.java

Example 4: testNonstandardCharactersInColumnName

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
public void testNonstandardCharactersInColumnName() throws IOException {
  String [] names = { "avro\uC3A11" };
  String [] types = { "INT" };
  String [] vals = { "1987" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "AVRO1", Type.INT);

  GenericRecord record1 = reader.next();
  assertEquals("AVRO1", 1987, record1.get("AVRO1"));
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 21, Source: TestAvroImport.java

Example 5: testNonIdentCharactersInColumnName

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
public void testNonIdentCharactersInColumnName() throws IOException {
  String [] names = { "test_a-v+r/o" };
  String [] types = { "INT" };
  String [] vals = { "2015" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "TEST_A_V_R_O", Type.INT);

  GenericRecord record1 = reader.next();
  assertEquals("TEST_A_V_R_O", 2015, record1.get("TEST_A_V_R_O"));
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 21, Source: TestAvroImport.java

Example 6: testOverrideTypeMapping

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
public void testOverrideTypeMapping() throws IOException {
  String [] types = { "INT" };
  String [] vals = { "10" };
  createTableWithColTypes(types, vals);

  String [] extraArgs = { "--map-column-java", "DATA_COL0=String"};

  runImport(getOutputArgv(true, extraArgs));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "DATA_COL0", Schema.Type.STRING);

  GenericRecord record1 = reader.next();
  assertEquals("DATA_COL0", new Utf8("10"), record1.get("DATA_COL0"));
}
 
Developer: unicredit, Project: zSqoop, Lines: 22, Source: TestAvroImport.java

Example 7: getSchema

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
private Schema getSchema()  {
	if(schema == null) {
		DatumReader<GenericRecord> datumReader =  new GenericDatumReader<GenericRecord>();
		try {
			Path p = new Path(fileSrcOperator.getFilePath());
			FileSystem fs = p.getFileSystem();
			FSDataInputStream schemaFile = fs.open(p);
			DataFileReader reader = new DataFileReader(
					new FSDataInputStreamWrapper(schemaFile, fs.getFileStatus(p).getLen()), datumReader);
			schema = reader.getSchema();
		} catch (IOException e) {
			throw new RuntimeException("Error while accessing schema from Avro file", e);
		}
	}
	return schema;
}
 
Developer: rmetzger, Project: stratosphere-sql, Lines: 17, Source: AvroStratosphereTable.java

Example 8: validateAvroFile

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader =
      new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    String bodyStr = record.get("message").toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
 
Developer: cloudera, Project: cdk, Lines: 17, Source: TestAvroEventSerializer.java

Example 9: validateAvroFile

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader =
      new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    ByteBuffer body = (ByteBuffer) record.get("body");
    CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
    String bodyStr = decoder.decode(body).toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
 
Developer: moueimei, Project: flume-release-1.7.0, Lines: 19, Source: TestFlumeEventAvroEventSerializer.java

Example 10: getSchemaFromAvroDataFile

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
private Schema getSchemaFromAvroDataFile() throws IOException {
  String firstDataFilePath = HdfsReader.getFirstDataFilePathInDir(dataLocationInHdfs);
  LOG.info("Extracting schema for table " + name + " from avro data file " + firstDataFilePath);
  SeekableInput sin = new HdfsReader(firstDataFilePath).getFsInput();

  Closer closer = Closer.create();
  try {
    DataFileReader<Void> dfr = closer.register(new DataFileReader<Void>(sin, new GenericDatumReader<Void>()));
    Schema schema = dfr.getSchema();
    return schema;
  } finally {
    closer.close();
  }
}
 
Developer: Hanmourang, Project: Gobblin, Lines: 15, Source: AvroExternalTable.java

Example 11: getSchemaFromDataFile

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
/**
 * Get Avro schema from an Avro data file.
 */
public static Schema getSchemaFromDataFile(Path dataFile, FileSystem fs) throws IOException {
  Closer closer = Closer.create();
  try {
    SeekableInput sin = closer.register(new FsInput(dataFile, fs.getConf()));
    DataFileReader<GenericRecord> reader =
        closer.register(new DataFileReader<GenericRecord>(sin, new GenericDatumReader<GenericRecord>()));
    return reader.getSchema();
  } finally {
    closer.close();
  }
}
 
Developer: Hanmourang, Project: Gobblin, Lines: 15, Source: AvroUtils.java

Example 12: generateWorksheet

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
@Override
public Worksheet generateWorksheet() throws JSONException, IOException,
		KarmaException {
	DataFileReader<Void> schemareader = new DataFileReader<Void>(file, new GenericDatumReader<Void>());
	Schema schema = schemareader.getSchema();
	schemareader.close();
	DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(file, new GenericDatumReader<GenericRecord>(schema));
	ByteArrayOutputStream baos = new ByteArrayOutputStream();
	baos.write('[');
	baos.write('\n');
	GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(reader.getSchema());
	while (reader.hasNext())
	{
		GenericRecord record = reader.next();
		JsonEncoder encoder = EncoderFactory.get().jsonEncoder(reader.getSchema(), new JsonFactory().createJsonGenerator(baos)).configure(baos);
		writer.write(record, encoder);
		encoder.flush();
		if (reader.hasNext())
		{
			baos.write(',');
		}
	}
	reader.close();
	baos.write('\n');
	baos.write(']');
	baos.flush();
	baos.close();
	String json = new String(baos.toByteArray());
	JsonImport jsonImport = new JsonImport(json, this.getFactory(), this.getWorksheet(), workspace, maxNumLines);
	return jsonImport.generateWorksheet();
}
 
Developer: therelaxist, Project: spring-usc, Lines: 35, Source: AvroImport.java

Example 13: createDataFileWriter

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
private DataFileWriter<GenericRecord> createDataFileWriter(DataFileReader<GenericRecord> dataFileReader) throws IllegalArgumentException,
        IOException
{
    Schema schema = dataFileReader.getSchema();
    DatumWriter<GenericRecord> datumWriter =
            new GenericDatumWriter<GenericRecord>(schema);
    DataFileWriter<GenericRecord> writer =
            new DataFileWriter<GenericRecord>(datumWriter);

    // Get the codec of the reader
    String codecStr = dataFileReader.getMetaString(DataFileConstants.CODEC);
    int level = conf.getInt("avro.mapred.deflate.level", 1);
    String codecName = conf.get("avro.output.codec", codecStr);
    CodecFactory factory =
            codecName.equals("deflate") ? CodecFactory.deflateCodec(level)
                    : CodecFactory.fromString(codecName);

    // Set the codec of the writer
    writer.setCodec(factory);

    writer.setSyncInterval(conf.getInt("avro.mapred.sync.interval",
                                       Math.max(conf.getInt("io.file.buffer.size",
                                                            16000), 16000)));

    writer.create(schema,
                  new Path(tempFileName).getFileSystem(conf)
                                        .create(new Path(tempFileName)));
    return writer;
}
 
Developer: linkedin, Project: Cubert, Lines: 30, Source: Purge.java

Example 14: parseChunk

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
@Override
protected final ParseWriter parseChunk(int cidx, ParseReader din, ParseWriter dout) {
  // We will read GenericRecord and load them based on schema
  final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  final H2OSeekableInputAdaptor sbai = new H2OSeekableInputAdaptor(cidx, din);
  DataFileReader<GenericRecord> dataFileReader = null;
  int cnt = 0;
  try {
    // Reconstruct Avro header
    DataFileStream.Header
        fakeHeader = new DataFileReader<>(new SeekableByteArrayInput(this.header), datumReader).getHeader();
    dataFileReader = DataFileReader.openReader(sbai, datumReader, fakeHeader, true);
    Schema schema = dataFileReader.getSchema();
    GenericRecord gr = new GenericData.Record(schema);
    Schema.Field[] flatSchema = flatSchema(schema);
    long sync = dataFileReader.previousSync();
    if (sbai.chunkCnt == 0) { // Find data in first chunk
      while (dataFileReader.hasNext() && dataFileReader.previousSync() == sync) {
        gr = dataFileReader.next(gr);
        // Write values to the output
        // FIXME: what if user change input names, or ignore an input column?
        write2frame(gr, _setup.getColumnNames(), flatSchema, _setup.getColumnTypes(), dout);
        cnt++;
      }
    } // else first chunk does not contain synchronization block, so give up and let another reader to use it
  } catch (Throwable e) {
    e.printStackTrace();
  }

  Log.trace(String.format("Avro: ChunkIdx: %d read %d records, start at %d off, block count: %d, block size: %d", cidx, cnt, din.getChunkDataStart(cidx), dataFileReader.getBlockCount(), dataFileReader.getBlockSize()));

  return dout;
}
 
Developer: h2oai, Project: h2o-3, Lines: 34, Source: AvroParser.java

Example 15: getSchema

import org.apache.avro.file.DataFileReader; // import the package/class required by this method
public static Schema getSchema(FileStatus fileStatus) throws Exception {
    FsInput input = new FsInput(fileStatus.getPath(), new Configuration());
    DataFileReader<Void> reader = new DataFileReader<Void>(input, new GenericDatumReader<Void>());
    Schema schema = reader.getSchema();
    reader.close();
    input.close();
    return schema;
}
 
Developer: viadeo, Project: viadeo-avro-utils, Lines: 9, Source: SchemaUtils.java


Note: The org.apache.avro.file.DataFileReader.getSchema examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers, and copyright of the source code remains with the original authors. Please consult the license of the corresponding project before distributing or using the code, and do not reproduce this article without permission.