This article collects typical usage examples of the Java method org.apache.avro.file.DataFileReader.getSchema. If you have been wondering what DataFileReader.getSchema does, how to call it, or where to find real-world uses of it, the hand-picked code examples below should help. You can also browse further usage examples of its enclosing class, org.apache.avro.file.DataFileReader.
The following presents 15 code examples of DataFileReader.getSchema, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code examples.
Example 1: readAvroFile
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
/**
* Reads in binary Avro-encoded entities using the schema stored in the file
* and prints them out.
*/
public static void readAvroFile(File file) throws IOException {
  // Parameterized types avoid raw-type warnings; the reader picks up the
  // writer schema stored in the file header.
  GenericDatumReader<GenericData.Record> datum = new GenericDatumReader<GenericData.Record>();
  DataFileReader<GenericData.Record> reader = new DataFileReader<GenericData.Record>(file, datum);
  // Reuse one record instance across iterations to avoid per-row allocation.
  GenericData.Record record = new GenericData.Record(reader.getSchema());
  while (reader.hasNext()) {
    reader.next(record);
    System.out.println("Name " + record.get("name") + " on "
        + record.get("Meetup_date") + " attending "
        + record.get("going") + " organized by "
        + record.get("organizer") + " on " + record.get("topics"));
  }
  reader.close();
}
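For context, a minimal self-contained usage sketch follows. The schema is an assumption inferred from the field names the example prints (it is not part of the original project), and the file path is hypothetical:

  // Assumed schema; field types are guesses based on the printed output
  Schema schema = new Schema.Parser().parse(
      "{\"type\":\"record\",\"name\":\"Meetup\",\"fields\":["
          + "{\"name\":\"name\",\"type\":\"string\"},"
          + "{\"name\":\"Meetup_date\",\"type\":\"string\"},"
          + "{\"name\":\"going\",\"type\":\"int\"},"
          + "{\"name\":\"organizer\",\"type\":\"string\"},"
          + "{\"name\":\"topics\",\"type\":{\"type\":\"array\",\"items\":\"string\"}}]}");
  File file = new File("meetups.avro"); // hypothetical path
  DataFileWriter<GenericData.Record> writer =
      new DataFileWriter<GenericData.Record>(new GenericDatumWriter<GenericData.Record>(schema));
  writer.create(schema, file);
  GenericData.Record r = new GenericData.Record(schema);
  r.put("name", "Avro Users");
  r.put("Meetup_date", "2016-01-01");
  r.put("going", 42);
  r.put("organizer", "ASF");
  r.put("topics", java.util.Arrays.asList("avro", "hadoop"));
  writer.append(r);
  writer.close();
  readAvroFile(file); // prints the record using the schema stored in the file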
Example 2: getSchema
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
public static Schema getSchema(SeekableInput input) throws IOException
{
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> dataFileReader =
      new DataFileReader<GenericRecord>(input, datumReader);
  Schema schema = dataFileReader.getSchema();

  if (PadDefaultNullsToSchema)
  {
    // a list of "cloned" fields, with optional default value set to null
    ArrayList<Field> paddedFields = new ArrayList<Field>();
    for (Field field : schema.getFields())
    {
      // should this field be padded?
      boolean needsNullPadding = (field.schema() != null) // the field has a nested schema
          && (field.schema().getType().equals(Type.UNION)) // the nested schema is a UNION
          && (field.schema().getTypes().get(0).getType().equals(Type.NULL)); // the first branch of the union is NULL
      JsonNode defValue = needsNullPadding ? NullNode.getInstance() : field.defaultValue();
      Field f = new Field(field.name(), field.schema(), field.doc(), defValue);
      paddedFields.add(f);
    }
    schema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError());
    schema.setFields(paddedFields);
  }
  return schema;
}
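For local testing, Avro's SeekableFileInput adapts a java.io.File to the SeekableInput this method expects (a sketch; the path is hypothetical):

  SeekableInput input = new SeekableFileInput(new File("/tmp/data.avro")); // hypothetical path
  Schema schema = getSchema(input);
  System.out.println(schema.toString(true)); // pretty-printed schema JSON
  input.close();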
Example 3: testFirstUnderscoreInColumnName
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
public void testFirstUnderscoreInColumnName() throws IOException {
  String[] names = { "_NAME" };
  String[] types = { "INT" };
  String[] vals = { "1987" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "__NAME", Type.INT);

  GenericRecord record1 = reader.next();
  assertEquals("__NAME", 1987, record1.get("__NAME"));
}
Example 4: testNonstandardCharactersInColumnName
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
public void testNonstandardCharactersInColumnName() throws IOException {
  String[] names = { "avro\uC3A11" };
  String[] types = { "INT" };
  String[] vals = { "1987" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "AVRO1", Type.INT);

  GenericRecord record1 = reader.next();
  assertEquals("AVRO1", 1987, record1.get("AVRO1"));
}
Example 5: testNonIdentCharactersInColumnName
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
public void testNonIdentCharactersInColumnName() throws IOException {
  String[] names = { "test_a-v+r/o" };
  String[] types = { "INT" };
  String[] vals = { "2015" };
  createTableWithColTypesAndNames(names, types, vals);

  runImport(getOutputArgv(true, null));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "TEST_A_V_R_O", Type.INT);

  GenericRecord record1 = reader.next();
  assertEquals("TEST_A_V_R_O", 2015, record1.get("TEST_A_V_R_O"));
}
Example 6: testOverrideTypeMapping
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
public void testOverrideTypeMapping() throws IOException {
  String[] types = { "INT" };
  String[] vals = { "10" };
  createTableWithColTypes(types, vals);

  String[] extraArgs = { "--map-column-java", "DATA_COL0=String" };
  runImport(getOutputArgv(true, extraArgs));

  Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
  DataFileReader<GenericRecord> reader = read(outputFile);
  Schema schema = reader.getSchema();
  assertEquals(Schema.Type.RECORD, schema.getType());
  List<Field> fields = schema.getFields();
  assertEquals(types.length, fields.size());

  checkField(fields.get(0), "DATA_COL0", Schema.Type.STRING);

  GenericRecord record1 = reader.next();
  assertEquals("DATA_COL0", new Utf8("10"), record1.get("DATA_COL0"));
}
Example 7: getSchema
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
private Schema getSchema() {
  if (schema == null) {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    try {
      Path p = new Path(fileSrcOperator.getFilePath());
      // Path.getFileSystem requires a Hadoop Configuration; the default one is assumed here
      FileSystem fs = p.getFileSystem(new Configuration());
      FSDataInputStream schemaFile = fs.open(p);
      DataFileReader<GenericRecord> reader = new DataFileReader<GenericRecord>(
          new FSDataInputStreamWrapper(schemaFile, fs.getFileStatus(p).getLen()), datumReader);
      schema = reader.getSchema();
      reader.close(); // only the header was needed; close the underlying stream
    } catch (IOException e) {
      // chain the cause so the original failure is not swallowed
      throw new RuntimeException("Error while accessing schema from Avro file", e);
    }
  }
  return schema;
}
Example 8: validateAvroFile
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader =
      new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    String bodyStr = record.get("message").toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
Example 9: validateAvroFile
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
public void validateAvroFile(File file) throws IOException {
  // read the events back using GenericRecord
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileReader<GenericRecord> fileReader =
      new DataFileReader<GenericRecord>(file, reader);
  GenericRecord record = new GenericData.Record(fileReader.getSchema());
  int numEvents = 0;
  while (fileReader.hasNext()) {
    fileReader.next(record);
    ByteBuffer body = (ByteBuffer) record.get("body");
    CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
    String bodyStr = decoder.decode(body).toString();
    System.out.println(bodyStr);
    numEvents++;
  }
  fileReader.close();
  Assert.assertEquals("Should have found a total of 3 events", 3, numEvents);
}
Example 10: getSchemaFromAvroDataFile
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
private Schema getSchemaFromAvroDataFile() throws IOException {
  String firstDataFilePath = HdfsReader.getFirstDataFilePathInDir(dataLocationInHdfs);
  LOG.info("Extracting schema for table " + name + " from avro data file " + firstDataFilePath);
  SeekableInput sin = new HdfsReader(firstDataFilePath).getFsInput();

  Closer closer = Closer.create();
  try {
    DataFileReader<Void> dfr =
        closer.register(new DataFileReader<Void>(sin, new GenericDatumReader<Void>()));
    Schema schema = dfr.getSchema();
    return schema;
  } finally {
    closer.close();
  }
}
Example 11: getSchemaFromDataFile
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
/**
* Get Avro schema from an Avro data file.
*/
public static Schema getSchemaFromDataFile(Path dataFile, FileSystem fs) throws IOException {
  Closer closer = Closer.create();
  try {
    SeekableInput sin = closer.register(new FsInput(dataFile, fs.getConf()));
    DataFileReader<GenericRecord> reader =
        closer.register(new DataFileReader<GenericRecord>(sin, new GenericDatumReader<GenericRecord>()));
    return reader.getSchema();
  } finally {
    closer.close();
  }
}
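A caller might use it like this (a sketch; the path is hypothetical):

  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(conf);
  Schema schema = getSchemaFromDataFile(new Path("/data/events/part-00000.avro"), fs);
  System.out.println(schema.getFullName());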
Example 12: generateWorksheet
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
@Override
public Worksheet generateWorksheet() throws JSONException, IOException,
    KarmaException {
  // First pass: open the file just to read the writer schema from the header
  DataFileReader<Void> schemareader = new DataFileReader<Void>(file, new GenericDatumReader<Void>());
  Schema schema = schemareader.getSchema();
  schemareader.close();

  // Second pass: stream every record out as one element of a JSON array
  DataFileReader<GenericRecord> reader =
      new DataFileReader<GenericRecord>(file, new GenericDatumReader<GenericRecord>(schema));
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  baos.write('[');
  baos.write('\n');
  GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(reader.getSchema());
  while (reader.hasNext()) {
    GenericRecord record = reader.next();
    JsonEncoder encoder = EncoderFactory.get()
        .jsonEncoder(reader.getSchema(), new JsonFactory().createJsonGenerator(baos))
        .configure(baos);
    writer.write(record, encoder);
    encoder.flush();
    if (reader.hasNext()) {
      baos.write(','); // separate array elements
    }
  }
  reader.close();
  baos.write('\n');
  baos.write(']');
  baos.flush();
  baos.close();

  String json = new String(baos.toByteArray());
  JsonImport jsonImport = new JsonImport(json, this.getFactory(), this.getWorksheet(), workspace, maxNumLines);
  return jsonImport.generateWorksheet();
}
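Note the design here: a fresh JsonEncoder is created per record and the commas are written by hand. Avro's JsonEncoder emits one standalone JSON object per record and does not produce array separators itself, so this is what turns the record stream into a well-formed JSON array for JsonImport to consume.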
Example 13: createDataFileWriter
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
private DataFileWriter<GenericRecord> createDataFileWriter(DataFileReader<GenericRecord> dataFileReader)
    throws IllegalArgumentException, IOException
{
  Schema schema = dataFileReader.getSchema();
  DatumWriter<GenericRecord> datumWriter =
      new GenericDatumWriter<GenericRecord>(schema);
  DataFileWriter<GenericRecord> writer =
      new DataFileWriter<GenericRecord>(datumWriter);

  // Get the codec of the reader
  String codecStr = dataFileReader.getMetaString(DataFileConstants.CODEC);
  int level = conf.getInt("avro.mapred.deflate.level", 1);
  String codecName = conf.get("avro.output.codec", codecStr);
  CodecFactory factory =
      codecName.equals("deflate") ? CodecFactory.deflateCodec(level)
          : CodecFactory.fromString(codecName);

  // Set the codec of the writer
  writer.setCodec(factory);
  writer.setSyncInterval(conf.getInt("avro.mapred.sync.interval",
      Math.max(conf.getInt("io.file.buffer.size", 16000), 16000)));
  writer.create(schema,
      new Path(tempFileName).getFileSystem(conf)
          .create(new Path(tempFileName)));
  return writer;
}
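Both knobs are ordinary Hadoop configuration keys, so the output codec can be forced independently of the input file. A sketch, assuming the Snappy codec is available at runtime:

  Configuration conf = new Configuration();
  conf.set("avro.output.codec", "snappy");     // overrides the codec inherited from the reader
  conf.setInt("avro.mapred.deflate.level", 6); // only consulted when the codec is "deflate"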
Example 14: parseChunk
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
@Override
protected final ParseWriter parseChunk(int cidx, ParseReader din, ParseWriter dout) {
  // We will read GenericRecords and load them based on the schema
  final DatumReader<GenericRecord> datumReader = new GenericDatumReader<>();
  final H2OSeekableInputAdaptor sbai = new H2OSeekableInputAdaptor(cidx, din);
  DataFileReader<GenericRecord> dataFileReader = null;
  int cnt = 0;
  try {
    // Reconstruct the Avro header
    DataFileStream.Header fakeHeader =
        new DataFileReader<>(new SeekableByteArrayInput(this.header), datumReader).getHeader();
    dataFileReader = DataFileReader.openReader(sbai, datumReader, fakeHeader, true);
    Schema schema = dataFileReader.getSchema();
    GenericRecord gr = new GenericData.Record(schema);
    Schema.Field[] flatSchema = flatSchema(schema);
    long sync = dataFileReader.previousSync();
    if (sbai.chunkCnt == 0) { // Find data in the first chunk
      while (dataFileReader.hasNext() && dataFileReader.previousSync() == sync) {
        gr = dataFileReader.next(gr);
        // Write values to the output
        // FIXME: what if the user changes input names, or ignores an input column?
        write2frame(gr, _setup.getColumnNames(), flatSchema, _setup.getColumnTypes(), dout);
        cnt++;
      }
    } // else the first chunk does not contain a synchronization block, so give up and let another reader use it
  } catch (Throwable e) {
    e.printStackTrace();
  }
  Log.trace(String.format("Avro: ChunkIdx: %d read %d records, start at %d off, block count: %d, block size: %d",
      cidx, cnt, din.getChunkDataStart(cidx), dataFileReader.getBlockCount(), dataFileReader.getBlockSize()));
  return dout;
}
Example 15: getSchema
import org.apache.avro.file.DataFileReader; // import the package/class the method depends on
public static Schema getSchema(FileStatus fileStatus) throws Exception {
  FsInput input = new FsInput(fileStatus.getPath(), new Configuration());
  DataFileReader<Void> reader = new DataFileReader<Void>(input, new GenericDatumReader<Void>());
  Schema schema = reader.getSchema();
  reader.close();
  input.close();
  return schema;
}
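Usage is a one-liner once a FileStatus is in hand (a sketch; the path is hypothetical):

  FileSystem fs = FileSystem.get(new Configuration());
  FileStatus status = fs.getFileStatus(new Path("/data/input.avro"));
  Schema schema = getSchema(status);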