This article collects typical usage examples of the Java class org.apache.avro.file.DataFileReader. If you are wondering what DataFileReader does, how to use it, or what working code looks like, the curated examples below should help.
The DataFileReader class belongs to the org.apache.avro.file package. Fifteen code examples are shown below, sorted by popularity.
Example 1: readAvroFile
import org.apache.avro.file.DataFileReader; // import the required package/class
/**
 * Reads in binary Avro-encoded entities using the schema stored in the file
 * and prints them out.
 */
public static void readAvroFile(File file) throws IOException {
    GenericDatumReader<GenericData.Record> datum = new GenericDatumReader<>();
    DataFileReader<GenericData.Record> reader = new DataFileReader<>(file, datum);
    GenericData.Record record = new GenericData.Record(reader.getSchema());
    while (reader.hasNext()) {
        reader.next(record); // reuse one record instance across iterations
        System.out.println("Name " + record.get("name") + " on "
            + record.get("Meetup_date") + " attending "
            + record.get("going") + " organized by "
            + record.get("organizer") + " on " + record.get("topics"));
    }
    reader.close();
}
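For context, here is a minimal sketch of the writer side that could produce such a file. This is an assumption-laden illustration, not part of the original example: the schema file meetup.avsc, the output file name, and the field values are all hypothetical.

Schema schema = new Schema.Parser().parse(new File("meetup.avsc")); // hypothetical schema file
GenericData.Record record = new GenericData.Record(schema);
record.put("name", "Ada");                // illustrative values; the real schema has more fields
record.put("Meetup_date", "2015-06-01");
try (DataFileWriter<GenericData.Record> writer =
         new DataFileWriter<>(new GenericDatumWriter<GenericData.Record>(schema))) {
    writer.create(schema, new File("meetups.avro")); // embeds the schema in the file header
    writer.append(record);
}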
Example 2: AvroFileInputStream
import org.apache.avro.file.DataFileReader; // import the required package/class
public AvroFileInputStream(FileStatus status) throws IOException {
    pos = 0;
    buffer = new byte[0];
    GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
    FileContext fc = FileContext.getFileContext(new Configuration());
    fileReader =
        DataFileReader.openReader(new AvroFSInput(fc, status.getPath()), reader);
    Schema schema = fileReader.getSchema();
    writer = new GenericDatumWriter<Object>(schema);
    output = new ByteArrayOutputStream();
    JsonGenerator generator =
        new JsonFactory().createJsonGenerator(output, JsonEncoding.UTF8);
    MinimalPrettyPrinter prettyPrinter = new MinimalPrettyPrinter();
    prettyPrinter.setRootValueSeparator(System.getProperty("line.separator"));
    generator.setPrettyPrinter(prettyPrinter);
    encoder = EncoderFactory.get().jsonEncoder(schema, generator);
}
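The same binary-to-JSON idea works outside Hadoop as well. A minimal standalone sketch, assuming a local file name and a surrounding method that declares IOException:

GenericDatumReader<Object> datumReader = new GenericDatumReader<>();
try (DataFileReader<Object> fileReader =
         new DataFileReader<>(new File("data.avro"), datumReader)) { // hypothetical input file
    Schema schema = fileReader.getSchema();
    GenericDatumWriter<Object> datumWriter = new GenericDatumWriter<>(schema);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    JsonEncoder jsonEncoder = EncoderFactory.get().jsonEncoder(schema, out);
    for (Object datum : fileReader) {
        datumWriter.write(datum, jsonEncoder); // re-encode each record as JSON
    }
    jsonEncoder.flush();
    System.out.println(out.toString("UTF-8"));
}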
Example 3: testGenericRecord
import org.apache.avro.file.DataFileReader; // import the required package/class
@Test
public void testGenericRecord() throws IOException {
    final Path outputPath = new Path(File.createTempFile("avro-output-file", "generic.avro").getAbsolutePath());
    final AvroOutputFormat<GenericRecord> outputFormat = new AvroOutputFormat<>(outputPath, GenericRecord.class);
    Schema schema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"user\", \"fields\": [{\"name\":\"user_name\", \"type\":\"string\"}, {\"name\":\"favorite_number\", \"type\":\"int\"}, {\"name\":\"favorite_color\", \"type\":\"string\"}]}");
    outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
    outputFormat.setSchema(schema);
    output(outputFormat, schema);

    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>(schema);
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<>(new File(outputPath.getPath()), reader);
    while (dataFileReader.hasNext()) {
        GenericRecord record = dataFileReader.next();
        assertEquals("testUser", record.get("user_name").toString());
        assertEquals(1, record.get("favorite_number"));
        assertEquals("blue", record.get("favorite_color").toString());
    }
    dataFileReader.close();

    // cleanup
    FileSystem fs = FileSystem.getLocalFileSystem();
    fs.delete(outputPath, false);
}
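The test calls an output(...) helper defined elsewhere in the test class. As a purely hypothetical reconstruction of what it might do (its real body is not part of this excerpt), it would write one record matching the assertions above:

private void output(AvroOutputFormat<GenericRecord> outputFormat, Schema schema) throws IOException {
    outputFormat.configure(new org.apache.flink.configuration.Configuration());
    outputFormat.open(1, 1); // task number and parallelism for a single local task
    GenericRecord record = new GenericData.Record(schema);
    record.put("user_name", "testUser");
    record.put("favorite_number", 1);
    record.put("favorite_color", "blue");
    outputFormat.writeRecord(record);
    outputFormat.close();
}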
Example 4: getSchema
import org.apache.avro.file.DataFileReader; // import the required package/class
public static Schema getSchema(SeekableInput input) throws IOException
{
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader =
        new DataFileReader<GenericRecord>(input, datumReader);
    Schema schema = dataFileReader.getSchema();

    if (PadDefaultNullsToSchema)
    {
        // a list of "cloned" fields, with optional default value set to null
        ArrayList<Field> paddedFields = new ArrayList<Field>();
        for (Field field : schema.getFields())
        {
            // should this field be padded?
            boolean needsNullPadding = (field.schema() != null) // the field has a nested schema
                && (field.schema().getType().equals(Type.UNION)) // the nested schema is a UNION
                && (field.schema().getTypes().get(0).getType().equals(Type.NULL)); // the first branch of the union is NULL

            JsonNode defValue = needsNullPadding ? NullNode.getInstance() : field.defaultValue();
            Field f = new Field(field.name(), field.schema(), field.doc(), defValue);
            paddedFields.add(f);
        }

        schema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError());
        schema.setFields(paddedFields);
    }

    return schema;
}
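A minimal usage sketch, assuming a local .avro file; SeekableFileInput is Avro's file-backed SeekableInput implementation:

try (SeekableFileInput input = new SeekableFileInput(new File("data.avro"))) { // hypothetical path
    Schema schema = getSchema(input);
    System.out.println(schema.toString(true)); // pretty-printed JSON form of the schema
}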
Example 5: initialize
import org.apache.avro.file.DataFileReader; // import the required package/class
private void initialize() throws IOException, NoSuchAlgorithmException {
    SeekableResettableInputBridge in = new SeekableResettableInputBridge(ris);
    long pos = in.tell();
    in.seek(0L);
    fileReader = new DataFileReader<GenericRecord>(in,
        new GenericDatumReader<GenericRecord>());
    fileReader.sync(pos); // resume from the saved position at the next sync marker
    schema = fileReader.getSchema();
    datumWriter = new GenericDatumWriter<GenericRecord>(schema);
    out = new ByteArrayOutputStream();
    encoder = EncoderFactory.get().binaryEncoder(out, encoder);
    schemaHash = SchemaNormalization.parsingFingerprint("CRC-64-AVRO", schema);
    schemaHashString = Hex.encodeHexString(schemaHash);
}
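CRC-64-AVRO is Avro's standard 8-byte schema fingerprint, computed over the normalized (canonical) form of the schema. A minimal standalone sketch with an illustrative inline schema; note that parsingFingerprint declares NoSuchAlgorithmException:

Schema schema = new Schema.Parser().parse("{\"type\":\"string\"}"); // illustrative schema
byte[] fingerprint = SchemaNormalization.parsingFingerprint("CRC-64-AVRO", schema);
System.out.println(Hex.encodeHexString(fingerprint)); // stable identifier for the schema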
Example 6: testFirstUnderscoreInColumnName
import org.apache.avro.file.DataFileReader; // import the required package/class
public void testFirstUnderscoreInColumnName() throws IOException {
    String[] names = { "_NAME" };
    String[] types = { "INT" };
    String[] vals = { "1987" };
    createTableWithColTypesAndNames(names, types, vals);

    runImport(getOutputArgv(true, null));

    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    Schema schema = reader.getSchema();
    assertEquals(Schema.Type.RECORD, schema.getType());
    List<Field> fields = schema.getFields();
    assertEquals(types.length, fields.size());

    checkField(fields.get(0), "__NAME", Type.INT);

    GenericRecord record1 = reader.next();
    assertEquals("__NAME", 1987, record1.get("__NAME"));
}
Example 7: testNonstandardCharactersInColumnName
import org.apache.avro.file.DataFileReader; // import the required package/class
public void testNonstandardCharactersInColumnName() throws IOException {
    String[] names = { "avro\uC3A11" };
    String[] types = { "INT" };
    String[] vals = { "1987" };
    createTableWithColTypesAndNames(names, types, vals);

    runImport(getOutputArgv(true, null));

    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    Schema schema = reader.getSchema();
    assertEquals(Schema.Type.RECORD, schema.getType());
    List<Field> fields = schema.getFields();
    assertEquals(types.length, fields.size());

    checkField(fields.get(0), "AVRO1", Type.INT);

    GenericRecord record1 = reader.next();
    assertEquals("AVRO1", 1987, record1.get("AVRO1"));
}
Example 8: testNonIdentCharactersInColumnName
import org.apache.avro.file.DataFileReader; // import the required package/class
public void testNonIdentCharactersInColumnName() throws IOException {
    String[] names = { "test_a-v+r/o" };
    String[] types = { "INT" };
    String[] vals = { "2015" };
    createTableWithColTypesAndNames(names, types, vals);

    runImport(getOutputArgv(true, null));

    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    Schema schema = reader.getSchema();
    assertEquals(Schema.Type.RECORD, schema.getType());
    List<Field> fields = schema.getFields();
    assertEquals(types.length, fields.size());

    checkField(fields.get(0), "TEST_A_V_R_O", Type.INT);

    GenericRecord record1 = reader.next();
    assertEquals("TEST_A_V_R_O", 2015, record1.get("TEST_A_V_R_O"));
}
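The three tests above all exercise the same constraint: Avro record and field names must match [A-Za-z_][A-Za-z0-9_]*, so Sqoop rewrites offending column names before generating the schema. A quick illustrative check of that rule:

java.util.regex.Pattern avroName = java.util.regex.Pattern.compile("[A-Za-z_][A-Za-z0-9_]*");
System.out.println(avroName.matcher("test_a-v+r/o").matches());  // false: must be cleaned first
System.out.println(avroName.matcher("TEST_A_V_R_O").matches());  // true: the cleaned form is legal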
Example 9: testBlobAvroImportInline
import org.apache.avro.file.DataFileReader; // import the required package/class
/**
 * Import blob data that is smaller than the inline lob limit. Blob data
 * should be saved as Avro bytes.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobAvroImportInline() throws IOException, SQLException {
    String[] types = { getBlobType() };
    String expectedVal = "This is short BLOB data";
    String[] vals = { getBlobInsertStr(expectedVal) };

    createTableWithColTypes(types, vals);
    runImport(getArgv());

    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();

    // Verify that blob data is imported as Avro bytes.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());
    assertEquals(getColName(0), expectedVal, returnVal);
}
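One caveat in the check above: new String(buf.array()) ignores the buffer's position and limit and uses the platform default charset. A safer conversion sketch:

byte[] bytes = new byte[buf.remaining()];
buf.duplicate().get(bytes); // copy only the readable window without moving the original position
String returnVal = new String(bytes, StandardCharsets.UTF_8);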
Example 10: testBlobCompressedAvroImportInline
import org.apache.avro.file.DataFileReader; // import the required package/class
/**
 * Import blob data that is smaller than the inline lob limit and compress it
 * with the deflate codec. Blob data should be encoded and saved as Avro bytes.
 * @throws IOException
 * @throws SQLException
 */
public void testBlobCompressedAvroImportInline()
    throws IOException, SQLException {
    String[] types = { getBlobType() };
    String expectedVal = "This is short BLOB data";
    String[] vals = { getBlobInsertStr(expectedVal) };

    createTableWithColTypes(types, vals);
    runImport(getArgv("--compression-codec", CodecMap.DEFLATE));

    Path outputFile = new Path(getTablePath(), "part-m-00000.avro");
    DataFileReader<GenericRecord> reader = read(outputFile);
    GenericRecord record = reader.next();

    // Verify that the data block of the Avro file is compressed with the
    // deflate codec.
    assertEquals(CodecMap.DEFLATE,
        reader.getMetaString(DataFileConstants.CODEC));

    // Verify that all columns are imported correctly.
    ByteBuffer buf = (ByteBuffer) record.get(getColName(0));
    String returnVal = new String(buf.array());
    assertEquals(getColName(0), expectedVal, returnVal);
}
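For reference, a minimal sketch of producing such a deflate-compressed container file directly with the Avro API; the output file name is an assumption, and setCodec must be called before create:

try (DataFileWriter<GenericRecord> writer =
         new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))) {
    writer.setCodec(CodecFactory.deflateCodec(CodecFactory.DEFAULT_DEFLATE_LEVEL));
    writer.create(schema, new File("compressed.avro")); // codec name is stored in the file metadata
    // writer.append(record); ...
}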
Example 11: deserUserCompile
import org.apache.avro.file.DataFileReader; // import the required package/class
public void deserUserCompile() {
    // Deserialize Users from disk
    DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class);
    DataFileReader<User> dataFileReader = null;
    User user = null;
    try {
        dataFileReader = new DataFileReader<User>(
            new File("/Users/a/Desktop/tmp/users.avro"),
            userDatumReader);
        while (dataFileReader.hasNext()) {
            // Reuse the user object by passing it to next(). This saves us from
            // allocating and garbage collecting many objects for files with
            // many items.
            user = dataFileReader.next(user);
            System.out.println(user);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (dataFileReader != null) {
            try {
                dataFileReader.close(); // the original example leaked this reader
            } catch (IOException ignored) {
            }
        }
    }
}
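For completeness, a hedged sketch of the matching writer side, assuming a code-generated User class like the one in the Avro getting-started guide (the constructor arguments are illustrative):

User user = new User("Alyssa", 256, "blue"); // hypothetical generated constructor
DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
try (DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter)) {
    dataFileWriter.create(user.getSchema(), new File("/Users/a/Desktop/tmp/users.avro"));
    dataFileWriter.append(user);
}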
Example 12: MemberInfoDynDeser
import org.apache.avro.file.DataFileReader; // import the required package/class
/**
 * Dynamic deserialization: deserialize using a schema file resolved at runtime.
 *
 * @throws IOException
 */
public void MemberInfoDynDeser() throws IOException {
    // 1. Parse the schema file.
    Parser parser = new Parser();
    Schema mSchema = parser.parse(this.getClass().getResourceAsStream("/Members.avsc"));

    // 2. Build the datum reader.
    DatumReader<GenericRecord> mGr = new SpecificDatumReader<GenericRecord>(mSchema);
    DataFileReader<GenericRecord> mDfr = new DataFileReader<GenericRecord>(new File("/Users/a/Desktop/tmp/members.avro"), mGr);

    // 3. Deserialize the records from the file.
    GenericRecord gr = null;
    while (mDfr.hasNext()) {
        gr = mDfr.next();
        System.err.println("deser data:" + gr.toString());
    }
    mDfr.close();
    System.out.println("Dyn Builder Ser Start Complete.");
}
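Since the example reads into GenericRecord rather than a generated class, GenericDatumReader would be the more idiomatic choice; SpecificDatumReader merely falls back to generic records here. A minimal alternative sketch using the same paths:

DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(mSchema);
DataFileReader<GenericRecord> fileReader =
    new DataFileReader<GenericRecord>(new File("/Users/a/Desktop/tmp/members.avro"), reader);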
Example 13: main
import org.apache.avro.file.DataFileReader; // import the required package/class
public static void main(String[] args) throws IOException
{
    String filename = args[0];
    File file = new File(filename);
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, reader);
    while (dataFileReader.hasNext())
    {
        GenericRecord result = dataFileReader.next();
        String output = String.format("%s %s %s %f",
            result.get("sighting_date"), result.get("city"), result.get("shape"), result.get("duration"));
        System.out.println(output);
    }
}
Example 14: main
import org.apache.avro.file.DataFileReader; // import the required package/class
public static void main(String[] args) throws IOException
{
    String filename = args[0];
    File file = new File(filename);
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> dataFileReader = new DataFileReader<GenericRecord>(file, reader);
    while (dataFileReader.hasNext())
    {
        GenericRecord result = dataFileReader.next();
        String output = String.format("%s %d",
            result.get("shape"), result.get("count"));
        System.out.println(output);
    }
}
Example 15: initReader
import org.apache.avro.file.DataFileReader; // import the required package/class
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
    DatumReader<E> datumReader;

    if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
        datumReader = new GenericDatumReader<E>();
    } else {
        datumReader = org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
            ? new SpecificDatumReader<E>(avroValueType) : new ReflectDatumReader<E>(avroValueType);
    }
    if (LOG.isInfoEnabled()) {
        LOG.info("Opening split {}", split);
    }

    SeekableInput in = new FSDataInputStreamWrapper(stream,
        split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen());
    DataFileReader<E> dataFileReader = (DataFileReader<E>) DataFileReader.openReader(in, datumReader);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Loaded SCHEMA: {}", dataFileReader.getSchema());
    }

    end = split.getStart() + split.getLength();
    recordsReadSinceLastSync = 0;
    return dataFileReader;
}
}