本文整理匯總了Java中org.apache.avro.file.SeekableInput類的典型用法代碼示例。如果您正苦於以下問題:Java SeekableInput類的具體用法?Java SeekableInput怎麽用?Java SeekableInput使用的例子?那麽, 這裏精選的類代碼示例或許可以為您提供幫助。
SeekableInput類屬於org.apache.avro.file包,在下文中一共展示了SeekableInput類的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Java代碼示例。
示例1: getSchema
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
/**
 * Reads the schema from an Avro data file and, when {@code PadDefaultNullsToSchema}
 * is enabled, returns a copy of the record schema in which every nullable field
 * (a UNION whose first branch is NULL) gets an explicit null default value.
 *
 * NOTE: the DataFileReader is deliberately not closed here — closing it would
 * also close the caller-supplied SeekableInput, which the caller still owns.
 *
 * @param input seekable Avro container file, positioned anywhere (reader seeks itself)
 * @return the (possibly null-padded) record schema of the file
 * @throws IOException if the file header/schema cannot be read
 */
public static Schema getSchema(SeekableInput input) throws IOException
{
    DatumReader<GenericRecord> genericReader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> fileReader =
        new DataFileReader<GenericRecord>(input, genericReader);
    Schema result = fileReader.getSchema();
    if (PadDefaultNullsToSchema)
    {
        // Rebuild each field; nullable union fields get a null default.
        ArrayList<Field> cloned = new ArrayList<Field>();
        for (Field original : result.getFields())
        {
            Schema fieldSchema = original.schema();
            // Pad only when the field's schema is a UNION starting with NULL.
            boolean nullable = fieldSchema != null
                && fieldSchema.getType().equals(Type.UNION)
                && fieldSchema.getTypes().get(0).getType().equals(Type.NULL);
            JsonNode defaultVal = nullable ? NullNode.getInstance() : original.defaultValue();
            cloned.add(new Field(original.name(), fieldSchema, original.doc(), defaultVal));
        }
        result = Schema.createRecord(result.getName(), result.getDoc(), result.getNamespace(), result.isError());
        result.setFields(cloned);
    }
    return result;
}
示例2: getParquetSchema
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
/**
 * Returns the Parquet schema of {@code source} as a string.
 *
 * Detects the file format first (rewinding the stream afterwards) and rejects
 * anything that is not a Parquet file.
 *
 * @param source path/URI of the candidate Parquet file
 * @return the Parquet {@code MessageType} schema rendered as a string
 * @throws IOException              if the file cannot be opened or read
 * @throws IllegalArgumentException if the file is not in Parquet format
 */
private String getParquetSchema(String source) throws IOException {
    try (SeekableInput in = openSeekable(source)) {
        Formats.Format detected = Formats.detectFormat((InputStream) in);
        // Rewind so later readers see the file from the beginning.
        in.seek(0);
        if (detected == Formats.Format.PARQUET) {
            return new ParquetFileReader(
                getConf(), qualifiedPath(source), ParquetMetadataConverter.NO_FILTER)
                .getFileMetaData().getSchema().toString();
        }
        throw new IllegalArgumentException(String.format(
            "Could not get a Parquet schema for format %s: %s", detected, source));
    }
}
示例3: initReader
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
/**
 * Opens an Avro {@link DataFileReader} for the given split.
 *
 * Chooses the datum reader by the configured value type: a generic reader for
 * {@code GenericRecord}, a specific reader for generated {@code SpecificRecordBase}
 * subclasses, and a reflect reader for any other POJO type.
 *
 * @param split the input split whose underlying file should be read
 * @return a positioned reader over the split's Avro file
 * @throws IOException if the file cannot be opened
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
    DatumReader<E> reader;
    if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
        reader = new GenericDatumReader<E>();
    } else if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
        reader = new SpecificDatumReader<E>(avroValueType);
    } else {
        reader = new ReflectDatumReader<E>(avroValueType);
    }
    if (LOG.isInfoEnabled()) {
        LOG.info("Opening split {}", split);
    }
    long fileLength = split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen();
    SeekableInput wrapped = new FSDataInputStreamWrapper(stream, fileLength);
    DataFileReader<E> fileReader = (DataFileReader) DataFileReader.openReader(wrapped, reader);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Loaded SCHEMA: {}", fileReader.getSchema());
    }
    // Track where this split ends and reset the per-sync record counter.
    end = split.getStart() + split.getLength();
    recordsReadSinceLastSync = 0;
    return fileReader;
}
示例4: getSchema
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
@Override
public DatasetJsonRecord getSchema(Path targetFilePath)
throws IOException {
System.out.println("avro file path : " + targetFilePath.toUri().getPath());
SeekableInput sin = new FsInput(targetFilePath, fs.getConf());
DataFileReader<GenericRecord> reader =
new DataFileReader<GenericRecord>(sin, new GenericDatumReader<GenericRecord>());
String codec = reader.getMetaString("avro.codec");
long record_count = reader.getBlockCount();
String schemaString = reader.getSchema().toString();
String storage = STORAGE_TYPE;
String abstractPath = targetFilePath.toUri().getPath();
FileStatus fstat = fs.getFileStatus(targetFilePath);
DatasetJsonRecord datasetJsonRecord =
new DatasetJsonRecord(schemaString, abstractPath, fstat.getModificationTime(), fstat.getOwner(), fstat.getGroup(),
fstat.getPermission().toString(), codec, storage, "");
reader.close();
sin.close();
return datasetJsonRecord;
}
示例5: getSampleData
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
@Override
public SampleDataRecord getSampleData(Path targetFilePath)
throws IOException {
SeekableInput sin = new FsInput(targetFilePath, fs.getConf());
DataFileReader<GenericRecord> reader =
new DataFileReader<GenericRecord>(sin, new GenericDatumReader<GenericRecord>());
Iterator<GenericRecord> iter = reader.iterator();
int count = 0;
List<Object> list = new ArrayList<Object>();
//JSONArray list = new JSONArray();
while (iter.hasNext() && count < 10) {
// TODO handle out of memory error
list.add(iter.next().toString().replaceAll("[\\n\\r\\p{C}]", "").replaceAll("\"", "\\\""));
count++;
}
SampleDataRecord sampleDataRecord = new SampleDataRecord(targetFilePath.toUri().getPath(), list);
return sampleDataRecord;
}
示例6: getSchema
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
/**
 * Builds a {@link DatasetJsonRecord} for a Parquet file by reading the Avro
 * schema of its first record plus HDFS file-status metadata.
 *
 * Fixes:
 *  - the {@code FsInput} opened here was never passed to the Parquet reader —
 *    it only opened the file a second time for nothing, and leaked on any
 *    exception; it has been removed;
 *  - the reader is now closed via try-with-resources even on failure;
 *  - {@code reader.read()} returns null for an empty file, which previously
 *    caused an NPE — now reported as a descriptive IOException.
 *
 * @param targetFilePath HDFS path of the Parquet file
 * @return metadata record for the file (codec is not populated — see TODO)
 * @throws IOException if the file is empty or cannot be read
 */
@Override
public DatasetJsonRecord getSchema(Path targetFilePath)
    throws IOException {
    System.out.println("parquet file path : " + targetFilePath.toUri().getPath());
    try (ParquetReader<GenericRecord> reader =
             AvroParquetReader.<GenericRecord>builder(targetFilePath).build()) {
        GenericRecord first = reader.read();
        if (first == null) {
            throw new IOException("Cannot determine schema: no records in " + targetFilePath);
        }
        String schemaString = first.getSchema().toString();
        String storage = STORAGE_TYPE;
        String abstractPath = targetFilePath.toUri().getPath();
        FileStatus fstat = fs.getFileStatus(targetFilePath);
        // TODO set codec
        return new DatasetJsonRecord(schemaString, abstractPath, fstat.getModificationTime(),
            fstat.getOwner(), fstat.getGroup(), fstat.getPermission().toString(), null, storage, "");
    }
}
示例7: createDataFileReader
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
/**
 * Opens a generic-record Avro file reader over either the local filesystem
 * or HDFS (via {@link FsInput} with this instance's configuration).
 *
 * @param filename path of the Avro file
 * @param localFS  true to read from the local filesystem, false for HDFS
 * @return an open reader; the caller is responsible for closing it
 * @throws IOException if the file cannot be opened
 */
private DataFileReader<GenericRecord> createDataFileReader(String filename,
    boolean localFS) throws IOException
{
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    if (localFS)
    {
        return new DataFileReader<GenericRecord>(new File(filename), reader);
    }
    SeekableInput hdfsInput = new FsInput(new Path(filename), conf);
    return new DataFileReader<GenericRecord>(hdfsInput, reader);
}
示例8: init
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
/**
 * Initializes the AvroScanner: sets up the projection (defaulting to the full
 * schema when no targets were given), resolves the Avro schema from the table
 * meta, and opens a file reader over the fragment's path.
 *
 * @throws IOException if the Avro file cannot be opened
 */
@Override
public void init() throws IOException {
    if (targets == null) {
        // No explicit projection requested — project every column.
        targets = schema.toArray();
    }
    prepareProjection(targets);
    outTuple = new VTuple(projectionMap.length);
    Schema resolvedSchema = AvroUtil.getAvroSchema(meta, conf);
    avroFields = resolvedSchema.getFields();
    DatumReader<GenericRecord> recordReader = new GenericDatumReader<>(resolvedSchema);
    SeekableInput fragmentInput = new FsInput(fragment.getPath(), conf);
    dataFileReader = new DataFileReader<>(fragmentInput, recordReader);
    super.init();
}
示例9: init
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
/**
 * Initializes the AvroScanner: sets up the projection (defaulting to the full
 * schema when no targets were given), resolves and caches the Avro schema on
 * this instance, and opens a file reader over the fragment's path.
 *
 * @throws IOException if the Avro file cannot be opened
 */
@Override
public void init() throws IOException {
    if (targets == null) {
        // No explicit projection requested — project every column.
        targets = schema.toArray();
    }
    prepareProjection(targets);
    avroSchema = AvroUtil.getAvroSchema(meta, conf);
    avroFields = avroSchema.getFields();
    DatumReader<GenericRecord> recordReader =
        new GenericDatumReader<GenericRecord>(avroSchema);
    SeekableInput fragmentInput = new FsInput(fragment.getPath(), conf);
    dataFileReader = new DataFileReader<GenericRecord>(fragmentInput, recordReader);
    super.init();
}
示例10: getAvroSchema
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
/**
 * Resolves an Avro schema from {@code source}, which may be a Parquet file,
 * an Avro data file, an .avsc schema file, or a JSON sample to infer from.
 *
 * Detects the format first (rewinding the stream afterwards); unrecognized
 * formats — including text files with an unsupported extension — are rejected.
 *
 * @param source path/URI of the input file
 * @return the Avro schema derived from the file
 * @throws IOException              if the file cannot be opened or read
 * @throws IllegalArgumentException if the format cannot be determined
 */
protected Schema getAvroSchema(String source) throws IOException {
    try (SeekableInput in = openSeekable(source)) {
        Formats.Format detected = Formats.detectFormat((InputStream) in);
        // Rewind so later readers see the file from the beginning.
        in.seek(0);
        if (detected == Formats.Format.PARQUET) {
            return Schemas.fromParquet(getConf(), qualifiedURI(source));
        }
        if (detected == Formats.Format.AVRO) {
            return Schemas.fromAvro(open(source));
        }
        if (detected == Formats.Format.TEXT) {
            if (source.endsWith("avsc")) {
                return Schemas.fromAvsc(open(source));
            }
            if (source.endsWith("json")) {
                return Schemas.fromJSON("json", open(source));
            }
        }
        throw new IllegalArgumentException(String.format(
            "Could not determine file format of %s.", source));
    }
}
示例11: readAndCheckResultsFromHdfs
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
/**
 * Test helper: reads every record-wrapper Avro file written under
 * {@code /logs/<applicationToken>/<logSchemaVersion>/data*}, asserts each
 * record carries the expected header, and finally asserts the collected log
 * payloads equal {@code testLogs}.
 *
 * Readers are closed quietly — a close failure should not mask an assertion.
 *
 * @param header   expected header of every written record
 * @param testLogs expected log payloads, in order
 * @throws IOException if listing or opening the files fails
 */
private void readAndCheckResultsFromHdfs(RecordHeader header, List<TestLogData> testLogs) throws IOException {
    Path globPattern = new Path("/logs" + Path.SEPARATOR + applicationToken + Path.SEPARATOR + logSchemaVersion + Path.SEPARATOR + "data*");
    FileStatus[] matches = fileSystem.globStatus(globPattern);
    List<TestLogData> collected = new ArrayList<>();
    Schema wrapperSchema = RecordWrapperSchemaGenerator.generateRecordWrapperSchema(TestLogData.getClassSchema().toString());
    for (FileStatus match : matches) {
        FileReader<GenericRecord> avroReader = null;
        try {
            SeekableInput fsInput = new FsInput(match.getPath(), fileSystem.getConf());
            DatumReader<GenericRecord> wrapperReader = new SpecificDatumReader<>(wrapperSchema);
            avroReader = DataFileReader.openReader(fsInput, wrapperReader);
            for (GenericRecord wrapper : avroReader) {
                RecordHeader actualHeader = (RecordHeader) wrapper.get(RecordWrapperSchemaGenerator.RECORD_HEADER_FIELD);
                Assert.assertEquals(header, actualHeader);
                TestLogData payload = (TestLogData) wrapper.get(RecordWrapperSchemaGenerator.RECORD_DATA_FIELD);
                collected.add(payload);
            }
        } finally {
            IOUtils.closeQuietly(avroReader);
        }
    }
    Assert.assertEquals(testLogs, collected);
}
示例12: open
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
/**
 * Opens this input format on the given split: builds a datum reader matched to
 * the configured Avro value type (specific-record vs. reflect), wraps the
 * already-opened stream, opens an Avro file reader over it, and seeks to the
 * split's start via the Avro sync mechanism.
 */
@Override
public void open(FileInputSplit split) throws IOException {
super.open(split);
DatumReader<E> datumReader;
if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
datumReader = new SpecificDatumReader<E>(avroValueType);
} else {
datumReader = new ReflectDatumReader<E>(avroValueType);
}
LOG.info("Opening split " + split);
// NOTE(review): the (int) cast truncates split lengths beyond Integer.MAX_VALUE
// (splits > 2 GB); a sibling snippet in this file passes the length as a long —
// confirm which signature FSDataInputStreamWrapper actually exposes here.
SeekableInput in = new FSDataInputStreamWrapper(stream, (int) split.getLength());
dataFileReader = DataFileReader.openReader(in, datumReader);
// Position the reader at the first sync point at/after the split start.
dataFileReader.sync(split.getStart());
}
示例13: getSchema
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
/**
 * Builds a {@link DatasetJsonRecord} describing an Avro file: its schema string,
 * codec, path, and HDFS file-status metadata. Returns null on any failure
 * (callers rely on the null contract — preserved).
 *
 * Fixes:
 *  - the reader/input leaked whenever an exception fired before the explicit
 *    close calls — now try-with-resources;
 *  - the catch block logged only {@code e.getMessage()}, losing the stack
 *    trace — the throwable is now passed to the logger.
 *
 * @param targetFilePath HDFS path of the Avro file
 * @return metadata record for the file, or null if anything goes wrong
 * @throws IOException declared for interface compatibility; failures are caught
 */
@Override
public DatasetJsonRecord getSchema(Path targetFilePath)
    throws IOException {
    LOG.info("avro file path : " + targetFilePath.toUri().getPath());
    try (SeekableInput sin = new FsInput(targetFilePath, fs.getConf());
         DataFileReader<GenericRecord> reader =
             new DataFileReader<GenericRecord>(sin, new GenericDatumReader<GenericRecord>())) {
        String codec = reader.getMetaString("avro.codec");
        String schemaString = reader.getSchema().toString();
        String storage = STORAGE_TYPE;
        String abstractPath = targetFilePath.toUri().getPath();
        System.out.println("the schema string is: " + schemaString);
        System.out.println("the abstract path is: " + abstractPath);
        FileStatus fstat = fs.getFileStatus(targetFilePath);
        DatasetJsonRecord datasetJsonRecord =
            new DatasetJsonRecord(schemaString, abstractPath, fstat.getModificationTime(),
                fstat.getOwner(), fstat.getGroup(), fstat.getPermission().toString(), codec, storage, "");
        LOG.info("Avro file datasetjsonrecorc get success, it is : " + datasetJsonRecord);
        return datasetJsonRecord;
    } catch (Exception e) {
        // Keep the best-effort null contract, but preserve the stack trace in the log.
        LOG.info("AvroAnalyzer get datasetjson failure, and exception is " + e.getMessage(), e);
        return null;
    }
}
示例14: getSampleData
// 導入依賴的package包/類 (import dependency package/class)
/**
 * Samples up to 10 records from an Avro file and returns them (stringified,
 * with control characters stripped and double quotes escaped) as a
 * {@link SampleDataRecord}; returns null if anything fails (contract preserved).
 *
 * Fixes:
 *  - the reader/input were never closed (resource leak) — now try-with-resources;
 *  - {@code replaceAll("\"", "\\\"")} was a no-op, because {@code \"} in a regex
 *    replacement string unescapes back to a bare quote; quotes are now escaped
 *    with the literal {@link String#replace(CharSequence, CharSequence)};
 *  - the catch block logged only the message, losing the stack trace.
 *
 * @param targetFilePath HDFS path of the Avro file
 * @return a sample record holding up to 10 stringified rows, or null on failure
 * @throws IOException declared for interface compatibility; failures are caught
 */
@Override
public SampleDataRecord getSampleData(Path targetFilePath)
    throws IOException {
    try (SeekableInput sin = new FsInput(targetFilePath, fs.getConf());
         DataFileReader<GenericRecord> reader =
             new DataFileReader<GenericRecord>(sin, new GenericDatumReader<GenericRecord>())) {
        Iterator<GenericRecord> iter = reader.iterator();
        int count = 0;
        List<Object> list = new ArrayList<Object>();
        // TODO handle out of memory error for very wide records
        while (iter.hasNext() && count < 10) {
            list.add(iter.next().toString()
                .replaceAll("[\\n\\r\\p{C}]", "")   // drop newlines and control chars
                .replace("\"", "\\\""));            // escape quotes: " -> \"
            count++;
        }
        // for debug
        System.out.println("avro arraylist is: " + list.toString());
        return new SampleDataRecord(targetFilePath.toUri().getPath(), list);
    } catch (Exception e) {
        // Keep the best-effort null contract, but preserve the stack trace in the log.
        LOG.info("AvroAnalyzer get sampleDataRecord failure and exception is " + e.getMessage(), e);
        return null;
    }
}
示例15: getSchema
import org.apache.avro.file.SeekableInput; //導入依賴的package包/類
/**
 * Reads the Avro schema from the file at {@code path} and converts it to a
 * Connect schema via {@code avroData}.
 *
 * Fix: the FsInput and FileReader leaked whenever opening the reader or
 * reading the schema threw — both are now managed with try-with-resources
 * (closing the reader also closes the input; double close is harmless).
 *
 * @param conf Hadoop configuration used to open the file
 * @param path HDFS path of the Avro file
 * @return the file's schema converted to the Connect schema model
 * @throws IOException if the file cannot be opened or its header read
 */
@Override
public Schema getSchema(Configuration conf, Path path) throws IOException {
    DatumReader<Object> reader = new GenericDatumReader<>();
    try (SeekableInput input = new FsInput(path, conf);
         FileReader<Object> fileReader = DataFileReader.openReader(input, reader)) {
        org.apache.avro.Schema schema = fileReader.getSchema();
        return avroData.toConnectSchema(schema);
    }
}