This article collects typical usage examples of the Java method org.apache.avro.file.DataFileStream.getSchema. If you are wondering what DataFileStream.getSchema does, how to call it, or how it is used in practice, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.avro.file.DataFileStream.
The sections below present 15 code examples of DataFileStream.getSchema, sorted by popularity by default.
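All of the examples share one core pattern: wrap an InputStream in a DataFileStream and call getSchema(), which returns the writer's schema parsed from the Avro container-file header. As a warm-up, here is a minimal, self-contained sketch of that pattern (the file path is a placeholder):

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;

public class GetSchemaSketch {
  public static void main(String[] args) throws IOException {
    // Placeholder path; point this at any Avro container file.
    File avroFile = new File("data/example.avro");
    GenericDatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    // DataFileStream parses the schema from the file header on construction.
    DataFileStream<GenericRecord> stream =
        new DataFileStream<GenericRecord>(new FileInputStream(avroFile), datumReader);
    try {
      Schema schema = stream.getSchema();
      System.out.println(schema.toString(true)); // pretty-print the schema as JSON
    } finally {
      stream.close();
    }
  }
}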
Example 1: before

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

@BeforeClass
public static void before() throws Exception {
  final String filePath = TestUtils.getFileFromResourceUrl(BlocksTest.class.getClassLoader().getResource(AVRO_DATA));
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }
  System.out.println(INDEX_DIR.getAbsolutePath());
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  final SegmentGeneratorConfig config =
      SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch",
          TimeUnit.DAYS, "test");
  config.setTimeColumnName("daysSinceEpoch");
  driver.init(config);
  driver.build();
  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  final org.apache.avro.Schema avroSchema = avroReader.getSchema();
  final String[] columns = new String[avroSchema.getFields().size()];
  int i = 0;
  for (final Field f : avroSchema.getFields()) {
    columns[i] = f.name();
    i++;
  }
}
Example 2: getSchema

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

/**
 * This method is called by {@link #getAvroSchema}. The default implementation
 * returns the schema of an Avro file, or the schema of the last file in a
 * first-level directory (one that contains no sub-directories).
 *
 * @param path path of a file or first-level directory
 * @param fs file system
 * @return avro schema
 * @throws IOException
 */
public static Schema getSchema(Path path, FileSystem fs) throws IOException {
  /* get the path of the last file */
  Path lastFile = AvroStorageUtils.getLast(path, fs);
  if (lastFile == null) {
    return null;
  }
  /* read the file and obtain its schema */
  GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
  InputStream hdfsInputStream = fs.open(lastFile);
  DataFileStream<Object> avroDataStream = new DataFileStream<Object>(hdfsInputStream, avroReader);
  Schema ret = avroDataStream.getSchema();
  avroDataStream.close();
  return ret;
}
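Note that this version returns null when AvroStorageUtils.getLast finds no file, and that the stream is closed immediately after getSchema(): the schema lives in the container-file header, so no records need to be decoded. Compare Example 12, a variant of the same method without the null check.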
Example 3: before

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

@BeforeClass
public static void before() throws Exception {
  final String filePath = TestUtils.getFileFromResourceUrl(BlocksTest.class.getClassLoader().getResource(AVRO_DATA));
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }
  // System.out.println(INDEX_DIR.getAbsolutePath());
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  final SegmentGeneratorConfig config =
      SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch",
          TimeUnit.DAYS, "test");
  config.setTimeColumnName("daysSinceEpoch");
  driver.init(config);
  driver.build();
  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  final org.apache.avro.Schema avroSchema = avroReader.getSchema();
  final String[] columns = new String[avroSchema.getFields().size()];
  int i = 0;
  for (final Field f : avroSchema.getFields()) {
    columns[i] = f.name();
    i++;
  }
}
Example 4: verifyOutputAvroFiles

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

private void verifyOutputAvroFiles(FileSystem fs, Configuration conf, String dir, String prefix,
    List<String> bodies) throws IOException {
  int found = 0;
  int expected = bodies.size();
  for (String outputFile : getAllFiles(dir)) {
    String name = (new File(outputFile)).getName();
    if (name.startsWith(prefix)) {
      FSDataInputStream input = fs.open(new Path(outputFile));
      DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
      DataFileStream<GenericRecord> avroStream =
          new DataFileStream<GenericRecord>(input, reader);
      GenericRecord record = new GenericData.Record(avroStream.getSchema());
      while (avroStream.hasNext()) {
        avroStream.next(record);
        ByteBuffer body = (ByteBuffer) record.get("body");
        CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
        String bodyStr = decoder.decode(body).toString();
        LOG.debug("Removing event: {}", bodyStr);
        bodies.remove(bodyStr);
        found++;
      }
      avroStream.close();
      input.close();
    }
  }
  Assert.assertTrue("Found = " + found + ", Expected = " +
      expected + ", Left = " + bodies.size() + " " + bodies,
      bodies.size() == 0);
}
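A small point worth noting: passing the previously constructed record back into avroStream.next(record) reuses that object as a decoding buffer, avoiding a fresh allocation for every record in the file.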
Example 5: testGzipDurabilityWithSerializer

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

@Test
public void testGzipDurabilityWithSerializer() throws Exception {
  Context context = new Context();
  context.put("serializer", "AVRO_EVENT");
  HDFSCompressedDataStream writer = new HDFSCompressedDataStream();
  writer.configure(context);
  writer.open(fileURI, factory.getCodec(new Path(fileURI)),
      SequenceFile.CompressionType.BLOCK);
  String[] bodies = { "yarf!", "yarfing!" };
  writeBodies(writer, bodies);
  int found = 0;
  int expected = bodies.length;
  List<String> expectedBodies = Lists.newArrayList(bodies);
  GZIPInputStream cmpIn = new GZIPInputStream(new FileInputStream(file));
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> avroStream =
      new DataFileStream<GenericRecord>(cmpIn, reader);
  GenericRecord record = new GenericData.Record(avroStream.getSchema());
  while (avroStream.hasNext()) {
    avroStream.next(record);
    CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
    String bodyStr = decoder.decode((ByteBuffer) record.get("body"))
        .toString();
    expectedBodies.remove(bodyStr);
    found++;
  }
  avroStream.close();
  cmpIn.close();
  Assert.assertTrue("Found = " + found + ", Expected = " + expected
      + ", Left = " + expectedBodies.size() + " " + expectedBodies,
      expectedBodies.size() == 0);
}
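Here the Avro container file is itself gzip-compressed, so the test first wraps the raw file in a GZIPInputStream and lets DataFileStream read its header from the decompressed bytes; getSchema() then yields the event schema written by the AVRO_EVENT serializer.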
Example 6: init

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

@Override
public void init(Map<String, String> props, Schema indexingSchema, String kafkaTopicName) throws Exception {
  // Load the Avro schema
  DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
  _avroSchema = reader.getSchema();
  reader.close();
  _rowGenerator = new AvroRecordToPinotRowGenerator(indexingSchema);
  _reader = new GenericDatumReader<GenericData.Record>(_avroSchema);
}
Example 7: extractSchemaFromAvro

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

/**
 * Returns a basic Pinot schema object with every field type unknown and no
 * indication of whether a column is single-value (SV) or multi-value (MV);
 * this is just a util method for testing.
 * @param avroFile
 * @return
 * @throws FileNotFoundException
 * @throws IOException
 */
public static Schema extractSchemaFromAvro(File avroFile) throws IOException {
  final Schema schema = new Schema();
  final DataFileStream<GenericRecord> dataStreamReader = getAvroReader(avroFile);
  final org.apache.avro.Schema avroSchema = dataStreamReader.getSchema();
  dataStreamReader.close();
  return getPinotSchemaFromAvroSchema(avroSchema, getDefaultFieldTypes(avroSchema), TimeUnit.DAYS);
}
Example 8: before

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

@BeforeClass
public static void before() throws Exception {
  final String filePath =
      TestUtils.getFileFromResourceUrl(DictionariesTest.class.getClassLoader().getResource(AVRO_DATA));
  if (INDEX_DIR.exists()) {
    FileUtils.deleteQuietly(INDEX_DIR);
  }
  System.out.println(INDEX_DIR.getAbsolutePath());
  final SegmentIndexCreationDriver driver = SegmentCreationDriverFactory.get(null);
  final SegmentGeneratorConfig config =
      SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "weeksSinceEpochSunday",
          TimeUnit.DAYS, "test");
  config.setTimeColumnName("weeksSinceEpochSunday");
  driver.init(config);
  driver.build();
  final DataFileStream<GenericRecord> avroReader = AvroUtils.getAvroReader(new File(filePath));
  final org.apache.avro.Schema avroSchema = avroReader.getSchema();
  final String[] columns = new String[avroSchema.getFields().size()];
  int i = 0;
  for (final Field f : avroSchema.getFields()) {
    columns[i] = f.name();
    i++;
  }
}
Example 9: getSchemaFromFile

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

/**
 * Loads the schema from an Avro data file.
 *
 * @param conf The JobConf.
 * @param path The path to the data file.
 * @return The schema read from the data file's metadata.
 * @throws IOException
 */
public static Schema getSchemaFromFile(JobConf conf, Path path) throws IOException
{
  FileSystem fs = path.getFileSystem(new Configuration());
  FSDataInputStream dataInputStream = fs.open(path);
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(dataInputStream, reader);
  return dataFileStream.getSchema();
}
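Unlike Example 10 below, this variant never closes dataFileStream (or the FSDataInputStream underneath it), so every call leaks a file handle; the try/finally form in the next example is the safer pattern.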
Example 10: getSchemaFromFile

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

/**
 * Gets the schema from a given Avro data file.
 *
 * @param fs the filesystem
 * @param path path to get schema from
 * @return The schema read from the data file's metadata.
 * @throws IOException IOException
 */
public static Schema getSchemaFromFile(FileSystem fs, Path path) throws IOException
{
  FSDataInputStream dataInputStream = fs.open(path);
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> dataFileStream = new DataFileStream<GenericRecord>(dataInputStream, reader);
  try
  {
    return dataFileStream.getSchema();
  }
  finally
  {
    dataFileStream.close();
  }
}
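Since DataFileStream implements java.io.Closeable, the try/finally above can be written more compactly with try-with-resources on Java 7+. A sketch of the same helper in that style (same names and behavior as the example above, not taken from the original project):

public static Schema getSchemaFromFile(FileSystem fs, Path path) throws IOException
{
  // The try-with-resources block closes dataFileStream automatically,
  // which in turn closes the wrapped FSDataInputStream.
  try (DataFileStream<GenericRecord> dataFileStream =
      new DataFileStream<GenericRecord>(fs.open(path), new GenericDatumReader<GenericRecord>()))
  {
    return dataFileStream.getSchema();
  }
}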
Example 11: getAvroSchema

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

/**
 * Reads the avro schema at the specified location.
 * @param p Location of file
 * @param job Hadoop job object
 * @return an Avro Schema object derived from the specified file
 * @throws IOException
 */
public Schema getAvroSchema(final Path p, final Job job)
    throws IOException {
  GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
  FileSystem fs = FileSystem.get(p.toUri(), job.getConfiguration());
  FileStatus[] statusArray = fs.globStatus(p);
  if (statusArray == null) {
    throw new IOException("Path " + p.toString() + " does not exist.");
  }
  if (statusArray.length == 0) {
    throw new IOException("No path matches pattern " + p.toString());
  }
  Path filePath = depthFirstSearchForFile(statusArray, fs);
  if (filePath == null) {
    throw new IOException("No path matches pattern " + p.toString());
  }
  InputStream hdfsInputStream = fs.open(filePath);
  DataFileStream<Object> avroDataStream =
      new DataFileStream<Object>(hdfsInputStream, avroReader);
  Schema s = avroDataStream.getSchema();
  avroDataStream.close();
  return s;
}
Example 12: getSchema

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

/**
 * This method is called by {@link #getAvroSchema}. The default implementation
 * returns the schema of an Avro file, or the schema of the last file in a
 * first-level directory (one that contains no sub-directories).
 *
 * @param path path of a file or first-level directory
 * @param fs file system
 * @return avro schema
 * @throws IOException
 */
public static Schema getSchema(Path path, FileSystem fs) throws IOException {
  /* get the path of the last file */
  Path lastFile = AvroStorageUtils.getLast(path, fs);
  /* read the file and obtain its schema */
  GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
  InputStream hdfsInputStream = fs.open(lastFile);
  DataFileStream<Object> avroDataStream = new DataFileStream<Object>(hdfsInputStream, avroReader);
  Schema ret = avroDataStream.getSchema();
  avroDataStream.close();
  return ret;
}
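Caution: unlike Example 2, this variant does not check whether AvroStorageUtils.getLast returned null, so calling it on a directory with no matching file would fail with a NullPointerException at fs.open(lastFile).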
Example 13: fromAvro

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

public static Schema fromAvro(InputStream in) throws IOException {
  GenericDatumReader<GenericRecord> datumReader =
      new GenericDatumReader<GenericRecord>();
  DataFileStream<GenericRecord> stream = null;
  boolean threw = true;
  try {
    stream = new DataFileStream<>(in, datumReader);
    Schema schema = stream.getSchema();
    threw = false;
    return schema;
  } finally {
    Closeables.close(stream, threw);
  }
}
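The boolean handed to Guava's Closeables.close tells it whether an exception is already propagating: while threw is true, an IOException from stream.close() is logged and swallowed so it cannot mask the original failure; after getSchema() succeeds, threw is false and a close failure is rethrown.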
Example 14: getAvroSchema

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

/**
 * Reads the avro schemas at the specified locations.
 * @param p locations of the files
 * @param job Hadoop job object
 * @return an Avro Schema object derived from the specified files
 * @throws IOException
 */
public Schema getAvroSchema(final Path[] p, final Job job) throws IOException {
  GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
  ArrayList<FileStatus> statusList = new ArrayList<FileStatus>();
  FileSystem fs = FileSystem.get(p[0].toUri(), job.getConfiguration());
  for (Path temp : p) {
    for (FileStatus tempf : fs.globStatus(temp)) {
      statusList.add(tempf);
    }
  }
  FileStatus[] statusArray = (FileStatus[]) statusList
      .toArray(new FileStatus[statusList.size()]);
  if (statusArray == null) {
    throw new IOException("Path " + p.toString() + " does not exist.");
  }
  if (statusArray.length == 0) {
    throw new IOException("No path matches pattern " + p.toString());
  }
  Path filePath = Utils.depthFirstSearchForFile(statusArray, fs);
  if (filePath == null) {
    throw new IOException("No path matches pattern " + p.toString());
  }
  InputStream hdfsInputStream = fs.open(filePath);
  DataFileStream<Object> avroDataStream = new DataFileStream<Object>(
      hdfsInputStream, avroReader);
  Schema s = avroDataStream.getSchema();
  avroDataStream.close();
  return s;
}
Example 15: init

import org.apache.avro.file.DataFileStream; // import the package/class the method depends on

@Override
public void init(Map<String, String> props, Schema indexingSchema, String kafkaTopicName) throws Exception {
  // Load the Avro schema
  DataFileStream<GenericRecord> reader = AvroUtils.getAvroReader(avroFile);
  _avroSchema = reader.getSchema();
  reader.close();
  _rowGenerator = new AvroRecordToPinotRowGenerator(indexingSchema);
  _reader = new GenericDatumReader<>(_avroSchema);
}