当前位置: 首页>>代码示例>>Java>>正文


Java DataFileReader.openReader方法代码示例

本文整理汇总了Java中org.apache.avro.file.DataFileReader.openReader方法的典型用法代码示例。如果您正苦于以下问题:Java DataFileReader.openReader方法的具体用法?Java DataFileReader.openReader怎么用?Java DataFileReader.openReader使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.avro.file.DataFileReader的用法示例。


在下文中一共展示了DataFileReader.openReader方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: AvroFileInputStream

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Opens an Avro data file through {@link FileContext} and wires up a JSON
 * encoder so records can later be streamed out as line-separated JSON text.
 *
 * @param status file status of the Avro file to display
 * @throws IOException if the file cannot be opened or the encoder fails
 */
public AvroFileInputStream(FileStatus status) throws IOException {
  pos = 0;
  buffer = new byte[0];
  output = new ByteArrayOutputStream();

  // Open the Avro container file with a generic (schema-driven) reader.
  GenericDatumReader<Object> datumReader = new GenericDatumReader<Object>();
  FileContext context = FileContext.getFileContext(new Configuration());
  fileReader =
    DataFileReader.openReader(new AvroFSInput(context, status.getPath()), datumReader);

  // Mirror the file's own schema on the write side for the JSON round-trip.
  Schema fileSchema = fileReader.getSchema();
  writer = new GenericDatumWriter<Object>(fileSchema);

  // Emit one JSON value per line using the platform line separator.
  JsonGenerator jsonGenerator =
    new JsonFactory().createJsonGenerator(output, JsonEncoding.UTF8);
  MinimalPrettyPrinter printer = new MinimalPrettyPrinter();
  printer.setRootValueSeparator(System.getProperty("line.separator"));
  jsonGenerator.setPrettyPrinter(printer);
  encoder = EncoderFactory.get().jsonEncoder(fileSchema, jsonGenerator);
}
 
开发者ID:nucypher,项目名称:hadoop-oss,代码行数:18,代码来源:Display.java

示例2: initReader

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Creates a {@link DataFileReader} for the given split, choosing the datum
 * reader implementation (generic / specific / reflect) that matches the
 * configured Avro value type.
 *
 * @param split the input split whose backing file should be opened
 * @return an open data file reader for the split's file
 * @throws IOException if the underlying file cannot be opened
 */
private DataFileReader<E> initReader(FileInputSplit split) throws IOException {
	// Pick the decoding strategy based on the configured value class.
	final DatumReader<E> datumReader;
	if (org.apache.avro.generic.GenericRecord.class == avroValueType) {
		datumReader = new GenericDatumReader<E>();
	} else if (org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)) {
		datumReader = new SpecificDatumReader<E>(avroValueType);
	} else {
		datumReader = new ReflectDatumReader<E>(avroValueType);
	}

	if (LOG.isInfoEnabled()) {
		LOG.info("Opening split {}", split);
	}

	final long fileLength =
		split.getPath().getFileSystem().getFileStatus(split.getPath()).getLen();
	SeekableInput seekableInput = new FSDataInputStreamWrapper(stream, fileLength);
	DataFileReader<E> reader = (DataFileReader) DataFileReader.openReader(seekableInput, datumReader);

	if (LOG.isDebugEnabled()) {
		LOG.debug("Loaded SCHEMA: {}", reader.getSchema());
	}

	end = split.getStart() + split.getLength();
	recordsReadSinceLastSync = 0;
	return reader;
}
 
开发者ID:axbaretto,项目名称:flink,代码行数:25,代码来源:AvroInputFormat.java

示例3: testDeserializeToSpecificType

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * This test validates proper serialization with specific (generated POJO) types.
 */
@Test
public void testDeserializeToSpecificType() throws IOException {

	DatumReader<User> specificReader = new SpecificDatumReader<User>(userSchema);

	try (FileReader<User> fileReader = DataFileReader.openReader(testFile, specificReader)) {
		User original = fileReader.next();

		// the record read back from the Avro file must be intact
		assertNotNull(original);
		assertEquals("name not equal", TEST_NAME, original.get("name").toString());
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), original.get("type_enum").toString());

		// now push the record through the framework's own serializer
		ExecutionConfig executionConfig = new ExecutionConfig();
		TypeInformation<User> typeInfo = TypeExtractor.createTypeInfo(User.class);

		Assert.assertEquals(AvroTypeInfo.class, typeInfo.getClass());
		TypeSerializer<User> serializer = typeInfo.createSerializer(executionConfig);

		ByteArrayOutputStream byteBuffer = new ByteArrayOutputStream();
		try (DataOutputViewStreamWrapper outputView = new DataOutputViewStreamWrapper(byteBuffer)) {
			serializer.serialize(original, outputView);
		}

		User roundTripped;
		try (DataInputViewStreamWrapper inputView = new DataInputViewStreamWrapper(
				new ByteArrayInputStream(byteBuffer.toByteArray())))
		{
			roundTripped = serializer.deserialize(inputView);
		}

		// the deserialized copy must equal what was read from the file
		assertNotNull(roundTripped);
		assertEquals("name not equal", TEST_NAME, roundTripped.getName().toString());
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), roundTripped.getTypeEnum().toString());
	}
}
 
开发者ID:axbaretto,项目名称:flink,代码行数:42,代码来源:AvroRecordInputFormatTest.java

示例4: AvroFileInputStream

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Opens a local Avro data file and prepares a JSON encoder so records can
 * later be streamed out as line-separated JSON text.
 *
 * @param status file status of the Avro file to display
 * @throws IOException if the file cannot be opened or the encoder fails
 */
public AvroFileInputStream(FileStatus status) throws IOException {
  pos = 0;
  buffer = new byte[0];
  output = new ByteArrayOutputStream();

  // Open the file directly via java.io.File; schema drives the decoding.
  GenericDatumReader<Object> datumReader = new GenericDatumReader<Object>();
  fileReader =
    DataFileReader.openReader(new File(status.getPath().toUri()), datumReader);

  // Mirror the file's own schema on the write side for the JSON round-trip.
  Schema fileSchema = fileReader.getSchema();
  writer = new GenericDatumWriter<Object>(fileSchema);

  // Emit one JSON value per line using the platform line separator.
  JsonGenerator jsonGenerator =
    new JsonFactory().createJsonGenerator(output, JsonEncoding.UTF8);
  MinimalPrettyPrinter printer = new MinimalPrettyPrinter();
  printer.setRootValueSeparator(System.getProperty("line.separator"));
  jsonGenerator.setPrettyPrinter(printer);
  encoder = EncoderFactory.get().jsonEncoder(fileSchema, jsonGenerator);
}
 
开发者ID:ict-carch,项目名称:hadoop-plus,代码行数:17,代码来源:Display.java

示例5: getSchema

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Reads the writer schema embedded in an Avro container file's header.
 *
 * @param file the Avro data file to inspect
 * @return the schema stored in the file's header
 * @throws IOException if the file cannot be opened or read
 */
public Schema getSchema(File file) throws IOException {
	DatumReader<IndexedRecord> reader = new GenericDatumReader<>();
	// try-with-resources replaces the manual try/finally null-check close
	// and guarantees the reader is closed even if getSchema() throws.
	try (FileReader<IndexedRecord> fileReader = DataFileReader.openReader(file, reader)) {
		return fileReader.getSchema();
	}
}
 
开发者ID:conversant,项目名称:mara,代码行数:22,代码来源:AvroUnitTestHelper.java

示例6: readAndCheckResultsFromHdfs

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Reads back every Avro record written to HDFS under the application's log
 * path, asserts that each record carries the expected header, and finally
 * asserts that the collected payloads equal the logs that were written.
 */
private void readAndCheckResultsFromHdfs(RecordHeader header, List<TestLogData> testLogs) throws IOException {
  Path logsPath = new Path("/logs" + Path.SEPARATOR + applicationToken + Path.SEPARATOR + logSchemaVersion + Path.SEPARATOR + "data*");
  FileStatus[] statuses = fileSystem.globStatus(logsPath);
  List<TestLogData> collected = new ArrayList<>();
  Schema wrapperSchema = RecordWrapperSchemaGenerator.generateRecordWrapperSchema(TestLogData.getClassSchema().toString());
  for (FileStatus fileStatus : statuses) {
    FileReader<GenericRecord> avroReader = null;
    try {
      SeekableInput seekableInput = new FsInput(fileStatus.getPath(), fileSystem.getConf());
      DatumReader<GenericRecord> datumReader = new SpecificDatumReader<>(wrapperSchema);
      avroReader = DataFileReader.openReader(seekableInput, datumReader);
      for (GenericRecord wrapped : avroReader) {
        // every wrapped record must carry the expected header
        RecordHeader actualHeader = (RecordHeader) wrapped.get(RecordWrapperSchemaGenerator.RECORD_HEADER_FIELD);
        Assert.assertEquals(header, actualHeader);
        TestLogData payload = (TestLogData) wrapped.get(RecordWrapperSchemaGenerator.RECORD_DATA_FIELD);
        collected.add(payload);
      }
    } finally {
      // best-effort close, matching the original quiet-close behavior
      IOUtils.closeQuietly(avroReader);
    }
  }
  Assert.assertEquals(testLogs, collected);
}
 
开发者ID:kaaproject,项目名称:kaa,代码行数:24,代码来源:TestKaaHdfsSink.java

示例7: decodePojo

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Decodes an Avro container-file byte array into a POJO of the given type
 * using a reflection-based datum reader.
 *
 * <p>Decoding is best-effort: on failure the error is logged and
 * {@code null} is returned, preserving this codec's original contract.
 *
 * @param encodedData Avro container-file bytes to decode
 * @param type        the concrete class to instantiate and populate
 * @return the decoded instance, or {@code null} if decoding fails
 */
private <T> T decodePojo(byte[] encodedData, Class type) {

    log.debug("Reflecting decode of {}, consider registering a converter", type);

    DatumReader<T> userDatumReader = RD.createDatumReader(RD.getSchema(type));

    // try-with-resources: the original leaked the FileReader on every call.
    try (FileReader<T> ts =
             DataFileReader.openReader(new SeekableByteArrayInput(encodedData), userDatumReader)) {
      // getDeclaredConstructor().newInstance() replaces the deprecated
      // Class.newInstance(); all its failures are ReflectiveOperationExceptions.
      T mine = (T) type.getDeclaredConstructor().newInstance();
      return (T) ts.next(mine);
    } catch (IOException | ReflectiveOperationException e) {
      // log instead of printStackTrace(); decoding remains best-effort
      log.warn("Failed to decode {} from Avro payload", type, e);
    }

    return null;
  }
 
开发者ID:muoncore,项目名称:muon-java,代码行数:18,代码来源:AvroCodec.java

示例8: open

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Opens the given split and positions the Avro reader at the first sync
 * marker at or after the split's start offset.
 */
@Override
public void open(FileInputSplit split) throws IOException {
	super.open(split);

	// generated specific records decode via their classes; anything else via reflection
	final DatumReader<E> datumReader =
		org.apache.avro.specific.SpecificRecordBase.class.isAssignableFrom(avroValueType)
			? new SpecificDatumReader<E>(avroValueType)
			: new ReflectDatumReader<E>(avroValueType);

	LOG.info("Opening split " + split);

	SeekableInput wrappedStream = new FSDataInputStreamWrapper(stream, (int) split.getLength());

	dataFileReader = DataFileReader.openReader(wrappedStream, datumReader);
	// skip to the first record boundary inside this split
	dataFileReader.sync(split.getStart());
}
 
开发者ID:citlab,项目名称:vs.msc.ws14,代码行数:19,代码来源:AvroInputFormat.java

示例9: convert

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Converts an Avro container file into a JSON fix file: every datum in the
 * input is re-encoded as JSON using the schema embedded in the file itself.
 *
 * <p>An empty input is passed through unchanged.
 *
 * @param testRun the current test run (required by the converter interface)
 * @param ff      the fix file holding Avro container bytes
 * @return a new fix file with the JSON rendering, or {@code ff} if empty
 * @throws IOException if the Avro data cannot be read or re-encoded
 */
@Override
public FixFile convert(TestRun testRun, FixFile ff) throws IOException {
    byte[] bytes = IOUtils.toByteArray(ff.getContent());
    if (bytes.length == 0) {
        return ff;
    }
    ByteArrayOutputStream os = new ByteArrayOutputStream();
    GenericDatumReader<Object> reader = new GenericDatumReader<>();
    // try-with-resources replaces the manual try/finally close
    try (FileReader<Object> fileReader =
             DataFileReader.openReader(new SeekableByteArrayInput(bytes), reader)) {
        Schema schema = fileReader.getSchema();
        DatumWriter<Object> writer = new GenericDatumWriter<>(schema);
        JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, os);

        for (Object datum : fileReader) {
            writer.write(datum, encoder);
        }
        encoder.flush();
    }
    return new FixFile(new ByteArrayInputStream(os.toByteArray()));
}
 
开发者ID:collectivemedia,项目名称:celos,代码行数:24,代码来源:AvroToJsonConverter.java

示例10: getSchema

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Reads the writer schema from an Avro file and converts it to the
 * equivalent Kafka Connect schema.
 *
 * @param conf Hadoop configuration used to access the file system
 * @param path path of the Avro file
 * @return the Connect schema derived from the file's Avro schema
 * @throws IOException if the file cannot be opened or read
 */
@Override
public Schema getSchema(Configuration conf, Path path) throws IOException {
  SeekableInput input = new FsInput(path, conf);
  DatumReader<Object> reader = new GenericDatumReader<>();
  // try-with-resources: the original leaked the reader if getSchema() threw
  try (FileReader<Object> fileReader = DataFileReader.openReader(input, reader)) {
    return avroData.toConnectSchema(fileReader.getSchema());
  }
}
 
开发者ID:jiangxiluning,项目名称:kafka-connect-hdfs,代码行数:10,代码来源:AvroFileReader.java

示例11: readData

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Reads every datum from an Avro file into an in-memory collection.
 *
 * @param conf Hadoop configuration used to access the file system
 * @param path path of the Avro file
 * @return all records from the file, in file order
 * @throws IOException if the file cannot be opened or read
 */
@Override
public Collection<Object> readData(Configuration conf, Path path) throws IOException {
  ArrayList<Object> collection = new ArrayList<>();
  SeekableInput input = new FsInput(path, conf);
  DatumReader<Object> reader = new GenericDatumReader<>();
  // try-with-resources: the original leaked the reader if iteration threw
  try (FileReader<Object> fileReader = DataFileReader.openReader(input, reader)) {
    for (Object object : fileReader) {
      collection.add(object);
    }
  }
  return collection;
}
 
开发者ID:jiangxiluning,项目名称:kafka-connect-hdfs,代码行数:13,代码来源:AvroFileReader.java

示例12: getAvroSchema

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Gets the schema of Avro files stored in a directory, or of a single file.
 *
 * <p>For a directory, the first visible file is sampled; hidden and
 * metadata files (names starting with {@code _} or {@code .}) are skipped.
 *
 * @param path an Avro file, or a directory containing Avro files
 * @param conf Hadoop configuration used to access the file system
 * @return the schema of the sampled file, or {@code null} if the directory
 *         contains no visible files
 * @throws IOException if the file system or file cannot be read
 */
public static Schema getAvroSchema(Path path, Configuration conf)
    throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  Path fileToTest;
  if (fs.isDirectory(path)) {
    FileStatus[] fileStatuses = fs.listStatus(path, new PathFilter() {
      @Override
      public boolean accept(Path p) {
        String name = p.getName();
        // skip hidden/metadata entries such as _SUCCESS and .crc files
        return !name.startsWith("_") && !name.startsWith(".");
      }
    });
    if (fileStatuses.length == 0) {
      return null;
    }
    fileToTest = fileStatuses[0].getPath();
  } else {
    fileToTest = path;
  }

  SeekableInput input = new FsInput(fileToTest, conf);
  DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
  // try-with-resources: the original leaked the reader if getSchema() threw
  try (FileReader<GenericRecord> fileReader = DataFileReader.openReader(input, reader)) {
    return fileReader.getSchema();
  }
}
 
开发者ID:aliyun,项目名称:aliyun-maxcompute-data-collectors,代码行数:32,代码来源:AvroUtil.java

示例13: initialize

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Opens the Avro file behind the given split and positions the reader at
 * the first sync marker at or after the split's start offset.
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
    throws IOException, InterruptedException {
  FileSplit fileSplit = (FileSplit) genericSplit;
  Configuration conf = context.getConfiguration();

  // open the Avro container file for this split
  SeekableInput seekableIn = new FsInput(fileSplit.getPath(), conf);
  DatumReader<T> datumReader = new GenericDatumReader<T>();
  this.reader = DataFileReader.openReader(seekableIn, datumReader);

  // sync to start, then record the actual start/end offsets
  reader.sync(fileSplit.getStart());
  this.start = reader.tell();
  this.end = fileSplit.getStart() + fileSplit.getLength();
}
 
开发者ID:aliyun,项目名称:aliyun-maxcompute-data-collectors,代码行数:13,代码来源:AvroRecordReader.java

示例14: checkAvroFileForLine

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * Scans an Avro file for a record whose field values match the given list.
 *
 * @param fs     file system handle (kept for call-site symmetry)
 * @param p      path of the Avro file to scan
 * @param record the expected field values
 * @return {@code true} if any record in the file matches
 * @throws IOException if the file cannot be opened or read
 */
private boolean checkAvroFileForLine(FileSystem fs, Path p, List<Integer> record)
    throws IOException {
  SeekableInput in = new FsInput(p, new Configuration());
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
  // try-with-resources: the original never closed the reader at all
  try (FileReader<GenericRecord> reader = DataFileReader.openReader(in, datumReader)) {
    reader.sync(0);
    while (reader.hasNext()) {
      if (valueMatches(reader.next(), record)) {
        return true;
      }
    }
  }
  return false;
}
 
开发者ID:aliyun,项目名称:aliyun-maxcompute-data-collectors,代码行数:16,代码来源:TestMerge.java

示例15: testDeserializeToSpecificType

import org.apache.avro.file.DataFileReader; //导入方法依赖的package包/类
/**
 * This test validates proper serialization with specific (generated POJO) types.
 */
@Test
public void testDeserializeToSpecificType() throws IOException {

	DatumReader<User> avroReader = new SpecificDatumReader<User>(userSchema);

	try (FileReader<User> avroFileReader = DataFileReader.openReader(testFile, avroReader)) {
		User fromFile = avroFileReader.next();

		// the record read back from the Avro file must be intact
		assertNotNull(fromFile);
		assertEquals("name not equal", TEST_NAME, fromFile.get("name").toString());
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), fromFile.get("type_enum").toString());

		// serialize the record with the framework's serializer stack
		ExecutionConfig config = new ExecutionConfig();
		TypeInformation<User> userTypeInfo = TypeExtractor.createTypeInfo(User.class);

		Assert.assertEquals(AvroTypeInfo.class, userTypeInfo.getClass());
		TypeSerializer<User> userSerializer = userTypeInfo.createSerializer(config);

		ByteArrayOutputStream serialized = new ByteArrayOutputStream();
		try (DataOutputViewStreamWrapper target = new DataOutputViewStreamWrapper(serialized)) {
			userSerializer.serialize(fromFile, target);
		}

		User restored;
		try (DataInputViewStreamWrapper source = new DataInputViewStreamWrapper(
				new ByteArrayInputStream(serialized.toByteArray()))) {
			restored = userSerializer.deserialize(source);
		}

		// the deserialized copy must equal what was read from the file
		assertNotNull(restored);
		assertEquals("name not equal", TEST_NAME, restored.getName().toString());
		assertEquals("enum not equal", TEST_ENUM_COLOR.toString(), restored.getTypeEnum().toString());
	}
}
 
开发者ID:axbaretto,项目名称:flink,代码行数:41,代码来源:AvroRecordInputFormatTest.java


注:本文中的org.apache.avro.file.DataFileReader.openReader方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。