本文整理汇总了Java中org.apache.avro.mapred.FsInput类的典型用法代码示例。如果您正苦于以下问题:Java FsInput类的具体用法?Java FsInput怎么用?Java FsInput使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
FsInput类属于org.apache.avro.mapred包,在下文中一共展示了FsInput类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: readIndividualsFromFile
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Reads every serialized {@code IndividualWrapper} record from the Avro container
 * file at {@code filePath} into memory.
 *
 * @param filePath      HDFS (or other Hadoop FS) path of the Avro data file
 * @param configuration Hadoop configuration used to open the file and to resolve
 *                      user classes via its ClassLoader
 * @return all records in file order; empty list if the file holds no records
 * @throws IOException if the file cannot be opened or read
 */
public static List<IndividualWrapper<Individual, FitnessValue>> readIndividualsFromFile(
    Path filePath, Configuration configuration) throws IOException {
  List<IndividualWrapper<Individual, FitnessValue>> result =
      new ArrayList<IndividualWrapper<Individual, FitnessValue>>();
  // ReflectData bound to the job ClassLoader so reflect-mapped user classes resolve.
  ReflectData reflectData = new ReflectData(configuration.getClassLoader());
  DatumReader<IndividualWrapper<Individual, FitnessValue>> datumReader =
      new ReflectDatumReader<IndividualWrapper<Individual, FitnessValue>>(reflectData);
  // FIX: the original closed the reader only on the success path, leaking the
  // DataFileReader and underlying FsInput if iteration threw. try-with-resources
  // closes both in all cases (DataFileReader.close() closes its SeekableInput).
  try (DataFileReader<IndividualWrapper<Individual, FitnessValue>> avroFileReader =
      new DataFileReader<IndividualWrapper<Individual, FitnessValue>>(
          new FsInput(filePath, configuration), datumReader)) {
    for (IndividualWrapper<Individual, FitnessValue> individualWrapper : avroFileReader) {
      result.add(individualWrapper);
    }
  }
  return result;
}
示例2: getDirectorySchema
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Get the latest avro schema for a directory
 * @param directory the input dir that contains avro files
 * @param conf configuration
 * @param latest true to return latest schema, false to return oldest schema
 * @return the latest/oldest schema in the directory, or {@code null} if the
 *         directory contains no avro files
 * @throws IOException if the directory or a schema file cannot be read
 */
public static Schema getDirectorySchema(Path directory, Configuration conf, boolean latest) throws IOException {
  Schema schema = null;
  // try-with-resources on the Closer (same pattern as the FileSystem-based
  // overload of this method) guarantees registered resources are closed even
  // when schema extraction throws.
  try (Closer closer = Closer.create()) {
    List<FileStatus> files = getDirectorySchemaHelper(directory, FileSystem.get(conf));
    if (files == null || files.size() == 0) {
      LOG.warn("There is no previous avro file in the directory: " + directory);
    } else {
      // Helper returns files sorted so index 0 is the newest.
      FileStatus file = latest ? files.get(0) : files.get(files.size() - 1);
      LOG.info("Path to get the avro schema: " + file);
      // FIX: register the FsInput too — the original leaked it when the
      // DataFileReader constructor threw before taking ownership of it.
      FsInput fi = closer.register(new FsInput(file.getPath(), conf));
      GenericDatumReader<GenericRecord> genReader = new GenericDatumReader<GenericRecord>();
      schema = closer.register(new DataFileReader<GenericRecord>(fi, genReader)).getSchema();
    }
  } catch (IOException ioe) {
    // Wrap with the directory for context; cause is preserved.
    throw new IOException("Cannot get the schema for directory " + directory, ioe);
  }
  return schema;
}
示例3: getAvroFile
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Opens the given avro file for reading generic records, proxying as the
 * configured user when {@code SHOULD_FS_PROXY_AS_USER} is set.
 * Caller is responsible for closing the returned reader.
 *
 * @param file path of the avro file to open
 * @return a reader over the file, or {@code null} if the file does not exist
 * @throws FileBasedHelperException if the file cannot be opened
 */
public DataFileReader<GenericRecord> getAvroFile(String file) throws FileBasedHelperException {
  Path avroPath = new Path(file);
  try {
    if (!fs.exists(avroPath)) {
      LOGGER.warn(file + " does not exist.");
      return null;
    }
    boolean proxyAsUser = state.getPropAsBoolean(ConfigurationKeys.SHOULD_FS_PROXY_AS_USER,
        ConfigurationKeys.DEFAULT_SHOULD_FS_PROXY_AS_USER);
    // Choose the input implementation, then build the reader once.
    SeekableInput input = proxyAsUser
        ? new ProxyFsInput(avroPath, this.fs)
        : new FsInput(avroPath, fs.getConf());
    return new DataFileReader<GenericRecord>(input, new GenericDatumReader<GenericRecord>());
  } catch (IOException e) {
    throw new FileBasedHelperException("Failed to open avro file " + file + " due to error " + e.getMessage(), e);
  }
}
示例4: TestExtractor
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Builds an extractor that reads avro {@link GenericRecord}s from the source
 * file named by the work unit's {@code SOURCE_FILE_KEY} property.
 *
 * @param workUnitState state carrying the source file path and FS URI
 */
public TestExtractor(WorkUnitState workUnitState) {
  //super(workUnitState);
  Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
  Path sourceFile = new Path(workUnitState.getWorkunit().getProp(SOURCE_FILE_KEY));
  LOG.info("Reading from source file " + sourceFile);
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
  try {
    FileSystem fs = FileSystem
        .get(URI.create(workUnitState.getProp(ConfigurationKeys.FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI)),
            new Configuration());
    // FIX: FileSystem.makeQualified returns a new qualified Path; the original
    // discarded the return value, making the call a no-op.
    sourceFile = fs.makeQualified(sourceFile);
    this.dataFileReader =
        new DataFileReader<GenericRecord>(new FsInput(sourceFile, new Configuration()), datumReader);
  } catch (IOException ioe) {
    // NOTE(review): dataFileReader remains null on failure; downstream reads
    // will NPE — consider rethrowing instead of only logging.
    LOG.error("Failed to read the source file " + sourceFile, ioe);
  }
}
示例5: getSchema
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Extracts the avro schema and file metadata for the given avro file and
 * packages them as a {@link DatasetJsonRecord}.
 *
 * @param targetFilePath path of the avro file to inspect
 * @return record holding schema string, path, mtime, owner/group/permission,
 *         codec and storage type
 * @throws IOException if the file cannot be opened or its status read
 */
@Override
public DatasetJsonRecord getSchema(Path targetFilePath)
    throws IOException {
  System.out.println("avro file path : " + targetFilePath.toUri().getPath());
  // FIX: the original closed reader/sin only on the success path, leaking both
  // whenever getMetaString/getSchema/getFileStatus threw.
  try (SeekableInput sin = new FsInput(targetFilePath, fs.getConf());
      DataFileReader<GenericRecord> reader =
          new DataFileReader<GenericRecord>(sin, new GenericDatumReader<GenericRecord>())) {
    // "avro.codec" metadata may be absent (null) for uncompressed files.
    String codec = reader.getMetaString("avro.codec");
    String schemaString = reader.getSchema().toString();
    // (Removed the original's unused `record_count` local — getBlockCount()
    // was called and the value never used.)
    String abstractPath = targetFilePath.toUri().getPath();
    FileStatus fstat = fs.getFileStatus(targetFilePath);
    return new DatasetJsonRecord(schemaString, abstractPath, fstat.getModificationTime(),
        fstat.getOwner(), fstat.getGroup(), fstat.getPermission().toString(), codec,
        STORAGE_TYPE, "");
  }
}
示例6: getSampleData
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Reads up to 10 records from the avro file as sanitized strings and wraps
 * them in a {@link SampleDataRecord}.
 *
 * @param targetFilePath path of the avro file to sample
 * @return sample record for the file path with at most 10 stringified rows
 * @throws IOException if the file cannot be opened or read
 */
@Override
public SampleDataRecord getSampleData(Path targetFilePath)
    throws IOException {
  // FIX: the original never closed reader or sin — resource leak on every call.
  try (SeekableInput sin = new FsInput(targetFilePath, fs.getConf());
      DataFileReader<GenericRecord> reader =
          new DataFileReader<GenericRecord>(sin, new GenericDatumReader<GenericRecord>())) {
    Iterator<GenericRecord> iter = reader.iterator();
    int count = 0;
    List<Object> list = new ArrayList<Object>();
    // Cap at 10 rows so large files do not exhaust memory.
    while (iter.hasNext() && count < 10) {
      // Strip newlines/control characters, then escape double quotes.
      // FIX: the original replacement "\\\"" decodes to a bare '"' in
      // Matcher.replaceAll semantics (a no-op); "\\\\\"" yields the intended \".
      list.add(iter.next().toString()
          .replaceAll("[\\n\\r\\p{C}]", "")
          .replaceAll("\"", "\\\\\""));
      count++;
    }
    return new SampleDataRecord(targetFilePath.toUri().getPath(), list);
  }
}
示例7: getSchema
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Extracts the avro-mapped schema and file metadata for the given parquet file
 * and packages them as a {@link DatasetJsonRecord}.
 *
 * @param targetFilePath path of the parquet file to inspect
 * @return record holding schema string, path, mtime, owner/group/permission
 *         and storage type (codec not yet populated)
 * @throws IOException if the file is empty, cannot be opened, or its status read
 */
@Override
public DatasetJsonRecord getSchema(Path targetFilePath)
    throws IOException {
  System.out.println("parquet file path : " + targetFilePath.toUri().getPath());
  // FIX: dropped the original's FsInput — it was opened, never used (the
  // parquet reader is built from the path), and only closed at the end.
  // try-with-resources also closes the reader on the exception path.
  try (ParquetReader<GenericRecord> reader =
      AvroParquetReader.<GenericRecord>builder(targetFilePath).build()) {
    GenericRecord firstRecord = reader.read();
    if (firstRecord == null) {
      // FIX: the original dereferenced read() unconditionally and NPE'd on
      // an empty file; fail with a descriptive IOException instead.
      throw new IOException("No records found in parquet file: " + targetFilePath);
    }
    String schemaString = firstRecord.getSchema().toString();
    String storage = STORAGE_TYPE;
    String abstractPath = targetFilePath.toUri().getPath();
    FileStatus fstat = fs.getFileStatus(targetFilePath);
    // TODO set codec
    return new DatasetJsonRecord(schemaString, abstractPath, fstat.getModificationTime(),
        fstat.getOwner(), fstat.getGroup(), fstat.getPermission().toString(), null, storage, "");
  }
}
示例8: createDataFileReader
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Opens an avro container file for reading generic records.
 * Caller owns (and must close) the returned reader.
 *
 * @param filename path of the avro file
 * @param localFS  true to read via {@link File} from the local filesystem,
 *                 false to read through the configured Hadoop filesystem
 * @return a reader positioned at the start of the file
 * @throws IOException if the file cannot be opened
 */
private DataFileReader<GenericRecord> createDataFileReader(String filename,
    boolean localFS) throws IOException
{
  GenericDatumReader<GenericRecord> records = new GenericDatumReader<GenericRecord>();
  if (localFS)
  {
    return new DataFileReader<GenericRecord>(new File(filename), records);
  }
  return new DataFileReader<GenericRecord>(new FsInput(new Path(filename), conf), records);
}
示例9: init
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Initializes the AvroScanner: resolves the projection, loads the avro
 * schema, and opens the fragment's data file for reading.
 */
@Override
public void init() throws IOException {
  // No explicit projection requested -> project every column.
  if (targets == null) {
    targets = schema.toArray();
  }
  prepareProjection(targets);
  outTuple = new VTuple(projectionMap.length);

  // Resolve the avro schema from table meta and cache its field list.
  Schema avroSchema = AvroUtil.getAvroSchema(meta, conf);
  avroFields = avroSchema.getFields();

  // Open the fragment's file through the Hadoop filesystem.
  SeekableInput input = new FsInput(fragment.getPath(), conf);
  dataFileReader =
      new DataFileReader<GenericRecord>(input, new GenericDatumReader<GenericRecord>(avroSchema));
  super.init();
}
示例10: init
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Initializes the AvroScanner: resolves the projection, loads the avro
 * schema into the scanner state, and opens the fragment's data file.
 */
@Override
public void init() throws IOException {
  // Default to projecting all columns when none were specified.
  if (targets == null) {
    targets = schema.toArray();
  }
  prepareProjection(targets);

  avroSchema = AvroUtil.getAvroSchema(meta, conf);
  avroFields = avroSchema.getFields();

  // Open the fragment's file and attach a datum reader bound to the schema.
  dataFileReader = new DataFileReader<GenericRecord>(
      new FsInput(fragment.getPath(), conf),
      new GenericDatumReader<GenericRecord>(avroSchema));
  super.init();
}
示例11: getDirectorySchema
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Get the latest avro schema for a directory
 * @param directory the input dir that contains avro files
 * @param fs the {@link FileSystem} for the given directory.
 * @param latest true to return latest schema, false to return oldest schema
 * @return the latest/oldest schema in the directory, or {@code null} when the
 *         directory has no avro files
 * @throws IOException if the schema cannot be read
 */
public static Schema getDirectorySchema(Path directory, FileSystem fs, boolean latest) throws IOException {
  Schema schema = null;
  try (Closer closer = Closer.create()) {
    List<FileStatus> candidates = getDirectorySchemaHelper(directory, fs);
    if (candidates != null && !candidates.isEmpty()) {
      // Helper sorts so index 0 is the newest file.
      FileStatus chosen = latest ? candidates.get(0) : candidates.get(candidates.size() - 1);
      LOG.debug("Path to get the avro schema: " + chosen);
      // The Closer guarantees the reader is closed even if getSchema() throws.
      DataFileReader<GenericRecord> reader = closer.register(
          new DataFileReader<>(new FsInput(chosen.getPath(), fs.getConf()),
              new GenericDatumReader<GenericRecord>()));
      schema = reader.getSchema();
    } else {
      LOG.warn("There is no previous avro file in the directory: " + directory);
    }
  } catch (IOException ioe) {
    throw new IOException("Cannot get the schema for directory " + directory, ioe);
  }
  return schema;
}
示例12: getAvroFile
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Returns an {@link DataFileReader} to the specified avro file.
 * <p>
 * Note: It is the caller's responsibility to close the returned {@link DataFileReader}.
 * </p>
 *
 * @param file The path to the avro file to open.
 * @return A {@link DataFileReader} for the specified avro file, or {@code null}
 *         if the file does not exist.
 * @throws FileBasedHelperException if there is a problem opening the {@link InputStream} for the specified file.
 */
public DataFileReader<GenericRecord> getAvroFile(String file) throws FileBasedHelperException {
  Path avroPath = new Path(file);
  try {
    if (!this.getFileSystem().exists(avroPath)) {
      LOGGER.warn(file + " does not exist.");
      return null;
    }
    // When FS proxying is enabled, read through the proxy input instead.
    boolean proxyEnabled = this.getState().getPropAsBoolean(ConfigurationKeys.SHOULD_FS_PROXY_AS_USER,
        ConfigurationKeys.DEFAULT_SHOULD_FS_PROXY_AS_USER);
    if (proxyEnabled) {
      return new DataFileReader<>(new ProxyFsInput(avroPath, this.getFileSystem()),
          new GenericDatumReader<GenericRecord>());
    }
    return new DataFileReader<>(new FsInput(avroPath, this.getFileSystem().getConf()),
        new GenericDatumReader<GenericRecord>());
  } catch (IOException e) {
    throw new FileBasedHelperException("Failed to open avro file " + file + " due to error " + e.getMessage(), e);
  }
}
示例13: TestExtractor
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Builds an extractor that reads avro {@link GenericRecord}s from the source
 * file named by the work unit's {@code TestSource.SOURCE_FILE_KEY} property.
 *
 * @param workUnitState state carrying the source file path and FS URI
 */
public TestExtractor(WorkUnitState workUnitState) {
  //super(workUnitState);
  Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
  Path sourceFile = new Path(workUnitState.getWorkunit().getProp(TestSource.SOURCE_FILE_KEY));
  LOG.info("Reading from source file " + sourceFile);
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
  try {
    FileSystem fs = FileSystem
        .get(URI.create(workUnitState.getProp(ConfigurationKeys.FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI)),
            new Configuration());
    // FIX: FileSystem.makeQualified returns a new qualified Path; the original
    // discarded the return value, making the call a no-op.
    sourceFile = fs.makeQualified(sourceFile);
    this.dataFileReader =
        new DataFileReader<GenericRecord>(new FsInput(sourceFile, new Configuration()), datumReader);
  } catch (IOException ioe) {
    // NOTE(review): dataFileReader remains null on failure; downstream reads
    // will NPE — consider rethrowing instead of only logging.
    LOG.error("Failed to read the source file " + sourceFile, ioe);
  }
}
示例14: initCurrentFile
import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Opens the avro reader for the file currently selected by {@code currentFile}
 * in the combined split, positioning it at this split's byte range.
 *
 * @throws IOException if the previous reader cannot be closed or the file
 *         cannot be opened/synced
 */
private void initCurrentFile() throws IOException {
  // Release the reader for the previously processed file, if any.
  if (reader != null) {
    reader.close();
  }
  LOG.info("Initializing {}:{}+{}",
      new Object[] { split.getPath(currentFile),
          split.getOffset(currentFile), split.getLength(currentFile) });
  // Reader is created against the fixed "logBlock" schema...
  GenericDatumReader<Record> datumReader = new GenericDatumReader<Record>(
      Schemas.getSchema("logBlock"));
  reader = new DataFileReader<Record>(new FsInput(split.getPath(currentFile),
      job), datumReader);
  // ...then the expected (reader-side) schema is re-pinned and the writer-side
  // schema is taken from the file header just read.
  // NOTE(review): this setExpected/setSchema sequence after construction looks
  // deliberate but is order-sensitive — confirm before reorganizing.
  datumReader.setExpected(Schemas.getSchema("logBlock"));
  datumReader.setSchema(reader.getSchema());
  // Compute this task's byte range within the file and seek the reader to the
  // first avro sync marker at or after the range start.
  long size = split.getLength(currentFile);
  start = split.getOffset(currentFile);
  end = start + size;
  reader.sync(start);
}
示例15: readAndCheckResultsFromHdfs
/**
 * Reads every wrapped log record written under this application/schema-version
 * directory on HDFS and asserts that headers and payloads match expectations.
 *
 * @param header   expected record header for every written record
 * @param testLogs expected payloads, in write order
 * @throws IOException if the files cannot be listed or read
 */
private void readAndCheckResultsFromHdfs(RecordHeader header, List<TestLogData> testLogs) throws IOException {
  // Glob all data files for this application token and schema version.
  Path logsPattern = new Path("/logs" + Path.SEPARATOR + applicationToken + Path.SEPARATOR
      + logSchemaVersion + Path.SEPARATOR + "data*");
  FileStatus[] matches = fileSystem.globStatus(logsPattern);
  Schema wrapperSchema =
      RecordWrapperSchemaGenerator.generateRecordWrapperSchema(TestLogData.getClassSchema().toString());
  List<TestLogData> actualLogs = new ArrayList<>();
  for (FileStatus match : matches) {
    FileReader<GenericRecord> avroReader = null;
    try {
      DatumReader<GenericRecord> datumReader = new SpecificDatumReader<>(wrapperSchema);
      avroReader = DataFileReader.openReader(
          new FsInput(match.getPath(), fileSystem.getConf()), datumReader);
      for (GenericRecord wrapped : avroReader) {
        // Every record must carry the expected header...
        RecordHeader actualHeader =
            (RecordHeader) wrapped.get(RecordWrapperSchemaGenerator.RECORD_HEADER_FIELD);
        Assert.assertEquals(header, actualHeader);
        // ...and its payload is collected for the final comparison.
        actualLogs.add((TestLogData) wrapped.get(RecordWrapperSchemaGenerator.RECORD_DATA_FIELD));
      }
    } finally {
      // Best-effort close; read failures already propagate.
      IOUtils.closeQuietly(avroReader);
    }
  }
  Assert.assertEquals(testLogs, actualLogs);
}