当前位置: 首页>>代码示例>>Java>>正文


Java FsInput类代码示例

本文整理汇总了Java中org.apache.avro.mapred.FsInput的典型用法代码示例。如果您正苦于以下问题:Java FsInput类的具体用法?Java FsInput怎么用?Java FsInput使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。


FsInput类属于org.apache.avro.mapred包,在下文中一共展示了FsInput类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: readIndividualsFromFile

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Reads every individual stored in an Avro data file into a list.
 *
 * <p>Records are decoded reflectively, using the class loader of the given configuration.
 * The file input and the Avro reader are closed before this method returns, including
 * when decoding fails part-way through the file.
 *
 * @param filePath the Avro data file to read
 * @param configuration the Hadoop configuration used to open the file and to obtain the class loader
 * @return the records of the file, in file order; empty if the file holds no records
 * @throws IOException if the file cannot be opened or read
 */
public static List<IndividualWrapper<Individual, FitnessValue>> readIndividualsFromFile(
        Path filePath, Configuration configuration) throws IOException {
    List<IndividualWrapper<Individual, FitnessValue>> result =
            new ArrayList<IndividualWrapper<Individual, FitnessValue>>();

    ReflectData reflectData = new ReflectData(configuration.getClassLoader());
    DatumReader<IndividualWrapper<Individual, FitnessValue>> datumReader =
            new ReflectDatumReader<IndividualWrapper<Individual, FitnessValue>>(reflectData);

    // The input is its own resource: DataFileReader's constructor reads the file header and
    // may throw, in which case nothing else would close the already opened input.
    try (SeekableInput seekableFileInput = new FsInput(filePath, configuration);
         DataFileReader<IndividualWrapper<Individual, FitnessValue>> avroFileReader =
                 new DataFileReader<IndividualWrapper<Individual, FitnessValue>>(
                         seekableFileInput, datumReader)) {
        for (IndividualWrapper<Individual, FitnessValue> individualWrapper : avroFileReader) {
            result.add(individualWrapper);
        }
    }
    return result;
}
 
开发者ID:pasqualesalza,项目名称:elephant56,代码行数:18,代码来源:Driver.java

示例2: getDirectorySchema

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Get the avro schema of the latest or the oldest avro file in a directory.
 *
 * <p>The candidate files come from {@code getDirectorySchemaHelper}; the first element of
 * that list is used when {@code latest} is true and the last element otherwise
 * (presumably the helper sorts newest first; confirm against the helper).
 *
 * @param directory the input dir that contains avro files
 * @param conf configuration used to obtain the {@link FileSystem} and to open the file
 * @param latest true to return latest schema, false to return oldest schema
 * @return the latest/oldest schema in the directory, or {@code null} if the helper finds no file
 * @throws IOException if the files cannot be listed or the chosen file cannot be read
 */
public static Schema getDirectorySchema(Path directory, Configuration conf, boolean latest) throws IOException {
  Schema schema = null;
  Closer closer = Closer.create();
  try {
    List<FileStatus> files = getDirectorySchemaHelper(directory, FileSystem.get(conf));
    if (files == null || files.isEmpty()) {
      LOG.warn("There is no previous avro file in the directory: " + directory);
    } else {
      FileStatus file = latest ? files.get(0) : files.get(files.size() - 1);
      LOG.info("Path to get the avro schema: " + file);
      // Register the input as well: if the DataFileReader constructor throws while reading
      // the header, the reader is never registered and the input would otherwise stay open.
      FsInput fi = closer.register(new FsInput(file.getPath(), conf));
      GenericDatumReader<GenericRecord> genReader = new GenericDatumReader<GenericRecord>();
      schema = closer.register(new DataFileReader<GenericRecord>(fi, genReader)).getSchema();
    }
  } catch (IOException ioe) {
    throw new IOException("Cannot get the schema for directory " + directory, ioe);
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
  return schema;
}
 
开发者ID:Hanmourang,项目名称:Gobblin,代码行数:32,代码来源:AvroUtils.java

示例3: getAvroFile

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Opens an Avro data file for reading generic records.
 *
 * <p>The file is opened through {@code ProxyFsInput} when the proxy-as-user property is
 * enabled in the state, and through {@link FsInput} otherwise. The returned reader is not
 * closed here.
 *
 * @param file path of the Avro file to open
 * @return a reader over the file, or {@code null} if the file does not exist
 * @throws FileBasedHelperException if checking for or opening the file fails with an {@link IOException}
 */
public DataFileReader<GenericRecord> getAvroFile(String file) throws FileBasedHelperException {
  try {
    final Path avroPath = new Path(file);
    if (!fs.exists(avroPath)) {
      LOGGER.warn(file + " does not exist.");
      return null;
    }

    final boolean proxyAsUser = state.getPropAsBoolean(ConfigurationKeys.SHOULD_FS_PROXY_AS_USER,
        ConfigurationKeys.DEFAULT_SHOULD_FS_PROXY_AS_USER);
    if (!proxyAsUser) {
      return new DataFileReader<GenericRecord>(new FsInput(avroPath, fs.getConf()),
          new GenericDatumReader<GenericRecord>());
    }
    return new DataFileReader<GenericRecord>(new ProxyFsInput(avroPath, this.fs),
        new GenericDatumReader<GenericRecord>());
  } catch (IOException e) {
    throw new FileBasedHelperException("Failed to open avro file " + file + " due to error " + e.getMessage(), e);
  }
}
 
开发者ID:Hanmourang,项目名称:Gobblin,代码行数:19,代码来源:AvroFsHelper.java

示例4: TestExtractor

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Creates an extractor that reads Avro records from the source file named in the work unit.
 *
 * <p>The source path is qualified against the file system selected by
 * {@code ConfigurationKeys.FS_URI_KEY} (falling back to {@code ConfigurationKeys.LOCAL_FS_URI})
 * so that the file is opened on that file system. If the file cannot be opened, the error
 * is logged and {@code dataFileReader} is left unassigned.
 *
 * @param workUnitState state carrying the source file property and the file system URI
 */
public TestExtractor(WorkUnitState workUnitState) {
  //super(workUnitState);
  Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
  Path sourceFile = new Path(workUnitState.getWorkunit().getProp(SOURCE_FILE_KEY));
  LOG.info("Reading from source file " + sourceFile);
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
  try {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem
        .get(URI.create(workUnitState.getProp(ConfigurationKeys.FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI)),
            conf);
    // makeQualified returns a new Path and leaves its argument untouched, so the result
    // has to be the path that is actually opened.
    Path qualifiedSourceFile = fs.makeQualified(sourceFile);
    this.dataFileReader =
        new DataFileReader<GenericRecord>(new FsInput(qualifiedSourceFile, conf), datumReader);
  } catch (IOException ioe) {
    LOG.error("Failed to read the source file " + sourceFile, ioe);
  }
}
 
开发者ID:Hanmourang,项目名称:Gobblin,代码行数:18,代码来源:TestExtractor.java

示例5: getSchema

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Builds a dataset record describing an Avro file: its schema, path, file status and codec.
 *
 * <p>The schema and the {@code avro.codec} metadata entry are read from the file header;
 * modification time, owner, group and permission come from the file status. The file input
 * and the reader are closed before returning, including when an exception is thrown.
 *
 * @param targetFilePath the Avro file to analyze
 * @return the record describing the file
 * @throws IOException if the file cannot be opened, read, or its status fetched
 */
@Override
public DatasetJsonRecord getSchema(Path targetFilePath)
  throws IOException {
  System.out.println("avro file path : " + targetFilePath.toUri().getPath());

  try (SeekableInput sin = new FsInput(targetFilePath, fs.getConf());
       DataFileReader<GenericRecord> reader =
         new DataFileReader<GenericRecord>(sin, new GenericDatumReader<GenericRecord>())) {
    String codec = reader.getMetaString("avro.codec");
    String schemaString = reader.getSchema().toString();
    String storage = STORAGE_TYPE;
    String abstractPath = targetFilePath.toUri().getPath();

    FileStatus fstat = fs.getFileStatus(targetFilePath);
    return new DatasetJsonRecord(schemaString, abstractPath, fstat.getModificationTime(), fstat.getOwner(),
      fstat.getGroup(), fstat.getPermission().toString(), codec, storage, "");
  }
}
 
开发者ID:linkedin,项目名称:WhereHows,代码行数:24,代码来源:AvroFileAnalyzer.java

示例6: getSampleData

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Collects up to ten records from the beginning of an Avro file as sample data.
 *
 * <p>Each record is converted with {@code toString()} and stripped of line breaks and
 * characters in the Unicode "other" category ({@code \p{C}}). The file input and the reader
 * are closed before returning, including when reading fails.
 *
 * @param targetFilePath the Avro file to sample
 * @return a sample record holding the file path and the collected record strings
 * @throws IOException if the file cannot be opened or read
 */
@Override
public SampleDataRecord getSampleData(Path targetFilePath)
  throws IOException {
  List<Object> list = new ArrayList<Object>();

  try (SeekableInput sin = new FsInput(targetFilePath, fs.getConf());
       DataFileReader<GenericRecord> reader =
         new DataFileReader<GenericRecord>(sin, new GenericDatumReader<GenericRecord>())) {
    Iterator<GenericRecord> iter = reader.iterator();
    int count = 0;
    while (iter.hasNext() && count < 10) {
      // TODO handle out of memory error
      // NOTE(review): in a replaceAll replacement string a backslash escapes the next
      // character, so the second replaceAll substitutes a quote with a plain quote.
      // Confirm whether escaping quotes was intended before changing it.
      list.add(iter.next().toString().replaceAll("[\\n\\r\\p{C}]", "").replaceAll("\"", "\\\""));
      count++;
    }
  }

  return new SampleDataRecord(targetFilePath.toUri().getPath(), list);
}
 
开发者ID:linkedin,项目名称:WhereHows,代码行数:21,代码来源:AvroFileAnalyzer.java

示例7: getSchema

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Builds a dataset record describing a Parquet file: its schema, path and file status.
 *
 * <p>The schema is taken from the first record of the file, so a file without records
 * cannot be described. The codec field of the result is always {@code null}. The reader is
 * closed before returning, including when an exception is thrown.
 *
 * @param targetFilePath the Parquet file to analyze
 * @return the record describing the file
 * @throws IOException if the file cannot be read, contains no records, or its status cannot be fetched
 */
@Override
public DatasetJsonRecord getSchema(Path targetFilePath)
  throws IOException {
  System.out.println("parquet file path : " + targetFilePath.toUri().getPath());

  // The file is read only through the Parquet reader; no separate file input is opened.
  try (ParquetReader<GenericRecord> reader = AvroParquetReader.<GenericRecord>builder(targetFilePath).build()) {
    GenericRecord firstRecord = reader.read();
    if (firstRecord == null) {
      // read() yields null once the end of the file is reached, i.e. the file has no records.
      throw new IOException("Cannot derive schema, parquet file has no records: " + targetFilePath);
    }

    String schemaString = firstRecord.getSchema().toString();
    String storage = STORAGE_TYPE;
    String abstractPath = targetFilePath.toUri().getPath();

    FileStatus fstat = fs.getFileStatus(targetFilePath);
    // TODO set codec
    return new DatasetJsonRecord(schemaString, abstractPath, fstat.getModificationTime(), fstat.getOwner(),
      fstat.getGroup(), fstat.getPermission().toString(), null, storage, "");
  }
}
 
开发者ID:linkedin,项目名称:WhereHows,代码行数:22,代码来源:ParquetFileAnalyzer.java

示例8: createDataFileReader

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Opens an Avro data file for reading generic records.
 *
 * @param filename path of the Avro file
 * @param localFS true to open the file as a local {@link File}; false to open it through
 *                {@link FsInput} with this object's {@code conf}
 * @return a reader over the file; closing it is left to the caller
 * @throws IOException if the file cannot be opened
 */
private DataFileReader<GenericRecord> createDataFileReader(String filename,
                                                           boolean localFS) throws IOException
{
    final DatumReader<GenericRecord> recordReader = new GenericDatumReader<GenericRecord>();

    if (!localFS)
    {
        SeekableInput remoteInput = new FsInput(new Path(filename), conf);
        return new DataFileReader<GenericRecord>(remoteInput, recordReader);
    }

    return new DataFileReader<GenericRecord>(new File(filename), recordReader);
}
 
开发者ID:linkedin,项目名称:Cubert,代码行数:21,代码来源:Purge.java

示例9: init

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Prepares this scanner for reading: sets up the projection and output tuple, resolves
 * the Avro schema and its fields, and opens the fragment's file.
 *
 * @throws IOException if the Avro schema or the fragment file cannot be read
 */
@Override
public void init() throws IOException {
  // Without explicit targets, project every column of the schema.
  if (targets == null) {
    targets = schema.toArray();
  }
  prepareProjection(targets);
  outTuple = new VTuple(projectionMap.length);

  final Schema fileSchema = AvroUtil.getAvroSchema(meta, conf);
  avroFields = fileSchema.getFields();

  final DatumReader<GenericRecord> recordReader = new GenericDatumReader<>(fileSchema);
  dataFileReader = new DataFileReader<>(new FsInput(fragment.getPath(), conf), recordReader);
  super.init();
}
 
开发者ID:apache,项目名称:tajo,代码行数:20,代码来源:AvroScanner.java

示例10: init

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Prepares this scanner for reading: sets up the projection, resolves the Avro schema
 * and its fields, and opens the fragment's file.
 *
 * @throws IOException if the Avro schema or the fragment file cannot be read
 */
@Override
public void init() throws IOException {
  // Without explicit targets, project every column of the schema.
  if (targets == null) {
    targets = schema.toArray();
  }
  prepareProjection(targets);

  avroSchema = AvroUtil.getAvroSchema(meta, conf);
  avroFields = avroSchema.getFields();

  final DatumReader<GenericRecord> recordReader = new GenericDatumReader<GenericRecord>(avroSchema);
  dataFileReader =
      new DataFileReader<GenericRecord>(new FsInput(fragment.getPath(), conf), recordReader);
  super.init();
}
 
开发者ID:gruter,项目名称:tajo-cdh,代码行数:20,代码来源:AvroScanner.java

示例11: getDirectorySchema

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Get the avro schema of the latest or the oldest avro file in a directory.
 *
 * <p>The candidate files come from {@code getDirectorySchemaHelper}; the first element of
 * that list is used when {@code latest} is true and the last element otherwise
 * (presumably the helper sorts newest first; confirm against the helper).
 *
 * @param directory the input dir that contains avro files
 * @param fs the {@link FileSystem} for the given directory.
 * @param latest true to return latest schema, false to return oldest schema
 * @return the latest/oldest schema in the directory, or {@code null} if the helper finds no file
 * @throws IOException if the files cannot be listed or the chosen file cannot be read
 */
public static Schema getDirectorySchema(Path directory, FileSystem fs, boolean latest) throws IOException {
  Schema schema = null;
  try (Closer closer = Closer.create()) {
    List<FileStatus> files = getDirectorySchemaHelper(directory, fs);
    if (files == null || files.isEmpty()) {
      LOG.warn("There is no previous avro file in the directory: " + directory);
    } else {
      FileStatus file = latest ? files.get(0) : files.get(files.size() - 1);
      LOG.debug("Path to get the avro schema: " + file);
      // Register the input as well: if the DataFileReader constructor throws while reading
      // the header, the reader is never registered and the input would otherwise stay open.
      FsInput fi = closer.register(new FsInput(file.getPath(), fs.getConf()));
      GenericDatumReader<GenericRecord> genReader = new GenericDatumReader<>();
      schema = closer.register(new DataFileReader<>(fi, genReader)).getSchema();
    }
  } catch (IOException ioe) {
    throw new IOException("Cannot get the schema for directory " + directory, ioe);
  }
  return schema;
}
 
开发者ID:apache,项目名称:incubator-gobblin,代码行数:27,代码来源:AvroUtils.java

示例12: getAvroFile

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Opens the given avro file and returns a {@link DataFileReader} over its generic records.
 * <p>
 * The file is opened through {@link ProxyFsInput} when the proxy-as-user property is enabled
 * in the state, and through {@link FsInput} otherwise.
 * </p>
 * <p>
 * Note: the returned {@link DataFileReader} is not closed here; closing it is the caller's
 * responsibility.
 * </p>
 *
 * @param file The path to the avro file to open.
 * @return A {@link DataFileReader} for the specified avro file, or {@code null} if the file does not exist.
 * @throws FileBasedHelperException if checking for or opening the file fails with an {@link IOException}.
 */
public DataFileReader<GenericRecord> getAvroFile(String file) throws FileBasedHelperException {
  try {
    if (!this.getFileSystem().exists(new Path(file))) {
      LOGGER.warn(file + " does not exist.");
      return null;
    }

    final Path avroPath = new Path(file);
    final boolean proxyAsUser = this.getState().getPropAsBoolean(ConfigurationKeys.SHOULD_FS_PROXY_AS_USER,
        ConfigurationKeys.DEFAULT_SHOULD_FS_PROXY_AS_USER);
    if (!proxyAsUser) {
      return new DataFileReader<>(new FsInput(avroPath, this.getFileSystem().getConf()),
          new GenericDatumReader<GenericRecord>());
    }
    return new DataFileReader<>(new ProxyFsInput(avroPath, this.getFileSystem()),
        new GenericDatumReader<GenericRecord>());
  } catch (IOException e) {
    throw new FileBasedHelperException("Failed to open avro file " + file + " due to error " + e.getMessage(), e);
  }
}
 
开发者ID:apache,项目名称:incubator-gobblin,代码行数:28,代码来源:AvroFsHelper.java

示例13: TestExtractor

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Creates an extractor that reads Avro records from the source file named in the work unit.
 *
 * <p>The source path is qualified against the file system selected by
 * {@code ConfigurationKeys.FS_URI_KEY} (falling back to {@code ConfigurationKeys.LOCAL_FS_URI})
 * so that the file is opened on that file system. If the file cannot be opened, the error
 * is logged and {@code dataFileReader} is left unassigned.
 *
 * @param workUnitState state carrying the source file property and the file system URI
 */
public TestExtractor(WorkUnitState workUnitState) {
  //super(workUnitState);
  Schema schema = new Schema.Parser().parse(AVRO_SCHEMA);
  Path sourceFile = new Path(workUnitState.getWorkunit().getProp(TestSource.SOURCE_FILE_KEY));
  LOG.info("Reading from source file " + sourceFile);
  DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>(schema);
  try {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem
        .get(URI.create(workUnitState.getProp(ConfigurationKeys.FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI)),
            conf);
    // makeQualified returns a new Path and leaves its argument untouched, so the result
    // has to be the path that is actually opened.
    Path qualifiedSourceFile = fs.makeQualified(sourceFile);
    this.dataFileReader =
        new DataFileReader<GenericRecord>(new FsInput(qualifiedSourceFile, conf), datumReader);
  } catch (IOException ioe) {
    LOG.error("Failed to read the source file " + sourceFile, ioe);
  }
}
 
开发者ID:apache,项目名称:incubator-gobblin,代码行数:18,代码来源:TestExtractor.java

示例14: initCurrentFile

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Switches reading to the file at index {@code currentFile} of the split.
 *
 * <p>Closes the previous reader if there is one, opens a new reader over the current file
 * with the "logBlock" schema as the expected schema and the file's own schema as the writer
 * schema, records the start and end offsets of the file's range, and syncs the reader to
 * the start offset.
 *
 * @throws IOException if the previous reader cannot be closed or the file cannot be opened or synced
 */
private void initCurrentFile() throws IOException {
  if (reader != null) {
    reader.close();
  }

  final Object[] splitDetails = { split.getPath(currentFile),
      split.getOffset(currentFile), split.getLength(currentFile) };
  LOG.info("Initializing {}:{}+{}", splitDetails);

  GenericDatumReader<Record> blockReader = new GenericDatumReader<Record>(
      Schemas.getSchema("logBlock"));
  FsInput fileInput = new FsInput(split.getPath(currentFile), job);
  reader = new DataFileReader<Record>(fileInput, blockReader);
  blockReader.setExpected(Schemas.getSchema("logBlock"));
  blockReader.setSchema(reader.getSchema());

  final long length = split.getLength(currentFile);
  start = split.getOffset(currentFile);
  end = start + length;

  // Position the reader at the first sync point at or after the start offset.
  reader.sync(start);
}
 
开发者ID:blackberry,项目名称:BB-BigData-Log-Tools,代码行数:23,代码来源:BoomRecordReader.java

示例15: readAndCheckResultsFromHdfs

import org.apache.avro.mapred.FsInput; //导入依赖的package包/类
/**
 * Reads all log records written under the application's log directory and checks them
 * against the expected header and log data.
 *
 * <p>Every file matching {@code /logs/<applicationToken>/<logSchemaVersion>/data*} is read
 * with the record wrapper schema. Each record's header must equal {@code header}, and the
 * data of all records, in read order, must equal {@code testLogs}.
 *
 * @param header the header every stored record is expected to carry
 * @param testLogs the log data expected to be stored, in order
 * @throws IOException if the files cannot be listed or read
 */
private void readAndCheckResultsFromHdfs(RecordHeader header, List<TestLogData> testLogs) throws IOException {
  String logsGlob = "/logs" + Path.SEPARATOR + applicationToken + Path.SEPARATOR + logSchemaVersion + Path.SEPARATOR + "data*";
  FileStatus[] statuses = fileSystem.globStatus(new Path(logsGlob));
  Schema wrapperSchema = RecordWrapperSchemaGenerator.generateRecordWrapperSchema(TestLogData.getClassSchema().toString());
  List<TestLogData> resultTestLogs = new ArrayList<>();

  for (FileStatus status : statuses) {
    FileReader<GenericRecord> fileReader = null;
    try {
      SeekableInput input = new FsInput(status.getPath(), fileSystem.getConf());
      DatumReader<GenericRecord> datumReader = new SpecificDatumReader<>(wrapperSchema);
      fileReader = DataFileReader.openReader(input, datumReader);
      while (fileReader.hasNext()) {
        GenericRecord record = fileReader.next();
        Assert.assertEquals(header, (RecordHeader) record.get(RecordWrapperSchemaGenerator.RECORD_HEADER_FIELD));
        resultTestLogs.add((TestLogData) record.get(RecordWrapperSchemaGenerator.RECORD_DATA_FIELD));
      }
    } finally {
      // Best-effort close: a failure while closing must not mask an assertion failure.
      IOUtils.closeQuietly(fileReader);
    }
  }
  Assert.assertEquals(testLogs, resultTestLogs);
}
 
开发者ID:kaaproject,项目名称:kaa,代码行数:24,代码来源:TestKaaHdfsSink.java


注:本文中的org.apache.avro.mapred.FsInput类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。