

Java ReadSupport Class Code Examples

This article collects typical usage examples of the Java class org.apache.parquet.hadoop.api.ReadSupport. If you are wondering what ReadSupport is for, how to use it, or what real-world ReadSupport code looks like, the curated examples below should help.


The ReadSupport class belongs to the org.apache.parquet.hadoop.api package. Fifteen code examples of the class are shown below, ordered by popularity.
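
For orientation before the examples: a ReadSupport implementation is normally not called directly but handed to a reader, which drives its init() and prepareForRead() callbacks. Below is a minimal sketch of reading a file through the built-in GroupReadSupport; the file path is a placeholder.

import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.example.GroupReadSupport;

public class ReadSupportUsageSketch {
  public static void main(String[] args) throws Exception {
    Path file = new Path("/tmp/example.parquet"); // placeholder input path
    // GroupReadSupport materializes each record as a generic Group.
    try (ParquetReader<Group> reader =
        ParquetReader.builder(new GroupReadSupport(), file).build()) {
      Group record;
      while ((record = reader.read()) != null) {
        System.out.println(record);
      }
    }
  }
}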

Example 1: initialize

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
public void initialize(FileMetaData parquetFileMetadata,
                       Path file, List<BlockMetaData> blocks, Configuration configuration)
    throws IOException {
  // initialize a ReadContext for this file
  Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData();
  ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
      configuration, toSetMultiMap(fileMetadata), fileSchema));
  this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy());
  this.requestedSchema = readContext.getRequestedSchema();
  this.fileSchema = parquetFileMetadata.getSchema();
  this.file = file;
  this.columnCount = requestedSchema.getPaths().size();
  this.recordConverter = readSupport.prepareForRead(
      configuration, fileMetadata, fileSchema, readContext);
  this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true);
  List<ColumnDescriptor> columns = requestedSchema.getColumns();
  reader = new ParquetFileReader(configuration, parquetFileMetadata, file, blocks, columns);
  for (BlockMetaData block : blocks) {
    total += block.getRowCount();
  }
  this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total);
  LOG.info("RecordReader initialized will read a total of " + total + " records.");
}
 
Author: apache, Project: tajo, Lines: 24, Source: InternalParquetRecordReader.java

Example 2: ParquetReader

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
private ParquetReader(Configuration conf,
                      Path file,
                      ReadSupport<T> readSupport,
                      Filter filter) throws IOException {
  this.readSupport = readSupport;
  this.filter = checkNotNull(filter, "filter");
  this.conf = conf;

  FileSystem fs = file.getFileSystem(conf);
  List<FileStatus> statuses = Arrays.asList(fs.listStatus(file, HiddenFileFilter.INSTANCE));
  List<Footer> footers = ParquetFileReader.readAllFootersInParallelUsingSummaryFiles(conf, statuses, false);
  this.footersIterator = footers.iterator();

  for (Footer footer : footers) {
    for(BlockMetaData block : footer.getParquetMetadata().getBlocks()) {
      totalRowCount += block.getRowCount();
    }
  }
}
 
Author: apache, Project: tajo, Lines: 20, Source: ParquetReader.java
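
This constructor is private; callers reach it through the builder API. A hedged sketch of attaching a record filter that way, with a made-up column name:

import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.filter2.predicate.FilterApi;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.example.GroupReadSupport;

// Keep only rows whose (hypothetical) int32 column "id" equals 42.
ParquetReader<Group> reader = ParquetReader
    .builder(new GroupReadSupport(), new Path("/tmp/example.parquet"))
    .withFilter(FilterCompat.get(FilterApi.eq(FilterApi.intColumn("id"), 42)))
    .build();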

Example 3: initialize

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
public void initialize(MessageType fileSchema,
                       FileMetaData parquetFileMetadata,
                       Path file, List<BlockMetaData> blocks, Configuration configuration)
        throws IOException {
  // initialize a ReadContext for this file
  Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData();
  ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
          configuration, toSetMultiMap(fileMetadata), fileSchema));
  this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy());
  this.requestedSchema = readContext.getRequestedSchema();
  this.fileSchema = fileSchema;
  this.file = file;
  this.columnCount = requestedSchema.getPaths().size();
  this.recordConverter = readSupport.prepareForRead(
          configuration, fileMetadata, fileSchema, readContext);
  this.strictTypeChecking = true;
  List<ColumnDescriptor> columns = requestedSchema.getColumns();
  reader = new ParquetFileReader(configuration, parquetFileMetadata, file, blocks, columns);
  for (BlockMetaData block : blocks) {
    total += block.getRowCount();
  }
  Log.info("RecordReader initialized will read a total of " + total + " records.");
}
 
Author: h2oai, Project: h2o-3, Lines: 24, Source: H2OInternalParquetReader.java

Example 4: initialize

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
public void initialize(ParquetFileReader reader, Configuration configuration)
    throws IOException {
  // initialize a ReadContext for this file
  this.reader = reader;
  FileMetaData parquetFileMetadata = reader.getFooter().getFileMetaData();
  this.fileSchema = parquetFileMetadata.getSchema();
  Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData();
  ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
      configuration, toSetMultiMap(fileMetadata), fileSchema));
  this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy());
  this.requestedSchema = readContext.getRequestedSchema();
  this.columnCount = requestedSchema.getPaths().size();
  this.recordConverter = readSupport.prepareForRead(
      configuration, fileMetadata, fileSchema, readContext);
  this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true);
  this.total = reader.getRecordCount();
  this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total);
  this.filterRecords = configuration.getBoolean(RECORD_FILTERING_ENABLED, true);
  reader.setRequestedSchema(requestedSchema);
  LOG.info("RecordReader initialized will read a total of {} records.", total);
}
 
Author: apache, Project: parquet-mr, Lines: 22, Source: InternalParquetRecordReader.java
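
The ParquetFileReader passed into this variant is opened from the file beforehand; a plausible sketch of that step, with a placeholder path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.util.HadoopInputFile;

Configuration conf = new Configuration();
// Opening the file reads the footer: schema, row groups, and key/value metadata.
ParquetFileReader fileReader = ParquetFileReader.open(
    HadoopInputFile.fromPath(new Path("/tmp/example.parquet"), conf));
long totalRows = fileReader.getRecordCount(); // what initialize() assigns to total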

Example 5: prepareForRead

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
@Override
public RecordMaterializer<T> prepareForRead(Configuration configuration,
    Map<String, String> keyValueMetaData, MessageType fileSchema,
    org.apache.parquet.hadoop.api.ReadSupport.ReadContext readContext) {
  ThriftMetaData thriftMetaData = ThriftMetaData.fromExtraMetaData(keyValueMetaData);
  try {
    initThriftClass(thriftMetaData, configuration);
  } catch (ClassNotFoundException e) {
    throw new RuntimeException("Cannot find Thrift object class for metadata: " + thriftMetaData, e);
  }

  // if there was no metadata in the file, get it from the requested class
  if (thriftMetaData == null) {
    thriftMetaData = ThriftMetaData.fromThriftClass(thriftClass);
  }

  String converterClassName = configuration.get(RECORD_CONVERTER_CLASS_KEY, RECORD_CONVERTER_DEFAULT);
  return getRecordConverterInstance(converterClassName, thriftClass,
      readContext.getRequestedSchema(), thriftMetaData.getDescriptor(),
      configuration);
}
 
Author: apache, Project: parquet-mr, Lines: 22, Source: ThriftReadSupport.java

Example 6: init

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
@Override
public ReadContext init(
        Configuration configuration, Map<String, String> keyValueMetaData,
        MessageType fileSchema) {

    String partialSchemaString;
    String partialSchemaFile = configuration.get(PARQUET_READ_SCHEMA_FILE, "");
    if (!partialSchemaFile.isEmpty()) {
        StringBuilder r = new StringBuilder();
        // try-with-resources closes the reader even when reading fails
        try (BufferedReader br = new BufferedReader(new FileReader(new File(partialSchemaFile)))) {
            String line;
            while ((line = br.readLine()) != null)
                r.append(line);
        } catch (Exception e) {
            throw new RuntimeException("Can't read schema from file " + partialSchemaFile, e);
        }

        partialSchemaString = r.toString();
    }
    else
        partialSchemaString = configuration.get(ReadSupport.PARQUET_READ_SCHEMA);

    return new ReadContext(getSchemaForRead(fileSchema, partialSchemaString));
}
 
Author: whale2, Project: iow-hadoop-streaming, Lines: 26, Source: GroupReadSupport.java
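
To drive this variant, a job sets either the inline schema key or the schema-file key. A hedged sketch; the exact string value behind PARQUET_READ_SCHEMA_FILE is a constant of this project and is not shown in the excerpt:

Configuration conf = new Configuration();
// Option 1: inline projection schema (standard parquet-mr key).
conf.set(ReadSupport.PARQUET_READ_SCHEMA,
    "message projection { required int64 id; }");
// Option 2: load the projection from a local file instead; this project's
// PARQUET_READ_SCHEMA_FILE key takes precedence when non-empty.
// conf.set(PARQUET_READ_SCHEMA_FILE, "/path/to/read_schema.txt");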

Example 7: ParquetRowReader

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
public ParquetRowReader(Configuration configuration, Path filePath, ReadSupport<T> readSupport) throws IOException
{
    this.filePath = filePath;

    ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(configuration, filePath, ParquetMetadataConverter.NO_FILTER);
    List<BlockMetaData> blocks = parquetMetadata.getBlocks();

    FileMetaData fileMetadata = parquetMetadata.getFileMetaData();
    this.fileSchema = fileMetadata.getSchema();
    Map<String, String> keyValueMetadata = fileMetadata.getKeyValueMetaData();
    ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
            configuration, toSetMultiMap(keyValueMetadata), fileSchema));
    this.columnIOFactory = new ColumnIOFactory(fileMetadata.getCreatedBy());

    this.requestedSchema = readContext.getRequestedSchema();
    this.recordConverter = readSupport.prepareForRead(
            configuration, fileMetadata.getKeyValueMetaData(), fileSchema, readContext);

    List<ColumnDescriptor> columns = requestedSchema.getColumns();

    reader = new ParquetFileReader(configuration, fileMetadata, filePath, blocks, columns);

    long total = 0;
    for (BlockMetaData block : blocks) {
        total += block.getRowCount();
    }
    this.total = total;

    this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total);
    logger.info("ParquetRowReader initialized will read a total of " + total + " records.");
}
 
Author: CyberAgent, Project: embulk-input-parquet_hadoop, Lines: 32, Source: ParquetRowReader.java
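
Constructing the reader above follows directly from its signature; in this hedged sketch GroupReadSupport stands in for the plugin's own ReadSupport implementation, and the path is a placeholder:

Configuration conf = new Configuration();
Path path = new Path("/tmp/example.parquet"); // placeholder input
// Any ReadSupport<T> works; GroupReadSupport is used here purely for illustration.
ParquetRowReader<Group> reader = new ParquetRowReader<>(conf, path, new GroupReadSupport());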

Example 8: init

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
/**
 * Initializes the ReadSupport.
 *
 * @param context The InitContext.
 * @return A ReadContext that defines how to read the file.
 */
@Override
public ReadSupport.ReadContext init(InitContext context) {
  if (requestedSchema == null) {
    throw new RuntimeException("requestedSchema is null.");
  }
  MessageType requestedParquetSchema =
    new TajoSchemaConverter().convert(requestedSchema);
  LOG.debug("Reading data with projection:\n" + requestedParquetSchema);
  return new ReadContext(requestedParquetSchema);
}
 
Author: apache, Project: tajo, Lines: 17, Source: TajoReadSupport.java

Example 9: init

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(
    Configuration configuration, Map<String, String> keyValueMetaData,
    MessageType fileSchema) {
  String partialSchemaString = configuration.get(ReadSupport.PARQUET_READ_SCHEMA);
  MessageType requestedProjection = getSchemaForRead(fileSchema, partialSchemaString);
  return new ReadContext(requestedProjection);
}
 
Author: apache, Project: parquet-mr, Lines: 9, Source: GroupReadSupport.java
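
The partial schema string this init() consumes uses Parquet's message-type syntax and must be a subset of the file schema. A small sketch with hypothetical field names:

Configuration conf = new Configuration();
// Read only these two columns; all other columns in the file are skipped.
conf.set(ReadSupport.PARQUET_READ_SCHEMA,
    "message projection { required int64 id; optional binary name (UTF8); }");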

Example 10: ParquetReader

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
private ParquetReader(Configuration conf,
                      Path file,
                      ReadSupport<T> readSupport,
                      FilterCompat.Filter filter) throws IOException {
  this(Collections.singletonList((InputFile) HadoopInputFile.fromPath(file, conf)),
      HadoopReadOptions.builder(conf)
          .withRecordFilter(checkNotNull(filter, "filter"))
          .build(),
      readSupport);
}
 
Author: apache, Project: parquet-mr, Lines: 11, Source: ParquetReader.java

Example 11: createRecordReader

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
/**
 * {@inheritDoc}
 */
@Override
public RecordReader<Void, T> createRecordReader(
    InputSplit inputSplit,
    TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException {
  Configuration conf = ContextUtil.getConfiguration(taskAttemptContext);
  ReadSupport<T> readSupport = getReadSupport(conf);
  return new ParquetRecordReader<T>(readSupport, getFilter(conf));
}
 
Author: apache, Project: parquet-mr, Lines: 12, Source: ParquetInputFormat.java
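
The ReadSupport class this method instantiates is registered on the job ahead of time; a minimal sketch of that wiring, using GroupReadSupport as an example:

import org.apache.hadoop.mapreduce.Job;
import org.apache.parquet.hadoop.ParquetInputFormat;
import org.apache.parquet.hadoop.example.GroupReadSupport;

Job job = Job.getInstance(new Configuration(), "read-parquet");
job.setInputFormatClass(ParquetInputFormat.class);
// Registers the class that createRecordReader() resolves via getReadSupport(conf).
ParquetInputFormat.setReadSupportClass(job, GroupReadSupport.class);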

Example 12: getReadSupport

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
/**
 * @param configuration to find the configuration for the read support
 * @return the configured read support
 * @deprecated use getReadSupportInstance static methods instead
 */
@Deprecated
@SuppressWarnings("unchecked")
ReadSupport<T> getReadSupport(Configuration configuration){
  return getReadSupportInstance(readSupportClass == null ?
      (Class<? extends ReadSupport<T>>) getReadSupportClass(configuration) :
      readSupportClass);
}
 
Author: apache, Project: parquet-mr, Lines: 13, Source: ParquetInputFormat.java
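
Per the deprecation note, new code should call the static factory instead. A sketch of the replacement, assuming the getReadSupportInstance(Configuration) overload the Javadoc refers to:

Configuration conf = new Configuration();
conf.set(ParquetInputFormat.READ_SUPPORT_CLASS, GroupReadSupport.class.getName());
// Static equivalent of the deprecated instance method above.
ReadSupport<Group> readSupport = ParquetInputFormat.getReadSupportInstance(conf);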

Example 13: testInitWithoutSpecifyingRequestSchema

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
@Test
public void testInitWithoutSpecifyingRequestSchema() throws Exception {
  GroupReadSupport s = new GroupReadSupport();
  Configuration configuration = new Configuration();
  Map<String, String> keyValueMetaData = new HashMap<String, String>();
  MessageType fileSchema = MessageTypeParser.parseMessageType(fullSchemaStr);

  ReadSupport.ReadContext context = s.init(configuration, keyValueMetaData, fileSchema);
  assertEquals(context.getRequestedSchema(), fileSchema);
}
 
Author: apache, Project: parquet-mr, Lines: 11, Source: GroupReadSupportTest.java
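
The mirror-image case, in the same test style, would set a request schema and assert that the context carries the projection instead of the full file schema. A hedged sketch; the partial schema is assumed to be a valid subset of fullSchemaStr:

@Test
public void testInitWithSpecifiedRequestSchema() throws Exception {
  GroupReadSupport s = new GroupReadSupport();
  Configuration configuration = new Configuration();
  String partialSchemaStr = "message example { required int32 line; }"; // assumed subset
  configuration.set(ReadSupport.PARQUET_READ_SCHEMA, partialSchemaStr);
  MessageType fileSchema = MessageTypeParser.parseMessageType(fullSchemaStr);

  ReadSupport.ReadContext context =
      s.init(configuration, new HashMap<String, String>(), fileSchema);
  assertEquals(context.getRequestedSchema(),
      MessageTypeParser.parseMessageType(partialSchemaStr));
}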

Example 14: runMapReduceJob

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
private void runMapReduceJob(CompressionCodecName codec) throws IOException, ClassNotFoundException, InterruptedException {

    final FileSystem fileSystem = parquetPath.getFileSystem(conf);
    fileSystem.delete(parquetPath, true);
    fileSystem.delete(outputPath, true);
    {
      writeJob = new Job(conf, "write");
      TextInputFormat.addInputPath(writeJob, inputPath);
      writeJob.setInputFormatClass(TextInputFormat.class);
      writeJob.setNumReduceTasks(0);
      ExampleOutputFormat.setCompression(writeJob, codec);
      ExampleOutputFormat.setOutputPath(writeJob, parquetPath);
      writeJob.setOutputFormatClass(ExampleOutputFormat.class);
      writeJob.setMapperClass(ReadMapper.class);
      ExampleOutputFormat.setSchema(
              writeJob,
              MessageTypeParser.parseMessageType(
                      writeSchema));
      writeJob.submit();
      waitForJob(writeJob);
    }
    {
      jobConf.set(ReadSupport.PARQUET_READ_SCHEMA, readSchema);
      jobConf.set(ParquetInputFormat.READ_SUPPORT_CLASS, GroupReadSupport.class.getCanonicalName());
      jobConf.setInputFormat(MyDeprecatedInputFormat.class);
      MyDeprecatedInputFormat.setInputPaths(jobConf, parquetPath);
      jobConf.setOutputFormat(org.apache.hadoop.mapred.TextOutputFormat.class);
      org.apache.hadoop.mapred.TextOutputFormat.setOutputPath(jobConf, outputPath);
      jobConf.setMapperClass(DeprecatedWriteMapper.class);
      jobConf.setNumReduceTasks(0);
      mapRedJob = JobClient.runJob(jobConf);
    }
  }
 
Author: apache, Project: parquet-mr, Lines: 34, Source: DeprecatedInputFormatTest.java

Example 15: init

import org.apache.parquet.hadoop.api.ReadSupport; // import the required package/class
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(InitContext context) {
  final Configuration configuration = context.getConfiguration();
  final MessageType fileMessageType = context.getFileSchema();
  MessageType requestedProjection = fileMessageType;
  String partialSchemaString = configuration.get(ReadSupport.PARQUET_READ_SCHEMA);

  FieldProjectionFilter projectionFilter = getFieldProjectionFilter(configuration);

  if (partialSchemaString != null && projectionFilter != null) {
    throw new ThriftProjectionException(
        String.format("You cannot provide both a partial schema and field projection filter."
                + "Only one of (%s, %s, %s) should be set.",
            PARQUET_READ_SCHEMA, STRICT_THRIFT_COLUMN_FILTER_KEY, THRIFT_COLUMN_FILTER_KEY));
  }

  // set requestedProjection only when it's specified
  if (partialSchemaString != null) {
    requestedProjection = getSchemaForRead(fileMessageType, partialSchemaString);
  } else if (projectionFilter != null) {
    try {
      initThriftClassFromMultipleFiles(context.getKeyValueMetadata(), configuration);
      requestedProjection = getProjectedSchema(projectionFilter);
    } catch (ClassNotFoundException e) {
      throw new ThriftProjectionException("can not find thriftClass from configuration", e);
    }
  }

  MessageType schemaForRead = getSchemaForRead(fileMessageType, requestedProjection);
  return new ReadContext(schemaForRead);
}
 
Author: apache, Project: parquet-mr, Lines: 32, Source: ThriftReadSupport.java
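
As the guard above enforces, exactly one projection mechanism may be configured per job. A hedged sketch of the partial-schema route (field name hypothetical; the column-filter keys are project constants whose string values are not shown in this excerpt):

Configuration conf = new Configuration();
// Option A: partial schema, the branch init() takes when PARQUET_READ_SCHEMA is set.
conf.set(ReadSupport.PARQUET_READ_SCHEMA,
    "message projection { required binary username (UTF8); }");
// Option B (mutually exclusive): a Thrift field projection filter set through
// THRIFT_COLUMN_FILTER_KEY; configuring both triggers the ThriftProjectionException above.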


Note: The org.apache.parquet.hadoop.api.ReadSupport class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets come from open-source projects contributed by their original authors, who retain copyright; consult each project's license before using or redistributing the code, and do not republish without permission.