

Java ColumnChunkMetaData.getTotalSize Method Code Examples

This article collects typical usage examples of the Java method org.apache.parquet.hadoop.metadata.ColumnChunkMetaData.getTotalSize. If you are wondering what ColumnChunkMetaData.getTotalSize does, how exactly to call it, or want to see it in real code, the curated examples below should help. You can also browse further usage examples of the enclosing class, org.apache.parquet.hadoop.metadata.ColumnChunkMetaData.


Ten code examples of ColumnChunkMetaData.getTotalSize are shown below, ordered by popularity.
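Before the examples, a quick orientation plus a minimal, self-contained sketch. getTotalSize() returns the total compressed byte size of a column chunk as recorded in the file footer (getTotalUncompressedSize() is its uncompressed counterpart), so the most common use is summing on-disk sizes, as the sketch below does for every column chunk in a file. The class name and command-line path are placeholders, and the readFooter overload (deprecated in newer parquet-mr releases) is chosen only to keep the sketch short; treat the exact API choice as an assumption about your parquet-mr version.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;

public class TotalSizeExample {
  public static void main(String[] args) throws Exception {
    // Placeholder: point this at any Parquet file on a Hadoop-visible filesystem.
    Path file = new Path(args[0]);
    ParquetMetadata footer = ParquetFileReader.readFooter(
        new Configuration(), file, ParquetMetadataConverter.NO_FILTER);

    long totalCompressed = 0;
    for (BlockMetaData rowGroup : footer.getBlocks()) {        // one BlockMetaData per row group
      for (ColumnChunkMetaData col : rowGroup.getColumns()) {  // one chunk per column per row group
        System.out.printf("%s: %d bytes compressed%n",
            col.getPath().toDotString(), col.getTotalSize());
        totalCompressed += col.getTotalSize();
      }
    }
    System.out.println("total compressed column data: " + totalCompressed + " bytes");
  }
}

Run it with a single argument naming a Parquet file reachable through the configured Hadoop filesystem.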

Example 1: PageReader

import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import the class that declares the method
PageReader(ColumnReader<?> parentStatus, FSDataInputStream inputStream, Path path, ColumnChunkMetaData columnChunkMetaData) throws ExecutionSetupException {
  this.parentColumnReader = parentStatus;
  allocatedDictionaryBuffers = new ArrayList<ByteBuf>();
  codecFactory = parentColumnReader.parentReader.getCodecFactory();
  this.stats = parentColumnReader.parentReader.parquetReaderStats;
  long start = columnChunkMetaData.getFirstDataPageOffset();
  this.inputStream = inputStream;
  try {
    this.dataReader = new ColumnDataReader(inputStream, start, columnChunkMetaData.getTotalSize());
    loadDictionaryIfExists(parentStatus, columnChunkMetaData, inputStream);
  } catch (IOException e) {
    throw new ExecutionSetupException("Error opening or reading metadata for parquet file at location: "
      + path.getName(), e);
  }
}
 
Developer: dremio, Project: dremio-oss, Lines: 16, Source: PageReader.java

Example 2: assertColumnsEquivalent

import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import the class that declares the method
public void assertColumnsEquivalent(List<ColumnChunkMetaData> expected,
                                    List<ColumnChunkMetaData> actual) {
  Assert.assertEquals("Should have the expected columns",
      expected.size(), actual.size());
  for (int i = 0; i < actual.size(); i += 1) {
    ColumnChunkMetaData current = actual.get(i);
    if (i != 0) {
      ColumnChunkMetaData previous = actual.get(i - 1);
      long expectedStart = previous.getStartingPos() + previous.getTotalSize();
      Assert.assertEquals("Should start after the previous column",
          expectedStart, current.getStartingPos());
    }

    assertColumnMetadataEquivalent(expected.get(i), current);
  }
}
 
Developer: apache, Project: parquet-mr, Lines: 17, Source: TestParquetWriterAppendBlocks.java

Example 3: ColumnChunkIncPageReader

import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import the class that declares the method
public ColumnChunkIncPageReader(ColumnChunkMetaData metaData, ColumnDescriptor columnDescriptor, FSDataInputStream in) throws IOException {
  this.metaData = metaData;
  this.columnDescriptor = columnDescriptor;
  this.size = metaData.getTotalSize();
  this.fileOffset = metaData.getStartingPos();
  this.in = in;
  this.decompressor = codecFactory.getDecompressor(metaData.getCodec());
}
 
Developer: dremio, Project: dremio-oss, Lines: 9, Source: ColumnChunkIncReadStore.java
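Examples 3 and 5 rely on the same pairing: getStartingPos() marks the chunk's first byte in the file (as I read the parquet-mr metadata API, the dictionary page offset when a dictionary page exists, otherwise the first data page offset), and getTotalSize() gives the number of compressed bytes to read from there. (Example 1 instead starts at getFirstDataPageOffset() and handles the dictionary page separately.) A stripped-down sketch of that seek-and-read follows; the helper class, its name, and the int cast (assuming chunks under 2 GB) are my own additions, not part of any project above.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;

class ChunkBytes {
  // Hypothetical helper: buffers one column chunk's compressed bytes in memory.
  static byte[] readChunk(FSDataInputStream in, ColumnChunkMetaData meta) throws IOException {
    long size = meta.getTotalSize();
    if (size > Integer.MAX_VALUE) { // assumption: the chunk fits in a single array
      throw new IOException("column chunk too large to buffer: " + size);
    }
    byte[] buf = new byte[(int) size];
    in.seek(meta.getStartingPos()); // first byte of the chunk (the dictionary page, if one exists)
    in.readFully(buf);              // dictionary page plus data pages, getTotalSize() bytes in all
    return buf;
  }
}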

Example 4: getParquetFileMetadata

import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import the class that declares the method
private ParquetFileMetadata getParquetFileMetadata(FileStatus file) throws IOException {
  final ParquetMetadata metadata;

  metadata = SingletonParquetFooterCache.readFooter(fs, file, ParquetMetadataConverter.NO_FILTER);

  MessageType schema = metadata.getFileMetaData().getSchema();

  Map<SchemaPath, OriginalType> originalTypeMap = Maps.newHashMap();
  for (String[] path : schema.getPaths()) {
    originalTypeMap.put(SchemaPath.getCompoundPath(path), getOriginalType(schema, path, 0));
  }

  List<RowGroupMetadata> rowGroupMetadataList = Lists.newArrayList();

  ArrayList<SchemaPath> ALL_COLS = new ArrayList<>();
  ALL_COLS.add(AbstractRecordReader.STAR_COLUMN);
  boolean autoCorrectCorruptDates = formatConfig.autoCorrectCorruptDates;
  ParquetReaderUtility.DateCorruptionStatus containsCorruptDates = ParquetReaderUtility.detectCorruptDates(metadata, ALL_COLS, autoCorrectCorruptDates);
  if (logger.isDebugEnabled()) {
    logger.debug(containsCorruptDates.toString());
  }
  final Map<ColumnTypeMetadata.Key, ColumnTypeMetadata> columnTypeInfo = Maps.newHashMap();
  for (BlockMetaData rowGroup : metadata.getBlocks()) {
    List<ColumnMetadata> columnMetadataList = Lists.newArrayList();
    long length = 0;
    for (ColumnChunkMetaData col : rowGroup.getColumns()) {
      ColumnMetadata columnMetadata;

      boolean statsAvailable = (col.getStatistics() != null && !col.getStatistics().isEmpty());

      Statistics<?> stats = col.getStatistics();
      String[] columnName = col.getPath().toArray();
      SchemaPath columnSchemaName = SchemaPath.getCompoundPath(columnName);
      ColumnTypeMetadata columnTypeMetadata =
          new ColumnTypeMetadata(columnName, col.getType(), originalTypeMap.get(columnSchemaName));

      columnTypeInfo.put(new ColumnTypeMetadata.Key(columnTypeMetadata.name), columnTypeMetadata);
      if (statsAvailable) {
        // Write stats only if minVal==maxVal. Also, we then store only maxVal
        Object mxValue = null;
        if (stats.genericGetMax() != null && stats.genericGetMin() != null &&
            stats.genericGetMax().equals(stats.genericGetMin())) {
          mxValue = stats.genericGetMax();
          if (containsCorruptDates == ParquetReaderUtility.DateCorruptionStatus.META_SHOWS_CORRUPTION
              && columnTypeMetadata.originalType == OriginalType.DATE) {
            mxValue = ParquetReaderUtility.autoCorrectCorruptedDate((Integer) mxValue);
          }
        }
        columnMetadata =
            new ColumnMetadata(columnTypeMetadata.name, mxValue, stats.getNumNulls());
      } else {
        columnMetadata = new ColumnMetadata(columnTypeMetadata.name, null, null);
      }
      columnMetadataList.add(columnMetadata);
      length += col.getTotalSize();
    }

    RowGroupMetadata rowGroupMeta =
        new RowGroupMetadata(rowGroup.getStartingPos(), length, rowGroup.getRowCount(),
            getHostAffinity(file, rowGroup.getStartingPos(), length), columnMetadataList);

    rowGroupMetadataList.add(rowGroupMeta);
  }

  return new ParquetFileMetadata(file, file.getLen(), rowGroupMetadataList, columnTypeInfo);
}
 
Developer: dremio, Project: dremio-oss, Lines: 68, Source: Metadata.java
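Note how this example derives a row group's extent: length is accumulated as the sum of getTotalSize() over the group's column chunks, and that sum, together with the group's starting position, feeds the getHostAffinity() calculation and the resulting RowGroupMetadata.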

Example 5: PageReader

import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import the class that declares the method
PageReader(org.apache.drill.exec.store.parquet.columnreaders.ColumnReader<?> parentStatus, FileSystem fs, Path path, ColumnChunkMetaData columnChunkMetaData)
  throws ExecutionSetupException {
  this.parentColumnReader = parentStatus;
  allocatedDictionaryBuffers = new ArrayList<ByteBuf>();
  codecFactory = parentColumnReader.parentReader.getCodecFactory();
  this.stats = parentColumnReader.parentReader.parquetReaderStats;
  this.fileName = path.toString();
  debugName = new StringBuilder()
     .append(this.parentColumnReader.parentReader.getFragmentContext().getFragIdString())
     .append(":")
     .append(this.parentColumnReader.parentReader.getOperatorContext().getStats().getId())
     .append(this.parentColumnReader.columnChunkMetaData.toString())
     .toString();
  try {
    inputStream = fs.open(path);
    BufferAllocator allocator = parentColumnReader.parentReader.getOperatorContext().getAllocator();
    useBufferedReader = parentColumnReader.parentReader.useBufferedReader;
    scanBufferSize = parentColumnReader.parentReader.bufferedReadSize;
    useFadvise = parentColumnReader.parentReader.useFadvise;
    enforceTotalSize = parentColumnReader.parentReader.enforceTotalSize;
    if (useBufferedReader) {
      this.dataReader = new BufferedDirectBufInputStream(inputStream, allocator, path.getName(),
          columnChunkMetaData.getStartingPos(), columnChunkMetaData.getTotalSize(), scanBufferSize,
          enforceTotalSize, useFadvise);
    } else {
      this.dataReader = new DirectBufInputStream(inputStream, allocator, path.getName(),
          columnChunkMetaData.getStartingPos(), columnChunkMetaData.getTotalSize(), enforceTotalSize,
          useFadvise);
    }
  } catch (IOException e) {
    throw new ExecutionSetupException("Error opening or reading metadata for parquet file at location: "
        + path.getName(), e);
  }
}
 
Developer: axbaretto, Project: drill, Lines: 37, Source: PageReader.java
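Note: both branches hand the reader the same byte range, starting at getStartingPos() and running for getTotalSize() bytes; the enforceTotalSize flag presumably makes the stream refuse to read past that range, so a corrupted page header cannot pull in bytes belonging to the next column chunk. (That reading of the flag is my inference from the parameter name, not something stated in the Drill source shown here.)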

Example 6: showDetails

import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import the class that declares the method
private static void showDetails(PrettyPrintWriter out, ColumnChunkMetaData meta, boolean name) {
  long doff = meta.getDictionaryPageOffset();
  long foff = meta.getFirstDataPageOffset();
  long tsize = meta.getTotalSize();
  long usize = meta.getTotalUncompressedSize();
  long count = meta.getValueCount();
  double ratio = usize / (double)tsize;
  String encodings = Joiner.on(',').skipNulls().join(meta.getEncodings());

  if (name) {
    String path = Joiner.on('.').skipNulls().join(meta.getPath());
    out.format("%s: ", path);
  }

  out.format(" %s", meta.getType());
  out.format(" %s", meta.getCodec());
  out.format(" DO:%d", doff);
  out.format(" FPO:%d", foff);
  out.format(" SZ:%d/%d/%.2f", tsize, usize, ratio);
  out.format(" VC:%d", count);
  if (!encodings.isEmpty()) out.format(" ENC:%s", encodings);
  Statistics<?> stats = meta.getStatistics();
  if (stats != null) {
    out.format(" ST:[%s]", stats);
  } else {
    out.format(" ST:[none]");
  }
  out.println();
}
 
Developer: apache, Project: parquet-mr, Lines: 30, Source: MetadataUtils.java
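Note: in the SZ field the three numbers are compressed size, uncompressed size, and their ratio, computed as uncompressed divided by compressed; values above 1.0 therefore mean the codec is saving space.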

Example 7: getParquetInputSplit

import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import the class that declares the method
public ParquetInputSplit getParquetInputSplit(FileStatus fileStatus, String requestedSchema, Map<String, String> readSupportMetadata) throws IOException {
  MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
  long length = 0;

  for (BlockMetaData block : this.getRowGroups()) {
    List<ColumnChunkMetaData> columns = block.getColumns();
    for (ColumnChunkMetaData column : columns) {
      if (requested.containsPath(column.getPath().toArray())) {
        length += column.getTotalSize();
      }
    }
  }

  BlockMetaData lastRowGroup = this.getRowGroups().get(this.getRowGroupCount() - 1);
  long end = lastRowGroup.getStartingPos() + lastRowGroup.getTotalByteSize();

  long[] rowGroupOffsets = new long[this.getRowGroupCount()];
  for (int i = 0; i < rowGroupOffsets.length; i++) {
    rowGroupOffsets[i] = this.getRowGroups().get(i).getStartingPos();
  }

  return new ParquetInputSplit(
          fileStatus.getPath(),
          hdfsBlock.getOffset(),
          end,
          length,
          hdfsBlock.getHosts(),
          rowGroupOffsets
  );
}
 
Developer: apache, Project: parquet-mr, Lines: 31, Source: ParquetInputFormat.java

Example 8: end

import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import the class that declares the method
private static long end(List<BlockMetaData> blocks, String requestedSchema) {
  MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
  long length = 0;

  for (BlockMetaData block : blocks) {
    List<ColumnChunkMetaData> columns = block.getColumns();
    for (ColumnChunkMetaData column : columns) {
      if (requested.containsPath(column.getPath().toArray())) {
        length += column.getTotalSize();
      }
    }
  }
  return length;
}
 
Developer: apache, Project: parquet-mr, Lines: 15, Source: ParquetInputSplit.java
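Despite its name, end() returns a byte count rather than a file offset: like Example 7 it sums getTotalSize() over only the columns present in the requested schema, which is the number of compressed bytes a column-pruned scan will actually read.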

Example 9: addRowGroup

import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import the class that declares the method
private void addRowGroup(ParquetMetadata parquetMetadata, List<RowGroup> rowGroups, BlockMetaData block) {
  //rowGroup.total_byte_size = ;
  List<ColumnChunkMetaData> columns = block.getColumns();
  List<ColumnChunk> parquetColumns = new ArrayList<ColumnChunk>();
  for (ColumnChunkMetaData columnMetaData : columns) {
    ColumnChunk columnChunk = new ColumnChunk(columnMetaData.getFirstDataPageOffset()); // verify this is the right offset
    columnChunk.file_path = block.getPath(); // they are in the same file for now
    columnChunk.meta_data = new ColumnMetaData(
        getType(columnMetaData.getType()),
        toFormatEncodings(columnMetaData.getEncodings()),
        Arrays.asList(columnMetaData.getPath().toArray()),
        toFormatCodec(columnMetaData.getCodec()),
        columnMetaData.getValueCount(),
        columnMetaData.getTotalUncompressedSize(),
        columnMetaData.getTotalSize(),
        columnMetaData.getFirstDataPageOffset());
    columnChunk.meta_data.dictionary_page_offset = columnMetaData.getDictionaryPageOffset();
    if (!columnMetaData.getStatistics().isEmpty()) {
      columnChunk.meta_data.setStatistics(toParquetStatistics(columnMetaData.getStatistics()));
    }
    if (columnMetaData.getEncodingStats() != null) {
      columnChunk.meta_data.setEncoding_stats(convertEncodingStats(columnMetaData.getEncodingStats()));
    }
    // columnChunk.meta_data.index_page_offset = ;
    // columnChunk.meta_data.key_value_metadata = ; // nothing yet

    parquetColumns.add(columnChunk);
  }
  RowGroup rowGroup = new RowGroup(parquetColumns, block.getTotalByteSize(), block.getRowCount());
  rowGroups.add(rowGroup);
}
 
Developer: apache, Project: parquet-mr, Lines: 32, Source: ParquetMetadataConverter.java

Example 10: printColumnChunk

import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData; // import the class that declares the method
private void printColumnChunk(Logger console, int width, ColumnChunkMetaData column, MessageType schema) {
  String[] path = column.getPath().toArray();
  PrimitiveType type = primitive(schema, path);
  Preconditions.checkNotNull(type);

  ColumnDescriptor desc = schema.getColumnDescription(path);
  long size = column.getTotalSize();
  long count = column.getValueCount();
  float perValue = ((float) size) / count;
  CompressionCodecName codec = column.getCodec();
  Set<Encoding> encodings = column.getEncodings();
  EncodingStats encodingStats = column.getEncodingStats();
  String encodingSummary = encodingStats == null ?
      encodingsAsString(encodings, desc) :
      encodingStatsAsString(encodingStats);
  Statistics stats = column.getStatistics();

  String name = column.getPath().toDotString();

  PrimitiveType.PrimitiveTypeName typeName = type.getPrimitiveTypeName();
  if (typeName == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
    console.info(String.format("%-" + width + "s  FIXED[%d] %s %-7s %-9d %-8s %-7s %s",
        name, type.getTypeLength(), shortCodec(codec), encodingSummary, count,
        humanReadable(perValue), stats == null ? "" : String.valueOf(stats.getNumNulls()),
        minMaxAsString(stats, type.getOriginalType())));
  } else {
    console.info(String.format("%-" + width + "s  %-9s %s %-7s %-9d %-10s %-7s %s",
        name, typeName, shortCodec(codec), encodingSummary, count, humanReadable(perValue),
        stats == null ? "" : String.valueOf(stats.getNumNulls()),
        minMaxAsString(stats, type.getOriginalType())));
  }
}
 
Developer: apache, Project: parquet-mr, Lines: 33, Source: ParquetMetadataCommand.java
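Here getTotalSize() drives the per-value statistic: dividing the chunk's compressed size by its value count gives the average compressed bytes per value (perValue) that the command prints alongside the type, codec, and encoding summary.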


Note: The org.apache.parquet.hadoop.metadata.ColumnChunkMetaData.getTotalSize method examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code fragments were selected from open-source projects contributed by their respective authors; copyright remains with the original authors, and any distribution or use should follow the corresponding project's license. Do not reproduce without permission.