

Java BlockMetaData.getRowCount Method Code Examples

This article collects typical usage examples of the Java method parquet.hadoop.metadata.BlockMetaData.getRowCount. If you are wondering what BlockMetaData.getRowCount does or how to call it, the curated examples below should help. You can also look at further usage examples for the enclosing class, parquet.hadoop.metadata.BlockMetaData.


The following presents 7 code examples of the BlockMetaData.getRowCount method, drawn from open-source projects and sorted by popularity by default.
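
Before the project examples below, here is a minimal standalone sketch of the most common use of getRowCount: read a Parquet file's footer and sum the row counts of its row groups. It assumes the pre-Apache parquet.hadoop API used throughout this article, where the static ParquetFileReader.readFooter(Configuration, Path) call returns the footer metadata; the class name RowCountExample and the command-line argument handling are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.BlockMetaData;
import parquet.hadoop.metadata.ParquetMetadata;

public class RowCountExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path(args[0]); // path to a Parquet file

        // The footer holds one BlockMetaData entry per row group.
        ParquetMetadata footer = ParquetFileReader.readFooter(conf, file);

        long totalRows = 0;
        for (BlockMetaData block : footer.getBlocks()) {
            // getRowCount() returns the number of rows stored in this row group.
            totalRows += block.getRowCount();
        }
        System.out.println("row groups: " + footer.getBlocks().size()
                + ", total rows: " + totalRows);
    }
}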

Example 1: initialize

import parquet.hadoop.metadata.BlockMetaData; // import the package/class the method depends on
public void initialize(MessageType fileSchema,
                       Map<String, String> fileMetadata,
                       Path file, List<BlockMetaData> blocks, Configuration configuration)
        throws IOException {
    // initialize a ReadContext for this file
    ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
            configuration, toSetMultiMap(fileMetadata), fileSchema));
    this.requestedSchema = readContext.getRequestedSchema();
    this.fileSchema = fileSchema;
    this.file = file;
    this.columnCount = requestedSchema.getPaths().size();
    this.recordConverter = readSupport.prepareForRead(
            configuration, fileMetadata, fileSchema, readContext);
    this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true);
    List<ColumnDescriptor> columns = requestedSchema.getColumns();
    reader = new ParquetFileReader(configuration, file, blocks, columns);
    for (BlockMetaData block : blocks) {
        total += block.getRowCount();
    }
    LOG.info("RecordReader initialized will read a total of " + total + " records.");
}
 
Developer ID: grokcoder, Project: pbase, Lines of code: 22, Source file: InternalParquetRecordReader.java

Example 2: add

import parquet.hadoop.metadata.BlockMetaData; // import the package/class the method depends on
private static void add(ParquetMetadata footer) {
    for (BlockMetaData blockMetaData : footer.getBlocks()) {
        ++blockCount;
        MessageType schema = footer.getFileMetaData().getSchema();
        recordCount += blockMetaData.getRowCount();
        List<ColumnChunkMetaData> columns = blockMetaData.getColumns();
        for (ColumnChunkMetaData columnMetaData : columns) {
            ColumnDescriptor desc = schema.getColumnDescription(columnMetaData.getPath().toArray());
            add(
                    desc,
                    columnMetaData.getValueCount(),
                    columnMetaData.getTotalSize(),
                    columnMetaData.getTotalUncompressedSize(),
                    columnMetaData.getEncodings(),
                    columnMetaData.getStatistics());
        }
    }
}
 
Developer ID: grokcoder, Project: pbase, Lines of code: 19, Source file: PrintFooter.java

Example 3: toParquetMetadata

import parquet.hadoop.metadata.BlockMetaData; // import the package/class the method depends on
public FileMetaData toParquetMetadata(int currentVersion, ParquetMetadata parquetMetadata) {
    List<BlockMetaData> blocks = parquetMetadata.getBlocks();
    List<RowGroup> rowGroups = new ArrayList<RowGroup>();
    int numRows = 0;
    for (BlockMetaData block : blocks) {
        numRows += block.getRowCount();
        addRowGroup(parquetMetadata, rowGroups, block);
    }
    FileMetaData fileMetaData = new FileMetaData(
            currentVersion,
            toParquetSchema(parquetMetadata.getFileMetaData().getSchema()),
            numRows,
            rowGroups);

    Set<Entry<String, String>> keyValues = parquetMetadata.getFileMetaData().getKeyValueMetaData().entrySet();
    for (Entry<String, String> keyValue : keyValues) {
        addKeyValue(fileMetaData, keyValue.getKey(), keyValue.getValue());
    }

    fileMetaData.setCreated_by(parquetMetadata.getFileMetaData().getCreatedBy());
    return fileMetaData;
}
 
Developer ID: grokcoder, Project: pbase, Lines of code: 23, Source file: ParquetMetadataConverter.java

Example 4: ParquetReader

import parquet.hadoop.metadata.BlockMetaData; // import the package/class the method depends on
public ParquetReader(MessageType fileSchema,
        Map<String, String> extraMetadata,
        MessageType requestedSchema,
        Path file,
        List<BlockMetaData> blocks,
        Configuration configuration)
        throws IOException
{
    this.fileSchema = fileSchema;
    this.extraMetadata = extraMetadata;
    this.requestedSchema = requestedSchema;
    this.file = file;
    this.blocks = blocks;
    this.configuration = configuration;
    this.fileReader = new ParquetFileReader(configuration, file, blocks, requestedSchema.getColumns());
    for (BlockMetaData block : blocks) {
        fileRowCount += block.getRowCount();
    }
}
 
Developer ID: y-lan, Project: presto, Lines of code: 20, Source file: ParquetReader.java

Example 5: readNextRowGroup

import parquet.hadoop.metadata.BlockMetaData; // import the package/class the method depends on
/**
 * Reads all the columns requested from the row group at the current file position.
 *
 * @return the PageReadStore which can provide PageReaders for each column.
 * @throws IOException if an error occurs while reading
 */
public PageReadStore readNextRowGroup() throws IOException {
    if (currentBlock == blocks.size()) {
        return null;
    }
    BlockMetaData block = blocks.get(currentBlock);
    if (block.getRowCount() == 0) {
        throw new RuntimeException("Illegal row group of 0 rows");
    }
    ColumnChunkPageReadStore columnChunkPageReadStore = new ColumnChunkPageReadStore(block.getRowCount());
    // prepare the list of consecutive chunks to read them in one scan
    List<ConsecutiveChunkList> allChunks = new ArrayList<ConsecutiveChunkList>();
    ConsecutiveChunkList currentChunks = null;
    for (ColumnChunkMetaData mc : block.getColumns()) {
        ColumnPath pathKey = mc.getPath();
        BenchmarkCounter.incrementTotalBytes(mc.getTotalSize());
        ColumnDescriptor columnDescriptor = paths.get(pathKey);
        if (columnDescriptor != null) {
            long startingPos = mc.getStartingPos();
            // first chunk or not consecutive => new list
            if (currentChunks == null || currentChunks.endPos() != startingPos) {
                currentChunks = new ConsecutiveChunkList(startingPos);
                allChunks.add(currentChunks);
            }
            currentChunks.addChunk(new ChunkDescriptor(columnDescriptor, mc, startingPos, (int) mc.getTotalSize()));
        }
    }
    // actually read all the chunks
    for (ConsecutiveChunkList consecutiveChunks : allChunks) {
        final List<Chunk> chunks = consecutiveChunks.readAll(f);
        for (Chunk chunk : chunks) {
            columnChunkPageReadStore.addColumn(chunk.descriptor.col, chunk.readAllPages());
        }
    }
    ++currentBlock;
    return columnChunkPageReadStore;
}
 
Developer ID: grokcoder, Project: pbase, Lines of code: 43, Source file: ParquetFileReader.java

Example 6: addRowGroup

import parquet.hadoop.metadata.BlockMetaData; // import the package/class the method depends on
private void addRowGroup(ParquetMetadata parquetMetadata, List<RowGroup> rowGroups, BlockMetaData block) {
        //rowGroup.total_byte_size = ;
        List<ColumnChunkMetaData> columns = block.getColumns();
        List<ColumnChunk> parquetColumns = new ArrayList<ColumnChunk>();
        for (ColumnChunkMetaData columnMetaData : columns) {
            ColumnChunk columnChunk = new ColumnChunk(columnMetaData.getFirstDataPageOffset()); // verify this is the right offset
            columnChunk.file_path = block.getPath(); // they are in the same file for now
            columnChunk.meta_data = new parquet.format.ColumnMetaData(
                    getType(columnMetaData.getType()),
                    toFormatEncodings(columnMetaData.getEncodings()),
                    Arrays.asList(columnMetaData.getPath().toArray()),
                    columnMetaData.getCodec().getParquetCompressionCodec(),
                    columnMetaData.getValueCount(),
                    columnMetaData.getTotalUncompressedSize(),
                    columnMetaData.getTotalSize(),
                    columnMetaData.getFirstDataPageOffset());
            columnChunk.meta_data.dictionary_page_offset = columnMetaData.getDictionaryPageOffset();
            if (!columnMetaData.getStatistics().isEmpty()) {
                columnChunk.meta_data.setStatistics(toParquetStatistics(columnMetaData.getStatistics()));
            }
//      columnChunk.meta_data.index_page_offset = ;
//      columnChunk.meta_data.key_value_metadata = ; // nothing yet

            parquetColumns.add(columnChunk);
        }
        RowGroup rowGroup = new RowGroup(parquetColumns, block.getTotalByteSize(), block.getRowCount());
        rowGroups.add(rowGroup);
    }
 
Developer ID: grokcoder, Project: pbase, Lines of code: 29, Source file: ParquetMetadataConverter.java

Example 7: showDetails

import parquet.hadoop.metadata.BlockMetaData; // import the package/class the method depends on
private static void showDetails(PrettyPrintWriter out, BlockMetaData meta, Long num) {
  long rows = meta.getRowCount();
  long tbs = meta.getTotalByteSize();

  out.format("row group%s: RC:%d TS:%d%n", (num == null ? "" : " " + num), rows, tbs);
  out.rule('-');
  showDetails(out, meta.getColumns());
}
 
Developer ID: wesleypeck, Project: parquet-tools, Lines of code: 9, Source file: MetadataUtils.java


Note: The parquet.hadoop.metadata.BlockMetaData.getRowCount examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are excerpted from open-source projects contributed by their respective developers, and copyright remains with the original authors. Please follow the corresponding project's license when distributing or using this code, and do not reproduce it without permission.