

Java BlockMetaData.getColumns Method Code Examples

This article collects typical usage examples of the Java method parquet.hadoop.metadata.BlockMetaData.getColumns. If you are wondering how BlockMetaData.getColumns is used in practice, or are looking for concrete examples of calling it, the hand-picked code samples below should help. You can also explore further usage examples of the enclosing class, parquet.hadoop.metadata.BlockMetaData.


The following shows 7 code examples of the BlockMetaData.getColumns method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
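Before the examples, here is a minimal, self-contained sketch of the usual path to BlockMetaData.getColumns: read a file footer, iterate its row groups (blocks), and inspect each column chunk's metadata. It assumes the pre-Apache parquet-mr 1.x API used throughout this page (the parquet.* package namespace and the static ParquetFileReader.readFooter(Configuration, Path) overload); the ColumnSizeSummary class name and the command-line argument handling are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.BlockMetaData;
import parquet.hadoop.metadata.ColumnChunkMetaData;
import parquet.hadoop.metadata.ParquetMetadata;

public class ColumnSizeSummary {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path(args[0]); // path to an existing Parquet file
        ParquetMetadata footer = ParquetFileReader.readFooter(conf, file);
        for (BlockMetaData block : footer.getBlocks()) {
            // getColumns() returns one ColumnChunkMetaData per column chunk in this row group
            for (ColumnChunkMetaData column : block.getColumns()) {
                System.out.println(column.getPath().toDotString()
                        + " values=" + column.getValueCount()
                        + " compressedBytes=" + column.getTotalSize());
            }
        }
    }
}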

Example 1: add
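This snippet, from parquet-mr's PrintFooter tool, walks every row group in a footer and uses getColumns() to accumulate per-column value counts, compressed and uncompressed sizes, encodings, and statistics.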

import parquet.hadoop.metadata.BlockMetaData; // import the package/class this method depends on
private static void add(ParquetMetadata footer) {
    for (BlockMetaData blockMetaData : footer.getBlocks()) {
        ++blockCount;
        MessageType schema = footer.getFileMetaData().getSchema();
        recordCount += blockMetaData.getRowCount();
        List<ColumnChunkMetaData> columns = blockMetaData.getColumns();
        for (ColumnChunkMetaData columnMetaData : columns) {
            ColumnDescriptor desc = schema.getColumnDescription(columnMetaData.getPath().toArray());
            add(
                    desc,
                    columnMetaData.getValueCount(),
                    columnMetaData.getTotalSize(),
                    columnMetaData.getTotalUncompressedSize(),
                    columnMetaData.getEncodings(),
                    columnMetaData.getStatistics());
        }
    }
}
 
Developer ID: grokcoder, Project: pbase, Lines of code: 19, Source: PrintFooter.java

Example 2: readNextRowGroup
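Here getColumns() drives ParquetFileReader.readNextRowGroup(): for each column chunk in the current row group, the reader looks up the requested ColumnDescriptor and groups chunks with consecutive file positions into a ConsecutiveChunkList so they can be read in a single scan.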

import parquet.hadoop.metadata.BlockMetaData; // import the package/class this method depends on
/**
 * Reads all the columns requested from the row group at the current file position.
 *
 * @return the PageReadStore which can provide PageReaders for each column.
 * @throws IOException if an error occurs while reading
 */
public PageReadStore readNextRowGroup() throws IOException {
    if (currentBlock == blocks.size()) {
        return null;
    }
    BlockMetaData block = blocks.get(currentBlock);
    if (block.getRowCount() == 0) {
        throw new RuntimeException("Illegal row group of 0 rows");
    }
    ColumnChunkPageReadStore columnChunkPageReadStore = new ColumnChunkPageReadStore(block.getRowCount());
    // prepare the list of consecutive chunks to read them in one scan
    List<ConsecutiveChunkList> allChunks = new ArrayList<ConsecutiveChunkList>();
    ConsecutiveChunkList currentChunks = null;
    for (ColumnChunkMetaData mc : block.getColumns()) {
        ColumnPath pathKey = mc.getPath();
        BenchmarkCounter.incrementTotalBytes(mc.getTotalSize());
        ColumnDescriptor columnDescriptor = paths.get(pathKey);
        if (columnDescriptor != null) {
            long startingPos = mc.getStartingPos();
            // first chunk or not consecutive => new list
            if (currentChunks == null || currentChunks.endPos() != startingPos) {
                currentChunks = new ConsecutiveChunkList(startingPos);
                allChunks.add(currentChunks);
            }
            currentChunks.addChunk(new ChunkDescriptor(columnDescriptor, mc, startingPos, (int) mc.getTotalSize()));
        }
    }
    // actually read all the chunks
    for (ConsecutiveChunkList consecutiveChunks : allChunks) {
        final List<Chunk> chunks = consecutiveChunks.readAll(f);
        for (Chunk chunk : chunks) {
            columnChunkPageReadStore.addColumn(chunk.descriptor.col, chunk.readAllPages());
        }
    }
    ++currentBlock;
    return columnChunkPageReadStore;
}
 
Developer ID: grokcoder, Project: pbase, Lines of code: 43, Source: ParquetFileReader.java

Example 3: getParquetInputSplit
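This ParquetInputFormat helper builds a ParquetInputSplit: getColumns() is used to sum the compressed sizes of only the column chunks that appear in the requested schema, and that total becomes the split's length.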

import parquet.hadoop.metadata.BlockMetaData; // import the package/class this method depends on
public ParquetInputSplit getParquetInputSplit(FileStatus fileStatus, String requestedSchema, Map<String, String> readSupportMetadata) throws IOException {
    MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
    long length = 0;

    for (BlockMetaData block : this.getRowGroups()) {
        List<ColumnChunkMetaData> columns = block.getColumns();
        for (ColumnChunkMetaData column : columns) {
            if (requested.containsPath(column.getPath().toArray())) {
                length += column.getTotalSize();
            }
        }
    }

    BlockMetaData lastRowGroup = this.getRowGroups().get(this.getRowGroupCount() - 1);
    long end = lastRowGroup.getStartingPos() + lastRowGroup.getTotalByteSize();

    long[] rowGroupOffsets = new long[this.getRowGroupCount()];
    for (int i = 0; i < rowGroupOffsets.length; i++) {
        rowGroupOffsets[i] = this.getRowGroups().get(i).getStartingPos();
    }

    return new ParquetInputSplit(
            fileStatus.getPath(),
            hdfsBlock.getOffset(),
            end,
            length,
            hdfsBlock.getHosts(),
            rowGroupOffsets
    );
}
 
Developer ID: grokcoder, Project: pbase, Lines of code: 31, Source: ParquetInputFormat.java

Example 4: end
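The same pattern as the previous example, factored into a static helper in ParquetInputSplit: iterate each block's columns and total the sizes of the chunks matched by the requested schema. (Note that the method is named end but what it returns is the accumulated length.)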

import parquet.hadoop.metadata.BlockMetaData; // import the package/class this method depends on
private static long end(List<BlockMetaData> blocks, String requestedSchema) {
    MessageType requested = MessageTypeParser.parseMessageType(requestedSchema);
    long length = 0;

    for (BlockMetaData block : blocks) {
        List<ColumnChunkMetaData> columns = block.getColumns();
        for (ColumnChunkMetaData column : columns) {
            if (requested.containsPath(column.getPath().toArray())) {
                length += column.getTotalSize();
            }
        }
    }
    return length;
}
 
Developer ID: grokcoder, Project: pbase, Lines of code: 15, Source: ParquetInputSplit.java

Example 5: addRowGroup
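In ParquetMetadataConverter, getColumns() supplies the per-chunk metadata needed to build the Thrift-level ColumnChunk and RowGroup objects that are written into the file footer.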

import parquet.hadoop.metadata.BlockMetaData; // import the package/class this method depends on
private void addRowGroup(ParquetMetadata parquetMetadata, List<RowGroup> rowGroups, BlockMetaData block) {
    //rowGroup.total_byte_size = ;
    List<ColumnChunkMetaData> columns = block.getColumns();
    List<ColumnChunk> parquetColumns = new ArrayList<ColumnChunk>();
    for (ColumnChunkMetaData columnMetaData : columns) {
        ColumnChunk columnChunk = new ColumnChunk(columnMetaData.getFirstDataPageOffset()); // verify this is the right offset
        columnChunk.file_path = block.getPath(); // they are in the same file for now
        columnChunk.meta_data = new parquet.format.ColumnMetaData(
                getType(columnMetaData.getType()),
                toFormatEncodings(columnMetaData.getEncodings()),
                Arrays.asList(columnMetaData.getPath().toArray()),
                columnMetaData.getCodec().getParquetCompressionCodec(),
                columnMetaData.getValueCount(),
                columnMetaData.getTotalUncompressedSize(),
                columnMetaData.getTotalSize(),
                columnMetaData.getFirstDataPageOffset());
        columnChunk.meta_data.dictionary_page_offset = columnMetaData.getDictionaryPageOffset();
        if (!columnMetaData.getStatistics().isEmpty()) {
            columnChunk.meta_data.setStatistics(toParquetStatistics(columnMetaData.getStatistics()));
        }
        // columnChunk.meta_data.index_page_offset = ;
        // columnChunk.meta_data.key_value_metadata = ; // nothing yet

        parquetColumns.add(columnChunk);
    }
    RowGroup rowGroup = new RowGroup(parquetColumns, block.getTotalByteSize(), block.getRowCount());
    rowGroups.add(rowGroup);
}
 
Developer ID: grokcoder, Project: pbase, Lines of code: 29, Source: ParquetMetadataConverter.java

Example 6: test
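This test (from TestParquetWriterNewPage) writes a file for each writer version and row-count modulo, reads every record back, and then uses getColumns() on the footer's row groups to assert that the binary_field column was written with the expected encoding.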

import parquet.hadoop.metadata.BlockMetaData; // import the package/class this method depends on
@Test
public void test() throws Exception {
    Configuration conf = new Configuration();
    Path root = new Path("target/tests/TestParquetWriter/");
    FileSystem fs = root.getFileSystem(conf);
    if (fs.exists(root)) {
        fs.delete(root, true);
    }
    fs.mkdirs(root);
    MessageType schema = parseMessageType(
            "message test { "
                    + "required binary binary_field; "
                    + "required int32 int32_field; "
                    + "required int64 int64_field; "
                    + "required boolean boolean_field; "
                    + "required float float_field; "
                    + "required double double_field; "
                    + "required fixed_len_byte_array(3) flba_field; "
                    + "required int96 int96_field; "
                    + "optional binary null_field; "
                    + "} ");
    GroupWriteSupport.setSchema(schema, conf);
    SimpleGroupFactory f = new SimpleGroupFactory(schema);
    Map<String, Encoding> expected = new HashMap<String, Encoding>();
    expected.put("10-" + PARQUET_1_0, PLAIN_DICTIONARY);
    expected.put("1000-" + PARQUET_1_0, PLAIN);
    expected.put("10-" + PARQUET_2_0, RLE_DICTIONARY);
    expected.put("1000-" + PARQUET_2_0, DELTA_BYTE_ARRAY);
    for (int modulo : asList(10, 1000)) {
        for (WriterVersion version : WriterVersion.values()) {
            Path file = new Path(root, version.name() + "_" + modulo);
            ParquetWriter<Group> writer = new ParquetWriter<Group>(
                    file,
                    new GroupWriteSupport(null),
                    UNCOMPRESSED, 1024, 1024, 512, true, false, version, conf);
            for (int i = 0; i < 1000; i++) {
                writer.write(
                        f.newGroup()
                                .append("binary_field", "test" + (i % modulo))
                                .append("int32_field", 32)
                                .append("int64_field", 64l)
                                .append("boolean_field", true)
                                .append("float_field", 1.0f)
                                .append("double_field", 2.0d)
                                .append("flba_field", "foo")
                                .append("int96_field", Binary.fromByteArray(new byte[12])));
            }
            writer.close();

            ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), file).withConf(conf).build();
            for (int i = 0; i < 1000; i++) {
                Group group = reader.read();
                assertEquals("test" + (i % modulo), group.getBinary("binary_field", 0).toStringUsingUTF8());
                assertEquals(32, group.getInteger("int32_field", 0));
                assertEquals(64l, group.getLong("int64_field", 0));
                assertEquals(true, group.getBoolean("boolean_field", 0));
                assertEquals(1.0f, group.getFloat("float_field", 0), 0.001);
                assertEquals(2.0d, group.getDouble("double_field", 0), 0.001);
                assertEquals("foo", group.getBinary("flba_field", 0).toStringUsingUTF8());
                assertEquals(Binary.fromByteArray(new byte[12]), group.getInt96("int96_field", 0));
                assertEquals(0, group.getFieldRepetitionCount("null_field"));
            }
            reader.close();
            ParquetMetadata footer = readFooter(conf, file, NO_FILTER);
            for (BlockMetaData blockMetaData : footer.getBlocks()) {
                for (ColumnChunkMetaData column : blockMetaData.getColumns()) {
                    if (column.getPath().toDotString().equals("binary_field")) {
                        String key = modulo + "-" + version;
                        Encoding expectedEncoding = expected.get(key);
                        assertTrue(
                                key + ":" + column.getEncodings() + " should contain " + expectedEncoding,
                                column.getEncodings().contains(expectedEncoding));
                    }
                }
            }
        }
    }
}
 
Developer ID: grokcoder, Project: pbase, Lines of code: 79, Source: TestParquetWriterNewPage.java

Example 7: test
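A near-identical test, this time from TestParquetWriter: the schema omits the optional null_field and the target directory is prepared with enforceEmptyDir instead of a manual delete/mkdirs, but the encoding check via getColumns() on the footer is the same.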

import parquet.hadoop.metadata.BlockMetaData; // import the package/class this method depends on
@Test
public void test() throws Exception {
    Configuration conf = new Configuration();
    Path root = new Path("target/tests/TestParquetWriter/");
    enforceEmptyDir(conf, root);
    MessageType schema = parseMessageType(
            "message test { "
                    + "required binary binary_field; "
                    + "required int32 int32_field; "
                    + "required int64 int64_field; "
                    + "required boolean boolean_field; "
                    + "required float float_field; "
                    + "required double double_field; "
                    + "required fixed_len_byte_array(3) flba_field; "
                    + "required int96 int96_field; "
                    + "} ");
    GroupWriteSupport.setSchema(schema, conf);
    SimpleGroupFactory f = new SimpleGroupFactory(schema);
    Map<String, Encoding> expected = new HashMap<String, Encoding>();
    expected.put("10-" + PARQUET_1_0, PLAIN_DICTIONARY);
    expected.put("1000-" + PARQUET_1_0, PLAIN);
    expected.put("10-" + PARQUET_2_0, RLE_DICTIONARY);
    expected.put("1000-" + PARQUET_2_0, DELTA_BYTE_ARRAY);
    for (int modulo : asList(10, 1000)) {
        for (WriterVersion version : WriterVersion.values()) {
            Path file = new Path(root, version.name() + "_" + modulo);
            ParquetWriter<Group> writer = new ParquetWriter<Group>(
                    file,
                    new GroupWriteSupport(null),
                    UNCOMPRESSED, 1024, 1024, 512, true, false, version, conf);
            for (int i = 0; i < 1000; i++) {
                writer.write(
                        f.newGroup()
                                .append("binary_field", "test" + (i % modulo))
                                .append("int32_field", 32)
                                .append("int64_field", 64l)
                                .append("boolean_field", true)
                                .append("float_field", 1.0f)
                                .append("double_field", 2.0d)
                                .append("flba_field", "foo")
                                .append("int96_field", Binary.fromByteArray(new byte[12])));
            }
            writer.close();
            ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), file).withConf(conf).build();
            for (int i = 0; i < 1000; i++) {
                Group group = reader.read();
                assertEquals("test" + (i % modulo), group.getBinary("binary_field", 0).toStringUsingUTF8());
                assertEquals(32, group.getInteger("int32_field", 0));
                assertEquals(64l, group.getLong("int64_field", 0));
                assertEquals(true, group.getBoolean("boolean_field", 0));
                assertEquals(1.0f, group.getFloat("float_field", 0), 0.001);
                assertEquals(2.0d, group.getDouble("double_field", 0), 0.001);
                assertEquals("foo", group.getBinary("flba_field", 0).toStringUsingUTF8());
                assertEquals(Binary.fromByteArray(new byte[12]), group.getInt96("int96_field", 0));
            }
            reader.close();
            ParquetMetadata footer = readFooter(conf, file, NO_FILTER);
            for (BlockMetaData blockMetaData : footer.getBlocks()) {
                for (ColumnChunkMetaData column : blockMetaData.getColumns()) {
                    if (column.getPath().toDotString().equals("binary_field")) {
                        String key = modulo + "-" + version;
                        Encoding expectedEncoding = expected.get(key);
                        assertTrue(
                                key + ":" + column.getEncodings() + " should contain " + expectedEncoding,
                                column.getEncodings().contains(expectedEncoding));
                    }
                }
            }
        }
    }
}
 
Developer ID: grokcoder, Project: pbase, Lines of code: 72, Source: TestParquetWriter.java


Note: the parquet.hadoop.metadata.BlockMetaData.getColumns examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets are drawn from open-source projects contributed by their authors, and copyright remains with the original authors; please follow the corresponding project's license when distributing or using this code, and do not reproduce without permission.