

Java ColumnPath Class Code Examples

This article collects typical usage examples of the Java class parquet.hadoop.metadata.ColumnPath. If you are wondering what the ColumnPath class does and how to use it, the curated class examples below should help.


The ColumnPath class belongs to the parquet.hadoop.metadata package. 13 code examples of the class are shown below, sorted by popularity by default.
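Before diving into the examples, here is a minimal sketch of the ColumnPath API itself, based on the parquet-mr 1.x class; the field names ("user", "address", "zip") are invented for illustration:

import parquet.hadoop.metadata.ColumnPath;

public class ColumnPathSketch {
    public static void main(String[] args) {
        // Build a path from schema segments, e.g. the nested field "user.address.zip".
        ColumnPath path = ColumnPath.get("user", "address", "zip");

        // The dot-string form is convenient for logging and error messages.
        System.out.println(path.toDotString()); // prints: user.address.zip

        // toArray() returns the original segments, e.g. for schema lookups.
        String[] segments = path.toArray();
        System.out.println(segments.length);    // prints: 3

        // Instances compare equal segment-by-segment, so ColumnPath works as a
        // map key, which is exactly how the readers in the examples below use it.
        System.out.println(path.equals(ColumnPath.get("user", "address", "zip"))); // true
    }
}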

Example 1: startColumn

import parquet.hadoop.metadata.ColumnPath; // import the required package/class
/**
 * start a column inside a block
 *
 * @param descriptor           the column descriptor
 * @param valueCount           the value count in this column
 * @param compressionCodecName the codec used to compress this column's pages
 * @throws IOException if the current file position cannot be obtained
 */
public void startColumn(ColumnDescriptor descriptor,
                        long valueCount,
                        CompressionCodecName compressionCodecName) throws IOException {
    state = state.startColumn();
    if (DEBUG) LOG.debug(out.getPos() + ": start column: " + descriptor + " count=" + valueCount);
    currentEncodings = new HashSet<parquet.column.Encoding>();
    currentChunkPath = ColumnPath.get(descriptor.getPath());
    currentChunkType = descriptor.getType();
    currentChunkCodec = compressionCodecName;
    currentChunkValueCount = valueCount;
    currentChunkFirstDataPage = out.getPos();
    compressedLength = 0;
    uncompressedLength = 0;
    // need to know what type of stats to initialize to
    // better way to do this?
    currentStatistics = Statistics.getStatsBasedOnType(currentChunkType);
}
 
Developer: grokcoder, Project: pbase, Lines: 27, Source: ParquetFileWriter.java

Example 2: ParquetFileReader

import parquet.hadoop.metadata.ColumnPath; // import the required package/class
public ParquetFileReader(
        Configuration configuration,
        Path file,
        List<BlockMetaData> blocks,
        List<ColumnDescriptor> columns)
        throws IOException
{
    this.file = file;
    this.inputStream = file.getFileSystem(configuration).open(file);
    this.blocks = blocks;
    if (!blocks.isEmpty()) {
        for (ColumnDescriptor columnDescriptor : columns) {
            for (ColumnChunkMetaData metadata : blocks.get(0).getColumns()) {
                if (metadata.getPath().equals(ColumnPath.get(columnDescriptor.getPath()))) {
                    columnMetadata.put(columnDescriptor, metadata);
                }
            }
        }
    }
    this.codecFactory = new ParquetCodecFactory(configuration);
}
 
Developer: y-lan, Project: presto, Lines: 22, Source: ParquetFileReader.java

Example 3: ParquetFileReader

import parquet.hadoop.metadata.ColumnPath; // import the required package/class
/**
 * @param configuration the Hadoop configuration used to open the file
 * @param filePath      the Parquet file (opened for reading in this constructor)
 * @param blocks        the blocks to read
 * @param columns       the columns to read (identified by their path)
 * @throws IOException if the file cannot be opened
 */
public ParquetFileReader(Configuration configuration, Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) throws IOException {
    this.filePath = filePath;
    FileSystem fs = filePath.getFileSystem(configuration);
    this.f = fs.open(filePath);
    this.blocks = blocks;
    for (ColumnDescriptor col : columns) {
        paths.put(ColumnPath.get(col.getPath()), col);
    }
    this.codecFactory = new CodecFactory(configuration);
}
 
Developer: grokcoder, Project: pbase, Lines: 18, Source: ParquetFileReader.java

Example 4: readNextRowGroup

import parquet.hadoop.metadata.ColumnPath; // import the required package/class
/**
 * Reads all the columns requested from the row group at the current file position.
 *
 * @return the PageReadStore which can provide PageReaders for each column.
 * @throws IOException if an error occurs while reading
 */
public PageReadStore readNextRowGroup() throws IOException {
    if (currentBlock == blocks.size()) {
        return null;
    }
    BlockMetaData block = blocks.get(currentBlock);
    if (block.getRowCount() == 0) {
        throw new RuntimeException("Illegal row group of 0 rows");
    }
    ColumnChunkPageReadStore columnChunkPageReadStore = new ColumnChunkPageReadStore(block.getRowCount());
    // prepare the list of consecutive chunks to read them in one scan
    List<ConsecutiveChunkList> allChunks = new ArrayList<ConsecutiveChunkList>();
    ConsecutiveChunkList currentChunks = null;
    for (ColumnChunkMetaData mc : block.getColumns()) {
        ColumnPath pathKey = mc.getPath();
        BenchmarkCounter.incrementTotalBytes(mc.getTotalSize());
        ColumnDescriptor columnDescriptor = paths.get(pathKey);
        if (columnDescriptor != null) {
            long startingPos = mc.getStartingPos();
            // first chunk or not consecutive => new list
            if (currentChunks == null || currentChunks.endPos() != startingPos) {
                currentChunks = new ConsecutiveChunkList(startingPos);
                allChunks.add(currentChunks);
            }
            currentChunks.addChunk(new ChunkDescriptor(columnDescriptor, mc, startingPos, (int) mc.getTotalSize()));
        }
    }
    // actually read all the chunks
    for (ConsecutiveChunkList consecutiveChunks : allChunks) {
        final List<Chunk> chunks = consecutiveChunks.readAll(f);
        for (Chunk chunk : chunks) {
            columnChunkPageReadStore.addColumn(chunk.descriptor.col, chunk.readAllPages());
        }
    }
    ++currentBlock;
    return columnChunkPageReadStore;
}
 
Developer: grokcoder, Project: pbase, Lines: 43, Source: ParquetFileReader.java
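For context, a minimal sketch of how a caller typically drains row groups with readNextRowGroup(); it assumes the ParquetFileReader class from Example 3 is in scope, and record assembly (e.g. via ColumnReadStoreImpl) is omitted:

import java.io.IOException;
import parquet.column.page.PageReadStore;

public final class RowGroupDrainSketch {
    // Iterate every row group; readNextRowGroup() returns null after the last one.
    static void drainAll(ParquetFileReader reader) throws IOException {
        PageReadStore rowGroup;
        while ((rowGroup = reader.readNextRowGroup()) != null) {
            System.out.println("row group with " + rowGroup.getRowCount() + " rows");
            // a real consumer would hand rowGroup to a record assembler here
        }
    }
}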

Example 5: makeBlockFromStats

import parquet.hadoop.metadata.ColumnPath; // import the required package/class
public static BlockMetaData makeBlockFromStats(IntStatistics stats, long valueCount) {
    BlockMetaData blockMetaData = new BlockMetaData();

    ColumnChunkMetaData column = ColumnChunkMetaData.get(ColumnPath.get("foo"),
            PrimitiveTypeName.INT32,
            CompressionCodecName.GZIP,
            new HashSet<Encoding>(Arrays.asList(Encoding.PLAIN)),
            stats,
            100L, 100L, valueCount, 100L, 100L);
    blockMetaData.addColumn(column);
    blockMetaData.setTotalByteSize(200L);
    blockMetaData.setRowCount(valueCount);
    return blockMetaData;
}
 
Developer: grokcoder, Project: pbase, Lines: 15, Source: TestInputFormat.java
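A hedged sketch of how a test might drive makeBlockFromStats; the min/max range and value count are invented, and makeBlockFromStats is assumed to be statically imported or otherwise in scope:

import parquet.column.statistics.IntStatistics;
import parquet.hadoop.metadata.BlockMetaData;

public class MakeBlockSketch {
    static BlockMetaData exampleBlock() {
        // Statistics claiming the single column "foo" holds values in [10, 100].
        IntStatistics stats = new IntStatistics();
        stats.setMinMax(10, 100);
        // Wrap them in a one-column block of 500 values.
        return makeBlockFromStats(stats, 500L);
    }
}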

Example 6: newBlock

import parquet.hadoop.metadata.ColumnPath; // import the required package/class
private BlockMetaData newBlock(long start, long compressedBlockSize) {
    BlockMetaData blockMetaData = new BlockMetaData();
    long uncompressedSize = compressedBlockSize * 2; // assuming a compression ratio of 2
    ColumnChunkMetaData column = ColumnChunkMetaData.get(ColumnPath.get("foo"),
            PrimitiveTypeName.BINARY,
            CompressionCodecName.GZIP,
            new HashSet<Encoding>(Arrays.asList(Encoding.PLAIN)),
            new BinaryStatistics(),
            start, 0L, 0L, compressedBlockSize, uncompressedSize);
    blockMetaData.addColumn(column);
    blockMetaData.setTotalByteSize(uncompressedSize);
    return blockMetaData;
}
 
Developer: grokcoder, Project: pbase, Lines: 14, Source: TestInputFormat.java

Example 7: getIntColumnMeta

import parquet.hadoop.metadata.ColumnPath; // import the required package/class
private static ColumnChunkMetaData getIntColumnMeta(IntStatistics stats, long valueCount) {
    return ColumnChunkMetaData.get(ColumnPath.get("int", "column"),
            PrimitiveTypeName.INT32,
            CompressionCodecName.GZIP,
            new HashSet<Encoding>(Arrays.asList(Encoding.PLAIN)),
            stats,
            0L, 0L, valueCount, 0L, 0L);
}
 
Developer: grokcoder, Project: pbase, Lines: 9, Source: TestStatisticsFilter.java

Example 8: getDoubleColumnMeta

import parquet.hadoop.metadata.ColumnPath; // import the required package/class
private static ColumnChunkMetaData getDoubleColumnMeta(DoubleStatistics stats, long valueCount) {
    return ColumnChunkMetaData.get(ColumnPath.get("double", "column"),
            PrimitiveTypeName.DOUBLE,
            CompressionCodecName.GZIP,
            new HashSet<Encoding>(Arrays.asList(Encoding.PLAIN)),
            stats,
            0L, 0L, valueCount, 0L, 0L);
}
 
Developer: grokcoder, Project: pbase, Lines: 9, Source: TestStatisticsFilter.java

Example 9: getDictionariesByColumnOrdinal

import parquet.hadoop.metadata.ColumnPath; // import the required package/class
private static Map<Integer, ParquetDictionaryDescriptor> getDictionariesByColumnOrdinal(
        BlockMetaData blockMetadata,
        Path path,
        Configuration configuration,
        MessageType requestedSchema,
        TupleDomain<HiveColumnHandle> effectivePredicate)
{
    // TODO: should we call release?
    ParquetCodecFactory codecFactory = new ParquetCodecFactory(configuration);

    ImmutableMap.Builder<Integer, ParquetDictionaryDescriptor> dictionaries = ImmutableMap.builder();
    for (int ordinal = 0; ordinal < blockMetadata.getColumns().size(); ordinal++) {
        ColumnChunkMetaData columnChunkMetaData = blockMetadata.getColumns().get(ordinal);

        for (int i = 0; i < requestedSchema.getColumns().size(); i++) {
            ColumnDescriptor columnDescriptor = requestedSchema.getColumns().get(i);
            if (isColumnPredicate(columnDescriptor, effectivePredicate) &&
                    columnChunkMetaData.getPath().equals(ColumnPath.get(columnDescriptor.getPath())) &&
                    isOnlyDictionaryEncodingPages(columnChunkMetaData.getEncodings())) {
                DictionaryPage dictionaryPage;
                try (FSDataInputStream inputStream = path.getFileSystem(configuration).open(path)) {
                    inputStream.seek(columnChunkMetaData.getStartingPos());

                    int totalSize = Ints.checkedCast(columnChunkMetaData.getTotalSize());
                    byte[] buffer = new byte[totalSize];
                    inputStream.readFully(buffer);

                    dictionaryPage = readDictionaryPage(buffer, codecFactory, columnChunkMetaData.getCodec());
                    dictionaries.put(ordinal, new ParquetDictionaryDescriptor(columnDescriptor, dictionaryPage));
                }
                catch (IOException ignored) {
                }
                break;
            }
        }
    }
    return dictionaries.build();
}
 
Developer: y-lan, Project: presto, Lines: 39, Source: ParquetPredicateUtils.java

Example 10: getColumnChunk

import parquet.hadoop.metadata.ColumnPath; // import the required package/class
private ColumnChunkMetaData getColumnChunk(ColumnPath columnPath) {
    ColumnChunkMetaData c = columns.get(columnPath);
    checkArgument(c != null, "Column " + columnPath.toDotString() + " not found in schema!");
    return c;
}
 
Developer: grokcoder, Project: pbase, Lines: 6, Source: StatisticsFilter.java

Example 11: fromParquetMetadata

import parquet.hadoop.metadata.ColumnPath; // import the required package/class
public ParquetMetadata fromParquetMetadata(FileMetaData parquetMetadata) throws IOException {
    MessageType messageType = fromParquetSchema(parquetMetadata.getSchema());
    List<BlockMetaData> blocks = new ArrayList<BlockMetaData>();
    List<RowGroup> row_groups = parquetMetadata.getRow_groups();
    if (row_groups != null) {
        for (RowGroup rowGroup : row_groups) {
            BlockMetaData blockMetaData = new BlockMetaData();
            blockMetaData.setRowCount(rowGroup.getNum_rows());
            blockMetaData.setTotalByteSize(rowGroup.getTotal_byte_size());
            List<ColumnChunk> columns = rowGroup.getColumns();
            String filePath = columns.get(0).getFile_path();
            for (ColumnChunk columnChunk : columns) {
                if ((filePath == null && columnChunk.getFile_path() != null)
                        || (filePath != null && !filePath.equals(columnChunk.getFile_path()))) {
                    throw new ParquetDecodingException("all column chunks of the same row group must be in the same file for now");
                }
                parquet.format.ColumnMetaData metaData = columnChunk.meta_data;
                ColumnPath path = getPath(metaData);
                ColumnChunkMetaData column = ColumnChunkMetaData.get(
                        path,
                        messageType.getType(path.toArray()).asPrimitiveType().getPrimitiveTypeName(),
                        CompressionCodecName.fromParquet(metaData.codec),
                        fromFormatEncodings(metaData.encodings),
                        fromParquetStatistics(metaData.statistics, messageType.getType(path.toArray()).asPrimitiveType().getPrimitiveTypeName()),
                        metaData.data_page_offset,
                        metaData.dictionary_page_offset,
                        metaData.num_values,
                        metaData.total_compressed_size,
                        metaData.total_uncompressed_size);
                // TODO
                // index_page_offset
                // key_value_metadata
                blockMetaData.addColumn(column);
            }
            blockMetaData.setPath(filePath);
            blocks.add(blockMetaData);
        }
    }
    Map<String, String> keyValueMetaData = new HashMap<String, String>();
    List<KeyValue> key_value_metadata = parquetMetadata.getKey_value_metadata();
    if (key_value_metadata != null) {
        for (KeyValue keyValue : key_value_metadata) {
            keyValueMetaData.put(keyValue.key, keyValue.value);
        }
    }
    return new ParquetMetadata(
            new parquet.hadoop.metadata.FileMetaData(messageType, keyValueMetaData, parquetMetadata.getCreated_by()),
            blocks);
}
 
Developer: grokcoder, Project: pbase, Lines: 50, Source: ParquetMetadataConverter.java

Example 12: getPath

import parquet.hadoop.metadata.ColumnPath; // import the required package/class
private ColumnPath getPath(parquet.format.ColumnMetaData metaData) {
    String[] path = metaData.path_in_schema.toArray(new String[metaData.path_in_schema.size()]);
    return ColumnPath.get(path);
}
 
Developer: grokcoder, Project: pbase, Lines: 5, Source: ParquetMetadataConverter.java

Example 13: readFooter

import parquet.hadoop.metadata.ColumnPath; // import the required package/class
public static ParquetMetadata readFooter(Configuration configuration, Path file)
        throws IOException
{
    FileSystem fileSystem = file.getFileSystem(configuration);
    FileStatus fileStatus = fileSystem.getFileStatus(file);
    try (FSDataInputStream inputStream = fileSystem.open(file)) {
        // Parquet File Layout:
        //
        // MAGIC
        // variable: Data
        // variable: Metadata
        // 4 bytes: MetadataLength
        // MAGIC

        long length = fileStatus.getLen();
        checkArgument(length >= MAGIC.length + PARQUET_METADATA_LENGTH + MAGIC.length, "%s is not a valid Parquet File", file);
        long metadataLengthIndex = length - PARQUET_METADATA_LENGTH - MAGIC.length;

        inputStream.seek(metadataLengthIndex);
        int metadataLength = readIntLittleEndian(inputStream);

        byte[] magic = new byte[MAGIC.length];
        inputStream.readFully(magic);
        checkArgument(Arrays.equals(MAGIC, magic),
                "Not valid Parquet file: %s expected magic number: %s got: %s", file, Arrays.toString(MAGIC), Arrays.toString(magic));

        long metadataIndex = metadataLengthIndex - metadataLength;
        checkArgument(metadataIndex >= MAGIC.length && metadataIndex < metadataLengthIndex,
                "Corrupted Parquet file: %s metadata index: %s out of range", file, metadataIndex);
        inputStream.seek(metadataIndex);
        FileMetaData fileMetaData = readFileMetaData(inputStream);
        List<SchemaElement> schema = fileMetaData.getSchema();
        checkArgument(!schema.isEmpty(), "Empty Parquet schema in file: %s", file);

        MessageType messageType = readParquetSchema(schema);
        List<BlockMetaData> blocks = new ArrayList<>();
        List<RowGroup> rowGroups = fileMetaData.getRow_groups();
        if (rowGroups != null) {
            for (RowGroup rowGroup : rowGroups) {
                BlockMetaData blockMetaData = new BlockMetaData();
                blockMetaData.setRowCount(rowGroup.getNum_rows());
                blockMetaData.setTotalByteSize(rowGroup.getTotal_byte_size());
                List<ColumnChunk> columns = rowGroup.getColumns();
                checkArgument(!columns.isEmpty(), "No columns in row group: %s", rowGroup);
                String filePath = columns.get(0).getFile_path();
                for (ColumnChunk columnChunk : columns) {
                    checkArgument((filePath == null && columnChunk.getFile_path() == null)
                                    || (filePath != null && filePath.equals(columnChunk.getFile_path())),
                            "all column chunks of the same row group must be in the same file");
                    ColumnMetaData metaData = columnChunk.meta_data;
                    String[] path = metaData.path_in_schema.toArray(new String[metaData.path_in_schema.size()]);
                    ColumnPath columnPath = ColumnPath.get(path);
                    ColumnChunkMetaData column = ColumnChunkMetaData.get(
                            columnPath,
                            messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName(),
                            CompressionCodecName.fromParquet(metaData.codec),
                            readEncodings(metaData.encodings),
                            readStats(metaData.statistics, messageType.getType(columnPath.toArray()).asPrimitiveType().getPrimitiveTypeName()),
                            metaData.data_page_offset,
                            metaData.dictionary_page_offset,
                            metaData.num_values,
                            metaData.total_compressed_size,
                            metaData.total_uncompressed_size);
                    blockMetaData.addColumn(column);
                }
                blockMetaData.setPath(filePath);
                blocks.add(blockMetaData);
            }
        }

        Map<String, String> keyValueMetaData = new HashMap<>();
        List<KeyValue> keyValueList = fileMetaData.getKey_value_metadata();
        if (keyValueList != null) {
            for (KeyValue keyValue : keyValueList) {
                keyValueMetaData.put(keyValue.key, keyValue.value);
            }
        }
        return new ParquetMetadata(new parquet.hadoop.metadata.FileMetaData(messageType, keyValueMetaData, fileMetaData.getCreated_by()), blocks);
    }
}
 
Developer: y-lan, Project: presto, Lines: 81, Source: ParquetMetadataReader.java
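To tie the examples together, here is a short hedged sketch that walks the footer returned by Example 13 and prints each column's ColumnPath; the file path is a placeholder, and the ParquetMetadataReader class from Example 13 is assumed to be on the classpath:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import parquet.hadoop.metadata.BlockMetaData;
import parquet.hadoop.metadata.ColumnChunkMetaData;
import parquet.hadoop.metadata.ParquetMetadata;

public class FooterWalkSketch {
    public static void main(String[] args) throws IOException {
        ParquetMetadata footer = ParquetMetadataReader.readFooter(
                new Configuration(), new Path("/tmp/example.parquet")); // placeholder path
        for (BlockMetaData block : footer.getBlocks()) {
            for (ColumnChunkMetaData column : block.getColumns()) {
                // getPath() returns a ColumnPath; toDotString() is its readable form.
                System.out.println(column.getPath().toDotString()
                        + " values=" + column.getValueCount());
            }
        }
    }
}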


Note: The parquet.hadoop.metadata.ColumnPath class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors. For distribution and use, refer to the corresponding project's License. Do not reproduce without permission.