

Java ColumnChunkMetaData Class Code Examples

This article collects typical usage examples of the Java class parquet.hadoop.metadata.ColumnChunkMetaData. If you are wondering what the ColumnChunkMetaData class is for, how to use it, or are simply looking for working examples, the curated code samples below should help.


The ColumnChunkMetaData class belongs to the parquet.hadoop.metadata package. The sections below show 15 code examples of the class, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Java code examples.
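Before working through the examples, here is a minimal sketch, under stated assumptions, of how ColumnChunkMetaData instances are usually obtained: the Parquet footer is read into a ParquetMetadata object, and each of its row groups (BlockMetaData) exposes a list of column chunks. The file path is a placeholder, and the readFooter(Configuration, Path) overload is assumed to be available in this pre-Apache parquet-mr version; the accessors getBlocks, getColumns, getPath, getValueCount, and getTotalSize match the ones used in the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import parquet.hadoop.ParquetFileReader;
import parquet.hadoop.metadata.BlockMetaData;
import parquet.hadoop.metadata.ColumnChunkMetaData;
import parquet.hadoop.metadata.ParquetMetadata;

public class ColumnChunkMetaDataSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path file = new Path("/tmp/example.parquet"); // placeholder path

    // Read the file footer; it holds per-row-group (block) metadata.
    ParquetMetadata footer = ParquetFileReader.readFooter(conf, file);

    for (BlockMetaData block : footer.getBlocks()) {
      // Each block contains one ColumnChunkMetaData per column.
      for (ColumnChunkMetaData chunk : block.getColumns()) {
        System.out.println(chunk.getPath() + ": "
            + chunk.getValueCount() + " values, "
            + chunk.getTotalSize() + " bytes on disk");
      }
    }
  }
}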

Example 1: PageReader

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
PageReader(ColumnReader<?> parentStatus, FileSystem fs, Path path, ColumnChunkMetaData columnChunkMetaData)
  throws ExecutionSetupException {
  this.parentColumnReader = parentStatus;
  allocatedDictionaryBuffers = new ArrayList<ByteBuf>();
  codecFactory = parentColumnReader.parentReader.getCodecFactory();

  long start = columnChunkMetaData.getFirstDataPageOffset();
  try {
    FSDataInputStream f = fs.open(path);
    this.dataReader = new ColumnDataReader(f, start, columnChunkMetaData.getTotalSize());
    loadDictionaryIfExists(parentStatus, columnChunkMetaData, f);

  } catch (IOException e) {
    throw new ExecutionSetupException("Error opening or reading metadata for parquet file at location: "
        + path.getName(), e);
  }

}
 
Developer: skhalifa, Project: QDrill, Lines: 19, Source: PageReader.java

Example 2: ColumnReader

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
protected ColumnReader(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
    ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v, SchemaElement schemaElement) throws ExecutionSetupException {
  this.parentReader = parentReader;
  this.columnDescriptor = descriptor;
  this.columnChunkMetaData = columnChunkMetaData;
  this.isFixedLength = fixedLength;
  this.schemaElement = schemaElement;
  this.valueVec = v;
  this.pageReader = new PageReader(this, parentReader.getFileSystem(), parentReader.getHadoopPath(), columnChunkMetaData);

  if (columnDescriptor.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
    if (columnDescriptor.getType() == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
      dataTypeLengthInBits = columnDescriptor.getTypeLength() * 8;
    } else {
      dataTypeLengthInBits = ParquetRecordReader.getTypeLengthInBits(columnDescriptor.getType());
    }
  }

}
 
Developer: skhalifa, Project: QDrill, Lines: 20, Source: ColumnReader.java

Example 3: add

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
private static void add(ParquetMetadata footer) {
    for (BlockMetaData blockMetaData : footer.getBlocks()) {
        ++blockCount;
        MessageType schema = footer.getFileMetaData().getSchema();
        recordCount += blockMetaData.getRowCount();
        List<ColumnChunkMetaData> columns = blockMetaData.getColumns();
        for (ColumnChunkMetaData columnMetaData : columns) {
            ColumnDescriptor desc = schema.getColumnDescription(columnMetaData.getPath().toArray());
            add(
                    desc,
                    columnMetaData.getValueCount(),
                    columnMetaData.getTotalSize(),
                    columnMetaData.getTotalUncompressedSize(),
                    columnMetaData.getEncodings(),
                    columnMetaData.getStatistics());
        }
    }
}
 
Developer: grokcoder, Project: pbase, Lines: 19, Source: PrintFooter.java

Example 4: endColumn

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
/**
 * end a column (once all rep, def and data have been written)
 *
 * @throws IOException
 */
public void endColumn() throws IOException {
    state = state.endColumn();
    if (DEBUG) LOG.debug(out.getPos() + ": end column");
    currentBlock.addColumn(ColumnChunkMetaData.get(
            currentChunkPath,
            currentChunkType,
            currentChunkCodec,
            currentEncodings,
            currentStatistics,
            currentChunkFirstDataPage,
            currentChunkDictionaryPageOffset,
            currentChunkValueCount,
            compressedLength,
            uncompressedLength));
    if (DEBUG) LOG.info("ended column chunk: " + currentColumn);
    currentColumn = null;
    this.currentBlock.setTotalByteSize(currentBlock.getTotalByteSize() + uncompressedLength);
    this.uncompressedLength = 0;
    this.compressedLength = 0;
}
 
Developer: grokcoder, Project: pbase, Lines: 26, Source: ParquetFileWriter.java

Example 5: visit

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
@Override
public <T extends Comparable<T>> Boolean visit(Lt<T> lt) {
    Column<T> filterColumn = lt.getColumn();
    T value = lt.getValue();
    ColumnChunkMetaData columnChunk = getColumnChunk(filterColumn.getColumnPath());
    Statistics<T> stats = columnChunk.getStatistics();

    if (stats.isEmpty()) {
        // we have no statistics available, we cannot drop any chunks
        return false;
    }

    if (isAllNulls(columnChunk)) {
        // we are looking for records where v < someValue
        // this chunk is all nulls, so we can drop it
        return true;
    }

    // drop if value <= min
    return value.compareTo(stats.genericGetMin()) <= 0;
}
 
Developer: grokcoder, Project: pbase, Lines: 22, Source: StatisticsFilter.java

Example 6: testClearExceptionForNots

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
@Test
public void testClearExceptionForNots() {
    List<ColumnChunkMetaData> columnMetas = Arrays.asList(
            getDoubleColumnMeta(new DoubleStatistics(), 0L),
            getIntColumnMeta(new IntStatistics(), 0L));

    FilterPredicate pred = and(not(eq(doubleColumn, 12.0)), eq(intColumn, 17));

    try {
        canDrop(pred, columnMetas);
        fail("This should throw");
    } catch (IllegalArgumentException e) {
        assertEquals("This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter?"
                + " not(eq(double.column, 12.0))", e.getMessage());
    }
}
 
Developer: grokcoder, Project: pbase, Lines: 17, Source: TestStatisticsFilter.java

Example 7: ParquetFileReader

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
public ParquetFileReader(
        Configuration configuration,
        Path file,
        List<BlockMetaData> blocks,
        List<ColumnDescriptor> columns)
        throws IOException
{
    this.file = file;
    this.inputStream = file.getFileSystem(configuration).open(file);
    this.blocks = blocks;
    if (!blocks.isEmpty()) {
        for (ColumnDescriptor columnDescriptor : columns) {
            for (ColumnChunkMetaData metadata : blocks.get(0).getColumns()) {
                if (metadata.getPath().equals(ColumnPath.get(columnDescriptor.getPath()))) {
                    columnMetadata.put(columnDescriptor, metadata);
                }
            }
        }
    }
    this.codecFactory = new ParquetCodecFactory(configuration);
}
 
Developer: y-lan, Project: presto, Lines: 22, Source: ParquetFileReader.java

Example 8: showDetails

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
public static void showDetails(PrettyPrintWriter out, List<ColumnChunkMetaData> ccmeta) {
  Map<String,Object> chunks = new LinkedHashMap<String,Object>();
  for (ColumnChunkMetaData cmeta : ccmeta) {
    String[] path = cmeta.getPath().toArray();

    Map<String,Object> current = chunks;
    for (int i = 0; i < path.length - 1; ++i) {
      String next = path[i];
      if (!current.containsKey(next)) {
        current.put(next, new LinkedHashMap<String,Object>());
      }

      current = (Map<String,Object>)current.get(next);
    }

    current.put(path[path.length - 1], cmeta);
  }

  showColumnChunkDetails(out, chunks, 0);
}
 
Developer: wesleypeck, Project: parquet-tools, Lines: 21, Source: MetadataUtils.java

Example 9: VarCharColumn

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
VarCharColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
              ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, VarCharVector v,
              SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  varCharVector = v;
  mutator = v.getMutator();
}
 
Developer: skhalifa, Project: QDrill, Lines: 8, Source: VarLengthColumnReaders.java

Example 10: NullableVarCharColumn

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
NullableVarCharColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
                      ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, NullableVarCharVector v,
                      SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  vector = v;
  this.mutator = vector.getMutator();
}
 
Developer: skhalifa, Project: QDrill, Lines: 8, Source: VarLengthColumnReaders.java

Example 11: VarBinaryColumn

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
VarBinaryColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
                ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, VarBinaryVector v,
                SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  varBinaryVector = v;
  mutator = v.getMutator();
}
 
Developer: skhalifa, Project: QDrill, Lines: 8, Source: VarLengthColumnReaders.java

Example 12: NullableVarBinaryColumn

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
NullableVarBinaryColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
                        ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, NullableVarBinaryVector v,
                        SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  nullableVarBinaryVector = v;
  mutator = v.getMutator();
}
 
Developer: skhalifa, Project: QDrill, Lines: 8, Source: VarLengthColumnReaders.java

Example 13: VarLengthColumn

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
VarLengthColumn(ParquetRecordReader parentReader, int allocateSize, ColumnDescriptor descriptor,
                ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
                SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
    if (columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY)) {
      usingDictionary = true;
    }
    else {
      usingDictionary = false;
    }
}
 
Developer: skhalifa, Project: QDrill, Lines: 12, Source: VarLengthColumn.java

Example 14: loadDictionaryIfExists

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
private void loadDictionaryIfExists(final ColumnReader<?> parentStatus,
    final ColumnChunkMetaData columnChunkMetaData, final FSDataInputStream f) throws IOException {
  if (columnChunkMetaData.getDictionaryPageOffset() > 0) {
    f.seek(columnChunkMetaData.getDictionaryPageOffset());
    final PageHeader pageHeader = Util.readPageHeader(f);
    assert pageHeader.type == PageType.DICTIONARY_PAGE;

    final DrillBuf dictionaryData = allocateDictionaryBuffer(pageHeader.getUncompressed_page_size());

    if (parentColumnReader.columnChunkMetaData.getCodec() == CompressionCodecName.UNCOMPRESSED) {
      dataReader.loadPage(dictionaryData, pageHeader.compressed_page_size);
    } else {
      final DrillBuf compressedData = allocateTemporaryBuffer(pageHeader.compressed_page_size);
      try {
        dataReader.loadPage(compressedData, pageHeader.compressed_page_size);
        DirectBytesDecompressor decompressor = codecFactory.getDecompressor(parentColumnReader.columnChunkMetaData
            .getCodec());
        decompressor.decompress(
            compressedData,
            pageHeader.compressed_page_size,
            dictionaryData,
            pageHeader.getUncompressed_page_size());

      } finally {
        compressedData.release();
      }
    }

    DictionaryPage page = new DictionaryPage(
        asBytesInput(dictionaryData, 0, pageHeader.uncompressed_page_size),
        pageHeader.uncompressed_page_size,
        pageHeader.dictionary_page_header.num_values,
        parquet.column.Encoding.valueOf(pageHeader.dictionary_page_header.encoding.name())
        );
    this.dictionary = page.getEncoding().initDictionary(parentStatus.columnDescriptor, page);
  }
}
 
Developer: skhalifa, Project: QDrill, Lines: 38, Source: PageReader.java

Example 15: FixedWidthRepeatedReader

import parquet.hadoop.metadata.ColumnChunkMetaData; // import the required package/class
FixedWidthRepeatedReader(ParquetRecordReader parentReader, ColumnReader dataReader, int dataTypeLengthInBytes, int allocateSize, ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, RepeatedValueVector valueVector, SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, valueVector, schemaElement);
  this.castedRepeatedVector = valueVector;
  this.dataTypeLengthInBytes = dataTypeLengthInBytes;
  this.dataReader = dataReader;
  this.dataReader.pageReader.clear();
  this.dataReader.pageReader = this.pageReader;
  // this is not in the reset method because it needs to be initialized only for the very first page read
  // in all other cases if a read ends at a page boundary we will need to keep track of this flag and not
  // clear it at the start of the next read loop
  notFishedReadingList = false;
}
 
Developer: skhalifa, Project: QDrill, Lines: 13, Source: FixedWidthRepeatedReader.java


Note: The parquet.hadoop.metadata.ColumnChunkMetaData class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by various developers, and copyright remains with the original authors; for distribution and use, please refer to the license of the corresponding project. Do not reproduce without permission.