当前位置: 首页>>代码示例>>Java>>正文


Java PageReadStore.getRowCount方法代码示例

本文整理汇总了Java中org.apache.parquet.column.page.PageReadStore.getRowCount方法的典型用法代码示例。如果您正苦于以下问题：Java PageReadStore.getRowCount方法的具体用法是什么？Java PageReadStore.getRowCount怎么用？有哪些Java PageReadStore.getRowCount的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.parquet.column.page.PageReadStore的用法示例。


在下文中一共展示了PageReadStore.getRowCount方法的6个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: validatePages

import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
/**
 * Walks every row group read from {@code file} and, for each column of the file schema,
 * runs both the first-to-last and the last-to-first validation against the slice of
 * {@code expectedValues} that corresponds to that row group.
 *
 * @param file           file whose row groups and schema are read back
 * @param expectedValues expected values for the whole file; sliced per row group using
 *                       each row group's row count
 * @throws IOException declared by this method; presumably raised by the file-reading helpers
 */
public static void validatePages(Path file, List<?> expectedValues) throws IOException {
  List<PageReadStore> rowGroups = readBlocksFromFile(file);
  MessageType schema = readSchemaFromFile(file);

  int groupIndex = 0;
  int consumedRows = 0;
  for (PageReadStore rowGroup : rowGroups) {
    for (ColumnDescriptor column : schema.getColumns()) {
      List<DataPage> dataPages = getPageGroupForColumn(rowGroup, column);
      DictionaryPage dictionary = reusableCopy(getDictionaryPageForColumn(rowGroup, column));

      // Slice of the expected values covering exactly the rows of this row group.
      int sliceEnd = (int) (consumedRows + rowGroup.getRowCount());
      List<?> groupValues = expectedValues.subList(consumedRows, sliceEnd);

      validateFirstToLast(groupIndex, dictionary, dataPages, column, groupValues);
      validateLastToFirst(groupIndex, dictionary, dataPages, column, groupValues);
    }

    // getRowCount() is a long; narrow explicitly, as the slice bounds are ints.
    consumedRows = (int) (consumedRows + rowGroup.getRowCount());
    groupIndex += 1;
  }
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:20,代码来源:FileEncodingsIT.java

示例2: checkRead

import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
/**
 * Loads the next row group when {@code current} has caught up with
 * {@code totalCountLoadedSoFar}; otherwise does nothing.
 *
 * On a load, a new record reader is built for the row group, the loaded-row total is
 * increased by the row group's row count, and {@code currentBlock} is incremented.
 *
 * @throws IOException if the reader returns no further row group
 */
private void checkRead() throws IOException
{
    if (current != totalCountLoadedSoFar) {
        return;
    }

    final PageReadStore rowGroup = reader.readNextRowGroup();
    if (null == rowGroup) {
        throw new IOException("expecting more rows but reached last block. Read " + current + " out of " + total);
    }

    recordReader = columnIOFactory
            .getColumnIO(requestedSchema, fileSchema, strictTypeChecking)
            .getRecordReader(rowGroup, recordConverter, filter);
    totalCountLoadedSoFar += rowGroup.getRowCount();
    currentBlock++;
}
 
开发者ID:CyberAgent,项目名称:embulk-input-parquet_hadoop,代码行数:15,代码来源:ParquetRowReader.java

示例3: load

import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
/**
 * Reads the file named by {@code this.filename} row group by row group and appends every
 * record to the columns created by {@code createColumns}, then seals the columns and
 * wraps them in a {@link Table}.
 *
 * Side effect: sets the JVM-wide system property {@code hadoop.home.dir} to {@code "/"}.
 *
 * @return a table built from the sealed columns
 * @throws RuntimeException wrapping any {@link IOException} raised while reading or
 *                          closing the file
 */
public ITable load() {
    try {
        Configuration conf = new Configuration();
        System.setProperty("hadoop.home.dir", "/");
        conf.set("hadoop.security.authentication", "simple");
        conf.set("hadoop.security.authorization", "false");
        Path path = new Path(this.filename);
        ParquetMetadata md = ParquetFileReader.readFooter(conf, path,
                ParquetMetadataConverter.NO_FILTER);
        MessageType schema = md.getFileMetaData().getSchema();

        // try-with-resources: the reader holds an open file and must be closed on
        // both the success and the failure path.
        try (ParquetFileReader r = new ParquetFileReader(conf, path, md)) {
            IAppendableColumn[] cols = this.createColumns(md);
            MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);

            PageReadStore pages;
            while (null != (pages = r.readNextRowGroup())) {
                final long rows = pages.getRowCount();
                RecordReader<Group> recordReader = columnIO.getRecordReader(
                        pages, new GroupRecordConverter(schema));
                // long counter: the row count is a long, an int counter could overflow.
                for (long i = 0; i < rows; i++) {
                    Group g = recordReader.read();
                    // 'schema' is the same object as md.getFileMetaData().getSchema().
                    appendGroup(cols, g, schema.getColumns());
                }
            }

            for (IAppendableColumn c : cols) {
                c.seal();
            }
            return new Table(cols);
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }
}
 
开发者ID:vmware,项目名称:hillview,代码行数:33,代码来源:ParquetReader.java

示例4: checkRead

import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
/**
 * Loads the next row group when {@code current} has caught up with
 * {@code totalCountLoadedSoFar}; otherwise does nothing.
 *
 * Before loading, if any rows were already processed, the time spent on the previous
 * block is added to the processing total and throughput figures are logged at debug
 * level. The read itself is timed, added to the reading total and reported to
 * {@code BenchmarkCounter}. Finally a new record reader is created, the assembly start
 * timestamp is reset, the loaded-row total grows by the row group's row count, and
 * {@code currentBlock} is incremented.
 *
 * @throws IOException if the reader returns no further row group
 */
private void checkRead() throws IOException {
  if (current != totalCountLoadedSoFar) {
    return;
  }

  if (current != 0) {
    final long now = System.currentTimeMillis();
    totalTimeSpentProcessingRecords += (now - startedAssemblingCurrentBlockAt);
    if (Log.DEBUG) {
      LOG.debug("Assembled and processed " + totalCountLoadedSoFar
          + " records from " + columnCount
          + " columns in " + totalTimeSpentProcessingRecords
          + " ms: " + ((float)totalCountLoadedSoFar / totalTimeSpentProcessingRecords)
          + " rec/ms, " + ((float)totalCountLoadedSoFar * columnCount / totalTimeSpentProcessingRecords)
          + " cell/ms");
      final long elapsedTotal = totalTimeSpentProcessingRecords + totalTimeSpentReadingBytes;
      // Skip the percentage report when no time was recorded (avoids dividing by zero).
      if (elapsedTotal != 0) {
        final long readingShare = 100 * totalTimeSpentReadingBytes / elapsedTotal;
        final long processingShare = 100 * totalTimeSpentProcessingRecords / elapsedTotal;
        LOG.debug("time spent so far " + readingShare
            + "% reading (" + totalTimeSpentReadingBytes
            + " ms) and " + processingShare
            + "% processing (" + totalTimeSpentProcessingRecords + " ms)");
      }
    }
  }

  if (Log.DEBUG) {
    LOG.debug("at row " + current + ". reading next block");
  }
  final long readStart = System.currentTimeMillis();
  final PageReadStore rowGroup = reader.readNextRowGroup();
  if (null == rowGroup) {
    throw new IOException("expecting more rows but reached last block. Read " + current + " out of " + total);
  }
  final long readMillis = System.currentTimeMillis() - readStart;
  totalTimeSpentReadingBytes += readMillis;
  BenchmarkCounter.incrementTime(readMillis);
  if (Log.INFO) {
    LOG.info("block read in memory in " + readMillis + " ms. row count = " + rowGroup.getRowCount());
  }
  if (Log.DEBUG) {
    LOG.debug("initializing Record assembly with requested schema " + requestedSchema);
  }

  final MessageColumnIO messageIO =
      columnIOFactory.getColumnIO(requestedSchema, fileSchema, strictTypeChecking);
  recordReader = messageIO.getRecordReader(rowGroup, recordConverter, filter);
  startedAssemblingCurrentBlockAt = System.currentTimeMillis();
  totalCountLoadedSoFar += rowGroup.getRowCount();
  currentBlock++;
}
 
开发者ID:apache,项目名称:tajo,代码行数:34,代码来源:InternalParquetRecordReader.java

示例5: checkRead

import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
/**
 * Loads the next row group when {@code current} has caught up with
 * {@code totalCountLoadedSoFar}; otherwise does nothing.
 *
 * Before loading, if any rows were already processed, the time spent on the previous
 * block is added to the processing total and, when info logging is active, throughput
 * figures are logged. The read itself is timed and added to the reading total. Finally
 * a new record reader is created, the assembly start timestamp is reset, the loaded-row
 * total grows by the row group's row count, and {@code currentBlock} is incremented.
 *
 * @throws IOException if the reader returns no further row group
 */
private void checkRead() throws IOException {
  if (current != totalCountLoadedSoFar) {
    return;
  }

  if (current != 0) {
    final long now = System.currentTimeMillis();
    totalTimeSpentProcessingRecords += (now - startedAssemblingCurrentBlockAt);
    if (Log.isLoggingFor("info")) {
      Log.info("Assembled and processed " + totalCountLoadedSoFar
          + " records from " + columnCount
          + " columns in " + totalTimeSpentProcessingRecords
          + " ms: " + ((float)totalCountLoadedSoFar / totalTimeSpentProcessingRecords)
          + " rec/ms, " + ((float)totalCountLoadedSoFar * columnCount / totalTimeSpentProcessingRecords)
          + " cell/ms");
      final long elapsedTotal = totalTimeSpentProcessingRecords + totalTimeSpentReadingBytes;
      // Skip the percentage report when no time was recorded (avoids dividing by zero).
      if (elapsedTotal != 0) {
        final long readingShare = 100 * totalTimeSpentReadingBytes / elapsedTotal;
        final long processingShare = 100 * totalTimeSpentProcessingRecords / elapsedTotal;
        Log.info("time spent so far " + readingShare
            + "% reading (" + totalTimeSpentReadingBytes
            + " ms) and " + processingShare
            + "% processing (" + totalTimeSpentProcessingRecords + " ms)");
      }
    }
  }

  Log.info("at row " + current + ". reading next block");
  final long readStart = System.currentTimeMillis();
  final PageReadStore rowGroup = reader.readNextRowGroup();
  if (null == rowGroup) {
    throw new IOException("expecting more rows but reached last block. Read " + current + " out of " + total);
  }
  final long readMillis = System.currentTimeMillis() - readStart;
  totalTimeSpentReadingBytes += readMillis;
  if (Log.isLoggingFor("info")) {
    Log.info("block read in memory in " + readMillis + " ms. row count = " + rowGroup.getRowCount());
  }
  if (Log.isLoggingFor("debug")) {
    Log.debug("initializing Record assembly with requested schema " + requestedSchema);
  }

  final MessageColumnIO messageIO =
      columnIOFactory.getColumnIO(requestedSchema, fileSchema, strictTypeChecking);
  recordReader = messageIO.getRecordReader(rowGroup, recordConverter, filter);
  startedAssemblingCurrentBlockAt = System.currentTimeMillis();
  totalCountLoadedSoFar += rowGroup.getRowCount();
  currentBlock++;
}
 
开发者ID:h2oai,项目名称:h2o-3,代码行数:33,代码来源:H2OInternalParquetReader.java

示例6: checkRead

import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
/**
 * Loads the next row group when {@code current} has caught up with
 * {@code totalCountLoadedSoFar}; otherwise does nothing.
 *
 * Before loading, if any rows were already processed, the time spent on the previous
 * block is added to the processing total and, when info logging is enabled, throughput
 * figures are logged. The read itself is timed, added to the reading total and reported
 * to {@code BenchmarkCounter}. The new record reader uses {@code filter} only when
 * {@code filterRecords} is set, and {@code FilterCompat.NOOP} otherwise. Finally the
 * assembly start timestamp is reset, the loaded-row total grows by the row group's row
 * count, and {@code currentBlock} is incremented.
 *
 * @throws IOException if the reader returns no further row group
 */
private void checkRead() throws IOException {
  if (current != totalCountLoadedSoFar) {
    return;
  }

  if (current != 0) {
    final long now = System.currentTimeMillis();
    totalTimeSpentProcessingRecords += (now - startedAssemblingCurrentBlockAt);
    if (LOG.isInfoEnabled()) {
      LOG.info("Assembled and processed " + totalCountLoadedSoFar
          + " records from " + columnCount
          + " columns in " + totalTimeSpentProcessingRecords
          + " ms: " + ((float)totalCountLoadedSoFar / totalTimeSpentProcessingRecords)
          + " rec/ms, " + ((float)totalCountLoadedSoFar * columnCount / totalTimeSpentProcessingRecords)
          + " cell/ms");
      final long elapsedTotal = totalTimeSpentProcessingRecords + totalTimeSpentReadingBytes;
      // Skip the percentage report when no time was recorded (avoids dividing by zero).
      if (elapsedTotal != 0) {
        final long readingShare = 100 * totalTimeSpentReadingBytes / elapsedTotal;
        final long processingShare = 100 * totalTimeSpentProcessingRecords / elapsedTotal;
        LOG.info("time spent so far " + readingShare
            + "% reading (" + totalTimeSpentReadingBytes
            + " ms) and " + processingShare
            + "% processing (" + totalTimeSpentProcessingRecords + " ms)");
      }
    }
  }

  LOG.info("at row " + current + ". reading next block");
  final long readStart = System.currentTimeMillis();
  final PageReadStore rowGroup = reader.readNextRowGroup();
  if (null == rowGroup) {
    throw new IOException("expecting more rows but reached last block. Read " + current + " out of " + total);
  }
  final long readMillis = System.currentTimeMillis() - readStart;
  totalTimeSpentReadingBytes += readMillis;
  BenchmarkCounter.incrementTime(readMillis);
  if (LOG.isInfoEnabled()) {
    LOG.info("block read in memory in {} ms. row count = {}", readMillis, rowGroup.getRowCount());
  }
  LOG.debug("initializing Record assembly with requested schema {}", requestedSchema);

  final MessageColumnIO messageIO =
      columnIOFactory.getColumnIO(requestedSchema, fileSchema, strictTypeChecking);
  recordReader = messageIO.getRecordReader(
      rowGroup,
      recordConverter,
      filterRecords ? filter : FilterCompat.NOOP);
  startedAssemblingCurrentBlockAt = System.currentTimeMillis();
  totalCountLoadedSoFar += rowGroup.getRowCount();
  currentBlock++;
}
 
开发者ID:apache,项目名称:parquet-mr,代码行数:35,代码来源:InternalParquetRecordReader.java


注:本文中的org.apache.parquet.column.page.PageReadStore.getRowCount方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。