本文整理汇总了Java中org.apache.parquet.column.page.PageReadStore.getRowCount方法的典型用法代码示例。如果您正苦于以下问题:Java PageReadStore.getRowCount方法的具体用法?Java PageReadStore.getRowCount怎么用?Java PageReadStore.getRowCount使用的例子?那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.parquet.column.page.PageReadStore的用法示例。
在下文中一共展示了PageReadStore.getRowCount方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: validatePages
import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
/**
 * Validates every column of every row group in {@code file} against the matching slice of
 * {@code expectedValues}.
 *
 * <p>Row groups are visited in the order returned by {@code readBlocksFromFile}. The slice
 * for a row group starts at the number of rows covered by the preceding groups and spans that
 * group's {@link PageReadStore#getRowCount()}. For each column the pages and a reusable copy
 * of the dictionary page are handed to {@code validateFirstToLast} and then
 * {@code validateLastToFirst}.
 *
 * @param file file whose row groups and schema are read
 * @param expectedValues expected values for the whole file, in row order
 * @throws IOException propagated from the helpers that read and validate the file
 * @throws IndexOutOfBoundsException if a row group extends past the end of
 *     {@code expectedValues}
 */
public static void validatePages(Path file, List<?> expectedValues) throws IOException {
  List<PageReadStore> blockReaders = readBlocksFromFile(file);
  MessageType fileSchema = readSchemaFromFile(file);
  int rowGroupID = 0;
  int rowsRead = 0;
  for (PageReadStore pageReadStore : blockReaders) {
    // Do the arithmetic in long and check the bound before narrowing, so an oversized row
    // count is reported instead of silently wrapping around in the int cast.
    long rowGroupEnd = rowsRead + pageReadStore.getRowCount();
    if (rowGroupEnd > expectedValues.size()) {
      throw new IndexOutOfBoundsException("row group " + rowGroupID + " ends at row "
          + rowGroupEnd + " but only " + expectedValues.size() + " expected values were given");
    }
    // The slice depends only on the row group, not on the column: compute it once.
    List<?> expectedRowGroupValues = expectedValues.subList(rowsRead, (int) rowGroupEnd);
    for (ColumnDescriptor columnsDesc : fileSchema.getColumns()) {
      List<DataPage> pageGroup = getPageGroupForColumn(pageReadStore, columnsDesc);
      DictionaryPage dictPage = reusableCopy(getDictionaryPageForColumn(pageReadStore, columnsDesc));
      validateFirstToLast(rowGroupID, dictPage, pageGroup, columnsDesc, expectedRowGroupValues);
      validateLastToFirst(rowGroupID, dictPage, pageGroup, columnsDesc, expectedRowGroupValues);
    }
    rowsRead = (int) rowGroupEnd;
    rowGroupID++;
  }
}
示例2: checkRead
import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
/**
 * Loads the next row group once {@code current} has caught up with
 * {@code totalCountLoadedSoFar}; otherwise does nothing.
 *
 * <p>On a load, a new {@code recordReader} is built over the row group's pages,
 * {@code totalCountLoadedSoFar} grows by the row group's row count and {@code currentBlock}
 * is incremented.
 *
 * @throws IOException if the reader returns no further row group, or propagated from the
 *     read itself
 */
private void checkRead() throws IOException {
  if (current != totalCountLoadedSoFar) {
    // Rows from the current row group are still pending.
    return;
  }

  final PageReadStore nextRowGroup = reader.readNextRowGroup();
  if (null == nextRowGroup) {
    throw new IOException("expecting more rows but reached last block. Read " + current + " out of " + total);
  }

  final MessageColumnIO messageIO =
      columnIOFactory.getColumnIO(requestedSchema, fileSchema, strictTypeChecking);
  recordReader = messageIO.getRecordReader(nextRowGroup, recordConverter, filter);
  totalCountLoadedSoFar += nextRowGroup.getRowCount();
  currentBlock++;
}
示例3: load
import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
/**
 * Reads the Parquet file named by {@code this.filename} into a {@link Table}.
 *
 * <p>The footer is read first to obtain the schema; then every row group is read in turn,
 * each record is materialized as a {@code Group} and appended to the columns created by
 * {@code createColumns}. All columns are sealed before the table is built.
 *
 * <p>Note: this sets the JVM-wide system property {@code hadoop.home.dir} to {@code "/"}.
 *
 * @return a table built from the columns filled from the file
 * @throws RuntimeException wrapping any {@link IOException} raised while reading
 */
public ITable load() {
  try {
    Configuration conf = new Configuration();
    System.setProperty("hadoop.home.dir", "/");
    conf.set("hadoop.security.authentication", "simple");
    conf.set("hadoop.security.authorization", "false");
    Path path = new Path(this.filename);
    ParquetMetadata md = ParquetFileReader.readFooter(conf, path,
        ParquetMetadataConverter.NO_FILTER);
    MessageType schema = md.getFileMetaData().getSchema();
    // try-with-resources: the reader holds an open file stream and must be closed even when
    // reading fails part-way through.
    try (ParquetFileReader r = new ParquetFileReader(conf, path, md)) {
      IAppendableColumn[] cols = this.createColumns(md);
      MessageColumnIO columnIO = new ColumnIOFactory().getColumnIO(schema);
      PageReadStore pages;
      while (null != (pages = r.readNextRowGroup())) {
        final long rows = pages.getRowCount();
        RecordReader<Group> recordReader = columnIO.getRecordReader(
            pages, new GroupRecordConverter(schema));
        // long index: the row count is a long, and an int counter would overflow before
        // reaching a count above Integer.MAX_VALUE.
        for (long i = 0; i < rows; i++) {
          Group g = recordReader.read();
          // 'schema' is the schema taken from the footer above; no need to re-fetch it per row.
          appendGroup(cols, g, schema.getColumns());
        }
      }
      for (IAppendableColumn c : cols) {
        c.seal();
      }
      return new Table(cols);
    }
  } catch (IOException ex) {
    throw new RuntimeException(ex);
  }
}
示例4: checkRead
import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
/**
 * Loads the next row group once {@code current} has caught up with
 * {@code totalCountLoadedSoFar}; otherwise does nothing.
 *
 * <p>Before loading, and unless this is the very first load ({@code current == 0}), the time
 * since {@code startedAssemblingCurrentBlockAt} is added to
 * {@code totalTimeSpentProcessingRecords} and throughput statistics are logged at debug level.
 * The read itself is timed and added to {@code totalTimeSpentReadingBytes} and the benchmark
 * counter. Finally a new {@code recordReader} is built and the loaded-row and block counters
 * are advanced.
 *
 * @throws IOException if the reader returns no further row group, or propagated from the
 *     read itself
 */
private void checkRead() throws IOException {
  if (current != totalCountLoadedSoFar) {
    // Rows from the current row group are still pending.
    return;
  }

  if (current != 0) {
    totalTimeSpentProcessingRecords += (System.currentTimeMillis() - startedAssemblingCurrentBlockAt);
    if (Log.DEBUG) {
      final float recordsPerMs = (float) totalCountLoadedSoFar / totalTimeSpentProcessingRecords;
      final float cellsPerMs = (float) totalCountLoadedSoFar * columnCount / totalTimeSpentProcessingRecords;
      LOG.debug("Assembled and processed " + totalCountLoadedSoFar + " records from " + columnCount
          + " columns in " + totalTimeSpentProcessingRecords + " ms: " + recordsPerMs + " rec/ms, "
          + cellsPerMs + " cell/ms");
      final long elapsedTotal = totalTimeSpentProcessingRecords + totalTimeSpentReadingBytes;
      // Skip the percentage line when no time has been recorded: it would divide by zero.
      if (elapsedTotal != 0) {
        final long readingPct = 100 * totalTimeSpentReadingBytes / elapsedTotal;
        final long processingPct = 100 * totalTimeSpentProcessingRecords / elapsedTotal;
        LOG.debug("time spent so far " + readingPct + "% reading (" + totalTimeSpentReadingBytes
            + " ms) and " + processingPct + "% processing (" + totalTimeSpentProcessingRecords + " ms)");
      }
    }
  }

  if (Log.DEBUG) {
    LOG.debug("at row " + current + ". reading next block");
  }
  final long readStart = System.currentTimeMillis();
  final PageReadStore nextRowGroup = reader.readNextRowGroup();
  if (null == nextRowGroup) {
    throw new IOException("expecting more rows but reached last block. Read " + current + " out of " + total);
  }
  final long readMillis = System.currentTimeMillis() - readStart;
  totalTimeSpentReadingBytes += readMillis;
  BenchmarkCounter.incrementTime(readMillis);
  if (Log.INFO) {
    LOG.info("block read in memory in " + readMillis + " ms. row count = " + nextRowGroup.getRowCount());
  }
  if (Log.DEBUG) {
    LOG.debug("initializing Record assembly with requested schema " + requestedSchema);
  }

  final MessageColumnIO messageIO =
      columnIOFactory.getColumnIO(requestedSchema, fileSchema, strictTypeChecking);
  recordReader = messageIO.getRecordReader(nextRowGroup, recordConverter, filter);
  startedAssemblingCurrentBlockAt = System.currentTimeMillis();
  totalCountLoadedSoFar += nextRowGroup.getRowCount();
  currentBlock++;
}
示例5: checkRead
import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
/**
 * Loads the next row group once {@code current} has caught up with
 * {@code totalCountLoadedSoFar}; otherwise does nothing.
 *
 * <p>Before loading, and unless this is the very first load ({@code current == 0}), the time
 * since {@code startedAssemblingCurrentBlockAt} is added to
 * {@code totalTimeSpentProcessingRecords} and throughput statistics are logged when "info"
 * logging is on. The read itself is timed and added to {@code totalTimeSpentReadingBytes}.
 * Finally a new {@code recordReader} is built and the loaded-row and block counters are
 * advanced.
 *
 * @throws IOException if the reader returns no further row group, or propagated from the
 *     read itself
 */
private void checkRead() throws IOException {
  if (current != totalCountLoadedSoFar) {
    // Rows from the current row group are still pending.
    return;
  }

  if (current != 0) {
    totalTimeSpentProcessingRecords += (System.currentTimeMillis() - startedAssemblingCurrentBlockAt);
    if (Log.isLoggingFor("info")) {
      final float recordsPerMs = (float) totalCountLoadedSoFar / totalTimeSpentProcessingRecords;
      final float cellsPerMs = (float) totalCountLoadedSoFar * columnCount / totalTimeSpentProcessingRecords;
      Log.info("Assembled and processed " + totalCountLoadedSoFar + " records from " + columnCount
          + " columns in " + totalTimeSpentProcessingRecords + " ms: " + recordsPerMs + " rec/ms, "
          + cellsPerMs + " cell/ms");
      final long elapsedTotal = totalTimeSpentProcessingRecords + totalTimeSpentReadingBytes;
      // Skip the percentage line when no time has been recorded: it would divide by zero.
      if (elapsedTotal != 0) {
        final long readingPct = 100 * totalTimeSpentReadingBytes / elapsedTotal;
        final long processingPct = 100 * totalTimeSpentProcessingRecords / elapsedTotal;
        Log.info("time spent so far " + readingPct + "% reading (" + totalTimeSpentReadingBytes
            + " ms) and " + processingPct + "% processing (" + totalTimeSpentProcessingRecords + " ms)");
      }
    }
  }

  Log.info("at row " + current + ". reading next block");
  final long readStart = System.currentTimeMillis();
  final PageReadStore nextRowGroup = reader.readNextRowGroup();
  if (null == nextRowGroup) {
    throw new IOException("expecting more rows but reached last block. Read " + current + " out of " + total);
  }
  final long readMillis = System.currentTimeMillis() - readStart;
  totalTimeSpentReadingBytes += readMillis;
  if (Log.isLoggingFor("info")) {
    Log.info("block read in memory in " + readMillis + " ms. row count = " + nextRowGroup.getRowCount());
  }
  if (Log.isLoggingFor("debug")) {
    Log.debug("initializing Record assembly with requested schema " + requestedSchema);
  }

  final MessageColumnIO messageIO =
      columnIOFactory.getColumnIO(requestedSchema, fileSchema, strictTypeChecking);
  recordReader = messageIO.getRecordReader(nextRowGroup, recordConverter, filter);
  startedAssemblingCurrentBlockAt = System.currentTimeMillis();
  totalCountLoadedSoFar += nextRowGroup.getRowCount();
  currentBlock++;
}
示例6: checkRead
import org.apache.parquet.column.page.PageReadStore; //导入方法依赖的package包/类
/**
 * Loads the next row group once {@code current} has caught up with
 * {@code totalCountLoadedSoFar}; otherwise does nothing.
 *
 * <p>Before loading, and unless this is the very first load ({@code current == 0}), the time
 * since {@code startedAssemblingCurrentBlockAt} is added to
 * {@code totalTimeSpentProcessingRecords} and throughput statistics are logged at info level.
 * The read itself is timed and added to {@code totalTimeSpentReadingBytes} and the benchmark
 * counter. A new {@code recordReader} is then built, using {@code filter} only when
 * {@code filterRecords} is set and {@code FilterCompat.NOOP} otherwise, and the loaded-row and
 * block counters are advanced.
 *
 * <p>All log statements use {@code {}} placeholders, so message strings are only assembled
 * when the corresponding level is enabled.
 *
 * @throws IOException if the reader returns no further row group, or propagated from the
 *     read itself
 */
private void checkRead() throws IOException {
  if (current == totalCountLoadedSoFar) {
    if (current != 0) {
      totalTimeSpentProcessingRecords += (System.currentTimeMillis() - startedAssemblingCurrentBlockAt);
      // The guard stays: it avoids the float math below when info logging is off.
      if (LOG.isInfoEnabled()) {
        final float recordsPerMs = (float) totalCountLoadedSoFar / totalTimeSpentProcessingRecords;
        final float cellsPerMs = (float) totalCountLoadedSoFar * columnCount / totalTimeSpentProcessingRecords;
        LOG.info("Assembled and processed {} records from {} columns in {} ms: {} rec/ms, {} cell/ms",
            totalCountLoadedSoFar, columnCount, totalTimeSpentProcessingRecords, recordsPerMs, cellsPerMs);
        final long totalTime = totalTimeSpentProcessingRecords + totalTimeSpentReadingBytes;
        // Skip the percentage line when no time has been recorded: it would divide by zero.
        if (totalTime != 0) {
          final long percentReading = 100 * totalTimeSpentReadingBytes / totalTime;
          final long percentProcessing = 100 * totalTimeSpentProcessingRecords / totalTime;
          LOG.info("time spent so far {}% reading ({} ms) and {}% processing ({} ms)",
              percentReading, totalTimeSpentReadingBytes, percentProcessing, totalTimeSpentProcessingRecords);
        }
      }
    }

    LOG.info("at row {}. reading next block", current);
    long t0 = System.currentTimeMillis();
    PageReadStore pages = reader.readNextRowGroup();
    if (pages == null) {
      throw new IOException("expecting more rows but reached last block. Read " + current + " out of " + total);
    }
    long timeSpentReading = System.currentTimeMillis() - t0;
    totalTimeSpentReadingBytes += timeSpentReading;
    BenchmarkCounter.incrementTime(timeSpentReading);
    LOG.info("block read in memory in {} ms. row count = {}", timeSpentReading, pages.getRowCount());
    LOG.debug("initializing Record assembly with requested schema {}", requestedSchema);
    MessageColumnIO columnIO = columnIOFactory.getColumnIO(requestedSchema, fileSchema, strictTypeChecking);
    recordReader = columnIO.getRecordReader(pages, recordConverter,
        filterRecords ? filter : FilterCompat.NOOP);
    startedAssemblingCurrentBlockAt = System.currentTimeMillis();
    totalCountLoadedSoFar += pages.getRowCount();
    ++currentBlock;
  }
}