

Java ParquetProperties Class Code Examples

This article collects typical usage examples of the Java class org.apache.parquet.column.ParquetProperties. If you are asking yourself what ParquetProperties is for, how to use it, or what real-world code that uses it looks like, the curated examples below should help.


The ParquetProperties class belongs to the org.apache.parquet.column package. Fifteen code examples are shown below, sorted by popularity by default.
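
Most of the examples below configure ParquetProperties through its builder. As a quick orientation, here is a minimal sketch of that pattern; the helper name and size values are illustrative, not taken from any single example:

import org.apache.parquet.column.ParquetProperties;

static ParquetProperties exampleProperties() {
  return ParquetProperties.builder()
      .withPageSize(1024 * 1024)           // data page size threshold, in bytes
      .withDictionaryPageSize(512 * 1024)  // dictionary page size threshold, in bytes
      .withDictionaryEncoding(true)        // turn dictionary encoding on
      .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0)
      .build();
}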

Example 1: generateEmptyWithSchema

import org.apache.parquet.column.ParquetProperties; // import the required package/class
static File generateEmptyWithSchema(File parentDir, String filename) throws IOException {
  File f = new File(parentDir, filename);

  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required int64 timestamp_field (TIMESTAMP_MILLIS);"
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema); // unused here: no rows are written
  // Open and immediately close the writer, producing a file that contains only the schema.
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, false, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  writer.close();

  return f;
}
 
Developer: h2oai | Project: h2o-3 | Lines: 21 | Source: ParseTestParquet.java

Example 2: generateSparseParquetFile

import org.apache.parquet.column.ParquetProperties; // import the required package/class
static File generateSparseParquetFile(File parentDir, String filename, int nrows) throws IOException {
  File f = new File(parentDir, filename);

  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
          "message test { optional int32 int32_field; optional binary string_field (UTF8); required int32 row; optional int32 int32_field2; } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
          UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    // Only every 10th row carries values in the optional columns; the rest stay null,
    // which makes the resulting file sparse.
    for (int i = 0; i < nrows; i++) {
      Group g = fact.newGroup();
      if (i % 10 == 0) { g = g.append("int32_field", i); }
      if (i % 10 == 0) { g = g.append("string_field", "CAT_" + (i % 10)); }
      if (i % 10 == 0) { g = g.append("int32_field2", i); }
      writer.write(g.append("row", i));
    }
  } finally {
    writer.close();
  }
  return f;
}
 
Developer: h2oai | Project: h2o-3 | Lines: 24 | Source: ParseTestParquet.java

Example 3: generateParquetFileWithNullCharacters

import org.apache.parquet.column.ParquetProperties; // import the required package/class
static File generateParquetFileWithNullCharacters(File parentDir, String filename, int nrows) throws IOException {
  File f = new File(parentDir, filename);

  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
          "message test { optional binary cat_field (UTF8); } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
          UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      Group g = fact.newGroup();
      String value = i == 66 ? "CAT_0_weird\0" : "CAT_" + (i % 10); // row 66 embeds a NUL character
      writer.write(g.append("cat_field", value));
    }
  } finally {
    writer.close();
  }
  return f;
}
 
Developer: h2oai | Project: h2o-3 | Lines: 22 | Source: ParseTestParquet.java

Example 4: dictionaryWriter

import org.apache.parquet.column.ParquetProperties; // import the required package/class
static DictionaryValuesWriter dictionaryWriter(ColumnDescriptor path, ParquetProperties properties, Encoding dictPageEncoding, Encoding dataPageEncoding) {
  switch (path.getType()) {
    case BOOLEAN:
      throw new IllegalArgumentException("no dictionary encoding for BOOLEAN");
    case BINARY:
      return new DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case INT32:
      return new DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case INT64:
      return new DictionaryValuesWriter.PlainLongDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case INT96:
      // INT96 values are fixed-length 12-byte arrays
      return new DictionaryValuesWriter.PlainFixedLenArrayDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), 12, dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case DOUBLE:
      return new DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case FLOAT:
      return new DictionaryValuesWriter.PlainFloatDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case FIXED_LEN_BYTE_ARRAY:
      return new DictionaryValuesWriter.PlainFixedLenArrayDictionaryValuesWriter(properties.getDictionaryPageSizeThreshold(), path.getTypeLength(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    default:
      throw new IllegalArgumentException("Unknown type " + path.getType());
  }
}
 
Developer: apache | Project: parquet-mr | Lines: 23 | Source: DefaultValuesWriterFactory.java
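
For context, the default factories in parquet-mr call dictionaryWriter with a writer-version-specific pair of encodings. A hedged usage sketch follows; column and props are placeholders, and the encoding pair shown is our understanding of what the version-2 default factory passes (dictionary page PLAIN-encoded, data pages referencing it via RLE_DICTIONARY):

import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.values.dictionary.DictionaryValuesWriter;

// column: a ColumnDescriptor for any non-BOOLEAN primitive type
// props: a configured ParquetProperties instance
DictionaryValuesWriter writer =
    dictionaryWriter(column, props, Encoding.PLAIN, Encoding.RLE_DICTIONARY);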

Example 5: ColumnWriteStoreV2

import org.apache.parquet.column.ParquetProperties; // import the required package/class
public ColumnWriteStoreV2(
    MessageType schema,
    PageWriteStore pageWriteStore,
    ParquetProperties props) {
  this.props = props;
  this.thresholdTolerance = (long)(props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);
  Map<ColumnDescriptor, ColumnWriterV2> mcolumns = new TreeMap<ColumnDescriptor, ColumnWriterV2>();
  for (ColumnDescriptor path : schema.getColumns()) {
    PageWriter pageWriter = pageWriteStore.getPageWriter(path);
    mcolumns.put(path, new ColumnWriterV2(path, pageWriter, props));
  }
  this.columns = unmodifiableMap(mcolumns);
  this.writers = this.columns.values();

  this.rowCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
}
 
Developer: apache | Project: parquet-mr | Lines: 17 | Source: ColumnWriteStoreV2.java

Example 6: write

import org.apache.parquet.column.ParquetProperties; // import the required package/class
private static void write(MemPageStore memPageStore) {
  ColumnWriteStoreV1 columns = new ColumnWriteStoreV1(
      memPageStore,
      ParquetProperties.builder()
          .withPageSize(50*1024*1024)
          .withDictionaryEncoding(false)
          .build());
  MessageColumnIO columnIO = newColumnFactory(schema);

  GroupWriter groupWriter = new GroupWriter(columnIO.getRecordWriter(columns), schema);
  groupWriter.write(r1);
  groupWriter.write(r2);

  write(memPageStore, groupWriter, 10000);
  write(memPageStore, groupWriter, 10000);
  write(memPageStore, groupWriter, 10000);
  write(memPageStore, groupWriter, 10000);
  write(memPageStore, groupWriter, 10000);
  write(memPageStore, groupWriter, 100000);
  write(memPageStore, groupWriter, 1000000);
  columns.flush();
  System.out.println();
  System.out.println(columns.getBufferedSize() + " bytes used total");
  System.out.println("max col size: "+columns.maxColMemSize()+" bytes");
}
 
Developer: apache | Project: parquet-mr | Lines: 26 | Source: PerfTest.java

Example 7: writeTestRecords

import org.apache.parquet.column.ParquetProperties; // import the required package/class
private MemPageStore writeTestRecords(MessageColumnIO columnIO, int number) {
  MemPageStore memPageStore = new MemPageStore(number * 2);
  ColumnWriteStoreV1 columns = new ColumnWriteStoreV1(
      memPageStore,
      ParquetProperties.builder()
          .withPageSize(800)
          .withDictionaryEncoding(false)
          .build());

  RecordConsumer recordWriter = columnIO.getRecordWriter(columns);
  GroupWriter groupWriter = new GroupWriter(recordWriter, schema);
  for ( int i = 0; i < number; i++ ) {
    groupWriter.write(r1);
    groupWriter.write(r2);
  }
  recordWriter.flush();
  columns.flush();
  return memPageStore;
}
 
Developer: apache | Project: parquet-mr | Lines: 20 | Source: TestFiltered.java

Example 8: InternalParquetRecordWriter

import org.apache.parquet.column.ParquetProperties; // import the required package/class
/**
 * @param parquetFileWriter the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param rowGroupSize the size of a block in the file (this will be approximate)
 * @param compressor the codec used to compress
 * @param validating whether to validate written records against the schema
 * @param props the ParquetProperties carrying page, dictionary, and writer-version settings
 */
public InternalParquetRecordWriter(
    ParquetFileWriter parquetFileWriter,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long rowGroupSize,
    BytesCompressor compressor,
    boolean validating,
    ParquetProperties props) {
  this.parquetFileWriter = parquetFileWriter;
  this.writeSupport = checkNotNull(writeSupport, "writeSupport");
  this.schema = schema;
  this.extraMetaData = extraMetaData;
  this.rowGroupSize = rowGroupSize;
  this.rowGroupSizeThreshold = rowGroupSize;
  this.nextRowGroupSize = rowGroupSizeThreshold;
  this.compressor = compressor;
  this.validating = validating;
  this.props = props;
  initStore();
}
 
Developer: apache | Project: parquet-mr | Lines: 30 | Source: InternalParquetRecordWriter.java

Example 9: ParquetRecordWriter

import org.apache.parquet.column.ParquetProperties; // import the required package/class
/**
 * @param w the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate)
 * @param pageSize the page size threshold
 * @param compressor the compressor used to compress the pages
 * @param dictionaryPageSize the threshold for dictionary size
 * @param enableDictionary to enable the dictionary
 * @param validating if schema validation should be turned on
 * @param writerVersion the writer version to use, from {@link ParquetProperties.WriterVersion}
 */
@Deprecated
public ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    int blockSize, int pageSize,
    BytesCompressor compressor,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion) {
  ParquetProperties props = ParquetProperties.builder()
      .withPageSize(pageSize)
      .withDictionaryPageSize(dictionaryPageSize)
      .withDictionaryEncoding(enableDictionary)
      .withWriterVersion(writerVersion)
      .build();
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, compressor, validating, props);
  this.memoryManager = null;
  this.codecFactory = null;
}
 
Developer: apache | Project: parquet-mr | Lines: 36 | Source: ParquetRecordWriter.java

Example 10: ParquetWriter

import org.apache.parquet.column.ParquetProperties; // import the required package/class
/**
 * Create a new ParquetWriter.
 *
 * @param file the file to create
 * @param mode file creation mode
 * @param writeSupport the implementation to write a record to a RecordConsumer
 * @param compressionCodecName the compression codec to use
 * @param blockSize the block size threshold
 * @param pageSize the page size threshold
 * @param dictionaryPageSize the page size threshold for the dictionary pages
 * @param enableDictionary to turn dictionary encoding on
 * @param validating to turn on validation using the schema
 * @param writerVersion the writer version to use, from {@link ParquetProperties.WriterVersion}
 * @param conf Hadoop configuration to use while accessing the filesystem
 * @throws IOException if the file cannot be created
 */
@Deprecated
public ParquetWriter(
    Path file,
    ParquetFileWriter.Mode mode,
    WriteSupport<T> writeSupport,
    CompressionCodecName compressionCodecName,
    int blockSize,
    int pageSize,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion,
    Configuration conf) throws IOException {
  this(HadoopOutputFile.fromPath(file, conf),
      mode, writeSupport, compressionCodecName, blockSize,
      validating, conf, MAX_PADDING_SIZE_DEFAULT,
      ParquetProperties.builder()
          .withPageSize(pageSize)
          .withDictionaryPageSize(dictionaryPageSize)
          .withDictionaryEncoding(enableDictionary)
          .withWriterVersion(writerVersion)
          .build());
}
 
Developer: apache | Project: parquet-mr | Lines: 40 | Source: ParquetWriter.java
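
Because this constructor is deprecated, new code would normally go through the builder API instead. A sketch using the example write support from parquet-hadoop, assuming Group records as in the tests above (file, conf, and schema are placeholders for a Path, a Configuration, and a MessageType):

import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

ParquetWriter<Group> writer = ExampleParquetWriter.builder(file)
    .withConf(conf)
    .withType(schema)
    .withCompressionCodec(CompressionCodecName.UNCOMPRESSED)
    .withDictionaryEncoding(true)
    .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_2_0)
    .build();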

Example 11: writeAndTest

import org.apache.parquet.column.ParquetProperties; // import the required package/class
public static void writeAndTest(WriteContext context) throws IOException {
  // Create the configuration, and then apply the schema to our configuration.
  Configuration configuration = new Configuration();
  GroupWriteSupport.setSchema(context.schema, configuration);
  GroupWriteSupport groupWriteSupport = new GroupWriteSupport();

  // Create the writer properties
  final int blockSize = context.blockSize;
  final int pageSize = context.pageSize;
  final int dictionaryPageSize = pageSize;
  final boolean enableDictionary = context.enableDictionary;
  final boolean enableValidation = context.enableValidation;
  ParquetProperties.WriterVersion writerVersion = context.version;
  CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;

  ParquetWriter<Group> writer = new ParquetWriter<Group>(context.fsPath,
      groupWriteSupport, codec, blockSize, pageSize, dictionaryPageSize,
      enableDictionary, enableValidation, writerVersion, configuration);

  context.write(writer);
  writer.close();

  context.test();

  context.path.delete();
}
 
Developer: apache | Project: parquet-mr | Lines: 27 | Source: TestStatistics.java

Example 12: main

import org.apache.parquet.column.ParquetProperties; // import the required package/class
public static void main(String[] args) throws Exception {
  String pigSchema = pigSchema(false, false);
  String pigSchemaProjected = pigSchema(true, false);
  String pigSchemaNoString = pigSchema(true, true);
  MessageType schema = new PigSchemaConverter().convert(Utils.getSchemaFromString(pigSchema));

  MemPageStore memPageStore = new MemPageStore(0);
  ColumnWriteStoreV1 columns = new ColumnWriteStoreV1(
      memPageStore, ParquetProperties.builder()
          .withPageSize(50*1024*1024)
          .withDictionaryEncoding(false)
          .build());
  write(memPageStore, columns, schema, pigSchema);
  columns.flush();
  read(memPageStore, pigSchema, pigSchemaProjected, pigSchemaNoString);
  System.out.println(columns.getBufferedSize()+" bytes used total");
  System.out.println("max col size: "+columns.maxColMemSize()+" bytes");
}
 
Developer: apache | Project: parquet-mr | Lines: 19 | Source: TupleConsumerPerfTest.java

Example 13: newColumnChunkPageWriteStore

import org.apache.parquet.column.ParquetProperties; // import the required package/class
public static ColumnChunkPageWriteStore newColumnChunkPageWriteStore(
    BytesCompressor compressor,
    MessageType schema,
    ParquetProperties parquetProperties
    ) {
  return new ColumnChunkPageWriteStore(compressor, schema, parquetProperties);
}
 
Developer: dremio | Project: dremio-oss | Lines: 8 | Source: ColumnChunkPageWriteStoreExposer.java

Example 14: newSchema

import org.apache.parquet.column.ParquetProperties; // import the required package/class
private void newSchema() throws IOException {
  // Reset it to half of current number and bound it within the limits
  recordCountForNextMemCheck = min(max(MINIMUM_RECORD_COUNT_FOR_CHECK, recordCountForNextMemCheck / 2), MAXIMUM_RECORD_COUNT_FOR_CHECK);

  String json = new Schema(batchSchema).toJson();
  extraMetaData.put(DREMIO_ARROW_SCHEMA, json);
  List<Type> types = Lists.newArrayList();
  for (Field field : batchSchema) {
    if (field.getName().equalsIgnoreCase(WriterPrel.PARTITION_COMPARATOR_FIELD)) {
      continue;
    }
    Type childType = getType(field);
    if (childType != null) {
      types.add(childType);
    }
  }
  Preconditions.checkState(types.size() > 0, "No types for parquet schema");
  schema = new MessageType("root", types);

  int dictionarySize = (int)context.getOptions().getOption(ExecConstants.PARQUET_DICT_PAGE_SIZE_VALIDATOR);
  final ParquetProperties parquetProperties = new ParquetProperties(dictionarySize, writerVersion, enableDictionary,
    new ParquetDirectByteBufferAllocator(columnEncoderAllocator), pageSize, true, enableDictionaryForBinary);
  pageStore = ColumnChunkPageWriteStoreExposer.newColumnChunkPageWriteStore(codecFactory.getCompressor(codec), schema, parquetProperties);
  store = new ColumnWriteStoreV1(pageStore, pageSize, parquetProperties);
  MessageColumnIO columnIO = new ColumnIOFactory(false).getColumnIO(this.schema);
  consumer = columnIO.getRecordWriter(store);
  setUp(schema, consumer);
}
 
Developer: dremio | Project: dremio-oss | Lines: 29 | Source: ParquetRecordWriter.java

Example 15: run

import org.apache.parquet.column.ParquetProperties; // import the required package/class
public void run() {
    Configuration conf = new Configuration();
    int blockSize = 1 * 1024;
    int pageSize = 1 * 1024;
    int dictionaryPageSize = 512;
    boolean enableDictionary = false;
    boolean validating = false;
    Path basePath = new Path("file:///Users/Jelly/Developer/test");
    MessageType schema = MessageTypeParser.parseMessageType("message test {" +
            "required binary id; " +
            "required binary content; " +
            "required int64 int64_field; " +
            "}");
    GroupWriteSupport writeSupport = new GroupWriteSupport();
    GroupWriteSupport.setSchema(schema, conf); // setSchema is static, so call it on the class
    SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);

    try {
        ParquetWriter<Group> parquetWriter = new ParquetWriter<Group>(
                basePath,
                writeSupport,
                CompressionCodecName.UNCOMPRESSED,
                blockSize, pageSize, dictionaryPageSize,
                enableDictionary,
                validating,
                ParquetProperties.WriterVersion.PARQUET_2_0,
                conf);
        for (int i = 0; i < 50000; i++) {
            parquetWriter.write(groupFactory.newGroup()
                    .append("id", "10")
                    .append("content", "test" + i)
                    .append("int64_field", Long.valueOf(i)));
        }
        parquetWriter.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
 
Developer: dbiir | Project: RealtimeAnalysis | Lines: 39 | Source: ParquetWriterThread.java


Note: The org.apache.parquet.column.ParquetProperties class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects and copyright remains with their original authors; consult each project's License before distributing or reusing the code, and do not reproduce this article without permission.