This article collects typical usage examples of the Java class org.apache.parquet.column.ParquetProperties. If you are wondering what ParquetProperties is for and how to use it, the curated code examples below should help.
ParquetProperties belongs to the org.apache.parquet.column package. Fifteen code examples are shown below, ordered by popularity.
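Before diving in: ParquetProperties bundles the write-time tuning knobs (page size, dictionary settings, writer version) and, in current releases, is constructed through a fluent builder. As a quick orientation, here is a minimal sketch using only builder methods that also appear in the examples below (the class name is illustrative):

import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.column.ParquetProperties.WriterVersion;

public class ParquetPropertiesDemo {
  public static void main(String[] args) {
    // Build write-time tuning properties via the fluent builder.
    ParquetProperties props = ParquetProperties.builder()
        .withPageSize(1024 * 1024)            // data page size threshold, in bytes
        .withDictionaryPageSize(512 * 1024)   // dictionary page size threshold
        .withDictionaryEncoding(true)         // enable dictionary encoding
        .withWriterVersion(WriterVersion.PARQUET_2_0)
        .build();
    System.out.println(props.getWriterVersion());
  }
}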
Example 1: generateEmptyWithSchema
import org.apache.parquet.column.ParquetProperties; // import the required package/class
static File generateEmptyWithSchema(File parentDir, String filename) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { "
          + "required int32 int32_field; "
          + "required int64 int64_field; "
          + "required float float_field; "
          + "required double double_field; "
          + "required int64 timestamp_field (TIMESTAMP_MILLIS);"
          + "} ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, false, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  writer.close();
  return f;
}
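One way to check that such an empty file still carries its schema is to read the footer back. A minimal sketch, assuming the parquet-hadoop ParquetFileReader API (this readFooter overload is deprecated in newer releases; readSchema is a hypothetical helper name):

import java.io.File;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.schema.MessageType;

// The file contains zero rows, but its footer still carries the full schema.
static MessageType readSchema(File f) throws IOException {
  return ParquetFileReader.readFooter(new Configuration(), new Path(f.getPath()))
      .getFileMetaData().getSchema();
}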
Example 2: generateSparseParquetFile
import org.apache.parquet.column.ParquetProperties; // import the required package/class
static File generateSparseParquetFile(File parentDir, String filename, int nrows) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { optional int32 int32_field; optional binary string_field (UTF8); "
          + "required int32 row; optional int32 int32_field2; } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      Group g = fact.newGroup();
      if (i % 10 == 0) { g = g.append("int32_field", i); }
      if (i % 10 == 0) { g = g.append("string_field", "CAT_" + (i % 10)); }
      if (i % 10 == 0) { g = g.append("int32_field2", i); }
      writer.write(g.append("row", i));
    }
  } finally {
    writer.close();
  }
  return f;
}
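To see what the sparse layout looks like on read, here is a hedged sketch using parquet-hadoop's GroupReadSupport (dumpSparse is a hypothetical helper; checking the field repetition count is how the example Group API signals an absent optional value):

import java.io.File;
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.example.GroupReadSupport;

// Absent optional values show up as a field repetition count of 0.
static void dumpSparse(File f) throws IOException {
  try (ParquetReader<Group> reader =
           ParquetReader.builder(new GroupReadSupport(), new Path(f.getPath())).build()) {
    Group g;
    while ((g = reader.read()) != null) {
      boolean present = g.getFieldRepetitionCount("int32_field") > 0;
      System.out.println(g.getInteger("row", 0) + " -> "
          + (present ? g.getInteger("int32_field", 0) : "(missing)"));
    }
  }
}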
Example 3: generateParquetFileWithNullCharacters
import org.apache.parquet.column.ParquetProperties; // import the required package/class
static File generateParquetFileWithNullCharacters(File parentDir, String filename, int nrows) throws IOException {
  File f = new File(parentDir, filename);
  Configuration conf = new Configuration();
  MessageType schema = parseMessageType(
      "message test { optional binary cat_field (UTF8); } ");
  GroupWriteSupport.setSchema(schema, conf);
  SimpleGroupFactory fact = new SimpleGroupFactory(schema);
  ParquetWriter<Group> writer = new ParquetWriter<Group>(new Path(f.getPath()), new GroupWriteSupport(),
      UNCOMPRESSED, 1024, 1024, 512, true, false, ParquetProperties.WriterVersion.PARQUET_2_0, conf);
  try {
    for (int i = 0; i < nrows; i++) {
      Group g = fact.newGroup();
      String value = i == 66 ? "CAT_0_weird\0" : "CAT_" + (i % 10);
      writer.write(g.append("cat_field", value));
    }
  } finally {
    writer.close();
  }
  return f;
}
Example 4: dictionaryWriter
import org.apache.parquet.column.ParquetProperties; // import the required package/class
static DictionaryValuesWriter dictionaryWriter(ColumnDescriptor path, ParquetProperties properties,
    Encoding dictPageEncoding, Encoding dataPageEncoding) {
  switch (path.getType()) {
    case BOOLEAN:
      throw new IllegalArgumentException("no dictionary encoding for BOOLEAN");
    case BINARY:
      return new DictionaryValuesWriter.PlainBinaryDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case INT32:
      return new DictionaryValuesWriter.PlainIntegerDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case INT64:
      return new DictionaryValuesWriter.PlainLongDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case INT96:
      return new DictionaryValuesWriter.PlainFixedLenArrayDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), 12, dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case DOUBLE:
      return new DictionaryValuesWriter.PlainDoubleDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case FLOAT:
      return new DictionaryValuesWriter.PlainFloatDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    case FIXED_LEN_BYTE_ARRAY:
      return new DictionaryValuesWriter.PlainFixedLenArrayDictionaryValuesWriter(
          properties.getDictionaryPageSizeThreshold(), path.getTypeLength(), dataPageEncoding, dictPageEncoding, properties.getAllocator());
    default:
      throw new IllegalArgumentException("Unknown type " + path.getType());
  }
}
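A hedged sketch of calling this factory: build a leaf ColumnDescriptor by hand and request a dictionary writer for it. The PLAIN_DICTIONARY encodings chosen here mirror what a PARQUET_1_0 writer would pass; treat the column path and encodings as illustrative assumptions:

import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.column.values.dictionary.DictionaryValuesWriter;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;

// A required INT32 leaf column at path ["id"] (max repetition/definition levels 0).
ColumnDescriptor path = new ColumnDescriptor(new String[] {"id"}, PrimitiveTypeName.INT32, 0, 0);
DictionaryValuesWriter writer = dictionaryWriter(
    path,
    ParquetProperties.builder().build(),  // default thresholds and allocator
    Encoding.PLAIN_DICTIONARY,            // dictionary page encoding
    Encoding.PLAIN_DICTIONARY);           // data page encoding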
Example 5: ColumnWriteStoreV2
import org.apache.parquet.column.ParquetProperties; // import the required package/class
public ColumnWriteStoreV2(
    MessageType schema,
    PageWriteStore pageWriteStore,
    ParquetProperties props) {
  this.props = props;
  this.thresholdTolerance = (long) (props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);
  Map<ColumnDescriptor, ColumnWriterV2> mcolumns = new TreeMap<ColumnDescriptor, ColumnWriterV2>();
  for (ColumnDescriptor path : schema.getColumns()) {
    PageWriter pageWriter = pageWriteStore.getPageWriter(path);
    mcolumns.put(path, new ColumnWriterV2(path, pageWriter, props));
  }
  this.columns = unmodifiableMap(mcolumns);
  this.writers = this.columns.values();
  this.rowCountForNextSizeCheck = props.getMinRowCountForPageSizeCheck();
}
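A sketch of wiring this constructor up. MemPageStore (used as the PageWriteStore in several examples on this page) comes from parquet-column's test utilities, so treat its availability on your classpath as an assumption:

import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.column.ParquetProperties.WriterVersion;
import org.apache.parquet.column.page.mem.MemPageStore;
import org.apache.parquet.schema.MessageType;
import static org.apache.parquet.schema.MessageTypeParser.parseMessageType;

MessageType schema = parseMessageType("message test { required int32 id; }");
ColumnWriteStoreV2 store = new ColumnWriteStoreV2(
    schema,
    new MemPageStore(10),  // expected row count, as in the other examples here
    ParquetProperties.builder()
        .withWriterVersion(WriterVersion.PARQUET_2_0)  // the V2 store pairs with the v2 writer
        .build());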
Example 6: write
import org.apache.parquet.column.ParquetProperties; // import the required package/class
private static void write(MemPageStore memPageStore) {
  ColumnWriteStoreV1 columns = new ColumnWriteStoreV1(
      memPageStore,
      ParquetProperties.builder()
          .withPageSize(50 * 1024 * 1024)
          .withDictionaryEncoding(false)
          .build());
  MessageColumnIO columnIO = newColumnFactory(schema);
  GroupWriter groupWriter = new GroupWriter(columnIO.getRecordWriter(columns), schema);
  groupWriter.write(r1);
  groupWriter.write(r2);
  write(memPageStore, groupWriter, 10000);
  write(memPageStore, groupWriter, 10000);
  write(memPageStore, groupWriter, 10000);
  write(memPageStore, groupWriter, 10000);
  write(memPageStore, groupWriter, 10000);
  write(memPageStore, groupWriter, 100000);
  write(memPageStore, groupWriter, 1000000);
  columns.flush();
  System.out.println();
  System.out.println(columns.getBufferedSize() + " bytes used total");
  System.out.println("max col size: " + columns.maxColMemSize() + " bytes");
}
Example 7: writeTestRecords
import org.apache.parquet.column.ParquetProperties; // import the required package/class
private MemPageStore writeTestRecords(MessageColumnIO columnIO, int number) {
  MemPageStore memPageStore = new MemPageStore(number * 2);
  ColumnWriteStoreV1 columns = new ColumnWriteStoreV1(
      memPageStore,
      ParquetProperties.builder()
          .withPageSize(800)
          .withDictionaryEncoding(false)
          .build());
  RecordConsumer recordWriter = columnIO.getRecordWriter(columns);
  GroupWriter groupWriter = new GroupWriter(recordWriter, schema);
  for (int i = 0; i < number; i++) {
    groupWriter.write(r1);
    groupWriter.write(r2);
  }
  recordWriter.flush();
  columns.flush();
  return memPageStore;
}
Example 8: InternalParquetRecordWriter
import org.apache.parquet.column.ParquetProperties; // import the required package/class
/**
 * @param parquetFileWriter the file writer to write to
 * @param writeSupport the class used to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra metadata to write in the footer of the file
 * @param rowGroupSize the size of a row group in the file (this will be approximate)
 * @param compressor the codec used to compress pages
 * @param validating whether schema validation should be turned on
 * @param props the Parquet writer properties
 */
public InternalParquetRecordWriter(
    ParquetFileWriter parquetFileWriter,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long rowGroupSize,
    BytesCompressor compressor,
    boolean validating,
    ParquetProperties props) {
  this.parquetFileWriter = parquetFileWriter;
  this.writeSupport = checkNotNull(writeSupport, "writeSupport");
  this.schema = schema;
  this.extraMetaData = extraMetaData;
  this.rowGroupSize = rowGroupSize;
  this.rowGroupSizeThreshold = rowGroupSize;
  this.nextRowGroupSize = rowGroupSizeThreshold;
  this.compressor = compressor;
  this.validating = validating;
  this.props = props;
  initStore();
}
Example 9: ParquetRecordWriter
import org.apache.parquet.column.ParquetProperties; // import the required package/class
/**
 * @param w the file writer to write to
 * @param writeSupport the class used to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra metadata to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate)
 * @param pageSize the size of a page in the file
 * @param compressor the compressor used to compress the pages
 * @param dictionaryPageSize the threshold for the dictionary page size
 * @param enableDictionary whether to enable dictionary encoding
 * @param validating whether schema validation should be turned on
 * @param writerVersion the writer version from {@link ParquetProperties.WriterVersion}
 */
@Deprecated
public ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    int blockSize, int pageSize,
    BytesCompressor compressor,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion) {
  ParquetProperties props = ParquetProperties.builder()
      .withPageSize(pageSize)
      .withDictionaryPageSize(dictionaryPageSize)
      .withDictionaryEncoding(enableDictionary)
      .withWriterVersion(writerVersion)
      .build();
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, compressor, validating, props);
  this.memoryManager = null;
  this.codecFactory = null;
}
Example 10: ParquetWriter
import org.apache.parquet.column.ParquetProperties; // import the required package/class
/**
 * Create a new ParquetWriter.
 *
 * @param file the file to create
 * @param mode file creation mode
 * @param writeSupport the implementation to write a record to a RecordConsumer
 * @param compressionCodecName the compression codec to use
 * @param blockSize the block size threshold
 * @param pageSize the page size threshold
 * @param dictionaryPageSize the page size threshold for the dictionary pages
 * @param enableDictionary to turn dictionary encoding on
 * @param validating to turn on validation using the schema
 * @param writerVersion version of parquetWriter from {@link ParquetProperties.WriterVersion}
 * @param conf Hadoop configuration to use while accessing the filesystem
 * @throws IOException if the file cannot be created
 */
@Deprecated
public ParquetWriter(
    Path file,
    ParquetFileWriter.Mode mode,
    WriteSupport<T> writeSupport,
    CompressionCodecName compressionCodecName,
    int blockSize,
    int pageSize,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion,
    Configuration conf) throws IOException {
  this(HadoopOutputFile.fromPath(file, conf),
      mode, writeSupport, compressionCodecName, blockSize,
      validating, conf, MAX_PADDING_SIZE_DEFAULT,
      ParquetProperties.builder()
          .withPageSize(pageSize)
          .withDictionaryPageSize(dictionaryPageSize)
          .withDictionaryEncoding(enableDictionary)
          .withWriterVersion(writerVersion)
          .build());
}
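Since this constructor is @Deprecated, the non-deprecated route in current parquet-hadoop releases is a ParquetWriter.Builder subclass. A hedged sketch using ExampleParquetWriter (the output path and schema here are illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ParquetProperties.WriterVersion;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.example.ExampleParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

MessageType schema = MessageTypeParser.parseMessageType("message test { required int32 id; }");
// The builder sets the same ParquetProperties fields the deprecated constructor did.
ParquetWriter<Group> writer = ExampleParquetWriter.builder(new Path("/tmp/out.parquet"))
    .withConf(new Configuration())
    .withType(schema)
    .withCompressionCodec(CompressionCodecName.UNCOMPRESSED)
    .withDictionaryEncoding(true)
    .withWriterVersion(WriterVersion.PARQUET_2_0)
    .build();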
Example 11: writeAndTest
import org.apache.parquet.column.ParquetProperties; // import the required package/class
public static void writeAndTest(WriteContext context) throws IOException {
  // Create the configuration, and then apply the schema to our configuration.
  Configuration configuration = new Configuration();
  GroupWriteSupport.setSchema(context.schema, configuration);
  GroupWriteSupport groupWriteSupport = new GroupWriteSupport();
  // Create the writer properties
  final int blockSize = context.blockSize;
  final int pageSize = context.pageSize;
  final int dictionaryPageSize = pageSize;
  final boolean enableDictionary = context.enableDictionary;
  final boolean enableValidation = context.enableValidation;
  ParquetProperties.WriterVersion writerVersion = context.version;
  CompressionCodecName codec = CompressionCodecName.UNCOMPRESSED;
  ParquetWriter<Group> writer = new ParquetWriter<Group>(context.fsPath,
      groupWriteSupport, codec, blockSize, pageSize, dictionaryPageSize,
      enableDictionary, enableValidation, writerVersion, configuration);
  context.write(writer);
  writer.close();
  context.test();
  context.path.delete();
}
Example 12: main
import org.apache.parquet.column.ParquetProperties; // import the required package/class
public static void main(String[] args) throws Exception {
  String pigSchema = pigSchema(false, false);
  String pigSchemaProjected = pigSchema(true, false);
  String pigSchemaNoString = pigSchema(true, true);
  MessageType schema = new PigSchemaConverter().convert(Utils.getSchemaFromString(pigSchema));
  MemPageStore memPageStore = new MemPageStore(0);
  ColumnWriteStoreV1 columns = new ColumnWriteStoreV1(
      memPageStore, ParquetProperties.builder()
          .withPageSize(50 * 1024 * 1024)
          .withDictionaryEncoding(false)
          .build());
  write(memPageStore, columns, schema, pigSchema);
  columns.flush();
  read(memPageStore, pigSchema, pigSchemaProjected, pigSchemaNoString);
  System.out.println(columns.getBufferedSize() + " bytes used total");
  System.out.println("max col size: " + columns.maxColMemSize() + " bytes");
}
Example 13: newColumnChunkPageWriteStore
import org.apache.parquet.column.ParquetProperties; // import the required package/class
public static ColumnChunkPageWriteStore newColumnChunkPageWriteStore(
    BytesCompressor compressor,
    MessageType schema,
    ParquetProperties parquetProperties) {
  return new ColumnChunkPageWriteStore(compressor, schema, parquetProperties);
}
Example 14: newSchema
import org.apache.parquet.column.ParquetProperties; // import the required package/class
private void newSchema() throws IOException {
  // Reset it to half of current number and bound it within the limits
  recordCountForNextMemCheck = min(
      max(MINIMUM_RECORD_COUNT_FOR_CHECK, recordCountForNextMemCheck / 2),
      MAXIMUM_RECORD_COUNT_FOR_CHECK);
  String json = new Schema(batchSchema).toJson();
  extraMetaData.put(DREMIO_ARROW_SCHEMA, json);
  List<Type> types = Lists.newArrayList();
  for (Field field : batchSchema) {
    if (field.getName().equalsIgnoreCase(WriterPrel.PARTITION_COMPARATOR_FIELD)) {
      continue;
    }
    Type childType = getType(field);
    if (childType != null) {
      types.add(childType);
    }
  }
  Preconditions.checkState(types.size() > 0, "No types for parquet schema");
  schema = new MessageType("root", types);
  int dictionarySize = (int) context.getOptions().getOption(ExecConstants.PARQUET_DICT_PAGE_SIZE_VALIDATOR);
  final ParquetProperties parquetProperties = new ParquetProperties(dictionarySize, writerVersion, enableDictionary,
      new ParquetDirectByteBufferAllocator(columnEncoderAllocator), pageSize, true, enableDictionaryForBinary);
  pageStore = ColumnChunkPageWriteStoreExposer.newColumnChunkPageWriteStore(codecFactory.getCompressor(codec), schema, parquetProperties);
  store = new ColumnWriteStoreV1(pageStore, pageSize, parquetProperties);
  MessageColumnIO columnIO = new ColumnIOFactory(false).getColumnIO(this.schema);
  consumer = columnIO.getRecordWriter(store);
  setUp(schema, consumer);
}
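Note that this example (from Dremio's fork) calls a direct ParquetProperties constructor that upstream later removed in favor of the builder. A rough, hedged builder equivalent is sketched below; withAllocator exists in upstream builders from roughly 1.9 onward, and the fork's last two constructor flags have no one-to-one builder counterpart, so treat the mapping as approximate:

// Approximate upstream-builder form of the constructor call above
// (assumes a ParquetProperties.Builder that exposes withAllocator).
final ParquetProperties parquetProperties = ParquetProperties.builder()
    .withDictionaryPageSize(dictionarySize)
    .withWriterVersion(writerVersion)
    .withDictionaryEncoding(enableDictionary)
    .withAllocator(new ParquetDirectByteBufferAllocator(columnEncoderAllocator))
    .withPageSize(pageSize)
    .build();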
Example 15: run
import org.apache.parquet.column.ParquetProperties; // import the required package/class
public void run() {
  Configuration conf = new Configuration();
  int blockSize = 1 * 1024;
  int pageSize = 1 * 1024;
  int dictionaryPageSize = 512;
  boolean enableDictionary = false;
  boolean validating = false;
  Path basePath = new Path("file:///Users/Jelly/Developer/test");
  MessageType schema = MessageTypeParser.parseMessageType("message test {" +
      "required binary id; " +
      "required binary content; " +
      "required int64 int64_field; " +
      "}");
  GroupWriteSupport writeSupport = new GroupWriteSupport();
  GroupWriteSupport.setSchema(schema, conf); // setSchema is static; call it on the class
  SimpleGroupFactory groupFactory = new SimpleGroupFactory(schema);
  try {
    ParquetWriter<Group> parquetWriter = new ParquetWriter<Group>(
        basePath,
        writeSupport,
        CompressionCodecName.UNCOMPRESSED,
        blockSize, pageSize, dictionaryPageSize,
        enableDictionary,
        validating,
        ParquetProperties.WriterVersion.PARQUET_2_0,
        conf);
    for (int i = 0; i < 50000; i++) {
      parquetWriter.write(groupFactory.newGroup()
          .append("id", "10")
          .append("content", "test" + i)
          .append("int64_field", Long.valueOf(i)));
    }
    parquetWriter.close();
  } catch (IOException e) {
    e.printStackTrace();
  }
}