This article collects typical usage examples of the Java class org.apache.parquet.hadoop.api.WriteSupport. If you are unsure what WriteSupport does or how to use it, the hand-picked examples below should help.
The WriteSupport class lives in the org.apache.parquet.hadoop.api package. Fifteen code examples are listed below, ordered by popularity.
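Before the individual examples, a minimal sketch of a custom WriteSupport implementation may help frame them: a subclass declares the file schema in init, receives the low-level RecordConsumer in prepareForWrite, and emits one message per record in write. The User record type and its schema below are made up purely for illustration.

import java.util.HashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

// Hypothetical record type used only for this sketch.
class User {
  final int id;
  final String name;
  User(int id, String name) { this.id = id; this.name = name; }
}

public class UserWriteSupport extends WriteSupport<User> {

  private static final MessageType SCHEMA = MessageTypeParser.parseMessageType(
      "message user { required int32 id; required binary name (UTF8); }");

  private RecordConsumer recordConsumer;

  @Override
  public WriteContext init(Configuration configuration) {
    // The schema and any extra key/value metadata end up in the file footer.
    return new WriteContext(SCHEMA, new HashMap<String, String>());
  }

  @Override
  public void prepareForWrite(RecordConsumer recordConsumer) {
    this.recordConsumer = recordConsumer;
  }

  @Override
  public void write(User record) {
    recordConsumer.startMessage();
    recordConsumer.startField("id", 0);
    recordConsumer.addInteger(record.id);
    recordConsumer.endField("id", 0);
    recordConsumer.startField("name", 1);
    recordConsumer.addBinary(Binary.fromString(record.name));
    recordConsumer.endField("name", 1);
    recordConsumer.endMessage();
  }
}

A ParquetWriter or ParquetOutputFormat then drives such a class; several of the examples below show exactly that wiring.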
Example 1: InternalParquetRecordWriter
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
/**
 * @param parquetFileWriter the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param rowGroupSize the size of a block in the file (this will be approximate)
 * @param compressor the codec used to compress
 * @param validating whether to validate records against the declared schema
 * @param props the Parquet encoding properties to use
 */
public InternalParquetRecordWriter(
    ParquetFileWriter parquetFileWriter,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long rowGroupSize,
    BytesCompressor compressor,
    boolean validating,
    ParquetProperties props) {
  this.parquetFileWriter = parquetFileWriter;
  this.writeSupport = checkNotNull(writeSupport, "writeSupport");
  this.schema = schema;
  this.extraMetaData = extraMetaData;
  this.rowGroupSize = rowGroupSize;
  this.rowGroupSizeThreshold = rowGroupSize;
  this.nextRowGroupSize = rowGroupSizeThreshold;
  this.compressor = compressor;
  this.validating = validating;
  this.props = props;
  initStore();
}
Example 2: ParquetRecordWriter
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
/**
 *
 * @param w the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra meta data to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate)
 * @param pageSize the page size threshold
 * @param compressor the compressor used to compress the pages
 * @param dictionaryPageSize the threshold for dictionary size
 * @param enableDictionary to enable the dictionary
 * @param validating if schema validation should be turned on
 * @param writerVersion the format version to write ({@link ParquetProperties.WriterVersion})
 */
@Deprecated
public ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    int blockSize, int pageSize,
    BytesCompressor compressor,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion) {
  ParquetProperties props = ParquetProperties.builder()
      .withPageSize(pageSize)
      .withDictionaryPageSize(dictionaryPageSize)
      .withDictionaryEncoding(enableDictionary)
      .withWriterVersion(writerVersion)
      .build();
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, compressor, validating, props);
  this.memoryManager = null;
  this.codecFactory = null;
}
Example 3: ParquetWriter
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
/**
 * Create a new ParquetWriter.
 *
 * @param file the file to create
 * @param writeSupport the implementation to write a record to a RecordConsumer
 * @param compressionCodecName the compression codec to use
 * @param blockSize the block size threshold
 * @param pageSize the page size threshold
 * @param dictionaryPageSize the page size threshold for the dictionary pages
 * @param enableDictionary to turn dictionary encoding on
 * @param validating to turn on validation using the schema
 * @param writerVersion version of parquetWriter from {@link ParquetProperties.WriterVersion}
 * @param conf Hadoop configuration to use while accessing the filesystem
 * @throws IOException if the file can not be created
 */
@Deprecated
public ParquetWriter(
    Path file,
    WriteSupport<T> writeSupport,
    CompressionCodecName compressionCodecName,
    int blockSize,
    int pageSize,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion,
    Configuration conf) throws IOException {
  this(file, ParquetFileWriter.Mode.CREATE, writeSupport,
      compressionCodecName, blockSize, pageSize, dictionaryPageSize,
      enableDictionary, validating, writerVersion, conf);
}
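As a hedged usage sketch, the deprecated constructor above could be driven with the hypothetical UserWriteSupport from the introduction; the file path and constant choices here are illustrative assumptions, not part of the example itself.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

public class DeprecatedConstructorDemo {
  public static void main(String[] args) throws Exception {
    // UserWriteSupport is the hypothetical WriteSupport sketched in the introduction.
    ParquetWriter<User> writer = new ParquetWriter<>(
        new Path("/tmp/users.parquet"),              // file to create
        new UserWriteSupport(),                      // converts User objects to Parquet records
        CompressionCodecName.SNAPPY,                 // page compression
        ParquetWriter.DEFAULT_BLOCK_SIZE,            // row group size threshold
        ParquetWriter.DEFAULT_PAGE_SIZE,             // page size threshold
        ParquetWriter.DEFAULT_PAGE_SIZE,             // dictionary page size threshold
        true,                                        // enable dictionary encoding
        false,                                       // disable schema validation
        ParquetProperties.WriterVersion.PARQUET_1_0,
        new Configuration());
    try {
      writer.write(new User(1, "alice"));
    } finally {
      writer.close();
    }
  }
}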
Example 4: init
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
@Override
public org.apache.parquet.hadoop.api.WriteSupport.WriteContext init(final Configuration configuration) {
  final Map<String, String> extraMeta = new HashMap<>();
  if (null != sparkSchema) {
    extraMeta.put(ParquetReadSupport.SPARK_METADATA_KEY(), sparkSchema.json());
  }
  return new WriteContext(schema, extraMeta);
}
Example 5: ParquetElementWriter
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
@Deprecated
ParquetElementWriter(final Path file, final WriteSupport<Element> writeSupport,
                     final CompressionCodecName compressionCodecName,
                     final int blockSize, final int pageSize, final boolean enableDictionary,
                     final boolean enableValidation,
                     final ParquetProperties.WriterVersion writerVersion,
                     final Configuration conf)
    throws IOException {
  super(file, writeSupport, compressionCodecName, blockSize, pageSize,
      pageSize, enableDictionary, enableValidation, writerVersion, conf);
}
Example 6: init
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
@Override
public org.apache.parquet.hadoop.api.WriteSupport.WriteContext init(Configuration configuration) {
  // if present, prefer the schema passed to the constructor
  if (schema == null) {
    schema = getSchema(configuration);
  }
  return new WriteContext(schema, this.extraMetaData);
}
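The built-in GroupWriteSupport follows the same pattern, with the configuration-based fallback fed by a static setter. The sketch below is a minimal illustration of that hand-off; it assumes GroupWriteSupport's behavior and uses an ad-hoc schema.

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.hadoop.example.GroupWriteSupport;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class GroupWriteSupportInitDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example { required int32 id; }");

    // Store the schema in the configuration so that init() can recover it
    // when no schema was passed to the constructor.
    GroupWriteSupport.setSchema(schema, conf);

    WriteSupport.WriteContext context = new GroupWriteSupport().init(conf);
    System.out.println(context.getSchema()); // prints the message type set above
  }
}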
Example 7: getWriteSupportClass
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
public static Class<?> getWriteSupportClass(Configuration configuration) {
  final String className = configuration.get(WRITE_SUPPORT_CLASS);
  if (className == null) {
    return null;
  }
  final Class<?> writeSupportClass = ConfigurationUtil.getClassFromConfig(configuration, WRITE_SUPPORT_CLASS, WriteSupport.class);
  return writeSupportClass;
}
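This helper mirrors what upstream ParquetOutputFormat.getWriteSupportClass does: the class name is stored in the configuration under WRITE_SUPPORT_CLASS and resolved back to a Class. A minimal sketch of that round trip, again assuming the hypothetical UserWriteSupport from the introduction:

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.ParquetOutputFormat;

public class WriteSupportClassLookupDemo {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Register the write support class under the key that getWriteSupportClass() reads.
    conf.set(ParquetOutputFormat.WRITE_SUPPORT_CLASS, UserWriteSupport.class.getName());

    // Resolves back to UserWriteSupport.class (null if the key is unset).
    System.out.println(ParquetOutputFormat.getWriteSupportClass(conf));
  }
}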
Example 8: getWriteSupport
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
/**
 * @param conf a Hadoop configuration
 * @return an appropriate WriteSupport for the object model.
 */
protected WriteSupport<Group> getWriteSupport(Configuration conf) {
  return new GroupWriteSupport(type, extraMetaData);
}
Example 9: getWriteSupport
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
@Override
protected WriteSupport<Way> getWriteSupport(Configuration conf) {
  return new WayWriteSupport(excludeMetadata);
}
Example 10: getWriteSupport
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
@Override
protected WriteSupport<Element> getWriteSupport(final Configuration conf) {
  return new ElementWriteSupport(type, isEntity, converter, sparkSchema);
}
Example 11: getWriteSupport
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
@Override
protected WriteSupport<PageReader> getWriteSupport(Configuration conf) {
  return new EmbulkWriteSupport(schema, timestampFormatters);
}
Example 12: getWriteSupport
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
protected WriteSupport<T> getWriteSupport(Configuration conf) {
  return new AvroWriteSupport(
      new AvroSchemaConverterLogicalTypesPre19(conf).convert(this.schema), this.schema, this.model);
}
Example 13: getWriteSupport
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
@Override
protected WriteSupport<Group> getWriteSupport(Configuration conf) {
  return new GroupWriteSupport(type, extraMetaData);
}
Example 14: getRecordWriter
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, CompressionCodecName codec)
    throws IOException, InterruptedException {
  final WriteSupport<T> writeSupport = getWriteSupport(conf);
  ParquetProperties props = ParquetProperties.builder()
      .withPageSize(getPageSize(conf))
      .withDictionaryPageSize(getDictionaryPageSize(conf))
      .withDictionaryEncoding(getEnableDictionary(conf))
      .withWriterVersion(getWriterVersion(conf))
      .estimateRowCountForPageSizeCheck(getEstimatePageSizeCheck(conf))
      .withMinRowCountForPageSizeCheck(getMinRowCountForPageSizeCheck(conf))
      .withMaxRowCountForPageSizeCheck(getMaxRowCountForPageSizeCheck(conf))
      .build();
  long blockSize = getLongBlockSize(conf);
  int maxPaddingSize = getMaxPaddingSize(conf);
  boolean validating = getValidation(conf);
  if (LOG.isInfoEnabled()) {
    LOG.info("Parquet block size to {}", blockSize);
    LOG.info("Parquet page size to {}", props.getPageSizeThreshold());
    LOG.info("Parquet dictionary page size to {}", props.getDictionaryPageSizeThreshold());
    LOG.info("Dictionary is {}", (props.isEnableDictionary() ? "on" : "off"));
    LOG.info("Validation is {}", (validating ? "on" : "off"));
    LOG.info("Writer version is: {}", props.getWriterVersion());
    LOG.info("Maximum row group padding size is {} bytes", maxPaddingSize);
    LOG.info("Page size checking is: {}", (props.estimateNextSizeCheck() ? "estimated" : "constant"));
    LOG.info("Min row count for page size check is: {}", props.getMinRowCountForPageSizeCheck());
    LOG.info("Max row count for page size check is: {}", props.getMaxRowCountForPageSizeCheck());
  }
  WriteContext init = writeSupport.init(conf);
  ParquetFileWriter w = new ParquetFileWriter(HadoopOutputFile.fromPath(file, conf),
      init.getSchema(), Mode.CREATE, blockSize, maxPaddingSize);
  w.start();
  float maxLoad = conf.getFloat(ParquetOutputFormat.MEMORY_POOL_RATIO,
      MemoryManager.DEFAULT_MEMORY_POOL_RATIO);
  long minAllocation = conf.getLong(ParquetOutputFormat.MIN_MEMORY_ALLOCATION,
      MemoryManager.DEFAULT_MIN_MEMORY_ALLOCATION);
  synchronized (ParquetOutputFormat.class) {
    if (memoryManager == null) {
      memoryManager = new MemoryManager(maxLoad, minAllocation);
    }
  }
  if (memoryManager.getMemoryPoolRatio() != maxLoad) {
    LOG.warn("The configuration " + MEMORY_POOL_RATIO + " has been set. It should not " +
        "be reset by the new value: " + maxLoad);
  }
  return new ParquetRecordWriter<T>(
      w,
      writeSupport,
      init.getSchema(),
      init.getExtraMetaData(),
      blockSize,
      codec,
      validating,
      props,
      memoryManager,
      conf);
}
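This getRecordWriter path is normally reached by wiring ParquetOutputFormat into a MapReduce job. A minimal, hedged job-setup sketch follows, once more assuming the hypothetical UserWriteSupport; mapper/reducer configuration is omitted.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.parquet.hadoop.ParquetOutputFormat;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

public class ParquetJobSetupDemo {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "write-parquet");

    job.setOutputFormatClass(ParquetOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/parquet-out"));

    // The output format asks this class for the WriteSupport in getRecordWriter().
    ParquetOutputFormat.setWriteSupportClass(job, UserWriteSupport.class);
    ParquetOutputFormat.setBlockSize(job, 128 * 1024 * 1024);       // row group size
    ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);

    // job.setMapperClass(...), job.setNumReduceTasks(...) etc. would follow here.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}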
Example 15: writeSupport
import org.apache.parquet.hadoop.api.WriteSupport; // import the required package/class
private static <T> WriteSupport<T> writeSupport(Schema avroSchema,
                                                GenericData model) {
  return new AvroWriteSupport<T>(
      new AvroSchemaConverter().convert(avroSchema), avroSchema, model);
}