This article collects typical usage examples of the Java class parquet.hadoop.api.WriteSupport. If you are wondering what WriteSupport is for, how to use it, or are looking for concrete examples, the curated snippets below should help.
The WriteSupport class belongs to the parquet.hadoop.api package. Fifteen code examples are shown below, sorted by popularity.
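Before diving into the collected examples, here is a minimal, hypothetical sketch of what a custom WriteSupport subclass looks like: init declares the schema and footer metadata, prepareForWrite receives the RecordConsumer, and write pushes each incoming record into it. The class name PairWriteSupport, the String[] record type and the two-column schema are assumptions made purely for illustration; they do not appear in the examples below.
import java.util.HashMap;

import org.apache.hadoop.conf.Configuration;

import parquet.hadoop.api.WriteSupport;
import parquet.io.api.Binary;
import parquet.io.api.RecordConsumer;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

// Hypothetical WriteSupport for (id, name) records; all names here are illustrative.
public class PairWriteSupport extends WriteSupport<String[]> {

  private static final MessageType SCHEMA = MessageTypeParser.parseMessageType(
      "message pair { required int32 id; required binary name (UTF8); }");

  private RecordConsumer recordConsumer;

  @Override
  public WriteContext init(Configuration configuration) {
    // Declare the schema and any extra footer metadata up front.
    return new WriteContext(SCHEMA, new HashMap<String, String>());
  }

  @Override
  public void prepareForWrite(RecordConsumer recordConsumer) {
    // Called once per file with the consumer that records are pushed into.
    this.recordConsumer = recordConsumer;
  }

  @Override
  public void write(String[] record) {
    // Convert one incoming record into start/add/end calls on the consumer.
    recordConsumer.startMessage();
    recordConsumer.startField("id", 0);
    recordConsumer.addInteger(Integer.parseInt(record[0]));
    recordConsumer.endField("id", 0);
    recordConsumer.startField("name", 1);
    recordConsumer.addBinary(Binary.fromString(record[1]));
    recordConsumer.endField("name", 1);
    recordConsumer.endMessage();
  }
}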
Example 1: InternalParquetRecordWriter
import parquet.hadoop.api.WriteSupport; // import the required package/class
/**
* @param parquetFileWriter the file to write to
* @param writeSupport the class to convert incoming records
* @param schema the schema of the records
* @param extraMetaData extra meta data to write in the footer of the file
* @param rowGroupSize the size of a row group in the file (this will be approximate)
* @param pageSize the page size threshold
* @param compressor the codec used to compress the pages
* @param dictionaryPageSize the threshold for dictionary size
* @param enableDictionary to enable dictionary encoding
* @param validating if schema validation should be turned on
* @param writerVersion writer version from {@link ParquetProperties.WriterVersion}
*/
public InternalParquetRecordWriter(
ParquetFileWriter parquetFileWriter,
WriteSupport<T> writeSupport,
MessageType schema,
Map<String, String> extraMetaData,
long rowGroupSize,
int pageSize,
BytesCompressor compressor,
int dictionaryPageSize,
boolean enableDictionary,
boolean validating,
WriterVersion writerVersion) {
this.parquetFileWriter = parquetFileWriter;
this.writeSupport = checkNotNull(writeSupport, "writeSupport");
this.schema = schema;
this.extraMetaData = extraMetaData;
this.rowGroupSize = rowGroupSize;
this.rowGroupSizeThreshold = rowGroupSize;
this.pageSize = pageSize;
this.compressor = compressor;
this.validating = validating;
this.parquetProperties = new ParquetProperties(dictionaryPageSize, writerVersion, enableDictionary);
initStore();
}
Example 2: ParquetRecordWriter
import parquet.hadoop.api.WriteSupport; // import the required package/class
/**
* @param w the file to write to
* @param writeSupport the class to convert incoming records
* @param schema the schema of the records
* @param extraMetaData extra meta data to write in the footer of the file
* @param blockSize the size of a block in the file (this will be approximate)
* @param pageSize the page size threshold
* @param compressor the compressor used to compress the pages
* @param dictionaryPageSize the threshold for dictionary size
* @param enableDictionary to enable the dictionary
* @param validating if schema validation should be turned on
* @param writerVersion writer version from {@link ParquetProperties.WriterVersion}
*/
@Deprecated
public ParquetRecordWriter(
ParquetFileWriter w,
WriteSupport<T> writeSupport,
MessageType schema,
Map<String, String> extraMetaData,
int blockSize, int pageSize,
BytesCompressor compressor,
int dictionaryPageSize,
boolean enableDictionary,
boolean validating,
WriterVersion writerVersion) {
internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
extraMetaData, blockSize, pageSize, compressor, dictionaryPageSize, enableDictionary,
validating, writerVersion);
}
Example 3: ParquetWriter
import parquet.hadoop.api.WriteSupport; // import the required package/class
/**
* Create a new ParquetWriter.
*
* @param file the file to create
* @param writeSupport the implementation to write a record to a RecordConsumer
* @param compressionCodecName the compression codec to use
* @param blockSize the block size threshold
* @param pageSize the page size threshold
* @param dictionaryPageSize the page size threshold for the dictionary pages
* @param enableDictionary to turn dictionary encoding on
* @param validating to turn on validation using the schema
* @param writerVersion version of parquetWriter from {@link ParquetProperties.WriterVersion}
* @param conf Hadoop configuration to use while accessing the filesystem
* @throws IOException
*/
public ParquetWriter(
Path file,
WriteSupport<T> writeSupport,
CompressionCodecName compressionCodecName,
int blockSize,
int pageSize,
int dictionaryPageSize,
boolean enableDictionary,
boolean validating,
WriterVersion writerVersion,
Configuration conf) throws IOException {
this(file, ParquetFileWriter.Mode.CREATE, writeSupport,
compressionCodecName, blockSize, pageSize, dictionaryPageSize,
enableDictionary, validating, writerVersion, conf);
}
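A hedged usage sketch for the constructor shown above: the bundled parquet.hadoop.example.GroupWriteSupport serves as the WriteSupport implementation, and the schema, output path, codec and flag values are assumptions chosen only for illustration.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import parquet.column.ParquetProperties.WriterVersion;
import parquet.example.data.Group;
import parquet.example.data.simple.SimpleGroupFactory;
import parquet.hadoop.ParquetWriter;
import parquet.hadoop.example.GroupWriteSupport;
import parquet.hadoop.metadata.CompressionCodecName;
import parquet.schema.MessageType;
import parquet.schema.MessageTypeParser;

public class ParquetWriterUsageSketch {
  public static void main(String[] args) throws Exception {
    // Illustrative two-column schema; replace with your own.
    MessageType schema = MessageTypeParser.parseMessageType(
        "message user { required int32 id; required binary name (UTF8); }");

    Configuration conf = new Configuration();
    // GroupWriteSupport reads its schema from the configuration.
    GroupWriteSupport.setSchema(schema, conf);

    ParquetWriter<Group> writer = new ParquetWriter<Group>(
        new Path("/tmp/users.parquet"),      // assumed output location
        new GroupWriteSupport(),
        CompressionCodecName.SNAPPY,
        ParquetWriter.DEFAULT_BLOCK_SIZE,
        ParquetWriter.DEFAULT_PAGE_SIZE,
        ParquetWriter.DEFAULT_PAGE_SIZE,     // dictionary page size threshold
        true,                                // enableDictionary
        false,                               // validating
        WriterVersion.PARQUET_1_0,
        conf);

    SimpleGroupFactory factory = new SimpleGroupFactory(schema);
    writer.write(factory.newGroup().append("id", 1).append("name", "Alex"));
    writer.write(factory.newGroup().append("id", 2).append("name", "Kim"));
    writer.close();
  }
}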
Example 4: initParquetWriteSupportWhenSchemaIsNotNull
import parquet.hadoop.api.WriteSupport; // import the required package/class
@Test
public void initParquetWriteSupportWhenSchemaIsNotNull() {
int pentahoValueMetaTypeFirstRow = 2;
boolean allowNullFirstRow = false;
int pentahoValueMetaTypeSecondRow = 5;
boolean allowNullSecondRow = false;
String schemaFromString = ParquetUtils
.createSchema( pentahoValueMetaTypeFirstRow, allowNullFirstRow, pentahoValueMetaTypeSecondRow,
allowNullSecondRow ).marshall();
SchemaDescription schema = SchemaDescription.unmarshall( schemaFromString );
PentahoParquetWriteSupport writeSupport = new PentahoParquetWriteSupport( schema );
Configuration conf = new Configuration();
conf.set( "fs.defaultFS", "file:///" );
WriteSupport.WriteContext writeContext = writeSupport.init( conf );
Assert.assertNotNull( writeContext );
}
Example 5: getWriteSupportClass
import parquet.hadoop.api.WriteSupport; // import the required package/class
public static Class<?> getWriteSupportClass(Configuration configuration) {
final String className = configuration.get(WRITE_SUPPORT_CLASS);
if (className == null) {
return null;
}
final Class<?> writeSupportClass = ConfigurationUtil.getClassFromConfig(configuration, WRITE_SUPPORT_CLASS, WriteSupport.class);
return writeSupportClass;
}
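For context, the write-support class read back by getWriteSupportClass above is normally registered through ParquetOutputFormat's static setters rather than by setting the key manually. A minimal job-wiring sketch, assuming the bundled GroupWriteSupport and an illustrative schema:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

import parquet.hadoop.ParquetOutputFormat;
import parquet.hadoop.example.GroupWriteSupport;
import parquet.hadoop.metadata.CompressionCodecName;
import parquet.schema.MessageTypeParser;

public class JobSetupSketch {
  public static Job configure(Configuration conf) throws Exception {
    Job job = Job.getInstance(conf, "parquet output");
    job.setOutputFormatClass(ParquetOutputFormat.class);
    // Registers the write-support class in the configuration; getWriteSupportClass(...)
    // above later resolves it back into a Class instance.
    ParquetOutputFormat.setWriteSupportClass(job, GroupWriteSupport.class);
    // GroupWriteSupport also needs its schema in the configuration.
    GroupWriteSupport.setSchema(
        MessageTypeParser.parseMessageType(
            "message user { required int32 id; required binary name (UTF8); }"),
        job.getConfiguration());
    ParquetOutputFormat.setCompression(job, CompressionCodecName.SNAPPY);
    return job;
  }
}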
Example 6: ParquetMultiRecordWriter
import parquet.hadoop.api.WriteSupport; // import the required package/class
/**
* @param workPath the path to the output directory (temporary)
* @param extension the codec extension + .parquet
* @param taskId the zero-padded task ID
* @param conf the Hadoop configuration
* @param writeSupport the class to convert incoming records
* @param schema the schema of the records
* @param extraMetaData extra meta data to write in the footer of the file
* @param blockSize the size of a block in the file (this will be approximate)
* @param pageSize the page size threshold
* @param compressor the compressor used to compress the pages
* @param dictionaryPageSize the threshold for dictionary size
* @param enableDictionary to enable the dictionary
* @param validating if schema validation should be turned on
* @param writerVersion writer version from {@link ParquetProperties.WriterVersion}
* @param maxNumberOfWriters max number of open file handles
*/
public ParquetMultiRecordWriter(
Path workPath,
String extension,
String taskId,
Configuration conf,
WriteSupport<T> writeSupport,
MessageType schema,
Map<String, String> extraMetaData,
int blockSize, int pageSize,
BytesCompressor compressor,
int dictionaryPageSize,
boolean enableDictionary,
boolean validating,
WriterVersion writerVersion,
int maxNumberOfWriters) {
this.workPath = workPath;
this.extension = extension;
this.taskId = taskId;
this.conf = conf;
this.writeSupport = writeSupport;
this.schema = schema;
this.extraMetaData = extraMetaData;
this.blockSize = blockSize;
this.pageSize = pageSize;
this.compressor = compressor;
this.dictionaryPageSize = dictionaryPageSize;
this.enableDictionary = enableDictionary;
this.validating = validating;
this.writerVersion = writerVersion;
this.maxNumberOfWriters = maxNumberOfWriters;
}
Example 7: getRecordWriter
import parquet.hadoop.api.WriteSupport; // import the required package/class
public RecordWriter<K, T> getRecordWriter(Configuration conf, Path workPath, String extension, String taskId, CompressionCodecName codec)
throws IOException, InterruptedException {
final WriteSupport<T> writeSupport = getWriteSupport(conf);
CodecFactory codecFactory = new CodecFactory(conf);
int blockSize = getBlockSize(conf);
if (INFO) LOG.info("Parquet block size to " + blockSize);
int pageSize = getPageSize(conf);
if (INFO) LOG.info("Parquet page size to " + pageSize);
int dictionaryPageSize = getDictionaryPageSize(conf);
if (INFO) LOG.info("Parquet dictionary page size to " + dictionaryPageSize);
boolean enableDictionary = getEnableDictionary(conf);
if (INFO) LOG.info("Dictionary is " + (enableDictionary ? "on" : "off"));
boolean validating = getValidation(conf);
if (INFO) LOG.info("Validation is " + (validating ? "on" : "off"));
WriterVersion writerVersion = getWriterVersion(conf);
if (INFO) LOG.info("Writer version is: " + writerVersion);
WriteContext init = writeSupport.init(conf);
return new ParquetMultiRecordWriter<K, T>(
workPath,
extension,
taskId,
conf,
writeSupport,
init.getSchema(),
init.getExtraMetaData(),
blockSize, pageSize,
codecFactory.getCompressor(codec, pageSize),
dictionaryPageSize,
enableDictionary,
validating,
writerVersion,
getMaxNumberOfWriters());
}
Example 8: recordWriterCreateFileWithData
import parquet.hadoop.api.WriteSupport; // import the required package/class
@Test
public void recordWriterCreateFileWithData() throws Exception {
WriteSupport support =
new PentahoParquetWriteSupport( ParquetUtils.createSchema( ValueMetaInterface.TYPE_INTEGER ) );
ParquetOutputFormat nativeParquetOutputFormat = new ParquetOutputFormat<>( support );
ParquetRecordWriter<RowMetaAndData> recordWriter =
(ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter( task );
PentahoParquetRecordWriter writer = new PentahoParquetRecordWriter( recordWriter, task );
RowMetaAndData row = new RowMetaAndData();
RowMeta rowMeta = new RowMeta();
rowMeta.addValueMeta( new ValueMetaString( "Name" ) );
rowMeta.addValueMeta( new ValueMetaString( "Age" ) );
row.setRowMeta( rowMeta );
row.setData( new Object[] { "Alex", "87" } );
writer.write( row );
recordWriter.close( task );
Files.walk( Paths.get( tempFile.toString() ) )
.filter( Files::isRegularFile )
.forEach( ( f ) -> {
String file = f.toString();
if ( file.endsWith( "parquet" ) ) {
IPentahoInputFormat.IPentahoRecordReader recordReader =
readCreatedParquetFile( Paths.get( file ).toUri().toString() );
recordReader.forEach(
rowMetaAndData -> Assert.assertTrue( rowMetaAndData.size() == 2 ) );
}
} );
}
Example 9: recordWriterCreateFileWithoutData
import parquet.hadoop.api.WriteSupport; // import the required package/class
@Test
public void recordWriterCreateFileWithoutData() throws Exception {
WriteSupport support =
new PentahoParquetWriteSupport( ParquetUtils.createSchema( ValueMetaInterface.TYPE_INTEGER ) );
ParquetOutputFormat nativeParquetOutputFormat = new ParquetOutputFormat<>( support );
ParquetRecordWriter<RowMetaAndData> recordWriter =
(ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter( task );
PentahoParquetRecordWriter writer = new PentahoParquetRecordWriter( recordWriter, task );
RowMetaAndData row = new RowMetaAndData();
RowMeta rowMeta = new RowMeta();
rowMeta.addValueMeta( new ValueMetaString( "Name" ) );
rowMeta.addValueMeta( new ValueMetaString( "Age" ) );
row.setRowMeta( rowMeta );
row.setData( new Object[] { null, null } );
writer.write( row );
recordWriter.close( task );
Files.walk( Paths.get( tempFile.toString() ) )
.filter( Files::isRegularFile )
.forEach( ( f ) -> {
String file = f.toString();
if ( file.endsWith( "parquet" ) ) {
try {
Assert.assertTrue( Files.size( Paths.get( file ) ) > 0 );
} catch ( IOException e ) {
e.printStackTrace();
}
}
} );
}
Example 10: init
import parquet.hadoop.api.WriteSupport; // import the required package/class
@Override
public parquet.hadoop.api.WriteSupport.WriteContext init(Configuration configuration) {
schema = getSchema(configuration);
return new WriteContext(schema, this.metaData);
}
Example 11: getRecordWriter
import parquet.hadoop.api.WriteSupport; // import the required package/class
public RecordWriter<Void, T> getRecordWriter(Configuration conf, Path file, CompressionCodecName codec)
throws IOException, InterruptedException {
final WriteSupport<T> writeSupport = getWriteSupport(conf);
CodecFactory codecFactory = new CodecFactory(conf);
long blockSize = getLongBlockSize(conf);
if (INFO) LOG.info("Parquet block size to " + blockSize);
int pageSize = getPageSize(conf);
if (INFO) LOG.info("Parquet page size to " + pageSize);
int dictionaryPageSize = getDictionaryPageSize(conf);
if (INFO) LOG.info("Parquet dictionary page size to " + dictionaryPageSize);
boolean enableDictionary = getEnableDictionary(conf);
if (INFO) LOG.info("Dictionary is " + (enableDictionary ? "on" : "off"));
boolean validating = getValidation(conf);
if (INFO) LOG.info("Validation is " + (validating ? "on" : "off"));
WriterVersion writerVersion = getWriterVersion(conf);
if (INFO) LOG.info("Writer version is: " + writerVersion);
WriteContext init = writeSupport.init(conf);
ParquetFileWriter w = new ParquetFileWriter(conf, init.getSchema(), file);
w.start();
float maxLoad = conf.getFloat(ParquetOutputFormat.MEMORY_POOL_RATIO,
MemoryManager.DEFAULT_MEMORY_POOL_RATIO);
long minAllocation = conf.getLong(ParquetOutputFormat.MIN_MEMORY_ALLOCATION,
MemoryManager.DEFAULT_MIN_MEMORY_ALLOCATION);
if (memoryManager == null) {
memoryManager = new MemoryManager(maxLoad, minAllocation);
} else if (memoryManager.getMemoryPoolRatio() != maxLoad) {
LOG.warn("The configuration " + MEMORY_POOL_RATIO + " has been set. It should not " +
"be reset by the new value: " + maxLoad);
}
return new ParquetRecordWriter<T>(
w,
writeSupport,
init.getSchema(),
init.getExtraMetaData(),
blockSize, pageSize,
codecFactory.getCompressor(codec, pageSize),
dictionaryPageSize,
enableDictionary,
validating,
writerVersion,
memoryManager);
}
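The block size, page size, dictionary and validation settings read in the method above are usually supplied through ParquetOutputFormat's static helpers. A minimal sketch, assuming a MapReduce Job is already available and using placeholder sizes:
import org.apache.hadoop.mapreduce.Job;

import parquet.hadoop.ParquetOutputFormat;

public class OutputTuningSketch {
  // Sizes below are placeholders; tune them for your data and memory budget.
  public static void tune(Job job) {
    ParquetOutputFormat.setBlockSize(job, 128 * 1024 * 1024);    // row group (block) size
    ParquetOutputFormat.setPageSize(job, 1024 * 1024);           // page size
    ParquetOutputFormat.setDictionaryPageSize(job, 1024 * 1024); // dictionary page threshold
    ParquetOutputFormat.setEnableDictionary(job, true);          // dictionary encoding on
    ParquetOutputFormat.setValidation(job, false);               // schema validation off
  }
}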
Example 12: getCache
import parquet.hadoop.api.WriteSupport; // import the required package/class
public LoadingCache<K, ParquetRecordWriter> getCache() {
if (this.cache == null) {
final String extension = this.getExtension();
final String taskId = this.getTaskId();
final Path workPath = this.getWorkPath();
final Configuration conf = this.getConf();
final MessageType schema = this.getSchema();
final WriteSupport<T> writeSupport = this.getWriteSupport();
final Map<String, String> extraMetaData = this.getExtraMetaData();
final int blockSize = this.getBlockSize();
final int pageSize = this.getPageSize();
final BytesCompressor compressor = this.getCompressor();
final int dictionaryPageSize = this.getDictionaryPageSize();
final boolean enableDictionary = this.isEnableDictionary();
final boolean validating = this.isValidating();
final WriterVersion writerVersion = this.getWriterVersion();
CacheLoader<K, ParquetRecordWriter> loader =
new CacheLoader<K, ParquetRecordWriter> () {
public ParquetRecordWriter load(K key) throws Exception {
final String fieldValue = key.toString();
final long timestamp = System.currentTimeMillis();
Path path = new Path(fieldValue + extension, fieldValue + '-'
+ taskId + '-' + timestamp + extension);
Path file = new Path(workPath, path);
ParquetFileWriter fw = new ParquetFileWriter(
conf,
schema,
file);
fw.start();
return new ParquetRecordWriter<T>(
fw,
writeSupport,
schema,
extraMetaData,
blockSize,
pageSize,
compressor,
dictionaryPageSize,
enableDictionary,
validating,
writerVersion);
}
};
RemovalListener<K, ParquetRecordWriter> removalListener =
new RemovalListener<K, ParquetRecordWriter>() {
public void onRemoval(RemovalNotification<K, ParquetRecordWriter> removal) {
ParquetRecordWriter writerToRemove = removal.getValue();
try {
writerToRemove.close(null);
} catch (IOException ioe) {
throw new RuntimeException("Exception on closing cached writer", ioe);
} catch (InterruptedException ite) {
throw new RuntimeException(ite);
}
}
};
this.cache = CacheBuilder.newBuilder()
.maximumSize(getMaxNumberOfWriters())
.removalListener(removalListener)
.build(loader);
}
return this.cache;
}
Example 13: getWriteSupport
import parquet.hadoop.api.WriteSupport; // import the required package/class
public WriteSupport<T> getWriteSupport() {
return writeSupport;
}
Example 14: ParquetOutputFormat
import parquet.hadoop.api.WriteSupport; // import the required package/class
/**
* constructor used when this OutputFormat is wrapped in another one (in Pig for example)
*
* @param writeSupport the write support instance used to convert the incoming records
*/
public <S extends WriteSupport<T>> ParquetOutputFormat(S writeSupport) {
this.writeSupport = writeSupport;
}
Example 15: ParquetMultiOutputFormat
import parquet.hadoop.api.WriteSupport; // import the required package/class
/**
* constructor used when this OutputFormat is wrapped in another one (in Pig for example)
*
* @param writeSupport the write support instance used to convert the incoming records
* @param maxNumberOfWriters max number of open file handles
*/
public <S extends WriteSupport<T>> ParquetMultiOutputFormat(S writeSupport, int maxNumberOfWriters) {
this.writeSupport = writeSupport;
this.maxNumberOfWriters = maxNumberOfWriters;
}