This article collects typical usage examples of the Scala class org.apache.parquet.hadoop.ParquetWriter. If you are unsure what the ParquetWriter class does or how to use it from Scala, the curated class examples below may help.
Three code examples of the ParquetWriter class are shown below; by default they are ordered by popularity.
Example 1: AvroParquetWriterFn
// Package declaration and imported dependencies
package io.eels.component.parquet.avro

import com.sksamuel.exts.Logging
import io.eels.component.parquet.ParquetWriterConfig
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.hadoop.fs.Path
import org.apache.parquet.avro.AvroParquetWriter
import org.apache.parquet.hadoop.{ParquetFileWriter, ParquetWriter}

object AvroParquetWriterFn extends Logging {

  // Builds a ParquetWriter for Avro GenericRecords at the given path, applying the
  // compression codec, page/row-group sizes and dictionary/validation settings
  // resolved by ParquetWriterConfig.
  def apply(path: Path, avroSchema: Schema): ParquetWriter[GenericRecord] = {
    val config = ParquetWriterConfig()
    AvroParquetWriter.builder[GenericRecord](path)
      .withSchema(avroSchema)
      .withCompressionCodec(config.compressionCodec)
      .withPageSize(config.pageSize)
      .withRowGroupSize(config.blockSize)
      .withDictionaryEncoding(config.enableDictionary)
      .withWriteMode(ParquetFileWriter.Mode.CREATE) // fail if the file already exists
      .withValidation(config.validating)
      .build()
  }
}
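For context, the following is a minimal usage sketch and is not part of the original listing. It assumes the AvroParquetWriterFn object above is on the classpath; the example object name, the trivial single-field Avro schema and the /tmp/users.parquet output path are illustrative assumptions only.

import io.eels.component.parquet.avro.AvroParquetWriterFn
import org.apache.avro.SchemaBuilder
import org.apache.avro.generic.GenericRecordBuilder
import org.apache.hadoop.fs.Path

object AvroParquetWriterFnExample extends App {
  // A trivial Avro schema with a single required string field (illustrative only).
  val schema = SchemaBuilder.record("user").fields()
    .requiredString("name")
    .endRecord()

  // AvroParquetWriterFn resolves compression, page size, etc. from ParquetWriterConfig.
  val writer = AvroParquetWriterFn(new Path("/tmp/users.parquet"), schema)
  try {
    writer.write(new GenericRecordBuilder(schema).set("name", "alice").build())
  } finally {
    writer.close() // flushes buffered pages and writes the Parquet footer
  }
}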
Example 2: ParquetWriterConfig
// Package declaration and imported dependencies
package io.eels.component.parquet

import com.sksamuel.exts.Logging
import com.sksamuel.exts.config.ConfigSupport
import com.typesafe.config.{Config, ConfigFactory}
import org.apache.parquet.hadoop.ParquetWriter
import org.apache.parquet.hadoop.metadata.CompressionCodecName

case class ParquetWriterConfig(blockSize: Int,
                               pageSize: Int,
                               compressionCodec: CompressionCodecName,
                               enableDictionary: Boolean,
                               validating: Boolean)

object ParquetWriterConfig extends Logging with ConfigSupport {

  def apply(): ParquetWriterConfig = apply(ConfigFactory.load())

  def apply(config: Config): ParquetWriterConfig = {
    // Fall back to Parquet's own defaults when the eel.parquet.* size keys are absent.
    val blockSize: Int = config.getIntOrElse("eel.parquet.blockSize", ParquetWriter.DEFAULT_BLOCK_SIZE)
    val pageSize: Int = config.getIntOrElse("eel.parquet.pageSize", ParquetWriter.DEFAULT_PAGE_SIZE)
    val compressionCodec = config.getString("eel.parquet.compressionCodec").toLowerCase() match {
      case "gzip" => CompressionCodecName.GZIP
      case "lzo" => CompressionCodecName.LZO
      case "snappy" => CompressionCodecName.SNAPPY
      case _ => CompressionCodecName.UNCOMPRESSED
    }
    logger.debug(s"Parquet writer will use blockSize = $blockSize; pageSize = $pageSize; compressionCodec = $compressionCodec")
    // Dictionary encoding and schema validation are always enabled here.
    ParquetWriterConfig(blockSize, pageSize, compressionCodec, true, true)
  }
}
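As a hedged illustration that is not part of the original listing, the sketch below shows how the eel.parquet.* keys read by ParquetWriterConfig could be supplied through Typesafe Config. The example object name and the concrete values are made up for the example.

import com.typesafe.config.ConfigFactory
import io.eels.component.parquet.ParquetWriterConfig

object ParquetWriterConfigExample extends App {
  // blockSize and pageSize fall back to Parquet's defaults when omitted;
  // compressionCodec is read with getString in the code above, so it must be set here.
  val config = ConfigFactory.parseString(
    """
      |eel.parquet.blockSize = 134217728
      |eel.parquet.pageSize = 1048576
      |eel.parquet.compressionCodec = snappy
      |""".stripMargin)

  // Prints a config with the overridden sizes, SNAPPY compression,
  // and dictionary encoding/validation enabled.
  println(ParquetWriterConfig(config))
}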
Example 3: RowParquetWriterFn
// Package declaration and imported dependencies
package io.eels.component.parquet

import io.eels.Row
import io.eels.schema.StructType
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.parquet.column.ParquetProperties
import org.apache.parquet.hadoop.api.WriteSupport
import org.apache.parquet.hadoop.{ParquetFileWriter, ParquetWriter}
import org.apache.parquet.schema.MessageType

import scala.math.BigDecimal.RoundingMode.RoundingMode

object RowParquetWriterFn {

  // Builder that plugs the custom RowWriteSupport into Parquet's generic
  // ParquetWriter.Builder so that eel Rows can be written directly.
  class RowParquetWriterBuilder(path: Path,
                                schema: MessageType,
                                roundingMode: RoundingMode,
                                metadata: Map[String, String])
    extends ParquetWriter.Builder[Row, RowParquetWriterBuilder](path) {
    override def getWriteSupport(conf: Configuration): WriteSupport[Row] = new RowWriteSupport(schema, roundingMode, metadata)
    override def self(): RowParquetWriterBuilder = this
  }

  def apply(path: Path,
            schema: StructType,
            metadata: Map[String, String],
            dictionary: Boolean,
            roundingMode: RoundingMode): ParquetWriter[Row] = {
    val config = ParquetWriterConfig()
    // Convert the eel StructType into a Parquet MessageType before building the writer.
    val messageType = ParquetSchemaFns.toParquetMessageType(schema)
    new RowParquetWriterBuilder(path, messageType, roundingMode, metadata)
      .withCompressionCodec(config.compressionCodec)
      .withDictionaryEncoding(dictionary)
      .withDictionaryPageSize(ParquetProperties.DEFAULT_DICTIONARY_PAGE_SIZE)
      .withPageSize(config.pageSize)
      .withRowGroupSize(config.blockSize)
      .withValidation(config.validating)
      .withWriteMode(ParquetFileWriter.Mode.CREATE)
      .withWriterVersion(ParquetProperties.DEFAULT_WRITER_VERSION)
      .build()
  }
}
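To round out the examples, here is a minimal usage sketch that is not from the original source: it writes a single eel Row through RowParquetWriterFn. The one-column StructType, the output path and the example object name are assumptions, as is the exact shape of the eel Field and Row constructors used below.

import io.eels.Row
import io.eels.component.parquet.RowParquetWriterFn
import io.eels.schema.{Field, StringType, StructType}
import org.apache.hadoop.fs.Path

import scala.math.BigDecimal.RoundingMode

object RowParquetWriterFnExample extends App {
  // A one-column schema; Field and StringType are assumed from the eel-sdk schema API.
  val schema = StructType(Field("name", StringType))

  val writer = RowParquetWriterFn(
    path = new Path("/tmp/rows.parquet"), // illustrative output location
    schema = schema,
    metadata = Map("written.by" -> "example"),
    dictionary = true,
    roundingMode = RoundingMode.HALF_UP
  )
  try {
    writer.write(Row(schema, Vector("alice")))
  } finally {
    writer.close() // flushes the row group and writes the Parquet footer
  }
}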