本文整理汇总了Scala中org.apache.parquet.avro.AvroParquetWriter类的典型用法代码示例。如果您正苦于以下问题：Scala AvroParquetWriter类的具体用法？Scala AvroParquetWriter怎么用？Scala AvroParquetWriter使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了AvroParquetWriter类的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: AvroParquetWriterFn
//设置package包名称以及导入依赖的类
package io.eels.component.parquet.avro
import com.sksamuel.exts.Logging
import io.eels.component.parquet.ParquetWriterConfig
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.hadoop.fs.Path
import org.apache.parquet.avro.AvroParquetWriter
import org.apache.parquet.hadoop.{ParquetFileWriter, ParquetWriter}
/**
 * Factory for Parquet writers that accept Avro [[GenericRecord]]s.
 *
 * Tuning values (compression codec, page size, row-group size, dictionary encoding and
 * validation) are read from a `ParquetWriterConfig` instance created on every call.
 */
object AvroParquetWriterFn extends Logging {

  /**
   * Builds a writer for `path` whose records conform to `avroSchema`.
   *
   * The writer is requested with `ParquetFileWriter.Mode.CREATE`.
   *
   * @param path       location of the Parquet file to write
   * @param avroSchema Avro schema handed to the underlying builder
   * @return the writer produced by the configured builder
   */
  def apply(path: Path, avroSchema: Schema): ParquetWriter[GenericRecord] = {
    val settings = ParquetWriterConfig()

    // Bind the target path and schema first, then layer the tuning options on top.
    val schemaBound = AvroParquetWriter.builder[GenericRecord](path).withSchema(avroSchema)
    val tuned = schemaBound
      .withCompressionCodec(settings.compressionCodec)
      .withPageSize(settings.pageSize)
      .withRowGroupSize(settings.blockSize)
      .withDictionaryEncoding(settings.enableDictionary)
      .withWriteMode(ParquetFileWriter.Mode.CREATE)
      .withValidation(settings.validating)

    tuned.build()
  }
}
示例2: AvroToParquetWriter
//设置package包名称以及导入依赖的类
package yamrcraft.etlite.writers
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.hadoop.fs.Path
import org.apache.parquet.avro.AvroParquetWriter
import org.slf4j.LoggerFactory
import yamrcraft.etlite.utils.FileUtils
/**
 * Writes Avro [[GenericRecord]]s into a Parquet working file and, on [[commit]], moves that
 * working file to its final location.
 *
 * Both paths get a `.parquet` suffix appended. The underlying Parquet writer is created on the
 * first call to [[write]], using the schema of that first event.
 *
 * @param tempFile   working file location, without the `.parquet` suffix
 * @param outputFile final file location, without the `.parquet` suffix
 */
class AvroToParquetWriter(tempFile: String, outputFile: String) extends Writer[GenericRecord] {

  val logger: org.slf4j.Logger = LoggerFactory.getLogger(this.getClass)

  // Lazy initialization: stays None until the first event supplies a schema.
  var writer: Option[AvroParquetWriter[GenericRecord]] = None

  val tempPath: Path = new Path(tempFile + ".parquet")

  val outputPath: Path = new Path(outputFile + ".parquet")

  logger.info(s"creating writer for working file: ${tempPath.toString}, outputFile: ${outputPath.toString}")

  /**
   * Appends `event` to the working file, creating the Parquet writer on first use.
   *
   * @param event record to write; its schema is used to create the writer if none exists yet
   */
  override def write(event: GenericRecord): Unit = {
    logger.info(s"ParquetWriter.write, event type: ${event.getSchema.getName}")
    val active = writer.getOrElse {
      val created = createWriter(tempPath.toString, event.getSchema)
      writer = Some(created)
      created
    }
    active.write(event)
  }

  /**
   * Closes the Parquet writer and moves the working file to [[outputPath]].
   *
   * If nothing was ever written there is no working file to publish, so the call logs a
   * warning and returns instead of failing on the missing writer.
   *
   * @throws java.io.IOException if the working file cannot be renamed to the output path
   */
  override def commit(): Unit = writer match {
    case None =>
      logger.warn(s"commit called before any event was written, nothing to publish to $outputPath")
    case Some(open) =>
      open.close()
      publishTempFile()
  }

  /** Moves the closed working file to the output path, setting aside any existing output file. */
  private def publishTempFile(): Unit = {
    // NOTE(review): FileUtils.getFS presumably resolves a Hadoop FileSystem from the path's
    // scheme — confirm; the rename branch below assumes tempPath is reachable through it.
    val fs = FileUtils.getFS(outputPath.toString)
    fs.mkdirs(outputPath.getParent)

    if (fs.exists(outputPath)) {
      val backup = new Path(
        outputPath.getParent,
        s"__${outputPath.getName}.${System.currentTimeMillis()}.old.__")
      // rename reports failure through its return value; surface it instead of dropping it.
      if (!fs.rename(outputPath, backup)) {
        logger.warn(s"could not move existing output file $outputPath aside to $backup")
      }
    }

    // copy temp file to output file (typically temp file would be on local file system).
    if (tempFile.startsWith("file")) {
      logger.info(s"copy file from: ${tempPath.toString} to $outputPath")
      // Arguments are (delSrc, overwrite, src, dst): the working file is removed after the copy.
      fs.copyFromLocalFile(true, true, tempPath, outputPath)
    } else {
      logger.info(s"renaming file from: ${tempPath.toString} to $outputPath")
      // A false result means the data never reached outputPath; fail loudly rather than lose it.
      if (!fs.rename(tempPath, outputPath)) {
        throw new java.io.IOException(s"failed to rename $tempPath to $outputPath")
      }
    }
  }

  /**
   * Creates a Parquet writer for `file`, deleting any file already at that path and ensuring
   * the parent directory exists.
   *
   * @param file   path of the working file
   * @param schema Avro schema of the records to be written
   */
  private def createWriter(file: String, schema: Schema): AvroParquetWriter[GenericRecord] = {
    val fs = FileUtils.getFS(file)
    val path = new Path(file)
    if (fs.exists(path)) {
      fs.delete(path, true)
    }
    fs.mkdirs(path.getParent)
    // The direct constructor is kept (rather than AvroParquetWriter.builder) so the public
    // `writer` field keeps its AvroParquetWriter type.
    new AvroParquetWriter[GenericRecord](path, schema)
  }
}