当前位置: 首页>>代码示例>>Scala>>正文


Scala AvroParquetWriter类代码示例

本文整理汇总了Scala中org.apache.parquet.avro.AvroParquetWriter的典型用法代码示例。如果您正苦于以下问题:Scala AvroParquetWriter类的具体用法?Scala AvroParquetWriter怎么用?Scala AvroParquetWriter使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了AvroParquetWriter类的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: AvroParquetWriterFn

//设置package包名称以及导入依赖的类
package io.eels.component.parquet.avro

import com.sksamuel.exts.Logging
import io.eels.component.parquet.ParquetWriterConfig
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.hadoop.fs.Path
import org.apache.parquet.avro.AvroParquetWriter
import org.apache.parquet.hadoop.{ParquetFileWriter, ParquetWriter}


object AvroParquetWriterFn extends Logging {
  def apply(path: Path, avroSchema: Schema): ParquetWriter[GenericRecord] = {
    val config = ParquetWriterConfig()
    AvroParquetWriter.builder[GenericRecord](path)
      .withSchema(avroSchema)
      .withCompressionCodec(config.compressionCodec)
      .withPageSize(config.pageSize)
      .withRowGroupSize(config.blockSize)
      .withDictionaryEncoding(config.enableDictionary)
      .withWriteMode(ParquetFileWriter.Mode.CREATE)
      .withValidation(config.validating)
      .build()
  }
} 
开发者ID:51zero,项目名称:eel-sdk,代码行数:26,代码来源:AvroParquetWriterFn.scala

示例2: AvroToParquetWriter

//设置package包名称以及导入依赖的类
package yamrcraft.etlite.writers

import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.hadoop.fs.Path
import org.apache.parquet.avro.AvroParquetWriter
import org.slf4j.LoggerFactory
import yamrcraft.etlite.utils.FileUtils

class AvroToParquetWriter(tempFile: String, outputFile: String) extends Writer[GenericRecord] {

  val logger = LoggerFactory.getLogger(this.getClass)

  // lazy initialization
  var writer: Option[AvroParquetWriter[GenericRecord]] = None

  val tempPath = new Path(tempFile + ".parquet")
  val outputPath = new Path(outputFile + ".parquet")
  logger.info(s"creating writer for working file: ${tempPath.toString}, outputFile: ${outputPath.toString}")

  override def write(event: GenericRecord): Unit = {
    logger.info(s"ParquetWriter.write, event type: ${event.getSchema.getName}")
    if (writer.isEmpty) {
      writer = Some(createWriter(tempPath.toString, event.getSchema))
    }

    writer.get.write(event)
  }

  override def commit(): Unit = {
    writer.get.close()

    val fs = FileUtils.getFS(outputPath.toString)
    fs.mkdirs(outputPath.getParent)
    if (fs.exists(outputPath)) {
      fs.rename(outputPath, new Path(outputPath.getParent, s"__${outputPath.getName}.${System.currentTimeMillis()}.old.__"))
    }
    // copy temp file to output file (typically temp file would be on local file system).
    if (tempFile.startsWith("file")) {
      logger.info(s"copy file from: ${tempPath.toString} to $outputPath")
      fs.copyFromLocalFile(true, true, tempPath, outputPath)
    } else {
      logger.info(s"renaming file from: ${tempPath.toString} to $outputPath")
      fs.rename(tempPath, outputPath)
    }
  }

  private def createWriter(file: String, schema: Schema) = {
    val fs = FileUtils.getFS(file)
    val path = new Path(file)
    if (fs.exists(path)) {
      fs.delete(path, true)
    }
    fs.mkdirs(path.getParent)
    new AvroParquetWriter[GenericRecord](path, schema)
  }

} 
开发者ID:yamrcraft,项目名称:etl-light,代码行数:59,代码来源:AvroToParquetWriter.scala


注:本文中的org.apache.parquet.avro.AvroParquetWriter类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。