当前位置: 首页>>代码示例>>Scala>>正文


Scala MessageType类代码示例

本文整理汇总了Scala中org.apache.parquet.schema.MessageType的典型用法代码示例。如果您正苦于以下问题：Scala MessageType类的具体用法？Scala MessageType怎么用？Scala MessageType使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了MessageType类的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: ScalaPBWriteSupport

//设置package包名称以及导入依赖的类
package com.trueaccord.scalapb.parquet

import java.util

import com.google.protobuf.Descriptors.Descriptor
import com.trueaccord.scalapb.{GeneratedMessage, Message}
import org.apache.hadoop.conf.Configuration
import org.apache.parquet.hadoop.BadConfigurationException
import org.apache.parquet.hadoop.api.WriteSupport
import org.apache.parquet.hadoop.api.WriteSupport.WriteContext
import org.apache.parquet.io.api.RecordConsumer
import org.apache.parquet.schema.MessageType

/**
 * Parquet `WriteSupport` for ScalaPB-generated messages.
 *
 * The message class is taken from `pbClass` when it is already set; otherwise it is looked up
 * in the Hadoop configuration under `ScalaPBWriteSupport.SCALAPB_CLASS_WRITE`.
 *
 * @tparam T the ScalaPB message type being written
 */
class ScalaPBWriteSupport[T <: GeneratedMessage with Message[T]] extends WriteSupport[T] {
  // Message class to write; resolved from the configuration in `init` while still null.
  var pbClass: Class[T] = null
  // Consumer handed over by `prepareForWrite`; null until that call.
  var recordConsumer: RecordConsumer = null

  /**
   * Resolves the message class, converts its descriptor into the Parquet schema and stores
   * the class name in the extra metadata under `ScalaPBReadSupport.PB_CLASS`.
   *
   * @param configuration Hadoop configuration consulted when `pbClass` is not set
   * @return the write context holding the schema and the extra metadata
   * @throws BadConfigurationException if `pbClass` is unset and the configuration has no class
   */
  override def init(configuration: Configuration): WriteContext = {
    if (pbClass == null) {
      val configured =
        configuration.getClass(ScalaPBWriteSupport.SCALAPB_CLASS_WRITE, null, classOf[GeneratedMessage])
      if (configured == null) {
        throw new BadConfigurationException("ScalaPB class not specified. Please use ScalaPBOutputFormat.setMessageClass.")
      }
      pbClass = configured.asInstanceOf[Class[T]]
    }

    // `descriptor` is looked up reflectively and invoked with a null receiver.
    val descriptorMethod = pbClass.getMethod("descriptor")
    val messageDescriptor = descriptorMethod.invoke(null).asInstanceOf[Descriptor]
    val schema: MessageType = SchemaConverter.convert(messageDescriptor)

    val extraMetadata = new util.HashMap[String, String]
    extraMetadata.put(ScalaPBReadSupport.PB_CLASS, pbClass.getName)
    new WriteContext(schema, extraMetadata)
  }

  /** Writes `record` as a top-level message to the current `recordConsumer`. */
  override def write(record: T): Unit =
    MessageWriter.writeTopLevelMessage(recordConsumer, record)

  /** Remembers the consumer that subsequent `write` calls emit to. */
  override def prepareForWrite(recordConsumer: RecordConsumer): Unit = {
    this.recordConsumer = recordConsumer
  }
}

/** Configuration key and helper for choosing the message class written by `ScalaPBWriteSupport`. */
object ScalaPBWriteSupport {
  /** Hadoop configuration key under which the message class is stored. */
  val SCALAPB_CLASS_WRITE: String = "parquet.scalapb.writeClass"

  /**
   * Stores `protoClass` in `config` under `SCALAPB_CLASS_WRITE`.
   *
   * @param config     configuration to update
   * @param protoClass message class to register
   */
  def setSchema[T <: GeneratedMessage](config: Configuration, protoClass: Class[T]): Unit =
    config.setClass(SCALAPB_CLASS_WRITE, protoClass, classOf[GeneratedMessage])
}
开发者ID:scalapb,项目名称:sparksql-scalapb,代码行数:48,代码来源:ScalaPBWriteSupport.scala

示例2: SchemaConverter

//设置package包名称以及导入依赖的类
package com.trueaccord.scalapb.parquet

import com.google.protobuf.Descriptors.{ Descriptor, FieldDescriptor }
import org.apache.parquet.schema.Types.{ Builder, GroupBuilder, MessageTypeBuilder, PrimitiveBuilder }
import org.apache.parquet.schema.{ MessageType, Type, Types }

import scala.collection.JavaConverters._

/** Converts Protocol Buffer message descriptors into Parquet message types. */
object SchemaConverter {
  /**
   * Builds the Parquet schema for `descriptor`; the schema is named after the descriptor's
   * full name.
   */
  def convert(descriptor: Descriptor): MessageType =
    addAllFields(descriptor, Types.buildMessage()).named(descriptor.getFullName)

  /**
   * Adds every field of `descriptor` to `builder`, using the field number as the id and the
   * field name as the name, and returns `builder`.
   */
  private def addAllFields[T](descriptor: Descriptor, builder: GroupBuilder[T]): GroupBuilder[T] = {
    for (field <- descriptor.getFields.asScala) {
      addSingleField(field, builder).id(field.getNumber).named(field.getName)
    }
    builder
  }

  /**
   * Starts the Parquet field matching `fd` on `builder`; the caller completes it with an id
   * and a name. Message fields become groups whose members are added recursively.
   */
  private def addSingleField[T](fd: FieldDescriptor, builder: GroupBuilder[T]) = {
    import FieldDescriptor.JavaType
    import org.apache.parquet.schema.OriginalType
    import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName

    // Required wins over repeated; everything else is optional.
    val repetition =
      if (fd.isRequired) Type.Repetition.REQUIRED
      else if (fd.isRepeated) Type.Repetition.REPEATED
      else Type.Repetition.OPTIONAL

    def primitive(typeName: PrimitiveTypeName) = builder.primitive(typeName, repetition)
    def utf8Binary = primitive(PrimitiveTypeName.BINARY).as(OriginalType.UTF8)

    fd.getJavaType match {
      case JavaType.BOOLEAN              => primitive(PrimitiveTypeName.BOOLEAN)
      case JavaType.INT                  => primitive(PrimitiveTypeName.INT32)
      case JavaType.LONG                 => primitive(PrimitiveTypeName.INT64)
      case JavaType.FLOAT                => primitive(PrimitiveTypeName.FLOAT)
      case JavaType.DOUBLE               => primitive(PrimitiveTypeName.DOUBLE)
      case JavaType.BYTE_STRING          => primitive(PrimitiveTypeName.BINARY)
      // Strings and enums are both stored as UTF8-annotated binary.
      case JavaType.STRING | JavaType.ENUM => utf8Binary
      case JavaType.MESSAGE              => addAllFields(fd.getMessageType, builder.group(repetition))
      case unknown =>
        throw new UnsupportedOperationException(s"Cannot convert Protocol Buffer: unknown type $unknown")
    }
  }
}
开发者ID:scalapb,项目名称:sparksql-scalapb,代码行数:48,代码来源:SchemaConverter.scala

示例3: ScalaPBReadSupport

//设置package包名称以及导入依赖的类
package com.trueaccord.scalapb.parquet

import java.util

import com.trueaccord.scalapb.{GeneratedMessage, GeneratedMessageCompanion, Message}
import org.apache.hadoop.conf.Configuration
import org.apache.parquet.hadoop.api.{InitContext, ReadSupport}
import org.apache.parquet.hadoop.api.ReadSupport.ReadContext
import org.apache.parquet.io.api.{GroupConverter, RecordMaterializer}
import org.apache.parquet.schema.MessageType

/**
 * Parquet `ReadSupport` that materializes ScalaPB messages.
 *
 * The message class name is read from the file's key/value metadata under
 * `ScalaPBReadSupport.PB_CLASS`.
 *
 * @tparam T the ScalaPB message type being read
 */
class ScalaPBReadSupport[T <: GeneratedMessage with Message[T]] extends ReadSupport[T] {
  /**
   * Creates a materializer that converts records of `fileSchema` into messages of the class
   * named in `keyValueMetaData`.
   *
   * @throws RuntimeException if the metadata has no entry for `ScalaPBReadSupport.PB_CLASS`
   */
  override def prepareForRead(
    configuration: Configuration,
    keyValueMetaData: util.Map[String, String],
    fileSchema: MessageType,
    readContext: ReadContext): RecordMaterializer[T] = {
    val protoClass = Option(keyValueMetaData.get(ScalaPBReadSupport.PB_CLASS)) match {
      case Some(className) => className
      case None => throw new RuntimeException(s"Value for ${ScalaPBReadSupport.PB_CLASS} not found.")
    }
    val companion = companionOf(protoClass)

    new RecordMaterializer[T] {
      val root = new ProtoMessageConverter[T](companion, fileSchema, onEnd = _ => ())

      override def getRootConverter: GroupConverter = root

      override def getCurrentRecord: T = root.getCurrentRecord
    }
  }

  /** Uses the file schema unchanged as the requested read schema. */
  override def init(context: InitContext): ReadContext =
    new ReadContext(context.getFileSchema)

  /** Looks up the module named `className` via runtime reflection and returns its instance. */
  private def companionOf(className: String): GeneratedMessageCompanion[T] = {
    import scala.reflect.runtime.universe

    val mirror = universe.runtimeMirror(getClass.getClassLoader)
    val moduleSymbol = mirror.staticModule(className)
    mirror.reflectModule(moduleSymbol).instance.asInstanceOf[GeneratedMessageCompanion[T]]
  }
}

/** Constants shared by the ScalaPB Parquet read and write support. */
object ScalaPBReadSupport {
  /** Metadata key holding the name of the ScalaPB message class. */
  val PB_CLASS: String = "parquet.scalapb.class"
}
开发者ID:scalapb,项目名称:sparksql-scalapb,代码行数:48,代码来源:ScalaPBReadSupport.scala

示例4: RowWriteSupport

//设置package包名称以及导入依赖的类
package io.eels.component.parquet

import com.sksamuel.exts.Logging
import io.eels.Row
import org.apache.hadoop.conf.Configuration
import org.apache.parquet.hadoop.api.WriteSupport
import org.apache.parquet.hadoop.api.WriteSupport.FinalizedWriteContext
import org.apache.parquet.io.api.RecordConsumer
import org.apache.parquet.schema.MessageType

import scala.collection.JavaConverters._
import scala.math.BigDecimal.RoundingMode.RoundingMode

/**
 * Implementation of `WriteSupport` for `Row`s, used by the native ParquetWriter.
 *
 * @param schema       Parquet message type reported in the write context
 * @param roundingMode rounding mode passed on to the `RowWriter`
 * @param metadata     key/value pairs returned from `finalizeWrite`
 */
class RowWriteSupport(schema: MessageType,
                      roundingMode: RoundingMode,
                      metadata: Map[String, String]) extends WriteSupport[Row] with Logging {
  logger.trace(s"Created parquet row write support for schema message type $schema")

  // Created in `prepareForWrite`; unset until then.
  private var writer: RowWriter = _

  /** Returns a write context with the schema and an empty extra-metadata map. */
  override def init(configuration: Configuration): WriteSupport.WriteContext = {
    val extraMetadata = new java.util.HashMap[String, String]()
    new WriteSupport.WriteContext(schema, extraMetadata)
  }

  /** Creates the row writer bound to the given record consumer. */
  override def prepareForWrite(record: RecordConsumer): Unit = {
    writer = new RowWriter(record, roundingMode)
  }

  /** Writes a single row through the current writer. */
  override def write(row: Row): Unit = writer.write(row)

  /** Hands the supplied metadata over as the finalized write context. */
  override def finalizeWrite(): FinalizedWriteContext = new FinalizedWriteContext(metadata.asJava)
}

/**
 * Writes `Row`s to a Parquet `RecordConsumer`, one message per row.
 *
 * @param record       consumer receiving the message events
 * @param roundingMode rounding mode passed on to the `StructRecordWriter`
 */
class RowWriter(record: RecordConsumer, roundingMode: RoundingMode) {

  /** Emits `row` as one message: start marker, the row's values, end marker. */
  def write(row: Row): Unit = {
    record.startMessage()
    new StructRecordWriter(row.schema, roundingMode, false).write(record, row.values)
    record.endMessage()
  }
}
开发者ID:51zero,项目名称:eel-sdk,代码行数:46,代码来源:RowWriteSupport.scala

示例5: RowParquetWriterFn

//设置package包名称以及导入依赖的类
package io.eels.component.parquet

import io.eels.Row
import io.eels.schema.StructType
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.parquet.column.ParquetProperties
import org.apache.parquet.hadoop.api.WriteSupport
import org.apache.parquet.hadoop.{ParquetFileWriter, ParquetWriter}
import org.apache.parquet.schema.MessageType

import scala.math.BigDecimal.RoundingMode.RoundingMode


/** Factory for `ParquetWriter`s that write `Row`s. */
object RowParquetWriterFn {

  /**
   * `ParquetWriter.Builder` whose write support is a `RowWriteSupport` built from the given
   * schema, rounding mode and metadata.
   */
  class RowParquetWriterBuilder(path: Path,
                                schema: MessageType,
                                roundingMode: RoundingMode,
                                metadata: Map[String, String])
    extends ParquetWriter.Builder[Row, RowParquetWriterBuilder](path) {

    override def self(): RowParquetWriterBuilder = this

    override def getWriteSupport(conf: Configuration): WriteSupport[Row] = {
      new RowWriteSupport(schema, roundingMode, metadata)
    }
  }

  /**
   * Creates a writer for `path` in CREATE mode.
   *
   * Compression codec, page size, row group size and validation come from
   * `ParquetWriterConfig()`; dictionary page size and writer version use the
   * `ParquetProperties` defaults.
   *
   * @param path         destination file
   * @param schema       struct type converted to the Parquet message type
   * @param metadata     key/value pairs passed to the write support
   * @param dictionary   whether dictionary encoding is enabled
   * @param roundingMode rounding mode passed to the write support
   */
  def apply(path: Path,
            schema: StructType,
            metadata: Map[String, String],
            dictionary: Boolean,
            roundingMode: RoundingMode): ParquetWriter[Row] = {
    val config = ParquetWriterConfig()
    val builder = new RowParquetWriterBuilder(
      path,
      ParquetSchemaFns.toParquetMessageType(schema),
      roundingMode,
      metadata)

    builder
      .withCompressionCodec(config.compressionCodec)
      .withDictionaryEncoding(dictionary)
      .withDictionaryPageSize(ParquetProperties.DEFAULT_DICTIONARY_PAGE_SIZE)
      .withPageSize(config.pageSize)
      .withRowGroupSize(config.blockSize)
      .withValidation(config.validating)
      .withWriteMode(ParquetFileWriter.Mode.CREATE)
      .withWriterVersion(ParquetProperties.DEFAULT_WRITER_VERSION)
      .build()
  }
}
开发者ID:51zero,项目名称:eel-sdk,代码行数:45,代码来源:RowParquetWriterFn.scala


注:本文中的org.apache.parquet.schema.MessageType类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。