本文整理汇总了Scala中org.apache.parquet.io.api.RecordConsumer类的典型用法代码示例。如果您正苦于以下问题：Scala RecordConsumer类的具体用法？Scala RecordConsumer怎么用？Scala RecordConsumer使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了RecordConsumer类的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: ScalaPBWriteSupport
//设置package包名称以及导入依赖的类
package com.trueaccord.scalapb.parquet
import java.util
import com.google.protobuf.Descriptors.Descriptor
import com.trueaccord.scalapb.{GeneratedMessage, Message}
import org.apache.hadoop.conf.Configuration
import org.apache.parquet.hadoop.BadConfigurationException
import org.apache.parquet.hadoop.api.WriteSupport
import org.apache.parquet.hadoop.api.WriteSupport.WriteContext
import org.apache.parquet.io.api.RecordConsumer
import org.apache.parquet.schema.MessageType
/**
 * Parquet `WriteSupport` that writes ScalaPB-generated messages of type `T`.
 *
 * The message class is taken from `pbClass` when that field has been assigned; otherwise
 * `init` looks it up in the Hadoop configuration under
 * `ScalaPBWriteSupport.SCALAPB_CLASS_WRITE`.
 *
 * @tparam T the message type being written
 */
class ScalaPBWriteSupport[T <: GeneratedMessage with Message[T]] extends WriteSupport[T] {
  // Both fields start out null: pbClass is filled in by init (unless assigned beforehand),
  // recordConsumer by prepareForWrite.
  var pbClass: Class[T] = null
  var recordConsumer: RecordConsumer = null

  /**
   * Resolves the message class if necessary, converts its descriptor with `SchemaConverter`
   * and returns a write context carrying that schema plus the class name, stored in the
   * extra metadata under `ScalaPBReadSupport.PB_CLASS`.
   *
   * @param configuration Hadoop configuration consulted when `pbClass` is still null
   * @throws BadConfigurationException if no message class is set here or in the configuration
   */
  override def init(configuration: Configuration): WriteContext = {
    if (pbClass == null) {
      val configured =
        configuration.getClass(ScalaPBWriteSupport.SCALAPB_CLASS_WRITE, null, classOf[GeneratedMessage])
      pbClass = configured.asInstanceOf[Class[T]]
      if (pbClass == null) {
        throw new BadConfigurationException(
          "ScalaPB class not specified. Please use ScalaPBOutputFormat.setMessageClass.")
      }
    }

    // Reflective call with a null receiver, so `descriptor` must be reachable as a static
    // method of the message class (presumably the companion's static forwarder — confirm).
    val descriptorMethod = pbClass.getMethod("descriptor")
    val descriptor: Descriptor = descriptorMethod.invoke(null).asInstanceOf[Descriptor]
    val rootSchema: MessageType = SchemaConverter.convert(descriptor)

    val extraMetadata = new util.HashMap[String, String]
    extraMetadata.put(ScalaPBReadSupport.PB_CLASS, pbClass.getName)
    new WriteContext(rootSchema, extraMetadata)
  }

  /** Writes `record` as one top-level message to the consumer stored by `prepareForWrite`. */
  override def write(record: T): Unit =
    MessageWriter.writeTopLevelMessage(recordConsumer, record)

  /** Stores the consumer that subsequent `write` calls will emit into. */
  override def prepareForWrite(recordConsumer: RecordConsumer): Unit = {
    this.recordConsumer = recordConsumer
  }
}
/** Configuration key and helper for the `ScalaPBWriteSupport` class. */
object ScalaPBWriteSupport {
  /** Hadoop configuration key under which the message class to write is stored. */
  val SCALAPB_CLASS_WRITE = "parquet.scalapb.writeClass"

  /**
   * Stores `protoClass` in `config` under [[SCALAPB_CLASS_WRITE]], registered against the
   * `GeneratedMessage` interface.
   *
   * @param config     Hadoop configuration to update
   * @param protoClass message class to write
   */
  def setSchema[T <: GeneratedMessage](config: Configuration, protoClass: Class[T]): Unit =
    config.setClass(SCALAPB_CLASS_WRITE, protoClass, classOf[GeneratedMessage])
}
示例2: MessageWriter
//设置package包名称以及导入依赖的类
package com.trueaccord.scalapb.parquet
import com.google.protobuf.ByteString
import com.google.protobuf.Descriptors.FieldDescriptor.JavaType
import com.google.protobuf.Descriptors.{EnumValueDescriptor, FieldDescriptor}
import com.trueaccord.scalapb.{GeneratedMessage, Message}
import org.apache.parquet.io.api.Binary
import org.apache.parquet.Log
import org.apache.parquet.io.api.RecordConsumer
/** Emits ScalaPB messages into a Parquet `RecordConsumer`, field by field. */
object MessageWriter {
  val log = Log.getLog(this.getClass)

  /**
   * Writes `m` as one complete record: start-message, every field returned by
   * `getAllFields`, then end-message.
   *
   * @param consumer sink receiving the record events
   * @param m        message to write
   */
  def writeTopLevelMessage[T <: GeneratedMessage with Message[T]](consumer: RecordConsumer, m: T): Unit = {
    consumer.startMessage()
    writeAllFields(consumer, m)
    consumer.endMessage()
  }

  /**
   * Writes each (descriptor, value) pair of `m.getAllFields` between a startField/endField
   * pair, using the descriptor's name and index. Repeated fields emit one value per element.
   */
  private def writeAllFields[T <: GeneratedMessage](consumer: RecordConsumer, m: T): Unit =
    m.getAllFields.foreach { case (fd, value) =>
      val fieldName = fd.getName
      val fieldIndex = fd.getIndex
      consumer.startField(fieldName, fieldIndex)
      if (!fd.isRepeated) {
        writeSingleField(consumer, fd, value)
      } else {
        // A repeated field's value is cast to Seq[Any]; every element goes into the same field.
        value.asInstanceOf[Seq[Any]].foreach(element => writeSingleField(consumer, fd, element))
      }
      consumer.endField(fieldName, fieldIndex)
    }

  /**
   * Writes one value, choosing the consumer call from the field's protobuf Java type.
   * Strings and enum value names are written as binaries; nested messages are written as
   * groups via [[writeAllFields]].
   *
   * @throws UnsupportedOperationException for a Java type not handled below
   */
  private def writeSingleField(consumer: RecordConsumer, fd: FieldDescriptor, v: Any): Unit = {
    val fieldType = fd.getJavaType
    fieldType match {
      case JavaType.MESSAGE =>
        consumer.startGroup()
        writeAllFields(consumer, v.asInstanceOf[GeneratedMessage])
        consumer.endGroup()
      case JavaType.STRING =>
        consumer.addBinary(Binary.fromString(v.asInstanceOf[String]))
      case JavaType.BYTE_STRING =>
        consumer.addBinary(Binary.fromByteArray(v.asInstanceOf[ByteString].toByteArray))
      case JavaType.ENUM =>
        consumer.addBinary(Binary.fromString(v.asInstanceOf[EnumValueDescriptor].getName))
      case JavaType.BOOLEAN =>
        consumer.addBoolean(v.asInstanceOf[Boolean])
      case JavaType.INT =>
        consumer.addInteger(v.asInstanceOf[Int])
      case JavaType.LONG =>
        consumer.addLong(v.asInstanceOf[Long])
      case JavaType.FLOAT =>
        consumer.addFloat(v.asInstanceOf[Float])
      case JavaType.DOUBLE =>
        consumer.addDouble(v.asInstanceOf[Double])
      case javaType =>
        throw new UnsupportedOperationException("Cannot convert Protocol Buffer: unknown type " + javaType)
    }
  }
}
示例3: RowWriteSupport
//设置package包名称以及导入依赖的类
package io.eels.component.parquet
import com.sksamuel.exts.Logging
import io.eels.Row
import org.apache.hadoop.conf.Configuration
import org.apache.parquet.hadoop.api.WriteSupport
import org.apache.parquet.hadoop.api.WriteSupport.FinalizedWriteContext
import org.apache.parquet.io.api.RecordConsumer
import org.apache.parquet.schema.MessageType
import scala.collection.JavaConverters._
import scala.math.BigDecimal.RoundingMode.RoundingMode
/**
 * Implementation of `WriteSupport` for `Row`s, used by the native ParquetWriter.
 *
 * @param schema       Parquet message type handed to the write context in `init`
 * @param roundingMode rounding mode passed on to the `RowWriter`
 * @param metadata     key/value pairs returned from `finalizeWrite`
 */
class RowWriteSupport(schema: MessageType,
                      roundingMode: RoundingMode,
                      metadata: Map[String, String]) extends WriteSupport[Row] with Logging {

  logger.trace(s"Created parquet row write support for schema message type $schema")

  // Stays null until prepareForWrite supplies the record consumer; write relies on it
  // having been assigned by then.
  private var writer: RowWriter = _

  /** Returns a finalized context holding the metadata given to the constructor. */
  override def finalizeWrite(): FinalizedWriteContext = new FinalizedWriteContext(metadata.asJava)

  /** Returns a write context with the configured schema and empty extra metadata. */
  override def init(configuration: Configuration): WriteSupport.WriteContext =
    new WriteSupport.WriteContext(schema, new java.util.HashMap[String, String]())

  /** Creates the `RowWriter` that will emit rows into `record`. */
  override def prepareForWrite(record: RecordConsumer): Unit = {
    writer = new RowWriter(record, roundingMode)
  }

  /** Delegates writing of `row` to the writer created in `prepareForWrite`. */
  override def write(row: Row): Unit = writer.write(row)
}
/**
 * Writes `Row`s to a Parquet `RecordConsumer`, one message per row.
 *
 * @param record       consumer receiving the message events
 * @param roundingMode rounding mode forwarded to the `StructRecordWriter`
 */
class RowWriter(record: RecordConsumer, roundingMode: RoundingMode) {

  /**
   * Emits `row` as a single message: a new `StructRecordWriter` built from the row's schema
   * writes the row's values between startMessage and endMessage.
   */
  def write(row: Row): Unit = {
    record.startMessage()
    // NOTE(review): the meaning of the trailing `false` flag is defined by StructRecordWriter,
    // which is not shown here — confirm.
    val structWriter = new StructRecordWriter(row.schema, roundingMode, false)
    structWriter.write(record, row.values)
    record.endMessage()
  }
}