本文整理汇总了Scala中org.apache.avro.Schema类的典型用法代码示例。如果您正苦于以下问题:Scala Schema类的具体用法?Scala Schema怎么用?Scala Schema使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Schema类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: Tip
//设置package包名称以及导入依赖的类
package com.alvin.niagara.model
import java.io.ByteArrayOutputStream
import java.util
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.{DecoderFactory, EncoderFactory}
import scala.collection.JavaConversions._
import scala.io.Source
// Immutable value object for one Yelp "tip" Avro record. `type` is
// backtick-escaped because it is a Scala keyword; field names match the
// Avro schema in /schema/tip.avsc exactly.
case class Tip(business_id: String, date: String, likes: Long, text: String, `type`: String, user_id: String)
/**
 * Binary Avro serializer/deserializer for [[Tip]].
 *
 * The schema is loaded once from the classpath resource and the datum
 * reader/writer are built once and shared by every call.
 */
object TipSerde {

  val avroSchema = Source.fromInputStream(getClass.getResourceAsStream("/schema/tip.avsc")).mkString
  val schema = new Schema.Parser().parse(avroSchema)

  val reader = new GenericDatumReader[GenericRecord](schema)
  val writer = new GenericDatumWriter[GenericRecord](schema)

  /**
   * Encodes a [[Tip]] to raw Avro binary (datum only, no container header).
   *
   * @param tip the record to encode
   * @return the Avro binary payload
   */
  def serialize(tip: Tip): Array[Byte] = {
    val out = new ByteArrayOutputStream()
    val encoder = EncoderFactory.get.binaryEncoder(out, null)
    val avroRecord = new GenericData.Record(schema)
    avroRecord.put("business_id", tip.business_id)
    avroRecord.put("date", tip.date)
    avroRecord.put("likes", tip.likes)
    avroRecord.put("text", tip.text)
    avroRecord.put("type", tip.`type`)
    avroRecord.put("user_id", tip.user_id)
    writer.write(avroRecord, encoder)
    // Fix: side-effecting zero-arg methods are called with parentheses
    // (auto-application of Java methods is deprecated in newer Scala).
    // flush() is required so the encoder's buffer reaches the stream.
    encoder.flush()
    out.close()
    out.toByteArray
  }

  /**
   * Decodes an Avro binary payload produced by [[serialize]] back to a [[Tip]].
   *
   * @param bytes Avro binary datum matching the shared schema
   * @return the reconstructed Tip
   */
  def deserialize(bytes: Array[Byte]): Tip = {
    val decoder = DecoderFactory.get.binaryDecoder(bytes, null)
    val record = reader.read(null, decoder)
    Tip(
      record.get("business_id").toString,
      record.get("date").toString,
      record.get("likes").asInstanceOf[Long],
      record.get("text").toString,
      record.get("type").toString,
      record.get("user_id").toString
    )
  }
}
示例2: AvroNodeSerializer
//设置package包名称以及导入依赖的类
package eventgen.launcher.core.avro
import java.io.ByteArrayOutputStream
import eventgen.launcher.core.NodeSerializer
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumWriter, GenericRecord}
import org.apache.avro.io.EncoderFactory
/**
 * Serializes an AvroRecord node to pretty-printed Avro JSON.
 *
 * The node is expected to be an [[AvroRecord]]; any other node type fails
 * with a ClassCastException, as in the original contract.
 */
class AvroNodeSerializer extends NodeSerializer[Schema, AvroNode[_], ByteArrayOutputStream] {

  override def serialize(metadata: Schema, node: AvroNode[_]): ByteArrayOutputStream = {
    val avroRecord = node.asInstanceOf[AvroRecord]
    val buffer = new ByteArrayOutputStream
    val datumWriter = new GenericDatumWriter[GenericRecord]
    datumWriter.setSchema(metadata)
    // Final `true` selects the pretty-printing variant of the JSON encoder.
    val jsonEncoder = EncoderFactory.get().jsonEncoder(metadata, buffer, true)
    datumWriter.write(avroRecord.value, jsonEncoder)
    jsonEncoder.flush()
    buffer
  }
}
示例3: AvroParquetWriterFn
//设置package包名称以及导入依赖的类
package io.eels.component.parquet.avro
import com.sksamuel.exts.Logging
import io.eels.component.parquet.ParquetWriterConfig
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.hadoop.fs.Path
import org.apache.parquet.avro.AvroParquetWriter
import org.apache.parquet.hadoop.{ParquetFileWriter, ParquetWriter}
/**
 * Factory for Parquet writers of Avro GenericRecords.
 */
object AvroParquetWriterFn extends Logging {

  /**
   * Builds a ParquetWriter for the given path and Avro schema, applying the
   * codec, page size, row-group size, dictionary and validation settings
   * taken from [[ParquetWriterConfig]].
   *
   * Uses CREATE write mode, so the target file must not already exist.
   */
  def apply(path: Path, avroSchema: Schema): ParquetWriter[GenericRecord] = {
    val settings = ParquetWriterConfig()
    val builder = AvroParquetWriter.builder[GenericRecord](path)
    builder
      .withSchema(avroSchema)
      .withCompressionCodec(settings.compressionCodec)
      .withPageSize(settings.pageSize)
      .withRowGroupSize(settings.blockSize)
      .withDictionaryEncoding(settings.enableDictionary)
      .withWriteMode(ParquetFileWriter.Mode.CREATE)
      .withValidation(settings.validating)
      .build()
  }
}
示例4: AvroLens
//设置package包名称以及导入依赖的类
package fr.psug.avro
import org.apache.avro.Schema
import org.apache.avro.generic._
import scala.reflect.ClassTag
object AvroLens {

  /**
   * Variant of defineWithSideEffect that first validates `path` against
   * `schema`, and validates each incoming record against the schema before
   * applying the transformation.
   */
  def defineWithSideEffectAndSchema[A](path: String,
                                       transform: A => A,
                                       schema: Schema)(implicit tag: ClassTag[A]): (GenericContainer => Unit) = {
    // Fail fast at definition time if the path is not present in the schema.
    checkPath(path, schema)
    (record: GenericContainer) => {
      GenericData.get().validate(schema, record)
      defineWithSideEffect(path, transform)(tag)(record)
    }
  }

  /**
   * Walks a dotted field path through the schema (array markers "[]" are
   * stripped first), throwing IllegalStateException at the first missing
   * segment. Returns the schema of the final segment.
   */
  def checkPath(path: String, schema: Schema) = {
    val segments = path.replace("[]", "").split('.')
    segments.foldLeft(schema) { (current, segment) =>
      Option(current.getField(segment))
        .map(_.schema())
        .getOrElse(throw new IllegalStateException(
          s"Path '$path' does not exist according to schema ${schema.toString(true)}"))
    }
  }
}
示例5: AvroUtils
//设置package包名称以及导入依赖的类
package pulse.kafka.avro
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, File}
import com.twitter.util.Future
import org.apache.avro.Schema
import org.apache.avro.file.DataFileWriter
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.DecoderFactory
import pulse.kafka.extensions.managedByteArrayInputStream
import pulse.kafka.extensions.managedByteArrayOutputStream
import pulse.kafka.extensions.catsStdInstancesForFuture
import scala.concurrent.ExecutionContext.Implicits._
/**
 * JSON-to-Avro conversion helpers built on managed streams (`use`) and
 * Twitter Futures.
 */
object AvroUtils {

  import pulse.common.syntax._

  /**
   * Converts a JSON document to Avro bytes using the schema read from
   * `schemaFile`. The output buffer is managed by `use`; its bytes are
   * captured eagerly in the yielded value.
   */
  def jsonToAvroBytes(json: String, schemaFile: File): Future[Array[Byte]] =
    use(new ByteArrayOutputStream()) { output =>
      loadSchema(schemaFile).flatMap { parsedSchema =>
        convertImpl(json, output, parsedSchema).map(_ => output.toByteArray)
      }
    }

  // Wires a DataFileWriter and a datum reader over the managed input stream;
  // the decoded datum is appended to the writer inside getReader.
  private def convertImpl(json: String, output: ByteArrayOutputStream, schemaSpec: Schema): Future[GenericDatumReader[GenericRecord]] =
    use(new ByteArrayInputStream(json.getBytes)) { input =>
      getWriter(output, schemaSpec).flatMap { fileWriter =>
        getReader(input, schemaSpec, fileWriter)
      }
    }

  // Decodes a single JSON datum from `input` and appends it (flushed) to `w`.
  def getReader(input: ByteArrayInputStream, schemaSpec: Schema, w: DataFileWriter[GenericRecord]) = Future.value {
    val datumReader = new GenericDatumReader[GenericRecord](schemaSpec)
    val datum = datumReader.read(null, getJsonDecoder(input, schemaSpec))
    w.append(datum)
    w.flush()
    datumReader
  }

  // JSON decoder bound to the given schema over the raw input stream.
  private def getJsonDecoder(input: ByteArrayInputStream, schema: Schema) =
    DecoderFactory.get.jsonDecoder(schema, new DataInputStream(input))

  // NOTE(review): the DataFileWriter is flushed but never closed; the bytes
  // still reach the caller because jsonToAvroBytes reads the buffer eagerly.
  private def getWriter(output: ByteArrayOutputStream, schemaSpec: Schema) = {
    Future.value {
      val fileWriter = new DataFileWriter[GenericRecord](new GenericDatumWriter[GenericRecord]())
      fileWriter.create(schemaSpec, output)
    }
  }

  private def loadSchema(schemaFile: File): Future[Schema] =
    Future {
      new Schema.Parser().parse(schemaFile)
    }
}
示例6: AvroType
//设置package包名称以及导入依赖的类
package shapeless.datatype.avro
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import shapeless._
import scala.reflect.runtime.universe._
// Typeclass-driven bridge between a case class `A` and an Avro GenericRecord,
// implemented via shapeless LabelledGeneric.
class AvroType[A] extends Serializable {
// Decodes a GenericRecord into A; None when the record cannot be mapped.
def fromGenericRecord[L <: HList](m: GenericRecord)
(implicit gen: LabelledGeneric.Aux[A, L], fromL: FromAvroRecord[L])
: Option[A] = fromL(Right(m)).map(gen.from)
// Encodes A into a GenericRecord built against AvroSchema[A].
// NOTE(review): `.left.get` throws NoSuchElementException if toL yields a
// Right — presumably ToAvroRecord always produces a Left here; confirm.
def toGenericRecord[L <: HList](a: A)
(implicit gen: LabelledGeneric.Aux[A, L], toL: ToAvroRecord[L], tt: TypeTag[A])
: GenericRecord = toL(gen.to(a)).left.get.build(AvroSchema[A])
}
object AvroType {

  /** Summons an AvroType instance for `A`. */
  def apply[A: TypeTag]: AvroType[A] = new AvroType[A]

  /**
   * Registers `schemaType` as the Avro schema type for `V` and returns a
   * mappable-type instance delegating to the supplied conversion functions.
   */
  def at[V: TypeTag](schemaType: Schema.Type)(fromFn: Any => V, toFn: V => Any): BaseAvroMappableType[V] = {
    AvroSchema.register(typeTag[V].tpe, schemaType)
    new BaseAvroMappableType[V] {
      override def from(value: Any): V = fromFn(value)
      override def to(value: V): Any = toFn(value)
    }
  }
}
示例7: AvroSchemaParser
//设置package包名称以及导入依赖的类
package eventgen.launcher.core.avro
import eventgen.launcher.core._
import org.apache.avro.Schema
import org.apache.avro.Schema.Parser
import scalaz.{\/, \/-}
/**
 * SchemaParser backed by Avro's native Parser. Produces an error message on
 * the left or a validated Schema on the right.
 */
class AvroSchemaParser extends SchemaParser[Schema] {

  val nativeParser = new Parser

  override def parse(text: String): \/[String, Schema] =
    text.ifNotEmpty
      .flatMap(_.parseRight)
      .flatMap(validateAvroSchema)

  // Placeholder hook: currently every successfully parsed schema is accepted.
  def validateAvroSchema(schema: Schema): String \/ Schema = \/-(schema)
}
示例8: AvroTreeBuilder
//设置package包名称以及导入依赖的类
package eventgen.launcher.core.avro
import eventgen.launcher.core.PrimitiveGenerators._
import eventgen.launcher.core._
import org.apache.avro.Schema
import org.apache.avro.Schema.{Field, Type}
import scala.collection.JavaConversions._
import scalaz._
import Scalaz._
import org.apache.avro.generic.GenericData
// Builds a scalaz State-based random-value generator tree from an Avro schema:
// each field yields a State[ImmutableRandom, AvroNode[_]], so generation is a
// pure function of the random seed.
class AvroTreeBuilder extends TreeBuilder[Schema, AvroNode[_]] {
// Delegates generation of a custom field type to an externally supplied generator.
def getCustomFieldState(schema: Schema, extGenerator: ExternalGenerator[_]): State[ImmutableRandom, _] = extGenerator.get
// Wraps a ranged primitive generator result into an AvroField node.
def getRangeFieldState[T](from: Int, to: Int)(implicit rangeGen: RangeGenerator[T]): State[ImmutableRandom, AvroNode[_]] = {
rangeGen.generate(from, to).map(AvroField[T](_))
}
// Resolves the generator for a single field from its "generator" schema prop.
// Supported: "Range[Double|Int](from = a, to = b)" or the name of a generator
// registered in the execution context.
// NOTE(review): f.getProp returns null when the prop is absent; a null value
// reaching the regex match below is a potential NPE — confirm callers always
// set the prop on non-record fields.
def getFieldState(f: Field, context: ExecutionContext): String \/ State[ImmutableRandom, AvroNode[_]] = {
val RangePattern = "Range\\[(Double|Int)\\]\\(from = ([-0-9]+), to = ([-0-9]+)\\)".r
f.getProp("generator") match {
case RangePattern(typeParam, Int(from), Int(to)) => typeParam match {
case "Double" => \/-(getRangeFieldState[Double](from, to))
case "Int" => \/-(getRangeFieldState[Int](from, to))
}
case name => context.generators.get(name) match {
case Some(extGenerator) => \/-(extGenerator.get.map(AvroField(_)))
case None => -\/(s"Cannot find generator $name")
}
}
}
// Recursively builds the generator for a record schema: nested RECORD fields
// recurse; leaf fields go through getFieldState. sequenceU collects the
// per-field disjunctions, failing with the first error.
override def buildTree(rootSchema: Schema, executionContext: ExecutionContext): String \/ State[ImmutableRandom, AvroNode[_]] = {
val fields = rootSchema.getFields.toList
val fieldStates = fields.map(f => {
if (f.schema().getType == Type.RECORD)
buildTree(f.schema(), executionContext).map((f.name(), _))
else
getFieldState(f, executionContext).map((f.name(), _))
})
for (childrenMap <- fieldStates.sequenceU) yield generateNodeState(rootSchema, childrenMap.toMap)
}
// Combines per-field states into a single state that materializes a
// GenericData.Record, threading the random seed through every child.
def generateNodeState(rootSchema: Schema, childrenStates: Map[String, State[ImmutableRandom, AvroNode[_]]]) = {
State[ImmutableRandom, AvroNode[_]](rand => {
val nativeRecord = new GenericData.Record(rootSchema)
val (rand2, childNodes) = childrenStates.invertStatesMap(rand)
childNodes.foreach {
case (fieldName, node) => nativeRecord.put(fieldName, node.value)
}
(rand2, AvroRecord(nativeRecord))
})
}
}
示例9: getRandomOutputs
//设置package包名称以及导入依赖的类
package eventgen.launcher.core
import java.io.ByteArrayOutputStream
import eventgen.launcher.core.avro._
import org.apache.avro.Schema
import scalaz._
// Orchestrates random data generation: parse schema metadata into Metadata,
// build a generator tree of Nodes, then serialize each generated Node.
trait RandomGenerationService[Metadata, Node, Output] {
val schemaParser: SchemaParser[Metadata]
val treeBuilder: TreeBuilder[Metadata, Node]
val nodeSerializer: NodeSerializer[Metadata, Node, Output]
// Returns an error message, or a Kleisli that given a seed produces
// `context.count` serialized outputs drawn from the generator stream.
def getRandomOutputs(metadata: String, context: ExecutionContext): String \/ Kleisli[Seq, ImmutableRandom, Output] = {
for {
schema <- schemaParser.parse(metadata)
nodeGenerator <- treeBuilder.buildTree(schema, context)
} yield Kleisli[Seq, ImmutableRandom, Output](seed => getRandomStream(nodeGenerator)(seed).take(context.count).toList.map(x => nodeSerializer.serialize(schema, x)))
}
// Infinite lazy stream of generated nodes; the random state produced by each
// step seeds the next, so the stream is deterministic for a given seed.
def getRandomStream(nodeGenerator: State[ImmutableRandom, Node])(seed: ImmutableRandom): Stream[Node] = {
val (newState, node) = nodeGenerator(seed)
node #:: getRandomStream(nodeGenerator)(newState)
}
}
object RandomGenerationService {
// Pre-wired Avro flavor: Avro schema text in, JSON-encoded Avro records out.
def getAvro = new RandomGenerationService[Schema, AvroNode[_], ByteArrayOutputStream] {
val schemaParser = new AvroSchemaParser
val treeBuilder = new AvroTreeBuilder
val nodeSerializer = new AvroNodeSerializer
}
}
示例10: StringExtension
//设置package包名称以及导入依赖的类
package eventgen.launcher
import org.apache.avro.Schema
import org.apache.avro.Schema.Parser
import scala.util.control.NonFatal
import scalaz._
package object core {

  /** Parsing/validation helpers for raw Avro schema strings. */
  implicit class StringExtension(s: String) {

    /** Parses the string as an Avro schema; exception message on the left. */
    def parseRight: String \/ Schema =
      try \/-(new Parser().parse(s))
      catch { case NonFatal(exc) => -\/(exc.getMessage) }

    /** Rejects empty schema text with an error message on the left. */
    def ifNotEmpty: String \/ String =
      if (s.isEmpty) -\/("Schema is empty") else \/-(s)
  }

  /** Extractor matching strings that parse as an Int. */
  object Int {
    def unapply(s: String): Option[Int] =
      try Some(s.toInt)
      catch { case _: java.lang.NumberFormatException => None }
  }

  /**
   * Turns a map of independent states into a single state producing a map,
   * threading the state value through every entry in iteration order.
   */
  implicit class MapOfStates[Key, StateType, Node](map: Map[Key, State[StateType, Node]]) {
    def invertStatesMap: State[StateType, Map[Key, Node]] = {
      val empty = State.state[StateType, Map[Key, Node]](Map())
      map.foldLeft(empty) {
        case (accState, (fieldKey, fieldState)) =>
          State[StateType, Map[Key, Node]] { s0 =>
            val (s1, acc) = accState(s0)
            val (s2, node) = fieldState(s1)
            (s2, acc + (fieldKey -> node))
          }
      }
    }
  }
}
示例11: AvroUtils
//设置package包名称以及导入依赖的类
package pulse.services.example.avro
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, File}
import org.apache.avro.Schema
import org.apache.avro.file.DataFileWriter
import org.apache.avro.generic.{GenericDatumReader, GenericDatumWriter, GenericRecord}
import org.apache.avro.io.DecoderFactory
import pulse.services.example.extensions._
/**
 * JSON-to-Avro conversion helpers over managed (`use`) streams.
 */
object AvroUtils {

  /**
   * Converts a JSON document to Avro container-file bytes using the schema
   * loaded from `schemaFile`. Both streams are managed by `use`; the writer
   * is flushed before the buffer is read.
   */
  def jsonToAvroBytes(json: String, schemaFile: File) = {
    use(new ByteArrayOutputStream()) { output =>
      val parsedSchema = loadSchema(schemaFile)
      use(new ByteArrayInputStream(json.getBytes)) { input =>
        val fileWriter = new DataFileWriter[GenericRecord](new GenericDatumWriter[GenericRecord]())
        fileWriter.create(parsedSchema, output)
        val datumReader = new GenericDatumReader[GenericRecord](parsedSchema)
        val datum = datumReader.read(null, getJsonDecoder(input, parsedSchema))
        fileWriter.append(datum)
        fileWriter.flush()
      }
      output.toByteArray
    }
  }

  // JSON decoder bound to the given schema over the raw input stream.
  def getJsonDecoder(input: ByteArrayInputStream, schema: Schema) =
    DecoderFactory.get.jsonDecoder(schema, new DataInputStream(input))

  // Parses the Avro schema definition from a file.
  def loadSchema(schemaFile: File) =
    new Schema.Parser().parse(schemaFile)
}
示例12: EtpCompiler
//设置package包名称以及导入依赖的类
import java.io.File
import org.apache.avro.Schema
import org.apache.avro.compiler.specific.SpecificCompiler
import org.apache.avro.generic.GenericData.StringType
import scala.collection.JavaConverters._
/**
 * Command-line tool that compiles every Avro schema found under a directory
 * tree into Java sources via Avro's SpecificCompiler.
 */
object EtpCompiler {

  /**
   * Entry point.
   *
   * args(0): input directory containing .avsc schema files
   * args(1): destination directory for generated Java sources
   */
  def main(args: Array[String]): Unit = {
    val inputFiles = getRecursiveListOfFiles(new File(args(0)))
    val onlyFiles = inputFiles.filter(!_.isDirectory)
    // Sort so schemas are parsed after the schemas they reference.
    val sortedFiles = avrohugger.filesorter.AvscFileSorter.sortSchemaFiles(onlyFiles)
    // One parser for all files so named types resolve across schemas.
    val parser = new Schema.Parser
    sortedFiles.foreach((i: File) => {
      val schema = parser.parse(i)
      val compiler = new SpecificCompiler(schema)
      compiler.setStringType(StringType.String)
      compiler.setEnableDecimalLogicalType(true)
      compiler.compileToDestination(i, new File(args(1)))
    })
    println("done.")
  }

  /**
   * Recursively lists all files and directories under `dir`.
   *
   * Fix: File.listFiles returns null when `dir` is not a directory or an
   * I/O error occurs; the original dereferenced it unconditionally and
   * threw NullPointerException. Treat null as an empty listing instead.
   */
  def getRecursiveListOfFiles(dir: File): Array[File] = {
    val these = Option(dir.listFiles).getOrElse(Array.empty[File])
    these ++ these.filter(_.isDirectory).flatMap(getRecursiveListOfFiles)
  }
}
示例13: AvroFileWriter
//设置package包名称以及导入依赖的类
package com.landoop.avro
import java.io.{BufferedOutputStream, File, FileOutputStream}
import com.landoop.avro.codec.CodecFactory
import org.apache.avro.Schema
import org.apache.avro.file.DataFileWriter
import org.apache.avro.generic.GenericRecord
/**
 * Helpers that write Avro GenericRecords to container files with the snappy
 * codec and 4 MiB buffered I/O.
 */
object AvroFileWriter {

  /**
   * Writes `records` using the parallel fast writer.
   *
   * Fix: closing the writer is now guaranteed via try/finally, so a failure
   * mid-write no longer leaks the file handle (close also closes the
   * underlying buffered stream).
   *
   * NOTE: `count` is kept for signature compatibility but is not used; all
   * of `records` are written.
   */
  def fastWrite(file: File,
                count: Int,
                parallelization: Int,
                schema: Schema,
                records: IndexedSeq[GenericRecord]) = {
    val out = new BufferedOutputStream(new FileOutputStream(file), 4 * 1048576)
    import org.apache.avro.generic.GenericDatumWriter
    val datumWriter = new GenericDatumWriter[GenericRecord](schema)
    val builder = FastDataFileWriterBuilder(datumWriter, out, schema)
      .withCodec(CodecFactory.snappyCodec())
      .withFlushOnEveryBlock(false)
      .withParallelization(parallelization)
    builder.encoderFactory.configureBufferSize(4 * 1048576)
    builder.encoderFactory.configureBlockSize(4 * 1048576)
    val fileWriter = builder.build()
    try fileWriter.write(records)
    finally fileWriter.close()
  }

  /**
   * Sequentially writes `records` as an Avro container file.
   *
   * Fix: the writer is closed via try/finally so an exception during append
   * does not leak the stream.
   *
   * NOTE: `count` is kept for signature compatibility but is not used.
   */
  def write(file: File,
            count: Int,
            schema: Schema,
            records: Seq[GenericRecord]) = {
    val out = new BufferedOutputStream(new FileOutputStream(file), 4 * 1048576)
    import org.apache.avro.generic.GenericDatumWriter
    val datumWriter = new GenericDatumWriter[GenericRecord](schema)
    val writer = new DataFileWriter(datumWriter)
      .setCodec(org.apache.avro.file.CodecFactory.snappyCodec())
      .create(schema, out)
    writer.setFlushOnEveryBlock(false)
    try records.foreach(writer.append)
    finally writer.close()
  }
}
示例14: get
//设置package包名称以及导入依赖的类
package com.datamountaineer.avro.kcql
import com.datamountaineer.avro.kcql.AvroKcql._
import com.datamountaineer.avro.kcql.AvroSchemaKcql._
import org.apache.avro.Schema
import org.apache.avro.generic.IndexedRecord
import org.apache.avro.util.Utf8
/**
 * Resolves a value at a dotted path within an Avro datum, guided by its
 * schema. Records are traversed field-by-field, maps by key, and unions are
 * unwrapped via `fromUnion()`. Leaf primitives are returned as-is (strings
 * normalized to Utf8); selecting into arrays is not supported.
 */
trait AvroFieldValueGetter {

  def get(value: Any, schema: Schema, path: Seq[String]): Option[Any] = {
    path.headOption.map { parent =>
      // Still some path left to consume: descend into the container.
      schema.getType match {
        case Schema.Type.RECORD => if (Option(value).isEmpty) None else fromRecord(value, schema, path)
        case Schema.Type.MAP => if (Option(value).isEmpty) None else fromMap(value, schema, path)
        case Schema.Type.UNION => get(value, schema.fromUnion(), path)
        case _ => throw new IllegalArgumentException(s"Can't select $parent field from schema:$schema")
      }
    }.getOrElse {
      // Path exhausted: the current value is the result.
      schema.getType match {
        case Schema.Type.BOOLEAN | Schema.Type.NULL |
             Schema.Type.DOUBLE | Schema.Type.FLOAT |
             Schema.Type.LONG | Schema.Type.INT |
             Schema.Type.ENUM | Schema.Type.BYTES |
             Schema.Type.FIXED => Option(value)
        case Schema.Type.STRING => Option(new Utf8(value.toString).asInstanceOf[Any]) //yes UTF8
        case Schema.Type.UNION => get(value, schema.fromUnion(), path)
        case Schema.Type.ARRAY | Schema.Type.MAP | Schema.Type.RECORD =>
          throw new IllegalArgumentException(s"Can't select an element from an array(schema:$schema)")
        case other => throw new IllegalArgumentException(s"Invalid Avro schema type:$other")
      }
    }
  }

  // Looks up the named field in a record datum and recurses with its schema.
  private def fromRecord(value: Any, schema: Schema, path: Seq[String]) = {
    val field = Option(schema.getField(path.head))
      .getOrElse(throw new IllegalArgumentException(s"Can't find field:${path.head} in schema:$schema"))
    val v = value.asInstanceOf[IndexedRecord].get(path.head)
    get(v, field.schema(), path.tail)
  }

  // Fix: the original body was a copy-paste of fromRecord — it called
  // Schema.getField and cast the value to IndexedRecord, but a MAP schema
  // has no fields (getField is only defined for RECORD) and the datum is a
  // java.util.Map, so the old code always failed at runtime. Look the key
  // up in the map instead (Avro map keys may be Utf8, so fall back to a
  // Utf8-keyed lookup) and recurse with the map's value schema.
  private def fromMap(value: Any, schema: Schema, path: Seq[String]) = {
    val map = value.asInstanceOf[java.util.Map[_, _]]
    val key = path.head
    val v = Option(map.get(key)).orElse(Option(map.get(new Utf8(key)))).orNull
    get(v, schema.getValueType, path.tail)
  }
}
示例15: props
//设置package包名称以及导入依赖的类
package com.kakao.cuesheet.convert
import java.util.Arrays.copyOfRange
import kafka.serializer.Decoder
import kafka.utils.VerifiableProperties
import org.apache.avro.Schema
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
/**
 * Base Kafka Decoder for Avro-encoded messages. The schema and the number of
 * leading bytes to strip from each message are read from the decoder
 * properties (Avro.SCHEMA / Avro.SKIP_BYTES).
 */
sealed trait AvroDecoder[T] extends Decoder[T] {

  def props: VerifiableProperties

  protected val schema = new Schema.Parser().parse(props.getString(Avro.SCHEMA))
  protected val skipBytes = props.getInt(Avro.SKIP_BYTES, 0)
  protected val reader = new GenericDatumReader[GenericRecord](schema)
  protected val decoder = Avro.recordDecoder(reader)

  // Drops the first `size` bytes of the payload; yields an empty array when
  // the payload is not longer than the prefix.
  private def skip(bytes: Array[Byte], size: Int): Array[Byte] =
    if (bytes.length > size) copyOfRange(bytes, size, bytes.length)
    else new Array[Byte](0)

  // Decodes one message into a GenericRecord, stripping any configured prefix.
  def parse(bytes: Array[Byte]): GenericRecord = {
    val payload = if (skipBytes == 0) bytes else skip(bytes, skipBytes)
    decoder(payload)
  }
}
// Decoder yielding the raw Avro GenericRecord for each Kafka message.
class AvroRecordDecoder(val props: VerifiableProperties) extends AvroDecoder[GenericRecord] {
override def fromBytes(bytes: Array[Byte]): GenericRecord = parse(bytes)
}
// Decoder yielding each Kafka message converted to a Scala Map via Avro.toMap.
class AvroMapDecoder(val props: VerifiableProperties) extends AvroDecoder[Map[String, Any]] {
override def fromBytes(bytes: Array[Byte]): Map[String, Any] = Avro.toMap(parse(bytes))
}
// Decoder yielding each Kafka message rendered as a JSON string via Avro.toJson.
class AvroJsonDecoder(val props: VerifiableProperties) extends AvroDecoder[String] {
override def fromBytes(bytes: Array[Byte]): String = Avro.toJson(parse(bytes))
}