This article collects typical usage examples of the Scala class org.apache.spark.sql.catalyst.InternalRow. If you are unsure what InternalRow is for or how to use it in Scala, the selected class code examples below should help.
Seven code examples of the InternalRow class are shown, ordered by popularity by default.
Example 1: TreeBloom
// Set up the package name and import the dependent classes
import org.apache.spark.util.sketch.BloomFilter
import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.InternalRow

import Main.sc

object TreeBloom {
  // Implements a Bloom filter using treeAggregate instead of aggregate
  // See https://issues.apache.org/jira/browse/SPARK-21039
  def bloomFilter(singleCol: DataFrame, expectedNumItems: Long, fpp: Double): BloomFilter = {
    val zero = BloomFilter.create(expectedNumItems, fpp)
    sc.setJobGroup("bloomFilter", "Bloom filter creation")
    singleCol.queryExecution.toRdd.treeAggregate(zero)(
      (filter: BloomFilter, row: InternalRow) => {
        // The single column is read as an Int and widened to Long before insertion
        filter.putLong(row.getInt(0))
        filter
      },
      (filter1, filter2) => filter1.mergeInPlace(filter2)
    )
  }
}
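For context, here is a minimal usage sketch. It assumes a SparkSession named spark is in scope and that Main.sc exposes its SparkContext; the column and parameter values are made up for illustration:

// Hypothetical usage: build a Bloom filter over a single Int column and probe it
val ids = spark.range(0, 1000000).selectExpr("cast(id as int) as id")
val filter = TreeBloom.bloomFilter(ids, expectedNumItems = 1000000L, fpp = 0.03)
println(filter.mightContainLong(42L))  // true: 42 is in the column
println(filter.mightContainLong(-1L))  // normally false; false positives occur at roughly rate fpp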
Example 2: TextualUtil
// Set up the package name and import the dependent classes
package edu.utah.cs.simba.util

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, BindReferences, Expression}
import org.apache.spark.sql.catalyst.util.ArrayData

import scala.collection.mutable

object TextualUtil {
  // Returns true when the overlap between the two token arrays reaches the threshold sim
  def simFilter(leftText: ArrayData, rightText: ArrayData, sim: Double): Boolean = {
    val l = leftText.numElements()
    val r = rightText.numElements()
    // Size filter: arrays whose lengths differ too much can never reach the threshold
    if (sim * l > r || sim * r > l) return false
    var sameText = 0
    val data = mutable.Set[String]()
    var i = 0
    while (i < l) {
      data.add(leftText.getUTF8String(i).toString)
      i += 1
    }
    var j = 0
    while (j < r) {
      val tmp_str = rightText.getUTF8String(j).toString
      if (data.contains(tmp_str)) sameText += 1
      else data.add(tmp_str)
      j += 1
    }
    // data now holds the union of both token sets; sameText is the intersection size
    sameText.toDouble / data.size >= sim
  }

  def getText(expression: Expression, schema: Seq[Attribute], input: InternalRow): ArrayData = {
    BindReferences.bindReference(expression, schema).eval(input).asInstanceOf[ArrayData]
  }
}
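A hedged usage sketch for simFilter: the token arrays must be Catalyst ArrayData holding UTF8String elements, which is how string arrays are stored inside an InternalRow; GenericArrayData is used here only to build such values by hand:

import org.apache.spark.sql.catalyst.util.GenericArrayData
import org.apache.spark.unsafe.types.UTF8String

// Two token sets sharing 2 of 4 distinct tokens, so the overlap is 0.5
val left  = new GenericArrayData(Seq("spark", "sql", "catalyst").map(UTF8String.fromString))
val right = new GenericArrayData(Seq("spark", "sql", "tungsten").map(UTF8String.fromString))
println(TextualUtil.simFilter(left, right, 0.5))  // true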
Example 3: PointZMUDT
// Set up the package name and import the dependent classes
package com.esri.udt

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
import org.apache.spark.sql.types._

class PointZMUDT extends UserDefinedType[PointZMType] {

  override def sqlType: DataType = StructType(Seq(
    StructField("x", DoubleType, false),
    StructField("y", DoubleType, false),
    StructField("z", DoubleType, false),
    StructField("m", DoubleType, false)
  ))

  override def serialize(obj: Any): InternalRow = {
    obj match {
      case PointZMType(x, y, z, m) => {
        val row = new GenericMutableRow(4)
        row.setDouble(0, x)
        row.setDouble(1, y)
        row.setDouble(2, z)
        row.setDouble(3, m)
        row
      }
    }
  }

  override def deserialize(datum: Any): PointZMType = {
    datum match {
      case row: InternalRow => PointZMType(row.getDouble(0), row.getDouble(1), row.getDouble(2), row.getDouble(3))
    }
  }

  override def userClass: Class[PointZMType] = classOf[PointZMType]

  override def pyUDT: String = "com.esri.udt.PointZMUDT"

  override def typeName: String = "pointZM"

  override def equals(o: Any): Boolean = {
    o match {
      case v: PointZMUDT => true
      case _ => false
    }
  }

  // see [SPARK-8647], this achieves the needed constant hash code without constant no.
  override def hashCode(): Int = classOf[PointZMUDT].getName.hashCode()

  override def asNullable: PointZMUDT = this
}
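As a quick sanity check, a hypothetical round trip through the UDT, assuming PointZMType is a case class with fields x, y, z and m as the pattern match above implies:

val udt = new PointZMUDT
val row = udt.serialize(PointZMType(1.0, 2.0, 3.0, 4.0))  // InternalRow holding the four doubles
val point = udt.deserialize(row)                          // back to PointZMType(1.0, 2.0, 3.0, 4.0)
assert(point == PointZMType(1.0, 2.0, 3.0, 4.0))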
Example 4: PointMUDT
// Set up the package name and import the dependent classes
package com.esri.udt

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
import org.apache.spark.sql.types._

class PointMUDT extends UserDefinedType[PointMType] {

  override def sqlType: DataType = StructType(Seq(
    StructField("x", DoubleType, false),
    StructField("y", DoubleType, false),
    StructField("m", DoubleType, false)
  ))

  override def serialize(obj: Any): InternalRow = {
    obj match {
      case PointMType(x, y, m) => {
        val row = new GenericMutableRow(3)
        row.setDouble(0, x)
        row.setDouble(1, y)
        row.setDouble(2, m)
        row
      }
    }
  }

  override def deserialize(datum: Any): PointMType = {
    datum match {
      case row: InternalRow => PointMType(row.getDouble(0), row.getDouble(1), row.getDouble(2))
    }
  }

  override def userClass: Class[PointMType] = classOf[PointMType]

  override def pyUDT: String = "com.esri.udt.PointMUDT"

  override def typeName: String = "pointM"

  override def equals(o: Any): Boolean = {
    o match {
      case v: PointMUDT => true
      case _ => false
    }
  }

  // see [SPARK-8647], this achieves the needed constant hash code without constant no.
  override def hashCode(): Int = classOf[PointMUDT].getName.hashCode()

  override def asNullable: PointMUDT = this
}
Example 5: PointZUDT
// Set up the package name and import the dependent classes
package com.esri.udt

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
import org.apache.spark.sql.types._

class PointZUDT extends UserDefinedType[PointZType] {

  override def sqlType: DataType = StructType(Seq(
    StructField("x", DoubleType, false),
    StructField("y", DoubleType, false),
    StructField("z", DoubleType, false)
  ))

  override def serialize(obj: Any): InternalRow = {
    obj match {
      case PointZType(x, y, z) => {
        val row = new GenericMutableRow(3)
        row.setDouble(0, x)
        row.setDouble(1, y)
        row.setDouble(2, z)
        row
      }
    }
  }

  override def deserialize(datum: Any): PointZType = {
    datum match {
      case row: InternalRow => PointZType(row.getDouble(0), row.getDouble(1), row.getDouble(2))
    }
  }

  override def userClass: Class[PointZType] = classOf[PointZType]

  override def pyUDT: String = "com.esri.udt.PointZUDT"

  override def typeName: String = "pointZ"

  override def equals(o: Any): Boolean = {
    o match {
      case v: PointZUDT => true
      case _ => false
    }
  }

  // see [SPARK-8647], this achieves the needed constant hash code without constant no.
  override def hashCode(): Int = classOf[PointZUDT].getName.hashCode()

  override def asNullable: PointZUDT = this
}
Example 6: PointUDT
// Set up the package name and import the dependent classes
package com.esri.udt

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.GenericMutableRow
import org.apache.spark.sql.types._

class PointUDT extends UserDefinedType[PointType] {

  override def sqlType: DataType = StructType(Seq(
    StructField("x", DoubleType, false),
    StructField("y", DoubleType, false)
  ))

  override def serialize(obj: Any): InternalRow = {
    obj match {
      case PointType(x, y) => {
        val row = new GenericMutableRow(2)
        row.setDouble(0, x)
        row.setDouble(1, y)
        row
      }
    }
  }

  override def deserialize(datum: Any): PointType = {
    datum match {
      case row: InternalRow => PointType(row.getDouble(0), row.getDouble(1))
    }
  }

  override def userClass: Class[PointType] = classOf[PointType]

  override def pyUDT: String = "com.esri.udt.PointUDT"

  override def typeName: String = "point"

  override def equals(o: Any): Boolean = {
    o match {
      case v: PointUDT => true
      case _ => false
    }
  }

  // see [SPARK-8647], this achieves the needed constant hash code without constant no.
  override def hashCode(): Int = classOf[PointUDT].getName.hashCode()

  override def asNullable: PointUDT = this
}
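For completeness, a sketch of how a user class is typically wired to its UDT: annotating the case class with @SQLUserDefinedType lets Catalyst find the serializer. The field list below is an assumption based on the serialize method above, not the actual com.esri.udt source:

import org.apache.spark.sql.types.SQLUserDefinedType

// Hypothetical user class; the real PointType may carry additional members
@SQLUserDefinedType(udt = classOf[PointUDT])
case class PointType(x: Double, y: Double)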
Example 7: DefaultSource
// Set up the package name and import the dependent classes
package pl.jborkowskijmartin.spark.mf

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileStatus
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.{FileFormat, OutputWriterFactory, PartitionedFile}
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.unsafe.types.UTF8String

class DefaultSource extends FileFormat {

  override def inferSchema(sparkSession: SparkSession, options: Map[String, String],
                           files: Seq[FileStatus]): Option[StructType] = {
    println(">>>InferSchema")
    // Every file is exposed as a single nullable string column named "line"
    Some(StructType(
      StructField("line", StringType, nullable = true) :: Nil
    ))
  }

  override def prepareWrite(sparkSession: SparkSession, job: Job, options: Map[String, String],
                            dataSchema: StructType): OutputWriterFactory = {
    println(">>> prepareWrite")
    // Writing is not implemented in this example
    null
  }

  override def buildReader(sparkSession: SparkSession, dataSchema: StructType, partitionSchema: StructType,
                           requiredSchema: StructType, filters: Seq[Filter], options: Map[String, String],
                           hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow] = {
    // Each partitioned file produces exactly one row containing the string "hello"
    pf => Iterator(InternalRow(UTF8String.fromString("hello")))
  }
}
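A hedged read-path sketch: because the class is named DefaultSource, Spark can resolve it from the package name passed to format(). The path is a placeholder, and depending on the Spark version extra plumbing (for example an UnsafeRow conversion) may still be needed:

val df = spark.read
  .format("pl.jborkowskijmartin.spark.mf")  // resolved to pl.jborkowskijmartin.spark.mf.DefaultSource
  .load("/path/to/input")                   // placeholder path
df.show()  // expected: one "hello" row per partitioned file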