本文整理汇总了 Scala 中 org.apache.spark.sql.catalyst.util.ArrayData 类的典型用法代码示例。如果您正苦于以下问题:Scala ArrayData 类的具体用法?Scala ArrayData 怎么用?Scala ArrayData 使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了ArrayData类的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: TextualUtil
//设置package包名称以及导入依赖的类
package edu.utah.cs.simba.util
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, BindReferences, Expression}
import org.apache.spark.sql.catalyst.util.ArrayData
import scala.collection.mutable
object TextualUtil {
  /**
   * Decides whether two token arrays are similar enough under Jaccard similarity,
   * i.e. (number of distinct shared tokens) / (number of distinct tokens overall).
   *
   * A cheap size-based pre-check runs first: the pair is rejected without building
   * any set when `sim * l > r` or `sim * r > l`, where `l` and `r` are the raw
   * element counts of the two arrays.
   * NOTE(review): the pre-check uses raw element counts, so it is only exact when
   * neither array contains duplicate tokens -- confirm callers pass de-duplicated input.
   *
   * @param leftText  tokens of the left record, read element-wise with `getUTF8String`
   * @param rightText tokens of the right record, read element-wise with `getUTF8String`
   * @param sim       similarity threshold the ratio is compared against
   * @return `true` when the similarity ratio is at least `sim`; `false` otherwise,
   *         including when both arrays are empty (no tokens to compare)
   */
  def simFilter(leftText: ArrayData, rightText: ArrayData, sim: Double): Boolean = {
    val l = leftText.numElements()
    val r = rightText.numElements()
    if (sim * l > r || sim * r > l) {
      false
    } else {
      val leftTokens = distinctTokens(leftText)
      val rightTokens = distinctTokens(rightText)
      // Count each shared token exactly once. Counting per right-hand element would
      // let duplicates in rightText inflate the numerator and push the ratio above 1.
      val shared = rightTokens.count(leftTokens.contains)
      val unionSize = leftTokens.size + rightTokens.size - shared
      // An empty union has no defined ratio; report "not similar" in that case.
      unionSize > 0 && shared.toDouble / unionSize >= sim
    }
  }

  /**
   * Binds `expression` against `schema`, evaluates it on `input`, and returns the
   * result cast to [[ArrayData]]. The cast is unchecked, so an expression that
   * evaluates to a different type fails with a `ClassCastException`.
   *
   * @param expression expression to evaluate
   * @param schema     attributes the expression's references are bound against
   * @param input      row the bound expression is evaluated on
   * @return the evaluation result as [[ArrayData]]
   */
  def getText(expression: Expression, schema: Seq[Attribute], input: InternalRow): ArrayData = {
    BindReferences.bindReference(expression, schema).eval(input).asInstanceOf[ArrayData]
  }

  /** Collects the distinct string values of every element of `text`. */
  private def distinctTokens(text: ArrayData): mutable.Set[String] = {
    val tokens = mutable.Set[String]()
    val count = text.numElements()
    var idx = 0
    while (idx < count) {
      tokens.add(text.getUTF8String(idx).toString)
      idx += 1
    }
    tokens
  }
}
示例2: EmbeddedSet
//设置package包名称以及导入依赖的类
package org.apache.spark.orientdb.udts
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
/**
 * Wrapper around an array of arbitrary elements, registered with Spark SQL through
 * [[EmbeddedSetType]].
 *
 * Equality and hashing are defined over the array contents rather than the array
 * reference; both are sensitive to element order.
 *
 * @param elements the wrapped elements; individual entries may be `null`
 */
@SQLUserDefinedType(udt = classOf[EmbeddedSetType])
case class EmbeddedSet(elements: Array[Any]) extends Serializable {

  /** Combines the element hash codes as `31 * acc + hash`, starting at 1; `null` hashes to 0. */
  override def hashCode(): Int =
    elements.iterator.foldLeft(1) { (acc, elem) =>
      val elemHash = if (elem == null) 0 else elem.hashCode()
      31 * acc + elemHash
    }

  /** Two instances are equal when their arrays hold equal elements in the same order. */
  override def equals(other: scala.Any): Boolean = other match {
    case candidate: EmbeddedSet => candidate.elements.sameElements(this.elements)
    case _ => false
  }

  /** Renders the elements separated by ", ". */
  override def toString: String = elements.mkString(", ")
}
/**
 * User-defined type that stores an [[EmbeddedSet]] as a Catalyst array in which every
 * element is the Java-serialized form of one set element, wrapped in a `UTF8String`.
 *
 * NOTE(review): the wrapped bytes are Java serialization output, not text, although
 * `sqlType` declares `StringType` -- confirm nothing downstream decodes them as UTF-8.
 */
class EmbeddedSetType extends UserDefinedType[EmbeddedSet] {

  /** Catalyst representation: an array of strings, one per serialized element. */
  override def sqlType: DataType = ArrayType(StringType)

  /**
   * Converts `obj` to its Catalyst form.
   *
   * @param obj the set to serialize
   * @return a `GenericArrayData` holding one `UTF8String` per element of `obj`
   */
  override def serialize(obj: EmbeddedSet): Any = {
    new GenericArrayData(obj.elements.map { elem => encodeElement(elem) })
  }

  /**
   * Rebuilds an [[EmbeddedSet]] from its Catalyst form.
   *
   * @param datum expected to be an `ArrayData` produced by `serialize`
   * @return the reconstructed set
   * @throws RuntimeException (via `sys.error`) when `datum` is not an `ArrayData`
   */
  override def deserialize(datum: Any): EmbeddedSet = {
    datum match {
      case values: ArrayData =>
        new EmbeddedSet(values.toArray[UTF8String](StringType).map { elem => decodeElement(elem) })
      case other => sys.error(s"Cannot deserialize $other")
    }
  }

  override def userClass: Class[EmbeddedSet] = classOf[EmbeddedSet]

  /** Java-serializes one element and wraps the resulting bytes in a `UTF8String`. */
  private def encodeElement(elem: Any): UTF8String = {
    val buffer = new ByteArrayOutputStream()
    val objectOut = new ObjectOutputStream(buffer)
    // Close (and thereby flush) before reading the buffer so no bytes are left pending.
    try objectOut.writeObject(elem)
    finally objectOut.close()
    UTF8String.fromBytes(buffer.toByteArray)
  }

  /**
   * Reads one Java-serialized element back from the bytes of `encoded`.
   * NOTE(review): this is plain Java deserialization; it is only safe when the stored
   * bytes come from a trusted source.
   */
  private def decodeElement(encoded: UTF8String): Any = {
    val objectIn = new ObjectInputStream(new ByteArrayInputStream(encoded.getBytes))
    try objectIn.readObject()
    finally objectIn.close()
  }
}

/** Shared instance of [[EmbeddedSetType]]. */
object EmbeddedSetType extends EmbeddedSetType
示例3: LinkSet
//设置package包名称以及导入依赖的类
package org.apache.spark.orientdb.udts
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import com.orientechnologies.orient.core.record.ORecord
import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
/**
 * Wrapper around an array of `ORecord` instances, registered with Spark SQL through
 * [[LinkSetType]].
 *
 * Equality and hashing are defined over the array contents rather than the array
 * reference; both are sensitive to element order.
 *
 * @param elements the wrapped records; individual entries may be `null`
 */
@SQLUserDefinedType(udt = classOf[LinkSetType])
case class LinkSet(elements: Array[_ <: ORecord]) extends Serializable {

  /** Combines the element hash codes as `31 * acc + hash`, starting at 1; `null` hashes to 0. */
  override def hashCode(): Int =
    elements.iterator.foldLeft(1) { (acc, record) =>
      val recordHash = if (record == null) 0 else record.hashCode()
      31 * acc + recordHash
    }

  /** Two instances are equal when their arrays hold equal elements in the same order. */
  override def equals(other: scala.Any): Boolean = other match {
    case candidate: LinkSet => candidate.elements.sameElements(this.elements)
    case _ => false
  }

  /** Renders the elements separated by ", ". */
  override def toString: String = elements.mkString(", ")
}
/**
 * User-defined type that stores a [[LinkSet]] as a Catalyst array in which every
 * element is the Java-serialized form of one record, wrapped in a `UTF8String`.
 *
 * NOTE(review): the wrapped bytes are Java serialization output, not text, although
 * `sqlType` declares `StringType` -- confirm nothing downstream decodes them as UTF-8.
 */
class LinkSetType extends UserDefinedType[LinkSet] {

  /** Catalyst representation: an array of strings, one per serialized record. */
  override def sqlType: DataType = ArrayType(StringType)

  /**
   * Converts `obj` to its Catalyst form.
   *
   * @param obj the link set to serialize
   * @return a `GenericArrayData` holding one `UTF8String` per element of `obj`
   */
  override def serialize(obj: LinkSet): Any = {
    new GenericArrayData(obj.elements.map { elem => encodeRecord(elem) })
  }

  /**
   * Rebuilds a [[LinkSet]] from its Catalyst form.
   *
   * @param datum expected to be an `ArrayData` produced by `serialize`
   * @return the reconstructed link set
   * @throws RuntimeException (via `sys.error`) when `datum` is not an `ArrayData`
   */
  override def deserialize(datum: Any): LinkSet = {
    datum match {
      case values: ArrayData =>
        new LinkSet(values.toArray[UTF8String](StringType).map { elem => decodeRecord(elem) })
      case other => sys.error(s"Cannot deserialize $other")
    }
  }

  override def userClass: Class[LinkSet] = classOf[LinkSet]

  /** Java-serializes one record and wraps the resulting bytes in a `UTF8String`. */
  private def encodeRecord(elem: Any): UTF8String = {
    val buffer = new ByteArrayOutputStream()
    val objectOut = new ObjectOutputStream(buffer)
    // Close (and thereby flush) before reading the buffer so no bytes are left pending.
    try objectOut.writeObject(elem)
    finally objectOut.close()
    UTF8String.fromBytes(buffer.toByteArray)
  }

  /**
   * Reads one Java-serialized record back from the bytes of `encoded` and casts it to
   * `ORecord` (unchecked; a different payload type fails with `ClassCastException`).
   * NOTE(review): this is plain Java deserialization; it is only safe when the stored
   * bytes come from a trusted source.
   */
  private def decodeRecord(encoded: UTF8String): ORecord = {
    val objectIn = new ObjectInputStream(new ByteArrayInputStream(encoded.getBytes))
    try objectIn.readObject().asInstanceOf[ORecord]
    finally objectIn.close()
  }
}

/** Shared instance of [[LinkSetType]]. */
object LinkSetType extends LinkSetType
示例4: EmbeddedList
//设置package包名称以及导入依赖的类
package org.apache.spark.orientdb.udts
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
/**
 * Wrapper around an array of arbitrary elements, registered with Spark SQL through
 * [[EmbeddedListType]].
 *
 * Equality and hashing are defined over the array contents rather than the array
 * reference; both are sensitive to element order.
 *
 * @param elements the wrapped elements; individual entries may be `null`
 */
@SQLUserDefinedType(udt = classOf[EmbeddedListType])
case class EmbeddedList(elements: Array[Any]) extends Serializable {

  /** Combines the element hash codes as `31 * acc + hash`, starting at 1; `null` hashes to 0. */
  override def hashCode(): Int =
    elements.iterator.foldLeft(1) { (acc, elem) =>
      val elemHash = if (elem == null) 0 else elem.hashCode()
      31 * acc + elemHash
    }

  /** Two instances are equal when their arrays hold equal elements in the same order. */
  override def equals(other: scala.Any): Boolean = other match {
    case candidate: EmbeddedList => candidate.elements.sameElements(this.elements)
    case _ => false
  }

  /** Renders the elements separated by ", ". */
  override def toString: String = elements.mkString(", ")
}
/**
 * User-defined type that stores an [[EmbeddedList]] as a Catalyst array in which every
 * element is the Java-serialized form of one list element, wrapped in a `UTF8String`.
 *
 * NOTE(review): the wrapped bytes are Java serialization output, not text, although
 * `sqlType` declares `StringType` -- confirm nothing downstream decodes them as UTF-8.
 */
class EmbeddedListType extends UserDefinedType[EmbeddedList] {

  /** Catalyst representation: an array of strings, one per serialized element. */
  override def sqlType: DataType = ArrayType(StringType)

  /**
   * Converts `obj` to its Catalyst form.
   *
   * @param obj the list to serialize
   * @return a `GenericArrayData` holding one `UTF8String` per element of `obj`
   */
  override def serialize(obj: EmbeddedList): Any = {
    new GenericArrayData(obj.elements.map { elem => encodeElement(elem) })
  }

  /**
   * Rebuilds an [[EmbeddedList]] from its Catalyst form.
   *
   * @param datum expected to be an `ArrayData` produced by `serialize`
   * @return the reconstructed list
   * @throws RuntimeException (via `sys.error`) when `datum` is not an `ArrayData`
   */
  override def deserialize(datum: Any): EmbeddedList = {
    datum match {
      case values: ArrayData =>
        new EmbeddedList(values.toArray[UTF8String](StringType).map { elem => decodeElement(elem) })
      case other => sys.error(s"Cannot deserialize $other")
    }
  }

  override def userClass: Class[EmbeddedList] = classOf[EmbeddedList]

  /** Java-serializes one element and wraps the resulting bytes in a `UTF8String`. */
  private def encodeElement(elem: Any): UTF8String = {
    val buffer = new ByteArrayOutputStream()
    val objectOut = new ObjectOutputStream(buffer)
    // Close (and thereby flush) before reading the buffer so no bytes are left pending.
    try objectOut.writeObject(elem)
    finally objectOut.close()
    UTF8String.fromBytes(buffer.toByteArray)
  }

  /**
   * Reads one Java-serialized element back from the bytes of `encoded`.
   * NOTE(review): this is plain Java deserialization; it is only safe when the stored
   * bytes come from a trusted source.
   */
  private def decodeElement(encoded: UTF8String): Any = {
    val objectIn = new ObjectInputStream(new ByteArrayInputStream(encoded.getBytes))
    try objectIn.readObject()
    finally objectIn.close()
  }
}

/** Shared instance of [[EmbeddedListType]]. */
object EmbeddedListType extends EmbeddedListType
示例5: LinkList
//设置package包名称以及导入依赖的类
package org.apache.spark.orientdb.udts
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
import com.orientechnologies.orient.core.record.ORecord
import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData}
import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.UTF8String
/**
 * Wrapper around an array of `ORecord` instances, registered with Spark SQL through
 * [[LinkListType]].
 *
 * Equality and hashing are defined over the array contents rather than the array
 * reference; both are sensitive to element order.
 *
 * @param elements the wrapped records; individual entries may be `null`
 */
@SQLUserDefinedType(udt = classOf[LinkListType])
case class LinkList(elements: Array[_ <: ORecord]) extends Serializable {

  /** Combines the element hash codes as `31 * acc + hash`, starting at 1; `null` hashes to 0. */
  override def hashCode(): Int =
    elements.iterator.foldLeft(1) { (acc, record) =>
      val recordHash = if (record == null) 0 else record.hashCode()
      31 * acc + recordHash
    }

  /** Two instances are equal when their arrays hold equal elements in the same order. */
  override def equals(other: scala.Any): Boolean = other match {
    case candidate: LinkList => candidate.elements.sameElements(this.elements)
    case _ => false
  }

  /** Renders the elements separated by ", ". */
  override def toString: String = elements.mkString(", ")
}
/**
 * User-defined type that stores a [[LinkList]] as a Catalyst array in which every
 * element is the Java-serialized form of one record, wrapped in a `UTF8String`.
 *
 * NOTE(review): the wrapped bytes are Java serialization output, not text, although
 * `sqlType` declares `StringType` -- confirm nothing downstream decodes them as UTF-8.
 */
class LinkListType extends UserDefinedType[LinkList] {

  /** Catalyst representation: an array of strings, one per serialized record. */
  override def sqlType: DataType = ArrayType(StringType)

  /**
   * Converts `obj` to its Catalyst form.
   *
   * @param obj the link list to serialize
   * @return a `GenericArrayData` holding one `UTF8String` per element of `obj`
   */
  override def serialize(obj: LinkList): Any = {
    new GenericArrayData(obj.elements.map { elem => encodeRecord(elem) })
  }

  /**
   * Rebuilds a [[LinkList]] from its Catalyst form.
   *
   * @param datum expected to be an `ArrayData` produced by `serialize`
   * @return the reconstructed link list
   * @throws RuntimeException (via `sys.error`) when `datum` is not an `ArrayData`
   */
  override def deserialize(datum: Any): LinkList = {
    datum match {
      case values: ArrayData =>
        new LinkList(values.toArray[UTF8String](StringType).map { elem => decodeRecord(elem) })
      case other => sys.error(s"Cannot deserialize $other")
    }
  }

  override def userClass: Class[LinkList] = classOf[LinkList]

  /** Java-serializes one record and wraps the resulting bytes in a `UTF8String`. */
  private def encodeRecord(elem: Any): UTF8String = {
    val buffer = new ByteArrayOutputStream()
    val objectOut = new ObjectOutputStream(buffer)
    // Close (and thereby flush) before reading the buffer so no bytes are left pending.
    try objectOut.writeObject(elem)
    finally objectOut.close()
    UTF8String.fromBytes(buffer.toByteArray)
  }

  /**
   * Reads one Java-serialized record back from the bytes of `encoded` and casts it to
   * `ORecord` (unchecked; a different payload type fails with `ClassCastException`).
   * NOTE(review): this is plain Java deserialization; it is only safe when the stored
   * bytes come from a trusted source.
   */
  private def decodeRecord(encoded: UTF8String): ORecord = {
    val objectIn = new ObjectInputStream(new ByteArrayInputStream(encoded.getBytes))
    try objectIn.readObject().asInstanceOf[ORecord]
    finally objectIn.close()
  }
}

/** Shared instance of [[LinkListType]]. */
object LinkListType extends LinkListType