当前位置: 首页>>代码示例>>Scala>>正文


Scala DeveloperApi类代码示例

本文整理汇总了Scala中org.apache.spark.annotation.DeveloperApi的典型用法代码示例。如果您正苦于以下问题:Scala DeveloperApi类的具体用法?Scala DeveloperApi怎么用?Scala DeveloperApi使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了DeveloperApi类的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: StudyRDD

//设置package包名称以及导入依赖的类
package com.study.spark.datasource

import org.apache.spark.{Partition, TaskContext}
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{Row, SQLContext}


/**
 * An RDD of [[Row]]s that always exposes exactly two partitions and reads
 * each of them through a `StudyReader`.
 *
 * @param sqlContext provides the SparkContext this RDD is attached to
 * @param schema     schema handed to every `StudyReader` built by `compute`
 */
class StudyRDD(sqlContext: SQLContext, schema: StructType) extends RDD[Row](sqlContext.sparkContext, deps=Nil) {

  /**
   * Describes the fixed layout of this RDD: two partitions, indexed 0 and 1.
   *
   * NOTE(review): the comment that originally explained the choice of two
   * partitions was lost to a character-encoding error and could not be recovered.
   */
  override protected def getPartitions: Array[Partition] =
    Array.tabulate[Partition](2) { i =>
      new Partition {
        override def index: Int = i
      }
    }

  /**
   * Produces the rows of one partition by constructing a `StudyReader` from the
   * task context, the schema and the partition being computed.
   */
  @DeveloperApi
  override def compute(split: Partition, context: TaskContext): Iterator[Row] = {
    new StudyReader(context, schema, split)
  }
}
开发者ID:hackpupu,项目名称:LML,代码行数:27,代码来源:StudyRDD.scala

示例2: LuceneTransformer

//设置package包名称以及导入依赖的类
package com.sparklingpandas.sparklingml.feature

import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.types._

import org.apache.lucene.analysis.Analyzer
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute


/**
 * Base trait for transformers that tokenize a string column with a Lucene
 * [[Analyzer]] and emit the resulting terms as an array of strings.
 *
 * @tparam T the concrete transformer type (self-type required by `UnaryTransformer`)
 */
@DeveloperApi
trait LuceneTransformer[T <:LuceneTransformer[T]]
    extends UnaryTransformer[String, Array[String], T] {

  // Implement this function to construct an analyzer based on the provided settings.
  def buildAnalyzer(): Analyzer

  /** The output column always holds an array of strings (one entry per token). */
  override def outputDataType: DataType = ArrayType(StringType)

  /** Rejects any input column whose type is not `StringType`. */
  override def validateInputType(inputType: DataType): Unit = {
    require(inputType.isInstanceOf[StringType],
      s"The input column must be StringType, but got $inputType.")
  }

  /**
   * Builds the per-value tokenizing function.
   *
   * The analyzer is constructed once per call to this method and shared by the
   * returned function. For every input string the function opens a token stream,
   * collects each term's text, and returns the terms in stream order.
   */
  override def createTransformFunc: String => Array[String] = {
    val analyzer = buildAnalyzer()
    (inputText: String) => {
      val inputStream = analyzer.tokenStream($(inputCol), inputText)
      // The stream must be closed even when reset/incrementToken/end throws:
      // otherwise its resources leak and the analyzer's reusable stream is left
      // unclosed, which Lucene's TokenStream workflow does not allow.
      try {
        val builder = Array.newBuilder[String]
        val charTermAttr = inputStream.addAttribute(classOf[CharTermAttribute])
        inputStream.reset()
        while (inputStream.incrementToken) builder += charTermAttr.toString
        inputStream.end()
        builder.result()
      } finally {
        inputStream.close()
      }
    }
  }
}
开发者ID:sparklingpandas,项目名称:sparklingml,代码行数:40,代码来源:LuceneAnalyzer.scala

示例3: GDBRDD

//设置package包名称以及导入依赖的类
package com.esri.gdb

import org.apache.hadoop.conf.Configuration
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.Row
import org.apache.spark.{Logging, Partition, SparkContext, TaskContext}


/**
 * RDD of [[Row]]s read from a table inside a geodatabase directory.
 *
 * @param sc            Spark context; marked transient, so `compute` and
 *                      `getPartitions` fall back to a fresh Hadoop
 *                      `Configuration` whenever it is null
 * @param gdbPath       path passed to `GDBTable` / `GDBIndex` to locate the data
 * @param gdbName       name of the table looked up via `GDBTable.findTable`
 * @param numPartitions number of partitions the table's rows are split into
 */
case class GDBRDD(@transient sc: SparkContext, gdbPath: String, gdbName: String, numPartitions: Int) extends RDD[Row](sc, Nil) with Logging {

  /**
   * Opens the index and table for the given partition and returns the table's
   * row iterator over the partition's row range.
   *
   * Each resource gets its own completion listener, registered right after the
   * resource is opened. That way the index is still closed if opening the table
   * fails, and a failure while closing one resource does not skip the other.
   * Spark runs completion listeners in reverse registration order, so the table
   * is still closed before the index.
   */
  @DeveloperApi
  override def compute(partition: Partition, context: TaskContext): Iterator[Row] = {
    val part = partition.asInstanceOf[GDBPartition]
    val hadoopConf = if (sc == null) new Configuration() else sc.hadoopConfiguration
    val index = GDBIndex(gdbPath, part.hexName, hadoopConf)
    context.addTaskCompletionListener(_ => {
      index.close()
    })
    val table = GDBTable(gdbPath, part.hexName, hadoopConf)
    context.addTaskCompletionListener(_ => {
      table.close()
    })
    table.rowIterator(index, part.startAtRow, part.numRowsToRead)
  }

  /**
   * Splits the rows of the table named `gdbName` into `numPartitions` contiguous
   * ranges. Returns an empty array (and logs an error) when the table is not found.
   */
  override protected def getPartitions: Array[Partition] = {
    val hadoopConf = if (sc == null) new Configuration() else sc.hadoopConfiguration
    GDBTable.findTable(gdbPath, gdbName, hadoopConf) match {
      case Some(catTab) => {
        val index = GDBIndex(gdbPath, catTab.hexName, hadoopConf)
        try {
          val numRows = index.numRows
          // Round up so that numPartitions ranges always cover every row.
          val numRowsPerPartition = (numRows.toDouble / numPartitions).ceil.toInt
          // Running offset of the next unread row; advanced as each partition is built.
          var startAtRow = 0
          (0 until numPartitions).map(i => {
            val endAtRow = startAtRow + numRowsPerPartition
            // The last non-empty partition takes whatever rows remain; any
            // partitions after it receive zero rows.
            val numRowsToRead = if (endAtRow <= numRows) numRowsPerPartition else numRows - startAtRow
            val gdbPartition = GDBPartition(i, catTab.hexName, startAtRow, numRowsToRead)
            startAtRow += numRowsToRead
            gdbPartition
          }).toArray
        } finally {
          // The index is only needed for the row count here.
          index.close()
        }
      }
      case _ => {
        log.error(s"Cannot find '$gdbName' in $gdbPath, creating an empty array of Partitions !")
        Array.empty[Partition]
      }
    }
  }
}

/**
 * Partition descriptor for `GDBRDD`: a contiguous range of rows in one table.
 *
 * @param m_index       partition index, exposed through `index`
 * @param hexName       table identifier passed to `GDBIndex` / `GDBTable`
 * @param startAtRow    offset of the first row this partition reads
 * @param numRowsToRead number of rows this partition reads
 */
private[this] case class GDBPartition(m_index: Int, hexName: String, startAtRow: Int, numRowsToRead: Int)
  extends Partition {

  /** Position of this partition within its parent RDD. */
  override def index: Int = m_index
}
开发者ID:mraad,项目名称:spark-gdb,代码行数:60,代码来源:GDBRDD.scala

示例4: UnencryptedRDD

//设置package包名称以及导入依赖的类
package org.apache.datacommons.protectr.rdds

import com.n1analytics.paillier.{PaillierContext, PaillierPublicKey}
import org.apache.datacommons.protectr.encryptors.EncryptionKeyPair
import org.apache.datacommons.protectr.types.{CSV, FileType}
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.RDD
import org.apache.spark.{Partition, TaskContext}


/**
 * RDD of plain-text records that can encrypt one numeric column.
 *
 * @param parent   RDD whose records are exposed unchanged by this RDD
 * @param fileType record format used to split a line into fields and join it
 *                 back; defaults to `CSV`
 */
class UnencryptedRDD(parent: RDD[String],fileType: FileType = CSV)
  extends RDD[String](parent) {

  /**
   * Encrypts the field at `columnIndex` of every record with the key pair's
   * public key and wraps the result in a [[HomomorphicallyEncryptedRDD]].
   *
   * The field is parsed with `toDouble`, so a non-numeric value fails the job.
   *
   * @param keyPair     key pair whose public key performs the encryption
   * @param columnIndex zero-based index of the field to encrypt
   */
  def encryptHomomorphically(keyPair: EncryptionKeyPair, columnIndex: Int)
  : HomomorphicallyEncryptedRDD = {
    val publicKey: PaillierPublicKey = keyPair.getPublicKey
    val signedContext: PaillierContext = publicKey.createSignedContext
    // Local copy so the closure captures only the record format rather than
    // this whole RDD through its `fileType` field.
    val recordFormat: FileType = fileType
    val encryptedRDD = this.map(row => {
      val values: Array[String] = recordFormat.parseRecord(row)
      val numericValue: String = values(columnIndex)
      values(columnIndex) = signedContext.encrypt(numericValue.toDouble).toString
      recordFormat.join(values)
    })
    new HomomorphicallyEncryptedRDD(encryptedRDD, keyPair, fileType)
  }

  /**
   * Passes the parent's records through unchanged.
   *
   * Goes through the parent's `iterator` rather than calling its `compute`
   * directly, so a cached or checkpointed parent is read from storage
   * instead of being recomputed.
   */
  @DeveloperApi
  override def compute(split: Partition, context: TaskContext): Iterator[String] = {
    firstParent[String].iterator(split, context)
  }

  /** Uses exactly the parent's partitions. */
  override protected def getPartitions: Array[Partition] = firstParent[String].partitions
}
开发者ID:data-commons,项目名称:protectr,代码行数:34,代码来源:UnencryptedRDD.scala

示例5: HomomorphicallyEncryptedRDD

//设置package包名称以及导入依赖的类
package org.apache.datacommons.protectr.rdds

import java.math.BigInteger

import com.n1analytics.paillier.{PaillierPrivateKey, EncryptedNumber}
import org.apache.datacommons.protectr.encryptors.EncryptionKeyPair
import org.apache.datacommons.protectr.types.FileType
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.rdd.RDD
import org.apache.spark.{Partition, TaskContext}

/**
 * RDD of records in which one column holds encrypted numbers.
 *
 * @param RDD      parent RDD whose records are exposed unchanged by this RDD
 * @param keyPair  key pair used to rebuild, add and decrypt encrypted numbers
 * @param fileType record format used to split a line into fields and join it back
 */
class HomomorphicallyEncryptedRDD
(RDD: RDD[String], keyPair: EncryptionKeyPair, fileType: FileType) extends RDD[String](RDD) {

  /**
   * Adds up the encrypted values in column `columnIndex` across all records and
   * returns the decrypted total.
   *
   * Uses `reduce`, so calling it on an empty RDD fails.
   *
   * @param columnIndex zero-based index of the encrypted column
   */
  def sum(columnIndex: Int): BigInteger = {
    // Local aliases so the reduce closure captures only what it needs rather
    // than this whole RDD through its constructor fields.
    val keys: EncryptionKeyPair = keyPair
    val recordFormat: FileType = fileType
    val finalRecord = this.reduce((firstRow, secondRow) => {
      val firstRecord: Array[String] = recordFormat.parseRecord(firstRow)
      val secondRecord: Array[String] = recordFormat.parseRecord(secondRow)
      val firstNumber: EncryptedNumber = EncryptedNumber.create(
        firstRecord(columnIndex), keys.getPrivateKey)
      val secondNumber: EncryptedNumber = EncryptedNumber.create(
        secondRecord(columnIndex), keys.getPrivateKey)
      // Carry the running (still encrypted) total in the first record's column.
      firstRecord(columnIndex) = firstNumber.add(secondNumber).toString
      recordFormat.join(firstRecord)
    })
    val sum: String = recordFormat.parseRecord(finalRecord)(columnIndex)
    val result: EncryptedNumber = EncryptedNumber.create(sum, keys.getPrivateKey)
    result.decrypt(keys.getPrivateKey).decodeApproximateBigInteger
  }

  /**
   * Decrypts column `columnIndex` of every record and returns the records as an
   * [[UnencryptedRDD]]. Each decrypted value is written back as the string form
   * of a `BigInteger`.
   *
   * @param columnIndex zero-based index of the encrypted column
   */
  def decrypt(columnIndex: Int): UnencryptedRDD = {
    val privateKey: PaillierPrivateKey = keyPair.getPrivateKey
    // Local aliases keep this RDD itself out of the map closure.
    val keys: EncryptionKeyPair = keyPair
    val recordFormat: FileType = fileType
    val javaRDD = this.map(row =>{
        val values: Array[String] = recordFormat.parseRecord(row)
        val encryptedNumber: EncryptedNumber = EncryptedNumber.create(
          values(columnIndex), keys.getPrivateKey)
        val bigInteger: BigInteger = privateKey.decrypt(encryptedNumber).decodeApproximateBigInteger
        values(columnIndex) = bigInteger.toString
        recordFormat.join(values)
      })
    new UnencryptedRDD(javaRDD,fileType)
  }

  /** Uses exactly the parent's partitions. */
  override protected def getPartitions: Array[Partition] = firstParent[String].partitions

  /**
   * Passes the parent's records through unchanged.
   *
   * Goes through the parent's `iterator` rather than calling its `compute`
   * directly, so a cached or checkpointed parent is read from storage
   * instead of being recomputed.
   */
  @DeveloperApi
  override def compute(split: Partition, context: TaskContext): Iterator[String] = {
    firstParent[String].iterator(split, context)
  }
}
开发者ID:data-commons,项目名称:protectr,代码行数:51,代码来源:HomomorphicallyEncryptedRDD.scala

示例6: IndexReadRDD

//设置package包名称以及导入依赖的类
package top.myetl.lucenerdd.rdd

import java.util.concurrent.atomic.AtomicBoolean

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.lucene.search.IndexSearcher
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark._
import org.apache.spark.rdd.RDD
import org.apache.spark.storage.StorageLevel
import top.myetl.lucenerdd.convert.DocToBean
import top.myetl.lucenerdd.query.MyQuery
import top.myetl.lucenerdd.query.MyQuery.must
import top.myetl.lucenerdd.util.FsUtils

import scala.reflect.ClassTag


/**
 * RDD that yields one Lucene [[IndexSearcher]] per partition, with one partition
 * for each index directory found under the table's directory.
 *
 * @param _sc       Spark context this RDD is attached to
 * @param tableName table whose directory (under the configured base directory)
 *                  is scanned for index directories
 * @param deps      dependencies passed through to the RDD constructor
 */
class IndexReadRDD( _sc: SparkContext,
                    tableName: String,
                    deps: Seq[Dependency[_]] = Nil)
  extends RDD[IndexSearcher](_sc, deps){

  /**
   * Returns a single-element iterator over the partition's `indexSearcher`.
   *
   * `hasNext` is side-effect free, so it may be called any number of times
   * before `next()` without losing the element. The searcher is only fetched
   * from the partition when `next()` is called.
   */
  @DeveloperApi
  override def compute(split: Partition, context: TaskContext): Iterator[IndexSearcher] = {
    val p =  split.asInstanceOf[IndexRDDPartition]

    new Iterator[IndexSearcher]{
      // Flipped by next(), never by hasNext.
      private[this] var consumed = false

      override def hasNext: Boolean = !consumed

      override def next(): IndexSearcher = {
        if (consumed) throw new NoSuchElementException("next on empty iterator")
        consumed = true
        p.indexSearcher
      }
    }
  }

  /**
   * Lists the index directories under the table directory and creates one
   * `IndexRDDPartition` per directory, indexed in listing order.
   */
  override protected def getPartitions: Array[Partition] = {
    val baseDir = FsUtils.getHdfsBaseDir(sparkContext.getConf)
    val tableDir = FsUtils.dirName(baseDir, tableName)
    val tablePath = new Path(tableDir)

//    val configuration = SparkHadoopUtil.get.conf
    val configuration = new Configuration()
    val fs: FileSystem = FsUtils.get(tablePath, configuration)
    // Close the file system handle even when the listing fails.
    val paths = try {
      FsUtils.listLuceneDir(fs, tablePath)
    } finally {
      FsUtils.close(fs)
    }

    paths.indices.map(i =>
      new IndexRDDPartition(i, FsUtils.dirName(tableDir, paths(i)))
    ).toArray
  }

  /**
   * Persists this RDD with `MEMORY_ONLY`, regardless of the requested level.
   *
   * NOTE(review): `newLevel` is ignored; presumably intentional because the
   * searchers cannot be spilled or serialized. Confirm with the author.
   */
  override def persist(newLevel: StorageLevel): IndexReadRDD.this.type = {
    super.persist(StorageLevel.MEMORY_ONLY)
  }

}
开发者ID:myetl,项目名称:sparkLu,代码行数:61,代码来源:LuceneRDD.scala


注:本文中的org.apache.spark.annotation.DeveloperApi类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。