This article collects typical usage examples of the Scala class org.apache.spark.api.java.StorageLevels. If you are unsure what the StorageLevels class is for, or how to use it from Scala, the curated class examples below should help.
Four code examples of the StorageLevels class are shown below, sorted by popularity by default.
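Before the examples, a quick orientation: org.apache.spark.api.java.StorageLevels is the Java-API holder of the same constants defined by org.apache.spark.storage.StorageLevel, and its fields can be passed to persist() from Scala just as well. The following minimal sketch is not taken from the examples below; the object name, master URL, and data are illustrative assumptions.

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.api.java.StorageLevels

object StorageLevelsDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("StorageLevelsDemo"))
    val numbers = sc.parallelize(1 to 1000)
    // MEMORY_AND_DISK keeps partitions in memory and spills to disk when they do not fit.
    numbers.persist(StorageLevels.MEMORY_AND_DISK)
    println(numbers.sum())
    sc.stop()
  }
}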
Example 1: ADAMContextExtensions
// Set the package name and import the dependent classes
package org.bdgenomics.adam.rdd

import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.SparkContext
import org.apache.spark.api.java.StorageLevels
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.converters.FastaConverter
import org.bdgenomics.adam.rdd.contig.NucleotideContigFragmentRDD
import org.bdgenomics.utils.instrumentation.Metrics
import org.apache.spark.rdd.MetricsContext._
import org.bdgenomics.adam.rdd.feature.FeatureRDD
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.Feature

object ADAMContextExtensions {

  implicit class spExt(val sparkContext: SparkContext) extends HDFSFilesExtensions {

    def loadFastaPersistent(
      filePath: String,
      fragmentLength: Long = 10000L): NucleotideContigFragmentRDD = {
      val fastaData: RDD[(LongWritable, Text)] = sparkContext.newAPIHadoopFile(
        filePath,
        classOf[TextInputFormat],
        classOf[LongWritable],
        classOf[Text]
      )
      // Wrap the RDD with instrumentation only when metrics recording is enabled.
      val maybeInstrumented =
        if (Metrics.isRecording) fastaData.instrument() else fastaData
      val remapData = maybeInstrumented.map(kv => (kv._1.get, kv._2.toString))
      // Convert the raw lines into contig fragments and cache the result.
      val fragmentRdd = FastaConverter(remapData, fragmentLength)
        .persist(StorageLevels.MEMORY_AND_DISK)
      NucleotideContigFragmentRDD(fragmentRdd)
    }

    def mergeFeatures(features: List[FeatureRDD]): Option[FeatureRDD] = features match {
      case Nil => None
      case head :: Nil => Some(head)
      case head :: tail =>
        val merged = tail.foldLeft(head) {
          case (acc, feature) =>
            val joined = acc.broadcastRegionJoin(feature)
            // Collapse each joined pair into a single feature spanning both regions.
            acc.transform(_ => joined.rdd.map {
              case (one, two) =>
                one.setStart(Math.min(one.getStart, two.getStart))
                one.setEnd(Math.max(one.getEnd, two.getEnd))
                one
            })
        }
        Some(merged)
    }
  }
}
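A hypothetical usage sketch for Example 1 (the FASTA path, master URL, and object name are assumptions, not part of the original code): importing ADAMContextExtensions._ brings the implicit class spExt into scope, so loadFastaPersistent can be called directly on a SparkContext.

import org.apache.spark.{SparkConf, SparkContext}
import org.bdgenomics.adam.rdd.ADAMContextExtensions._

object LoadFastaPersistentDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(
      new SparkConf().setMaster("local[4]").setAppName("LoadFastaPersistentDemo"))
    // spExt enriches SparkContext, so loadFastaPersistent is available directly.
    val contigs = sc.loadFastaPersistent("hdfs:///data/reference.fasta") // assumed path
    println(s"fragments: ${contigs.rdd.count()}")
    sc.stop()
  }
}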
Example 2: armsGuided
// Set the package name and import the dependent classes
package comp.bio.aging.crispr

import org.apache.spark.api.java.StorageLevels
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.models.{ReferencePosition, ReferenceRegion}
import org.bdgenomics.adam.rdd.contig.NucleotideContigFragmentRDD
import comp.bio.aging.playground.extensions._

import scala.collection.immutable.{List, Nil}

trait HomologyArms {

  def armsGuided(fragmentRDD: NucleotideContigFragmentRDD,
                 guidedCats: RDD[(String, List[CutDS])],
                 left: Long, right: Long, avoidSites: Set[String] = Set.empty, allowOverlap: Boolean = true): RDD[KnockIn] = {
    arms(fragmentRDD, guidedCats.values.flatMap(f => f), left, right, avoidSites, allowOverlap)
  }

  def arms(fragmentRDD: NucleotideContigFragmentRDD,
           cuts: RDD[CutDS],
           left: Long, right: Long, avoidSites: Set[String] = Set.empty, allowOverlap: Boolean = true): RDD[KnockIn] = {
    // Keep cuts that satisfy positive(left), key them by their arms region, and cache:
    // positiveCuts is used twice below (the regions are collected, then joined).
    val positiveCuts: RDD[(ReferenceRegion, CutDS)] = cuts.filter(_.positive(left)).map {
      case (cut) => cut.armsRegion(left, right) -> cut
    }.persist(StorageLevels.MEMORY_AND_DISK)
    val extracted: RDD[(ReferenceRegion, String)] = fragmentRDD.extractRegions(positiveCuts.keys.collect().toList)
      .filter {
        case (_, str) => !avoidSites.exists(s => str.contains(s))
      }
    // (region, (cut, extracted sequence))
    val joined: RDD[(ReferenceRegion, (CutDS, String))] = positiveCuts.join(extracted)
    joined.map {
      case (region, (cut, regionSeq)) => cut.knockin(regionSeq, region, left, right, allowOverlap)
    }
  }
}
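One detail worth calling out in Example 2: positiveCuts is persisted with MEMORY_AND_DISK because it is used twice, first to collect the regions handed to extractRegions and then again in the join; without the persist, its filter/map lineage would be recomputed for each of those actions. A generic sketch of that persist-then-reuse pattern (simplified types, not the original code):

import org.apache.spark.api.java.StorageLevels
import org.apache.spark.rdd.RDD

// Sketch: an RDD that feeds both a collect() and a later join is worth caching.
def reusedTwice(pairs: RDD[(String, Int)], other: RDD[(String, String)]): Long = {
  val filtered = pairs.filter { case (_, v) => v > 0 }
    .persist(StorageLevels.MEMORY_AND_DISK)
  val keys = filtered.keys.collect()   // first action materialises the cache
  val joined = filtered.join(other)    // second use reads the cached partitions
  keys.length + joined.count()
}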
Example 3: SparkStreamingOnKafkaReceiver
// Set the package name and import the dependent classes
package com.jjzhk.sparkexamples.streaming

import org.apache.spark.SparkConf
import org.apache.spark.api.java.StorageLevels
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Durations, StreamingContext}

object SparkStreamingOnKafkaReceiver {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("SparkStreamingOnKafkaReceiver")
    val sc = new StreamingContext(conf, Durations.seconds(30))
    // key = topic name, value = number of receiver threads for that topic
    val topicMap = Map[String, Int]("HelloKafka" -> 1)
    // Receiver-based stream: blocks are stored serialized with replication 2 (MEMORY_AND_DISK_SER_2).
    val lines = KafkaUtils.createStream(sc, "Master:2181,Worker1:2181,Worker2:2181", "MyFirstConsumerGroup", topicMap,
      StorageLevels.MEMORY_AND_DISK_SER_2)
    val words = lines.flatMap(_._2.split(" ")).map((_, 1))
    val wordCounts = words.reduceByKey(_ + _)
    wordCounts.print()
    sc.start()
    sc.awaitTermination()
  }
}
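Example 3 uses the receiver-based consumer from the spark-streaming-kafka 0.8 connector, and MEMORY_AND_DISK_SER_2 stores the received blocks serialized and replicated to a second executor. For comparison, here is a hedged sketch of the direct (receiver-less) API from the same connector, where no storage level is passed because there is no receiver; the broker address and topic are assumptions:

import kafka.serializer.StringDecoder
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.KafkaUtils

// Direct stream: offsets are tracked by Spark itself, no receiver and no replication needed.
def directLines(ssc: StreamingContext): InputDStream[(String, String)] = {
  val kafkaParams = Map("metadata.broker.list" -> "Master:9092") // assumed broker list
  KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
    ssc, kafkaParams, Set("HelloKafka"))
}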
Example 4: SparkStreamPullDataFromFlume
// Set the package name and import the dependent classes
package com.jjzhk.sparkexamples.streaming

import org.apache.spark.SparkConf
import org.apache.spark.api.java.StorageLevels
import org.apache.spark.streaming.flume.FlumeUtils
import org.apache.spark.streaming.{Durations, StreamingContext}

object SparkStreamPullDataFromFlume {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
    conf.setMaster("local[4]").setAppName("SparkStreamPullDataFromFlume")
    val sc = new StreamingContext(conf, Durations.seconds(30))
    // Pull-based stream: Spark polls the Flume agent's SparkSink at Master:9898.
    val lines = FlumeUtils.createPollingStream(sc, "Master", 9898, StorageLevels.MEMORY_ONLY)
    val words = lines.map(e => e.event).flatMap(event => {
      val s = new String(event.getBody.array())
      s.split(" ")
    }).map((_, 1))
    val wordCounts = words.reduceByKey(_ + _)
    wordCounts.print()
    sc.start()
    sc.awaitTermination()
  }
}
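Example 4 uses the pull-based Flume integration: createPollingStream expects the custom Spark sink (org.apache.spark.streaming.flume.sink.SparkSink) to be running inside the Flume agent at Master:9898. As a hedged sketch, the push-based alternative below has Spark itself listen as an Avro endpoint that the agent pushes events to; host, port, and storage level are illustrative assumptions:

import org.apache.spark.api.java.StorageLevels
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.ReceiverInputDStream
import org.apache.spark.streaming.flume.{FlumeUtils, SparkFlumeEvent}

// Push-based stream: the Flume agent sends Avro events to this host:port.
def pushedEvents(ssc: StreamingContext): ReceiverInputDStream[SparkFlumeEvent] =
  FlumeUtils.createStream(ssc, "Master", 9898, StorageLevels.MEMORY_AND_DISK_SER_2)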