This article collects typical usage examples of the org.apache.hadoop.io.NullWritable class in Scala. If you are unsure what NullWritable is for or how to use it from Scala, the curated class examples below should help.
The following shows 5 code examples that use the NullWritable class.
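Before the examples, here is a minimal sketch of the most common pattern (not taken from any of the snippets below; the path and data are made up): NullWritable is a zero-length singleton obtained via NullWritable.get(), typically used as a placeholder key or value when a SequenceFile slot is required but carries no information.
// Minimal sketch (assumed path and data): NullWritable.get() as a placeholder SequenceFile key.
import org.apache.hadoop.io.{NullWritable, Text}
import org.apache.spark.{SparkConf, SparkContext}

object NullWritableDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setMaster("local[*]").setAppName("NullWritableDemo"))

    // the key carries no information; only the Text value is stored
    sc.parallelize(Seq("a", "b", "c"))
      .map(s => (NullWritable.get(), new Text(s)))
      .saveAsSequenceFile("/tmp/nullwritable-demo")

    sc.stop()
  }
}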
Example 1: Utils
// Set the package name and import the required classes
package com.larry.da.jobs.idmap
import org.apache.commons.io.output.ByteArrayOutputStream
import org.apache.hadoop.io.{NullWritable, BytesWritable}
import org.apache.spark.rdd.RDD
import org.apache.spark.serializer.KryoSerializer
import scala.reflect.ClassTag
object Utils {

  val keys = "0123456789ABCDEFHIJKMNPRSTUVWXYZ"
  val keyDic = Map(keys.zip(0 until 32).toSeq: _*)

  def compressAguid(uid: Long) = {
    var n = uid
    val res = (0 until 13).map { i => val index = n & 31; n = n >>> 5; keys(index.toInt) }
    res.mkString("").reverse + "u"
  }

  def unCompressAguid(uid: String) = {
    val res = uid.take(13).map(s => keyDic(s))
    var n = res.head.toLong
    res.tail.foreach(p => {
      n = (n << 5) | p
    })
    n
  }

  def saveAsObjectFile[T: ClassTag](rdd: RDD[T], path: String) {
    val kryoSerializer = new KryoSerializer(rdd.context.getConf)

    rdd.mapPartitions(iter => iter.grouped(10)
      .map(_.toArray))
      .map(splitArray => {
        // initialize Kryo and call your registrator class
        val kryo = kryoSerializer.newKryo()

        // convert the data to bytes
        val bao = new ByteArrayOutputStream()
        val output = kryoSerializer.newKryoOutput()
        output.setOutputStream(bao)
        kryo.writeClassAndObject(output, splitArray)
        output.close()

        // the key field of the sequence file is ignored, hence NullWritable
        val byteWritable = new BytesWritable(bao.toByteArray)
        (NullWritable.get(), byteWritable)
      }).saveAsSequenceFile(path)
  }
}
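The snippet above only writes the Kryo-serialized sequence file. For completeness, a matching reader is sketched below; it is not part of the original code, and the helper name readObjectFile as well as the use of com.esotericsoftware.kryo.io.Input are assumptions. It reads the NullWritable-keyed file back and deserializes each BytesWritable chunk into the original records.
// A sketch of the counterpart to saveAsObjectFile above; not from the original source.
import com.esotericsoftware.kryo.io.Input
import org.apache.hadoop.io.{BytesWritable, NullWritable}
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import org.apache.spark.serializer.KryoSerializer

import scala.reflect.ClassTag

object KryoReader {
  def readObjectFile[T: ClassTag](sc: SparkContext, path: String): RDD[T] = {
    val kryoSerializer = new KryoSerializer(sc.getConf)
    sc.sequenceFile(path, classOf[NullWritable], classOf[BytesWritable])
      .flatMap { case (_, bytes) =>
        // each value holds one Kryo-serialized Array[T] of up to 10 records
        val kryo = kryoSerializer.newKryo()
        val input = new Input(bytes.copyBytes())
        kryo.readClassAndObject(input).asInstanceOf[Array[T]].iterator
      }
  }
}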
Example 2: App
// Set the package name and import the required classes
package com.dataoptimo.imgprocessing
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.fs.FSDataInputStream
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.BytesWritable
import org.apache.hadoop.io.SequenceFile
import org.apache.hadoop.io.IOUtils
import com.dataoptimo.imgprocessing.convert.Converter
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import com.dataoptimo.imgprocessing.format.WholeFileInputFormat
import org.opencv.core.Mat
object App {
  def main(args: Array[String]) {
    val confHadoop = new Configuration()
    confHadoop.addResource(new Path("/users/fawadalam/Documents/cloudera_vm/conf/core-site.xml"))
    confHadoop.addResource(new Path("/users/fawadalam/Documents/cloudera_vm/conf/hdfs-site.xml"))

    val conf = new SparkConf()
    conf.setMaster("local")
    conf.setAppName("ll")
    val sc = new SparkContext(conf)

    //val rdd = sc.newAPIHadoopFile[NullWritable,BytesWritable,WholeFileInputFormat]("/user/fawadalam/images/img_99697.jpg", classOf[WholeFileInputFormat], classOf[NullWritable], classOf[BytesWritable], confHadoop)
    val rdd = sc.newAPIHadoopFile("/user/fawadalam/images/img_99697.jpg", classOf[WholeFileInputFormat], classOf[NullWritable], classOf[BytesWritable], confHadoop)

    val mat = rdd.map(x => OpenCVOps.imageToMat(x._2.getBytes))
    rdd.map(x => OpenCVOps.imageToMat(x._2.getBytes)).map(x => x.mat.cols).foreach(println)

    val x = mat.collect()
    println(x(0).mat.cols)
  }
}
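The example depends on com.dataoptimo.imgprocessing.format.WholeFileInputFormat, which is not shown in this snippet. A minimal sketch of such a format, following the well-known whole-file pattern where every input file becomes a single (NullWritable, BytesWritable) record, could look like the following; the project's actual implementation may differ.
// A sketch of a whole-file input format: each file becomes one (NullWritable, BytesWritable) record.
// Not the project's original class; written here for illustration only.
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, IOUtils, NullWritable}
import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat, FileSplit}
import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext}

class WholeFileInputFormat extends FileInputFormat[NullWritable, BytesWritable] {

  // never split a file; an image must be read as a single record
  override def isSplitable(context: JobContext, file: Path): Boolean = false

  override def createRecordReader(split: InputSplit, context: TaskAttemptContext):
      RecordReader[NullWritable, BytesWritable] = new WholeFileRecordReader
}

class WholeFileRecordReader extends RecordReader[NullWritable, BytesWritable] {
  private var fileSplit: FileSplit = _
  private var conf: Configuration = _
  private val value = new BytesWritable()
  private var processed = false

  override def initialize(split: InputSplit, context: TaskAttemptContext): Unit = {
    fileSplit = split.asInstanceOf[FileSplit]
    conf = context.getConfiguration
  }

  override def nextKeyValue(): Boolean = {
    if (!processed) {
      // read the whole file into a byte array and expose it as the value
      val contents = new Array[Byte](fileSplit.getLength.toInt)
      val path = fileSplit.getPath
      val fs = path.getFileSystem(conf)
      val in = fs.open(path)
      try {
        IOUtils.readFully(in, contents, 0, contents.length)
        value.set(contents, 0, contents.length)
      } finally {
        IOUtils.closeStream(in)
      }
      processed = true
      true
    } else {
      false
    }
  }

  override def getCurrentKey: NullWritable = NullWritable.get()
  override def getCurrentValue: BytesWritable = value
  override def getProgress: Float = if (processed) 1.0f else 0.0f
  override def close(): Unit = {}
}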
Example 3: TimelyImplicits
// Set the package name and import the required classes
package io.gzet.timeseries.timely
import io.gzet.utils.spark.accumulo.AccumuloConfig
import org.apache.accumulo.core.client.ClientConfiguration
import org.apache.accumulo.core.client.mapreduce.{AbstractInputFormat, InputFormatBase}
import org.apache.accumulo.core.client.security.tokens.PasswordToken
import org.apache.accumulo.core.data.Range
import org.apache.accumulo.core.security.Authorizations
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import scala.collection.JavaConversions._
object TimelyImplicits {

  implicit class AccumuloReader(sc: SparkContext) {

    def timely(accumuloConfig: AccumuloConfig, rowPrefix: Option[String] = None): RDD[Metric] = {

      val conf = sc.hadoopConfiguration
      val job = Job.getInstance(conf)

      val clientConfig: ClientConfiguration = new ClientConfiguration()
        .withInstance(accumuloConfig.accumuloInstance)
        .withZkHosts(accumuloConfig.zookeeperHosts)

      val authorizations = new Authorizations(List("INTERNAL", "CONFIDENTIAL", "SECRET").map(_.getBytes()))

      AbstractInputFormat.setConnectorInfo(job, accumuloConfig.accumuloUser, new PasswordToken(accumuloConfig.accumuloPassword))
      AbstractInputFormat.setZooKeeperInstance(job, clientConfig)
      AbstractInputFormat.setScanAuthorizations(job, authorizations)
      InputFormatBase.setInputTableName(job, "timely.metrics")

      if (rowPrefix.isDefined) {
        val ranges = List(Range.prefix(rowPrefix.get))
        InputFormatBase.setRanges(job, ranges)
      }

      val rdd = sc.newAPIHadoopRDD(job.getConfiguration,
        classOf[AccumuloTimelyInputFormat],
        classOf[NullWritable],
        classOf[TimelyWritable]
      ).values

      rdd.map { timely =>
        val Array(tagK, tagV) = timely.getMetricType.split("=", 2)
        Metric(
          timely.getMetric,
          timely.getTime,
          timely.getMetricValue,
          Map(tagK -> tagV)
        )
      }
    }
  }
}
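A usage sketch of the implicit class follows; the AccumuloConfig constructor arguments and their order are assumptions based on the field names used above, and sc is an existing SparkContext.
// Hypothetical usage of the implicit AccumuloReader; AccumuloConfig's argument order is assumed.
import io.gzet.timeseries.timely.TimelyImplicits._

val accumuloConfig = AccumuloConfig("accumulo-instance", "zk1:2181,zk2:2181", "accumulo-user", "accumulo-password")

// reads the "timely.metrics" table, optionally restricted to rows starting with the given prefix
val metrics = sc.timely(accumuloConfig, rowPrefix = Some("sys.cpu.user"))
metrics.take(10).foreach(println)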
Example 4: SuccinctAnnotationOutputFormat
// Set the package name and import the required classes
package edu.berkeley.cs.succinct.annot.serde
import java.io.File
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
import org.apache.hadoop.mapreduce.{RecordWriter, TaskAttemptContext}
class SuccinctAnnotationOutputFormat
  extends FileOutputFormat[NullWritable, (Int, Iterator[(String, String, String)])] {

  override def getRecordWriter(job: TaskAttemptContext):
  RecordWriter[NullWritable, (Int, Iterator[(String, String, String)])] = {
    val conf = job.getConfiguration
    val ignoreParseErrors = conf.get("succinct.annotations.ignoreParseErrors", "true").toBoolean
    val serializeInMemory = conf.get("succinct.annotations.serializeInMemory", "true").toBoolean
    val dirs = conf.get("spark.local.dir", System.getProperty("java.io.tmpdir")).split(",")
    println("ignoreParseErrors = " + ignoreParseErrors + " serializeInMemory = " + serializeInMemory
      + " Spark local dir = " + dirs(0) + " persistInMemory = false")
    val path = FileOutputFormat.getOutputPath(job)
    new SuccinctAnnotationRecordWriter(path, ignoreParseErrors, conf, (serializeInMemory, new File(dirs(0))))
  }
}
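For illustration only, not Succinct's actual pipeline: an output format like this is typically plugged into Spark through saveAsNewAPIHadoopFile on a pair RDD keyed by NullWritable, roughly as sketched below (the upstream RDD is left as a placeholder, and the output path is made up).
// Sketch only: writing a NullWritable-keyed pair RDD through the output format above.
import org.apache.hadoop.io.NullWritable
import org.apache.spark.rdd.RDD

// produced upstream by the (not shown) annotation pipeline
val partitioned: RDD[(Int, Iterator[(String, String, String)])] = ???

partitioned
  .map(part => (NullWritable.get(), part))   // the key carries no data
  .saveAsNewAPIHadoopFile(
    "/tmp/succinct-annotations",
    classOf[NullWritable],
    classOf[(Int, Iterator[(String, String, String)])],
    classOf[SuccinctAnnotationOutputFormat])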
Example 5: NullAndTextWritableConverterTest
// Set the package name and import the required classes
package com.groupon.dse.mezzanine.converter
import com.groupon.dse.mezzanine.util.SparkContextSetup
import org.apache.hadoop.io.NullWritable
import org.scalatest.FlatSpec
class NullAndTextWritableConverterTest extends FlatSpec with SparkContextSetup with WritableConverterTestSetup {

  it should "convert a RDD[TopicAndEvent] to a RDD[NullWritable, Text]" in {
    val converter = new NullAndTextWritableConverter

    val convertedData = sc.parallelize(topicAndEventData).map(topicAndEvent => {
      converter.convert(topicAndEvent)
    }).map({
      // Need to `map` here since the Hadoop `Writable` isn't serializable
      case (nullWritable, eventText) => (nullWritable.toString, eventText.toString)
    }).collect()

    assert(convertedData.length === topicAndEventData.length)
    convertedData.zip(topicAndEventData).foreach({
      case ((nullWritable, eventText), topicAndEvent) => {
        assert(nullWritable === NullWritable.get().toString)
        assert(eventText === topicAndEvent.event)
      }
    })
  }
}
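The NullAndTextWritableConverter under test is not shown in this snippet. Based on the assertions above, a minimal implementation might look like the following sketch; TopicAndEvent's event field comes from the test, everything else is assumed.
// A sketch of the converter exercised by the test above; not the original Groupon implementation.
import org.apache.hadoop.io.{NullWritable, Text}

// field names are assumptions; only `event` is implied by the test
case class TopicAndEvent(topic: String, event: String)

class NullAndTextWritableConverter extends Serializable {
  // NullWritable.get() is a singleton carrying no data; only the event text is kept as the value
  def convert(topicAndEvent: TopicAndEvent): (NullWritable, Text) =
    (NullWritable.get(), new Text(topicAndEvent.event))
}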