本文整理汇总了Scala中org.apache.hadoop.io.IntWritable类的典型用法代码示例。如果您正苦于以下问题：Scala IntWritable类的具体用法？Scala IntWritable怎么用？Scala IntWritable使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了IntWritable类的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: FrequencyMapReducer
//设置package包名称以及导入依赖的类
package com.argcv.cse8803.mapreducebasic
import com.argcv.valhalla.console.ColorForConsole._
import com.argcv.valhalla.utils.Awakable
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{IntWritable, Text}
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
/**
 * Driver for the "Frequency" MapReduce job.
 *
 * Wires `FrequencyMapper` and `FrequencyReducer` into a Hadoop job that reads from the path
 * given as the first argument and writes to a time-stamped path derived from the second.
 */
object FrequencyMapReducer extends Awakable {

  /**
   * Configures the job, runs it to completion and logs the final status.
   *
   * @param args `args(0)` is the input path; `args(1)` is the output path prefix, to which
   *             `-<current time in millis>` is appended
   * @throws IllegalArgumentException if fewer than two arguments are supplied
   */
  def main(args: Array[String]): Unit = {
    // Fail fast with a usage message instead of an opaque index/head exception.
    require(args.length >= 2, "usage: FrequencyMapReducer <input path> <output path prefix>")
    val inputPath = new Path(args(0))
    val outputPath = new Path(s"${args(1)}-${System.currentTimeMillis()}")

    // create a hadoop job and set main class
    val job = Job.getInstance()
    job.setJarByClass(FrequencyMapReducer.getClass)
    job.setJobName("Frequency")

    // set the input & output path
    FileInputFormat.addInputPath(job, inputPath)
    FileOutputFormat.setOutputPath(job, outputPath)

    // set mapper & reducer
    job.setMapperClass(FrequencyMapper.instance)
    job.setReducerClass(FrequencyReducer.instance)

    // specify the type of the output; classOf avoids allocating throwaway writables
    job.setOutputKeyClass(classOf[Text])
    job.setOutputValueClass(classOf[IntWritable])

    // run
    val succeeded = job.waitForCompletion(true)
    val status = if (succeeded) "OK".withColor(GREEN) else "FAILED".withColor(RED)
    logger.info(s"job finished, status [$status]")
  }
}
示例2: trending(包对象)
//设置package包名称以及导入依赖的类
package epam.idobrovolskiy.wikipedia
import epam.idobrovolskiy.wikipedia.trending.time.PlainDatesExtractor
import epam.idobrovolskiy.wikipedia.trending.tokenizer.StopWordsTokenizer
import org.apache.hadoop.io.{IntWritable, Text}
/** Package-wide defaults and type aliases for `epam.idobrovolskiy.wikipedia.trending`. */
package object trending {

  // Application name.
  val AppName: String = "wikipedia-trending"

  // Default tokenizer instance and the default top-token count.
  val DefaultTokenizer: StopWordsTokenizer = new StopWordsTokenizer
  val TopTokenCount: Int = 10

  // Default file names.
  val DefaultInputWikiDumpFilename: String = "wiki_small"
  val DefaultPrepHeaderFilename: String = "wiki_prep_headers"
  val DefaultPrepFullFilename: String = "wiki_prep_full"
  val DefaultDateCitationsFileName: String = "wiki_date_citations"
  val DefaultDateIndexFileName: String = "wiki_index_dates"
  val DefaultDocIndexFileName: String = "wiki_index_docs"

  // Preprocessing defaults: output target, local paths and plain-text extractor.
  val DefaultTarget = preprocessing.PreprocessingTarget.Stdout
  val DefaultPathForPlainTextExtraction: String = "./data/out"
  val DefaultWikipediaDumpFilesPath: String = "./data/in"
  val DefaultPlainTextExtractor = preprocessing.attardi.AttardiPlainTextExtractor

  // HDFS name node address and root path.
  val HdfsNameNodeHost: String = "hdfs://sandbox.hortonworks.com:8020"
  val HdfsRootPath: String = "/user/idobrovolskiy/wikipedia-trending/"

  // Delimiter between header and body in a preprocessed file (a blank line).
  val PreprocessedFileHeaderBodyDelimiter: String = "\n\n"

  // Key/value writable types of the preprocessed sequence file.
  type PreprocessedSequenceFileKeyType = IntWritable
  type PreprocessedSequenceFileValueType = Text

  val DefaultDatesExtractor: PlainDatesExtractor = new PlainDatesExtractor

  // Lazy, so the session is only obtained on first access.
  lazy val spark = common.SparkUtils.sparkSession
}
示例3: FrequencyMapReducer
//设置package包名称以及导入依赖的类
package com.argcv.iphigenia.example.hdfs.mr
import com.argcv.valhalla.console.ColorForConsole._
import com.argcv.valhalla.utils.Awakable
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{ IntWritable, Text }
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
/**
 * Driver for the "Frequency" MapReduce job.
 *
 * Wires `FrequencyMapper` and `FrequencyReducer` into a Hadoop job that reads from the path
 * given as the first argument and writes to a time-stamped path derived from the second.
 */
object FrequencyMapReducer extends Awakable {

  /**
   * Configures the job, runs it to completion and logs the final status.
   *
   * @param args `args(0)` is the input path; `args(1)` is the output path prefix, to which
   *             `-<current time in millis>` is appended
   * @throws IllegalArgumentException if fewer than two arguments are supplied
   */
  def main(args: Array[String]): Unit = {
    // Fail fast with a usage message instead of an opaque index/head exception.
    require(args.length >= 2, "usage: FrequencyMapReducer <input path> <output path prefix>")
    val inputPath = new Path(args(0))
    val outputPath = new Path(s"${args(1)}-${System.currentTimeMillis()}")

    // create a hadoop job and set main class
    val job = Job.getInstance()
    job.setJarByClass(FrequencyMapReducer.getClass)
    job.setJobName("Frequency")

    // set the input & output path
    FileInputFormat.addInputPath(job, inputPath)
    FileOutputFormat.setOutputPath(job, outputPath)

    // set mapper & reducer
    job.setMapperClass(FrequencyMapper.instance)
    job.setReducerClass(FrequencyReducer.instance)

    // specify the type of the output; classOf avoids allocating throwaway writables
    job.setOutputKeyClass(classOf[Text])
    job.setOutputValueClass(classOf[IntWritable])

    // run
    val succeeded = job.waitForCompletion(true)
    val status = if (succeeded) "OK".withColor(GREEN) else "FAILED".withColor(RED)
    logger.info(s"job finished, status [$status]")
  }
}
示例4: FrequencyMapper
//设置package包名称以及导入依赖的类
package com.argcv.iphigenia.example.hdfs.mr
import org.apache.hadoop.io.{ IntWritable, LongWritable, Text }
import org.apache.hadoop.mapreduce.Mapper
/**
 * Mapper that emits `(eventID, 1)` for each input line, where the event id is the second
 * comma-separated field of the line.
 */
class FrequencyMapper extends Mapper[LongWritable, Text, Text, IntWritable] {

  type Context = Mapper[LongWritable, Text, Text, IntWritable]#Context

  /**
   * Extracts the second comma-separated field of `lineText` and writes it with a count of one.
   *
   * Lines with fewer than two fields cannot yield an event id; they are skipped and counted
   * under the `FrequencyMapper` / `MALFORMED_LINES` counter rather than failing the task
   * with an `ArrayIndexOutOfBoundsException`.
   *
   * @param offset   input key supplied by the framework (not used here)
   * @param lineText one line of input
   * @param context  output collector and counter access
   */
  override def map(offset: LongWritable, lineText: Text, context: Context): Unit = {
    val fields = lineText.toString.split(",")
    if (fields.length > 1) {
      context.write(new Text(fields(1)), FrequencyMapper.ONE)
    } else {
      context.getCounter("FrequencyMapper", "MALFORMED_LINES").increment(1L)
    }
  }
}
/** Companion holding the mapper's class token and the shared constant count value. */
object FrequencyMapper {

  /** Class token for [[FrequencyMapper]], obtained without instantiating a mapper. */
  def instance: Class[_ <: FrequencyMapper] = classOf[FrequencyMapper]

  /** Shared writable holding the value 1, written for every emitted event id. */
  lazy val ONE: IntWritable = new IntWritable(1)
}
示例5: SequenceSource
//设置package包名称以及导入依赖的类
package io.eels.component.sequence
import java.util.concurrent.atomic.AtomicBoolean
import com.sksamuel.exts.Logging
import com.sksamuel.exts.io.Using
import io.eels._
import io.eels.datastream.{DataStream, Publisher, Subscriber, Subscription}
import io.eels.schema.StructType
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, IntWritable, SequenceFile}
/**
 * Source backed by the sequence file at `path`.
 *
 * The schema is delegated to `SequenceSupport.schema`, and the data is exposed as a single
 * part served by one `SequencePublisher`.
 */
case class SequenceSource(path: Path)(implicit conf: Configuration) extends Source with Logging {

  logger.debug(s"Creating sequence source from $path")

  /** Schema obtained from `SequenceSupport.schema` for this path. */
  override def schema: StructType = SequenceSupport.schema(path)

  /** A one-element list containing the publisher for this path. */
  override def parts(): Seq[Publisher[Seq[Row]]] = {
    val publisher: Publisher[Seq[Row]] = new SequencePublisher(path)
    publisher :: Nil
  }
}
/** Factory for iterators that expose the records of a sequence file as rows. */
object SequenceReaderIterator {

  /**
   * Builds an iterator over the records of `reader`, skipping the first record (the header).
   *
   * `hasNext` is idempotent: it reads at most one record ahead and remembers the outcome, so
   * repeated calls do not skip records. `next()` consumes that buffered record.
   *
   * @param schema schema attached to every produced row
   * @param reader open sequence file reader; it is not closed by the iterator
   * @return an iterator of rows, one per record after the header
   */
  def apply(schema: StructType, reader: SequenceFile.Reader): Iterator[Row] = new Iterator[Row] {
    private val k = new IntWritable()
    private val v = new BytesWritable()

    // throw away the header
    reader.next(k, v)

    // True while `v` holds a record that has been read but not yet returned by next().
    private var buffered = false
    // True once the reader has reported that no records remain.
    private var exhausted = false

    override def hasNext(): Boolean = {
      if (!buffered && !exhausted) {
        if (reader.next(k, v)) buffered = true else exhausted = true
      }
      buffered
    }

    override def next(): Row = {
      if (!hasNext()) throw new NoSuchElementException("next on exhausted sequence file iterator")
      buffered = false
      Row(schema, SequenceSupport.toValues(v).toVector)
    }
  }
}
/**
 * Publisher that reads the sequence file at `path` and pushes its rows to a subscriber in
 * batches of `DataStream.DefaultBatchSize`.
 */
class SequencePublisher(val path: Path)(implicit conf: Configuration)
  extends Publisher[Seq[Row]] with Logging with Using {

  /**
   * Streams every batch to `subscriber`, then signals completion.
   *
   * Reading continues only while the flag handed to `Subscription.fromRunning` stays true
   * (presumably the subscription clears it on cancel; confirm against `Subscription`).
   * Any throwable raised along the way is forwarded to `subscriber.error`.
   *
   * @param subscriber receiver of the subscription, the row batches and the terminal signal
   */
  override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = {
    try {
      using(SequenceSupport.createReader(path)) { seqReader =>
        val structType = SequenceSupport.schema(path)
        val active = new AtomicBoolean(true)
        subscriber.subscribed(Subscription.fromRunning(active))

        val batches = SequenceReaderIterator(structType, seqReader)
          .takeWhile(_ => active.get)
          .grouped(DataStream.DefaultBatchSize)
        for (batch <- batches) subscriber.next(batch)

        subscriber.completed()
      }
    } catch {
      case t: Throwable => subscriber.error(t)
    }
  }
}
示例6: SequenceSupport
//设置package包名称以及导入依赖的类
package io.eels.component.sequence
import java.io.StringReader
import java.nio.charset.Charset
import com.sksamuel.exts.Logging
import com.sksamuel.exts.io.Using
import io.eels.component.csv.{CsvFormat, CsvSupport}
import io.eels.schema.{Field, StructType}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, IntWritable, SequenceFile}
/** Helpers for reading CSV-encoded records out of Hadoop sequence files. */
object SequenceSupport extends Logging with Using {

  /**
   * Opens a sequence file reader for `path`. The caller is responsible for closing it.
   *
   * @param path location of the sequence file
   * @param conf Hadoop configuration used to open the file
   */
  def createReader(path: Path)(implicit conf: Configuration): SequenceFile.Reader =
    new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))

  /**
   * Decodes the bytes of `v` as UTF-8 and parses them as one CSV record.
   *
   * @param v writable holding one CSV-encoded record
   * @return the parsed values, or an empty array if the content holds no record
   */
  def toValues(v: BytesWritable): Array[String] =
    toValues(new String(v.copyBytes(), java.nio.charset.StandardCharsets.UTF_8))

  /**
   * Parses the first CSV record of `str`.
   *
   * @param str text containing a CSV record
   * @return the parsed values, or an empty array if the parser yields no record
   */
  def toValues(str: String): Array[String] = {
    val parser = CsvSupport.createParser(CsvFormat(), false, false, false, null, null)
    parser.beginParsing(new StringReader(str))
    // parseNext() returns null when there is no record (e.g. empty input); map that to an
    // empty array so callers never dereference null. Always release the parser afterwards.
    try Option(parser.parseNext()).getOrElse(Array.empty[String])
    finally parser.stopParsing()
  }

  /**
   * Builds the schema of the sequence file at `path` from its first record, treating each
   * value of that record as a field name.
   *
   * @param path location of the sequence file
   * @param conf Hadoop configuration used to open the file
   * @return a struct type with one field per header value; no fields if the file has no records
   */
  def schema(path: Path)(implicit conf: Configuration): StructType = {
    logger.debug(s"Fetching sequence schema for $path")
    using(createReader(path)) { reader =>
      val k = new IntWritable()
      val v = new BytesWritable()
      // Only parse the header when a record was actually read.
      val names: Array[String] = if (reader.next(k, v)) toValues(v) else Array.empty[String]
      val fields: Array[Field] = names.map(name => new Field(name))
      StructType(fields.toList)
    }
  }
}
示例7: SequenceSinkTest
//设置package包名称以及导入依赖的类
package io.eels.component.sequence
import io.eels.datastream.DataStream
import io.eels.schema.StructType
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.{BytesWritable, IntWritable, SequenceFile}
import org.scalatest.{Matchers, WordSpec}
/** Verifies that `SequenceSink` writes the header and every row of a data stream. */
class SequenceSinkTest extends WordSpec with Matchers {

  // Two rows over four string columns; the sink is expected to emit a header plus these rows.
  private val ds = DataStream.fromValues(
    StructType("a", "b", "c", "d"),
    Seq(
      List("1", "2", "3", "4"),
      List("5", "6", "7", "8")
    )
  )

  "SequenceSink" should {
    "write sequence files" in {
      implicit val conf: Configuration = new Configuration
      implicit val fs: FileSystem = FileSystem.get(conf)
      val path = new Path("seqsink.seq")
      if (fs.exists(path))
        fs.delete(path, true)

      try {
        ds.to(SequenceSink(path))
        val reader = new SequenceFile.Reader(new Configuration, SequenceFile.Reader.file(path))
        try {
          val k = new IntWritable
          val v = new BytesWritable
          val set = for (_ <- 1 to 3) yield {
            // Each of the three expected records (header + two rows) must actually be present.
            reader.next(k, v) shouldBe true
            new String(v.copyBytes, java.nio.charset.StandardCharsets.UTF_8)
          }
          set.toSet shouldBe Set(
            "a,b,c,d",
            "1,2,3,4",
            "5,6,7,8"
          )
        } finally {
          // Close the reader even when an assertion above fails.
          reader.close()
        }
      } finally {
        // Always remove the temporary file so a failed run does not leave it behind.
        fs.delete(path, true)
      }
    }
  }
}
示例8: LoadFileSequence
//设置package包名称以及导入依赖的类
package com.git.huanghaifeng.spark.load
import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.hadoop.io.{ IntWritable, Text }
/** Example that writes a small pair RDD as a sequence file and reads it back. */
object LoadFileSequence {

  /**
   * Saves three `(String, Int)` pairs to a sequence file, reloads them and prints the result.
   *
   * @param args `args(0)` is the Spark master; `args(1)` is the sequence file path
   * @throws IllegalArgumentException if fewer than two arguments are supplied
   */
  def main(args: Array[String]): Unit = {
    require(args.length >= 2, "usage: LoadFileSequence <master> <sequence file path>")
    val master = args(0)
    val filePath = args(1)

    val sc = new SparkContext(master, "BasicSequenceFile", System.getenv("SPARK_HOME"))
    try {
      val outData = sc.parallelize(List(("Holden", 3), ("Kay", 6), ("Snail", 2)))
      outData.saveAsSequenceFile(filePath)

      // Convert the Hadoop writables back to plain Scala values right after reading.
      val inData = sc.sequenceFile(filePath, classOf[Text], classOf[IntWritable]).map {
        case (name, count) => (name.toString, count.get())
      }
      println(inData.collect().toList)
    } finally {
      // Stop the context even if the job fails.
      sc.stop()
    }
  }
}
示例9: SequenceSink
//设置package包名称以及导入依赖的类
package io.eels.component.sequence
import java.io.StringWriter
import com.univocity.parsers.csv.{CsvWriter, CsvWriterSettings}
import io.eels.{Row, Sink, SinkWriter}
import io.eels.schema.StructType
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, IntWritable, SequenceFile}
/**
 * Sink that writes rows to a Hadoop sequence file at `path` as CSV-encoded records.
 *
 * The first record holds the schema's field names; every following record holds one row.
 */
case class SequenceSink(path: Path)(implicit conf: Configuration) extends Sink {

  /** Opens a writer for `schema`; the header record is written as soon as the writer is created. */
  override def open(schema: StructType): SinkWriter = new SequenceSinkWriter(schema, path)

  /**
   * Writer producing `IntWritable` keys and `BytesWritable` values.
   *
   * Values are encoded as UTF-8, the charset `SequenceSupport.toValues` decodes with, so
   * non-ASCII data round-trips regardless of the platform default charset.
   */
  class SequenceSinkWriter(schema: StructType, path: Path) extends SinkWriter {

    val writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(path),
      SequenceFile.Writer.keyClass(classOf[IntWritable]),
      SequenceFile.Writer.valueClass(classOf[BytesWritable])
    )

    // Reused key; it starts at 0 and is incremented after each row is appended.
    val key = new IntWritable(0)

    val headers = valuesToCsv(schema.fieldNames())
    writer.append(key, encode(headers))

    override def close(): Unit = writer.close()

    /** Appends `row` as one CSV record; synchronized so key updates and appends stay paired. */
    override def write(row: Row): Unit = {
      this.synchronized {
        val csv = valuesToCsv(row.values)
        writer.append(key, encode(csv))
        key.set(key.get() + 1)
      }
    }

    // Encodes a CSV line as UTF-8 bytes wrapped in a writable.
    private def encode(csv: String): BytesWritable =
      new BytesWritable(csv.getBytes(java.nio.charset.StandardCharsets.UTF_8))

    // Renders the values as a single CSV line; nulls are passed through to the CSV writer.
    private def valuesToCsv(values: Seq[Any]): String = {
      val swriter = new StringWriter()
      val csv = new CsvWriter(swriter, new CsvWriterSettings())
      csv.writeRow(values.map {
        case null => null
        case other => other.toString
      }: _*)
      csv.close()
      swriter.toString().trim()
    }
  }
}