当前位置: 首页>>代码示例>>Scala>>正文


Scala IntWritable类代码示例

本文整理汇总了Scala中org.apache.hadoop.io.IntWritable的典型用法代码示例。如果您正苦于以下问题:Scala IntWritable类的具体用法?Scala IntWritable怎么用?Scala IntWritable使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了IntWritable类的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: FrequencyMapReducer

//设置package包名称以及导入依赖的类
package com.argcv.cse8803.mapreducebasic

import com.argcv.valhalla.console.ColorForConsole._
import com.argcv.valhalla.utils.Awakable
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{IntWritable, Text}
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat


object FrequencyMapReducer extends Awakable {

  def main(args: Array[String]): Unit = {
    // create a hadoop job and set main class
    val job = Job.getInstance()
    job.setJarByClass(FrequencyMapReducer.getClass)
    job.setJobName("Frequency")

    // set the input & output path
    FileInputFormat.addInputPath(job, new Path(args.head))
    FileOutputFormat.setOutputPath(job, new Path(s"${args(1)}-${System.currentTimeMillis()}"))

    // set mapper & reducer
    job.setMapperClass(FrequencyMapper.instance)
    job.setReducerClass(FrequencyReducer.instance)

    // specify the type of the output
    job.setOutputKeyClass(new Text().getClass)
    job.setOutputValueClass(new IntWritable().getClass)

    // run
    logger.info(s"job finished, status [${if (job.waitForCompletion(true)) "OK".withColor(GREEN) else "FAILED".withColor(RED)}]")
  }

} 
开发者ID:yuikns,项目名称:cse8803,代码行数:37,代码来源:FrequencyMapReducer.scala

示例2:

//设置package包名称以及导入依赖的类
package epam.idobrovolskiy.wikipedia

import epam.idobrovolskiy.wikipedia.trending.time.PlainDatesExtractor
import epam.idobrovolskiy.wikipedia.trending.tokenizer.StopWordsTokenizer
import org.apache.hadoop.io.{IntWritable, Text}

package object trending extends scala.AnyRef {
  val AppName = "wikipedia-trending"

  val DefaultTokenizer = new StopWordsTokenizer
  val TopTokenCount = 10

  val DefaultInputWikiDumpFilename = "wiki_small"
  val DefaultPrepHeaderFilename = "wiki_prep_headers"
  val DefaultPrepFullFilename = "wiki_prep_full"
  val DefaultDateCitationsFileName = "wiki_date_citations"
  val DefaultDateIndexFileName = "wiki_index_dates"
  val DefaultDocIndexFileName = "wiki_index_docs"

  val DefaultTarget = preprocessing.PreprocessingTarget.Stdout
  val DefaultPathForPlainTextExtraction = "./data/out"
  val DefaultWikipediaDumpFilesPath = "./data/in"
  val DefaultPlainTextExtractor = preprocessing.attardi.AttardiPlainTextExtractor

  val HdfsNameNodeHost = "hdfs://sandbox.hortonworks.com:8020"
  val HdfsRootPath = "/user/idobrovolskiy/wikipedia-trending/"

  val PreprocessedFileHeaderBodyDelimiter = "\n\n"
  type PreprocessedSequenceFileKeyType = IntWritable
  type PreprocessedSequenceFileValueType = Text

  val DefaultDatesExtractor = new PlainDatesExtractor

  lazy val spark = common.SparkUtils.sparkSession
} 
开发者ID:igor-dobrovolskiy-epam,项目名称:wikipedia-analysis-scala-core,代码行数:36,代码来源:package.scala

示例3: FrequencyMapReducer

//设置package包名称以及导入依赖的类
package com.argcv.iphigenia.example.hdfs.mr

import com.argcv.valhalla.console.ColorForConsole._
import com.argcv.valhalla.utils.Awakable
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{ IntWritable, Text }
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat


object FrequencyMapReducer extends Awakable {

  def main(args: Array[String]): Unit = {
    // create a hadoop job and set main class
    val job = Job.getInstance()
    job.setJarByClass(FrequencyMapReducer.getClass)
    job.setJobName("Frequency")

    // set the input & output path
    FileInputFormat.addInputPath(job, new Path(args.head))
    FileOutputFormat.setOutputPath(job, new Path(s"${args(1)}-${System.currentTimeMillis()}"))

    // set mapper & reducer
    job.setMapperClass(FrequencyMapper.instance)
    job.setReducerClass(FrequencyReducer.instance)

    // specify the type of the output
    job.setOutputKeyClass(new Text().getClass)
    job.setOutputValueClass(new IntWritable().getClass)

    // run
    logger.info(s"job finished, status [${if (job.waitForCompletion(true)) "OK".withColor(GREEN) else "FAILED".withColor(RED)}]")
  }

} 
开发者ID:yuikns,项目名称:iphigenia,代码行数:37,代码来源:FrequencyMapReducer.scala

示例4: FrequencyMapper

//设置package包名称以及导入依赖的类
package com.argcv.iphigenia.example.hdfs.mr

import org.apache.hadoop.io.{ IntWritable, LongWritable, Text }
import org.apache.hadoop.mapreduce.Mapper


class FrequencyMapper extends Mapper[LongWritable, Text, Text, IntWritable] {
  type Context = Mapper[LongWritable, Text, Text, IntWritable]#Context

  override def map(offset: LongWritable, lineText: Text, context: Context): Unit = {
    val line = lineText.toString
    val eventID: String = line.split(",")(1)
    context.write(new Text(eventID), FrequencyMapper.ONE)
  }
}

object FrequencyMapper {
  def instance = new FrequencyMapper().getClass

  lazy val ONE = new IntWritable(1)
} 
开发者ID:yuikns,项目名称:iphigenia,代码行数:22,代码来源:FrequencyMapper.scala

示例5: SequenceSource

//设置package包名称以及导入依赖的类
package io.eels.component.sequence

import java.util.concurrent.atomic.AtomicBoolean

import com.sksamuel.exts.Logging
import com.sksamuel.exts.io.Using
import io.eels._
import io.eels.datastream.{DataStream, Publisher, Subscriber, Subscription}
import io.eels.schema.StructType
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, IntWritable, SequenceFile}

case class SequenceSource(path: Path)(implicit conf: Configuration) extends Source with Logging {
  logger.debug(s"Creating sequence source from $path")

  override def schema: StructType = SequenceSupport.schema(path)
  override def parts(): Seq[Publisher[Seq[Row]]] = List(new SequencePublisher(path))
}

object SequenceReaderIterator {
  def apply(schema: StructType, reader: SequenceFile.Reader): Iterator[Row] = new Iterator[Row] {
    private val k = new IntWritable()
    private val v = new BytesWritable()
    // throw away the header
    reader.next(k, v)
    override def next(): Row = Row(schema, SequenceSupport.toValues(v).toVector)
    override def hasNext(): Boolean = reader.next(k, v)
  }
}

class SequencePublisher(val path: Path)(implicit conf: Configuration) extends Publisher[Seq[Row]] with Logging with Using {

  override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = {
    try {
      using(SequenceSupport.createReader(path)) { reader =>
        val schema = SequenceSupport.schema(path)
        val running = new AtomicBoolean(true)
        subscriber.subscribed(Subscription.fromRunning(running))
        SequenceReaderIterator(schema, reader)
          .takeWhile(_ => running.get)
          .grouped(DataStream.DefaultBatchSize)
          .foreach(subscriber.next)

        subscriber.completed()
      }
    } catch {
      case t: Throwable => subscriber.error(t)
    }
  }
} 
开发者ID:51zero,项目名称:eel-sdk,代码行数:52,代码来源:SequenceSource.scala

示例6: SequenceSupport

//设置package包名称以及导入依赖的类
package io.eels.component.sequence

import java.io.StringReader
import java.nio.charset.Charset

import com.sksamuel.exts.Logging
import com.sksamuel.exts.io.Using
import io.eels.component.csv.{CsvFormat, CsvSupport}
import io.eels.schema.{Field, StructType}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, IntWritable, SequenceFile}

object SequenceSupport extends Logging with Using {

  def createReader(path: Path)(implicit conf: Configuration): SequenceFile.Reader =
    new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))

  def toValues(v: BytesWritable): Array[String] = toValues(new String(v.copyBytes(), Charset.forName("UTF8")))

  def toValues(str: String): Array[String] = {
    val parser = CsvSupport.createParser(CsvFormat(), false, false, false, null, null)
    parser.beginParsing(new StringReader(str))
    val record = parser.parseNext()
    parser.stopParsing()
    record
  }

  def schema(path: Path)(implicit conf: Configuration): StructType = {
    logger.debug(s"Fetching sequence schema for $path")
    using(createReader(path)) { it =>
      val k = new IntWritable()
      val v = new BytesWritable()
      val fields: Array[Field] = {
        it.next(k, v)
        toValues(v).map { it => new Field(it) }
      }
      StructType(fields.toList)
    }
  }
} 
开发者ID:51zero,项目名称:eel-sdk,代码行数:42,代码来源:SequenceSupport.scala

示例7: SequenceSinkTest

//设置package包名称以及导入依赖的类
package io.eels.component.sequence

import io.eels.datastream.DataStream
import io.eels.schema.StructType
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.{BytesWritable, IntWritable, SequenceFile}
import org.scalatest.{Matchers, WordSpec}

class SequenceSinkTest extends WordSpec with Matchers {

  private val ds = DataStream.fromValues(
    StructType("a", "b", "c", "d"),
    Seq(
      List("1", "2", "3", "4"),
      List("5", "6", "7", "8")
    )
  )

  "SequenceSink" should {
    "write sequence files" in {

      implicit val conf = new Configuration
      implicit val fs = FileSystem.get(conf)

      val path = new Path("seqsink.seq")
      if (fs.exists(path))
        fs.delete(path, true)

      ds.to(SequenceSink(path))

      val reader = new SequenceFile.Reader(new Configuration, SequenceFile.Reader.file(path))

      val k = new IntWritable
      val v = new BytesWritable

      val set = for (_ <- 1 to 3) yield {
        reader.next(k, v)
        new String(v.copyBytes)
      }

      set.toSet shouldBe Set(
        "a,b,c,d",
        "1,2,3,4",
        "5,6,7,8"
      )

      reader.close()

      fs.delete(path, true)
    }
  }
} 
开发者ID:51zero,项目名称:eel-sdk,代码行数:54,代码来源:SequenceSinkTest.scala

示例8: LoadFileSequence

//设置package包名称以及导入依赖的类
package com.git.huanghaifeng.spark.load

import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.hadoop.io.{ IntWritable, Text }

object LoadFileSequence {
    def main(args: Array[String]) {
        val master = args(0)
        val file_path = args(1)

        val sc = new SparkContext(master, "BasicSequenceFile", System.getenv("SPARK_HOME"))
        val out_data = sc.parallelize(List(("Holden", 3), ("Kay", 6), ("Snail", 2)))
        out_data.saveAsSequenceFile(file_path)

        val in_data = sc.sequenceFile(file_path, classOf[Text], classOf[IntWritable]).map{
            case (x, y) =>
                (x.toString, y.get())
        }
        println(in_data.collect().toList)
    }
} 
开发者ID:prucehuang,项目名称:quickly-start-spark,代码行数:23,代码来源:LoadFileSequence.scala

示例9: SequenceSink

//设置package包名称以及导入依赖的类
package io.eels.component.sequence

import java.io.StringWriter

import com.univocity.parsers.csv.{CsvWriter, CsvWriterSettings}
import io.eels.{Row, Sink, SinkWriter}
import io.eels.schema.StructType
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{BytesWritable, IntWritable, SequenceFile}

case class SequenceSink(path: Path)(implicit conf: Configuration) extends Sink {

  override def open(schema: StructType): SinkWriter = new SequenceSinkWriter(schema, path)

  class SequenceSinkWriter(schema: StructType, path: Path) extends SinkWriter {

    val writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(path),
      SequenceFile.Writer.keyClass(classOf[IntWritable]),
      SequenceFile.Writer.valueClass(classOf[BytesWritable])
    )

    val key = new IntWritable(0)

    val headers = valuesToCsv(schema.fieldNames())
    writer.append(key, new BytesWritable(headers.getBytes))

    override def close(): Unit = writer.close()

    override def write(row: Row): Unit = {
      this.synchronized {
        val csv = valuesToCsv(row.values)
        writer.append(key, new BytesWritable(csv.getBytes()))
        key.set(key.get() + 1)
      }
    }

    private def valuesToCsv(values: Seq[Any]): String = {
      val swriter = new StringWriter()
      val csv = new CsvWriter(swriter, new CsvWriterSettings())
      csv.writeRow(values.map {
        case null => null
        case other => other.toString
      }: _*)
      csv.close()
      swriter.toString().trim()
    }
  }
} 
开发者ID:51zero,项目名称:eel-sdk,代码行数:51,代码来源:SequenceSink.scala


注:本文中的org.apache.hadoop.io.IntWritable类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。