当前位置: 首页>>代码示例>>Scala>>正文


Scala LongWritable类代码示例

本文整理汇总了Scala中org.apache.hadoop.io.LongWritable的典型用法代码示例。如果您正苦于以下问题:Scala LongWritable类的具体用法?Scala LongWritable怎么用?Scala LongWritable使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了LongWritable类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: FPMiningPreprocessingApp

//设置package包名称以及导入依赖的类
package org.apress.prospark

import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapred.FileSplit
import org.apache.hadoop.mapred.TextInputFormat
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.HadoopRDD
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions

import com.google.common.io.Files

object FPMiningPreprocessingApp {

  def main(args: Array[String]) {
    if (args.length != 3) {
      System.err.println(
        "Usage: FPMiningPreprocessingApp <appname> <inputpath> <outputpath>")
      System.exit(1)
    }
    val Seq(appName, iPath, oPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val delim = " "

    val sc = new SparkContext(conf)
    sc.hadoopFile(iPath, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], sc.defaultMinPartitions)
      .asInstanceOf[HadoopRDD[LongWritable, Text]]
      .mapPartitionsWithInputSplit((iSplit, iter) =>
        iter.map(splitAndLine => (Files.getNameWithoutExtension(iSplit.asInstanceOf[FileSplit].getPath.toString), splitAndLine._2.toString.split(" ")(1))))
      .filter(r => r._2 != "0")
      .map(r => (r._1, r._2))
      .distinct()
      .groupByKey()
      .map(r => r._2.mkString(" "))
      .sample(false, 0.7)
      .coalesce(1)
      .saveAsTextFile(oPath)
  }
} 
开发者ID:ZubairNabi,项目名称:prosparkstreaming,代码行数:45,代码来源:L9-13FPMiningPreprocessing.scala

示例2: RedditVariationApp

//设置package包名称以及导入依赖的类
package org.apress.prospark

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext }
import org.apache.hadoop.io.{ Text, LongWritable, IntWritable }
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.streaming.dstream.DStream
import org.apache.hadoop.mapred.TextOutputFormat
import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat }
import org.apache.spark.streaming.dstream.PairDStreamFunctions
import org.apache.log4j.LogManager
import org.json4s._
import org.json4s.native.JsonMethods._
import java.text.SimpleDateFormat
import java.util.Date

object RedditVariationApp {
  def main(args: Array[String]) {
    if (args.length != 2) {
      System.err.println(
        "Usage: RedditVariationApp <appname> <input_path>")
      System.exit(1)
    }
    val Seq(appName, inputPath) = args.toSeq
    val LOG = LogManager.getLogger(this.getClass)

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(1))
    LOG.info("Started at %d".format(ssc.sparkContext.startTime))

    val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString)

    val merged = comments.union(comments)

    val repartitionedComments = comments.repartition(4)

    val rddMin = comments.glom().map(arr =>
      arr.minBy(rec => ((parse(rec) \ "created_utc").values.toString.toInt)))

    ssc.start()
    ssc.awaitTermination()

  }
} 
开发者ID:ZubairNabi,项目名称:prosparkstreaming,代码行数:50,代码来源:L3-DStreamVariation.scala

示例3: FunctionalSyntaxOWLExpressionsDataSetBuilder

//设置package包名称以及导入依赖的类
package net.sansa_stack.owl.flink.dataset

import net.sansa_stack.owl.common.parsing.{FunctionalSyntaxExpressionBuilder, FunctionalSyntaxPrefixParsing}
import net.sansa_stack.owl.flink.hadoop.FunctionalSyntaxInputFormat
import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.hadoop.io.{LongWritable, Text}


object FunctionalSyntaxOWLExpressionsDataSetBuilder extends FunctionalSyntaxPrefixParsing {
  def build(env: ExecutionEnvironment, filePath: String): OWLExpressionsDataSet = {
    import org.apache.flink.api.scala._
    val hadoopDataSet: DataSet[(LongWritable, Text)] =
      env.readHadoopFile[LongWritable, Text](
        new FunctionalSyntaxInputFormat,
        classOf[LongWritable],
        classOf[Text],
        filePath
      )
    val rawDataSet = hadoopDataSet.map(_._2.toString)

    val tmp: Seq[(String, String)] = rawDataSet.filter(isPrefixDeclaration(_)).map(parsePrefix(_)).collect()
    val prefixes: Map[String, String] = tmp.toMap

    val builder = new FunctionalSyntaxExpressionBuilder(prefixes)

    rawDataSet.map(builder.clean(_)).filter(_ != null)
  }

} 
开发者ID:SANSA-Stack,项目名称:SANSA-OWL,代码行数:30,代码来源:FunctionalSyntaxOWLExpressionsDataSetBuilder.scala

示例4: ManchesterSyntaxOWLExpressionsDataSetBuilder

//设置package包名称以及导入依赖的类
package net.sansa_stack.owl.flink.dataset

import net.sansa_stack.owl.common.parsing.{ManchesterSyntaxExpressionBuilder, ManchesterSyntaxPrefixParsing}
import net.sansa_stack.owl.flink.hadoop.ManchesterSyntaxInputFormat
import org.apache.flink.api.scala.ExecutionEnvironment
import org.apache.hadoop.io.{LongWritable, Text}


object ManchesterSyntaxOWLExpressionsDataSetBuilder extends  ManchesterSyntaxPrefixParsing {
  def build(env: ExecutionEnvironment, filePath: String): OWLExpressionsDataSet = {
    buildAndGetPrefixes(env, filePath)._1
  }

  private[dataset] def buildAndGetPrefixes(env: ExecutionEnvironment,
      filePath: String): (OWLExpressionsDataSet, Map[String, String]) = {

    import org.apache.flink.api.scala._
    val hadoopDataSet: DataSet[(LongWritable, Text)] =
      env.readHadoopFile[LongWritable, Text](
        new ManchesterSyntaxInputFormat,
        classOf[LongWritable],
        classOf[Text],
        filePath
      )
    val rawDataSet = hadoopDataSet.map(_._2.toString)

    val tmp: Seq[(String, String)] = rawDataSet.filter(isPrefixDeclaration(_)).map(parsePrefix(_)).collect()
    val prefixes: Map[String, String] = tmp.toMap

    val builder = new ManchesterSyntaxExpressionBuilder(prefixes)

    (rawDataSet.map(builder.clean(_)).filter(_ != null), prefixes)
  }
} 
开发者ID:SANSA-Stack,项目名称:SANSA-OWL,代码行数:35,代码来源:ManchesterSyntaxOWLExpressionsDataSetBuilder.scala

示例5: ManchesterSyntaxOWLExpressionsRDDBuilder

//设置package包名称以及导入依赖的类
package net.sansa_stack.owl.spark.rdd

import net.sansa_stack.owl.common.parsing.{ManchesterSyntaxExpressionBuilder, ManchesterSyntaxPrefixParsing}
import net.sansa_stack.owl.spark.hadoop.ManchesterSyntaxInputFormat
import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.spark.SparkContext


object ManchesterSyntaxOWLExpressionsRDDBuilder extends ManchesterSyntaxPrefixParsing {
  def build(sc: SparkContext, filePath: String): OWLExpressionsRDD = {
    buildAndGetPrefixes(sc, filePath)._1
  }

  private[spark] def buildAndGetPrefixes(sc: SparkContext, filePath: String): (OWLExpressionsRDD, Map[String, String]) = {
    val rawRDD = sc.hadoopFile(
      filePath,
      classOf[ManchesterSyntaxInputFormat],
      classOf[LongWritable],
      classOf[Text],
      sc.defaultMinPartitions).map(_._2.toString)

    val tmp: Array[(String, String)] =
      rawRDD.filter(isPrefixDeclaration(_)).map(parsePrefix).collect()
    val prefixes: Map[String, String] = tmp.toMap

    val builder = new ManchesterSyntaxExpressionBuilder(prefixes)
    (rawRDD.map(builder.clean(_)).filter(_ != null), prefixes)
  }
} 
开发者ID:SANSA-Stack,项目名称:SANSA-OWL,代码行数:30,代码来源:ManchesterSyntaxOWLExpressionsRDDBuilder.scala

示例6: FunctionalSyntaxOWLExpressionsRDDBuilder

//设置package包名称以及导入依赖的类
package net.sansa_stack.owl.spark.rdd

import net.sansa_stack.owl.common.parsing.{FunctionalSyntaxExpressionBuilder, FunctionalSyntaxPrefixParsing}
import net.sansa_stack.owl.spark.hadoop.FunctionalSyntaxInputFormat
import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.spark.SparkContext


object FunctionalSyntaxOWLExpressionsRDDBuilder extends Serializable with FunctionalSyntaxPrefixParsing {
  def build(sc: SparkContext, filePath: String): OWLExpressionsRDD = {
    val hadoopRDD = sc.hadoopFile(
      filePath, classOf[FunctionalSyntaxInputFormat], classOf[LongWritable],
      classOf[Text], sc.defaultMinPartitions)

    val rawRDD = hadoopRDD.map(entry => entry._2.toString)

    val tmp: Array[(String, String)] =
          rawRDD.filter(isPrefixDeclaration(_)).map(parsePrefix).collect()
    val prefixes: Map[String, String] = tmp.toMap

    val builder = new FunctionalSyntaxExpressionBuilder(prefixes)

    rawRDD.map(builder.clean(_)).filter(_ != null)
  }
} 
开发者ID:SANSA-Stack,项目名称:SANSA-OWL,代码行数:26,代码来源:FunctionalSyntaxOWLExpressionsRDDBuilder.scala

示例7: ADAMContextExtensions

//设置package包名称以及导入依赖的类
package org.bdgenomics.adam.rdd

import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.SparkContext
import org.apache.spark.api.java.StorageLevels
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.converters.FastaConverter
import org.bdgenomics.adam.rdd.contig.NucleotideContigFragmentRDD
import org.bdgenomics.utils.instrumentation.Metrics
import org.apache.spark.rdd.MetricsContext._
import org.bdgenomics.adam.rdd.feature.FeatureRDD
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.formats.avro.Feature


object ADAMContextExtensions {

  implicit class spExt(val sparkContext: SparkContext) extends HDFSFilesExtensions{

    def loadFastaPersistent(
                   filePath: String,
                   fragmentLength: Long = 10000L): NucleotideContigFragmentRDD = {
      val fastaData: RDD[(LongWritable, Text)] = sparkContext.newAPIHadoopFile(
        filePath,
        classOf[TextInputFormat],
        classOf[LongWritable],
        classOf[Text]
      )
      if (Metrics.isRecording) fastaData.instrument() else fastaData

      val remapData = fastaData.map(kv => (kv._1.get, kv._2.toString))

      // convert rdd and cache
      val fragmentRdd = FastaConverter(remapData, fragmentLength)
        .persist(StorageLevels.MEMORY_AND_DISK)

      NucleotideContigFragmentRDD(fragmentRdd)
    }

    def mergeFeatures(features: List[FeatureRDD]): Option[FeatureRDD] = features match {
      case Nil => None
      case head :: Nil => Some(head)
      case head :: tail =>
        val merged = tail.foldLeft(head){
          case (acc, feature) =>
            val joined = acc.broadcastRegionJoin(feature)
            acc.transform(_ => joined.rdd.map{
              case (one, two) =>
                one.setStart(Math.min(one.getStart, two.getStart))
                one.setEnd(Math.max(one.getEnd, two.getEnd))
                one
            })
        }
        Some(merged)
    }

  }


} 
开发者ID:antonkulaga,项目名称:adam-playground,代码行数:62,代码来源:ADAMContextExtensions.scala

示例8: VoyagerApp

//设置package包名称以及导入依赖的类
package org.apress.prospark

import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToOrderedRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions

object VoyagerApp {
  def main(args: Array[String]) {
    if (args.length != 3) {
      System.err.println(
        "Usage: VoyagerApp <appname> <inputPath> <outputPath>")
      System.exit(1)
    }
    val Seq(appName, inputPath, outputPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)
      .set("spark.executor.extraJavaOptions", "-XX:+UseConcMarkSweepGC")

    val ssc = new StreamingContext(conf, Seconds(10))

    val voyager1 = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString)
    voyager1.map(rec => {
      val attrs = rec.split("\\s+")
      ((attrs(0).toInt), attrs.slice(18, 28).map(_.toDouble))
    }).filter(pflux => pflux._2.exists(_ > 1.0)).map(rec => (rec._1, 1))
      .reduceByKey(_ + _)
      .transform(rec => rec.sortByKey(ascending = false, numPartitions = 1)).saveAsTextFiles(outputPath)

    ssc.start()
    ssc.awaitTermination()
  }
} 
开发者ID:ZubairNabi,项目名称:prosparkstreaming,代码行数:42,代码来源:L4-1Voyager.scala

示例9: VoyagerAppKryo

//设置package包名称以及导入依赖的类
package org.apress.prospark

import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD.rddToOrderedRDDFunctions
import org.apache.spark.streaming.Seconds
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions

object VoyagerAppKryo {
  def main(args: Array[String]) {
    if (args.length != 3) {
      System.err.println(
        "Usage: VoyagerAppKryo <appname> <inputPath> <outputPath>")
      System.exit(1)
    }
    val Seq(appName, inputPath, outputPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .registerKryoClasses(Array(classOf[ProtonFlux]))

    val ssc = new StreamingContext(conf, Seconds(10))

    val voyager1 = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString)
    val projected = voyager1.map(rec => {
      val attrs = rec.split("\\s+")
      new ProtonFlux(attrs(0), attrs(18), attrs(19), attrs(20), attrs(21),
        attrs(22), attrs(23), attrs(24), attrs(25), attrs(26), attrs(27),
        attrs(28))
    })
    val filtered = projected.filter(pflux => pflux.isSolarStorm)
    val yearlyBreakdown = filtered.map(rec => (rec.year, 1))
      .reduceByKey(_ + _)
      .transform(rec => rec.sortByKey(ascending = false))
    yearlyBreakdown.saveAsTextFiles(outputPath)

    ssc.start()
    ssc.awaitTermination()
  }
} 
开发者ID:ZubairNabi,项目名称:prosparkstreaming,代码行数:48,代码来源:L4-4Kryo.scala

示例10: CollabFilteringPreprocessingApp

//设置package包名称以及导入依赖的类
package org.apress.prospark

import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text
import org.apache.hadoop.mapred.FileSplit
import org.apache.hadoop.mapred.TextInputFormat
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.rdd.HadoopRDD
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions

import com.google.common.io.Files

object CollabFilteringPreprocessingApp {

  def main(args: Array[String]) {
    if (args.length != 3) {
      System.err.println(
        "Usage: CollabFilteringPreprocessingApp <appname> <inputpath> <outputpath>")
      System.exit(1)
    }
    val Seq(appName, iPath, oPath) = args.toSeq

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val delim = " "

    val sc = new SparkContext(conf)
    sc.hadoopFile(iPath, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], sc.defaultMinPartitions)
      .asInstanceOf[HadoopRDD[LongWritable, Text]]
      .mapPartitionsWithInputSplit((iSplit, iter) =>
        iter.map(splitAndLine => (Files.getNameWithoutExtension(iSplit.asInstanceOf[FileSplit].getPath.toString), splitAndLine._2.toString.split(" ")(1))))
      .filter(r => r._2 != "0")
      .map(r => ((r._1, r._2), 1))
      .reduceByKey(_ + _)
      .map(r => r._1._1.replace("subject", "") + delim + r._1._2 + delim + r._2)
      .sample(false, 0.7)
      .coalesce(1)
      .saveAsTextFile(oPath)
  }
} 
开发者ID:ZubairNabi,项目名称:prosparkstreaming,代码行数:44,代码来源:L9-11CollabFilteringPreprocessing.scala

示例11: RedditAggregationApp

//设置package包名称以及导入依赖的类
package org.apress.prospark

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext }
import org.apache.hadoop.io.{ Text, LongWritable, IntWritable }
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.streaming.dstream.DStream
import org.apache.hadoop.mapred.TextOutputFormat
import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat }
import org.apache.spark.streaming.dstream.PairDStreamFunctions
import org.apache.log4j.LogManager
import org.json4s._
import org.json4s.native.JsonMethods._
import java.text.SimpleDateFormat
import java.util.Date

object RedditAggregationApp {
  def main(args: Array[String]) {
    if (args.length != 2) {
      System.err.println(
        "Usage: RedditAggregationApp <appname> <input_path>")
      System.exit(1)
    }
    val Seq(appName, inputPath) = args.toSeq
    val LOG = LogManager.getLogger(this.getClass)

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(1))
    LOG.info("Started at %d".format(ssc.sparkContext.startTime))

    val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString)

    val recCount = comments.count()

    val recCountValue = comments.countByValue()

    val totalWords = comments.map(rec => ((parse(rec) \ "body").values.toString))
      .flatMap(body => body.split(" "))
      .map(word => 1)
      .reduce(_ + _)

    ssc.start()
    ssc.awaitTermination()

  }
} 
开发者ID:ZubairNabi,项目名称:prosparkstreaming,代码行数:52,代码来源:L3-DStreamAggregation.scala

示例12: FrequencyMapper

//设置package包名称以及导入依赖的类
package com.argcv.iphigenia.example.hdfs.mr

import org.apache.hadoop.io.{ IntWritable, LongWritable, Text }
import org.apache.hadoop.mapreduce.Mapper


class FrequencyMapper extends Mapper[LongWritable, Text, Text, IntWritable] {
  type Context = Mapper[LongWritable, Text, Text, IntWritable]#Context

  override def map(offset: LongWritable, lineText: Text, context: Context): Unit = {
    val line = lineText.toString
    val eventID: String = line.split(",")(1)
    context.write(new Text(eventID), FrequencyMapper.ONE)
  }
}

object FrequencyMapper {
  def instance = new FrequencyMapper().getClass

  lazy val ONE = new IntWritable(1)
} 
开发者ID:yuikns,项目名称:iphigenia,代码行数:22,代码来源:FrequencyMapper.scala


注:本文中的org.apache.hadoop.io.LongWritable类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。