本文整理汇总了Scala中org.apache.hadoop.mapreduce.Job类的典型用法代码示例。如果您正苦于以下问题:Scala Job类的具体用法?Scala Job怎么用?Scala Job使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了Job类的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: FrequencyMapReducer
//设置package包名称以及导入依赖的类
package com.argcv.cse8803.mapreducebasic
import com.argcv.valhalla.console.ColorForConsole._
import com.argcv.valhalla.utils.Awakable
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{IntWritable, Text}
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
object FrequencyMapReducer extends Awakable {
def main(args: Array[String]): Unit = {
// create a hadoop job and set main class
val job = Job.getInstance()
job.setJarByClass(FrequencyMapReducer.getClass)
job.setJobName("Frequency")
// set the input & output path
FileInputFormat.addInputPath(job, new Path(args.head))
FileOutputFormat.setOutputPath(job, new Path(s"${args(1)}-${System.currentTimeMillis()}"))
// set mapper & reducer
job.setMapperClass(FrequencyMapper.instance)
job.setReducerClass(FrequencyReducer.instance)
// specify the type of the output
job.setOutputKeyClass(new Text().getClass)
job.setOutputValueClass(new IntWritable().getClass)
// run
logger.info(s"job finished, status [${if (job.waitForCompletion(true)) "OK".withColor(GREEN) else "FAILED".withColor(RED)}]")
}
}
示例2: TimelyImplicits
//设置package包名称以及导入依赖的类
package io.gzet.timeseries.timely
import io.gzet.utils.spark.accumulo.AccumuloConfig
import org.apache.accumulo.core.client.ClientConfiguration
import org.apache.accumulo.core.client.mapreduce.{AbstractInputFormat, InputFormatBase}
import org.apache.accumulo.core.client.security.tokens.PasswordToken
import org.apache.accumulo.core.data.Range
import org.apache.accumulo.core.security.Authorizations
import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.SparkContext
import org.apache.spark.rdd.RDD
import scala.collection.JavaConversions._
object TimelyImplicits {
implicit class AccumuloReader(sc: SparkContext) {
def timely(accumuloConfig: AccumuloConfig, rowPrefix: Option[String] = None): RDD[Metric] = {
val conf = sc.hadoopConfiguration
val job = Job.getInstance(conf)
val clientConfig: ClientConfiguration = new ClientConfiguration()
.withInstance(accumuloConfig.accumuloInstance)
.withZkHosts(accumuloConfig.zookeeperHosts)
val authorizations = new Authorizations(List("INTERNAL", "CONFIDENTIAL", "SECRET").map(_.getBytes()))
AbstractInputFormat.setConnectorInfo(job, accumuloConfig.accumuloUser, new PasswordToken(accumuloConfig.accumuloPassword))
AbstractInputFormat.setZooKeeperInstance(job, clientConfig)
AbstractInputFormat.setScanAuthorizations(job, authorizations)
InputFormatBase.setInputTableName(job, "timely.metrics")
if(rowPrefix.isDefined) {
val ranges = List(Range.prefix(rowPrefix.get))
InputFormatBase.setRanges(job, ranges)
}
val rdd = sc.newAPIHadoopRDD(job.getConfiguration,
classOf[AccumuloTimelyInputFormat],
classOf[NullWritable],
classOf[TimelyWritable]
) values
rdd map {
timely =>
val Array(tagK, tagV) = timely.getMetricType.split("=", 2)
Metric(
timely.getMetric,
timely.getTime,
timely.getMetricValue,
Map(tagK -> tagV)
)
}
}
}
}
示例3: FrequencyMapReducer
//设置package包名称以及导入依赖的类
package com.argcv.iphigenia.example.hdfs.mr
import com.argcv.valhalla.console.ColorForConsole._
import com.argcv.valhalla.utils.Awakable
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{ IntWritable, Text }
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat
object FrequencyMapReducer extends Awakable {
def main(args: Array[String]): Unit = {
// create a hadoop job and set main class
val job = Job.getInstance()
job.setJarByClass(FrequencyMapReducer.getClass)
job.setJobName("Frequency")
// set the input & output path
FileInputFormat.addInputPath(job, new Path(args.head))
FileOutputFormat.setOutputPath(job, new Path(s"${args(1)}-${System.currentTimeMillis()}"))
// set mapper & reducer
job.setMapperClass(FrequencyMapper.instance)
job.setReducerClass(FrequencyReducer.instance)
// specify the type of the output
job.setOutputKeyClass(new Text().getClass)
job.setOutputValueClass(new IntWritable().getClass)
// run
logger.info(s"job finished, status [${if (job.waitForCompletion(true)) "OK".withColor(GREEN) else "FAILED".withColor(RED)}]")
}
}
示例4: DefaultSource
//设置package包名称以及导入依赖的类
package pl.jborkowskijmartin.spark.mf
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileStatus
import org.apache.hadoop.mapreduce.Job
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.execution.datasources.{FileFormat, OutputWriterFactory, PartitionedFile}
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.unsafe.types.UTF8String
class DefaultSource extends FileFormat {
override def inferSchema(sparkSession: SparkSession, options: Map[String, String], files: Seq[FileStatus]):
Option[StructType] = {
println(">>>InferSchema")
Some(StructType(
StructField("line", StringType, nullable = true) :: Nil
))
}
override def prepareWrite(sparkSession: SparkSession, job: Job, options: Map[String, String], dataSchema: StructType):
OutputWriterFactory = {
println(">>> prepareWrite")
null
}
override def buildReader(sparkSession: SparkSession, dataSchema: StructType, partitionSchema: StructType,
requiredSchema: StructType, filters: Seq[Filter], options: Map[String, String],
hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow] = {
pf => Iterator(InternalRow(UTF8String.fromString("hello")))
}
}