This article collects typical usage examples of the org.apache.hadoop.mapred.JobConf class in Scala. If you are wondering what the JobConf class is for, how to use it, or what working code looks like, the curated examples below may help.
Five code examples of the JobConf class are shown below, sorted by popularity by default.
Example 1: BaseOutputFormat
// Package declaration and imported dependencies
package kr.acon.lib.io
import java.io.DataOutputStream
import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.io.compress.GzipCodec
import org.apache.hadoop.mapred.FileOutputFormat
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapred.RecordWriter
import org.apache.hadoop.util.Progressable
import org.apache.hadoop.util.ReflectionUtils
import it.unimi.dsi.fastutil.longs.LongOpenHashBigSet
abstract class BaseOutputFormat extends FileOutputFormat[Long, LongOpenHashBigSet] {

  // Subclasses only decide how to serialize records to an already-opened stream.
  @inline def getRecordWriter(out: DataOutputStream): RecordWriter[Long, LongOpenHashBigSet]

  @inline override def getRecordWriter(ignored: FileSystem,
                                       job: JobConf,
                                       name: String,
                                       progress: Progressable) = {
    val isCompressed = FileOutputFormat.getCompressOutput(job)
    if (!isCompressed) {
      // Uncompressed output: open the task output file directly.
      val file = FileOutputFormat.getTaskOutputPath(job, name)
      val fs = file.getFileSystem(job)
      val fileOut = fs.create(file, progress)
      getRecordWriter(fileOut)
    } else {
      // Compressed output: wrap the task output stream with the configured codec (GzipCodec by default).
      val codecClass = FileOutputFormat.getOutputCompressorClass(job, classOf[GzipCodec])
      val codec = ReflectionUtils.newInstance(codecClass, job)
      val file = FileOutputFormat.getTaskOutputPath(job, name + codec.getDefaultExtension())
      val fs = file.getFileSystem(job)
      val fileOut = fs.create(file, progress)
      val fileOutWithCodec = new DataOutputStream(codec.createOutputStream(fileOut))
      getRecordWriter(fileOutWithCodec)
    }
  }
}
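For context, a minimal concrete subclass might look like the following sketch. The class name TextAdjacencyOutputFormat and the tab-separated adjacency-list encoding are assumptions made for illustration; only the BaseOutputFormat contract above is taken from the source.
package kr.acon.lib.io

import java.io.DataOutputStream
import org.apache.hadoop.mapred.{RecordWriter, Reporter}
import it.unimi.dsi.fastutil.longs.LongOpenHashBigSet

// Hypothetical subclass: writes each (vertex, neighbor set) pair as one text line
// of the form "vertex<TAB>n1 n2 n3 ...".
class TextAdjacencyOutputFormat extends BaseOutputFormat {
  @inline override def getRecordWriter(out: DataOutputStream): RecordWriter[Long, LongOpenHashBigSet] =
    new RecordWriter[Long, LongOpenHashBigSet] {
      override def write(key: Long, value: LongOpenHashBigSet): Unit = {
        val sb = new StringBuilder
        sb.append(key).append('\t')
        val it = value.iterator
        while (it.hasNext) {
          sb.append(it.nextLong())
          if (it.hasNext) sb.append(' ')
        }
        sb.append('\n')
        out.write(sb.toString.getBytes("UTF-8"))
      }

      override def close(reporter: Reporter): Unit = out.close()
    }
}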
Example 2: saveParquet
// Package declaration and imported dependencies
package com.newegg.eims.DataPorter.Parquet
import com.newegg.eims.DataPorter.Base._
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.JobConf
import parquet.hadoop.ParquetOutputFormat
import parquet.hadoop.metadata.CompressionCodecName
// Note: in the original project this method lives inside an enclosing class (elided in
// this excerpt) whose `set` member supplies the rows to be written.
def saveParquet(path: String, hadoopConf: JobConf = new JobConf(),
                compressionCodecName: CompressionCodecName = CompressionCodecName.SNAPPY): Path = {
  // Record the requested compression codec on the Hadoop configuration.
  hadoopConf.set(ParquetOutputFormat.COMPRESSION, compressionCodecName.name())
  val rows = set.toDataRowSet.toRowIterator
  val schema = rows.getSchema
  val writer = ParquetFileFormat.prepareWrite(schema, new Path(path), hadoopConf)
  try {
    while (rows.hasNext) {
      val row = rows.next()
      writer.write(row)
    }
  } finally {
    writer.close()
  }
  new Path(path)
}
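The only JobConf interaction in this example is the compression key. A minimal sketch of that handling follows; the helper name withCompression and the caller someDataSet are assumptions and not part of the original project.
import org.apache.hadoop.mapred.JobConf
import parquet.hadoop.ParquetOutputFormat
import parquet.hadoop.metadata.CompressionCodecName

// Hypothetical helper: the codec choice travels to the Parquet writer solely through
// the ParquetOutputFormat.COMPRESSION key on the JobConf.
def withCompression(codec: CompressionCodecName, base: JobConf = new JobConf()): JobConf = {
  base.set(ParquetOutputFormat.COMPRESSION, codec.name())
  base
}

// e.g. someDataSet.saveParquet("/tmp/out.parquet", withCompression(CompressionCodecName.GZIP))
// where someDataSet is the (elided) enclosing DataPorter type that defines saveParquet.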
Example 3: ParquetOutputWriter
// Package declaration and imported dependencies
package com.newegg.eims.DataPorter.Parquet
import com.newegg.eims.DataPorter.Base.{DataSetSchema, IDataRow}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.{JobConf, TaskAttemptContextImpl}
import org.apache.hadoop.mapreduce.TaskAttemptContext
import parquet.hadoop.ParquetOutputFormat
import parquet.hadoop.api.WriteSupport
class ParquetOutputWriter(dataSetSchema: DataSetSchema, path: Path, conf: JobConf) {

  class IDataRowParquetOutputFormat(support: ParquetWriteSupport, filePath: Path) extends ParquetOutputFormat[IDataRow]() {
    override def getWriteSupport(configuration: Configuration): WriteSupport[IDataRow] = {
      support
    }

    override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = {
      filePath
    }
  }

  private val context = new TaskAttemptContextImpl(conf, new org.apache.hadoop.mapred.TaskAttemptID())

  private val formatter = {
    val support = new ParquetOutputFormat[IDataRow]().getWriteSupport(conf).asInstanceOf[ParquetWriteSupport]
    support.setSchema(dataSetSchema)
    new IDataRowParquetOutputFormat(support, path)
  }

  private val recordWriter = formatter.getRecordWriter(context)

  def write(row: IDataRow): Unit = recordWriter.write(null, row)

  def close(): Unit = recordWriter.close(context)
}
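A possible way to drive this writer is sketched below. The helper name writeRows is an assumption; DataSetSchema, IDataRow and ParquetOutputWriter are the project types shown above.
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.JobConf
import com.newegg.eims.DataPorter.Base.{DataSetSchema, IDataRow}

// Hypothetical helper: streams rows through ParquetOutputWriter and always closes it,
// even if a write fails part-way through.
def writeRows(schema: DataSetSchema, rows: Iterator[IDataRow],
              path: String, conf: JobConf = new JobConf()): Path = {
  val target = new Path(path)
  val writer = new ParquetOutputWriter(schema, target, conf)
  try rows.foreach(writer.write) finally writer.close()
  target
}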
Example 4: ParquetInputReader
// Package declaration and imported dependencies
package com.newegg.eims.DataPorter.Parquet
import com.newegg.eims.DataPorter.Base.DataSetSchema
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.JobConf
import parquet.hadoop.{ParquetFileReader, ParquetReader}
class ParquetInputReader(path: Path, conf: JobConf) extends Iterable[ParquetDataRow] {
  ParquetFileFormat.prepare(conf)

  private val footer = ParquetFileReader.readFooter(conf, path)
  private val schema = new ParquetSchemaConverter(conf).convert(footer.getFileMetaData.getSchema)

  def getSchema: DataSetSchema = schema

  override def iterator: Iterator[ParquetDataRow] = new Iterator[ParquetDataRow] {
    private val support = new ParquetReadSupport
    support.setSchema(schema)
    private val reader = ParquetReader.builder(support, path).withConf(conf).build()
    private var current: ParquetDataRow = _
    nextRow()

    private def nextRow() = {
      current = reader.read()
      if (!hasNext) reader.close()
    }

    override def hasNext: Boolean = current != null

    override def next(): ParquetDataRow = {
      val res = current
      if (hasNext) {
        nextRow()
      }
      res
    }
  }
}
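Since the reader is an Iterable, consuming it only needs a path and a JobConf. A minimal sketch follows; the object name ReadParquetExample and the command-line argument convention are assumptions.
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.JobConf

// Hypothetical driver: prints the schema and every row of a Parquet file written by the project.
object ReadParquetExample {
  def main(args: Array[String]): Unit = {
    val reader = new ParquetInputReader(new Path(args(0)), new JobConf())
    println(s"schema: ${reader.getSchema}")
    reader.foreach(row => println(row))
  }
}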
Example 5: GetSearchCount
// Package declaration and imported dependencies
package mad_nectarine.spark
import java.util.Properties
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.{MapWritable, Text}
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.{Logging, SparkConf}
import org.elasticsearch.hadoop.mr.EsInputFormat
object GetSearchCount extends Logging {

  def main(args: Array[String]) {
    //validate args
    if (args.length < 1) {
      throw new IllegalArgumentException("search word is required")
    }

    //create spark conf
    val sparkConf = new SparkConf()
    sparkConf.setAppName("mad_nectarine.GetTweetsSearchCount")
    val context = new JavaSparkContext(sparkConf)

    try {
      //load config
      System.out.println("executing... [load config]")
      val fs = FileSystem.get(context.hadoopConfiguration())
      val propertiesStream = fs.open(new Path("hdfs:///tmp/spark.to-words.properties"))
      val properties = new Properties()
      properties.load(propertiesStream)

      //create es conf
      System.out.println("executing... [create es conf]")
      val esConf = new JobConf()
      esConf.set("es.nodes", properties.getProperty("logic.search-count.nodes"))
      esConf.set("es.resource", properties.getProperty("logic.search-count.resource"))
      var query = properties.getProperty("logic.search-count.query").replace("@@search_word", args(0))
      query = query.replace("\\r\\n", "")
      query = query.replace("\\n", "")
      query = query.replace("\\r", "")
      System.out.println(s"query is ${query}")
      esConf.set("es.query", query)

      //load data from elasticsearch
      System.out.println("executing... [load data from elasticsearch]")
      val esRDD = context.hadoopRDD(esConf,
        classOf[EsInputFormat[Text, MapWritable]],
        classOf[Text],
        classOf[MapWritable])
      System.out.println("Count of records founds is " + esRDD.count())
    } finally {
      context.stop()
    }
  }
}
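Beyond counting, the (Text, MapWritable) pairs returned by hadoopRDD can be turned into plain Scala maps for inspection. A small sketch follows, assuming a helper name sampleHits that is not in the original code.
import scala.collection.JavaConverters._
import org.apache.hadoop.io.{MapWritable, Text}
import org.apache.spark.api.java.JavaPairRDD

// Hypothetical helper: takes the first n Elasticsearch hits from the RDD built above
// and converts each MapWritable document into an ordinary Map[String, String].
def sampleHits(esRDD: JavaPairRDD[Text, MapWritable], n: Int): Seq[Map[String, String]] =
  esRDD.take(n).asScala.map { pair =>
    pair._2.entrySet().asScala.map(e => e.getKey.toString -> e.getValue.toString).toMap
  }.toSeq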