当前位置: 首页>>代码示例>>Scala>>正文


Scala JobConf类代码示例

本文整理汇总了Scala中org.apache.hadoop.mapred.JobConf的典型用法代码示例。如果您正苦于以下问题：Scala JobConf类的具体用法？Scala JobConf怎么用？Scala JobConf使用的例子？那么，这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了JobConf类的5个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: BaseOutputFormat

//设置package包名称以及导入依赖的类
package kr.acon.lib.io

import java.io.DataOutputStream

import org.apache.hadoop.fs.FileSystem
import org.apache.hadoop.io.compress.GzipCodec
import org.apache.hadoop.mapred.FileOutputFormat
import org.apache.hadoop.mapred.JobConf
import org.apache.hadoop.mapred.RecordWriter
import org.apache.hadoop.util.Progressable
import org.apache.hadoop.util.ReflectionUtils

import it.unimi.dsi.fastutil.longs.LongOpenHashBigSet

/**
  * Base class for old-API (`org.apache.hadoop.mapred`) output formats whose records are
  * `(Long, LongOpenHashBigSet)` pairs.
  *
  * This class opens the task output file (wrapping it in a compression codec when the job
  * asks for compressed output); subclasses only implement the stream-based
  * `getRecordWriter(out)` overload to define how records are encoded.
  */
abstract class BaseOutputFormat extends FileOutputFormat[Long, LongOpenHashBigSet] {
  /**
    * Builds the writer that serializes records onto an already opened stream.
    *
    * @param out stream of the task output file; already wrapped by the codec when
    *            compressed output is enabled
    * @return the record writer that emits to `out`
    */
  @inline def getRecordWriter(out: DataOutputStream): RecordWriter[Long, LongOpenHashBigSet]

  /**
    * Opens the task output file for `name` and delegates to the stream-based overload.
    *
    * @param ignored  not used; the file system is resolved from the output path instead
    * @param job      job configuration (compression flag, codec class, output location)
    * @param name     output file name; the codec's default extension is appended when
    *                 compressed output is enabled
    * @param progress progress callback handed to the file system when creating the file
    * @return the record writer produced by `getRecordWriter(out)`
    */
  @inline override def getRecordWriter(ignored: FileSystem,
                               job: JobConf,
                               name: String,
                               progress: Progressable): RecordWriter[Long, LongOpenHashBigSet] = {
    // Shared by both branches: resolve the task output path and create the file on its file system.
    def openTaskFile(fileName: String): DataOutputStream = {
      val file = FileOutputFormat.getTaskOutputPath(job, fileName)
      file.getFileSystem(job).create(file, progress)
    }

    if (!FileOutputFormat.getCompressOutput(job)) {
      getRecordWriter(openTaskFile(name))
    } else {
      // GzipCodec is passed as the default codec class for getOutputCompressorClass.
      val codecClass = FileOutputFormat.getOutputCompressorClass(job, classOf[GzipCodec])
      val codec = ReflectionUtils.newInstance(codecClass, job)
      val rawOut = openTaskFile(name + codec.getDefaultExtension())
      val compressedOut =
        try new DataOutputStream(codec.createOutputStream(rawOut))
        catch {
          // Do not leak the freshly created file stream when the codec cannot wrap it.
          case scala.util.control.NonFatal(e) =>
            rawOut.close()
            throw e
        }
      getRecordWriter(compressedOut)
    }
  }
}
开发者ID:chan150,项目名称:TrillionG,代码行数:39,代码来源:BaseOutputFormat.scala

示例2: saveParquet

//设置package包名称以及导入依赖的类
package com.newegg.eims.DataPorter.Parquet

import com.newegg.eims.DataPorter.Base._
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.JobConf
import parquet.hadoop.ParquetOutputFormat
import parquet.hadoop.metadata.CompressionCodecName


    /**
      * Writes every row obtained from `set.toDataRowSet.toRowIterator` to a Parquet file.
      *
      * `set` comes from the enclosing scope, which is not visible in this excerpt.
      * The compression codec name is stored on `hadoopConf` under
      * `ParquetOutputFormat.COMPRESSION` before the writer is prepared, so a
      * caller-supplied configuration is modified by this call.
      *
      * @param path                 destination of the Parquet file
      * @param hadoopConf           configuration passed to `ParquetFileFormat.prepareWrite`
      * @param compressionCodecName codec whose name is written into `hadoopConf`
      * @return a `Path` built from `path`
      */
    def saveParquet(path: String, hadoopConf: JobConf = new JobConf(),
                    compressionCodecName: CompressionCodecName = CompressionCodecName.SNAPPY): Path = {
      hadoopConf.set(ParquetOutputFormat.COMPRESSION, compressionCodecName.name())
      val rowIterator = set.toDataRowSet.toRowIterator
      val parquetWriter = ParquetFileFormat.prepareWrite(rowIterator.getSchema, new Path(path), hadoopConf)
      try {
        // Drain the iterator; the writer is closed even when a write fails.
        while (rowIterator.hasNext) parquetWriter.write(rowIterator.next())
      } finally {
        parquetWriter.close()
      }
      new Path(path)
    }
  }

} 
开发者ID:CodeBabyBear,项目名称:DataPorter,代码行数:29,代码来源:Converts.scala

示例3: ParquetOutputWriter

//设置package包名称以及导入依赖的类
package com.newegg.eims.DataPorter.Parquet

import com.newegg.eims.DataPorter.Base.{DataSetSchema, IDataRow}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.{JobConf, TaskAttemptContextImpl}
import org.apache.hadoop.mapreduce.TaskAttemptContext
import parquet.hadoop.ParquetOutputFormat
import parquet.hadoop.api.WriteSupport


/**
  * Writes [[IDataRow]] records to a single Parquet file through a `ParquetOutputFormat`.
  *
  * The record writer is created eagerly when this class is constructed.
  *
  * @param dataSetSchema schema set on the write support before the record writer is created
  * @param path          file returned as the output format's default work file
  * @param conf          configuration used for the task attempt context and to obtain the
  *                      write support (which is cast to `ParquetWriteSupport`)
  */
class ParquetOutputWriter(dataSetSchema: DataSetSchema, path: Path, conf: JobConf) {

  /** Output format pinned to an already prepared write support and a fixed work file. */
  class IDataRowParquetOutputFormat(support: ParquetWriteSupport, filePath: Path) extends ParquetOutputFormat[IDataRow]() {
    // Always hands back the prepared support; the configuration argument is not consulted.
    override def getWriteSupport(configuration: Configuration): WriteSupport[IDataRow] = support

    // Always targets the path given at construction; context and extension are not consulted.
    override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = filePath
  }

  private val taskContext = new TaskAttemptContextImpl(conf, new org.apache.hadoop.mapred.TaskAttemptID())

  private val outputFormat = {
    // Ask a plain ParquetOutputFormat for the write support selected by `conf`.
    val resolved = new ParquetOutputFormat[IDataRow]().getWriteSupport(conf)
    val writeSupport = resolved.asInstanceOf[ParquetWriteSupport]
    writeSupport.setSchema(dataSetSchema)
    new IDataRowParquetOutputFormat(writeSupport, path)
  }

  private val writer = outputFormat.getRecordWriter(taskContext)

  /** Appends one row; the record key is passed as `null`. */
  def write(row: IDataRow): Unit = writer.write(null, row)

  /** Closes the underlying record writer with the task context created at construction. */
  def close(): Unit = writer.close(taskContext)
}
开发者ID:CodeBabyBear,项目名称:DataPorter,代码行数:37,代码来源:ParquetOutputWriter.scala

示例4: ParquetInputReader

//设置package包名称以及导入依赖的类
package com.newegg.eims.DataPorter.Parquet

import com.newegg.eims.DataPorter.Base.DataSetSchema
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.JobConf
import parquet.hadoop.{ParquetFileReader, ParquetReader}


/**
  * Exposes the rows of a Parquet file as an `Iterable[ParquetDataRow]`.
  *
  * The file footer is read at construction time and its schema converted with
  * `ParquetSchemaConverter`. Each call to `iterator` builds a new `ParquetReader`.
  *
  * @param path Parquet file to read
  * @param conf configuration passed to `ParquetFileFormat.prepare`, the footer reader,
  *             the schema converter and every `ParquetReader`
  */
class ParquetInputReader(path: Path, conf: JobConf) extends Iterable[ParquetDataRow] {
  ParquetFileFormat.prepare(conf)
  private val footer = ParquetFileReader.readFooter(conf, path)
  private val schema = new ParquetSchemaConverter(conf).convert(footer.getFileMetaData.getSchema)

  /** Schema converted from the file footer. */
  def getSchema: DataSetSchema = schema

  /**
    * Creates an iterator backed by a fresh reader.
    *
    * The reader is closed as soon as a read returns `null`, i.e. once the iterator is
    * exhausted; an iterator that is abandoned earlier leaves its reader open.
    */
  override def iterator: Iterator[ParquetDataRow] = new Iterator[ParquetDataRow] {
    private val support = new ParquetReadSupport
    support.setSchema(schema)
    private val reader = ParquetReader.builder(support, path).withConf(conf).build()
    // Look-ahead row; null once the reader has no more rows.
    private var current: ParquetDataRow = _
    nextRow()

    // Reads the next row into `current` and closes the reader when nothing was read.
    private def nextRow(): Unit = {
      current = reader.read()
      if (!hasNext) reader.close()
    }

    override def hasNext: Boolean = current != null

    /**
      * Returns the look-ahead row and advances.
      *
      * @throws NoSuchElementException when the iterator is exhausted (previously `null`
      *                                was returned silently)
      */
    override def next(): ParquetDataRow = {
      if (!hasNext) throw new NoSuchElementException("next on exhausted ParquetInputReader iterator")
      val res = current
      nextRow()
      res
    }
  }
}
开发者ID:CodeBabyBear,项目名称:DataPorter,代码行数:39,代码来源:ParquetInputReader.scala

示例5: GetSearchCount

//设置package包名称以及导入依赖的类
package mad_nectarine.spark

import java.util.Properties
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.io.{MapWritable, Text}
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.{Logging, SparkConf}
import org.elasticsearch.hadoop.mr.EsInputFormat

/**
  * Spark job that counts the Elasticsearch records matching a query.
  *
  * Connection settings and the query template are read from
  * `hdfs:///tmp/spark.to-words.properties`; the first program argument replaces the
  * `@@search_word` placeholder in the query.
  */
object GetSearchCount extends Logging {
  /**
    * Entry point.
    *
    * @param args `args(0)` is the search word substituted into the query
    * @throws IllegalArgumentException when no search word is given
    */
  def main(args: Array[String]): Unit = {

    //validate args
    if (args.length < 1) {
      throw new IllegalArgumentException("search word is required")
    }

    //create spark conf
    val sparkConf = new SparkConf()
    sparkConf.setAppName("mad_nectarine.GetTweetsSearchCount")
    val context = new JavaSparkContext(sparkConf)

    try {
      //load config
      System.out.println("executing... [load config]")
      val fs = FileSystem.get(context.hadoopConfiguration())
      val propertiesStream = fs.open(new Path("hdfs:///tmp/spark.to-words.properties"))
      val properties = new Properties()
      // Properties.load leaves the stream open, so close it here once loading is done.
      try {
        properties.load(propertiesStream)
      } finally {
        propertiesStream.close()
      }

      //create es conf
      System.out.println("executing... [create es conf]")
      val esConf = new JobConf()
      esConf.set("es.nodes", properties.getProperty("logic.search-count.nodes"))
      esConf.set("es.resource", properties.getProperty("logic.search-count.resource"))
      // The replacements below remove the literal two-character sequences backslash-r and
      // backslash-n, not actual line-break characters.
      // NOTE(review): confirm that stripping literal escape text (rather than real newlines) is intended.
      val query = properties.getProperty("logic.search-count.query")
        .replace("@@search_word", args(0))
        .replace("\\r\\n", "")
        .replace("\\n", "")
        .replace("\\r", "")
      System.out.println(s"query is ${query}")
      esConf.set("es.query", query)

      //load data from elasticsearch
      System.out.println("executing... [load data from elasticsearch]")
      val esRDD = context.hadoopRDD(esConf,
        classOf[EsInputFormat[Text, MapWritable]],
        classOf[Text],
        classOf[MapWritable]
      )
      System.out.println("Count of records founds is " + esRDD.count())

    } finally {
      // Always release the Spark context, even when loading or counting fails.
      context.stop()
    }
  }
}
开发者ID:mad-nectarine,项目名称:nlp-test.spark.to-words,代码行数:58,代码来源:GetSearchCount.scala


注:本文中的org.apache.hadoop.mapred.JobConf类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。