Scala TableScan类代码示例

本文整理汇总了Scala中org.apache.spark.sql.sources.TableScan的典型用法代码示例。如果您正苦于以下问题：Scala TableScan类的具体用法？Scala TableScan怎么用？Scala TableScan使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。


示例1: TikaMetadataRelation

package com.jasonfeist.spark.tika

import org.apache.spark.input.PortableDataStream
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.sources.{BaseRelation, TableScan}
import org.apache.spark.sql.types.{StructType}
import org.slf4j.LoggerFactory

/**
 * Table-scan relation whose schema is the caller-supplied `userSchema` and
 * whose rows are assembled field by field from extracted file metadata.
 *
 * NOTE(review): this excerpt is truncated. The expression that continues
 * `val rdd = sqlContext` in `buildScan`, the braces around `extractFunc`, the
 * end of the `matchedField` call, and the end of the class are not visible.
 *
 * @param path               input location; its use is in the truncated part — TODO confirm
 * @param userSchema         schema returned unchanged by `schema`
 * @param metadataExtractor  called once per input entry in `extractFunc`
 * @param fieldDataExtractor produces the value for each schema field
 * @param sqlContext         SQL context; marked `@transient`
 */
class TikaMetadataRelation protected[tika] (path: String,
                                            userSchema: StructType,
                                            metadataExtractor: MetadataExtractor,
                                            fieldDataExtractor: FieldDataExtractor)
                          (@transient val sqlContext: SQLContext)
  extends BaseRelation with TableScan with Serializable {

  // SLF4J logger for this class.
  val logger = LoggerFactory.getLogger(classOf[TikaMetadataRelation])

  // The schema is exactly the one passed to the constructor.
  override def schema: StructType = this.userSchema

  // NOTE(review): the body is cut off after `sqlContext`; what RDD is built
  // and how `extractFunc` is applied to it cannot be seen from here.
  override def buildScan(): RDD[Row] = {

    val rdd = sqlContext

  // Turns one (String, PortableDataStream) pair into a Row.
  // `file._1` is presumably the file path — TODO confirm against the caller.
  def extractFunc(
                    file: (String, PortableDataStream)
                  ) : Row  =
    // The extractor's result is read as a tuple (`_1`, `_2` are used below).
    val extractedData = metadataExtractor.extract(file)
    // One slot per schema field, filled in schema order.
    val rowArray = new Array[Any](schema.fields.length)
    var index = 0
    while (index < schema.fields.length) {
      val field = schema(index)
      // NOTE(review): the argument list below ends with a trailing comma —
      // the remaining argument(s) and closing parenthesis are missing here.
      val fieldData = fieldDataExtractor.matchedField(field.name,
        field.dataType, extractedData._1, file._1, extractedData._2,
      rowArray(index) = fieldData
      index = index + 1

示例2: StudyRelation

package com.study.spark.datasource

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.sources.{BaseRelation, TableScan}
import org.apache.spark.sql.types.{StringType, StructField, StructType}
import org.apache.spark.sql.{Row, SQLContext}

/**
 * Minimal table-scan relation with a fixed schema of three string columns;
 * the scan itself is delegated to `StudyRDD`.
 *
 * @param parameters data source options supplied by the caller (not read in this class)
 * @param sqlContext SQL context handed to `StudyRDD`; marked `@transient`
 */
class StudyRelation(parameters: Map[String, String])(@transient val sqlContext: SQLContext)
  extends BaseRelation with TableScan {

  // Fixed schema of three string columns. The third column was previously a
  // second "field2"; the names are kept distinct so that name-based column
  // lookups are unambiguous.
  override def schema: StructType =
    StructType(Seq(
      StructField("field1", StringType),
      StructField("field2", StringType),
      StructField("field3", StringType)
    ))

  // Produces the RDD[Row] by constructing a StudyRDD over this relation's schema.
  override def buildScan(): RDD[Row] = new StudyRDD(sqlContext, schema)
}

示例3: sample

package com.rishabh.spark.datasource.s3

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.sources.{BaseRelation, TableScan}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{Row, SQLContext}

// Single-field record; S3Relation wraps one instance of it to build its placeholder DataFrame.
case class sample(id: Integer)

/**
 * Table-scan relation that reads a JSON, CSV or Parquet dataset from
 * `s3a://<bucket><path>`.
 *
 * NOTE(review): this excerpt is truncated. The constructor's parameter lists
 * are mangled (the opening of the list holding `sqlContext` is missing), and
 * the ends of `schema` and `buildScan` are not visible.
 *
 * @param accesskey  value written to `fs.s3a.access.key`
 * @param secretkey  value written to `fs.s3a.secret.key`
 * @param fileType   selects the reader in `schema`: "json", "csv" or "parquet"
 * @param bucket     bucket name, appended directly after "s3a://"
 * @param path       appended directly after the bucket, with no separator added
 * @param write      not used in the visible code
 * @param sqlContext SQL context used for all reads
 */
class S3Relation(accesskey: String, secretkey: String, fileType: String, bucket: String, path:
String, write: Boolean)
                 val sqlContext: SQLContext) extends BaseRelation with TableScan {

  // Brings the implicit conversions used by `.toDF()` below into scope.
  import sqlContext.implicits._

  // Placeholder one-row DataFrame (4 partitions); `df` is a var because
  // `schema` reassigns it once the real dataset is read.
  val dummyData = Seq(sample(1))
  var df = sqlContext.sparkContext.parallelize(dummyData, 4).toDF()
  val s3Path = "s3a://" + bucket + path

  // These settings are written to the SparkContext's Hadoop configuration
  // object at construction time, not to a private copy.
  val hadoopConf = sqlContext.sparkContext.hadoopConfiguration
  hadoopConf.set("fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem")
  hadoopConf.set("fs.s3a.access.key", accesskey)
  hadoopConf.set("fs.s3a.secret.key", secretkey)

  // Reads the dataset for the given `fileType` and stores it in `df` as a
  // side effect. No default case is visible, and the expression that returns
  // the StructType is missing from this excerpt.
  override def schema: StructType = {
    fileType match {
      case "json" =>
        df = sqlContext.read.json(s3Path)
      case "csv" =>
        df = sqlContext.read.format("com.databricks.spark.csv").load(s3Path)
      case "parquet" =>
        df = sqlContext.read.parquet(s3Path)

  // NOTE(review): body not visible in this excerpt.
  override def buildScan(): RDD[Row] = {

示例4: NTriplesRelation

package net.sansa_stack.inference.spark.data.loader.sql

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.sources.{BaseRelation, TableScan}
import org.apache.spark.sql.types.{StringType, StructField, StructType}

import net.sansa_stack.inference.utils.NTriplesStringToRDFTriple

/**
 * Table-scan relation that exposes N-Triples data as rows of three values
 * (`s`, `p`, `o`).
 *
 * NOTE(review): this excerpt is truncated. Both branches of `schema`, the
 * expression that continues `val rdd = sqlContext`, and the closing braces
 * are not visible.
 *
 * @param location   input location; its use is in the truncated part — TODO confirm
 * @param userSchema optional caller-supplied schema; `null` means "not supplied"
 * @param sqlContext SQL context; marked `@transient`
 */
class NTriplesRelation(location: String, userSchema: StructType)
                      (@transient val sqlContext: SQLContext)
    extends BaseRelation
      with TableScan
      with Serializable {
    // Chooses between the caller-supplied schema and a default one.
    override def schema: StructType = {
      if (this.userSchema != null) {
      // NOTE(review): the body of the non-null branch is missing; presumably
      // it returns `userSchema` — TODO confirm.
      // The default branch lists three nullable string fields: s, p, o.
      else {
            StructField("s", StringType, true),
            StructField("p", StringType, true),
            StructField("o", StringType, true)
    override def buildScan(): RDD[Row] = {
      // NOTE(review): the rest of this expression is cut off.
      val rdd = sqlContext

      val converter = new NTriplesStringToRDFTriple()

      // Each input element is run through the converter (flatMap, so an
      // element may yield zero or more triples); each triple becomes a
      // three-column Row in s, p, o order.
      val rows = rdd.flatMap(x => converter.apply(x)).map(t => Row.fromSeq(Seq(t.s, t.p, t.o)))


示例5: GDBRelation

package com.esri.gdb

import org.apache.spark.Logging
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.sources.{BaseRelation, TableScan}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SQLContext}

/**
 * Table-scan relation over the table `gdbName` inside the geodatabase at
 * `gdbPath`.
 *
 * NOTE(review): this excerpt is truncated. The bodies of the `try` and
 * `finally` blocks, the value produced when the table is not found, and the
 * closing braces are not visible.
 *
 * @param gdbPath      geodatabase location passed to `GDBTable` and `GDBRDD`
 * @param gdbName      name of the table to look up
 * @param numPartition passed through to `GDBRDD`
 * @param sqlContext   SQL context; marked `@transient`
 */
case class GDBRelation(gdbPath: String, gdbName: String, numPartition: Int)
                      (@transient val sqlContext: SQLContext)
  extends BaseRelation with Logging with TableScan {

  // A `val`, so the schema is inferred once, when the relation is constructed.
  override val schema = inferSchema()

  // Looks the table up by name and derives the schema from it.
  private def inferSchema() = {
    val sc = sqlContext.sparkContext
    GDBTable.findTable(gdbPath, gdbName, sc.hadoopConfiguration) match {
      case Some(catTab) => {
        // The table itself is opened using the catalog entry's `hexName`.
        val table = GDBTable(gdbPath, catTab.hexName, sc.hadoopConfiguration)
        // NOTE(review): both block bodies are missing; presumably the schema
        // is built in `try` and the table is closed in `finally` — TODO confirm.
        try {
        } finally {
      case _ => {
        // The message says an empty schema is created; the code that creates
        // it is not visible in this excerpt.
        log.error(s"Cannot find '$gdbName' in $gdbPath, creating an empty schema !")

  // Delegates the scan to GDBRDD with the same path, table name and partition count.
  override def buildScan(): RDD[Row] = {
    GDBRDD(sqlContext.sparkContext, gdbPath, gdbName, numPartition)

示例6: DatasetRelation

package com.springml.spark.sftp

import com.databricks.spark.avro._
import org.apache.log4j.Logger
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{DataFrame, Row, SQLContext}
import org.apache.spark.sql.sources.{BaseRelation, TableScan}
import org.apache.spark.sql.types.StructType

/**
 * Table-scan relation that loads a file into a DataFrame at construction
 * time, choosing the reader from `fileType`.
 *
 * NOTE(review): this excerpt is truncated. Several closing braces, the end of
 * the CSV reader chain, the return expression of `read`, and the bodies of
 * `schema` and `buildScan` are not visible.
 *
 * @param fileLocation location handed to the selected reader
 * @param fileType     "json", "parquet", "csv" or "avro" in the visible branches
 * @param inferSchema  passed as the CSV "inferSchema" option
 * @param header       passed as the CSV "header" option
 * @param delimiter    passed as the CSV "delimiter" option
 * @param customSchema optional schema applied to the reader; `null` means "not supplied"
 * @param sqlContext   SQL context used for reading
 */
case class DatasetRelation(
    fileLocation: String,
    fileType: String,
    inferSchema: String,
    header: String,
    delimiter: String,
    customSchema: StructType,
    sqlContext: SQLContext) extends BaseRelation with TableScan {

    private val logger = Logger.getLogger(classOf[DatasetRelation])

    // Evaluated eagerly: the file is read when the relation is constructed.
    val df = read()

    // Builds a reader (with `customSchema` when one was given) and loads the
    // file with the method matching `fileType`.
    private def read(): DataFrame = {
      var dataframeReader = sqlContext.read
      if (customSchema != null) {
        dataframeReader = dataframeReader.schema(customSchema)

      // Local `df` shadows the member of the same name. It starts as null and
      // stays null if no visible branch matches; whether a final `else`
      // exists cannot be seen from this excerpt.
      var df: DataFrame = null
      if (fileType.equals("json")) {
        df = dataframeReader.json(fileLocation)
      } else if (fileType.equals("parquet")) {
        df = dataframeReader.parquet(fileLocation)
      } else if (fileType.equals("csv")) {
        // CSV options are passed through as given. NOTE(review): the call
        // that ends this chain is missing — it stops on a dangling `.`.
        df = dataframeReader.
          option("header", header).
          option("delimiter", delimiter).
          option("inferSchema", inferSchema).
      } else if (fileType.equals("avro")) {
        // `.avro` comes from the com.databricks.spark.avro import.
        df = dataframeReader.avro(fileLocation)


    // NOTE(review): body not visible in this excerpt.
    override def schema: StructType = {

    // NOTE(review): body not visible in this excerpt.
    override def buildScan(): RDD[Row] = {

