This article collects typical usage examples of the Scala class org.apache.spark.sql.sources.SchemaRelationProvider. If you are wondering what SchemaRelationProvider is for, or how to use it in your own Scala code, the curated examples below should help.
Eight code examples of the SchemaRelationProvider class are shown; by default they are ordered by popularity.
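For reference, SchemaRelationProvider is the Spark SQL data source extension point a library implements so that callers can supply their own schema when creating a relation; it declares a single method. Most of the examples below also mix in RelationProvider (for schema-less loads), and some add CreatableRelationProvider or DataSourceRegister. The usage sketches that follow each example assume an in-scope sqlContext.
// Declared in org.apache.spark.sql.sources (Spark SQL data source API v1)
trait SchemaRelationProvider {
  def createRelation(
      sqlContext: SQLContext,
      parameters: Map[String, String],
      schema: StructType): BaseRelation
}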
Example 1: DefaultSource
// Package declaration and imports of the required classes
package com.jasonfeist.spark.tika
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.{BaseRelation, RelationProvider, SchemaRelationProvider}
import org.apache.spark.sql.types._
class DefaultSource
extends RelationProvider with SchemaRelationProvider {
def createRelation(sqlContext: SQLContext, parameters: Map[String, String], schema: StructType): BaseRelation = {
parameters.getOrElse("path", sys.error("No path specified."))
new TikaMetadataRelation(
parameters.get("path").get,
schema,
new MetadataExtractor(),
new FieldDataExtractor())(sqlContext)
}
override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = {
val struct =
StructType(
StructField("detectedtype", StringType, true) ::
StructField("language", StringType, true) ::
StructField("filename", StringType, true) ::
StructField("author", StringType, true) ::
StructField("text", StringType, true) ::
StructField("creation-date", TimestampType, true) ::
StructField("title", StringType, true) ::
StructField("content-length", IntegerType, true) ::
StructField("last-modified", DateType, true) :: Nil
)
createRelation(sqlContext, parameters, struct)
}
}
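A minimal usage sketch for this source (Spark resolves the format string to the package's DefaultSource; the directory path is a placeholder):
// Hypothetical usage: scan documents with the built-in schema from the two-argument createRelation.
val docs = sqlContext.read
  .format("com.jasonfeist.spark.tika")
  .load("/data/documents")  // sets the required "path" parameter; value is a placeholder
docs.select("filename", "detectedtype", "author").show()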
Example 2: DefaultSource
// Package declaration and imports of the required classes
package com.springml.spark.zuora
import com.springml.spark.zuora.model.ZuoraInput
import org.apache.log4j.Logger
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, RelationProvider, SchemaRelationProvider}
import org.apache.spark.sql.types.StructType
import scala.collection.mutable
class DefaultSource extends RelationProvider with SchemaRelationProvider with CreatableRelationProvider {
@transient val logger = Logger.getLogger(classOf[DefaultSource])
override def createRelation(sqlContext: SQLContext,
parameters: Map[String, String]): BaseRelation = {
createRelation(sqlContext, parameters, null)
}
override def createRelation(sqlContext: SQLContext,
parameters: Map[String, String],
schema: StructType): BaseRelation = {
val email = param(parameters, "email")
val password = param(parameters, "password")
val zoql = param(parameters, "zoql")
val instanceUrl = parameters.getOrElse("instanceURL", "https://rest.zuora.com")
val apiVersion = parameters.getOrElse("apiVersion", "38.0")
// TODO
val pageSizeParam = parameters.getOrElse("pageSize", "1000")
val pageSize = pageSizeParam.toInt
val zuoraInput = new ZuoraInput(email, password, zoql, instanceUrl, apiVersion, pageSize)
val records = new ZuoraReader(zuoraInput) read()
new DatasetRelation(records, sqlContext, schema)
}
override def createRelation(sqlContext: SQLContext,
mode: SaveMode,
parameters: Map[String, String],
data: DataFrame): BaseRelation = {
logger.error("Save not supported by Zuora connector")
throw new UnsupportedOperationException
}
private def param(parameters: Map[String, String],
paramName: String) : String = {
val paramValue = parameters.getOrElse(paramName,
sys.error(s"""'$paramName' must be specified for Spark Zuora package"""))
if ("password".equals(paramName)) {
logger.debug("Param " + paramName + " value " + paramValue)
}
paramValue
}
}
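A hedged usage sketch, relying on the default instanceURL, apiVersion, and pageSize; the credentials and ZOQL query below are placeholders:
// Hypothetical usage of the Zuora source.
val accounts = sqlContext.read
  .format("com.springml.spark.zuora")
  .option("email", "user@example.com")             // placeholder credential
  .option("password", "secret")                    // placeholder credential
  .option("zoql", "SELECT Id, Name FROM Account")  // placeholder ZOQL query
  .load()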
Example 3: DefaultSource
// Package declaration and imports of the required classes
package net.sansa_stack.inference.spark.data.loader.sql
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.{BaseRelation, RelationProvider, SchemaRelationProvider}
import org.apache.spark.sql.types.StructType
class DefaultSource extends RelationProvider with SchemaRelationProvider {
override def createRelation(sqlContext: SQLContext, parameters: Map[String, String])
: BaseRelation = {
createRelation(sqlContext, parameters, null)
}
override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]
, schema: StructType)
: BaseRelation = {
parameters.getOrElse("path", sys.error("'path' must be specified for our data."))
return new NTriplesRelation(parameters.get("path").get, schema)(sqlContext)
}
}
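A minimal usage sketch, assuming NTriplesRelation falls back to its own default schema when none is supplied (the two-argument createRelation passes null):
// Hypothetical usage of the N-Triples source.
val triples = sqlContext.read
  .format("net.sansa_stack.inference.spark.data.loader.sql")
  .load("/data/dataset.nt")  // placeholder path to an N-Triples file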
Example 4: DefaultSource
// Package declaration and imports of the required classes
package de.usu.research.sake.sparksparql
import org.apache.spark.sql.sources.SchemaRelationProvider
import org.apache.spark.sql.sources.RelationProvider
import org.apache.spark.sql.sources.DataSourceRegister
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.StructType
class DefaultSource extends RelationProvider with SchemaRelationProvider with DataSourceRegister {
override def shortName(): String = "sparql"
override def createRelation(
sqlContext: SQLContext,
parameters: Map[String, String],
schema: StructType): SparqlRelation = {
val service = checkService(parameters)
val query = checkQuery(parameters)
SparqlRelation(service, query, schema)(sqlContext)
}
private def checkService(parameters: Map[String, String]): String = {
parameters.getOrElse("service", sys.error("'service' must be specified for SPARQL data."))
}
private def checkQuery(parameters: Map[String, String]): String = {
parameters.getOrElse("query", sys.error("'query' must be specified for SPARQL data."))
}
}
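Because this source also implements DataSourceRegister, it can be addressed by the short name "sparql". A usage sketch with a placeholder endpoint, query, and schema (the column names are assumed to match the query's projected variables):
import org.apache.spark.sql.types.{StringType, StructField, StructType}
// Hypothetical usage of the SPARQL source.
val labelSchema = StructType(Seq(
  StructField("s", StringType),
  StructField("label", StringType)))
val labels = sqlContext.read
  .format("sparql")
  .schema(labelSchema)
  .option("service", "https://dbpedia.org/sparql")  // placeholder endpoint
  .option("query", "SELECT ?s ?label WHERE { ?s rdfs:label ?label } LIMIT 10")  // placeholder query
  .load()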
Example 5: DefaultSource
// Package declaration and imports of the required classes
package com.logicstack.spark
import com.google.api.client.googleapis.auth.oauth2.GoogleCredential
import com.google.api.client.googleapis.javanet.GoogleNetHttpTransport
import com.google.api.client.json.jackson2.JacksonFactory
import com.google.api.services.analyticsreporting.v4.AnalyticsReporting
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.{BaseRelation, RelationProvider, SchemaRelationProvider}
import org.apache.spark.sql.types.StructType
class DefaultSource extends SchemaRelationProvider {
private val httpTransport = GoogleNetHttpTransport.newTrustedTransport
private val jsonFactory = JacksonFactory.getDefaultInstance
override def createRelation(sqlContext: SQLContext, parameters: Map[String, String], schema: StructType): ReportRelation = {
val clientId = parameters("clientId")
val clientSecret = parameters("clientSecret")
val accessToken = parameters("accessToken")
val refreshToken = parameters("refreshToken")
val credential = new GoogleCredential.Builder()
.setJsonFactory(jsonFactory)
.setTransport(httpTransport)
.setClientSecrets(clientId, clientSecret)
.build()
.setAccessToken(accessToken)
.setRefreshToken(refreshToken)
val analyticsReporting = new AnalyticsReporting.Builder(httpTransport, jsonFactory, credential)
.setApplicationName("spark-ga")
.build()
new ReportRelation(
analyticsReporting,
parameters("profile"),
parameters("startDate"),
parameters("endDate"),
schema
)(sqlContext)
}
}
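This source implements only SchemaRelationProvider, so a user-supplied schema is mandatory. All option values below are placeholders, and the column names are assumptions about how ReportRelation names its output:
import org.apache.spark.sql.types.{StringType, StructField, StructType}
// Hypothetical usage of the Google Analytics reporting source.
val reportSchema = StructType(Seq(
  StructField("date", StringType),       // assumed column
  StructField("sessions", StringType)))  // assumed column
val report = sqlContext.read
  .format("com.logicstack.spark")
  .schema(reportSchema)
  .option("clientId", "my-client-id")          // placeholder OAuth credentials
  .option("clientSecret", "my-client-secret")
  .option("accessToken", "my-access-token")
  .option("refreshToken", "my-refresh-token")
  .option("profile", "ga:12345678")            // placeholder view (profile) ID
  .option("startDate", "2017-01-01")           // placeholder date range
  .option("endDate", "2017-01-31")
  .load()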
Example 6: DefaultSource
// Package declaration and imports of the required classes
package com.github.traviscrawford.spark.dynamodb
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.sources.RelationProvider
import org.apache.spark.sql.sources.SchemaRelationProvider
import org.apache.spark.sql.types.StructType
private[dynamodb] class DefaultSource
extends RelationProvider with SchemaRelationProvider {
override def createRelation(
sqlContext: SQLContext,
parameters: Map[String, String])
: BaseRelation = getDynamoDBRelation(sqlContext, parameters)
override def createRelation(
sqlContext: SQLContext,
parameters: Map[String, String],
schema: StructType)
: BaseRelation = getDynamoDBRelation(sqlContext, parameters, Some(schema))
private def getDynamoDBRelation(
sqlContext: SQLContext,
parameters: Map[String, String],
maybeSchema: Option[StructType] = None)
: DynamoDBRelation = {
val tableName = parameters.getOrElse("table",
throw new IllegalArgumentException("Required parameter 'table' was unspecified.")
)
DynamoDBRelation(
tableName = tableName,
maybeFilterExpression = parameters.get("filter_expression"),
maybePageSize = parameters.get("page_size"),
maybeRegion = parameters.get("region"),
maybeSegments = parameters.get("segments"),
maybeRateLimit = parameters.get("rate_limit_per_segment").map(Integer.parseInt),
maybeSchema = maybeSchema,
maybeCredentials = parameters.get("aws_credentials_provider"),
maybeEndpoint = parameters.get("endpoint"))(sqlContext)
}
}
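A usage sketch; only 'table' is required, every other option is optional, and the values shown are placeholders:
// Hypothetical usage of the DynamoDB source.
val users = sqlContext.read
  .format("com.github.traviscrawford.spark.dynamodb")
  .option("table", "users")                 // required; placeholder table name
  .option("region", "us-west-2")            // optional; placeholder region
  .option("rate_limit_per_segment", "100")  // optional; placeholder read throttle
  .load()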
Example 7: DefaultSource
// Package declaration and imports of the required classes
package com.springml.spark.workday
import com.springml.spark.workday.model.{WWSInput, XPathInput}
import com.springml.spark.workday.util.CSVUtil
import org.apache.log4j.Logger
import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, RelationProvider, SchemaRelationProvider}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
class DefaultSource extends RelationProvider with SchemaRelationProvider with CreatableRelationProvider {
@transient val logger = Logger.getLogger(classOf[DefaultSource])
override def createRelation(sqlContext: SQLContext,
parameters: Map[String, String]): BaseRelation = {
createRelation(sqlContext, parameters, null)
}
override def createRelation(sqlContext: SQLContext,
parameters: Map[String, String],
schema: StructType): BaseRelation = {
val username = param(parameters, "username")
val password = param(parameters, "password")
val wwsEndpoint = param(parameters, "wwsEndpoint")
val objectTag = param(parameters, "objectTagPath")
val detailsTag = param(parameters, "detailsTagPath")
val request = param(parameters, "request")
val xpath = param(parameters, "xpathMap")
val namespacePrefix = parameters.get("namespacePrefixMap")
val wwsInput = new WWSInput(username, password, wwsEndpoint, request)
val xPathInput = new XPathInput(objectTag, detailsTag)
CSVUtil.populateXPathInput(xpath, xPathInput)
xPathInput.namespaceMap = CSVUtil.readCSV(namespacePrefix.get)
logger.debug("Namespace Map" + xPathInput.namespaceMap)
val records = new WWSReader(wwsInput, xPathInput) read()
new DatasetRelation(records, sqlContext, schema)
}
override def createRelation(sqlContext: SQLContext,
mode: SaveMode,
parameters: Map[String, String],
data: DataFrame): BaseRelation = {
logger.error("Save not supported by workday connector")
throw new UnsupportedOperationException
}
private def param(parameters: Map[String, String],
paramName: String) : String = {
val paramValue = parameters.getOrElse(paramName,
sys.error(s"""'$paramName' must be specified for Spark Workday package"""))
if ("password".equals(paramName)) {
logger.debug("Param " + paramName + " value " + paramValue)
}
paramValue
}
}
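A hedged usage sketch; all option values are placeholders. Note that the code above calls namespacePrefix.get unconditionally, so namespacePrefixMap is required in practice:
// Hypothetical usage of the Workday source.
val workers = sqlContext.read
  .format("com.springml.spark.workday")
  .option("username", "wd-user")                           // placeholder credentials
  .option("password", "secret")
  .option("wwsEndpoint", "https://impl.workday.com/ccx/service/acme/Human_Resources")  // placeholder endpoint
  .option("objectTagPath", "wd:Worker")                    // placeholder XML tag paths
  .option("detailsTagPath", "wd:Worker_Data")
  .option("request", "<wd:Get_Workers_Request/>")          // placeholder SOAP request body
  .option("xpathMap", "/conf/worker_xpath.csv")            // placeholder mapping CSV
  .option("namespacePrefixMap", "/conf/namespaces.csv")    // placeholder namespace CSV
  .load()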
Example 8: DefaultSource
// Package declaration and imports of the required classes
package org.apache.spark.sql.sparkcv
import org.apache.spark.internal.Logging
import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode}
import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, RelationProvider, SchemaRelationProvider}
import org.apache.spark.sql.types.StructType
import org.bytedeco.javacpp.opencv_core.IplImage
import org.bytedeco.javacpp.opencv_imgcodecs.cvLoadImage
class DefaultSource
extends RelationProvider
with SchemaRelationProvider
with CreatableRelationProvider
with Logging {
override def createRelation(sqlContext: SQLContext, parameters: Map[String, String]): BaseRelation = {
createRelation(sqlContext, parameters, new StructType())
}
override def createRelation(sqlContext: SQLContext, parameters: Map[String, String], schema: StructType): BaseRelation = {
assert(parameters.get("path").isDefined, "path parameter is required")
val image: IplImage = cvLoadImage("src/main/resources/birds-of-paradise.jpg")
ImageRelation(sqlContext, parameters, schema)
}
override def createRelation(sqlContext: SQLContext, mode: SaveMode, parameters: Map[String, String], data: DataFrame): BaseRelation = {
ImageRelation(sqlContext, parameters, data.schema)
}
}
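A usage sketch for the read path; the assert in createRelation only checks that 'path' is present, so the value below is a placeholder:
// Hypothetical usage of the image source.
val images = sqlContext.read
  .format("org.apache.spark.sql.sparkcv")
  .load("/data/images")  // sets the required "path" parameter; value is a placeholder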