This article collects typical usage examples of the Scala class org.apache.spark.sql.types.IntegerType. If you are wondering what IntegerType is for, how to use it, or what real code that uses it looks like, the curated examples below should help.
Eleven code examples of the IntegerType class are shown below, sorted by popularity by default.
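Before diving into the examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of how IntegerType is typically used: it marks a column as a 32-bit signed integer inside a StructType schema when building a DataFrame from Rows.

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

object IntegerTypeDemo {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("IntegerTypeDemo").getOrCreate()
    // IntegerType declares a 32-bit integer column; nullable = false forbids nulls in it
    val schema = StructType(Seq(
      StructField("id", IntegerType, nullable = false),
      StructField("name", StringType, nullable = true)))
    val rows = spark.sparkContext.parallelize(Seq(Row(1, "alice"), Row(2, "bob")))
    spark.createDataFrame(rows, schema).show()
    spark.stop()
  }
}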
Example 1: SchemaTest
// Set the package name and import the classes this example depends on
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.IntegerType
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.Row
import com.gxq.learn.recontool.utils.SparkContextFactory
object SchemaTest {
  def main(args: Array[String]): Unit = {
    val (sc, ss) = SparkContextFactory.getSparkContext("local")
    val data = sc.parallelize(Seq("Bern;10;12")) // mock for real data
    val schema = new StructType()
      .add("city", StringType, true)
      .add("female", IntegerType, true)
      .add("male", IntegerType, true)
    val cities = data.map(line => {
      val Array(city, female, male) = line.split(";")
      Row(
        city,
        female.toInt,
        male.toInt)
    })
    val citiesDF = ss.createDataFrame(cities, schema)
    citiesDF.show
  }
}
Example 2: main
// Set the package name and import the classes this example depends on
package org.sparksamples.df
//import org.apache.spark.sql.SQLContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType};
package object UserData {
  def main(args: Array[String]): Unit = {
    val customSchema = StructType(Array(
      StructField("no", IntegerType, true),
      StructField("age", StringType, true),
      StructField("gender", StringType, true),
      StructField("occupation", StringType, true),
      StructField("zipCode", StringType, true)))
    val spConfig = (new SparkConf).setMaster("local").setAppName("SparkApp")
    val spark = SparkSession
      .builder()
      .appName("SparkUserData").config(spConfig)
      .getOrCreate()
    val user_df = spark.read.format("com.databricks.spark.csv")
      .option("delimiter", "|").schema(customSchema)
      .load("/home/ubuntu/work/ml-resources/spark-ml/data/ml-100k/u.user")
    val first = user_df.first()
    println("First Record : " + first)
    val num_genders = user_df.groupBy("gender").count().count()
    val num_occupations = user_df.groupBy("occupation").count().count()
    val num_zipcodes = user_df.groupBy("zipCode").count().count()
    println("num_users : " + user_df.count())
    println("num_genders : " + num_genders)
    println("num_occupations : " + num_occupations)
    println("num_zipcodes: " + num_zipcodes)
    println("Distribution by Occupation")
    // show() returns Unit, so wrapping it in println would only print "()"
    user_df.groupBy("occupation").count().show()
  }
}
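Note (not part of the original sample): with Spark 2.x the external com.databricks.spark.csv package is no longer required; the built-in CSV data source can read the same file. A minimal sketch, assuming the same `spark` session, `customSchema`, and file path as in the example above:

// Built-in CSV reader, equivalent to the com.databricks.spark.csv call above
val userDfCsv = spark.read
  .option("delimiter", "|")
  .schema(customSchema)
  .csv("/home/ubuntu/work/ml-resources/spark-ml/data/ml-100k/u.user")
userDfCsv.show(5)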
Example 3: CountByRatingChart
// Set the package name and import the classes this example depends on
package org.sparksamples
import java.awt.Font
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
import org.jfree.chart.axis.CategoryLabelPositions
import scalax.chart.module.ChartFactories
object CountByRatingChart { // object/main wrapper inferred from the source file name given below
  def main(args: Array[String]) {
    val customSchema = StructType(Array(
      StructField("user_id", IntegerType, true),
      StructField("movie_id", IntegerType, true),
      StructField("rating", IntegerType, true),
      StructField("timestamp", IntegerType, true)))
    val spConfig = (new SparkConf).setMaster("local").setAppName("SparkApp")
    val spark = SparkSession
      .builder()
      .appName("SparkRatingData").config(spConfig)
      .getOrCreate()
    val rating_df = spark.read.format("com.databricks.spark.csv")
      .option("delimiter", "\t").schema(customSchema)
      .load("../../data/ml-100k/u.data")
    val rating_df_count = rating_df.groupBy("rating").count().sort("rating")
    //val rating_df_count_sorted = rating_df_count.sort("count")
    rating_df_count.show()
    val rating_df_count_collection = rating_df_count.collect()
    val ds = new org.jfree.data.category.DefaultCategoryDataset
    val mx = scala.collection.immutable.ListMap()
    for (x <- 0 until rating_df_count_collection.length) {
      val occ = rating_df_count_collection(x)(0)
      val count = Integer.parseInt(rating_df_count_collection(x)(1).toString)
      ds.addValue(count, "UserAges", occ.toString)
    }
    //val sorted = ListMap(ratings_count.toSeq.sortBy(_._1):_*)
    //val ds = new org.jfree.data.category.DefaultCategoryDataset
    //sorted.foreach{ case (k,v) => ds.addValue(v,"Rating Values", k)}
    val chart = ChartFactories.BarChart(ds)
    val font = new Font("Dialog", Font.PLAIN, 5)
    chart.peer.getCategoryPlot.getDomainAxis().
      setCategoryLabelPositions(CategoryLabelPositions.UP_90)
    chart.peer.getCategoryPlot.getDomainAxis.setLabelFont(font)
    chart.show()
    Util.sc.stop()
  }
}
Author: PacktPublishing | Project: Machine-Learning-with-Spark-Second-Edition | Lines: 59 | Source: CountByRatingChart.scala
Example 4: UserRatingsChart
// Set the package name and import the classes this example depends on
package org.sparksamples
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
import scalax.chart.module.ChartFactories
object UserRatingsChart {
  def main(args: Array[String]) {
    val customSchema = StructType(Array(
      StructField("user_id", IntegerType, true),
      StructField("movie_id", IntegerType, true),
      StructField("rating", IntegerType, true),
      StructField("timestamp", IntegerType, true)))
    val spConfig = (new SparkConf).setMaster("local").setAppName("SparkApp")
    val spark = SparkSession
      .builder()
      .appName("SparkRatingData").config(spConfig)
      .getOrCreate()
    val rating_df = spark.read.format("com.databricks.spark.csv")
      .option("delimiter", "\t").schema(customSchema)
      .load("../../data/ml-100k/u.data")
    val rating_nos_by_user = rating_df.groupBy("user_id").count().sort("count")
    val ds = new org.jfree.data.category.DefaultCategoryDataset
    rating_nos_by_user.show(rating_nos_by_user.collect().length)
    val rating_nos_by_user_collect = rating_nos_by_user.collect()
    var mx = Map(0 -> 0)
    val min = 1
    val max = 1000
    val bins = 100
    val step = (max / bins).toInt
    for (i <- step until (max + step) by step) {
      mx += (i -> 0)
    }
    for (x <- 0 until rating_nos_by_user_collect.length) {
      val user_id = Integer.parseInt(rating_nos_by_user_collect(x)(0).toString)
      val count = Integer.parseInt(rating_nos_by_user_collect(x)(1).toString)
      ds.addValue(count, "Ratings", user_id)
    }
    // ------------------------------------------------------------------
    val chart = ChartFactories.BarChart(ds)
    chart.peer.getCategoryPlot.getDomainAxis().setVisible(false)
    chart.show()
    Util.sc.stop()
  }
}
Author: PacktPublishing | Project: Machine-Learning-with-Spark-Second-Edition | Lines: 58 | Source: UserRatingsChart.scala
Example 5: TestData
// Set the package name and import the classes this example depends on
package be.dataminded.wharlord.test
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
object TestData {
  def makeIntegerDf(spark: SparkSession, numbers: Seq[Int]): DataFrame =
    spark.createDataFrame(
      spark.sparkContext.makeRDD(numbers.map(Row(_))),
      StructType(List(StructField("column", IntegerType, nullable = false)))
    )

  def makeNullableStringDf(spark: SparkSession, strings: Seq[String]): DataFrame =
    spark.createDataFrame(
      spark.sparkContext.makeRDD(strings.map(Row(_))),
      StructType(List(StructField("column", StringType, nullable = true)))
    )

  def makeIntegersDf(spark: SparkSession, row1: Seq[Int], rowN: Seq[Int]*): DataFrame = {
    val rows = row1 :: rowN.toList
    val numCols = row1.size
    val rdd = spark.sparkContext.makeRDD(rows.map(Row(_: _*)))
    val schema = StructType((1 to numCols).map(idx => StructField("column" + idx, IntegerType, nullable = false)))
    spark.createDataFrame(rdd, schema)
  }
}
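A short usage sketch (not part of the original test helpers), assuming a local SparkSession, showing what the factory methods above return:

import org.apache.spark.sql.SparkSession

// Hypothetical driver code exercising TestData
val spark = SparkSession.builder().master("local[*]").appName("TestDataDemo").getOrCreate()
val intDf = TestData.makeIntegerDf(spark, Seq(1, 2, 3))            // one non-nullable IntegerType column named "column"
val gridDf = TestData.makeIntegersDf(spark, Seq(1, 2), Seq(3, 4))  // columns "column1", "column2", both IntegerType
intDf.printSchema()
gridDf.show()
spark.stop()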
Example 6: ConversionsSuite
// Set the package name and import the classes this example depends on
package org.apache.spark.orientdb.documents
import com.orientechnologies.orient.core.metadata.schema.OType
import com.orientechnologies.orient.core.record.impl.ODocument
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.scalatest.FunSuite
class ConversionsSuite extends FunSuite {
test("Spark datatype to OrientDB datatype test") {
val orientDBType = Conversions.sparkDTtoOrientDBDT(StringType)
assert(orientDBType === OType.STRING)
}
test("Convert Spark Row to Orient DB ODocument") {
val expectedData = new ODocument()
expectedData.field("key", 1, OType.INTEGER)
expectedData.field("value", "Spark datasource for Orient DB", OType.STRING)
val conf = new SparkConf().setAppName("ConversionsSuite").setMaster("local[*]")
val sc = new SparkContext(conf)
val sqlContext = new SQLContext(sc)
val rows = sqlContext.createDataFrame(sc.parallelize(Seq(Row(1, "Spark datasource for Orient DB"))),
StructType(Array(StructField("key", IntegerType, true),
StructField("value", StringType, true)))).collect()
val actualData = Conversions.convertRowsToODocuments(rows(0))
assert(expectedData.field[Int]("key") == actualData.field[Int]("key"))
assert(expectedData.field[String]("value") == actualData.field[String]("value"))
sc.stop()
}
test("Convert OrientDB ODocument to Spark Row") {
val oDocument = new ODocument()
oDocument.field("key", 1, OType.INTEGER)
oDocument.field("value", "Orient DB ODocument to Spark Row", OType.STRING)
val schema = StructType(Array(StructField("key", IntegerType),
StructField("value", StringType)))
val expectedData = Row(1, "Orient DB ODocument to Spark Row")
val actualData = Conversions.convertODocumentsToRows(oDocument, schema)
assert(expectedData === actualData)
}
test("Return field of correct type") {
val field = Conversions.orientDBDTtoSparkDT(IntegerType, "1")
assert(field.isInstanceOf[Int])
}
}
Example 7: SchemaToMongo
// Set the package name and import the classes this example depends on
package nsmc.conversion.types
import com.mongodb.casbah.Imports._
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StructType, IntegerType, StringType, StructField}
object SchemaToMongo {
  def getMongoRecord(schema: Seq[StructField], r: Row): DBObject = {
    val converted = schema.zip(r.toSeq).map(toMongo)
    MongoDBObject(converted: _*)
  }

  private def toMongo(p: (StructField, Any)): (String, Any) = {
    p match {
      case (sf, a) =>
        sf.dataType match {
          // TODO: leaving out some of the atomic types
          case StringType => (sf.name, a)
          case IntegerType => (sf.name, a)
          case StructType(s) => (sf.name, getMongoRecord(s, a.asInstanceOf[Row]))
        }
    }
  }
}
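A small usage sketch (hypothetical, not from the nsmc project) of getMongoRecord: a Row is zipped with its StructField schema and converted into a Casbah DBObject, with IntegerType and StringType fields copied through unchanged.

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, StringType, StructField}

// Assumes nsmc and Casbah are on the classpath
val fields = Seq(
  StructField("name", StringType),
  StructField("age", IntegerType))
val doc = SchemaToMongo.getMongoRecord(fields, Row("Ada", 36))
println(doc) // a MongoDBObject holding "name" -> "Ada" and "age" -> 36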
Example 8: Locus
// Set the package name and import the classes this example depends on
package org.broadinstitute.hail.variant
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.broadinstitute.hail.check.Gen
import org.json4s._
object Locus {
  val simpleContigs: Seq[String] = (1 to 22).map(_.toString) ++ Seq("X", "Y", "MT")

  val schema: StructType =
    StructType(Array(
      StructField("contig", StringType, nullable = false),
      StructField("position", IntegerType, nullable = false)))

  def gen(contigs: Seq[String]): Gen[Locus] =
    Gen.zip(Gen.oneOfSeq(contigs), Gen.posInt)
      .map { case (contig, pos) => Locus(contig, pos) }

  def gen: Gen[Locus] = gen(simpleContigs)
}
case class Locus(contig: String, position: Int) extends Ordered[Locus] {
  def compare(that: Locus): Int = {
    var c = Contig.compare(contig, that.contig)
    if (c != 0)
      return c
    position.compare(that.position)
  }

  def toJSON: JValue = JObject(
    ("contig", JString(contig)),
    ("position", JInt(position)))

  override def toString: String = s"$contig:$position"
}
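A brief sketch (not from the original Hail code) showing how Locus.schema, with its IntegerType position column, can back a DataFrame of loci; the SparkSession setup is assumed.

import org.apache.spark.sql.{Row, SparkSession}

// Hypothetical demo: each Row must match (StringType contig, IntegerType position)
val spark = SparkSession.builder().master("local[*]").appName("LocusDemo").getOrCreate()
val rows = spark.sparkContext.parallelize(Seq(Row("1", 12345), Row("X", 500)))
val lociDf = spark.createDataFrame(rows, Locus.schema)
lociDf.show()
println(Locus(contig = "1", position = 12345)) // prints 1:12345 via the custom toString
spark.stop()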
Example 9: DataFrameHelpersSpec
// Set the package name and import the classes this example depends on
package com.github.mrpowers.spark.daria.sql
import org.scalatest.FunSpec
import org.apache.spark.sql.types.{IntegerType, StringType}
import SparkSessionExt._
class DataFrameHelpersSpec
    extends FunSpec
    with SparkSessionTestWrapper {

  describe(".toArrayOfMaps") {

    it("converts a DataFrame into an array of maps") {
      val sourceDF = spark.createDF(
        List(
          ("doctor", 4, "high"),
          ("dentist", 10, "high")
        ), List(
          ("profession", StringType, true),
          ("some_number", IntegerType, true),
          ("pay_grade", StringType, true)
        )
      )

      val actual = DataFrameHelpers.toArrayOfMaps(sourceDF)

      val expected = Array(
        Map("profession" -> "doctor", "some_number" -> 4, "pay_grade" -> "high"),
        Map("profession" -> "dentist", "some_number" -> 10, "pay_grade" -> "high")
      )

      assert(actual === expected)
    }
  }
}
Example 10: TestMetadataConstructor
// Set the package name and import the classes this example depends on
package com.springml.spark.salesforce.metadata
import org.apache.spark.sql.types.{StructType, StringType, IntegerType, LongType,
  FloatType, DateType, TimestampType, BooleanType, StructField}
import org.scalatest.FunSuite
import com.springml.spark.salesforce.Utils
class TestMetadataConstructor extends FunSuite {
test("Test Metadata generation") {
val columnNames = List("c1", "c2", "c3", "c4")
val columnStruct = columnNames.map(colName => StructField(colName, StringType, true))
val schema = StructType(columnStruct)
val schemaString = MetadataConstructor.generateMetaString(schema,"sampleDataSet", Utils.metadataConfig(null))
assert(schemaString.length > 0)
assert(schemaString.contains("sampleDataSet"))
}
test("Test Metadata generation With Custom MetadataConfig") {
val columnNames = List("c1", "c2", "c3", "c4")
val intField = StructField("intCol", IntegerType, true)
val longField = StructField("longCol", LongType, true)
val floatField = StructField("floatCol", FloatType, true)
val dateField = StructField("dateCol", DateType, true)
val timestampField = StructField("timestampCol", TimestampType, true)
val stringField = StructField("stringCol", StringType, true)
val someTypeField = StructField("someTypeCol", BooleanType, true)
val columnStruct = Array[StructField] (intField, longField, floatField, dateField, timestampField, stringField, someTypeField)
val schema = StructType(columnStruct)
var metadataConfig = Map("string" -> Map("wave_type" -> "Text"))
metadataConfig += ("integer" -> Map("wave_type" -> "Numeric", "precision" -> "10", "scale" -> "0", "defaultValue" -> "100"))
metadataConfig += ("float" -> Map("wave_type" -> "Numeric", "precision" -> "10", "scale" -> "2"))
metadataConfig += ("long" -> Map("wave_type" -> "Numeric", "precision" -> "18", "scale" -> "0"))
metadataConfig += ("date" -> Map("wave_type" -> "Date", "format" -> "yyyy/MM/dd"))
metadataConfig += ("timestamp" -> Map("wave_type" -> "Date", "format" -> "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"))
val schemaString = MetadataConstructor.generateMetaString(schema, "sampleDataSet", metadataConfig)
assert(schemaString.length > 0)
assert(schemaString.contains("sampleDataSet"))
assert(schemaString.contains("Numeric"))
assert(schemaString.contains("precision"))
assert(schemaString.contains("scale"))
assert(schemaString.contains("18"))
assert(schemaString.contains("Text"))
assert(schemaString.contains("Date"))
assert(schemaString.contains("format"))
assert(schemaString.contains("defaultValue"))
assert(schemaString.contains("100"))
assert(schemaString.contains("yyyy/MM/dd"))
assert(schemaString.contains("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"))
}
}
Example 11: SqlShiftMySQLDialect
// Set the package name and import the classes this example depends on
package com.goibibo.sqlshift.commons
import java.sql.Types
import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects}
import org.apache.spark.sql.types.{DataType, IntegerType, LongType, MetadataBuilder}
case object SqlShiftMySQLDialect extends JdbcDialect {
  override def canHandle(url: String): Boolean = url.startsWith("jdbc:mysql")

  override def getCatalystType(sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
    if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) {
      // This could instead be a BinaryType if we'd rather return bit-vectors of up to 64 bits as
      // byte arrays instead of longs.
      md.putLong("binarylong", 1)
      Option(LongType)
    } else if (typeName.equals("TINYINT")) {
      Option(IntegerType)
    } else None
  }

  override def quoteIdentifier(colName: String): String = {
    s"`$colName`"
  }

  override def getTableExistsQuery(table: String): String = {
    s"SELECT 1 FROM $table LIMIT 1"
  }

  def registerDialect(): Unit = {
  }
}
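A usage sketch (not part of the original object): the empty registerDialect() above suggests the dialect is meant to be registered with Spark; Spark's JdbcDialects.registerDialect can be called directly so that MySQL TINYINT columns are read back as IntegerType.

import org.apache.spark.sql.jdbc.JdbcDialects

// Register the custom dialect before creating a JDBC DataFrame (connection details below are hypothetical)
JdbcDialects.registerDialect(SqlShiftMySQLDialect)
// val df = spark.read.format("jdbc")
//   .option("url", "jdbc:mysql://localhost:3306/mydb")
//   .option("dbtable", "my_table")
//   .load()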