

Scala IntegerType Class Code Examples

This article collects typical usage examples of org.apache.spark.sql.types.IntegerType in Scala. If you are wondering what the Scala IntegerType class is for, how to use it, or what real-world IntegerType code looks like, the selected examples below should help.


The following shows 11 code examples of the IntegerType class, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Scala code examples.
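Before turning to the examples, here is a minimal, self-contained sketch of the most common pattern: declaring an IntegerType column in a StructType schema and casting a column to IntegerType. The session, column, and data names below are illustrative and not taken from any of the examples that follow.

import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

val spark = SparkSession.builder().appName("IntegerTypeIntro").master("local[*]").getOrCreate()

// Declare an integer column explicitly in a schema
val schema = StructType(Seq(
  StructField("name", StringType, nullable = true),
  StructField("age", IntegerType, nullable = true)))

val df = spark.createDataFrame(
  spark.sparkContext.parallelize(Seq(Row("Alice", 29), Row("Bob", 35))), schema)

// Cast a derived column to IntegerType
val casted = df.withColumn("age_plus_one", (df("age") + 1).cast(IntegerType))
casted.printSchema()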

Example 1: SchemaTest

// Package declaration and imported dependencies
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.IntegerType
import org.apache.spark.sql.types.StringType
import org.apache.spark.sql.Row
import com.gxq.learn.recontool.utils.SparkContextFactory

object SchemaTest {
  def main(args: Array[String]): Unit = {
    val (sc, ss) = SparkContextFactory.getSparkContext("local")
    val data = sc.parallelize(Seq("Bern;10;12")) // mock for real data

    val schema = new StructType()
    .add("city", StringType, true)
    .add("female", IntegerType, true)
    .add("male", IntegerType, true)

    val cities = data.map(line => {
      val Array(city, female, male) = line.split(";")
      Row(
        city,
        female.toInt,
        male.toInt)
    })

    val citiesDF = ss.createDataFrame(cities, schema)
    citiesDF.show
  }
} 
Developer: gong1989313, Project: spark-bigdata, Lines of code: 30, Source: SchemaTest.scala

Example 2: main

// Package declaration and imported dependencies
package org.sparksamples.df
//import org.apache.spark.sql.SQLContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType};
package object UserData {
  def main(args: Array[String]): Unit = {
    val customSchema = StructType(Array(
      StructField("no", IntegerType, true),
      StructField("age", StringType, true),
      StructField("gender", StringType, true),
      StructField("occupation", StringType, true),
      StructField("zipCode", StringType, true)));
    val spConfig = (new SparkConf).setMaster("local").setAppName("SparkApp")
    val spark = SparkSession
      .builder()
      .appName("SparkUserData").config(spConfig)
      .getOrCreate()

    val user_df = spark.read.format("com.databricks.spark.csv")
      .option("delimiter", "|").schema(customSchema)
      .load("/home/ubuntu/work/ml-resources/spark-ml/data/ml-100k/u.user")
    val first = user_df.first()
    println("First Record : " + first)

    val num_genders = user_df.groupBy("gender").count().count()
    val num_occupations = user_df.groupBy("occupation").count().count()
    val num_zipcodes = user_df.groupBy("zipCode").count().count()

    println("num_users : " + user_df.count())
    println("num_genders : "+ num_genders)
    println("num_occupations : "+ num_occupations)
    println("num_zipcodes: " + num_zipcodes)
    println("Distribution by Occupation")
    user_df.groupBy("occupation").count().show()

  }
} 
Developer: PacktPublishing, Project: Machine-Learning-with-Spark-Second-Edition, Lines of code: 40, Source: UserData.scala
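As a side note, the three distinct counts in this example could also be computed in a single aggregation with countDistinct. A small sketch, assuming the same user_df as above:

import org.apache.spark.sql.functions.countDistinct

// Same distinct counts in one pass over the data (assumes the user_df defined above)
user_df.agg(
  countDistinct("gender").as("num_genders"),
  countDistinct("occupation").as("num_occupations"),
  countDistinct("zipCode").as("num_zipcodes")
).show()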

Example 3: CountByRatingChart

// Package declaration and imported dependencies
package org.sparksamples

import java.awt.Font

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}
import org.jfree.chart.axis.CategoryLabelPositions

import scalax.chart.module.ChartFactories

object CountByRatingChart {
  // NOTE: the object and main declarations were missing from the original listing;
  // they are reconstructed here from the source file name (CountByRatingChart.scala).
  def main(args: Array[String]) {
    val customSchema = StructType(Array(
      StructField("user_id", IntegerType, true),
      StructField("movie_id", IntegerType, true),
      StructField("rating", IntegerType, true),
      StructField("timestamp", IntegerType, true)))

    val spConfig = (new SparkConf).setMaster("local").setAppName("SparkApp")
    val spark = SparkSession
      .builder()
      .appName("SparkRatingData").config(spConfig)
      .getOrCreate()

    val rating_df = spark.read.format("com.databricks.spark.csv")
      .option("delimiter", "\t").schema(customSchema)
      .load("../../data/ml-100k/u.data")

    val rating_df_count = rating_df.groupBy("rating").count().sort("rating")
    //val rating_df_count_sorted = rating_df_count.sort("count")
    rating_df_count.show()
    val rating_df_count_collection = rating_df_count.collect()

    val ds = new org.jfree.data.category.DefaultCategoryDataset
    val mx = scala.collection.immutable.ListMap()

    for( x <- 0 until rating_df_count_collection.length) {
      val occ = rating_df_count_collection(x)(0)
      val count = Integer.parseInt(rating_df_count_collection(x)(1).toString)
      ds.addValue(count,"UserAges", occ.toString)
    }


    //val sorted =  ListMap(ratings_count.toSeq.sortBy(_._1):_*)
    //val ds = new org.jfree.data.category.DefaultCategoryDataset
    //sorted.foreach{ case (k,v) => ds.addValue(v,"Rating Values", k)}

    val chart = ChartFactories.BarChart(ds)
    val font = new Font("Dialog", Font.PLAIN,5);

    chart.peer.getCategoryPlot.getDomainAxis().
      setCategoryLabelPositions(CategoryLabelPositions.UP_90);
    chart.peer.getCategoryPlot.getDomainAxis.setLabelFont(font)
    chart.show()
    Util.sc.stop()
  }
} 
Developer: PacktPublishing, Project: Machine-Learning-with-Spark-Second-Edition, Lines of code: 59, Source: CountByRatingChart.scala

Example 4: UserRatingsChart

// Package declaration and imported dependencies
package org.sparksamples

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.types.{IntegerType, StructField, StructType}

import scalax.chart.module.ChartFactories


object UserRatingsChart {

  def main(args: Array[String]) {

    val customSchema = StructType(Array(
      StructField("user_id", IntegerType, true),
      StructField("movie_id", IntegerType, true),
      StructField("rating", IntegerType, true),
      StructField("timestamp", IntegerType, true)))

    val spConfig = (new SparkConf).setMaster("local").setAppName("SparkApp")
    val spark = SparkSession
      .builder()
      .appName("SparkRatingData").config(spConfig)
      .getOrCreate()

    val rating_df = spark.read.format("com.databricks.spark.csv")
      .option("delimiter", "\t").schema(customSchema)
      .load("../../data/ml-100k/u.data")

    val rating_nos_by_user = rating_df.groupBy("user_id").count().sort("count")
    val ds = new org.jfree.data.category.DefaultCategoryDataset
    rating_nos_by_user.show(rating_nos_by_user.collect().length)
    val rating_nos_by_user_collect = rating_nos_by_user.collect()

    var mx = Map(0 -> 0)
    val min = 1
    val max = 1000
    val bins = 100

    val step = (max/bins).toInt
    for (i <- step until (max + step) by step) {
      mx += (i -> 0);
    }
    for( x <- 0 until rating_nos_by_user_collect.length) {
      val user_id = Integer.parseInt(rating_nos_by_user_collect(x)(0).toString)
      val count = Integer.parseInt(rating_nos_by_user_collect(x)(1).toString)
      ds.addValue(count,"Ratings", user_id)
    }
   // ------------------------------------------------------------------

    val chart = ChartFactories.BarChart(ds)
    chart.peer.getCategoryPlot.getDomainAxis().setVisible(false)

    chart.show()
    Util.sc.stop()
  }
} 
Developer: PacktPublishing, Project: Machine-Learning-with-Spark-Second-Edition, Lines of code: 58, Source: UserRatingsChart.scala

Example 5: TestData

// Package declaration and imported dependencies
package be.dataminded.wharlord.test

import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SparkSession}

object TestData {

  def makeIntegerDf(spark: SparkSession, numbers: Seq[Int]): DataFrame =
    spark.createDataFrame(
      spark.sparkContext.makeRDD(numbers.map(Row(_))),
      StructType(List(StructField("column", IntegerType, nullable = false)))
    )

  def makeNullableStringDf(spark: SparkSession, strings: Seq[String]): DataFrame =
    spark.createDataFrame(spark.sparkContext.makeRDD(strings.map(Row(_))), StructType(List(StructField("column", StringType, nullable = true))))

  def makeIntegersDf(spark: SparkSession, row1: Seq[Int], rowN: Seq[Int]*): DataFrame = {
    val rows = row1 :: rowN.toList
    val numCols = row1.size
    val rdd = spark.sparkContext.makeRDD(rows.map(Row(_:_*)))
    val schema = StructType((1 to numCols).map(idx => StructField("column" + idx, IntegerType, nullable = false)))
    spark.createDataFrame(rdd, schema)
  }

} 
Developer: datamindedbe, Project: wharlord, Lines of code: 26, Source: TestData.scala
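For reference, a short hypothetical usage sketch of these helpers, assuming a SparkSession named spark is already available:

// Hypothetical usage of the TestData helpers above (assumes an existing SparkSession named spark)
val intDf = TestData.makeIntegerDf(spark, Seq(1, 2, 3))
intDf.printSchema()  // one non-nullable IntegerType column named "column"

val gridDf = TestData.makeIntegersDf(spark, Seq(1, 2), Seq(3, 4))
gridDf.show()        // columns "column1" and "column2", two rows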

Example 6: ConversionsSuite

// Package declaration and imported dependencies
package org.apache.spark.orientdb.documents

import com.orientechnologies.orient.core.metadata.schema.OType
import com.orientechnologies.orient.core.record.impl.ODocument
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.{Row, SQLContext}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.scalatest.FunSuite

class ConversionsSuite extends FunSuite {

  test("Spark datatype to OrientDB datatype test") {
    val orientDBType = Conversions.sparkDTtoOrientDBDT(StringType)
    assert(orientDBType === OType.STRING)
  }

  test("Convert Spark Row to Orient DB ODocument") {
    val expectedData = new ODocument()
    expectedData.field("key", 1, OType.INTEGER)
    expectedData.field("value", "Spark datasource for Orient DB", OType.STRING)

    val conf = new SparkConf().setAppName("ConversionsSuite").setMaster("local[*]")
    val sc = new SparkContext(conf)
    val sqlContext = new SQLContext(sc)

    val rows = sqlContext.createDataFrame(sc.parallelize(Seq(Row(1, "Spark datasource for Orient DB"))),
      StructType(Array(StructField("key", IntegerType, true),
        StructField("value", StringType, true)))).collect()

    val actualData = Conversions.convertRowsToODocuments(rows(0))
    assert(expectedData.field[Int]("key") == actualData.field[Int]("key"))
    assert(expectedData.field[String]("value") == actualData.field[String]("value"))
    sc.stop()
  }

  test("Convert OrientDB ODocument to Spark Row") {
    val oDocument = new ODocument()
    oDocument.field("key", 1, OType.INTEGER)
    oDocument.field("value", "Orient DB ODocument to Spark Row", OType.STRING)

    val schema = StructType(Array(StructField("key", IntegerType),
      StructField("value", StringType)))

    val expectedData = Row(1, "Orient DB ODocument to Spark Row")
    val actualData = Conversions.convertODocumentsToRows(oDocument, schema)

    assert(expectedData === actualData)
  }

  test("Return field of correct type") {
    val field = Conversions.orientDBDTtoSparkDT(IntegerType, "1")
    assert(field.isInstanceOf[Int])
  }
} 
Developer: orientechnologies, Project: spark-orientdb, Lines of code: 55, Source: ConversionsSuite.scala

Example 7: SchemaToMongo

// Package declaration and imported dependencies
package nsmc.conversion.types

import com.mongodb.casbah.Imports._
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{StructType, IntegerType, StringType, StructField}

object SchemaToMongo {
  def getMongoRecord(schema: Seq[StructField], r: Row) : DBObject = {
    val converted = schema.zip(r.toSeq).map(toMongo)
    MongoDBObject(converted:_*)
  }

  private def toMongo(p:(StructField, Any)) : (String, Any) = {
    p match {
      case (sf, a) =>
        sf.dataType match {
          // TODO: leaving out some of the atomic types
          case StringType => (sf.name, a)
          case IntegerType => (sf.name, a)
          case StructType(s) => (sf.name, getMongoRecord(s, a.asInstanceOf[Row]))
        }
    }
  }


} 
Developer: baank, Project: spark-mongodb-connector, Lines of code: 27, Source: SchemaToMongo.scala

Example 8: Locus

// Package declaration and imported dependencies
package org.broadinstitute.hail.variant

import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.broadinstitute.hail.check.Gen
import org.json4s._

object Locus {
  val simpleContigs: Seq[String] = (1 to 22).map(_.toString) ++ Seq("X", "Y", "MT")

  val schema: StructType =
    StructType(Array(
      StructField("contig", StringType, nullable = false),
      StructField("position", IntegerType, nullable = false)))

  def gen(contigs: Seq[String]): Gen[Locus] =
    Gen.zip(Gen.oneOfSeq(contigs), Gen.posInt)
      .map { case (contig, pos) => Locus(contig, pos) }

  def gen: Gen[Locus] = gen(simpleContigs)
}

case class Locus(contig: String, position: Int) extends Ordered[Locus] {
  def compare(that: Locus): Int = {
    var c = Contig.compare(contig, that.contig)
    if (c != 0)
      return c

    position.compare(that.position)
  }

  def toJSON: JValue = JObject(
    ("contig", JString(contig)),
    ("position", JInt(position)))

  override def toString: String = s"$contig:$position"
} 
Developer: Sun-shan, Project: Hail_V2, Lines of code: 37, Source: Locus.scala

Example 9: DataFrameHelpersSpec

// Package declaration and imported dependencies
package com.github.mrpowers.spark.daria.sql

import org.scalatest.FunSpec
import org.apache.spark.sql.types.{IntegerType, StringType}
import SparkSessionExt._

class DataFrameHelpersSpec
    extends FunSpec
    with SparkSessionTestWrapper {

  describe(".toArrayOfMaps") {

    it("converts a DataFrame into an array of maps") {

      val sourceDF = spark.createDF(
        List(
          ("doctor", 4, "high"),
          ("dentist", 10, "high")
        ), List(
          ("profession", StringType, true),
          ("some_number", IntegerType, true),
          ("pay_grade", StringType, true)
        )
      )

      val actual = DataFrameHelpers.toArrayOfMaps(sourceDF)

      val expected = Array(
        Map("profession" -> "doctor", "some_number" -> 4, "pay_grade" -> "high"),
        Map("profession" -> "dentist", "some_number" -> 10, "pay_grade" -> "high")
      )

      assert(actual === expected)

    }

  }

} 
Developer: MrPowers, Project: spark-daria, Lines of code: 40, Source: DataFrameHelpersSpec.scala

Example 10: TestMetadataConstructor

// Package declaration and imported dependencies
package com.springml.spark.salesforce.metadata

import org.apache.spark.sql.types.{StructType, StringType, IntegerType, LongType,
  FloatType, DateType, TimestampType, BooleanType, StructField}
import org.scalatest.FunSuite
import com.springml.spark.salesforce.Utils


class TestMetadataConstructor extends FunSuite {

  test("Test Metadata generation") {
    val columnNames = List("c1", "c2", "c3", "c4")
    val columnStruct = columnNames.map(colName => StructField(colName, StringType, true))
    val schema = StructType(columnStruct)

    val schemaString = MetadataConstructor.generateMetaString(schema,"sampleDataSet", Utils.metadataConfig(null))
    assert(schemaString.length > 0)
    assert(schemaString.contains("sampleDataSet"))
  }

  test("Test Metadata generation With Custom MetadataConfig") {
    val columnNames = List("c1", "c2", "c3", "c4")
    val intField = StructField("intCol", IntegerType, true)
    val longField = StructField("longCol", LongType, true)
    val floatField = StructField("floatCol", FloatType, true)
    val dateField = StructField("dateCol", DateType, true)
    val timestampField = StructField("timestampCol", TimestampType, true)
    val stringField = StructField("stringCol", StringType, true)
    val someTypeField = StructField("someTypeCol", BooleanType, true)

    val columnStruct = Array[StructField] (intField, longField, floatField, dateField, timestampField, stringField, someTypeField)

    val schema = StructType(columnStruct)

    var metadataConfig = Map("string" -> Map("wave_type" -> "Text"))
    metadataConfig += ("integer" -> Map("wave_type" -> "Numeric", "precision" -> "10", "scale" -> "0", "defaultValue" -> "100"))
    metadataConfig += ("float" -> Map("wave_type" -> "Numeric", "precision" -> "10", "scale" -> "2"))
    metadataConfig += ("long" -> Map("wave_type" -> "Numeric", "precision" -> "18", "scale" -> "0"))
    metadataConfig += ("date" -> Map("wave_type" -> "Date", "format" -> "yyyy/MM/dd"))
    metadataConfig += ("timestamp" -> Map("wave_type" -> "Date", "format" -> "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"))


    val schemaString = MetadataConstructor.generateMetaString(schema, "sampleDataSet", metadataConfig)
    assert(schemaString.length > 0)
    assert(schemaString.contains("sampleDataSet"))
    assert(schemaString.contains("Numeric"))
    assert(schemaString.contains("precision"))
    assert(schemaString.contains("scale"))
    assert(schemaString.contains("18"))
    assert(schemaString.contains("Text"))
    assert(schemaString.contains("Date"))
    assert(schemaString.contains("format"))
    assert(schemaString.contains("defaultValue"))
    assert(schemaString.contains("100"))
    assert(schemaString.contains("yyyy/MM/dd"))
    assert(schemaString.contains("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"))
  }
} 
Developer: kturgut, Project: spark-salesforce, Lines of code: 59, Source: TestMetadataConstructor.scala

Example 11: SqlShiftMySQLDialect

// Package declaration and imported dependencies
package com.goibibo.sqlshift.commons


import java.sql.Types
import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects}
import org.apache.spark.sql.types.{DataType, IntegerType, LongType, MetadataBuilder}


case object SqlShiftMySQLDialect extends JdbcDialect {

    override def canHandle(url: String): Boolean = url.startsWith("jdbc:mysql")

    override def getCatalystType(sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
        if (sqlType == Types.VARBINARY && typeName.equals("BIT") && size != 1) {
            // This could instead be a BinaryType if we'd rather return bit-vectors of up to 64 bits as
            // byte arrays instead of longs.
            md.putLong("binarylong", 1)
            Option(LongType)
        } else if (typeName.equals("TINYINT")) {
            Option(IntegerType)
        } else None
    }

    override def quoteIdentifier(colName: String): String = {
        s"`$colName`"
    }

    override def getTableExistsQuery(table: String): String = {
        s"SELECT 1 FROM $table LIMIT 1"
    }

    def registerDialect(): Unit = {
        
    }
} 
Developer: goibibo, Project: SqlShift, Lines of code: 36, Source: SqlShiftMySQLDialect.scala
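For context, registering and using this dialect might look like the sketch below. The JDBC URL, table name, and credentials are placeholders rather than values from the original project; the registerDialect call uses Spark's standard JdbcDialects registry.

import java.util.Properties
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.jdbc.JdbcDialects

// Minimal usage sketch (connection details are placeholders, not from the original project)
JdbcDialects.registerDialect(SqlShiftMySQLDialect)

val spark = SparkSession.builder().appName("SqlShiftExample").master("local[*]").getOrCreate()
val props = new Properties()
props.setProperty("user", "root")
props.setProperty("password", "secret")

// With the dialect registered, MySQL TINYINT columns are read as IntegerType (see getCatalystType above)
val df = spark.read.jdbc("jdbc:mysql://localhost:3306/mydb", "my_table", props)
df.printSchema()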


Note: The org.apache.spark.sql.types.IntegerType examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other open-source code and documentation platforms. The code snippets are taken from open-source projects contributed by various developers; copyright remains with the original authors, and you should consult each project's License before redistributing or using the code. Do not reproduce this article without permission.