当前位置: 首页>>代码示例>>Scala>>正文


Scala AttributeGroup类代码示例

本文整理汇总了Scala中org.apache.spark.ml.attribute.AttributeGroup的典型用法代码示例。如果您正苦于以下问题:Scala AttributeGroup类的具体用法?Scala AttributeGroup怎么用?Scala AttributeGroup使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了AttributeGroup类的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: TreeUtils

//设置package包名称以及导入依赖的类
package org.apache.spark.ml

import org.apache.spark.ml.attribute.{AttributeGroup, NominalAttribute, NumericAttribute}
import org.apache.spark.sql.DataFrame

object TreeUtils {
  
  def setMetadata(
      data: DataFrame,
      featuresColName: String,
      featureArity: Array[Int]): DataFrame = {
    val featuresAttributes = featureArity.zipWithIndex.map { case (arity: Int, feature: Int) =>
      if (arity > 0) {
        NominalAttribute.defaultAttr.withIndex(feature).withNumValues(arity)
      } else {
        NumericAttribute.defaultAttr.withIndex(feature)
      }
    }
    val featuresMetadata = new AttributeGroup("features", featuresAttributes).toMetadata()
    data.select(data(featuresColName).as(featuresColName, featuresMetadata))
  }
} 
开发者ID:summerDG,项目名称:spark-sql-perf,代码行数:23,代码来源:TreeUtils.scala

示例2: VectorSlicerJob

//设置package包名称以及导入依赖的类
import io.hydrosphere.mist.api._
import io.hydrosphere.mist.api.ml._
import java.util

import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute}
import org.apache.spark.ml.feature.VectorSlicer
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.sql.{Row, SparkSession}
import org.apache.spark.sql.types.StructType

object VectorSlicerJob extends MLMistJob{
  def session: SparkSession = SparkSession
    .builder()
    .appName(context.appName)
    .config(context.getConf)
    .getOrCreate()

  def train(savePath: String): Map[String, Any] = {
    val data = util.Arrays.asList(
      Row(Vectors.sparse(3, Seq((0, -2.0), (1, 2.3)))),
      Row(Vectors.dense(-2.0, 2.3, 0.0))
    )
    val defaultAttr = NumericAttribute.defaultAttr
    val attrs = Array("f1", "f2", "f3").map(defaultAttr.withName)
    val attrGroup = new AttributeGroup("userFeatures", attrs.asInstanceOf[Array[Attribute]])

    val df = session.createDataFrame(data, StructType(Array(attrGroup.toStructField())))

    val slicer = new VectorSlicer().setInputCol("userFeatures").setOutputCol("features")
    slicer.setIndices(Array(1)).setNames(Array("f3"))

    val pipeline = new Pipeline().setStages(Array(slicer))

    val model = pipeline.fit(df)

    model.write.overwrite().save(savePath)
    Map.empty[String, Any]
  }

  def serve(modelPath: String, features: List[List[Double]]): Map[String, Any] = {
    import LocalPipelineModel._

    val pipeline = PipelineLoader.load(modelPath)
    val data = LocalData(
      LocalDataColumn("userFeatures", features)
    )

    val result: LocalData = pipeline.transform(data)
    Map("result" -> result.select("userFeatures", "features").toMapList)
  }
} 
开发者ID:Hydrospheredata,项目名称:mist,代码行数:53,代码来源:VectorSlicerJob.scala

示例3: setFunction

//设置package包名称以及导入依赖的类
package spark.feature

import org.apache.spark.ml.Transformer
import org.apache.spark.ml.attribute.AttributeGroup
import org.apache.spark.ml.param.{ParamMap, _}
import org.apache.spark.ml.util._
import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, UserDefinedFunction}


  def setFunction(value: String=>Double) = set(function, value)
  def getFunction() =  $(function)

  override def transform(dataset: DataFrame): DataFrame = {
    val outputSchema = transformSchema(dataset.schema)
    val metadata = outputSchema($(outputCol)).metadata
    val dummy = udf { x: Any => $(expr) }
    var data = dataset.select(col("*"), dummy(col($(inputCols).head)).as("0"))
    val substitute: (String => ((String, Double) => String)) = name => (exp, elem) => exp.replace(name, elem.toString)
    def subst(v: String) = udf(substitute(v))
    $(inputCols).view.zipWithIndex foreach { case (v, i) => data = data.select(col("*"), subst(v)(data(i.toString), data(v)).as((i + 1).toString)).drop(i.toString) }
    val eval = udf($(function))
    data.select(col("*"), eval(data($(inputCols).length.toString)).as($(outputCol), metadata)).drop($(inputCols).length.toString)
  }


  override def transformSchema(schema: StructType): StructType = {
    // TODO: Assertions on inputCols
    val attrGroup = new AttributeGroup($(outputCol), $(numFeatures))
    val col = attrGroup.toStructField()
    require(!schema.fieldNames.contains(col.name), s"Column ${col.name} already exists.")
    StructType(schema.fields :+ col)
  }

  override def copy(extra: ParamMap): FeatureFuTransformer = defaultCopy(extra)
} 
开发者ID:laxmanjangley,项目名称:FFrame,代码行数:38,代码来源:FeatureFuTransformer.scala

示例4: TreeUtils

//设置package包名称以及导入依赖的类
package org.apache.spark.ml

import org.apache.spark.ml.attribute.{AttributeGroup, NominalAttribute, NumericAttribute}
import org.apache.spark.sql.DataFrame

object TreeUtils {
  
  def setMetadata(
      data: DataFrame,
      labelColName: String,
      numClasses: Int,
      featuresColName: String,
      featureArity: Array[Int]): DataFrame = {
    val labelAttribute = if (numClasses == 0) {
      NumericAttribute.defaultAttr.withName(labelColName)
    } else {
      NominalAttribute.defaultAttr.withName(labelColName).withNumValues(numClasses)
    }
    val labelMetadata = labelAttribute.toMetadata()
    val featuresAttributes = featureArity.zipWithIndex.map { case (arity: Int, feature: Int) =>
      if (arity > 0) {
        NominalAttribute.defaultAttr.withIndex(feature).withNumValues(arity)
      } else {
        NumericAttribute.defaultAttr.withIndex(feature)
      }
    }
    val featuresMetadata = new AttributeGroup("features", featuresAttributes).toMetadata()
    data.select(data(featuresColName).as(featuresColName, featuresMetadata),
      data(labelColName).as(labelColName, labelMetadata))
  }
} 
开发者ID:sparkonpower,项目名称:spark-sql-perf-spark2.0.0,代码行数:32,代码来源:TreeUtils.scala


注:本文中的org.apache.spark.ml.attribute.AttributeGroup类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。