当前位置: 首页>>代码示例>>Scala>>正文


Scala ParamMap类代码示例

本文整理汇总了Scala中org.apache.spark.ml.param.ParamMap的典型用法代码示例。如果您正苦于以下问题:Scala ParamMap类的具体用法?Scala ParamMap怎么用?Scala ParamMap使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了ParamMap类的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: MedicineProcess

//设置package包名称以及导入依赖的类
package cn.com.warlock.practice.ml

import java.io.BufferedReader
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}

import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.types.{ArrayType, DataType, StringType}

import scala.collection.mutable.Set

/**
 * Transformer that marks dictionary words inside a token sequence.
 *
 * Every input token that is present in the dictionary is emitted followed by the
 * marker token `"_MED_"`; every other token is passed through unchanged.
 *
 * @param uid  unique identifier of this pipeline stage
 * @param dict path of a UTF-8 text file containing one dictionary word per line
 */
class MedicineProcess(override val uid: String, private val dict: String)
  extends UnaryTransformer[Seq[String], Seq[String], MedicineProcess] {

  /** Creates the transformer with a generated uid (prefix `"med"`). */
  def this(dict: String) = this(Identifiable.randomUID("med"), dict)

  // Dictionary words, loaded once while the instance is being constructed.
  private val wordsSet = loadDict

  /**
   * Reads the dictionary file at `dict` into a set, one entry per line, and prints
   * the number of lines read.
   *
   * The reader is always closed, even when reading fails.
   */
  private def loadDict: Set[String] = {
    val br: BufferedReader = Files.newBufferedReader(Paths.get(dict), StandardCharsets.UTF_8)
    try {
      val words = Set[String]()
      var count = 0

      // readLine() returns null at end of stream; ready() only reports whether a read
      // would block, so it is not used as the end-of-file test.
      Iterator.continually(br.readLine()).takeWhile(_ != null).foreach { line =>
        words += line
        count += 1
      }

      println(s"load med words: $count")

      words
    } finally {
      br.close()
    }
  }

  /**
   * Builds the per-row function: each token is kept, and a token found in the
   * dictionary is additionally followed by `"_MED_"`.
   */
  override protected def createTransformFunc: Seq[String] => Seq[String] = {
    // Bind the set to a local so the returned function refers to it directly.
    val dictWords = wordsSet
    val tagWords: Seq[String] => Seq[String] = words =>
      // Single pass; avoids repeatedly appending to the end of a List.
      words.flatMap(w => if (dictWords.contains(w)) List(w, "_MED_") else List(w)).toList
    tagWords
  }

  /** Requires the input column to be an `ArrayType`. */
  override protected def validateInputType(inputType: DataType): Unit = {
    require(inputType.isInstanceOf[ArrayType],
      s"The input column must be ArrayType, but got $inputType.")
  }

  /** The output column is an array of strings that may contain nulls. */
  override protected def outputDataType: DataType = new ArrayType(StringType, true)

  /**
   * Creates a copy with the same uid and dictionary path, then applies `extra`.
   *
   * `defaultCopy` is deliberately not used: it instantiates the class through its
   * single-`String` constructor passing the uid, and in this class that constructor
   * is `this(dict)`, so the uid would be treated as the dictionary path.
   */
  override def copy(extra: ParamMap): MedicineProcess =
    copyValues(new MedicineProcess(uid, dict), extra)
}
开发者ID:warlock-china,项目名称:spark-meepo,代码行数:59,代码来源:MedicineProcess.scala

示例2: GloVe

//设置package包名称以及导入依赖的类
package org.apache.spark.ml.feature

import org.apache.spark.ml.Estimator
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.{DefaultParamsWritable, Identifiable}
import org.apache.spark.mllib.feature
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.types.StructType

/**
 * Estimator that trains a [[GloVeModel]] from a column of token sequences by
 * delegating to the `feature.GloVe` trainer.
 *
 * @param uid unique identifier of this pipeline stage
 */
final class GloVe(override val uid: String)
  extends Estimator[GloVeModel] with GloVeBase with DefaultParamsWritable {

  /** Creates the estimator with a generated uid (prefix `"glove"`). */
  def this() = this(Identifiable.randomUID("glove"))

  // ---- column params ----

  def setInputCol(value: String): this.type = set(inputCol, value)

  def setOutputCol(value: String): this.type = set(outputCol, value)

  // ---- training params ----

  def setDim(value: Int): this.type = set(dim, value)

  def setAlpha(value: Double): this.type = set(alpha, value)

  def setWindow(value: Int): this.type = set(window, value)

  def setStepSize(value: Double): this.type = set(stepSize, value)

  def setMaxIter(value: Int): this.type = set(maxIter, value)

  def setSeed(value: Long): this.type = set(seed, value)

  def setMinCount(value: Int): this.type = set(minCount, value)

  /**
   * Validates the schema, trains on the first selected column read as `Seq[String]`,
   * and wraps the result in a [[GloVeModel]] that carries this estimator's params.
   *
   * NOTE(review): `alpha` and `window` are not passed to the trainer here — confirm
   * whether that is intentional.
   */
  override def fit(dataset: Dataset[_]): GloVeModel = {
    transformSchema(dataset.schema, logging = true)

    val sentences = dataset
      .select($(inputCol))
      .rdd
      .map(row => row.getAs[Seq[String]](0))

    val trainer = new feature.GloVe()
      .setLearningRate($(stepSize))
      .setMinCount($(minCount))
      .setNumIterations($(maxIter))
      .setSeed($(seed))
      .setDim($(dim))
    val trained = trainer.fit(sentences)

    val model = new GloVeModel(uid, trained).setParent(this)
    copyValues(model)
  }

  /** Delegates to `validateAndTransformSchema`. */
  override def transformSchema(schema: StructType): StructType =
    validateAndTransformSchema(schema)

  override def copy(extra: ParamMap): GloVe = defaultCopy(extra)
}
开发者ID:mdymczyk,项目名称:spark-miner,代码行数:52,代码来源:GloVe.scala

示例3: setFunction

//设置package包名称以及导入依赖的类
package spark.feature

import org.apache.spark.ml.Transformer
import org.apache.spark.ml.attribute.AttributeGroup
import org.apache.spark.ml.param.{ParamMap, _}
import org.apache.spark.ml.util._
import org.apache.spark.sql.functions.{col, udf}
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, UserDefinedFunction}


  /** Stores `value` under the `function` param and returns the result of `set`. */
  def setFunction(value: String => Double) = {
    set(function, value)
  }
  /** Returns the current value of the `function` param. */
  def getFunction() = {
    $(function)
  }

  /**
   * Appends the output column to `dataset`.
   *
   * A working column named "0" is seeded with the `expr` param on every row. For the
   * i-th entry of `inputCols`, working column "i" is rewritten into working column
   * "i + 1" by replacing that entry's name with the row's value of that column, and
   * column "i" is dropped. Finally `function` is applied to the last working column,
   * the result is stored under `outputCol` with the metadata from `transformSchema`,
   * and the last working column is dropped.
   */
  override def transform(dataset: DataFrame): DataFrame = {
    val schemaWithOutput = transformSchema(dataset.schema)
    val outputMetadata = schemaWithOutput($(outputCol)).metadata

    // The UDF ignores its argument; it only needs some column to be invoked per row.
    val exprColumn = udf { x: Any => $(expr) }
    val seeded = dataset.select(col("*"), exprColumn(col($(inputCols).head)).as("0"))

    val substituted = $(inputCols).zipWithIndex.foldLeft(seeded) { case (frame, (name, idx)) =>
      val replaceName = udf((exp: String, elem: Double) => exp.replace(name, elem.toString))
      val current = idx.toString
      val next = (idx + 1).toString
      frame
        .select(col("*"), replaceName(frame(current), frame(name)).as(next))
        .drop(current)
    }

    val eval = udf($(function))
    val lastWorking = $(inputCols).length.toString
    substituted
      .select(col("*"), eval(substituted(lastWorking)).as($(outputCol), outputMetadata))
      .drop(lastWorking)
  }


  /**
   * Returns `schema` with one extra field appended: the struct field produced by an
   * `AttributeGroup` named after `outputCol` and sized by `numFeatures`.
   *
   * Fails with `IllegalArgumentException` when a field of that name already exists.
   *
   * NOTE(review): confirm that the data type of the appended field matches what
   * `transform` actually writes into the output column.
   */
  override def transformSchema(schema: StructType): StructType = {
    // TODO: Assertions on inputCols
    val outputField = new AttributeGroup($(outputCol), $(numFeatures)).toStructField()
    val nameTaken = schema.fieldNames.contains(outputField.name)
    require(!nameTaken, s"Column ${outputField.name} already exists.")
    StructType(schema.fields :+ outputField)
  }

  /** Copies this stage via `defaultCopy`, applying the extra params. */
  override def copy(extra: ParamMap): FeatureFuTransformer = {
    defaultCopy(extra)
  }
} 
开发者ID:laxmanjangley,项目名称:FFrame,代码行数:38,代码来源:FeatureFuTransformer.scala

示例4: getOutputCol

//设置package包名称以及导入依赖的类
package spark.progress

import org.apache.spark.ml.Transformer
import org.apache.spark.ml.attribute.AttributeGroup
import org.apache.spark.ml.param.{ParamMap, _}
import org.apache.spark.ml.util._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.{DataFrame, Row}


	/** Returns the current value of the `outputcol` param. */
	def getOutputCol() = {
		$(outputcol)
	}

	/** Returns the current value of the `expr` param. */
	def getExpr() = {
		$(expr)
	}

	/** Returns the current value of the `inputcols` param converted to an array. */
	def getInputCols() = {
		$(inputcols).toArray
	}

	/** Returns the current value of the `numFeatures` param. */
	def getNumFeatures() = {
		$(numFeatures)
	}

	/** Returns the current value of the `function` param. */
	def getFunction() = {
		$(function)
	}

	/**
	 * Appends the output column to `dataset`.
	 *
	 * For every row, the `expr` param is taken as a template; for each position `i`
	 * from 1 to `numFeatures`, occurrences of the name of the dataset's i-th column
	 * are replaced by that row's integer value at position `i`. The substituted
	 * expression is then passed to `function`, and the result is stored under
	 * `outputcol` with the metadata produced by `transformSchema`.
	 *
	 * NOTE(review): position 0 is never substituted and values are read with
	 * `getInt` — confirm both match the expected input layout.
	 */
	override def transform(dataset: DataFrame): DataFrame = {
		val outputSchema = transformSchema(dataset.schema)
		val metadata = outputSchema($(outputcol)).metadata

		// Resolve everything the per-row function needs once, up front, instead of
		// re-reading params and recomputing the column list for every row.
		val columnNames = dataset.columns.toSeq
		val template = $(expr)
		val featureCount = $(numFeatures)
		val evaluate = $(function)

		val f = udf { (r: Row) =>
			// String.replace returns a new string, so the result has to be threaded
			// through each step; discarding it leaves the template unsubstituted.
			val substituted = (1 to featureCount).foldLeft(template) { (exp, i) =>
				exp.replace(columnNames(i), r.getInt(i).toString)
			}
			evaluate(substituted)
		}

		dataset.select(col("*"), f(struct(dataset.columns.map(dataset(_)) : _*)).as($(outputcol), metadata))
	}


	/**
	 * Returns `schema` with one extra field appended: the struct field produced by an
	 * `AttributeGroup` named after `outputcol` and sized by `numFeatures`.
	 *
	 * Fails with `IllegalArgumentException` when a field of that name already exists.
	 *
	 * NOTE(review): confirm that the data type of the appended field matches what
	 * `transform` actually writes into the output column.
	 */
	override def transformSchema(schema: StructType): StructType = {
		val outputField = new AttributeGroup($(outputcol), $(numFeatures)).toStructField()
		val nameTaken = schema.fieldNames.contains(outputField.name)
		require(!nameTaken, s"Column ${outputField.name} already exists.")
		StructType(schema.fields :+ outputField)
	}

	/** Copies this stage via `defaultCopy`, applying the extra params. */
	override def copy(extra: ParamMap): ExpressionEval = {
		defaultCopy(extra)
	}
} 
开发者ID:laxmanjangley,项目名称:FFrame,代码行数:46,代码来源:ExpressionEval.scala


注:本文中的org.apache.spark.ml.param.ParamMap类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。