当前位置: 首页>>代码示例>>Scala>>正文


Scala Identifiable类代码示例

本文整理汇总了Scala中org.apache.spark.ml.util.Identifiable的典型用法代码示例。如果您正苦于以下问题:Scala Identifiable类的具体用法?Scala Identifiable怎么用?Scala Identifiable使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了Identifiable类的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: MedicineProcess

//设置package包名称以及导入依赖的类
package cn.com.warlock.practice.ml

import java.io.BufferedReader
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}

import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.types.{ArrayType, DataType, StringType}

import scala.collection.mutable.Set

class MedicineProcess(override val uid: String, private val dict: String)
  extends UnaryTransformer[Seq[String], Seq[String], MedicineProcess] {

  def this(dict: String) = this(Identifiable.randomUID("med"), dict)

  // ?????????
  private val wordsSet = loadDict

  // ????
  private def loadDict: Set[String] = {
    val br: BufferedReader = Files.newBufferedReader(Paths.get(dict), StandardCharsets.UTF_8)
    val words = Set[String]()

    var count = 0

    while (br.ready()) {
      words += br.readLine()
      count += 1
    }

    println(s"load med words: $count")

    words
  }

  override protected def createTransformFunc: Seq[String] => Seq[String] = (words: Seq[String]) => {
    // ?? "???", arr ?????????, c ??????? word
    words.foldLeft(List[String]())((arr, c) => {
      val newC = wordsSet.contains(c) match {
        case true => List(c, "_MED_")
        case false => List(c)
      }
      arr ++ newC
    })
  }

  override protected def validateInputType(inputType: DataType): Unit = {
    require(inputType.isInstanceOf[ArrayType],
      s"The input column must be ArrayType, but got $inputType.")
  }

  override protected def outputDataType: DataType = new ArrayType(StringType, true)

  override def copy(extra: ParamMap): MedicineProcess = defaultCopy(extra)
} 
开发者ID:warlock-china,项目名称:spark-meepo,代码行数:59,代码来源:MedicineProcess.scala

示例2: FeatureReducer

//设置package包名称以及导入依赖的类
package com.aluxian.tweeather.transformers

import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param._
import org.apache.spark.ml.util.{BasicParamsReadable, BasicParamsWritable, Identifiable}
import org.apache.spark.sql.types._


class FeatureReducer(override val uid: String)
  extends UnaryTransformer[String, String, FeatureReducer] with BasicParamsWritable {

  def this() = this(Identifiable.randomUID("featureReducer"))

  override protected def createTransformFunc: String => String = {
    raw =>
      val str = raw.toLowerCase()
        .replaceAll("https?:\\/\\/\\S*", "URL") // urls
        .replaceAll("\\[email protected]\\w*", "USERNAME") // @ mentions

      // Repeated letters
      "abcdefghijklmnopqrstuvwxyz".map(_.toString).fold(str) {
        (result, c) =>
          result.replaceAll(s"($c){2,}", s"$c$c")
      }
  }

  override protected def validateInputType(inputType: DataType): Unit = {
    require(inputType == StringType, s"Input type must be string type but got $inputType.")
  }

  override protected def outputDataType: DataType = StringType

  override def copy(extra: ParamMap): FeatureReducer = defaultCopy(extra)

}

object FeatureReducer extends BasicParamsReadable[FeatureReducer] {
  override def load(path: String): FeatureReducer = super.load(path)
} 
开发者ID:cnajeefa,项目名称:Tourism-Sentiment-Analysis,代码行数:40,代码来源:FeatureReducer.scala

示例3: GloVe

//设置package包名称以及导入依赖的类
package org.apache.spark.ml.feature

import org.apache.spark.ml.Estimator
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.{DefaultParamsWritable, Identifiable}
import org.apache.spark.mllib.feature
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.types.StructType

final class GloVe(override val uid: String)
  extends Estimator[GloVeModel] with GloVeBase with DefaultParamsWritable {

  def this() = this(Identifiable.randomUID("glove"))

  def setInputCol(value: String): this.type = set(inputCol, value)

  def setOutputCol(value: String): this.type = set(outputCol, value)

  def setDim(value: Int): this.type = set(dim, value)

  def setAlpha(value: Double): this.type = set(alpha, value)

  def setWindow(value: Int): this.type = set(window, value)

  def setStepSize(value: Double): this.type = set(stepSize, value)

  def setMaxIter(value: Int): this.type = set(maxIter, value)

  def setSeed(value: Long): this.type = set(seed, value)

  def setMinCount(value: Int): this.type = set(minCount, value)

  override def fit(dataset: Dataset[_]): GloVeModel = {
    transformSchema(dataset.schema, logging = true)
    val input = dataset.select($(inputCol)).rdd.map(_.getAs[Seq[String]](0))
    val wordVectors = new feature.GloVe()
      .setLearningRate($(stepSize))
      .setMinCount($(minCount))
      .setNumIterations($(maxIter))
      .setSeed($(seed))
      .setDim($(dim))
      .fit(input)
    copyValues(new GloVeModel(uid, wordVectors).setParent(this))
  }

  override def transformSchema(schema: StructType): StructType = {
    validateAndTransformSchema(schema)
  }

  override def copy(extra: ParamMap): GloVe = defaultCopy(extra)
} 
开发者ID:mdymczyk,项目名称:spark-miner,代码行数:52,代码来源:GloVe.scala


注:本文中的org.apache.spark.ml.util.Identifiable类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。