本文整理汇总了Scala中org.apache.spark.ml.UnaryTransformer类的典型用法代码示例。如果您正苦于以下问题：Scala UnaryTransformer类的具体用法？Scala UnaryTransformer怎么用？Scala UnaryTransformer使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了UnaryTransformer类的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。
示例1: MedicineProcess
//设置package包名称以及导入依赖的类
package cn.com.warlock.practice.ml
import java.io.BufferedReader
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.types.{ArrayType, DataType, StringType}
import scala.collection.mutable.Set
/**
 * Transformer that marks dictionary words in a token sequence: every input token that
 * appears in the dictionary file is kept and immediately followed by the marker "_MED_".
 *
 * @param uid  unique identifier of this pipeline stage
 * @param dict path of the dictionary file, read as UTF-8 with one entry per line
 */
class MedicineProcess(override val uid: String, private val dict: String)
    extends UnaryTransformer[Seq[String], Seq[String], MedicineProcess] {

  def this(dict: String) = this(Identifiable.randomUID("med"), dict)

  // Dictionary entries, loaded once when the transformer is constructed.
  private val wordsSet = loadDict

  /**
   * Reads the dictionary file into an immutable set and prints how many lines were read.
   * The reader is always closed, even if reading fails part-way through.
   */
  private def loadDict: scala.collection.immutable.Set[String] = {
    val br: BufferedReader = Files.newBufferedReader(Paths.get(dict), StandardCharsets.UTF_8)
    try {
      // readLine() returning null is the end-of-file signal; ready() only reports
      // whether a read would block, so it is not used as the loop condition.
      val lines = Iterator.continually(br.readLine()).takeWhile(_ != null).toVector
      println(s"load med words: ${lines.size}")
      lines.toSet
    } finally {
      br.close()
    }
  }

  /**
   * Builds the per-row function: each token is emitted unchanged, and tokens found in the
   * dictionary are followed by an extra "_MED_" token.
   */
  override protected def createTransformFunc: Seq[String] => Seq[String] = {
    // Copy to a local so the returned closure holds only the set, not this transformer.
    val dictionary = wordsSet
    (words: Seq[String]) =>
      // flatMap builds the result in one pass instead of re-appending to a List per token.
      words.flatMap { word =>
        if (dictionary.contains(word)) List(word, "_MED_") else List(word)
      }
  }

  /** Rejects any input column whose type is not an ArrayType. */
  override protected def validateInputType(inputType: DataType): Unit = {
    require(inputType.isInstanceOf[ArrayType],
      s"The input column must be ArrayType, but got $inputType.")
  }

  /** The output column is an array of strings whose elements may be null. */
  override protected def outputDataType: DataType = new ArrayType(StringType, true)

  override def copy(extra: ParamMap): MedicineProcess = defaultCopy(extra)
}
示例2: LuceneTransformer
//设置package包名称以及导入依赖的类
package com.sparklingpandas.sparklingml.feature
import org.apache.spark.annotation.DeveloperApi
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.sql.Dataset
import org.apache.spark.sql.types._
import org.apache.lucene.analysis.Analyzer
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute
/**
 * Base trait for transformers that tokenize a string column with a Lucene [[Analyzer]]
 * and produce an array-of-strings column.
 *
 * @tparam T the concrete transformer type (self-type required by UnaryTransformer)
 */
@DeveloperApi
trait LuceneTransformer[T <: LuceneTransformer[T]]
    extends UnaryTransformer[String, Array[String], T] {

  // Implement this function to construct an analyzer based on the provided settings.
  def buildAnalyzer(): Analyzer

  /** The output column is an array of strings. */
  override def outputDataType: DataType = ArrayType(StringType)

  /** Rejects any input column that is not a StringType. */
  override def validateInputType(inputType: DataType): Unit = {
    require(inputType.isInstanceOf[StringType],
      s"The input column must be StringType, but got $inputType.")
  }

  /**
   * Builds the per-row function: runs the analyzer over the input text and collects the
   * text of every emitted token, in order.
   */
  override def createTransformFunc: String => Array[String] = {
    val analyzer = buildAnalyzer()
    // Resolve the field name once so the closure neither looks the param up per row
    // nor needs a reference to this transformer.
    val fieldName = $(inputCol)
    (inputText: String) => {
      val inputStream = analyzer.tokenStream(fieldName, inputText)
      try {
        val builder = Array.newBuilder[String]
        val charTermAttr = inputStream.addAttribute(classOf[CharTermAttribute])
        inputStream.reset()
        while (inputStream.incrementToken) builder += charTermAttr.toString
        inputStream.end()
        builder.result()
      } finally {
        // Always release the stream, including when tokenization throws.
        inputStream.close()
      }
    }
  }
}
示例3: FeatureReducer
//设置package包名称以及导入依赖的类
package com.aluxian.tweeather.transformers
import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param._
import org.apache.spark.ml.util.{BasicParamsReadable, BasicParamsWritable, Identifiable}
import org.apache.spark.sql.types._
/**
 * Transformer that normalizes raw text: lower-cases it, replaces URLs with "URL",
 * replaces @-mentions with "USERNAME", and collapses runs of a repeated ASCII letter
 * down to exactly two occurrences.
 *
 * @param uid unique identifier of this pipeline stage
 */
class FeatureReducer(override val uid: String)
    extends UnaryTransformer[String, String, FeatureReducer] with BasicParamsWritable {

  def this() = this(Identifiable.randomUID("featureReducer"))

  /** Builds the per-row normalization function described on the class. */
  override protected def createTransformFunc: String => String = { raw =>
    // Locale.ROOT keeps lower-casing independent of the JVM default locale.
    raw.toLowerCase(java.util.Locale.ROOT)
      .replaceAll("https?:\\/\\/\\S*", "URL") // urls
      // '@' not directly preceded by a word character, plus the following word characters.
      .replaceAll("\\B@\\w*", "USERNAME") // @ mentions
      // Repeated letters: any run of two or more of the same a-z letter becomes two.
      .replaceAll("([a-z])\\1+", "$1$1")
  }

  /** Rejects any input column that is not StringType. */
  override protected def validateInputType(inputType: DataType): Unit = {
    require(inputType == StringType, s"Input type must be string type but got $inputType.")
  }

  /** The output column is a string. */
  override protected def outputDataType: DataType = StringType

  override def copy(extra: ParamMap): FeatureReducer = defaultCopy(extra)
}
/** Companion object exposing `load` for previously saved FeatureReducer instances. */
object FeatureReducer extends BasicParamsReadable[FeatureReducer] {

  /**
   * Loads a FeatureReducer from `path` by delegating to the inherited `load`.
   *
   * @param path location to load from
   */
  override def load(path: String): FeatureReducer = {
    super.load(path)
  }
}