当前位置: 首页>>代码示例>>Scala>>正文


Scala ArrayType类代码示例

本文整理汇总了Scala中org.apache.spark.sql.types.ArrayType的典型用法代码示例。如果您正苦于以下问题:Scala ArrayType类的具体用法?Scala ArrayType怎么用?Scala ArrayType使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了ArrayType类的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: MedicineProcess

//设置package包名称以及导入依赖的类
package cn.com.warlock.practice.ml

import java.io.BufferedReader
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths}

import org.apache.spark.ml.UnaryTransformer
import org.apache.spark.ml.param.ParamMap
import org.apache.spark.ml.util.Identifiable
import org.apache.spark.sql.types.{ArrayType, DataType, StringType}

import scala.collection.mutable.Set

/**
 * Spark ML transformer that tags dictionary words inside a sequence of tokens.
 *
 * Every input token is kept in order; a token that occurs in the dictionary is
 * immediately followed by the marker token `"_MED_"`.
 *
 * @param uid  unique identifier of this transformer instance
 * @param dict path of a UTF-8 text file holding one dictionary word per line
 */
class MedicineProcess(override val uid: String, private val dict: String)
  extends UnaryTransformer[Seq[String], Seq[String], MedicineProcess] {

  /** Creates a transformer with a random uid prefixed by "med". */
  def this(dict: String) = this(Identifiable.randomUID("med"), dict)

  // Dictionary words, loaded once when the transformer is constructed.
  private val wordsSet = loadDict

  /**
   * Reads the dictionary file line by line into a set and prints how many
   * lines were read.
   *
   * The reader is always closed, and end of input is detected through the
   * `null` returned by `readLine()`. `ready()` only reports whether a read
   * would block; it is not an end-of-file test.
   */
  private def loadDict: Set[String] = {
    val br: BufferedReader = Files.newBufferedReader(Paths.get(dict), StandardCharsets.UTF_8)
    try {
      val words = Set[String]()
      var count = 0

      Iterator.continually(br.readLine()).takeWhile(_ != null).foreach { line =>
        words += line
        count += 1
      }

      println(s"load med words: $count")

      words
    } finally {
      br.close()
    }
  }

  /**
   * Returns the function applied to each input row: every token is emitted,
   * and tokens contained in the dictionary are followed by `"_MED_"`.
   */
  override protected def createTransformFunc: Seq[String] => Seq[String] = (words: Seq[String]) => {
    // Single linear pass; the result is materialised as a List, as before.
    words.iterator.flatMap { word =>
      if (wordsSet.contains(word)) Iterator(word, "_MED_") else Iterator(word)
    }.toList
  }

  /** Rejects any input column whose type is not an `ArrayType`. */
  override protected def validateInputType(inputType: DataType): Unit = {
    require(inputType.isInstanceOf[ArrayType],
      s"The input column must be ArrayType, but got $inputType.")
  }

  /** The output column is an array of strings that may contain nulls. */
  override protected def outputDataType: DataType = new ArrayType(StringType, true)

  /**
   * Copies this transformer, keeping both its uid and its dictionary path.
   *
   * `defaultCopy` is deliberately not used: it re-instantiates the class
   * through a constructor taking a single `String` (the uid). For this class
   * that signature is the `this(dict)` auxiliary constructor, so the uid would
   * be treated as the dictionary path.
   */
  override def copy(extra: ParamMap): MedicineProcess =
    copyValues(new MedicineProcess(uid, dict), extra)
}
开发者ID:warlock-china,项目名称:spark-meepo,代码行数:59,代码来源:MedicineProcess.scala

示例2: EmailModel

//设置package包名称以及导入依赖的类
package com.hugolinton.model

import java.util.UUID

import com.pff.PSTMessage
import org.apache.spark.sql.types.{ArrayType, StringType, StructField, StructType}


/**
 * One e-mail message flattened into plain fields.
 *
 * @param id        identifier of the message
 * @param emailBody text body of the message
 * @param sentTo    entries of the "to" recipient list
 * @param ccTo      entries of the "cc" recipient list
 */
case class EmailModel(
    id: String,
    emailBody: String,
    sentTo: Array[String],
    ccTo: Array[String]
) extends Product

/**
 * Companion of the `EmailModel` case class: holds the matching Spark SQL
 * schema and the conversion from a `PSTMessage`.
 */
object EmailModel {

  // Punctuation and whitespace removed from every individual recipient entry.
  private val RecipientNoise = """[.,\/#!$%\^&\*;:{}=\-_`~()\s]"""

  /** Spark SQL schema whose fields mirror the fields of `EmailModel`. */
  final val schema: StructType = StructType(Array(
    StructField("id", StringType, false),
    StructField("emailBody", StringType, false),
    StructField("sentTo", ArrayType(StringType)),
    StructField("ccTo", ArrayType(StringType))
  ))

  /**
   * Builds an `EmailModel` with a freshly generated random UUID as id.
   *
   * @param pst message to convert
   * @return model holding the body and the normalised "to" / "cc" entries
   */
  def pstToModel(pst: PSTMessage): EmailModel = {
    val uniqueID = UUID.randomUUID().toString
    EmailModel(
      uniqueID,
      safeGet(pst.getBody),
      splitRecipients(pst.getDisplayTo),
      splitRecipients(pst.getDisplayCC)
    )
  }

  /**
   * Splits a ';'-separated display list into lower-cased entries with
   * punctuation and whitespace removed.
   *
   * The split happens before the clean-up. Stripping first would also remove
   * the ';' separators and fuse all recipients into a single entry. Blank
   * entries are kept as empty strings; an empty or null list yields one empty
   * entry.
   */
  private def splitRecipients(displayList: String): Array[String] =
    safeGet(displayList)
      .split(";")
      .map(_.replaceAll(RecipientNoise, "").toLowerCase)

  /**
   * Returns `field`, or the empty string when `field` is null.
   *
   * @param field possibly-null string
   * @return a non-null string
   */
  def safeGet(field: String): String = Option(field).getOrElse("")
}
开发者ID:HugoLinton,项目名称:PST-to-Parquet,代码行数:34,代码来源:EmailModel.scala


注:本文中的org.apache.spark.sql.types.ArrayType类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。