This article collects typical usage examples of the org.apache.log4j.Logger class in Scala. If you are wondering what the Scala Logger class does, how to use it, or what real-world code that uses it looks like, the curated class examples below may help.
The following sections present 15 code examples of the Logger class, sorted by popularity by default.
Example 1: LinearRegressionPipeline
// Set up the package name and import the dependent classes
package org.sparksamples.regression.bikesharing
import org.apache.log4j.Logger
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.feature.{VectorAssembler, VectorIndexer}
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.mllib.evaluation.RegressionMetrics
import org.apache.spark.sql.{DataFrame, SparkSession}
object LinearRegressionPipeline {
@transient lazy val logger = Logger.getLogger(getClass.getName)
def linearRegressionWithVectorFormat(vectorAssembler: VectorAssembler, vectorIndexer: VectorIndexer, dataFrame: DataFrame) = {
val lr = new LinearRegression()
.setFeaturesCol("features")
.setLabelCol("label")
.setRegParam(0.1)
.setElasticNetParam(1.0)
.setMaxIter(10)
val pipeline = new Pipeline().setStages(Array(vectorAssembler, vectorIndexer, lr))
val Array(training, test) = dataFrame.randomSplit(Array(0.8, 0.2), seed = 12345)
val model = pipeline.fit(training)
val fullPredictions = model.transform(test).cache()
val predictions = fullPredictions.select("prediction").rdd.map(_.getDouble(0))
val labels = fullPredictions.select("label").rdd.map(_.getDouble(0))
val RMSE = new RegressionMetrics(predictions.zip(labels)).rootMeanSquaredError
println(s" Root mean squared error (RMSE): $RMSE")
}
def linearRegressionWithSVMFormat(spark: SparkSession) = {
// Load training data
val training = spark.read.format("libsvm")
.load("./src/main/scala/org/sparksamples/regression/dataset/BikeSharing/lsvmHours.txt")
val lr = new LinearRegression()
.setMaxIter(10)
.setRegParam(0.3)
.setElasticNetParam(0.8)
// Fit the model
val lrModel = lr.fit(training)
// Print the coefficients and intercept for linear regression
println(s"Coefficients: ${lrModel.coefficients} Intercept: ${lrModel.intercept}")
// Summarize the model over the training set and print out some metrics
val trainingSummary = lrModel.summary
println(s"numIterations: ${trainingSummary.totalIterations}")
println(s"objectiveHistory: ${trainingSummary.objectiveHistory.toList}")
trainingSummary.residuals.show()
println(s"RMSE: ${trainingSummary.rootMeanSquaredError}")
println(s"r2: ${trainingSummary.r2}")
}
}
Developer: PacktPublishing, Project: Machine-Learning-with-Spark-Second-Edition, Lines of code: 61, Source: LinearRegressionPipeline.scala
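A minimal sketch of how the object above might be driven, assuming a local SparkSession; the app name LinearRegressionPipelineApp is hypothetical, and the libsvm path is the one hard-coded in linearRegressionWithSVMFormat.
import org.apache.spark.sql.SparkSession
import org.sparksamples.regression.bikesharing.LinearRegressionPipeline

object LinearRegressionPipelineApp {
  def main(args: Array[String]): Unit = {
    // Hypothetical local driver; a cluster deployment would omit .master(...)
    val spark = SparkSession.builder()
      .appName("LinearRegressionPipelineApp")
      .master("local[*]")
      .getOrCreate()
    LinearRegressionPipeline.linearRegressionWithSVMFormat(spark)
    spark.stop()
  }
}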
Example 2: MyApp
// Set up the package name and import the dependent classes
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf
import org.apache.log4j.{Level, Logger}
object MyApp {
def main(args: Array[String]) {
val rootLogger = Logger.getRootLogger()
rootLogger.setLevel(Level.ERROR)
val file = args(0)
val conf = new SparkConf(true).setAppName("My Application")
val sc = new SparkContext(conf)
val tf = sc.textFile(file,2)
val splits = tf.flatMap(line => line.split(" ")).map(word =>(word,1))
val counts = splits.reduceByKey((x,y)=>x+y)
System.out.println(counts.collect().mkString(", "))
sc.stop()
}
}
Example 3: KMeansCases
// Set up the package name and import the dependent classes
import org.apache.spark.SparkContext
import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.log4j.{Level, Logger}
class KMeansCases(sc: SparkContext, dataFile: String, numOfCenters: Int, maxIterations:Int) {
//hide logger from console
Logger.getLogger("org").setLevel(Level.OFF)
Logger.getLogger("akka").setLevel(Level.OFF)
val data = sc.textFile(dataFile)
val parsedData = data.map(s => Vectors.dense(s.split('\t').map(_.toDouble))).cache()
def KMeansInitialCenters() = {
val initStartTime = System.nanoTime()
val centers = new KMeansInitialization().run(sc, dataFile, numOfCenters)
val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9
println(s"Initialization to find centers took " + "%.3f".format(initTimeInSeconds) + " seconds.")
val initStartTime1 = System.nanoTime()
val model = new KMeansModel(centers)
val clusterModel = new KMeans().setK(numOfCenters).setMaxIterations(maxIterations).setInitialModel(model).run(parsedData)
val initTimeInSeconds1 = (System.nanoTime() - initStartTime1) / 1e9
println(s"Initialization with custom took " + "%.3f".format(initTimeInSeconds1) + " seconds.")
println("\nnumber of points per cluster")
clusterModel.predict(parsedData).map(x=>(x,1)).reduceByKey((a,b)=>a+b).foreach(x=>println(x._2))
}
def KMeansParallel() = {
val initStartTime = System.nanoTime()
val clusterModel = KMeans.train(parsedData, numOfCenters, maxIterations, 1, KMeans.K_MEANS_PARALLEL)
val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9
println(s"Initialization with KMeansParaller took " + "%.3f".format(initTimeInSeconds) + " seconds.")
println("number of points per cluster")
clusterModel.predict(parsedData).map(x=>(x,1)).reduceByKey((a,b)=>a+b).foreach(x=>println(x._2))
}
def KMeansRandom() = {
val initStartTime = System.nanoTime()
val clusterModel = KMeans.train(parsedData, numOfCenters, maxIterations, 1, KMeans.RANDOM)
val initTimeInSeconds = (System.nanoTime() - initStartTime) / 1e9
println(s"Initialization with KMeasRandom took " + "%.3f".format(initTimeInSeconds) + " seconds.")
println("number of points per cluster")
clusterModel.predict(parsedData).map(x=>(x,1)).reduceByKey((a,b)=>a+b).foreach(x=>println(x._2))
}
}
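A hypothetical driver for the class above; the data file path and parameters are placeholders. Only the parallel and random variants are called here, because KMeansInitialCenters depends on a KMeansInitialization class that is not shown in this snippet.
import org.apache.spark.{SparkConf, SparkContext}

object KMeansCasesApp {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("KMeansCases").setMaster("local[*]"))
    // Placeholder: a tab-separated file of numeric feature vectors
    val cases = new KMeansCases(sc, "data/points.tsv", numOfCenters = 3, maxIterations = 20)
    cases.KMeansParallel()
    cases.KMeansRandom()
    sc.stop()
  }
}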
Example 4: driver
// Set up the package name and import the dependent classes
import java.io._
import utils._
import SMOTE._
import org.apache.log4j.Logger
import org.apache.log4j.Level
import breeze.linalg._
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import scala.collection.mutable.{ArrayBuffer,Map}
object driver {
def main(args: Array[String]) {
val conf = new SparkConf()
val options = args.map { arg =>
arg.dropWhile(_ == '-').split('=') match {
case Array(opt, v) => (opt -> v)
case Array(opt) => (opt -> "")
case _ => throw new IllegalArgumentException("Invalid argument: "+arg)
}
}.toMap
val rootLogger = Logger.getRootLogger()
rootLogger.setLevel(Level.ERROR)
val sc = new SparkContext(conf)
// read in general inputs
val inputDirectory = options.getOrElse("inputDirectory","")
val outputDirectory = options.getOrElse("outputDirectory","")
val numFeatures = options.getOrElse("numFeatures","0").toInt
val oversamplingPctg = options.getOrElse("oversamplingPctg","1.0").toDouble
val kNN = options.getOrElse("K","5").toInt
val delimiter = options.getOrElse("delimiter",",")
val numPartitions = options.getOrElse("numPartitions","20").toInt
SMOTE.runSMOTE(sc, inputDirectory, outputDirectory, numFeatures, oversamplingPctg, kNN, delimiter, numPartitions)
println("The algorithm has finished running")
sc.stop()
}
}
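A sketch of the option format the parser above expects; all paths and values are placeholders. In a real run these strings would be the application arguments passed through spark-submit (which also supplies the Spark master); the direct call below only illustrates the -name=value format.
// Each option is parsed as -name=value by the dropWhile/split logic above.
driver.main(Array(
  "-inputDirectory=/data/input",
  "-outputDirectory=/data/output",
  "-numFeatures=10",
  "-oversamplingPctg=1.0",
  "-K=5",
  "-delimiter=,",
  "-numPartitions=20"
))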
Example 5: info
// Set up the package name and import the dependent classes
package com.crawler.logger
import java.beans.Transient
import net.logstash.log4j.JSONEventLayoutV1
import org.apache.log4j.Logger
import org.joda.time.DateTime
import scala.collection.mutable
trait CrawlerLogger {
def info(string: Any)
def debug(string: Any)
def trace(string: Any)
def error(string: Any)
def warning(string: Any)
}
object CrawlerLoggerFactory {
private val loggers = mutable.HashMap[String, CrawlerLogger]()
def logger(appname: String, folder: String = "apps") = synchronized {
if (loggers.contains(appname)) loggers(appname)
else {
val newLogger = new CrawlerLoggerLog4j(appname, s"$folder/$appname")
loggers(appname) = newLogger
newLogger
}
}
}
class CrawlerLoggerLog4j(loggerName: String, filename: String) extends CrawlerLogger {
@Transient val logger = Logger.getLogger(loggerName)
logger.addAppender(new org.apache.log4j.RollingFileAppender(new JSONEventLayoutV1(), s"logs/$filename-${new DateTime().toString("yyyy-MM-dd-HH-mm")}"))
override def info(message: Any) = logger.info(message)
override def debug(message: Any) = logger.debug(message)
override def trace(message: Any) = logger.trace(message)
override def error(message: Any) = logger.error(message)
override def warning(message: Any) = logger.warn(message)
}
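A minimal usage sketch; "my-crawler" is a placeholder application name, and with the factory's default folder the log file ends up under logs/apps/.
import com.crawler.logger.CrawlerLoggerFactory

val log = CrawlerLoggerFactory.logger("my-crawler")
log.info("crawler started")
log.warning("rate limit approaching")
log.error("request failed")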
Example 6: DecisionTreePipeline
// Set up the package name and import the dependent classes
package org.stumbleuponclassifier
import org.apache.log4j.Logger
import org.apache.spark.ml.classification.DecisionTreeClassifier
import org.apache.spark.ml.evaluation.MulticlassClassificationEvaluator
import org.apache.spark.ml.feature.{StringIndexer, VectorAssembler}
import org.apache.spark.ml.{Pipeline, PipelineStage}
import org.apache.spark.sql.DataFrame
import scala.collection.mutable
object DecisionTreePipeline {
@transient lazy val logger = Logger.getLogger(getClass.getName)
def decisionTreePipeline(vectorAssembler: VectorAssembler, dataFrame: DataFrame) = {
val Array(training, test) = dataFrame.randomSplit(Array(0.9, 0.1), seed = 12345)
// Set up Pipeline
val stages = new mutable.ArrayBuffer[PipelineStage]()
val labelIndexer = new StringIndexer()
.setInputCol("label")
.setOutputCol("indexedLabel")
stages += labelIndexer
val dt = new DecisionTreeClassifier()
.setFeaturesCol(vectorAssembler.getOutputCol)
.setLabelCol("indexedLabel")
.setMaxDepth(5)
.setMaxBins(32)
.setMinInstancesPerNode(1)
.setMinInfoGain(0.0)
.setCacheNodeIds(false)
.setCheckpointInterval(10)
stages += vectorAssembler
stages += dt
val pipeline = new Pipeline().setStages(stages.toArray)
// Fit the Pipeline
val startTime = System.nanoTime()
//val model = pipeline.fit(training)
val model = pipeline.fit(dataFrame)
val elapsedTime = (System.nanoTime() - startTime) / 1e9
println(s"Training time: $elapsedTime seconds")
//val holdout = model.transform(test).select("prediction","label")
val holdout = model.transform(dataFrame).select("prediction","label")
// Select (prediction, true label) and compute test error
val evaluator = new MulticlassClassificationEvaluator()
.setLabelCol("label")
.setPredictionCol("prediction")
.setMetricName("accuracy")
val mAccuracy = evaluator.evaluate(holdout)
println("Test set accuracy = " + mAccuracy)
}
}
Developer: PacktPublishing, Project: Machine-Learning-with-Spark-Second-Edition, Lines of code: 60, Source: DecisionTreePipeline.scala
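A hypothetical caller showing the expected inputs: a DataFrame with a numeric "label" column plus raw feature columns, and a VectorAssembler that turns those columns into the "features" vector consumed by the pipeline stages. The column names and toy data are made up.
import org.apache.spark.ml.feature.VectorAssembler
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("DecisionTreeExample").master("local[*]").getOrCreate()
import spark.implicits._
// Toy data: (label, f1, f2)
val df = Seq((0.0, 1.2, 3.4), (1.0, 0.5, 2.2), (0.0, 2.0, 0.1), (1.0, 0.7, 1.9))
  .toDF("label", "f1", "f2")
val assembler = new VectorAssembler()
  .setInputCols(Array("f1", "f2"))
  .setOutputCol("features")
DecisionTreePipeline.decisionTreePipeline(assembler, df)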
Example 7: SVMPipeline
// Set up the package name and import the dependent classes
package org.stumbleuponclassifier
import org.apache.log4j.Logger
import org.apache.spark.SparkContext
import org.apache.spark.mllib.classification.SVMWithSGD
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
object SVMPipeline {
@transient lazy val logger = Logger.getLogger(getClass.getName)
def svmPipeline(sc: SparkContext) = {
val records = sc.textFile("/home/ubuntu/work/ml-resources/spark-ml/train_noheader.tsv").map(line => line.split("\t"))
val data = records.map { r =>
val trimmed = r.map(_.replaceAll("\"", ""))
val label = trimmed(r.size - 1).toInt
val features = trimmed.slice(4, r.size - 1).map(d => if (d == "?") 0.0 else d.toDouble)
LabeledPoint(label, Vectors.dense(features))
}
// params for SVM
val numIterations = 10
// Run training algorithm to build the model
val svmModel = SVMWithSGD.train(data, numIterations)
// Keep the default threshold here: clearing it first would make predict()
// return raw margins instead of 0/1 labels, so the comparison with the
// label below would almost never match.
val svmTotalCorrect = data.map { point =>
if (svmModel.predict(point.features) == point.label) 1 else 0
}.sum()
// Clear the threshold afterwards if raw scores are needed (e.g. for ROC metrics).
svmModel.clearThreshold()
// calculate accuracy
val svmAccuracy = svmTotalCorrect / data.count()
println(svmAccuracy)
}
}
Developer: PacktPublishing, Project: Machine-Learning-with-Spark-Second-Edition, Lines of code: 42, Source: SVMPipeline.scala
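A hypothetical driver; note that the TSV path inside svmPipeline is hard-coded in the example above, so it has to exist on the machine running this sketch.
import org.apache.spark.{SparkConf, SparkContext}

val sc = new SparkContext(new SparkConf().setAppName("SVMPipelineApp").setMaster("local[*]"))
SVMPipeline.svmPipeline(sc)
sc.stop()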
Example 8: LogUtils
// Set up the package name and import the dependent classes
package org.iamShantanu101.spark.SentimentAnalyzer.utils
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{Logging, SparkContext}
object LogUtils extends Logging {
def setLogLevels(sparkContext: SparkContext) {
sparkContext.setLogLevel(Level.WARN.toString)
val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements
if (!log4jInitialized) {
logInfo(
"""Setting log level to [WARN] for streaming executions.
|To override add a custom log4j.properties to the classpath.""".stripMargin)
Logger.getRootLogger.setLevel(Level.WARN)
}
}
}
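A short usage sketch, assuming a Spark 1.x build (org.apache.spark.Logging, which the object mixes in, became internal in Spark 2.x); the app name is a placeholder.
import org.apache.spark.{SparkConf, SparkContext}
import org.iamShantanu101.spark.SentimentAnalyzer.utils.LogUtils

val sc = new SparkContext(new SparkConf().setAppName("SentimentAnalyzer").setMaster("local[*]"))
LogUtils.setLogLevels(sc)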
Example 9: CSVUtil
// Set up the package name and import the dependent classes
package com.springml.spark.netsuite.util
import com.springml.spark.netsuite.model.XPathInput
import org.apache.log4j.Logger
object CSVUtil {
@transient val logger = Logger.getLogger(this.getClass.getName)
def readCSV(csvLocation : String) : Map[String, String] = {
var resultMap : Map[String, String] = Map.empty
if (csvLocation != null) {
val bufferedSource = scala.io.Source.fromFile(csvLocation)
for (line <- bufferedSource.getLines) {
if (!line.startsWith("#")) {
val cols = line.split(",").map(_.trim)
if (cols.length != 2) {
throw new Exception("Invalid Row : " + line + "\n Please make sure rows are specified as 'Key','Value' in " + csvLocation)
}
resultMap += cols(0) -> cols(1)
} else {
logger.info("Ignoring commented line " + line)
}
}
bufferedSource.close()
}
resultMap
}
}
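A minimal usage sketch; /tmp/mapping.csv is a placeholder for a two-column file with one key,value pair per line (lines starting with # are skipped).
import com.springml.spark.netsuite.util.CSVUtil

val mapping: Map[String, String] = CSVUtil.readCSV("/tmp/mapping.csv")
mapping.foreach { case (key, value) => println(s"$key -> $value") }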
Example 10: Logging
// Set up the package name and import the dependent classes
package org.hpi.esb.commons.util
import org.apache.log4j.{Level, Logger}
trait Logging {
var logger: Logger = Logger.getLogger("senskaLogger")
}
object Logging {
def setToInfo() {
Logger.getRootLogger.setLevel(Level.INFO)
}
def setToDebug() {
Logger.getRootLogger.setLevel(Level.DEBUG)
}
}
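A small sketch of how the trait and its companion object might be used; BenchmarkRunner is a hypothetical class.
import org.hpi.esb.commons.util.Logging

class BenchmarkRunner extends Logging {
  def run(): Unit = logger.info("benchmark started")
}

Logging.setToInfo()
new BenchmarkRunner().run()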
Example 11: PrepArgParser
// Set up the package name and import the dependent classes
import org.apache.log4j.{Level, Logger}
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.{SparkConf, SparkContext}
import utils.Utils
import scala.util.Try
import scalax.file.Path
class PrepArgParser(arguments: Seq[String]) extends org.rogach.scallop.ScallopConf(arguments) {
val dataset = opt[String](required = true, short = 'd',
descr = "absolute address of the libsvm dataset. This must be provided.")
val partitions = opt[Int](required = false, default = Some(4), short = 'p', validate = (0 <),
descr = "Number of spark partitions to be used. Optional.")
val dir = opt[String](required = true, default = Some("../results/"), short = 'w', descr = "working directory where results " +
"are stored. Default is \"../results\". ")
val method = opt[String](required = true, short = 'm',
descr = "Method can be either \"Regression\" or \"Classification\". This must be provided")
verify()
}
object PrepareData {
def main(args: Array[String]) {
//Spark conf
val conf = new SparkConf().setAppName("Distributed Machine Learning").setMaster("local[*]")
val sc = new SparkContext(conf)
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
//Turn off logs
val rootLogger = Logger.getRootLogger()
rootLogger.setLevel(Level.ERROR)
//Parse arguments
val parser = new PrepArgParser(args)
val dataset = parser.dataset()
var workingDir = parser.dir()
val numPartitions = parser.partitions()
val method = parser.method()
//Load data
val (train, test) = method match {
case "Classification" => Utils.loadAbsolutLibSVMBinaryClassification(dataset, numPartitions, sc)
case "Regression" => Utils.loadAbsolutLibSVMRegression(dataset, numPartitions, sc)
case _ => throw new IllegalArgumentException("The method " + method + " is not supported.")
}
// append "/" to workingDir if necessary
workingDir = workingDir + ( if (workingDir.takeRight(1) != "/") "/" else "" )
val trainPath: Path = Path.fromString(workingDir + "train")
Try(trainPath.deleteRecursively(continueOnFailure = false))
val testPath: Path = Path.fromString(workingDir + "test")
Try(testPath.deleteRecursively(continueOnFailure = false))
MLUtils.saveAsLibSVMFile(train, workingDir + "train")
MLUtils.saveAsLibSVMFile(test, workingDir + "test")
}
}
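A hypothetical invocation showing the Scallop options defined above; the dataset path and values are placeholders.
PrepareData.main(Array(
  "--dataset", "/data/a9a.libsvm",
  "--dir", "../results/",
  "--method", "Classification",
  "--partitions", "8"
))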
Example 12: SparkPi
// Set up the package name and import the dependent classes
package com.zmyuan.spark.submit
import kafka.utils.Logging
import org.apache.log4j.Logger
import scala.math.random
import org.apache.spark.sql.SparkSession
object SparkPi {
val loggerName = this.getClass.getName
lazy val logger = Logger.getLogger(loggerName)
def main(args: Array[String]) {
val spark = SparkSession
.builder
.appName("Spark Pi")
.getOrCreate()
val slices = if (args.length > 0) args(0).toInt else 2
val n = math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow
val count = spark.sparkContext.parallelize(1 until n, slices).map { i =>
val x = random * 2 - 1
val y = random * 2 - 1
if (x*x + y*y < 1) 1 else 0
}.reduce(_ + _)
logger.info("Pi is roughly " + 4.0 * count / (n - 1))
spark.stop()
}
}
Example 13: Helper
// Set up the package name and import the dependent classes
import org.apache.log4j.Logger
import org.apache.log4j.Level
import scala.collection.mutable.HashMap
object Helper {
Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
Logger
.getLogger("org.apache.spark.storage.BlockManager")
.setLevel(Level.ERROR)
def configureTwitterCredentials(apiKey: String,
apiSecret: String,
accessToken: String,
accessTokenSecret: String) {
val configs = Map(
"apiKey" -> apiKey,
"apiSecret" -> apiSecret,
"accessToken" -> accessToken,
"accessTokenSecret" -> accessTokenSecret
)
println("Configuring Twitter OAuth")
configs.foreach {
case (key, value) =>
if (value.trim.isEmpty) {
throw new Exception(
"Error setting authentication - value for " + key + " not set -> value is " + value)
}
val fullKey = "twitter4j.oauth." + key.replace("api", "consumer")
System.setProperty(fullKey, value.trim)
println("\tProperty " + fullKey + " set as [" + value.trim + "]")
}
println()
}
}
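A usage sketch with obviously fake credential values; the method simply copies them into the corresponding twitter4j.oauth.* system properties.
Helper.configureTwitterCredentials(
  apiKey = "YOUR_API_KEY",
  apiSecret = "YOUR_API_SECRET",
  accessToken = "YOUR_ACCESS_TOKEN",
  accessTokenSecret = "YOUR_ACCESS_TOKEN_SECRET")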
Example 14: ModelBuilder
// Set up the package name and import the dependent classes
package org.wikimedia.research.recommendation.job.translation
import java.io.File
import org.apache.log4j.{LogManager, Logger}
import org.apache.spark.sql.{DataFrame, SparkSession}
import scala.collection.parallel.mutable.ParArray
object ModelBuilder {
val log: Logger = LogManager.getLogger(ModelBuilder.getClass)
def buildModels(spark: SparkSession,
modelsOutputDir: Option[File],
sites: ParArray[String],
featureData: DataFrame): Unit = {
log.info("Building Models")
sites.foreach(target =>
try {
log.info("Building model for " + target)
log.info("Getting work data for " + target)
val workData: DataFrame = Utils.getWorkData(spark, featureData, target)
val Array(trainingData, testData) = workData.randomSplit(Array(0.7, 0.3))
log.info("Training model for " + target)
val model = Utils.REGRESSOR.fit(trainingData)
log.info("Writing model to file for " + target)
modelsOutputDir.foreach(o => model.write.save(new File(o, target).getAbsolutePath))
log.info("Testing model for " + target)
val predictions = model.transform(testData)
val rmse = Utils.EVALUATOR.evaluate(predictions)
log.info("Root Mean Squared Error (RMSE) on test data for " + target + " = " + rmse)
} catch {
case unknown: Throwable => log.error("Build model for " + target + " failed", unknown)
}
)
}
}
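A hypothetical caller sketch; featureData, the target wiki codes, and the models output directory are assumptions, and the Utils helpers referenced above are expected to be on the classpath.
import java.io.File
import org.apache.spark.sql.{DataFrame, SparkSession}

def buildAllModels(spark: SparkSession, featureData: DataFrame): Unit = {
  val sites = Array("enwiki", "dewiki").par  // ParArray[String]
  ModelBuilder.buildModels(spark, Some(new File("/tmp/models")), sites, featureData)
}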
Example 15: ScorePredictor
// Set up the package name and import the dependent classes
package org.wikimedia.research.recommendation.job.translation
import java.io.File
import org.apache.log4j.{LogManager, Logger}
import org.apache.spark.ml.regression.RandomForestRegressionModel
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
import org.apache.spark.sql.{DataFrame, Row, SaveMode, SparkSession}
import scala.collection.parallel.mutable.ParArray
object ScorePredictor {
val log: Logger = LogManager.getLogger(ScorePredictor.getClass)
def predictScores(spark: SparkSession,
modelsInputDir: File,
predictionsOutputDir: Option[File],
sites: ParArray[String],
featureData: DataFrame): Unit = {
log.info("Scoring items")
val predictions: Array[DataFrame] = sites.map(target => {
try {
log.info("Scoring for " + target)
log.info("Getting work data for " + target)
val workData: DataFrame = Utils.getWorkData(spark, featureData, target, exists = false)
log.info("Loading model for " + target)
val model = RandomForestRegressionModel.load(
new File(modelsInputDir, target).getAbsolutePath)
log.info("Scoring data for " + target)
val predictions = model
.setPredictionCol(target)
.transform(workData)
.select("id", target)
predictions
} catch {
case unknown: Throwable =>
log.error("Score for " + target + " failed", unknown)
val schema = StructType(Seq(
StructField("id", StringType, nullable = false),
StructField(target, DoubleType, nullable = true)))
spark.createDataFrame(spark.sparkContext.emptyRDD[Row], schema)
}
}).toArray
val predictedScores = predictions.reduce((left, right) => left.join(right, Seq("id"), "outer"))
log.info("Saving predictions")
predictionsOutputDir.foreach(f = o =>
predictedScores.coalesce(1)
.write
.mode(SaveMode.ErrorIfExists)
.option("header", value = true)
.option("compression", "bzip2")
.csv(new File(o, "allPredictions").getAbsolutePath))
}
}
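A matching sketch for the scorer; as in the previous example, featureData, the site list, and the directories are placeholders.
import java.io.File
import org.apache.spark.sql.{DataFrame, SparkSession}

def scoreAllSites(spark: SparkSession, featureData: DataFrame): Unit = {
  val sites = Array("enwiki", "dewiki").par
  ScorePredictor.predictScores(spark, new File("/tmp/models"), Some(new File("/tmp/predictions")), sites, featureData)
}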