

Scala Path Class Code Examples

This article collects typical usage examples of the Scala class org.apache.hadoop.fs.Path. If you are wondering what exactly the Path class does, how to use it, or what real-world usage looks like, the curated class examples below may help.


The sections below show 15 code examples of the Path class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps surface better Scala code examples.
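Before the project-level examples, here is a minimal, self-contained sketch of the Path API itself (constructing paths, inspecting name and parent, and resolving a path against a FileSystem). The file locations and the namenode address are made up for illustration.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path

object PathBasics {
  def main(args: Array[String]): Unit = {
    val p = new Path("hdfs://localhost:9000/user/demo/data.txt")
    println(p.getName)     // data.txt
    println(p.getParent)   // hdfs://localhost:9000/user/demo
    println(p.isAbsolute)  // true

    // A Path can also be built from a parent Path and a child segment
    val sibling = new Path(p.getParent, "other.txt")

    // Path.getFileSystem picks the FileSystem that matches the path's scheme;
    // this call only succeeds if an HDFS namenode is actually reachable at localhost:9000
    val fs = sibling.getFileSystem(new Configuration())
    println(fs.exists(sibling))
  }
}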

Example 1: HdfsUtils

// Set the package name and import the required classes
package com.appleeye.spark.utils

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.SparkContext

object HdfsUtils {
  def pathExists(path: String, sc: SparkContext): Boolean = {
    val conf = sc.hadoopConfiguration
    val fs = FileSystem.get(conf)
    fs.exists(new Path(path))
  }

  def getFullPath(path:String, sc: SparkContext): String = {
    val conf = sc.hadoopConfiguration
    val fs = FileSystem.get(conf)
    fs.getFileStatus(new Path(path)).getPath().toString
  }

  def getAllFiles(path:String, sc: SparkContext): Seq[String] = {
    val conf = sc.hadoopConfiguration
    val fs = FileSystem.get(conf)
    val files = fs.listStatus(new Path(path))
    files.map(_.getPath().toString)
  }
} 
Author: MiracleZhou, Project: SparkStreaming, Lines of code: 26, Source: HdfsUtils.scala
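A minimal sketch of calling HdfsUtils from a Spark application; the local master URL and the /tmp/input directory are assumptions for illustration.

import org.apache.spark.{SparkConf, SparkContext}
import com.appleeye.spark.utils.HdfsUtils

object HdfsUtilsDemo {
  def main(args: Array[String]): Unit = {
    // Local SparkContext; in a real deployment the master is supplied by spark-submit
    val sc = new SparkContext(new SparkConf().setAppName("HdfsUtilsDemo").setMaster("local[*]"))
    val input = "/tmp/input" // hypothetical HDFS directory

    if (HdfsUtils.pathExists(input, sc)) {
      println(HdfsUtils.getFullPath(input, sc))         // fully qualified path
      HdfsUtils.getAllFiles(input, sc).foreach(println) // every entry under the directory
    }
    sc.stop()
  }
}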

Example 2: HdfsClientConf

// Set the package name and import the required classes
package pub.ayada.scala.hdfsutils

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.fs.FileSystem

class HdfsClientConf private (val coreSiteXMLPath: String, val hdfsSiteXMLPath: String) {
    private val conf = new Configuration()
    conf.addResource(new Path(coreSiteXMLPath))
    conf.addResource(new Path(hdfsSiteXMLPath))

    def getHdfsClientConf(): Configuration = conf

    def getHdpFileSystem(): FileSystem = FileSystem.get(conf)
}
object HdfsClientConf {

    private var instance: HdfsClientConf = null

    def getOneTimeInstance(coreSiteXMLPath: String, hdfsSiteXMLPath: String): Configuration = {
        new HdfsClientConf(coreSiteXMLPath, hdfsSiteXMLPath).getHdfsClientConf()
    }
    def setSingletonInstance(coreSiteXMLPath: String, hdfsSiteXMLPath: String): Configuration = {
        if (instance == null)
            instance = new HdfsClientConf(coreSiteXMLPath, hdfsSiteXMLPath)

        instance.getHdfsClientConf()
    }
    def getSingletonInstance(): HdfsClientConf = {
        if (instance == null)
            throw new NullPointerException("Instantiate HdfsClientConf before retrieving")

        instance
    }
} 
Author: k-ayada, Project: HdfsUtils, Lines of code: 36, Source: HdfsClientConf.scala
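A sketch of how the singleton might be initialized once and reused elsewhere; the *-site.xml locations are placeholders.

import org.apache.hadoop.fs.Path
import pub.ayada.scala.hdfsutils.HdfsClientConf

object HdfsClientConfDemo {
  def main(args: Array[String]): Unit = {
    // Initialize the shared instance with the cluster's client configuration files (placeholder paths)
    HdfsClientConf.setSingletonInstance("/etc/hadoop/conf/core-site.xml", "/etc/hadoop/conf/hdfs-site.xml")

    // Any later caller can fetch the shared instance and obtain a FileSystem from it
    val fs = HdfsClientConf.getSingletonInstance().getHdpFileSystem()
    println(fs.exists(new Path("/tmp")))
  }
}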

Example 3: HDFS

// Set the package name and import the required classes
package org.mireynol.util

import java.io.{BufferedInputStream, OutputStreamWriter}

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.slf4j.{Logger, LoggerFactory}

import scala.collection.mutable.ListBuffer
import scala.io.Source

object HDFS {

  def log : Logger = LoggerFactory.getLogger( HDFS.getClass )

  val hadoop : FileSystem = {
    val conf = new Configuration( )
    conf.set( "fs.defaultFS", "hdfs://localhost:9000" )
    FileSystem.get( conf )
  }

  def readAndMap( path : String, mapper : ( String ) => Unit ) = {
    if ( hadoop.exists( new Path( path ) ) ) {
      val is = new BufferedInputStream( hadoop.open( new Path( path ) ) )
      Source.fromInputStream( is ).getLines( ).foreach( mapper )
    }
    else {
      // TODO - error logic here
    }
  }

  def write( filename : String, content : Iterator[ String ] ) = {
    val path = new Path( filename )
    val out = new OutputStreamWriter( hadoop.create( path, false ) )
    content.foreach( str => out.write( str + "\n" ) )
    out.flush( )
    out.close( )
  }

  def ls( path : String ) : List[ String ] = {
    val files = hadoop.listFiles( new Path( path ), false )
    val filenames = ListBuffer[ String ]( )
    while ( files.hasNext ) filenames += files.next( ).getPath( ).toString( )
    filenames.toList
  }

  def rm( path : String, recursive : Boolean ) : Unit = {
    if ( hadoop.exists( new Path( path ) ) ) {
      println( "deleting file : " + path )
      hadoop.delete( new Path( path ), recursive )
    }
    else {
      println( "File/Directory" + path + " does not exist" )
      log.warn( "File/Directory" + path + " does not exist" )
    }
  }

  def cat( path : String ) = Source.fromInputStream( hadoop.open( new Path( path ) ) ).getLines( ).foreach( println )

} 
Author: reynoldsm88, Project: spark-drools, Lines of code: 61, Source: HDFS.scala
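A short usage sketch of the HDFS helper; it assumes the namenode address hard-coded in the object (hdfs://localhost:9000) is reachable, and the path below is hypothetical.

import org.mireynol.util.HDFS

object HdfsHelperDemo {
  def main(args: Array[String]): Unit = {
    val path = "/tmp/hdfs-demo.txt" // hypothetical file

    HDFS.write(path, Iterator("first line", "second line")) // create (non-overwriting) and write
    HDFS.cat(path)                                           // prints both lines
    HDFS.ls("/tmp").foreach(println)                         // list files under /tmp
    HDFS.rm(path, recursive = false)                         // clean up
  }
}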

Example 4: RedditVariationApp

// Set the package name and import the required classes
package org.apress.prospark

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{ Milliseconds, Seconds, StreamingContext }
import org.apache.hadoop.io.{ Text, LongWritable, IntWritable }
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat
import org.apache.spark.streaming.dstream.DStream
import org.apache.hadoop.mapred.TextOutputFormat
import org.apache.hadoop.mapreduce.lib.output.{ TextOutputFormat => NewTextOutputFormat }
import org.apache.spark.streaming.dstream.PairDStreamFunctions
import org.apache.log4j.LogManager
import org.json4s._
import org.json4s.native.JsonMethods._
import java.text.SimpleDateFormat
import java.util.Date

object RedditVariationApp {
  def main(args: Array[String]) {
    if (args.length != 2) {
      System.err.println(
        "Usage: RedditVariationApp <appname> <input_path>")
      System.exit(1)
    }
    val Seq(appName, inputPath) = args.toSeq
    val LOG = LogManager.getLogger(this.getClass)

    val conf = new SparkConf()
      .setAppName(appName)
      .setJars(SparkContext.jarOfClass(this.getClass).toSeq)

    val ssc = new StreamingContext(conf, Seconds(1))
    LOG.info("Started at %d".format(ssc.sparkContext.startTime))

    val comments = ssc.fileStream[LongWritable, Text, TextInputFormat](inputPath, (f: Path) => true, newFilesOnly = false).map(pair => pair._2.toString)

    val merged = comments.union(comments)

    val repartitionedComments = comments.repartition(4)

    val rddMin = comments.glom().map(arr =>
      arr.minBy(rec => ((parse(rec) \ "created_utc").values.toString.toInt)))
    // At least one output operation is required before the streaming context can start;
    // print() is added here so the excerpt runs end to end.
    rddMin.print()

    ssc.start()
    ssc.awaitTermination()

  }
} 
Author: ZubairNabi, Project: prosparkstreaming, Lines of code: 50, Source: L3-DStreamVariation.scala

Example 5: list

// Set the package name and import the required classes
package uk.co.odinconsultants.bitcoin.integration.hadoop

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.hdfs.DistributedFileSystem
import uk.co.odinconsultants.bitcoin.core.Logging
import uk.co.odinconsultants.bitcoin.integration.hadoop.HadoopForTesting.hdfsCluster

import scala.collection.mutable.ArrayBuffer

trait MiniHadoopClusterRunning extends Logging {

  val distributedFS: DistributedFileSystem  = hdfsCluster.getFileSystem
  val conf: Configuration                   = HadoopForTesting.conf
  val dir                                   = s"/${this.getClass.getSimpleName}/"

  def list(path: String): List[Path] = {
    info(s"Looking in $path")

    val files = distributedFS.listFiles(new Path(path), true)

    val allPaths = ArrayBuffer[Path]()
    while (files.hasNext) {
      val file = files.next
      allPaths += file.getPath
    }

    allPaths.toList
  }

  def copyToHdfs(inputFile: Path): Path = {
    val fromFile  = inputFile.getName
    distributedFS.mkdirs(new Path(dir))
    val toFile    = new Path(dir + fromFile)
    info(s"Copying '$fromFile' to '$toFile' (${toFile.getName})")
    distributedFS.copyFromLocalFile(false, true, inputFile, toFile)
    toFile
  }

  def localFile(local: String): Path = {
    val classLoader = getClass.getClassLoader
    val localFQN    = classLoader.getResource(local).getFile
    new Path(localFQN)
  }
} 
Author: PhillHenry, Project: Cryptorigin, Lines of code: 46, Source: MiniHadoopClusterRunning.scala

Example 6: Sentiment140Downloader

// Set the package name and import the required classes
package com.aluxian.tweeather.scripts

import java.net.URL
import java.util.zip.ZipInputStream

import org.apache.hadoop.fs.Path
import org.apache.spark.Logging


object Sentiment140Downloader extends Script with Logging {

  val downloadUrl = "http://cs.stanford.edu/people/alecmgo/trainingandtestdata.zip"

  override def main(args: Array[String]) {
    super.main(args)

    logInfo(s"Downloading sentiment140 dataset from $downloadUrl")
    val zip = new ZipInputStream(new URL(downloadUrl).openStream())
    val buffer = new Array[Byte](4 * 1024)

    Stream.continually(zip.getNextEntry)
      .takeWhile(_ != null)
      .foreach { entry =>
        val fileName = entry.getName
        val out = hdfs.create(new Path(s"tw/sentiment/140/downloaded/$fileName"))
        logInfo(s"Downloading $fileName")

        Stream.continually(zip.read(buffer))
          .takeWhile(_ != -1)
          .foreach { count =>
            out.write(buffer, 0, count)
          }

        out.close()
      }

    zip.close()
    logInfo("Downloading finished")
    sc.stop()
  }

} 
Author: cnajeefa, Project: Tourism-Sentiment-Analysis, Lines of code: 43, Source: Sentiment140Downloader.scala

Example 7: BenchmarkYarnPrepare

// Set the package name and import the required classes
package com.microsoft.spark.perf

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object BenchmarkYarnPrepare {

  private def uploadFile(localPath: String, remoteWorkingDir: String): Unit = {
    new Path(remoteWorkingDir).getFileSystem(new Configuration()).
      copyFromLocalFile(new Path(localPath), new Path(remoteWorkingDir))
  }

  private def createRemoteWorkingDir(
      remoteWorkingDir: String,
      localJarPath: String,
      sparkSubmitParamsPath: String,
      benchmarkParamsPath: String): Unit = {
    uploadFile(localJarPath, remoteWorkingDir + "/spark-benchmark.jar")
    uploadFile(sparkSubmitParamsPath, remoteWorkingDir + "/spark.conf")
    uploadFile(benchmarkParamsPath, remoteWorkingDir + "/benchmark.conf")
  }

  def main(args: Array[String]): Unit = {
    val remoteWorkingDir = args(0)
    val localJarPath = args(1)
    val sparkSubmitParamsFilePath = args(2)
    val benchmarkParamsFilePath = args(3)

    createRemoteWorkingDir(remoteWorkingDir, localJarPath, sparkSubmitParamsFilePath,
      benchmarkParamsFilePath)
  }
} 
Author: hdinsight, Project: SparkPerf, Lines of code: 33, Source: BenchmarkYarnPrepare.scala

Example 8: AvroParquetWriterFn

// Set the package name and import the required classes
package io.eels.component.parquet.avro

import com.sksamuel.exts.Logging
import io.eels.component.parquet.ParquetWriterConfig
import org.apache.avro.Schema
import org.apache.avro.generic.GenericRecord
import org.apache.hadoop.fs.Path
import org.apache.parquet.avro.AvroParquetWriter
import org.apache.parquet.hadoop.{ParquetFileWriter, ParquetWriter}


object AvroParquetWriterFn extends Logging {
  def apply(path: Path, avroSchema: Schema): ParquetWriter[GenericRecord] = {
    val config = ParquetWriterConfig()
    AvroParquetWriter.builder[GenericRecord](path)
      .withSchema(avroSchema)
      .withCompressionCodec(config.compressionCodec)
      .withPageSize(config.pageSize)
      .withRowGroupSize(config.blockSize)
      .withDictionaryEncoding(config.enableDictionary)
      .withWriteMode(ParquetFileWriter.Mode.CREATE)
      .withValidation(config.validating)
      .build()
  }
} 
Author: 51zero, Project: eel-sdk, Lines of code: 26, Source: AvroParquetWriterFn.scala
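A hedged sketch of writing a single record through the factory above; the two-field schema and the output location are made up for illustration.

import io.eels.component.parquet.avro.AvroParquetWriterFn
import org.apache.avro.SchemaBuilder
import org.apache.avro.generic.GenericRecordBuilder
import org.apache.hadoop.fs.Path

object AvroParquetWriterFnDemo {
  def main(args: Array[String]): Unit = {
    // Hypothetical Avro schema with two required fields
    val schema = SchemaBuilder.record("person").fields()
      .requiredString("name")
      .requiredInt("age")
      .endRecord()

    val writer = AvroParquetWriterFn(new Path("/tmp/person.parquet"), schema)
    val record = new GenericRecordBuilder(schema).set("name", "alice").set("age", 30).build()
    writer.write(record)
    writer.close() // flushes the row group and writes the Parquet footer
  }
}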

Example 9: FileUtils

// Set the package name and import the required classes
package com.asto.dmp.xxx.util

import com.asto.dmp.xxx.base.Constants
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.Logging
import org.apache.spark.rdd.RDD


object FileUtils extends Logging {
  private val conf = new Configuration()
  conf.set("fs.defaultFS", Constants.Hadoop.DEFAULT_FS)
  conf.set("mapreduce.jobtracker.address", Constants.Hadoop.JOBTRACKER_ADDRESS)

  def deleteFilesInHDFS(paths: String*) = {
    paths.foreach { path =>
      val filePath = new Path(path)
      val HDFSFilesSystem = filePath.getFileSystem(new Configuration())
      if (HDFSFilesSystem.exists(filePath)) {
        logInfo(s"?????$filePath")
        HDFSFilesSystem.delete(filePath, true)
      }
    }
  }

  def saveAsTextFile[T <: Product](rdd: RDD[T], savePath: String) = {
    deleteFilesInHDFS(savePath)
    logInfo(s"?${savePath}?????")
    rdd.map(_.productIterator.mkString(Constants.OutputPath.SEPARATOR)).coalesce(1).saveAsTextFile(savePath)
  }

  def saveAsTextFile(text: String, savePath: String) = {
    deleteFilesInHDFS(savePath)
    logInfo(s"?${savePath}?????")
    val out = FileSystem.get(conf).create(new Path(savePath))
    out.write(text.getBytes)
    out.flush()
    out.close()
  }
} 
Author: zj-lingxin, Project: asto-sparksql, Lines of code: 42, Source: FileUtils.scala
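A usage sketch for FileUtils; it assumes the project's Constants values point at a reachable cluster, and the output paths are placeholders.

import org.apache.spark.{SparkConf, SparkContext}
import com.asto.dmp.xxx.util.FileUtils

object FileUtilsDemo {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("FileUtilsDemo").setMaster("local[*]"))

    // Tuple RDDs are Products, so they satisfy the type bound; any existing output is deleted first
    val rdd = sc.parallelize(Seq(("a", 1), ("b", 2)))
    FileUtils.saveAsTextFile(rdd, "/tmp/file-utils-demo")

    // Plain-text overload
    FileUtils.saveAsTextFile("hello", "/tmp/file-utils-demo-text")
    sc.stop()
  }
}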

Example 10: FrequencyMapReducer

// Set the package name and import the required classes
package com.argcv.cse8803.mapreducebasic

import com.argcv.valhalla.console.ColorForConsole._
import com.argcv.valhalla.utils.Awakable
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{IntWritable, Text}
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat


object FrequencyMapReducer extends Awakable {

  def main(args: Array[String]): Unit = {
    // create a hadoop job and set main class
    val job = Job.getInstance()
    job.setJarByClass(FrequencyMapReducer.getClass)
    job.setJobName("Frequency")

    // set the input & output path
    FileInputFormat.addInputPath(job, new Path(args.head))
    FileOutputFormat.setOutputPath(job, new Path(s"${args(1)}-${System.currentTimeMillis()}"))

    // set mapper & reducer
    job.setMapperClass(FrequencyMapper.instance)
    job.setReducerClass(FrequencyReducer.instance)

    // specify the type of the output
    job.setOutputKeyClass(new Text().getClass)
    job.setOutputValueClass(new IntWritable().getClass)

    // run
    logger.info(s"job finished, status [${if (job.waitForCompletion(true)) "OK".withColor(GREEN) else "FAILED".withColor(RED)}]")
  }

} 
Author: yuikns, Project: cse8803, Lines of code: 37, Source: FrequencyMapReducer.scala

Example 11: Converter

// Set the package name and import the required classes
package com.dataoptimo.imgprocessing.convert
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.Text
import org.apache.hadoop.io.BytesWritable
import org.apache.hadoop.fs.{Path, FileSystem}
import org.apache.hadoop.io.IOUtils
import org.apache.hadoop.io.SequenceFile
import java.io.IOException
import java.lang.IllegalArgumentException

class Converter(conf: Configuration) {

  def imageToSequence(srcPath: String, dstPath: String) {
    try {
      val fs = FileSystem.get(conf)
      val inPath = new Path(srcPath)
      val outPath = new Path(dstPath)
      val key = new Text()
      val value = new BytesWritable()
      val in = fs.open(inPath)
      val buffer = new Array[Byte](in.available())
      in.read(buffer)
      val writer = SequenceFile.createWriter(fs, conf, outPath, key.getClass(), value.getClass())
      writer.append(new Text(inPath.getName()), new BytesWritable(buffer))
      IOUtils.closeStream(writer)
    } catch {
      case io: IOException => println(io.getMessage)
      case illegalArgument: IllegalArgumentException => println(illegalArgument.getMessage)
    }
  }
}
Author: mfawadalam, Project: imgprocessing, Lines of code: 36, Source: Converters.scala
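A brief sketch of driving the converter; the fs.defaultFS value and the image locations are assumptions.

import org.apache.hadoop.conf.Configuration
import com.dataoptimo.imgprocessing.convert.Converter

object ConverterDemo {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    conf.set("fs.defaultFS", "hdfs://localhost:9000") // assumed namenode address

    // Wrap a single image into a SequenceFile keyed by the image's file name
    new Converter(conf).imageToSequence("/images/cat.jpg", "/images/cat.seq")
  }
}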

Example 12: HdfsWriteTest

// Set the package name and import the required classes
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{Path, FileSystem}
import org.scalatest._

class HdfsWriteTest extends FlatSpec with Matchers {
  "Hello" should "have tests" in {

    def write(uri: String, filePath: String, data: Array[Byte]) = {
//      System.setProperty("HADOOP_USER_NAME", "Mariusz")
      val path = new Path(filePath)
      val conf = new Configuration()
      conf.set("fs.defaultFS", uri)
      val fs = FileSystem.get(conf)
      val os = fs.create(path)
      os.write(data)
      os.close() // close the stream so the data is flushed before the FileSystem is closed
      fs.close()
    }

    write("hdfs://0.0.0.0:19000", "hdfs://0.0.0.0:19000/user/cloudera/test.txt", "Hello World".getBytes)


  }
} 
Author: ralreiroe, Project: embarcadero, Lines of code: 24, Source: HdfsWriteTest.scala

Example 13: main

// Set the package name and import the required classes
package io.github.qf6101.topwords

import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SparkSession


object TestTopWORDS { // enclosing object restored here; its name is taken from the source file TestTopWORDS.scala
  def main(args: Array[String]) {
    // setup spark session
    val spark = SparkSession.builder().master("local[1]").appName(this.getClass.toString).getOrCreate()
    val inputFile = "test_data/story_of_stone.txt"
    val outputFile = "test_data/test_output"
    val files = FileSystem.get(spark.sparkContext.hadoopConfiguration)
    if (files.exists(new Path(outputFile))) files.delete(new Path(outputFile), true)
    val corpus = spark.sparkContext.textFile(inputFile)
    new TopWORDS(
      tauL = 10,
      tauF = 5,
      textLenThld = 2000,
      useProbThld = 1E-8,
      numIterations = 10,
      convergeTol = 1E-3,
      wordBoundaryThld = 0.0)
      .run(corpus, outputFile + "/dictionary", outputFile + "/segmented_texts")
  }
} 
Author: qf6101, Project: topwords, Lines of code: 26, Source: TestTopWORDS.scala

Example 14: ConvertsSpec

// Set the package name and import the required classes
package com.newegg.eims.DataPorter.HDFS

import java.io.File

import com.newegg.eims.DataPorter.HDFS.Converts._
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.security.UserGroupInformation
import org.scalatest.{FlatSpec, Matchers}


class ConvertsSpec extends FlatSpec with Matchers {
  val currentDir = new File(".").getCanonicalPath + File.separator + "target" + File.separator

  "hdfs" should "can do with hadoop user" in {
    var test = 1
    new File(currentDir + "test.txt").doAs(UserGroupInformation.createRemoteUser("vq83"), f => test = 2)
    test should be(2)
  }

  it should "no copy file when not exists file path" in {
    new File(currentDir + "test.txt").copyToHDFS(new Path(currentDir + "test1.txt"), new Configuration())
    new File(currentDir + "test1.txt").exists() shouldBe false
  }

  it should "copy file when exists file path" in {
    val f = new File(currentDir + "test.txt")
    f.createNewFile()
    f.exists() shouldBe true
    new File(currentDir + "test.txt").copyToHDFS(new Path(currentDir + "test1.txt"), new Configuration())
    val f2 = new File(currentDir + "test1.txt")
    f2.exists() shouldBe true
    f.delete() shouldBe true
    f2.delete() shouldBe true
  }
} 
Author: CodeBabyBear, Project: DataPorter, Lines of code: 37, Source: ConvertsSpec.scala

Example 15: saveParquet

// Set the package name and import the required classes
package com.newegg.eims.DataPorter.Parquet

import com.newegg.eims.DataPorter.Base._
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapred.JobConf
import parquet.hadoop.ParquetOutputFormat
import parquet.hadoop.metadata.CompressionCodecName


    // Note: the enclosing object and the implicit wrapper class that provide `set`
    // are not shown in this excerpt; the trailing braces below close them.
    def saveParquet(path: String, hadoopConf: JobConf = new JobConf(),
                    compressionCodecName: CompressionCodecName = CompressionCodecName.SNAPPY): Path = {
      hadoopConf.set(ParquetOutputFormat.COMPRESSION, compressionCodecName.name())
      val rows = set.toDataRowSet.toRowIterator
      val schema = rows.getSchema
      val writer = ParquetFileFormat.prepareWrite(schema, new Path(path), hadoopConf)
      try {
        while (rows.hasNext) {
          val row = rows.next()
          writer.write(row)
        }
      } finally {
        writer.close()
      }
      new Path(path)
    }
  }

} 
Author: CodeBabyBear, Project: DataPorter, Lines of code: 29, Source: Converts.scala


Note: The org.apache.hadoop.fs.Path class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers; copyright of the source code belongs to the original authors. Please consult the corresponding project's license before redistributing or using the code. Do not reproduce this article without permission.