当前位置: 首页>>代码示例>>Scala>>正文


Scala LocatedFileStatus类代码示例

本文整理汇总了Scala中org.apache.hadoop.fs.LocatedFileStatus的典型用法代码示例。如果您正苦于以下问题:Scala LocatedFileStatus类的具体用法?Scala LocatedFileStatus怎么用?Scala LocatedFileStatus使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了LocatedFileStatus类的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Scala代码示例。

示例1: HdfsOps

//设置package包名称以及导入依赖的类
package io.eels

import com.sksamuel.exts.Logging
import io.eels.util.{HdfsIterator, PathIterator}
import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus, Path}

object HdfsOps extends Logging {

  def makePathVisible(path: Path)(implicit fs: FileSystem): Unit = {
    if (path.getName.startsWith(".")) {
      logger.info(s"Making $path visible by stripping leading .")
      val dest = new Path(path.getParent, path.getName.drop(1))
      fs.rename(path, dest)
    }
  }

  def findFiles(path: Path, recursive: Boolean, fs: FileSystem): Iterator[LocatedFileStatus] = {
    HdfsIterator.remote(fs.listFiles(path, recursive))
  }

  def mkdirsp(path: Path, fs: FileSystem): Boolean = PathIterator(path).forall(fs.mkdirs)
} 
开发者ID:51zero,项目名称:eel-sdk,代码行数:23,代码来源:HdfsOps.scala

示例2: HivePartitionScanner

//设置package包名称以及导入依赖的类
package io.eels.component.hive

import com.sksamuel.exts.Logging
import com.typesafe.config.{Config, ConfigFactory}
import io.eels.component.hive.partition.PartitionMetaData
import io.eels.schema.PartitionConstraint
import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus}

// scans partitions for files, returning the files and the meta data object for each partition
class HivePartitionScanner(implicit fs: FileSystem) extends Logging {

  private val config: Config = ConfigFactory.load()
  private val missingPartitionAction: String = config.getString("eel.hive.source.missingPartitionAction")

  def scan(partitions: Seq[PartitionMetaData],
           constraints: Seq[PartitionConstraint] = Nil): Map[PartitionMetaData, Seq[LocatedFileStatus]] = {
    logger.debug(s"Scanning ${partitions.size} partitions for applicable files ${partitions.map(_.location).mkString(", ").take(100)}")

    // first we filter out any partitions not matching the constraints
    val filteredPartitions = partitions.filter { meta =>
      constraints.forall(_.eval(meta.partition))
    }
    logger.debug(s"Filtered partitions: ${filteredPartitions.map(_.location).mkString(", ")})")

    // next, we check that the directories that the partitions point to actually exist
    // this will avoid a situation where a location exists in the metastore but not on disk
    val exantPartitions = filteredPartitions.filter { partition =>
      if (fs.exists(partition.location)) {
        true
      } else {
        if (missingPartitionAction == "error") {
          throw new IllegalStateException(s"Partition [${partition.name}] was specified in the hive metastore at [${partition.location}] but did not exist on disk. To disable these exceptions set eel.hive.source.missingPartitionAction=warn or eel.hive.source.missingPartitionAction=none")
        } else if (missingPartitionAction == "warn") {
          logger.warn(s"Partition [${partition.name}] was specified in the hive metastore at [${partition.location}] but did not exist on disk. To disable these warnings set eel.hive.source.missingPartitionAction=none")
          false
        } else {
          false
        }
      }
    }

    // next we grab all the data files from each of these partitions
    exantPartitions.map { meta =>
      meta -> HiveFileScanner(meta.location, false)
    }.toMap
  }
} 
开发者ID:51zero,项目名称:eel-sdk,代码行数:48,代码来源:HivePartitionScanner.scala

示例3: HiveFileScanner

//设置package包名称以及导入依赖的类
package io.eels.component.hive

import com.sksamuel.exts.Logging
import com.typesafe.config.ConfigFactory
import io.eels.util.HdfsIterator
import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus, Path}

// given a hadoop path, will look for files inside that path that match the
// configured settings for hidden files
// does not return directories
object HiveFileScanner extends Logging {

  private val config = ConfigFactory.load()
  private val ignoreHiddenFiles = config.getBoolean("eel.hive.source.ignoreHiddenFiles")
  private val hiddenFilePattern = config.getString("eel.hive.source.hiddenFilePattern")

  // returns true if the given file should be considered based on the config settings
  private def skip(file: LocatedFileStatus): Boolean = {
    file.getLen == 0L || ignoreHiddenFiles && file.getPath.getName.matches(hiddenFilePattern)
  }

  def apply(path: Path, recursive: Boolean)(implicit fs: FileSystem): Seq[LocatedFileStatus] = {
    logger.debug(s"Scanning $path, filtering=$ignoreHiddenFiles, pattern=$hiddenFilePattern")
    val files: List[LocatedFileStatus] = if (fs.exists(path)) {
      val files = fs.listFiles(path, recursive)
      HdfsIterator.remote(files)
          .filter(_.isFile)
          .filterNot(skip)
          .toList
    } else {
      Nil
    }
    logger.debug(s"Scanner found ${files.size} files")
    files
  }
} 
开发者ID:51zero,项目名称:eel-sdk,代码行数:37,代码来源:HiveFileScanner.scala

示例4: HiveFilePublisher

//设置package包名称以及导入依赖的类
package io.eels.component.hive

import com.sksamuel.exts.io.Using
import io.eels.datastream.{Subscription, Publisher, Subscriber}
import io.eels.schema.{Partition, StructType}
import io.eels.{Predicate, _}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus}


class HiveFilePublisher(dialect: HiveDialect,
                        file: LocatedFileStatus,
                        metastoreSchema: StructType,
                        projectionSchema: StructType,
                        predicate: Option[Predicate],
                        partition: Partition)
                       (implicit fs: FileSystem, conf: Configuration) extends Publisher[Seq[Row]] with Using {
  require(projectionSchema.fieldNames.forall { it => it == it.toLowerCase() }, s"Use only lower case field names with hive")

  override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = {

    val partitionMap: Map[String, Any] = partition.entries.map { it => (it.key, it.value) }.toMap

    // the schema we send to the dialect must have any partition fields removed, because those
    // fields won't exist in the data files. This is because partitions are not always written
    // and instead inferred from the partition itself.
    val projectionFields = projectionSchema.fields.filterNot(field => partition.containsKey(field.name))
    val projectionWithoutPartitions = StructType(projectionFields)

    // since we removed the partition fields from the target schema, we must repopulate them after the read
    // we also need to throw away the dummy field if we had an empty schema
    val publisher = dialect.input(file.getPath, metastoreSchema, projectionWithoutPartitions, predicate)
    publisher.subscribe(new Subscriber[Seq[Row]] {
      override def subscribed(s: Subscription): Unit = subscriber.subscribed(s)
      override def next(chunk: Seq[Row]): Unit = {
        val aligned = chunk.map { row =>
          if (projectionFields.isEmpty) {
            val values = projectionSchema.fieldNames().map(partitionMap.apply)
            Row(projectionSchema, values.toVector)
          } else {
            RowUtils.rowAlign(row, projectionSchema, partitionMap)
          }
        }
        subscriber.next(aligned)
      }
      override def completed(): Unit = subscriber.completed()
      override def error(t: Throwable): Unit = subscriber.error(t)
    })
  }
} 
开发者ID:51zero,项目名称:eel-sdk,代码行数:51,代码来源:HiveFilePublisher.scala


注:本文中的org.apache.hadoop.fs.LocatedFileStatus类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。