This article collects typical usage examples of the org.apache.hadoop.fs.LocatedFileStatus class in Scala. If you are wondering how LocatedFileStatus is used in practice, or looking for concrete examples, the curated class examples here may help.
Four code examples of the LocatedFileStatus class are shown below, sorted by popularity by default.
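Before diving into the examples, here is a minimal sketch of where LocatedFileStatus usually comes from: Hadoop's FileSystem.listFiles returns a RemoteIterator[LocatedFileStatus], and each status carries the file's path, length and block locations. The path below is a placeholder.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus, Path}

object LocatedFileStatusDemo extends App {
  // obtain a FileSystem from the default Hadoop configuration
  val fs = FileSystem.get(new Configuration())
  // listFiles returns a RemoteIterator[LocatedFileStatus]
  val it = fs.listFiles(new Path("/tmp"), /* recursive = */ true)
  while (it.hasNext) {
    val status: LocatedFileStatus = it.next()
    // each status exposes the file's path, size and block locations
    println(s"${status.getPath} ${status.getLen} bytes, blocks=${status.getBlockLocations.length}")
  }
}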
Example 1: HdfsOps

// package declaration and imports for the dependent classes
package io.eels

import com.sksamuel.exts.Logging
import io.eels.util.{HdfsIterator, PathIterator}
import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus, Path}

object HdfsOps extends Logging {

  def makePathVisible(path: Path)(implicit fs: FileSystem): Unit = {
    if (path.getName.startsWith(".")) {
      logger.info(s"Making $path visible by stripping leading .")
      val dest = new Path(path.getParent, path.getName.drop(1))
      fs.rename(path, dest)
    }
  }

  def findFiles(path: Path, recursive: Boolean, fs: FileSystem): Iterator[LocatedFileStatus] = {
    HdfsIterator.remote(fs.listFiles(path, recursive))
  }

  def mkdirsp(path: Path, fs: FileSystem): Boolean = PathIterator(path).forall(fs.mkdirs)
}
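A brief usage sketch follows. The paths are placeholders, and it assumes a filesystem reachable through the default Hadoop configuration; HdfsIterator.remote (imported above) is what adapts Hadoop's RemoteIterator to a Scala Iterator.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object HdfsOpsExample extends App {
  implicit val fs: FileSystem = FileSystem.get(new Configuration())

  // list every file under /data, descending into subdirectories
  HdfsOps.findFiles(new Path("/data"), recursive = true, fs)
    .foreach(status => println(s"${status.getPath} ${status.getLen} bytes"))

  // create /data/out along with any missing parent directories
  HdfsOps.mkdirsp(new Path("/data/out"), fs)

  // rename /data/.staged to /data/staged so it is no longer hidden
  HdfsOps.makePathVisible(new Path("/data/.staged"))
}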
Example 2: HivePartitionScanner

// package declaration and imports for the dependent classes
package io.eels.component.hive

import com.sksamuel.exts.Logging
import com.typesafe.config.{Config, ConfigFactory}
import io.eels.component.hive.partition.PartitionMetaData
import io.eels.schema.PartitionConstraint
import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus}

// scans partitions for files, returning the files and the metadata object for each partition
class HivePartitionScanner(implicit fs: FileSystem) extends Logging {

  private val config: Config = ConfigFactory.load()
  private val missingPartitionAction: String = config.getString("eel.hive.source.missingPartitionAction")

  def scan(partitions: Seq[PartitionMetaData],
           constraints: Seq[PartitionConstraint] = Nil): Map[PartitionMetaData, Seq[LocatedFileStatus]] = {
    logger.debug(s"Scanning ${partitions.size} partitions for applicable files ${partitions.map(_.location).mkString(", ").take(100)}")

    // first we filter out any partitions not matching the constraints
    val filteredPartitions = partitions.filter { meta =>
      constraints.forall(_.eval(meta.partition))
    }
    logger.debug(s"Filtered partitions: ${filteredPartitions.map(_.location).mkString(", ")}")

    // next, we check that the directories the partitions point to actually exist;
    // this avoids a situation where a location exists in the metastore but not on disk
    val extantPartitions = filteredPartitions.filter { partition =>
      if (fs.exists(partition.location)) {
        true
      } else {
        if (missingPartitionAction == "error") {
          throw new IllegalStateException(s"Partition [${partition.name}] was specified in the hive metastore at [${partition.location}] but did not exist on disk. To disable these exceptions set eel.hive.source.missingPartitionAction=warn or eel.hive.source.missingPartitionAction=none")
        } else if (missingPartitionAction == "warn") {
          logger.warn(s"Partition [${partition.name}] was specified in the hive metastore at [${partition.location}] but did not exist on disk. To disable these warnings set eel.hive.source.missingPartitionAction=none")
          false
        } else {
          false
        }
      }
    }

    // finally we grab all the data files from each of the remaining partitions
    extantPartitions.map { meta =>
      meta -> HiveFileScanner(meta.location, false)
    }.toMap
  }
}
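A hedged usage sketch, assuming the partition metadata has already been fetched from the hive metastore; loadPartitionMetaData below is a hypothetical helper standing in for that lookup, which lives elsewhere in eel's hive component.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.FileSystem
import io.eels.component.hive.partition.PartitionMetaData

implicit val fs: FileSystem = FileSystem.get(new Configuration())

// hypothetical helper: load the partition metadata for a table from the hive metastore
def loadPartitionMetaData(db: String, table: String): Seq[PartitionMetaData] = ???

val partitions = loadPartitionMetaData("mydb", "mytable")
val scanner = new HivePartitionScanner()
// maps each existing partition to the data files found in its directory
val filesByPartition = scanner.scan(partitions)
filesByPartition.foreach { case (meta, files) =>
  println(s"${meta.location}: ${files.size} files")
}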
Example 3: HiveFileScanner

// package declaration and imports for the dependent classes
package io.eels.component.hive

import com.sksamuel.exts.Logging
import com.typesafe.config.ConfigFactory
import io.eels.util.HdfsIterator
import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus, Path}

// given a hadoop path, will look for files inside that path that match the
// configured settings for hidden files
// does not return directories
object HiveFileScanner extends Logging {

  private val config = ConfigFactory.load()
  private val ignoreHiddenFiles = config.getBoolean("eel.hive.source.ignoreHiddenFiles")
  private val hiddenFilePattern = config.getString("eel.hive.source.hiddenFilePattern")

  // returns true if the given file should be skipped based on the config settings
  private def skip(file: LocatedFileStatus): Boolean = {
    file.getLen == 0L || ignoreHiddenFiles && file.getPath.getName.matches(hiddenFilePattern)
  }

  def apply(path: Path, recursive: Boolean)(implicit fs: FileSystem): Seq[LocatedFileStatus] = {
    logger.debug(s"Scanning $path, filtering=$ignoreHiddenFiles, pattern=$hiddenFilePattern")
    val files: List[LocatedFileStatus] = if (fs.exists(path)) {
      val files = fs.listFiles(path, recursive)
      HdfsIterator.remote(files)
        .filter(_.isFile)
        .filterNot(skip)
        .toList
    } else {
      Nil
    }
    logger.debug(s"Scanner found ${files.size} files")
    files
  }
}
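A short usage sketch; the warehouse path is a placeholder, and the filtering behaviour is driven by the eel.hive.source.ignoreHiddenFiles and eel.hive.source.hiddenFilePattern config keys shown above.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

implicit val fs: FileSystem = FileSystem.get(new Configuration())
// returns data files only; empty files and configured hidden files are filtered out
val files = HiveFileScanner(new Path("/user/hive/warehouse/mytable"), recursive = false)
files.foreach(f => println(s"${f.getPath} (${f.getLen} bytes)"))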
Example 4: HiveFilePublisher

// package declaration and imports for the dependent classes
package io.eels.component.hive

import com.sksamuel.exts.io.Using
import io.eels.datastream.{Publisher, Subscriber, Subscription}
import io.eels.schema.{Partition, StructType}
import io.eels.{Predicate, _}
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus}

class HiveFilePublisher(dialect: HiveDialect,
                        file: LocatedFileStatus,
                        metastoreSchema: StructType,
                        projectionSchema: StructType,
                        predicate: Option[Predicate],
                        partition: Partition)
                       (implicit fs: FileSystem, conf: Configuration) extends Publisher[Seq[Row]] with Using {

  require(projectionSchema.fieldNames.forall { it => it == it.toLowerCase() }, s"Use only lower case field names with hive")

  override def subscribe(subscriber: Subscriber[Seq[Row]]): Unit = {
    val partitionMap: Map[String, Any] = partition.entries.map { it => (it.key, it.value) }.toMap

    // the schema we send to the dialect must have any partition fields removed, because those
    // fields won't exist in the data files; partition values are not always written
    // and are instead inferred from the partition itself
    val projectionFields = projectionSchema.fields.filterNot(field => partition.containsKey(field.name))
    val projectionWithoutPartitions = StructType(projectionFields)

    // since we removed the partition fields from the target schema, we must repopulate them after the read;
    // we also need to throw away the dummy field if we had an empty schema
    val publisher = dialect.input(file.getPath, metastoreSchema, projectionWithoutPartitions, predicate)
    publisher.subscribe(new Subscriber[Seq[Row]] {
      override def subscribed(s: Subscription): Unit = subscriber.subscribed(s)
      override def next(chunk: Seq[Row]): Unit = {
        val aligned = chunk.map { row =>
          if (projectionFields.isEmpty) {
            val values = projectionSchema.fieldNames().map(partitionMap.apply)
            Row(projectionSchema, values.toVector)
          } else {
            RowUtils.rowAlign(row, projectionSchema, partitionMap)
          }
        }
        subscriber.next(aligned)
      }
      override def completed(): Unit = subscriber.completed()
      override def error(t: Throwable): Unit = subscriber.error(t)
    })
  }
}
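A hedged wiring sketch: the dialect, file status, schemas and partition are assumed to be supplied by the surrounding hive source code, so only the subscriber side is shown concretely. The Subscriber methods mirror those implemented in the example above.

// `dialect`, `file`, `metastoreSchema`, `projectionSchema` and `partition` are assumed
// to be provided by the enclosing hive source, along with the implicit fs and conf
val publisher = new HiveFilePublisher(dialect, file, metastoreSchema, projectionSchema, None, partition)
publisher.subscribe(new Subscriber[Seq[Row]] {
  override def subscribed(s: Subscription): Unit = () // keep the subscription if cancellation is needed
  override def next(chunk: Seq[Row]): Unit = chunk.foreach(println)
  override def completed(): Unit = println("read completed")
  override def error(t: Throwable): Unit = t.printStackTrace()
})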