本文整理汇总了Java中org.apache.hadoop.fs.PathFilter类的典型用法代码示例。如果您正苦于以下问题:Java PathFilter类的具体用法?Java PathFilter怎么用?Java PathFilter使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
PathFilter类属于org.apache.hadoop.fs包,在下文中一共展示了PathFilter类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: listLocatedStatus
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
@Override
public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
    final PathFilter filter) throws FileNotFoundException, IOException {
  // Resolve the view-fs path down to the backing target file system.
  final InodeTree.ResolveResult<FileSystem> res =
      fsState.resolve(getUriPath(f), true);
  final RemoteIterator<LocatedFileStatus> targetIter =
      res.targetFileSystem.listLocatedStatus(res.remainingPath);

  // Internal (mount-table) directories need no path re-mapping.
  if (res.isInternalDir()) {
    return targetIter;
  }

  // Wrap the target iterator so every returned status is re-rooted
  // ("chrooted") back into this view file system's namespace.
  return new RemoteIterator<LocatedFileStatus>() {
    @Override
    public boolean hasNext() throws IOException {
      return targetIter.hasNext();
    }

    @Override
    public LocatedFileStatus next() throws IOException {
      final LocatedFileStatus status = targetIter.next();
      return (LocatedFileStatus) fixFileStatus(status,
          getChrootedPath(res, status, f));
    }
  };
}
示例2: traverseImpl
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/**
 * Recursively collects the statuses of all plain files under {@code path}.
 *
 * @param storage storage abstraction used to test for existence and list entries
 * @param path    directory to walk
 * @param filter  applied to each file's path; only accepted files are kept
 * @return all accepted files beneath {@code path}; empty if the path does not exist
 * @throws IOException if listing the storage fails
 */
private static ArrayList<FileStatus> traverseImpl(Storage storage, Path path, PathFilter filter)
    throws IOException {
  ArrayList<FileStatus> found = new ArrayList<>();
  if (!storage.exists(path.toString())) {
    return found;
  }
  for (FileStatus status : storage.listStatus(path.toString())) {
    if (status.isDirectory()) {
      // Descend into subdirectories and merge their results.
      found.addAll(traverseImpl(storage, status.getPath(), filter));
    } else if (filter.accept(status.getPath())) {
      found.add(status);
    }
  }
  return found;
}
示例3: isDirReadable
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/**
 * Determines whether a directory is readable as Parquet data: true when a
 * Parquet summary/metadata file is present, or when the first listed data
 * file passing the Drill filter is itself readable.
 */
boolean isDirReadable(DrillFileSystem fs, FileStatus dir) {
  try {
    // A Parquet summary file (or Drill metadata file) makes the whole
    // directory readable without probing individual files.
    Path summaryPath = new Path(dir.getPath(), ParquetFileWriter.PARQUET_METADATA_FILE);
    if (fs.exists(summaryPath) || metaDataFileExists(fs, dir)) {
      return true;
    }
    // Otherwise probe the first data file accepted by the Drill filter.
    FileStatus[] candidates = fs.listStatus(dir.getPath(), new DrillPathFilter());
    return candidates.length > 0 && super.isFileReadable(fs, candidates[0]);
  } catch (IOException e) {
    logger.info("Failure while attempting to check for Parquet metadata file.", e);
    return false;
  }
}
示例4: addInputPathRecursively
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/**
 * Recursively adds every file found under {@code path} into {@code result}.
 * The filter is applied to directories as well as files: a rejected
 * directory is not descended into.
 *
 * @param result collector for all matching files
 * @param fs the file system that owns {@code path}
 * @param path the directory to scan
 * @param inputFilter filter applied to both files and directories
 * @throws IOException if listing the path fails
 */
protected void addInputPathRecursively(List<FileStatus> result,
    FileSystem fs, Path path, PathFilter inputFilter)
    throws IOException {
  RemoteIterator<LocatedFileStatus> entries = fs.listLocatedStatus(path);
  while (entries.hasNext()) {
    LocatedFileStatus entry = entries.next();
    if (!inputFilter.accept(entry.getPath())) {
      continue; // filtered out — note this also prunes whole directories
    }
    if (entry.isDirectory()) {
      addInputPathRecursively(result, fs, entry.getPath(), inputFilter);
    } else {
      result.add(entry);
    }
  }
}
示例5: LocatedFileStatusFetcher
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/**
 * Builds a fetcher that lists located file statuses for the given
 * directories in parallel on a daemon thread pool.
 *
 * @param conf configuration for the job
 * @param dirs the initial list of paths
 * @param recursive whether to traverse the paths recursively
 * @param inputFilter inputFilter to apply to the resulting paths
 * @param newApi whether using the mapred or mapreduce API
 * @throws InterruptedException
 * @throws IOException
 */
public LocatedFileStatusFetcher(Configuration conf, Path[] dirs,
    boolean recursive, PathFilter inputFilter, boolean newApi) throws InterruptedException,
    IOException {
  // Pool size is configurable; the default comes from FileInputFormat.
  int threadCount = conf.getInt(FileInputFormat.LIST_STATUS_NUM_THREADS,
      FileInputFormat.DEFAULT_LIST_STATUS_NUM_THREADS);
  rawExec = Executors.newFixedThreadPool(
      threadCount,
      new ThreadFactoryBuilder()
          .setDaemon(true) // daemon threads so the JVM can exit mid-listing
          .setNameFormat("GetFileInfo #%d")
          .build());
  exec = MoreExecutors.listeningDecorator(rawExec);
  resultQueue = new LinkedBlockingQueue<List<FileStatus>>();
  this.conf = conf;
  this.inputDirs = dirs;
  this.recursive = recursive;
  this.inputFilter = inputFilter;
  this.newApi = newApi;
}
示例6: scanDirectory
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/**
 * Lists the plain files directly under {@code path} that pass
 * {@code pathFilter}. A missing directory is logged and yields an empty
 * list rather than propagating the error.
 */
@VisibleForTesting
protected static List<FileStatus> scanDirectory(Path path, FileContext fc,
    PathFilter pathFilter) throws IOException {
  Path qualified = fc.makeQualified(path);
  List<FileStatus> matches = new ArrayList<FileStatus>();
  try {
    RemoteIterator<FileStatus> it = fc.listStatus(qualified);
    while (it.hasNext()) {
      FileStatus candidate = it.next();
      // Only plain files are of interest; subdirectories are skipped.
      if (candidate.isFile() && pathFilter.accept(candidate.getPath())) {
        matches.add(candidate);
      }
    }
  } catch (FileNotFoundException fe) {
    LOG.error("Error while scanning directory " + qualified, fe);
  }
  return matches;
}
示例7: publishPlainDataStatistics
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/**
 * Computes and logs the aggregate size and count of the input data files
 * under {@code inputDir}, ignoring MapReduce output/system files.
 */
static DataStatistics publishPlainDataStatistics(Configuration conf,
                                                Path inputDir)
throws IOException {
  FileSystem fs = inputDir.getFileSystem(conf);
  PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter();
  long totalBytes = 0;
  long totalFiles = 0;
  // Walk the whole tree (recursive=true) and count accepted files only.
  RemoteIterator<LocatedFileStatus> files = fs.listFiles(inputDir, true);
  while (files.hasNext()) {
    LocatedFileStatus file = files.next();
    if (filter.accept(file.getPath())) {
      totalBytes += file.getLen();
      totalFiles++;
    }
  }
  // publish the plain data statistics
  LOG.info("Total size of input data : "
      + StringUtils.humanReadableInt(totalBytes));
  LOG.info("Total number of input data files : " + totalFiles);
  return new DataStatistics(totalBytes, totalFiles, false);
}
示例8: splitLog
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/**
 * Base split method that splits WAL files matching a filter. Callers should
 * pass the appropriate filter for meta and non-meta WALs.
 *
 * @param serverNames logs belonging to these servers will be split; this will rename the log
 *                    directory out from under a soft-failed server
 * @param filter selects which WAL files are split
 * @throws IOException
 */
public void splitLog(final Set<ServerName> serverNames, PathFilter filter) throws IOException {
  List<Path> logDirs = getLogDirs(serverNames);
  splitLogManager.handleDeadWorkers(serverNames);
  final long startTime = EnvironmentEdgeManager.currentTime();
  long splitLogSize = splitLogManager.splitLogDistributed(serverNames, logDirs, filter);
  long splitTime = EnvironmentEdgeManager.currentTime() - startTime;
  if (this.metricsMasterFilesystem != null) {
    // Meta WAL splits are reported separately from ordinary WAL splits.
    if (filter == META_FILTER) {
      this.metricsMasterFilesystem.addMetaWALSplit(splitTime, splitLogSize);
    } else {
      this.metricsMasterFilesystem.addSplit(splitTime, splitLogSize);
    }
  }
}
示例9: getFileList
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/**
 * Get a list of paths that need to be split given a set of server-specific directories and
 * optionally a filter.
 *
 * See {@link DefaultWALProvider#getServerNameFromWALDirectoryName} for more info on directory
 * layout.
 *
 * Should be package-private, but is needed by
 * {@link org.apache.hadoop.hbase.wal.WALSplitter#split(Path, Path, Path, FileSystem,
 * Configuration, WALFactory)} for tests.
 */
@VisibleForTesting
public static FileStatus[] getFileList(final Configuration conf, final List<Path> logDirs,
    final PathFilter filter)
    throws IOException {
  final List<FileStatus> collected = new ArrayList<FileStatus>();
  for (Path logDir : logDirs) {
    final FileSystem fs = logDir.getFileSystem(conf);
    // A missing directory is not an error — there is simply nothing to split.
    if (!fs.exists(logDir)) {
      LOG.warn(logDir + " doesn't exist. Nothing to do!");
      continue;
    }
    FileStatus[] logfiles = FSUtils.listStatus(fs, logDir, filter);
    if (logfiles == null || logfiles.length == 0) {
      LOG.info(logDir + " is empty dir, no logs to split");
    } else {
      Collections.addAll(collected, logfiles);
    }
  }
  return collected.toArray(new FileStatus[collected.size()]);
}
示例10: visitRegionStoreFiles
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/**
 * Iterate over the region store files
 *
 * @param fs {@link FileSystem}
 * @param regionDir {@link Path} to the region directory
 * @param visitor callback object to get the store files
 * @throws IOException if an error occurred while scanning the directory
 */
public static void visitRegionStoreFiles(final FileSystem fs, final Path regionDir,
    final StoreFileVisitor visitor) throws IOException {
  // Column families each live in their own subdirectory of the region dir.
  FileStatus[] familyDirs = FSUtils.listStatus(fs, regionDir, new FSUtils.FamilyDirFilter(fs));
  if (familyDirs == null) {
    if (LOG.isTraceEnabled()) {
      LOG.trace("No families under region directory:" + regionDir);
    }
    return;
  }
  PathFilter hfileFilter = new FSUtils.FileFilter(fs);
  for (FileStatus familyStatus : familyDirs) {
    Path familyDir = familyStatus.getPath();
    String familyName = familyDir.getName();
    // get all the storeFiles in the family
    FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir, hfileFilter);
    if (storeFiles == null) {
      if (LOG.isTraceEnabled()) {
        LOG.trace("No hfiles found for family: " + familyDir + ", skipping.");
      }
      continue;
    }
    for (FileStatus storeFile : storeFiles) {
      Path hfilePath = storeFile.getPath();
      visitor.storeFile(regionDir.getName(), familyName, hfilePath.getName());
    }
  }
}
示例11: getStoreFiles
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/**
 * Returns all files belonging to the given region directory. Could return an
 * empty list.
 *
 * @param fs The file system reference.
 * @param regionDir The region directory to scan.
 * @return The list of files found.
 * @throws IOException When scanning the files fails.
 */
static List<Path> getStoreFiles(FileSystem fs, Path regionDir)
    throws IOException {
  List<Path> res = new ArrayList<Path>();
  // Each column family is a subdirectory of the region directory.
  PathFilter dirFilter = new FSUtils.DirFilter(fs);
  FileStatus[] familyDirs = fs.listStatus(regionDir, dirFilter);
  for (FileStatus dir : familyDirs) {
    FileStatus[] files = fs.listStatus(dir.getPath());
    for (FileStatus file : files) {
      // isDirectory() replaces the deprecated FileStatus.isDir().
      if (!file.isDirectory()) {
        res.add(file.getPath());
      }
    }
  }
  return res;
}
示例12: MantaRemoteIterator
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/**
 * Creates a new instance wrapping a {@link MantaDirectoryListingIterator}.
 *
 * @param filter filter object that will filter out results (may be null)
 * @param stream backing stream
 * @param path base path that is being iterated
 * @param fs reference to the underlying filesystem
 * @param autocloseWhenFinished flag indicate whether or not to close all
 *                              resources when we have finished iterating
 */
public MantaRemoteIterator(final PathFilter filter,
                           final Stream<MantaObject> stream,
                           final Path path,
                           final FileSystem fs,
                           final boolean autocloseWhenFinished) {
  this.filter = filter;
  // Without a filter we iterate the raw stream; otherwise only objects
  // whose paths pass the filter are surfaced.
  this.inner = (filter == null)
      ? stream.iterator()
      : stream.filter(obj -> filter.accept(new Path(obj.getPath()))).iterator();
  this.closeableStream = stream;
  this.path = path;
  this.fs = fs;
  this.autocloseWhenFinished = autocloseWhenFinished;
  // Pre-load the first acceptable element so hasNext() stays cheap.
  this.nextRef.set(nextAcceptable());
}
示例13: getReaders
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/** Open the output generated by this format. */
public static MapFile.Reader[] getReaders(Path dir,
                                          Configuration conf) throws IOException {
  FileSystem fs = dir.getFileSystem(conf);
  // Skip hidden/system entries such as "_SUCCESS" and "."-prefixed files.
  PathFilter visibleOnly = new PathFilter() {
    @Override
    public boolean accept(Path path) {
      String name = path.getName();
      return !(name.startsWith("_") || name.startsWith("."));
    }
  };
  Path[] names = FileUtil.stat2Paths(fs.listStatus(dir, visibleOnly));
  // sort names, so that hash partitioning works
  Arrays.sort(names);
  MapFile.Reader[] readers = new MapFile.Reader[names.length];
  for (int i = 0; i < names.length; i++) {
    readers[i] = new MapFile.Reader(fs, names[i].toString(), conf);
  }
  return readers;
}
示例14: listLocatedStatus
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/**
 * Lists the located statuses under {@code path}, applying {@code filter}.
 * The user's home directory is emulated under root for compatibility with
 * consumers that expect a normal filesystem layout.
 *
 * @param path path to list
 * @param filter filter applied to each returned entry (may be null)
 * @return iterator over the (filtered) located statuses
 * @throws FileNotFoundException if the path does not exist or is inaccessible
 * @throws IOException on other I/O failures
 */
@Override
protected RemoteIterator<LocatedFileStatus> listLocatedStatus(
    final Path path, final PathFilter filter) throws IOException {
  LOG.debug("List located status for path: {}", path);
  String mantaPath = mantaPath(path);
  if (!client.existsAndIsAccessible(mantaPath)) {
    throw new FileNotFoundException(mantaPath);
  }
  /* We emulate a normal filesystem by showing the home directory under root
   * in order to provide compatibility with consumers that expect this behavior. */
  if (mantaPath.equals(SEPARATOR)) {
    LocatedFileStatus singleEntry = new LocatedFileStatus(new MantaFileStatus(true, path), null);
    return new SingleEntryRemoteIterator<>(singleEntry);
  }
  // The original code repeated the existsAndIsAccessible() check here; the
  // second remote round-trip was redundant and has been removed.
  Stream<MantaObject> stream = client.listObjects(mantaPath);
  return new MantaRemoteIterator(filter, stream, path, this, true);
}
示例15: mergeFilters
import org.apache.hadoop.fs.PathFilter; //导入依赖的package包/类
/**
 * Will merge given array of filters into one.
 * If given array of filters is empty, will return {@link #DUMMY_FILTER}.
 *
 * @param filters array of filters
 * @return one filter that combines all given filters
 */
public static PathFilter mergeFilters(final PathFilter... filters) {
  if (filters.length == 0) {
    return DUMMY_FILTER;
  }
  return new PathFilter() {
    @Override
    public boolean accept(Path path) {
      // Conjunction: the merged filter accepts a path only when every
      // underlying filter accepts it; reject on the first failure.
      for (int i = 0; i < filters.length; i++) {
        if (!filters[i].accept(path)) {
          return false;
        }
      }
      return true;
    }
  };
}