This article collects typical usage examples of the Java method org.apache.hadoop.fs.FileSystem.globStatus. If you are wondering what FileSystem.globStatus does, how to use it, or where to find examples of it, the curated code samples below may help. You can also read further about the enclosing class, org.apache.hadoop.fs.FileSystem.
A total of 15 code examples of FileSystem.globStatus are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
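Before looking at the individual examples, here is a minimal, self-contained sketch of the pattern most of them share: expand a glob with FileSystem.globStatus and distinguish a null result (no glob and the path does not exist) from an empty array (the glob matched nothing). The /tmp/logs/*.log pattern and the console output are illustrative assumptions, not taken from any example below.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class GlobStatusDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path pattern = new Path("/tmp/logs/*.log");   // hypothetical glob pattern
    FileSystem fs = pattern.getFileSystem(conf);
    FileStatus[] matches = fs.globStatus(pattern);
    if (matches == null) {
      // No glob and the path does not exist (the examples below report "Input path does not exist").
      System.err.println("Path does not exist: " + pattern);
    } else if (matches.length == 0) {
      // The glob was valid but matched nothing.
      System.err.println("Pattern matched 0 files: " + pattern);
    } else {
      for (FileStatus status : matches) {
        System.out.println(status.getPath() + " (directory=" + status.isDirectory() + ")");
      }
    }
  }
}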
Example 1: getCurrentDir
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public synchronized String getCurrentDir() {
  if (currentDir == null) {
    try {
      final Path path = new Path(Path.CUR_DIR);
      final FileSystem fs = getFileSystem();
      final FileStatus[] fileStatuses = fs.globStatus(path);
      if (fileStatuses == null || fileStatuses.length == 0) {
        return "";
      }
      homeDir = currentDir = fileStatuses[0].getPath().toUri().getPath();
    } catch (Exception e) {
      return "";
    }
  }
  return currentDir;
}
Example 2: call
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
@Override
public Result call() throws Exception {
  Result result = new Result();
  FileSystem fs = path.getFileSystem(conf);
  result.fs = fs;
  FileStatus[] matches = fs.globStatus(path, inputFilter);
  if (matches == null) {
    result.addError(new IOException("Input path does not exist: " + path));
  } else if (matches.length == 0) {
    result.addError(new IOException("Input Pattern " + path
        + " matches 0 files"));
  } else {
    result.matchedFileStatuses = matches;
  }
  return result;
}
Example 3: doBuildListing
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
/**
 * Collect the list of <sourceRelativePath, sourceFileStatus> to be copied and write to the sequence file. In essence,
 * any file or directory that needs to be copied or sync-ed is written as an entry to the sequence file, with the
 * possible exception of the source root: when either the -update (sync) or -overwrite switch is specified, and if the
 * source root is a directory, then the source root entry is not written to the sequence file, because only the
 * contents of the source directory need to be copied in this case. See
 * {@link com.hotels.bdp.circustrain.s3mapreducecp.util.ConfigurationUtil#getRelativePath} for how the relative path is
 * computed. See the computeSourceRootPath method for how the root path of the source is computed.
 *
 * @param fileListWriter
 * @param options
 * @throws IOException
 */
@VisibleForTesting
public void doBuildListing(SequenceFile.Writer fileListWriter, S3MapReduceCpOptions options) throws IOException {
  List<Path> globbedPaths = new ArrayList<>(options.getSources().size());
  for (Path sourcePath : options.getSources()) {
    FileSystem fs = sourcePath.getFileSystem(getConf());
    FileStatus sourceFileStatus = fs.getFileStatus(sourcePath);
    if (sourceFileStatus.isFile()) {
      LOG.debug("Adding path {}", sourceFileStatus.getPath());
      globbedPaths.add(sourceFileStatus.getPath());
    } else {
      FileStatus[] inputs = fs.globStatus(sourcePath);
      if (inputs != null && inputs.length > 0) {
        for (FileStatus onePath : inputs) {
          LOG.debug("Adding path {}", onePath.getPath());
          globbedPaths.add(onePath.getPath());
        }
      } else {
        throw new InvalidInputException("Source path " + sourcePath + " doesn't exist");
      }
    }
  }
  doBuildListing(fileListWriter, options, globbedPaths);
}
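The SequenceFile.Writer handed to this method is an ordinary Hadoop sequence-file writer. As a minimal sketch of where that argument could come from, the hypothetical helper below creates one; the listing path is made up, and the Text key/value classes are placeholders for illustration only. The real listing code may well use different key and value types.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
public class ListingWriterFactory {
  // Hypothetical helper: creates the writer that doBuildListing appends listing entries to.
  public static SequenceFile.Writer createListingWriter(Configuration conf, Path listingPath)
      throws IOException {
    return SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(listingPath),
        SequenceFile.Writer.keyClass(Text.class),     // placeholder key class
        SequenceFile.Writer.valueClass(Text.class));  // placeholder value class
  }
}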
Example 4: call
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
@Override
public void call(T ignored) throws IOException {
  Path dataDirPath = new Path(dataDirString + "/*");
  FileSystem fs = FileSystem.get(dataDirPath.toUri(), hadoopConf);
  FileStatus[] inputPathStatuses = fs.globStatus(dataDirPath);
  if (inputPathStatuses != null) {
    long oldestTimeAllowed =
        System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(maxAgeHours, TimeUnit.HOURS);
    Arrays.stream(inputPathStatuses).filter(FileStatus::isDirectory).map(FileStatus::getPath).
        filter(subdir -> {
          Matcher m = dirTimestampPattern.matcher(subdir.getName());
          return m.find() && Long.parseLong(m.group(1)) < oldestTimeAllowed;
        }).forEach(subdir -> {
          log.info("Deleting old data at {}", subdir);
          try {
            fs.delete(subdir, true);
          } catch (IOException e) {
            log.warn("Unable to delete {}; continuing", subdir, e);
          }
        });
  }
}
Example 5: listFileStatuses
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
private List<FileStatus> listFileStatuses(FileSystem fs, Path rootPath) throws IOException
{
  List<FileStatus> fileStatuses = Lists.newArrayList();
  FileStatus[] entries = fs.globStatus(rootPath, HiddenFileFilter.INSTANCE);
  if (entries == null) {
    return fileStatuses;
  }
  for (FileStatus entry : entries) {
    if (entry.isDirectory()) {
      List<FileStatus> subEntries = listRecursive(fs, entry);
      fileStatuses.addAll(subEntries);
    }
    else {
      fileStatuses.add(entry);
    }
  }
  return fileStatuses;
}
Example 6: expandAsGlob
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
/**
 * Expand the given path as a glob pattern. Non-existent paths do not
 * throw an exception because creation commands like touch and mkdir need
 * to create them. The "stat" field will be null if the path does not
 * exist.
 * @param pattern the pattern to expand as a glob
 * @param conf the hadoop configuration
 * @return list of {@link PathData} objects. if the pattern is not a glob,
 * and does not exist, the list will contain a single PathData with a null
 * stat
 * @throws IOException anything else goes wrong...
 */
public static PathData[] expandAsGlob(String pattern, Configuration conf)
    throws IOException {
  Path globPath = new Path(pattern);
  FileSystem fs = globPath.getFileSystem(conf);
  FileStatus[] stats = fs.globStatus(globPath);
  PathData[] items = null;
  if (stats == null) {
    // remove any quoting in the glob pattern
    pattern = pattern.replaceAll("\\\\(.)", "$1");
    // not a glob & file not found, so add the path with a null stat
    items = new PathData[]{ new PathData(fs, pattern, null) };
  } else {
    // figure out what type of glob path was given, will convert globbed
    // paths to match the type to preserve relativity
    PathType globType;
    URI globUri = globPath.toUri();
    if (globUri.getScheme() != null) {
      globType = PathType.HAS_SCHEME;
    } else if (!globUri.getPath().isEmpty() &&
               new Path(globUri.getPath()).isAbsolute()) {
      globType = PathType.SCHEMELESS_ABSOLUTE;
    } else {
      globType = PathType.RELATIVE;
    }
    // convert stats to PathData
    items = new PathData[stats.length];
    int i = 0;
    for (FileStatus stat : stats) {
      URI matchUri = stat.getPath().toUri();
      String globMatch = null;
      switch (globType) {
        case HAS_SCHEME: // use as-is, but remove authority if necessary
          if (globUri.getAuthority() == null) {
            matchUri = removeAuthority(matchUri);
          }
          globMatch = uriToString(matchUri, false);
          break;
        case SCHEMELESS_ABSOLUTE: // take just the uri's path
          globMatch = matchUri.getPath();
          break;
        case RELATIVE: // make it relative to the current working dir
          URI cwdUri = fs.getWorkingDirectory().toUri();
          globMatch = relativize(cwdUri, matchUri, stat.isDirectory());
          break;
      }
      items[i++] = new PathData(fs, globMatch, stat);
    }
  }
  Arrays.sort(items);
  return items;
}
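Because expandAsGlob is a public static helper on org.apache.hadoop.fs.shell.PathData, shell-style tooling can call it directly. The sketch below is a minimal usage example under the assumption that PathData's stat field is publicly readable (as the javadoc above implies); the /data/2024* pattern is hypothetical, and note that PathData is an internal shell class rather than public Hadoop API.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.shell.PathData;
public class ExpandGlobDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // A non-glob, non-existent path yields a single PathData whose stat is null.
    PathData[] items = PathData.expandAsGlob("/data/2024*", conf);
    for (PathData item : items) {
      System.out.println(item + " exists=" + (item.stat != null));
    }
  }
}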
Example 7: testTracingGlobber
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
/**
 * Test tracing the globber. This is a regression test for HDFS-9187.
 */
@Test
public void testTracingGlobber() throws Exception {
  // Bypass the normal FileSystem object creation path by just creating an
  // instance of a subclass.
  FileSystem fs = new LocalFileSystem();
  fs.initialize(new URI("file:///"), new Configuration());
  fs.globStatus(new Path("/"));
  fs.close();
}
Example 8: getTableDirs
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public static List<Path> getTableDirs(final FileSystem fs, final Path rootdir)
    throws IOException {
  List<Path> tableDirs = new LinkedList<Path>();
  for (FileStatus status :
      fs.globStatus(new Path(rootdir,
          new Path(HConstants.BASE_NAMESPACE_DIR, "*")))) {
    tableDirs.addAll(FSUtils.getLocalTableDirs(fs, status.getPath()));
  }
  return tableDirs;
}
Example 9: getDatasetDescriptorFromParquetFile
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
private DatasetDescriptor getDatasetDescriptorFromParquetFile(Job job, FileSystem fs, String uri)
    throws IOException {
  ArrayList<FileStatus> files = new ArrayList<FileStatus>();
  FileStatus[] dirs;
  dirs = fs.globStatus(fs.makeQualified(getInputPath()));
  for (int i = 0; (dirs != null && i < dirs.length); i++) {
    files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath(), HIDDEN_FILES_PATH_FILTER)));
    // We only check one file, so exit the loop when we have at least
    // one.
    if (files.size() > 0) {
      break;
    }
  }
  ParquetMetadata parquetMetadata;
  try {
    parquetMetadata =
        ParquetFileReader.readFooter(job.getConfiguration(),
            fs.makeQualified(files.get(0).getPath()));
  } catch (IOException e) {
    LOG.error("Wrong file format. Please check the export file's format.", e);
    throw e;
  }
  MessageType schema = parquetMetadata.getFileMetaData().getSchema();
  Schema avroSchema = new AvroSchemaConverter().convert(schema);
  DatasetDescriptor descriptor =
      new DatasetDescriptor.Builder().schema(avroSchema).format(Formats.PARQUET)
          .compressionType(ParquetJob.getCompressionType(job.getConfiguration())).build();
  return descriptor;
}
Example 10: singleThreadedListStatus
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
private List<FileStatus> singleThreadedListStatus(JobConf job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  for (Path p : dirs) {
    FileSystem fs = p.getFileSystem(job);
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat : matches) {
        if (globStat.isDirectory()) {
          RemoteIterator<LocatedFileStatus> iter =
              fs.listLocatedStatus(globStat.getPath());
          while (iter.hasNext()) {
            LocatedFileStatus stat = iter.next();
            if (inputFilter.accept(stat.getPath())) {
              if (recursive && stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(),
                    inputFilter);
              } else {
                result.add(stat);
              }
            }
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }
  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return result;
}
Example 11: singleThreadedListStatus
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  for (int i = 0; i < dirs.length; ++i) {
    Path p = dirs[i];
    FileSystem fs = p.getFileSystem(job.getConfiguration());
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat : matches) {
        if (globStat.isDirectory()) {
          RemoteIterator<LocatedFileStatus> iter =
              fs.listLocatedStatus(globStat.getPath());
          while (iter.hasNext()) {
            LocatedFileStatus stat = iter.next();
            if (inputFilter.accept(stat.getPath())) {
              if (recursive && stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(),
                    inputFilter);
              } else {
                result.add(stat);
              }
            }
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }
  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return result;
}
Example 12: checkSrcPath
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
/** Sanity check for srcPath */
private static void checkSrcPath(JobConf jobConf, List<Path> srcPaths)
    throws IOException {
  List<IOException> rslt = new ArrayList<IOException>();
  List<Path> unglobbed = new LinkedList<Path>();
  Path[] ps = new Path[srcPaths.size()];
  ps = srcPaths.toArray(ps);
  TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), ps, jobConf);
  for (Path p : srcPaths) {
    FileSystem fs = p.getFileSystem(jobConf);
    FileStatus[] inputs = fs.globStatus(p);
    if (inputs != null && inputs.length > 0) {
      for (FileStatus onePath : inputs) {
        unglobbed.add(onePath.getPath());
      }
    } else {
      rslt.add(new IOException("Input source " + p + " does not exist."));
    }
  }
  if (!rslt.isEmpty()) {
    throw new InvalidInputException(rslt);
  }
  srcPaths.clear();
  srcPaths.addAll(unglobbed);
}
Example 13: doBuildListing
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
/**
 * Implementation of CopyListing::buildListing().
 * Creates the copy listing by "globbing" all source-paths.
 * @param pathToListingFile The location at which the copy-listing file
 *                          is to be created.
 * @param options Input Options for DistCp (indicating source/target paths.)
 * @throws IOException
 */
@Override
public void doBuildListing(Path pathToListingFile,
    DistCpOptions options) throws IOException {
  List<Path> globbedPaths = new ArrayList<Path>();
  if (options.getSourcePaths().isEmpty()) {
    throw new InvalidInputException("Nothing to process. Source paths::EMPTY");
  }
  for (Path p : options.getSourcePaths()) {
    FileSystem fs = p.getFileSystem(getConf());
    FileStatus[] inputs = fs.globStatus(p);
    if (inputs != null && inputs.length > 0) {
      for (FileStatus onePath : inputs) {
        globbedPaths.add(onePath.getPath());
      }
    } else {
      throw new InvalidInputException(p + " doesn't exist");
    }
  }
  DistCpOptions optionsGlobbed = new DistCpOptions(options);
  optionsGlobbed.setSourcePaths(globbedPaths);
  simpleListing.buildListing(pathToListingFile, optionsGlobbed);
}
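For a rough idea of how this listing is driven, the sketch below builds a GlobbedCopyListing and asks it to write a listing file. It assumes the Hadoop 2.x API, where DistCpOptions still has a public (List<Path>, Path) constructor and CopyListing exposes buildListing(Path, DistCpOptions); in Hadoop 3.x the options are built through a builder instead. All paths are hypothetical.
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.tools.DistCpOptions;
import org.apache.hadoop.tools.GlobbedCopyListing;
public class GlobbedListingDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical source glob and target directory (Hadoop 2.x DistCpOptions constructor).
    DistCpOptions options = new DistCpOptions(
        Arrays.asList(new Path("/data/input/2024-*")), new Path("/backup/input"));
    GlobbedCopyListing listing = new GlobbedCopyListing(conf, new Credentials());
    // Expands the glob and writes one entry per matched path into the listing sequence file.
    listing.buildListing(new Path("/tmp/distcp-listing.seq"), options);
  }
}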
Example 14: deleteAttemptTempFiles
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
private void deleteAttemptTempFiles(Path targetWorkPath,
    FileSystem targetFS,
    String jobId) throws IOException {
  FileStatus[] tempFiles = targetFS.globStatus(
      new Path(targetWorkPath, ".distcp.tmp." + jobId.replaceAll("job", "attempt") + "*"));
  if (tempFiles != null && tempFiles.length > 0) {
    for (FileStatus file : tempFiles) {
      LOG.info("Cleaning up " + file.getPath());
      targetFS.delete(file.getPath(), false);
    }
  }
}
Example 15: getInitialCachedResources
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
@VisibleForTesting
Map<String, String> getInitialCachedResources(FileSystem fs,
    Configuration conf) throws IOException {
  // get the root directory for the shared cache
  String location =
      conf.get(YarnConfiguration.SHARED_CACHE_ROOT,
          YarnConfiguration.DEFAULT_SHARED_CACHE_ROOT);
  Path root = new Path(location);
  if (!fs.exists(root)) {
    String message =
        "The shared cache root directory " + location + " was not found";
    LOG.error(message);
    throw new IOException(message);
  }
  int nestedLevel = SharedCacheUtil.getCacheDepth(conf);
  // now traverse individual directories and process them
  // the directory structure is specified by the nested level parameter
  // (e.g. 9/c/d/<checksum>/file)
  String pattern = SharedCacheUtil.getCacheEntryGlobPattern(nestedLevel + 1);
  LOG.info("Querying for all individual cached resource files");
  FileStatus[] entries = fs.globStatus(new Path(root, pattern));
  int numEntries = entries == null ? 0 : entries.length;
  LOG.info("Found " + numEntries + " files: processing for one resource per "
      + "key");
  Map<String, String> initialCachedEntries = new HashMap<String, String>();
  if (entries != null) {
    for (FileStatus entry : entries) {
      Path file = entry.getPath();
      String fileName = file.getName();
      if (entry.isFile()) {
        // get the parent to get the checksum
        Path parent = file.getParent();
        if (parent != null) {
          // the name of the immediate parent directory is the checksum
          String key = parent.getName();
          // make sure we insert only one file per checksum whichever comes
          // first
          if (initialCachedEntries.containsKey(key)) {
            LOG.warn("Key " + key + " is already mapped to file "
                + initialCachedEntries.get(key) + "; file " + fileName
                + " will not be added");
          } else {
            initialCachedEntries.put(key, fileName);
          }
        }
      }
    }
  }
  LOG.info("A total of " + initialCachedEntries.size()
      + " files are now mapped");
  return initialCachedEntries;
}
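The layout described in the inline comments (e.g. 9/c/d/<checksum>/file) means the glob just needs one wildcard per path component below the cache root; in YARN that pattern comes from SharedCacheUtil.getCacheEntryGlobPattern. The hypothetical helper below sketches the same idea for illustration only and is not the YARN implementation.
import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class NestedGlobSketch {
  // Hypothetical helper: one "*" per path component, e.g. globForLevels(3) returns "*/*/*".
  static String globForLevels(int levels) {
    StringBuilder pattern = new StringBuilder("*");
    for (int i = 1; i < levels; i++) {
      pattern.append(Path.SEPARATOR).append("*");
    }
    return pattern.toString();
  }
  // Lists everything that sits exactly `levels` path components below the given root.
  static FileStatus[] listAtDepth(FileSystem fs, Path root, int levels) throws IOException {
    return fs.globStatus(new Path(root, globForLevels(levels)));
  }
}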