

Java FileSystem.globStatus Method Code Examples


This article compiles typical usage examples of the Java method org.apache.hadoop.fs.FileSystem.globStatus. If you are wondering what FileSystem.globStatus does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.fs.FileSystem.


Fifteen code examples of the FileSystem.globStatus method are shown below, sorted by popularity.
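
Before the project-sourced examples, it helps to know the method's contract, which Examples 2, 10, and 11 below all rely on: globStatus expands a glob pattern and returns the matching FileStatus entries; it returns null when a non-glob path does not exist, and an empty array when a glob matches nothing. A minimal sketch of that contract (the path is a hypothetical placeholder):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobStatusDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path pattern = new Path("/data/logs/2023-*.log"); // hypothetical glob pattern
    FileSystem fs = pattern.getFileSystem(conf);
    FileStatus[] matches = fs.globStatus(pattern);
    if (matches == null) {
      System.out.println("Path does not exist: " + pattern); // non-glob path, nothing there
    } else if (matches.length == 0) {
      System.out.println("Pattern matched 0 files: " + pattern); // glob with no matches
    } else {
      for (FileStatus status : matches) {
        System.out.println(status.getPath() + " (directory: " + status.isDirectory() + ")");
      }
    }
  }
}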

Example 1: getCurrentDir

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
public synchronized String getCurrentDir() {
    if (currentDir == null) {
        try {
            final Path path = new Path(Path.CUR_DIR);
            final FileSystem fs = getFileSystem();
            final FileStatus[] fileStatuses = fs.globStatus(path);
            if (fileStatuses == null || fileStatuses.length == 0) {
                return "";
            }
            homeDir = currentDir = fileStatuses[0].getPath().toUri().getPath();
        } catch (Exception e) {
            return "";
        }
    }
    return currentDir;
}
 
Developer: avast, Project: hdfs-shell, Lines: 17, Source: ContextCommands.java

Example 2: call

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
@Override
public Result call() throws Exception {
  Result result = new Result();
  FileSystem fs = path.getFileSystem(conf);
  result.fs = fs;
  FileStatus[] matches = fs.globStatus(path, inputFilter);
  if (matches == null) {
    result.addError(new IOException("Input path does not exist: " + path));
  } else if (matches.length == 0) {
    result.addError(new IOException("Input Pattern " + path
        + " matches 0 files"));
  } else {
    result.matchedFileStatuses = matches;
  }
  return result;
}
 
Developer: naver, Project: hadoop, Lines: 17, Source: LocatedFileStatusFetcher.java
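
Examples 2, 5, 10, and 11 in this article use the two-argument overload globStatus(Path, PathFilter), which applies the filter to each glob match before returning it. A minimal sketch of that overload, assuming a hypothetical filter that skips hidden files (names starting with "." or "_"), in the spirit of the HIDDEN_FILES_PATH_FILTER used in Example 9:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class GlobWithFilterDemo {
  // Accept only paths whose name does not mark them as hidden.
  private static final PathFilter VISIBLE_FILES = path -> {
    String name = path.getName();
    return !name.startsWith(".") && !name.startsWith("_");
  };

  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path pattern = new Path("/data/input/*"); // hypothetical glob pattern
    FileSystem fs = pattern.getFileSystem(conf);
    // The filter is applied to each glob match before it is returned.
    FileStatus[] matches = fs.globStatus(pattern, VISIBLE_FILES);
    if (matches != null) {
      for (FileStatus status : matches) {
        System.out.println(status.getPath());
      }
    }
  }
}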

Example 3: doBuildListing

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
/**
 * Collects the list of <sourceRelativePath, sourceFileStatus> entries to be copied and writes them to the sequence
 * file. In essence, any file or directory that needs to be copied or synced is written as an entry to the sequence
 * file, with the possible exception of the source root: when either the -update (sync) or -overwrite switch is
 * specified, and the source root is a directory, the source root entry is not written to the sequence file, because
 * only the contents of the source directory need to be copied in this case. See
 * {@link com.hotels.bdp.circustrain.s3mapreducecp.util.ConfigurationUtil#getRelativePath} for how the relative path
 * is computed, and the computeSourceRootPath method for how the root path of the source is computed.
 *
 * @param fileListWriter the writer for the sequence file that will hold the copy listing
 * @param options options for the copy job, including the source paths to glob
 * @throws IOException on failure to read a source file system or write the listing
 */
@VisibleForTesting
public void doBuildListing(SequenceFile.Writer fileListWriter, S3MapReduceCpOptions options) throws IOException {
  List<Path> globbedPaths = new ArrayList<>(options.getSources().size());

  for (Path sourcePath : options.getSources()) {
    FileSystem fs = sourcePath.getFileSystem(getConf());
    FileStatus sourceFileStatus = fs.getFileStatus(sourcePath);
    if (sourceFileStatus.isFile()) {
      LOG.debug("Adding path {}", sourceFileStatus.getPath());
      globbedPaths.add(sourceFileStatus.getPath());
    } else {
      FileStatus[] inputs = fs.globStatus(sourcePath);
      if (inputs != null && inputs.length > 0) {
        for (FileStatus onePath : inputs) {
          LOG.debug("Adding path {}", onePath.getPath());
          globbedPaths.add(onePath.getPath());
        }
      } else {
        throw new InvalidInputException("Source path " + sourcePath + " doesn't exist");
      }
    }
  }
  doBuildListing(fileListWriter, options, globbedPaths);
}
 
Developer: HotelsDotCom, Project: circus-train, Lines: 39, Source: SimpleCopyListing.java

Example 4: call

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
@Override
public void call(T ignored) throws IOException {
  Path dataDirPath = new Path(dataDirString + "/*");
  FileSystem fs = FileSystem.get(dataDirPath.toUri(), hadoopConf);
  FileStatus[] inputPathStatuses = fs.globStatus(dataDirPath);
  if (inputPathStatuses != null) {
    long oldestTimeAllowed =
        System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(maxAgeHours, TimeUnit.HOURS);
    Arrays.stream(inputPathStatuses).filter(FileStatus::isDirectory).map(FileStatus::getPath).
        filter(subdir -> {
          Matcher m = dirTimestampPattern.matcher(subdir.getName());
          return m.find() && Long.parseLong(m.group(1)) < oldestTimeAllowed;
        }).forEach(subdir -> {
          log.info("Deleting old data at {}", subdir);
          try {
            fs.delete(subdir, true);
          } catch (IOException e) {
            log.warn("Unable to delete {}; continuing", subdir, e);
          }
        });
  }
}
 
Developer: oncewang, Project: oryx2, Lines: 23, Source: DeleteOldDataFn.java

Example 5: listFileStatuses

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
private List<FileStatus> listFileStatuses(FileSystem fs, Path rootPath) throws IOException
{
    List<FileStatus> fileStatuses = Lists.newArrayList();

    FileStatus[] entries = fs.globStatus(rootPath, HiddenFileFilter.INSTANCE);
    if (entries == null) {
        return fileStatuses;
    }

    for (FileStatus entry : entries) {
        if (entry.isDirectory()) {
            List<FileStatus> subEntries = listRecursive(fs, entry);
            fileStatuses.addAll(subEntries);
        }
        else {
            fileStatuses.add(entry);
        }
    }

    return fileStatuses;
}
 
Developer: CyberAgent, Project: embulk-input-parquet_hadoop, Lines: 22, Source: ParquetHadoopInputPlugin.java

Example 6: expandAsGlob

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
/**
 * Expand the given path as a glob pattern.  Non-existent paths do not
 * throw an exception because creation commands like touch and mkdir need
 * to create them.  The "stat" field will be null if the path does not
 * exist.
 * @param pattern the pattern to expand as a glob
 * @param conf the Hadoop configuration
 * @return list of {@link PathData} objects.  If the pattern is not a glob
 * and does not exist, the list will contain a single PathData with a null
 * stat.
 * @throws IOException if anything else goes wrong
 */
public static PathData[] expandAsGlob(String pattern, Configuration conf)
throws IOException {
  Path globPath = new Path(pattern);
  FileSystem fs = globPath.getFileSystem(conf);    
  FileStatus[] stats = fs.globStatus(globPath);
  PathData[] items = null;
  
  if (stats == null) {
    // remove any quoting in the glob pattern
    pattern = pattern.replaceAll("\\\\(.)", "$1");
    // not a glob & file not found, so add the path with a null stat
    items = new PathData[]{ new PathData(fs, pattern, null) };
  } else {
    // figure out what type of glob path was given, will convert globbed
    // paths to match the type to preserve relativity
    PathType globType;
    URI globUri = globPath.toUri();
    if (globUri.getScheme() != null) {
      globType = PathType.HAS_SCHEME;
    } else if (!globUri.getPath().isEmpty() &&
               new Path(globUri.getPath()).isAbsolute()) {
      globType = PathType.SCHEMELESS_ABSOLUTE;
    } else {
      globType = PathType.RELATIVE;
    }

    // convert stats to PathData
    items = new PathData[stats.length];
    int i=0;
    for (FileStatus stat : stats) {
      URI matchUri = stat.getPath().toUri();
      String globMatch = null;
      switch (globType) {
        case HAS_SCHEME: // use as-is, but remove authority if necessary
          if (globUri.getAuthority() == null) {
            matchUri = removeAuthority(matchUri);
          }
          globMatch = uriToString(matchUri, false);
          break;
        case SCHEMELESS_ABSOLUTE: // take just the uri's path
          globMatch = matchUri.getPath();
          break;
        case RELATIVE: // make it relative to the current working dir
          URI cwdUri = fs.getWorkingDirectory().toUri();
          globMatch = relativize(cwdUri, matchUri, stat.isDirectory());
          break;
      }
      items[i++] = new PathData(fs, globMatch, stat);
    }
  }
  Arrays.sort(items);
  return items;
}
 
Developer: nucypher, Project: hadoop-oss, Lines: 66, Source: PathData.java
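
A short usage sketch for the method above, grounded only in the behavior its javadoc describes; note that PathData lives in org.apache.hadoop.fs.shell and is an internal shell utility, so treat this as illustrative rather than a stable public API, and the glob pattern here is hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.shell.PathData;

public class ExpandAsGlobDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // For a non-glob, non-existent path this returns a single PathData
    // whose stat field is null, as the javadoc above notes.
    PathData[] items = PathData.expandAsGlob("/user/*/logs", conf);
    for (PathData item : items) {
      System.out.println(item + (item.stat == null ? " (does not exist)" : ""));
    }
  }
}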

Example 7: testTracingGlobber

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
/**
 * Test tracing the globber.  This is a regression test for HDFS-9187.
 */
@Test
public void testTracingGlobber() throws Exception {
  // Bypass the normal FileSystem object creation path by just creating an
  // instance of a subclass.
  FileSystem fs = new LocalFileSystem();
  fs.initialize(new URI("file:///"), new Configuration());
  fs.globStatus(new Path("/"));
  fs.close();
}
 
Developer: nucypher, Project: hadoop-oss, Lines: 13, Source: TestTraceUtils.java

Example 8: getTableDirs

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
public static List<Path> getTableDirs(final FileSystem fs, final Path rootdir)
    throws IOException {
  List<Path> tableDirs = new LinkedList<Path>();

  for(FileStatus status :
      fs.globStatus(new Path(rootdir,
          new Path(HConstants.BASE_NAMESPACE_DIR, "*")))) {
    tableDirs.addAll(FSUtils.getLocalTableDirs(fs, status.getPath()));
  }
  return tableDirs;
}
 
Developer: fengchen8086, Project: ditb, Lines: 12, Source: FSUtils.java

Example 9: getDatasetDescriptorFromParquetFile

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
private DatasetDescriptor getDatasetDescriptorFromParquetFile(Job job, FileSystem fs, String uri)
    throws IOException {

  ArrayList<FileStatus> files = new ArrayList<FileStatus>();
  FileStatus[] dirs;
  dirs = fs.globStatus(fs.makeQualified(getInputPath()));
  for (int i = 0; (dirs != null && i < dirs.length); i++) {
    files.addAll(Arrays.asList(fs.listStatus(dirs[i].getPath(), HIDDEN_FILES_PATH_FILTER)));
    // We only check one file, so exit the loop when we have at least
    // one.
    if (files.size() > 0) {
      break;
    }
  }

  ParquetMetadata parquetMetadata;
  try {
    parquetMetadata =
        ParquetFileReader.readFooter(job.getConfiguration(),
            fs.makeQualified(files.get(0).getPath()));
  } catch (IOException e) {
    LOG.error("Wrong file format. Please check the export file's format.", e);
    throw e;
  }
  MessageType schema = parquetMetadata.getFileMetaData().getSchema();
  Schema avroSchema = new AvroSchemaConverter().convert(schema);
  DatasetDescriptor descriptor =
      new DatasetDescriptor.Builder().schema(avroSchema).format(Formats.PARQUET)
          .compressionType(ParquetJob.getCompressionType(job.getConfiguration())).build();
  return descriptor;
}
 
Developer: aliyun, Project: aliyun-maxcompute-data-collectors, Lines: 32, Source: HdfsOdpsImportJob.java

Example 10: singleThreadedListStatus

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
private List<FileStatus> singleThreadedListStatus(JobConf job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  for (Path p: dirs) {
    FileSystem fs = p.getFileSystem(job); 
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat: matches) {
        if (globStat.isDirectory()) {
          RemoteIterator<LocatedFileStatus> iter =
              fs.listLocatedStatus(globStat.getPath());
          while (iter.hasNext()) {
            LocatedFileStatus stat = iter.next();
            if (inputFilter.accept(stat.getPath())) {
              if (recursive && stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(),
                    inputFilter);
              } else {
                result.add(stat);
              }
            }
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }
  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return result;
}
 
Developer: naver, Project: hadoop, Lines: 39, Source: FileInputFormat.java

Example 11: singleThreadedListStatus

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs,
    PathFilter inputFilter, boolean recursive) throws IOException {
  List<FileStatus> result = new ArrayList<FileStatus>();
  List<IOException> errors = new ArrayList<IOException>();
  for (int i=0; i < dirs.length; ++i) {
    Path p = dirs[i];
    FileSystem fs = p.getFileSystem(job.getConfiguration()); 
    FileStatus[] matches = fs.globStatus(p, inputFilter);
    if (matches == null) {
      errors.add(new IOException("Input path does not exist: " + p));
    } else if (matches.length == 0) {
      errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
    } else {
      for (FileStatus globStat: matches) {
        if (globStat.isDirectory()) {
          RemoteIterator<LocatedFileStatus> iter =
              fs.listLocatedStatus(globStat.getPath());
          while (iter.hasNext()) {
            LocatedFileStatus stat = iter.next();
            if (inputFilter.accept(stat.getPath())) {
              if (recursive && stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(),
                    inputFilter);
              } else {
                result.add(stat);
              }
            }
          }
        } else {
          result.add(globStat);
        }
      }
    }
  }

  if (!errors.isEmpty()) {
    throw new InvalidInputException(errors);
  }
  return result;
}
 
Developer: naver, Project: hadoop, Lines: 41, Source: FileInputFormat.java

Example 12: checkSrcPath

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
/** Sanity check for srcPath */
private static void checkSrcPath(JobConf jobConf, List<Path> srcPaths) 
throws IOException {
  List<IOException> rslt = new ArrayList<IOException>();
  List<Path> unglobbed = new LinkedList<Path>();
  
  Path[] ps = new Path[srcPaths.size()];
  ps = srcPaths.toArray(ps);
  TokenCache.obtainTokensForNamenodes(jobConf.getCredentials(), ps, jobConf);
  
  
  for (Path p : srcPaths) {
    FileSystem fs = p.getFileSystem(jobConf);
    FileStatus[] inputs = fs.globStatus(p);
    
    if(inputs != null && inputs.length > 0) {
      for (FileStatus onePath: inputs) {
        unglobbed.add(onePath.getPath());
      }
    } else {
      rslt.add(new IOException("Input source " + p + " does not exist."));
    }
  }
  if (!rslt.isEmpty()) {
    throw new InvalidInputException(rslt);
  }
  srcPaths.clear();
  srcPaths.addAll(unglobbed);
}
 
Developer: naver, Project: hadoop, Lines: 30, Source: DistCpV1.java

Example 13: doBuildListing

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
/**
 * Implementation of CopyListing::buildListing().
 * Creates the copy listing by "globbing" all source paths.
 * @param pathToListingFile The location at which the copy-listing file
 *                           is to be created.
 * @param options Input Options for DistCp (indicating source/target paths).
 * @throws IOException on failure to glob the source paths or write the listing
 */
@Override
public void doBuildListing(Path pathToListingFile,
                           DistCpOptions options) throws IOException {

  List<Path> globbedPaths = new ArrayList<Path>();
  if (options.getSourcePaths().isEmpty()) {
    throw new InvalidInputException("Nothing to process. Source paths::EMPTY");  
  }

  for (Path p : options.getSourcePaths()) {
    FileSystem fs = p.getFileSystem(getConf());
    FileStatus[] inputs = fs.globStatus(p);

    if(inputs != null && inputs.length > 0) {
      for (FileStatus onePath: inputs) {
        globbedPaths.add(onePath.getPath());
      }
    } else {
      throw new InvalidInputException(p + " doesn't exist");        
    }
  }

  DistCpOptions optionsGlobbed = new DistCpOptions(options);
  optionsGlobbed.setSourcePaths(globbedPaths);
  simpleListing.buildListing(pathToListingFile, optionsGlobbed);
}
 
Developer: naver, Project: hadoop, Lines: 35, Source: GlobbedCopyListing.java

Example 14: deleteAttemptTempFiles

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
private void deleteAttemptTempFiles(Path targetWorkPath,
                                    FileSystem targetFS,
                                    String jobId) throws IOException {

  FileStatus[] tempFiles = targetFS.globStatus(
      new Path(targetWorkPath, ".distcp.tmp." + jobId.replaceAll("job","attempt") + "*"));

  if (tempFiles != null && tempFiles.length > 0) {
    for (FileStatus file : tempFiles) {
      LOG.info("Cleaning up " + file.getPath());
      targetFS.delete(file.getPath(), false);
    }
  }
}
 
Developer: naver, Project: hadoop, Lines: 15, Source: CopyCommitter.java

Example 15: getInitialCachedResources

import org.apache.hadoop.fs.FileSystem; // import the package/class the method depends on
@VisibleForTesting
Map<String, String> getInitialCachedResources(FileSystem fs,
    Configuration conf) throws IOException {
  // get the root directory for the shared cache
  String location =
      conf.get(YarnConfiguration.SHARED_CACHE_ROOT,
          YarnConfiguration.DEFAULT_SHARED_CACHE_ROOT);
  Path root = new Path(location);
  if (!fs.exists(root)) {
    String message =
        "The shared cache root directory " + location + " was not found";
    LOG.error(message);
    throw new IOException(message);
  }

  int nestedLevel = SharedCacheUtil.getCacheDepth(conf);
  // now traverse individual directories and process them
  // the directory structure is specified by the nested level parameter
  // (e.g. 9/c/d/<checksum>/file)
  String pattern = SharedCacheUtil.getCacheEntryGlobPattern(nestedLevel+1);

  LOG.info("Querying for all individual cached resource files");
  FileStatus[] entries = fs.globStatus(new Path(root, pattern));
  int numEntries = entries == null ? 0 : entries.length;
  LOG.info("Found " + numEntries + " files: processing for one resource per "
      + "key");

  Map<String, String> initialCachedEntries = new HashMap<String, String>();
  if (entries != null) {
    for (FileStatus entry : entries) {
      Path file = entry.getPath();
      String fileName = file.getName();
      if (entry.isFile()) {
        // get the parent to get the checksum
        Path parent = file.getParent();
        if (parent != null) {
          // the name of the immediate parent directory is the checksum
          String key = parent.getName();
          // make sure we insert only one file per checksum whichever comes
          // first
          if (initialCachedEntries.containsKey(key)) {
            LOG.warn("Key " + key + " is already mapped to file "
                + initialCachedEntries.get(key) + "; file " + fileName
                + " will not be added");
          } else {
            initialCachedEntries.put(key, fileName);
          }
        }
      }
    }
  }
  LOG.info("A total of " + initialCachedEntries.size()
      + " files are now mapped");
  return initialCachedEntries;
}
 
Developer: naver, Project: hadoop, Lines: 56, Source: InMemorySCMStore.java


Note: The org.apache.hadoop.fs.FileSystem.globStatus method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets are drawn from open-source projects contributed by their developers; copyright remains with the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.