

Java FileSystem.listFiles Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.fs.FileSystem.listFiles. If you are wondering exactly what FileSystem.listFiles does, how to call it, or what real-world usage looks like, the curated examples below should help. You can also browse further usage examples of its enclosing class, org.apache.hadoop.fs.FileSystem.


The following shows 14 code examples of the FileSystem.listFiles method, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system recommend better Java code examples.
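
Before the examples, here is a minimal, self-contained sketch of the basic call pattern (the path /tmp/data and the default Configuration are illustrative assumptions, not taken from any example below): FileSystem.listFiles(path, recursive) returns a RemoteIterator&lt;LocatedFileStatus&gt; that is drained with hasNext()/next().

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();   // reads core-site.xml / hdfs-site.xml from the classpath
    FileSystem fs = FileSystem.get(conf);
    // listFiles returns files only (never directories); the second argument controls recursion
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/tmp/data"), true);
    while (it.hasNext()) {
      LocatedFileStatus status = it.next();
      System.out.println(status.getPath() + " : " + status.getLen() + " bytes");
    }
  }
}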

Example 1: assertFileCount

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
/**
 * Assert that the number of log files in the target directory is as expected.
 * @param fs the target FileSystem
 * @param dir the target directory path
 * @param expected the expected number of files
 * @throws IOException thrown if listing files fails
 */
public void assertFileCount(FileSystem fs, Path dir, int expected)
    throws IOException {
  RemoteIterator<LocatedFileStatus> i = fs.listFiles(dir, true);
  int count = 0;

  while (i.hasNext()) {
    i.next();
    count++;
  }

  assertTrue("The sink created additional unexpected log files. " + count
      + "files were created", expected >= count);
  assertTrue("The sink created too few log files. " + count + "files were "
      + "created", expected <= count);
}
 
Developer ID: nucypher, Project: hadoop-oss, Lines of code: 23, Source file: RollingFileSystemSinkTestBase.java

Example 2: getOrcFiles

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
/**
 * Get all ORC files present in directory for the specified table and partition/bucket. The ORC
 * files returned are in ascending order of the (insertion) time-partition and sequence-id within
 * the time-partition.
 *
 * @param orcDir the ORC store directory
 * @param fileExt the file-name extension to exclude (for example CRC checksum files)
 * @param args the arguments in order: table-name, bucket-id, time-partition-id
 * @return the list of all ORC files
 */
private String[] getOrcFiles(final String orcDir, final String fileExt, final String... args) {
  try {
    FileSystem fileSystem = FileSystem.get(conf);
    Path distributedPath = new Path(Paths.get(orcDir, args).toString());
    ArrayList<String> filePathStrings = new ArrayList<>();
    if (fileSystem.exists(distributedPath)) {
      RemoteIterator<LocatedFileStatus> fileListItr = fileSystem.listFiles(distributedPath, true);
      while (fileListItr != null && fileListItr.hasNext()) {
        LocatedFileStatus file = fileListItr.next();
        if (!file.getPath().getName().endsWith(fileExt)) {
          // exclude CRC files
          filePathStrings.add(file.getPath().toUri().toString());
        }
      }

      Collections.sort(filePathStrings);
    }
    String[] retArray = new String[filePathStrings.size()];
    filePathStrings.toArray(retArray);
    return retArray;
  } catch (IOException e) {
    e.printStackTrace();
  }
  return new String[0];
}
 
Developer ID: ampool, Project: monarch, Lines of code: 35, Source file: AbstractTierStoreReader.java

Example 3: getFilesCount

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public int getFilesCount(String storeBaseDir, String tableName) {
  int filesCount = 0;
  try {
    FileSystem fs = FileSystem.get(conf);
    Path storeBasePath = new Path(fs.getHomeDirectory(), storeBaseDir);
    Path tablePath = new Path(storeBasePath, tableName);
    if (fs.exists(tablePath)) {
      RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator =
          fs.listFiles(tablePath, false);
      while (locatedFileStatusRemoteIterator.hasNext()) {
        filesCount++;
        LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
        System.out.println("File name is " + next.getPath());
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return filesCount;
}
 
Developer ID: ampool, Project: monarch, Lines of code: 21, Source file: HDFSQuasiService.java

Example 4: getORCRecords

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public List<OrcStruct> getORCRecords(String storeBaseDir, String tableName) throws IOException {
  List<OrcStruct> orcrecords = new ArrayList<>();
  try {
    FileSystem fs = FileSystem.get(conf);
    Path storeBasePath = new Path(fs.getHomeDirectory(), storeBaseDir);
    Path tablePath = new Path(storeBasePath, tableName);
    if (fs.exists(tablePath)) {
      RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator =
          fs.listFiles(tablePath, false);
      while (locatedFileStatusRemoteIterator.hasNext()) {
        LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
        final org.apache.hadoop.hive.ql.io.orc.Reader fis =
            OrcFile.createReader(next.getPath(), OrcFile.readerOptions(conf));
        RecordReader rows = fis.rows();
        while (rows.hasNext()) {
          orcrecords.add((OrcStruct) rows.next(null));
        }
        System.out.println("File name is " + next.getPath());
      }
    }
  } catch (IOException e) {
    e.printStackTrace();
  }
  return orcrecords;
}
 
Developer ID: ampool, Project: monarch, Lines of code: 26, Source file: HDFSQuasiService.java

Example 5: assertListFilesFinds

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
/**
 * Assert that a {@code listFiles()} call on the given directory finds the
 * specified subdirectory. (Per the original source: to compile this project
 * under Hadoop 1, this code needs to be commented out.)
 *
 * @param fs filesystem
 * @param dir dir
 * @param subdir subdir
 * @param recursive recurse?
 * @throws IOException IO problems
 */
public static void assertListFilesFinds(FileSystem fs,
                                        Path dir,
                                        Path subdir,
                                        boolean recursive) throws IOException {
  RemoteIterator<LocatedFileStatus> iterator =
    fs.listFiles(dir, recursive);
  boolean found = false;
  int entries = 0;
  StringBuilder builder = new StringBuilder();
  while (iterator.hasNext()) {
    LocatedFileStatus next = iterator.next();
    entries++;
    builder.append(next.toString()).append('\n');
    if (next.getPath().equals(subdir)) {
      found = true;
    }
  }
  assertTrue("Path " + subdir
             + " not found in directory " + dir + " : "
             + " entries=" + entries
             + " content"
             + builder.toString(),
             found);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 36, Source file: TestV2LsOperations.java

Example 6: publishPlainDataStatistics

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
static DataStatistics publishPlainDataStatistics(Configuration conf, 
                                                 Path inputDir) 
throws IOException {
  FileSystem fs = inputDir.getFileSystem(conf);

  // obtain input data file statuses
  long dataSize = 0;
  long fileCount = 0;
  RemoteIterator<LocatedFileStatus> iter = fs.listFiles(inputDir, true);
  PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter();
  while (iter.hasNext()) {
    LocatedFileStatus lStatus = iter.next();
    if (filter.accept(lStatus.getPath())) {
      dataSize += lStatus.getLen();
      ++fileCount;
    }
  }

  // publish the plain data statistics
  LOG.info("Total size of input data : " 
           + StringUtils.humanReadableInt(dataSize));
  LOG.info("Total number of input data files : " + fileCount);
  
  return new DataStatistics(dataSize, fileCount, false);
}
 
Developer ID: naver, Project: hadoop, Lines of code: 26, Source file: GenerateData.java

Example 7: readKeysToSearch

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
static SortedSet<byte []> readKeysToSearch(final Configuration conf)
throws IOException, InterruptedException {
  Path keysInputDir = new Path(conf.get(SEARCHER_INPUTDIR_KEY));
  FileSystem fs = FileSystem.get(conf);
  SortedSet<byte []> result = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
  if (!fs.exists(keysInputDir)) {
    throw new FileNotFoundException(keysInputDir.toString());
  }
  if (!fs.isDirectory(keysInputDir)) {
    throw new UnsupportedOperationException("TODO");
  } else {
    RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(keysInputDir, false);
    while(iterator.hasNext()) {
      LocatedFileStatus keyFileStatus = iterator.next();
      // Skip "_SUCCESS" file.
      if (keyFileStatus.getPath().getName().startsWith("_")) continue;
      result.addAll(readFileToSearch(conf, fs, keyFileStatus));
    }
  }
  return result;
}
 
Developer ID: fengchen8086, Project: ditb, Lines of code: 22, Source file: IntegrationTestBigLinkedList.java

Example 8: getFileCount

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
private int getFileCount(URI uri) throws IOException {
  FileSystem fs = gcpUtils.fileSystemForUri(uri);
  RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(uri), true);
  int count = 0;
  while (it.hasNext()) {
    it.next();
    count++;
  }
  return count;
}
 
Developer ID: spotify, Project: spydra, Lines of code: 11, Source file: LifecycleIT.java

Example 9: listFiles

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public Iterator<FileMetadata> listFiles(FileSystem fs) throws IOException {
    return new Iterator<FileMetadata>() {
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(fs.getWorkingDirectory(), recursive);
        LocatedFileStatus current = null;
        boolean previous = false;

        @Override
        public boolean hasNext() {
            try {
                if (current == null) {
                    // fetch the next status from HDFS, then re-evaluate it recursively
                    if (!it.hasNext()) return false;
                    current = it.next();
                    return hasNext();
                }
                // accept only regular files whose names match the configured pattern
                if (current.isFile() &&
                        fileRegexp.matcher(current.getPath().getName()).find()) {
                    return true;
                }
                // not a match: discard it and keep scanning
                current = null;
                return hasNext();
            } catch (IOException ioe) {
                throw new ConnectException(ioe);
            }
        }

        @Override
        public FileMetadata next() {
            if (!hasNext() && current == null) {
                throw new NoSuchElementException("There are no more items");
            }
            FileMetadata metadata = toMetadata(current);
            current = null;
            return metadata;
        }
    };
}
 
Developer ID: mmolimar, Project: kafka-connect-fs, Lines of code: 37, Source file: AbstractPolicy.java

Example 10: main

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
    String uri = "hdfs://hadoop-master:9000/";

    Configuration config = new Configuration();
    FileSystem fs = FileSystem.get(URI.create(uri), config, "root");

    FileStatus[] listStatus = fs.listStatus(new Path("/"));
    for (FileStatus file : listStatus) {
        System.out.println("[" + (file.isFile() ? "file" : "dir") + "] " + file.getPath().getName());
    }

    RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
    while (listFiles.hasNext()) {

        LocatedFileStatus fileStatus = listFiles.next();

        log.info("block size:{}",fileStatus.getBlockSize());
        log.info("owner : {}", fileStatus.getOwner());
        log.info("replication : {}" ,fileStatus.getReplication());
        log.info("permission : {}", fileStatus.getPermission());
        log.info("path name : {}",fileStatus.getPath().getName());

        log.info("========block info=========");

        BlockLocation[] blockLocations = fileStatus.getBlockLocations();

        for (BlockLocation blockLocation : blockLocations){

            log.info("block offset : {}",blockLocation.getOffset());
            log.info("block length : {}",blockLocation.getLength());

            String[] dataNodes = blockLocation.getHosts();
            for (String dataNode : dataNodes){
                log.info("dataNode :{}",dataNode);
            }
        }
    }
}
 
Developer ID: laidu, Project: java-learn, Lines of code: 38, Source file: Ls.java

Example 11: run

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public int run(String[] args, PrintStream stream) throws Exception {
  out = stream;
  List<String> paths = parseArgs(args);
  if (paths.size() != 1) {
    errorln(USAGE);
    return E_USAGE;
  }
  println("Hadoop %s", getVersion());
  println("Compiled by %s on %s", getUser(), getDate());
  println("Compiled with protoc %s", getProtocVersion());
  println("From source with checksum %s", getSrcChecksum());


  Configuration conf = getConf();
  Path path = new Path(paths.get(0));
  FileSystem fs = path.getFileSystem(conf);

  println("Filesystem for %s is %s", path, fs);

  // examine the FS
  Configuration fsConf = fs.getConf();
  for (int i = 0; i < props.length; i++) {
    showProp(fsConf, (String) props[i][0], (Boolean) props[i][1]);
  }

  Path root = fs.makeQualified(new Path("/"));
  try (DurationInfo d = new DurationInfo(LOG,
      "Listing  %s", root)) {
    println("%s has %d entries", root, fs.listStatus(root).length);
  }

  String dirName = "dir-" + UUID.randomUUID();
  Path dir = new Path(root, dirName);
  try (DurationInfo d = new DurationInfo(LOG,
      "Creating a directory %s", dir)) {
    fs.mkdirs(dir);
  }
  try {
    Path file = new Path(dir, "file");
    try (DurationInfo d = new DurationInfo(LOG,
        "Creating a file %s", file)) {
      FSDataOutputStream data = fs.create(file, true);
      data.writeUTF(HELLO);
      data.close();
    }
    try (DurationInfo d = new DurationInfo(LOG,
        "Listing  %s", dir)) {
      fs.listFiles(dir, false);
    }

    try (DurationInfo d = new DurationInfo(LOG,
        "Reading a file %s", file)) {
      FSDataInputStream in = fs.open(file);
      String utf = in.readUTF();
      in.close();
      if (!HELLO.equals(utf)) {
        throw new IOException("Expected " + file + " to contain the text "
            + HELLO + " -but it has the text \"" + utf + "\"");
      }
    }
    try (DurationInfo d = new DurationInfo(LOG,
        "Deleting file %s", file)) {
      fs.delete(file, true);
    }
  } finally {
    try (DurationInfo d = new DurationInfo(LOG,
        "Deleting directory %s", dir)) {
      try {
        fs.delete(dir, true);
      } catch (Exception e) {
        LOG.warn("When deleting {}: ", dir, e);
      }
    }
  }

  return SUCCESS;
}
 
Developer ID: steveloughran, Project: cloudup, Lines of code: 82, Source file: S3ADiag.java

Example 12: listHDFSFiles

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public static RemoteIterator<LocatedFileStatus> listHDFSFiles(FileSystem fs, Path path)
    throws IOException {
  return fs.listFiles(path, true);
}
 
Developer ID: ampool, Project: monarch, Lines of code: 5, Source file: HDFSClient.java

Example 13: processInputArgument

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
/**
 * Processes the input file/folder argument. If the input is a file,
 * then it is directly considered for further processing by TraceBuilder.
 * If the input is a folder, then all the history logs in the
 * input folder are considered for further processing.
 *
 * If isRecursive is true, then the input path is recursively scanned
 * for job history logs for further processing by TraceBuilder.
 *
 * NOTE: If the input represents a globbed path, then it is first flattened
 *       and then the individual paths represented by the globbed input
 *       path are considered for further processing.
 *
 * @param input        input path, possibly globbed
 * @param conf         configuration
 * @param isRecursive  whether to recursively traverse the input paths to
 *                     find history logs
 * @return the input history log files' paths
 * @throws FileNotFoundException
 * @throws IOException
 */
static List<Path> processInputArgument(String input, Configuration conf,
    boolean isRecursive) throws FileNotFoundException, IOException {
  Path inPath = new Path(input);
  FileSystem fs = inPath.getFileSystem(conf);
  FileStatus[] inStatuses = fs.globStatus(inPath);

  List<Path> inputPaths = new LinkedList<Path>();
  if (inStatuses == null || inStatuses.length == 0) {
    return inputPaths;
  }

  for (FileStatus inStatus : inStatuses) {
    Path thisPath = inStatus.getPath();
    if (inStatus.isDirectory()) {

      // Find list of files in this path(recursively if -recursive option
      // is specified).
      List<FileStatus> historyLogs = new ArrayList<FileStatus>();

      RemoteIterator<LocatedFileStatus> iter =
        fs.listFiles(thisPath, isRecursive);
      while (iter.hasNext()) {
        LocatedFileStatus child = iter.next();
        String fileName = child.getPath().getName();

        if (!(fileName.endsWith(".crc") || fileName.startsWith("."))) {
          historyLogs.add(child);
        }
      }

      if (historyLogs.size() > 0) {
        // Add the sorted history log file names in this path to the
        // inputPaths list
        FileStatus[] sortableNames =
            historyLogs.toArray(new FileStatus[historyLogs.size()]);
        Arrays.sort(sortableNames, new HistoryLogsComparator());

        for (FileStatus historyLog : sortableNames) {
          inputPaths.add(historyLog.getPath());
        }
      }
    } else {
      inputPaths.add(thisPath);
    }
  }

  return inputPaths;
}
 
Developer ID: naver, Project: hadoop, Lines of code: 70, Source file: TraceBuilder.java

Example 14: testFitViaStringPaths

import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
@Test
public void testFitViaStringPaths() throws Exception {

    Path tempDir = Files.createTempDirectory("DL4J-testFitViaStringPaths");
    File tempDirF = tempDir.toFile();
    tempDirF.deleteOnExit();

    int dataSetObjSize = 5;
    int batchSizePerExecutor = 25;
    DataSetIterator iter = new MnistDataSetIterator(dataSetObjSize,1000,false);
    int i=0;
    while(iter.hasNext()){
        File nextFile = new File(tempDirF, i + ".bin");
        DataSet ds = iter.next();
        ds.save(nextFile);
        i++;
    }

    System.out.println("Saved to: " + tempDirF.getAbsolutePath());




    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
            .updater(Updater.RMSPROP)
            .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
            .list()
            .layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder()
                    .nIn(28*28).nOut(50)
                    .activation("tanh").build())
            .layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                    .nIn(50).nOut(10)
                    .activation("softmax")
                    .build())
            .pretrain(false).backprop(true)
            .build();

    SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc,conf,
            new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
                    .workerPrefetchNumBatches(5)
                    .batchSizePerWorker(batchSizePerExecutor)
                    .averagingFrequency(1)
                    .repartionData(Repartition.Always)
                    .build());
    sparkNet.setCollectTrainingStats(true);


    //List all the files:
    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(tempDir.toUri(), config);
    RemoteIterator<LocatedFileStatus> fileIter = hdfs.listFiles(new org.apache.hadoop.fs.Path(tempDir.toString()), false);

    List<String> paths = new ArrayList<>();
    while(fileIter.hasNext()){
        String path = fileIter.next().getPath().toString();
        paths.add(path);
    }

    INDArray paramsBefore = sparkNet.getNetwork().params().dup();
    JavaRDD<String> pathRdd = sc.parallelize(paths);
    sparkNet.fitPaths(pathRdd);

    INDArray paramsAfter = sparkNet.getNetwork().params().dup();
    assertNotEquals(paramsBefore, paramsAfter);

    SparkTrainingStats stats = sparkNet.getSparkTrainingStats();
    System.out.println(stats.statsAsString());
}
 
Developer ID: PacktPublishing, Project: Deep-Learning-with-Hadoop, Lines of code: 69, Source file: TestSparkMultiLayerParameterAveraging.java


Note: The org.apache.hadoop.fs.FileSystem.listFiles method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by many developers; copyright of the source code belongs to the original authors. For distribution and use, please consult the License of the corresponding project. Do not reproduce without permission.