This article collects and organizes typical usage examples of the Java method org.apache.hadoop.fs.FileSystem.listFiles. If you are wondering what FileSystem.listFiles does, how to call it, or what real-world usage looks like, the curated code samples below should help. You can also read more about the enclosing class, org.apache.hadoop.fs.FileSystem.
The sections below show 14 code examples of the FileSystem.listFiles method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code samples.
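Before the individual examples, here is a minimal, self-contained sketch of the call pattern they all share: obtain a FileSystem, call listFiles(path, recursive) to get a RemoteIterator<LocatedFileStatus>, and drain the iterator. The path "/tmp/data" is only an illustrative placeholder, not taken from any of the examples below.
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListFilesSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    // The second argument requests a recursive listing; "/tmp/data" is a placeholder path.
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/tmp/data"), true);
    while (it.hasNext()) {
      LocatedFileStatus status = it.next();
      System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
    }
  }
}
Note that listFiles returns only files, never directories; to enumerate directories as well, combine it with listStatus as Example 10 does.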
Example 1: assertFileCount
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
/**
* Assert that the number of log files in the target directory is as expected.
* @param fs the target FileSystem
* @param dir the target directory path
* @param expected the expected number of files
* @throws IOException thrown if listing files fails
*/
public void assertFileCount(FileSystem fs, Path dir, int expected)
throws IOException {
RemoteIterator<LocatedFileStatus> i = fs.listFiles(dir, true);
int count = 0;
while (i.hasNext()) {
i.next();
count++;
}
assertTrue("The sink created additional unexpected log files. " + count
+ "files were created", expected >= count);
assertTrue("The sink created too few log files. " + count + "files were "
+ "created", expected <= count);
}
Example 2: getOrcFiles
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
/**
* Get all ORC files present in directory for the specified table and partition/bucket. The ORC
* files returned are in ascending order of the (insertion) time-partition and sequence-id within
* the time-partition.
*
* @param orcDir the ORC store directory
* @param fileExt files whose names end with this extension are skipped (e.g. CRC checksum files)
* @param args the arguments in order: table-name, bucket-id, time-partition-id
* @return the list of all ORC files
*/
private String[] getOrcFiles(final String orcDir, final String fileExt, final String... args) {
try {
FileSystem fileSystem = FileSystem.get(conf);
Path distributedPath = new Path(Paths.get(orcDir, args).toString());
ArrayList<String> filePathStrings = new ArrayList<>();
if (fileSystem.exists(distributedPath)) {
RemoteIterator<LocatedFileStatus> fileListItr = fileSystem.listFiles(distributedPath, true);
while (fileListItr != null && fileListItr.hasNext()) {
LocatedFileStatus file = fileListItr.next();
if (!file.getPath().getName().endsWith(fileExt)) {
// only collect files whose names do not end with fileExt (used to skip CRC checksum files)
filePathStrings.add(file.getPath().toUri().toString());
}
}
Collections.sort(filePathStrings);
}
String[] retArray = new String[filePathStrings.size()];
filePathStrings.toArray(retArray);
return retArray;
} catch (IOException e) {
e.printStackTrace();
}
return new String[0];
}
Example 3: getFilesCount
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public int getFilesCount(String storeBaseDir, String tableName) {
int filesCount = 0;
try {
FileSystem fs = FileSystem.get(conf);
Path storeBasePath = new Path(fs.getHomeDirectory(), storeBaseDir);
Path tablePath = new Path(storeBasePath, tableName);
if (fs.exists(tablePath)) {
RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator =
fs.listFiles(tablePath, false);
while (locatedFileStatusRemoteIterator.hasNext()) {
filesCount++;
LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
System.out.println("File name is " + next.getPath());
}
}
} catch (IOException e) {
e.printStackTrace();
}
return filesCount;
}
Example 4: getORCRecords
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public List<OrcStruct> getORCRecords(String storeBaseDir, String tableName) throws IOException {
List<OrcStruct> orcrecords = new ArrayList<>();
try {
FileSystem fs = FileSystem.get(conf);
Path storeBasePath = new Path(fs.getHomeDirectory(), storeBaseDir);
Path tablePath = new Path(storeBasePath, tableName);
if (fs.exists(tablePath)) {
RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator =
fs.listFiles(tablePath, false);
while (locatedFileStatusRemoteIterator.hasNext()) {
LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
final org.apache.hadoop.hive.ql.io.orc.Reader fis =
OrcFile.createReader(next.getPath(), OrcFile.readerOptions(conf));
RecordReader rows = fis.rows();
while (rows.hasNext()) {
orcrecords.add((OrcStruct) rows.next(null));
}
System.out.println("File name is " + next.getPath());
}
}
} catch (IOException e) {
e.printStackTrace();
}
return orcrecords;
}
Example 5: assertListFilesFinds
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
/**
* To get this project to compile under Hadoop 1, this code needs to be
* commented out
*
*
* @param fs filesystem
* @param dir dir
* @param subdir subdir
* @param recursive recurse?
* @throws IOException IO problems
*/
public static void assertListFilesFinds(FileSystem fs,
Path dir,
Path subdir,
boolean recursive) throws IOException {
RemoteIterator<LocatedFileStatus> iterator =
fs.listFiles(dir, recursive);
boolean found = false;
int entries = 0;
StringBuilder builder = new StringBuilder();
while (iterator.hasNext()) {
LocatedFileStatus next = iterator.next();
entries++;
builder.append(next.toString()).append('\n');
if (next.getPath().equals(subdir)) {
found = true;
}
}
assertTrue("Path " + subdir
+ " not found in directory " + dir + " : "
+ " entries=" + entries
+ " content"
+ builder.toString(),
found);
}
Example 6: publishPlainDataStatistics
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
static DataStatistics publishPlainDataStatistics(Configuration conf,
Path inputDir)
throws IOException {
FileSystem fs = inputDir.getFileSystem(conf);
// obtain input data file statuses
long dataSize = 0;
long fileCount = 0;
RemoteIterator<LocatedFileStatus> iter = fs.listFiles(inputDir, true);
PathFilter filter = new Utils.OutputFileUtils.OutputFilesFilter();
while (iter.hasNext()) {
LocatedFileStatus lStatus = iter.next();
if (filter.accept(lStatus.getPath())) {
dataSize += lStatus.getLen();
++fileCount;
}
}
// publish the plain data statistics
LOG.info("Total size of input data : "
+ StringUtils.humanReadableInt(dataSize));
LOG.info("Total number of input data files : " + fileCount);
return new DataStatistics(dataSize, fileCount, false);
}
Example 7: readKeysToSearch
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
static SortedSet<byte []> readKeysToSearch(final Configuration conf)
throws IOException, InterruptedException {
Path keysInputDir = new Path(conf.get(SEARCHER_INPUTDIR_KEY));
FileSystem fs = FileSystem.get(conf);
SortedSet<byte []> result = new TreeSet<byte []>(Bytes.BYTES_COMPARATOR);
if (!fs.exists(keysInputDir)) {
throw new FileNotFoundException(keysInputDir.toString());
}
if (!fs.isDirectory(keysInputDir)) {
throw new UnsupportedOperationException("TODO");
} else {
RemoteIterator<LocatedFileStatus> iterator = fs.listFiles(keysInputDir, false);
while(iterator.hasNext()) {
LocatedFileStatus keyFileStatus = iterator.next();
// Skip "_SUCCESS" file.
if (keyFileStatus.getPath().getName().startsWith("_")) continue;
result.addAll(readFileToSearch(conf, fs, keyFileStatus));
}
}
return result;
}
Example 8: getFileCount
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
private int getFileCount(URI uri) throws IOException {
FileSystem fs = gcpUtils.fileSystemForUri(uri);
RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path(uri), true);
int count = 0;
while (it.hasNext()) {
it.next();
count++;
}
return count;
}
Example 9: listFiles
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public Iterator<FileMetadata> listFiles(FileSystem fs) throws IOException {
return new Iterator<FileMetadata>() {
RemoteIterator<LocatedFileStatus> it = fs.listFiles(fs.getWorkingDirectory(), recursive);
LocatedFileStatus current = null;
boolean previous = false;
@Override
public boolean hasNext() {
try {
if (current == null) {
if (!it.hasNext()) return false;
current = it.next();
return hasNext();
}
if (current.isFile() &&
fileRegexp.matcher(current.getPath().getName()).find()) {
return true;
}
current = null;
return hasNext();
} catch (IOException ioe) {
throw new ConnectException(ioe);
}
}
@Override
public FileMetadata next() {
if (!hasNext() && current == null) {
throw new NoSuchElementException("There are no more items");
}
FileMetadata metadata = toMetadata(current);
current = null;
return metadata;
}
};
}
Example 10: main
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public static void main(String[] args) throws Exception {
String uri = "hdfs://hadoop-master:9000/";
Configuration config = new Configuration();
FileSystem fs = FileSystem.get(URI.create(uri), config, "root");
FileStatus[] listStatus = fs.listStatus(new Path("/"));
for (FileStatus file : listStatus) {
System.out.println("[" + (file.isFile() ? "file" : "dir") + "] " + file.getPath().getName());
}
RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
while (listFiles.hasNext()) {
LocatedFileStatus fileStatus = listFiles.next();
log.info("block size:{}",fileStatus.getBlockSize());
log.info("owner : {}", fileStatus.getOwner());
log.info("replication : {}" ,fileStatus.getReplication());
log.info("permission : {}", fileStatus.getPermission());
log.info("path name : {}",fileStatus.getPath().getName());
log.info("========block info=========");
BlockLocation[] blockLocations = fileStatus.getBlockLocations();
for (BlockLocation blockLocation : blockLocations){
log.info("block offset : {}",blockLocation.getOffset());
log.info("block length : {}",blockLocation.getLength());
String[] dataNodes = blockLocation.getHosts();
for (String dataNode : dataNodes){
log.info("dataNode :{}",dataNode);
}
}
}
}
Example 11: run
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public int run(String[] args, PrintStream stream) throws Exception {
out = stream;
List<String> paths = parseArgs(args);
if (paths.size() != 1) {
errorln(USAGE);
return E_USAGE;
}
println("Hadoop %s", getVersion());
println("Compiled by %s on %s", getUser(), getDate());
println("Compiled with protoc %s", getProtocVersion());
println("From source with checksum %s", getSrcChecksum());
Configuration conf = getConf();
Path path = new Path(paths.get(0));
FileSystem fs = path.getFileSystem(conf);
println("Filesystem for %s is %s", path, fs);
// examine the FS
Configuration fsConf = fs.getConf();
for (int i = 0; i < props.length; i++) {
showProp(fsConf, (String) props[i][0], (Boolean) props[i][1]);
}
Path root = fs.makeQualified(new Path("/"));
try (DurationInfo d = new DurationInfo(LOG,
"Listing %s", root)) {
println("%s has %d entries", root, fs.listStatus(root).length);
}
String dirName = "dir-" + UUID.randomUUID();
Path dir = new Path(root, dirName);
try (DurationInfo d = new DurationInfo(LOG,
"Creating a directory %s", dir)) {
fs.mkdirs(dir);
}
try {
Path file = new Path(dir, "file");
try (DurationInfo d = new DurationInfo(LOG,
"Creating a file %s", file)) {
FSDataOutputStream data = fs.create(file, true);
data.writeUTF(HELLO);
data.close();
}
try (DurationInfo d = new DurationInfo(LOG,
"Listing %s", dir)) {
fs.listFiles(dir, false);
}
try (DurationInfo d = new DurationInfo(LOG,
"Reading a file %s", file)) {
FSDataInputStream in = fs.open(file);
String utf = in.readUTF();
in.close();
if (!HELLO.equals(utf)) {
throw new IOException("Expected " + file + " to contain the text "
+ HELLO + " -but it has the text \"" + utf + "\"");
}
}
try (DurationInfo d = new DurationInfo(LOG,
"Deleting file %s", file)) {
fs.delete(file, true);
}
} finally {
try (DurationInfo d = new DurationInfo(LOG,
"Deleting directory %s", dir)) {
try {
fs.delete(dir, true);
} catch (Exception e) {
LOG.warn("When deleting {}: ", dir, e);
}
}
}
// Validate parameters.
return SUCCESS;
}
Example 12: listHDFSFiles
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
public static RemoteIterator<LocatedFileStatus> listHDFSFiles(FileSystem fs, Path path)
throws IOException {
return fs.listFiles(path, true);
}
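Since Example 12 is only a thin wrapper around fs.listFiles, a short hypothetical usage sketch may help; the "/logs" directory and the 1 MiB threshold are illustrative assumptions, not part of the original code, and the Hadoop classes are the same ones imported throughout these examples.
// Hypothetical caller: counts files larger than 1 MiB under /logs using the wrapper above.
static long countLargeFiles(FileSystem fs) throws IOException {
  RemoteIterator<LocatedFileStatus> it = listHDFSFiles(fs, new Path("/logs"));
  long largeFiles = 0;
  while (it.hasNext()) {
    if (it.next().getLen() > 1024L * 1024L) {
      largeFiles++;
    }
  }
  return largeFiles;
}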
Example 13: processInputArgument
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
/**
* Processes the input file/folder argument. If the input is a file,
* then it is directly considered for further processing by TraceBuilder.
* If the input is a folder, then all the history logs in the
* input folder are considered for further processing.
*
* If isRecursive is true, then the input path is recursively scanned
* for job history logs for further processing by TraceBuilder.
*
* NOTE: If the input represents a globbed path, then it is first flattened
* and then the individual paths represented by the globbed input
* path are considered for further processing.
*
* @param input input path, possibly globbed
* @param conf configuration
* @param isRecursive whether to recursively traverse the input paths to
* find history logs
* @return the input history log files' paths
* @throws FileNotFoundException
* @throws IOException
*/
static List<Path> processInputArgument(String input, Configuration conf,
boolean isRecursive) throws FileNotFoundException, IOException {
Path inPath = new Path(input);
FileSystem fs = inPath.getFileSystem(conf);
FileStatus[] inStatuses = fs.globStatus(inPath);
List<Path> inputPaths = new LinkedList<Path>();
if (inStatuses == null || inStatuses.length == 0) {
return inputPaths;
}
for (FileStatus inStatus : inStatuses) {
Path thisPath = inStatus.getPath();
if (inStatus.isDirectory()) {
// Find list of files in this path(recursively if -recursive option
// is specified).
List<FileStatus> historyLogs = new ArrayList<FileStatus>();
RemoteIterator<LocatedFileStatus> iter =
fs.listFiles(thisPath, isRecursive);
while (iter.hasNext()) {
LocatedFileStatus child = iter.next();
String fileName = child.getPath().getName();
if (!(fileName.endsWith(".crc") || fileName.startsWith("."))) {
historyLogs.add(child);
}
}
if (historyLogs.size() > 0) {
// Add the sorted history log file names in this path to the
// inputPaths list
FileStatus[] sortableNames =
historyLogs.toArray(new FileStatus[historyLogs.size()]);
Arrays.sort(sortableNames, new HistoryLogsComparator());
for (FileStatus historyLog : sortableNames) {
inputPaths.add(historyLog.getPath());
}
}
} else {
inputPaths.add(thisPath);
}
}
return inputPaths;
}
Example 14: testFitViaStringPaths
import org.apache.hadoop.fs.FileSystem; // import the package/class this method depends on
@Test
public void testFitViaStringPaths() throws Exception {
Path tempDir = Files.createTempDirectory("DL4J-testFitViaStringPaths");
File tempDirF = tempDir.toFile();
tempDirF.deleteOnExit();
int dataSetObjSize = 5;
int batchSizePerExecutor = 25;
DataSetIterator iter = new MnistDataSetIterator(dataSetObjSize,1000,false);
int i=0;
while(iter.hasNext()){
File nextFile = new File(tempDirF, i + ".bin");
DataSet ds = iter.next();
ds.save(nextFile);
i++;
}
System.out.println("Saved to: " + tempDirF.getAbsolutePath());
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
.updater(Updater.RMSPROP)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
.list()
.layer(0, new org.deeplearning4j.nn.conf.layers.DenseLayer.Builder()
.nIn(28*28).nOut(50)
.activation("tanh").build())
.layer(1, new org.deeplearning4j.nn.conf.layers.OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
.nIn(50).nOut(10)
.activation("softmax")
.build())
.pretrain(false).backprop(true)
.build();
SparkDl4jMultiLayer sparkNet = new SparkDl4jMultiLayer(sc,conf,
new ParameterAveragingTrainingMaster.Builder(numExecutors(), dataSetObjSize)
.workerPrefetchNumBatches(5)
.batchSizePerWorker(batchSizePerExecutor)
.averagingFrequency(1)
.repartionData(Repartition.Always)
.build());
sparkNet.setCollectTrainingStats(true);
//List all the files:
Configuration config = new Configuration();
FileSystem hdfs = FileSystem.get(tempDir.toUri(), config);
RemoteIterator<LocatedFileStatus> fileIter = hdfs.listFiles(new org.apache.hadoop.fs.Path(tempDir.toString()), false);
List<String> paths = new ArrayList<>();
while(fileIter.hasNext()){
String path = fileIter.next().getPath().toString();
paths.add(path);
}
INDArray paramsBefore = sparkNet.getNetwork().params().dup();
JavaRDD<String> pathRdd = sc.parallelize(paths);
sparkNet.fitPaths(pathRdd);
INDArray paramsAfter = sparkNet.getNetwork().params().dup();
assertNotEquals(paramsBefore, paramsAfter);
SparkTrainingStats stats = sparkNet.getSparkTrainingStats();
System.out.println(stats.statsAsString());
}
Developer: PacktPublishing, project: Deep-Learning-with-Hadoop, lines of code: 69, source file: TestSparkMultiLayerParameterAveraging.java