本文整理汇总了Java中org.apache.flink.core.fs.FileSystem.listStatus方法的典型用法代码示例。如果您正苦于以下问题:Java FileSystem.listStatus方法的具体用法?Java FileSystem.listStatus怎么用?Java FileSystem.listStatus使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.flink.core.fs.FileSystem
的用法示例。
在下文中一共展示了FileSystem.listStatus方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: createInputSplits
import org.apache.flink.core.fs.FileSystem; //导入方法依赖的package包/类
@Override
public TweetFileInputSplit[] createInputSplits(int minNumSplits) throws IOException {
	// List everything under the input path; only files whose name ends in
	// "edges" become input splits. The split number is the file's position
	// in the listing, so skipped files leave gaps in the numbering.
	final FileSystem fileSystem = getFileSystem();
	final FileStatus[] statuses = fileSystem.listStatus(new Path(inputPath));
	logger.info("Found {} files", statuses.length);

	final List<TweetFileInputSplit> splits = new ArrayList<>();
	int splitNumber = 0;
	for (FileStatus status : statuses) {
		if (status.getPath().getName().endsWith("edges")) {
			splits.add(new TweetFileInputSplit(splitNumber, status.getPath()));
		}
		splitNumber++;
	}

	logger.info("Result number of splits: {}", splits.size());
	return splits.toArray(new TweetFileInputSplit[0]);
}
示例2: getFiles
import org.apache.flink.core.fs.FileSystem; //导入方法依赖的package包/类
protected List<FileStatus> getFiles() throws IOException {
// get all the files that are involved in the splits
List<FileStatus> files = new ArrayList<FileStatus>();
final FileSystem fs = this.filePath.getFileSystem();
final FileStatus pathFile = fs.getFileStatus(this.filePath);
if (pathFile.isDir()) {
// input is directory. list all contained files
final FileStatus[] partials = fs.listStatus(this.filePath);
for (FileStatus partial : partials) {
if (!partial.isDir()) {
files.add(partial);
}
}
} else {
files.add(pathFile);
}
return files;
}
示例3: listNewFiles
import org.apache.flink.core.fs.FileSystem; //导入方法依赖的package包/类
/**
 * Lists the files under {@code path} that have not been filtered out yet,
 * recording each accepted file's modification time in {@code modificationTimes}.
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
	// listStatus returns null when the path does not exist
	final FileStatus[] statuses = fileSystem.listStatus(new Path(path));
	if (statuses == null) {
		LOG.warn("Path does not exist: {}", path);
		return new ArrayList<String>();
	}

	final List<String> newFiles = new ArrayList<String>();
	for (FileStatus status : statuses) {
		final Path filePath = status.getPath();
		final String fileName = filePath.getName();
		final long modificationTime = status.getModificationTime();
		if (!isFiltered(fileName, modificationTime)) {
			newFiles.add(filePath.toString());
			// remember when we saw this file so it is filtered next round
			modificationTimes.put(fileName, modificationTime);
		}
	}
	return newFiles;
}
示例4: addFilesInDir
import org.apache.flink.core.fs.FileSystem; //导入方法依赖的package包/类
/**
 * Enumerates all files in the given directory, descending into sub-directories
 * when {@code enumerateNestedFiles} is true.
 *
 * @param path the directory to enumerate
 * @param files collector that every accepted file status is appended to
 * @param logExcludedFiles whether rejected entries are logged at debug level
 * @return the total length in bytes of the accepted files
 * @throws IOException if listing the directory fails
 */
private long addFilesInDir(Path path, List<FileStatus> files, boolean logExcludedFiles)
		throws IOException {
	final FileSystem fs = path.getFileSystem();

	long length = 0;

	for (FileStatus dir : fs.listStatus(path)) {
		if (dir.isDir()) {
			// recurse only if the filter accepts the directory AND nested
			// enumeration is enabled
			if (acceptFile(dir) && enumerateNestedFiles) {
				length += addFilesInDir(dir.getPath(), files, logExcludedFiles);
			} else {
				if (logExcludedFiles && LOG.isDebugEnabled()) {
					LOG.debug("Directory " + dir.getPath().toString() + " did not pass the file-filter and is excluded.");
				}
			}
		}
		else {
			if (acceptFile(dir)) {
				files.add(dir);
				length += dir.getLen();
				testForUnsplittable(dir);
			} else {
				if (logExcludedFiles && LOG.isDebugEnabled()) {
					// bug fix: this branch handles regular files, not directories,
					// so the message must not claim the entry is a directory
					LOG.debug("File " + dir.getPath().toString() + " did not pass the file-filter and is excluded.");
				}
			}
		}
	}
	return length;
}
示例5: findTestFiles
import org.apache.flink.core.fs.FileSystem; //导入方法依赖的package包/类
/**
 * Collects the paths of all Python ({@code .py}) test scripts found directly
 * under the base Python test directory on the local file system.
 */
private static List<String> findTestFiles() throws Exception {
	final FileSystem fs = FileSystem.getLocalFileSystem();
	final List<String> pythonFiles = new ArrayList<>();
	for (FileStatus status : fs.listStatus(getBaseTestPythonDir())) {
		final String filePath = status.getPath().toString();
		if (filePath.endsWith(".py")) {
			pythonFiles.add(filePath);
		}
	}
	return pythonFiles;
}
示例6: copy
import org.apache.flink.core.fs.FileSystem; //导入方法依赖的package包/类
/**
 * Recursively copies the file or directory at {@code sourcePath} to {@code targetPath},
 * skipping targets that already exist.
 *
 * <p>Directories are recreated at the target and their children copied one by one;
 * regular files are streamed byte-for-byte. Per-file copy failures are logged and
 * swallowed rather than propagated, so a partial copy is possible.
 *
 * @param sourcePath source file or directory
 * @param targetPath destination path; skipped entirely if it already exists
 * @param executable whether copied files are marked executable
 *                   (NOTE(review): applied via java.io.File, so presumably only
 *                   effective for local target file systems — confirm)
 * @throws IOException if the file systems cannot be accessed
 */
public static void copy(Path sourcePath, Path targetPath, boolean executable) throws IOException {
// TODO rewrite this to make it participate in the closable registry and the lifecycle of a task.
// we unwrap the file system to get raw streams without safety net
FileSystem sFS = FileSystem.getUnguardedFileSystem(sourcePath.toUri());
FileSystem tFS = FileSystem.getUnguardedFileSystem(targetPath.toUri());
// existing targets are never overwritten
if (!tFS.exists(targetPath)) {
if (sFS.getFileStatus(sourcePath).isDir()) {
// directory: create it at the target, then copy each child recursively
tFS.mkdirs(targetPath);
FileStatus[] contents = sFS.listStatus(sourcePath);
for (FileStatus content : contents) {
String distPath = content.getPath().toString();
if (content.isDir()) {
// strip a trailing slash so lastIndexOf("/") below finds the
// directory's own name segment, not the empty string after it
if (distPath.endsWith("/")) {
distPath = distPath.substring(0, distPath.length() - 1);
}
}
// child target path = target dir + last path segment of the child
String localPath = targetPath.toString() + distPath.substring(distPath.lastIndexOf("/"));
copy(content.getPath(), new Path(localPath), executable);
}
} else {
// regular file: stream the bytes across, never overwriting
try (FSDataOutputStream lfsOutput = tFS.create(targetPath, FileSystem.WriteMode.NO_OVERWRITE); FSDataInputStream fsInput = sFS.open(sourcePath)) {
IOUtils.copyBytes(fsInput, lfsOutput);
//noinspection ResultOfMethodCallIgnored
new File(targetPath.toString()).setExecutable(executable);
} catch (IOException ioe) {
// deliberate best-effort: a single failed file does not abort the copy
LOG.error("could not copy file to local file cache.", ioe);
}
}
}
}
示例7: testDirectoryListing
import org.apache.flink.core.fs.FileSystem; //导入方法依赖的package包/类
@Test
public void testDirectoryListing() throws Exception {
	// wire up the S3 credentials and (re-)initialize the file system factory
	final Configuration conf = new Configuration();
	conf.setString("s3.access.key", ACCESS_KEY);
	conf.setString("s3.secret.key", SECRET_KEY);
	FileSystem.initialize(conf);

	final Path directory = new Path("s3://" + BUCKET + '/' + TEST_DATA_DIR + "/testdir/");
	final FileSystem fs = directory.getFileSystem();

	// the directory must not exist before the test starts
	assertFalse(fs.exists(directory));

	try {
		// create the directory; it must be visible and empty afterwards
		assertTrue(fs.mkdirs(directory));
		// seems the presto file system does not assume existence of empty directories in S3
		assertTrue(fs.exists(directory));
		assertEquals(0, fs.listStatus(directory).length);

		// populate the directory with a few small files
		final int numFiles = 3;
		for (int i = 0; i < numFiles; i++) {
			final Path file = new Path(directory, "/file-" + i);
			try (FSDataOutputStream out = fs.create(file, WriteMode.NO_OVERWRITE);
					OutputStreamWriter writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
				writer.write("hello-" + i + "\n");
			}
		}

		// the listing must report exactly the created files, none of them directories
		final FileStatus[] files = fs.listStatus(directory);
		assertNotNull(files);
		assertEquals(numFiles, files.length);
		for (FileStatus status : files) {
			assertFalse(status.isDir());
		}

		// now that there are files, the directory must exist
		assertTrue(fs.exists(directory));
	}
	finally {
		// clean up
		fs.delete(directory, true);
	}

	// after cleanup the directory must be gone again
	assertFalse(fs.exists(directory));
}
示例8: testDirectoryListing
import org.apache.flink.core.fs.FileSystem; //导入方法依赖的package包/类
// Verifies directory creation, listing, and deletion against the presto S3
// file system: an empty directory lists as zero entries, created files show
// up as non-directory statuses, and recursive delete removes everything.
@Test
public void testDirectoryListing() throws Exception {
// configure S3 credentials (note: presto uses dash-separated keys,
// unlike the hadoop-s3 variant's "s3.access.key"/"s3.secret.key")
final Configuration conf = new Configuration();
conf.setString("s3.access-key", ACCESS_KEY);
conf.setString("s3.secret-key", SECRET_KEY);
FileSystem.initialize(conf);
final Path directory = new Path("s3://" + BUCKET + '/' + TEST_DATA_DIR + "/testdir/");
final FileSystem fs = directory.getFileSystem();
// directory must not yet exist
assertFalse(fs.exists(directory));
try {
// create directory
assertTrue(fs.mkdirs(directory));
// seems the presto file system does not assume existence of empty directories in S3
// (hence the existence assertion below is deliberately disabled)
// assertTrue(fs.exists(directory));
// directory empty
assertEquals(0, fs.listStatus(directory).length);
// create some files
final int numFiles = 3;
for (int i = 0; i < numFiles; i++) {
Path file = new Path(directory, "/file-" + i);
try (FSDataOutputStream out = fs.create(file, WriteMode.NO_OVERWRITE);
OutputStreamWriter writer = new OutputStreamWriter(out, StandardCharsets.UTF_8)) {
writer.write("hello-" + i + "\n");
}
}
// the listing must now report exactly the created files
FileStatus[] files = fs.listStatus(directory);
assertNotNull(files);
assertEquals(3, files.length);
for (FileStatus status : files) {
assertFalse(status.isDir());
}
// now that there are files, the directory must exist
assertTrue(fs.exists(directory));
}
finally {
// clean up
fs.delete(directory, true);
}
// now directory must be gone
assertFalse(fs.exists(directory));
}
示例9: testPersistMultipleMetadataOnlyCheckpoints
import org.apache.flink.core.fs.FileSystem; //导入方法依赖的package包/类
/**
 * Validates that multiple checkpoints from different jobs with the same checkpoint ID do not
 * interfere with each other.
 */
@Test
public void testPersistMultipleMetadataOnlyCheckpoints() throws Exception {
final FileSystem fs = FileSystem.getLocalFileSystem();
final Path checkpointDir = new Path(tmp.newFolder().toURI());
final FsStateBackend backend = new FsStateBackend(checkpointDir);
// two distinct jobs deliberately sharing the SAME checkpoint ID
final JobID jobId1 = new JobID();
final JobID jobId2 = new JobID();
final long checkpointId = 177;
final CheckpointStorage storage1 = backend.createCheckpointStorage(jobId1);
final CheckpointStorage storage2 = backend.createCheckpointStorage(jobId2);
final CheckpointStorageLocation loc1 = storage1.initializeLocationForCheckpoint(checkpointId);
final CheckpointStorageLocation loc2 = storage2.initializeLocationForCheckpoint(checkpointId);
// distinct payloads so cross-contamination between the two jobs is detectable
final byte[] data1 = {77, 66, 55, 99, 88};
final byte[] data2 = {1, 3, 2, 5, 4};
// write and finalize each job's metadata in turn
try (CheckpointStateOutputStream out = loc1.createMetadataOutputStream()) {
out.write(data1);
out.closeAndGetHandle();
}
final String result1 = loc1.markCheckpointAsFinished();
try (CheckpointStateOutputStream out = loc2.createMetadataOutputStream()) {
out.write(data2);
out.closeAndGetHandle();
}
final String result2 = loc2.markCheckpointAsFinished();
// check that this went to a file, but in a nested directory structure
// one directory per job
FileStatus[] files = fs.listStatus(checkpointDir);
assertEquals(2, files.length);
// in each per-job directory, one for the checkpoint
// (3 entries each — presumably the checkpoint dir plus shared/taskowned dirs; verify against backend layout)
FileStatus[] job1Files = fs.listStatus(files[0].getPath());
FileStatus[] job2Files = fs.listStatus(files[1].getPath());
assertEquals(3, job1Files.length);
assertEquals(3, job2Files.length);
// each finalized location must contain the metadata file
assertTrue(fs.exists(new Path(result1, AbstractFsCheckpointStorage.METADATA_FILE_NAME)));
assertTrue(fs.exists(new Path(result2, AbstractFsCheckpointStorage.METADATA_FILE_NAME)));
// and resolving each checkpoint must yield that job's own payload
validateContents(backend.resolveCheckpoint(result1), data1);
validateContents(backend.resolveCheckpoint(result2), data2);
}
示例10: testBlobServerRecovery
import org.apache.flink.core.fs.FileSystem; //导入方法依赖的package包/类
/**
 * Helper to test that the {@link BlobServer} recovery from its HA store works.
 *
 * <p>Uploads two BLOBs to one {@link BlobServer} and expects a second one to be able to retrieve
 * them via a shared HA store upon request of a {@link BlobCacheService}.
 *
 * @param config
 * blob server configuration (including HA settings like {@link HighAvailabilityOptions#HA_STORAGE_PATH}
 * and {@link HighAvailabilityOptions#HA_CLUSTER_ID}) used to set up <tt>blobStore</tt>
 * @param blobStore
 * shared HA blob store to use
 *
 * @throws IOException
 * in case of failures
 */
public static void testBlobServerRecovery(final Configuration config, final BlobStore blobStore) throws IOException {
final String clusterId = config.getString(HighAvailabilityOptions.HA_CLUSTER_ID);
String storagePath = config.getString(HighAvailabilityOptions.HA_STORAGE_PATH) + "/" + clusterId;
Random rand = new Random();
try (
BlobServer server0 = new BlobServer(config, blobStore);
BlobServer server1 = new BlobServer(config, blobStore);
// use VoidBlobStore as the HA store to force download from server[1]'s HA store
BlobCacheService cache1 = new BlobCacheService(
new InetSocketAddress("localhost", server1.getPort()), config,
new VoidBlobStore())) {
server0.start();
server1.start();
// Random data
byte[] expected = new byte[1024];
rand.nextBytes(expected);
// second payload is a slice of the first, so the two blobs differ
byte[] expected2 = Arrays.copyOfRange(expected, 32, 288);
BlobKey[] keys = new BlobKey[2];
BlobKey nonHAKey;
// Put job-related HA data (all uploads go through server0 only)
JobID[] jobId = new JobID[] { new JobID(), new JobID() };
keys[0] = put(server0, jobId[0], expected, PERMANENT_BLOB); // Request 1
keys[1] = put(server0, jobId[1], expected2, PERMANENT_BLOB); // Request 2
// put non-HA data
nonHAKey = put(server0, jobId[0], expected2, TRANSIENT_BLOB);
// same content as keys[1] but different blob type, so hashes match while keys differ
verifyKeyDifferentHashEquals(keys[1], nonHAKey);
// check that the storage directory exists
final Path blobServerPath = new Path(storagePath, "blob");
FileSystem fs = blobServerPath.getFileSystem();
assertTrue("Unknown storage dir: " + blobServerPath, fs.exists(blobServerPath));
// Verify HA requests from cache1 (connected to server1) with no immediate access to the file
verifyContents(cache1, jobId[0], keys[0], expected);
verifyContents(cache1, jobId[1], keys[1], expected2);
// Verify non-HA file is not accessible from server1
verifyDeleted(cache1, jobId[0], nonHAKey);
// Remove again
server1.cleanupJob(jobId[0]);
server1.cleanupJob(jobId[1]);
// Verify everything is clean: the HA root must survive cleanup ...
assertTrue("HA storage directory does not exist", fs.exists(new Path(storagePath)));
// ... but the blob sub-directory must be empty/gone; any leftover files fail the test
if (fs.exists(blobServerPath)) {
final org.apache.flink.core.fs.FileStatus[] recoveryFiles =
fs.listStatus(blobServerPath);
ArrayList<String> filenames = new ArrayList<>(recoveryFiles.length);
for (org.apache.flink.core.fs.FileStatus file: recoveryFiles) {
filenames.add(file.toString());
}
fail("Unclean state backend: " + filenames);
}
}
}
示例11: listEligibleFiles
import org.apache.flink.core.fs.FileSystem; //导入方法依赖的package包/类
/**
 * Returns the paths of the files not yet processed.
 *
 * @param fileSystem The filesystem where the monitored directory resides.
 * @param path the directory (or sub-directory, when recursing) to scan
 * @return map from file path to its status for every eligible file; empty on
 *         listing failure or if the path does not exist
 */
private Map<Path, FileStatus> listEligibleFiles(FileSystem fileSystem, Path path) throws IOException {
	FileStatus[] statuses;
	try {
		statuses = fileSystem.listStatus(path);
	} catch (IOException e) {
		// we may run into an IOException if files are moved while listing their status
		// delay the check for eligible files in this case
		return Collections.emptyMap();
	}

	if (statuses == null) {
		LOG.warn("Path does not exist: {}", path);
		return Collections.emptyMap();
	}

	final Map<Path, FileStatus> eligible = new HashMap<>();
	for (FileStatus status : statuses) {
		if (!status.isDir()) {
			// new file: keep it unless the ignore-filter rejects it
			final Path filePath = status.getPath();
			final long modificationTime = status.getModificationTime();
			if (!shouldIgnore(filePath, modificationTime)) {
				eligible.put(filePath, status);
			}
		} else if (format.getNestedFileEnumeration() && format.acceptFile(status)) {
			// accepted sub-directory: recurse and merge its eligible files
			eligible.putAll(listEligibleFiles(fileSystem, status.getPath()));
		}
	}
	return eligible;
}
示例12: getStatistics
import org.apache.flink.core.fs.FileSystem; //导入方法依赖的package包/类
@Override
public BaseStatistics getStatistics(BaseStatistics cachedStatistics) throws IOException {
	// statistics for this format are simply the number of entries under the input path
	final FileStatus[] inputFiles = getFileSystem().listStatus(new Path(inputPath));
	return new GraphStatistics(inputFiles.length);
}