This page collects typical usage examples of the Java method org.apache.hadoop.fs.LocatedFileStatus.isFile. If you have been wondering what LocatedFileStatus.isFile does and how to use it, the curated code examples below may help. You can also read further about the enclosing class, org.apache.hadoop.fs.LocatedFileStatus.
The following presents 7 code examples of LocatedFileStatus.isFile, ordered by popularity by default.
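Before the examples, here is a minimal, self-contained sketch of the pattern they all share: list entries recursively with FileSystem.listFiles and keep only those for which LocatedFileStatus.isFile returns true. The /data path and the default Configuration are placeholders, not taken from any example below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class IsFileDemo {
  public static void main(String[] args) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    // Recursively list all entries under a placeholder directory.
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/data"), true);
    while (it.hasNext()) {
      LocatedFileStatus status = it.next();
      // isFile() is true only for regular files, never for directories.
      if (status.isFile()) {
        System.out.println(status.getPath());
      }
    }
  }
}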
Example 1: literalAllSegments
import org.apache.hadoop.fs.LocatedFileStatus; // import the class the method depends on
public static void literalAllSegments(FileSystem fileSystem, Path dir, Consumer<LocatedFileStatus> consumer) throws IOException {
  RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(dir, true);
  while (files.hasNext()) {
    LocatedFileStatus fileStatus = files.next();
    // Skip anything that is not a regular file.
    if (!fileStatus.isFile()) {
      continue;
    }
    // Skip empty files.
    if (fileStatus.getLen() == 0) {
      continue;
    }
    Path path = fileStatus.getPath();
    if (checkSegmentByPath(path)) {
      consumer.accept(fileStatus);
    }
  }
}
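For reference, a call might look like the following. The /segments directory is a placeholder, and checkSegmentByPath is the author's own helper, not shown above.

FileSystem fs = FileSystem.get(new Configuration());
literalAllSegments(fs, new Path("/segments"),
    status -> System.out.println("segment file: " + status.getPath()));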
Example 2: validateDataDir
import org.apache.hadoop.fs.LocatedFileStatus; // import the class the method depends on
public static void validateDataDir(String dataDir) {
  try {
    FileSystem hdfs = getHDFS();
    Path dataPath = new Path(dataDir);
    if (!hdfs.exists(dataPath)) {
      log.error("HDFS data directory {} does not exist", dataDir);
      System.exit(-1);
    }
    // The directory is valid as soon as it contains at least one regular file.
    RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(dataPath, true);
    while (listIter.hasNext()) {
      LocatedFileStatus status = listIter.next();
      if (status.isFile()) {
        return;
      }
    }
    log.error("HDFS data directory {} has no files", dataDir);
    System.exit(-1);
  } catch (IOException e) {
    throw new IllegalStateException(e);
  }
}
Example 3: enqueue
import org.apache.hadoop.fs.LocatedFileStatus; // import the class the method depends on
private void enqueue(String path) throws IOException {
  Path filePath = new Path(path);
  if (!fs.exists(filePath) || fs.getFileStatus(filePath) == null) {
    log.info("Cannot enqueue file {} because it does not exist (an FS event was received for it)", filePath.toString());
    return;
  }
  RemoteIterator<LocatedFileStatus> it = fs.listFiles(filePath, false);
  while (it.hasNext()) {
    LocatedFileStatus status = it.next();
    // Queue only regular files whose names match the configured pattern.
    if (!status.isFile() || !fileRegexp.matcher(status.getPath().getName()).find()) continue;
    fileQueue.offer(toMetadata(status));
  }
}
Example 4: isValidFile
import org.apache.hadoop.fs.LocatedFileStatus; // import the class the method depends on
/**
 * Checks whether the file is a valid candidate.
 *
 * @param fileStatus file status holding the file name
 * @return true when valid, false otherwise
 */
private final boolean isValidFile(LocatedFileStatus fileStatus) {
  if (fileStatus.isFile()) {
    return WHITELIST_REGEXP.matcher(fileStatus.getPath().getName()).matches();
  } else {
    return false;
  }
}
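Note that WHITELIST_REGEXP is not defined in the snippet; it is presumably a precompiled java.util.regex.Pattern field along these lines, where the actual expression is an assumption for illustration only:

// Hypothetical definition -- the real whitelist pattern lives elsewhere in the class.
private static final Pattern WHITELIST_REGEXP = Pattern.compile(".*\\.seq");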
Example 5: isValidFile
import org.apache.hadoop.fs.LocatedFileStatus; // import the class the method depends on
/**
 * Checks whether the file is valid.
 *
 * @param fileStatus file status holding the file name
 * @return true when valid, false otherwise
 */
private boolean isValidFile(LocatedFileStatus fileStatus) {
  if (fileStatus.isFile()) {
    return whitelistPattern.matcher(fileStatus.getPath().getName()).matches();
  }
  // Fallback: anything that is not a regular file is rejected.
  return false;
}
Example 6: getBlockLocations
import org.apache.hadoop.fs.LocatedFileStatus; // import the class the method depends on
public LinkedList<BlockLocation> getBlockLocations(Path path) throws IOException {
  LOG.info("Collecting block locations...");
  LinkedList<BlockLocation> blockLocations = new LinkedList<BlockLocation>();
  RemoteIterator<LocatedFileStatus> statuses = listFiles(path, true);
  // hasNextCode (not shown here) appears to encode the iterator state:
  // 0 = done, 1 = next entry available, >1 = transient error, in which case
  // the entry is skipped and probing continues.
  int hasNextCode = hasNextCode(statuses);
  while (hasNextCode > 0) {
    if (hasNextCode > 1) {
      hasNextCode = hasNextCode(statuses);
      continue;
    }
    LocatedFileStatus fileStatus = statuses.next();
    if (fileStatus.isFile()) {
      BlockLocation[] blockLocations_tmp = getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
      blockLocations.addAll(Arrays.asList(blockLocations_tmp));
    }
    int size = blockLocations.size();
    if (size > 0 && size % 5000 == 0) {
      LOG.info("Collected " + size + " locations. Still in progress...");
    }
    if (size >= MAX_NUMBER_OF_LOCATIONS) {
      LOG.info("Reached max number of locations to collect. The amount will be representative enough.");
      break;
    }
    hasNextCode = hasNextCode(statuses);
  }
  LOG.info("Collected " + blockLocations.size() + " locations.");
  if (isHdfsBlocksMetadataEnabled()) {
    BlockStorageLocation[] blockStorageLocations = getFileBlockStorageLocations(blockLocations);
    blockLocations.clear();
    blockLocations.addAll(Arrays.asList(blockStorageLocations));
  } else {
    LOG.error("VolumeId/DiskId cannot be collected since "
        + "dfs.datanode.hdfs-blocks-metadata.enabled is not enabled.");
  }
  return blockLocations;
}
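The hasNextCode helper is not shown in the example. Judging from how the loop treats its return value (0 ends the loop, 1 consumes the next entry, anything above 1 skips and re-probes), a plausible reconstruction, purely an assumption, would be:

// Hypothetical reconstruction of the undocumented helper:
// 0 = iteration finished, 1 = next element available,
// 2 = transient error while probing, so the caller skips and retries.
private int hasNextCode(RemoteIterator<LocatedFileStatus> statuses) {
  try {
    return statuses.hasNext() ? 1 : 0;
  } catch (IOException e) {
    LOG.warn("Error while probing for the next file status", e);
    return 2;
  }
}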
Example 7: main
import org.apache.hadoop.fs.LocatedFileStatus; // import the class the method depends on
public static void main(String[] args) throws Exception {
  if (args.length != 1) {
    log.error("Usage: LoadHdfs <dataDir>");
    System.exit(1);
  }
  final String dataDir = args[0];
  IndexEnv.validateDataDir(dataDir);

  final String hadoopConfDir = IndexEnv.getHadoopConfDir();
  final int rateLimit = WebIndexConfig.load().getLoadRateLimit();

  // Collect the paths of all regular files under the data directory.
  List<String> loadPaths = new ArrayList<>();
  FileSystem hdfs = IndexEnv.getHDFS();
  RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(new Path(dataDir), true);
  while (listIter.hasNext()) {
    LocatedFileStatus status = listIter.next();
    if (status.isFile()) {
      loadPaths.add(status.getPath().toString());
    }
  }

  log.info("Loading {} files into Fluo from {}", loadPaths.size(), dataDir);

  SparkConf sparkConf = new SparkConf().setAppName("webindex-load-hdfs");
  try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {
    JavaRDD<String> paths = ctx.parallelize(loadPaths, loadPaths.size());
    paths.foreachPartition(iter -> {
      final FluoConfiguration fluoConfig = new FluoConfiguration(new File("fluo.properties"));
      final RateLimiter rateLimiter = rateLimit > 0 ? RateLimiter.create(rateLimit) : null;
      FileSystem fs = IndexEnv.getHDFS(hadoopConfDir);
      try (FluoClient client = FluoFactory.newClient(fluoConfig);
          LoaderExecutor le = client.newLoaderExecutor()) {
        iter.forEachRemaining(path -> {
          Path filePath = new Path(path);
          try {
            if (fs.exists(filePath)) {
              FSDataInputStream fsin = fs.open(filePath);
              ArchiveReader reader = WARCReaderFactory.get(filePath.getName(), fsin, true);
              for (ArchiveRecord record : reader) {
                Page page = ArchiveUtil.buildPageIgnoreErrors(record);
                if (page.getOutboundLinks().size() > 0) {
                  log.info("Loading page {} with {} links", page.getUrl(),
                      page.getOutboundLinks().size());
                  if (rateLimiter != null) {
                    rateLimiter.acquire();
                  }
                  le.execute(PageLoader.updatePage(page));
                }
              }
            }
          } catch (IOException e) {
            log.error("Exception while processing {}", path, e);
          }
        });
      }
    });
  }
}