

Java LocatedFileStatus.isFile Method Code Examples

This article collects typical usage examples of the Java method org.apache.hadoop.fs.LocatedFileStatus.isFile. If you are wondering how to use LocatedFileStatus.isFile, what it does, or what it looks like in real code, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.fs.LocatedFileStatus.


The following presents 7 code examples of the LocatedFileStatus.isFile method, sorted by popularity by default.
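Before the collected examples, here is a minimal, self-contained sketch of the pattern they all share: list a directory recursively with FileSystem.listFiles and use isFile() to keep only regular files. The directory path /tmp/data and the class name are placeholders, not taken from any of the projects below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListRegularFiles {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        // Recurse into subdirectories; isFile() is the defensive check
        // all of the examples below use to skip non-regular files.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/tmp/data"), true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            if (status.isFile()) {
                System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}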

Example 1: literalAllSegments

import org.apache.hadoop.fs.LocatedFileStatus; // import the package/class the method depends on
public static void literalAllSegments(FileSystem fileSystem, Path dir, Consumer<LocatedFileStatus> consumer) throws IOException {
    RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(dir, true);
    while (files.hasNext()) {
        LocatedFileStatus fileStatus = files.next();
        // Skip anything that is not a regular file.
        if (!fileStatus.isFile()) {
            continue;
        }
        // Skip empty files.
        if (fileStatus.getLen() == 0) {
            continue;
        }

        Path path = fileStatus.getPath();
        // Hand over only paths that look like segment files.
        if (checkSegmentByPath(path)) {
            consumer.accept(fileStatus);
        }
    }
}
 
Developer ID: shunfei, Project: indexr, Lines: 18, Source: SegmentHelper.java

Example 2: validateDataDir

import org.apache.hadoop.fs.LocatedFileStatus; // import the package/class the method depends on
public static void validateDataDir(String dataDir) {
  try {
    FileSystem hdfs = getHDFS();
    Path dataPath = new Path(dataDir);
    if (!hdfs.exists(dataPath)) {
      log.error("HDFS data directory {} does not exist", dataDir);
      System.exit(-1);
    }
    RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(dataPath, true);
    while (listIter.hasNext()) {
      LocatedFileStatus status = listIter.next();
      if (status.isFile()) {
        return;
      }
    }
    log.error("HDFS data directory {} has no files", dataDir);
    System.exit(-1);
  } catch (IOException e) {
    throw new IllegalStateException(e);
  }
}
 
Developer ID: astralway, Project: webindex, Lines: 22, Source: IndexEnv.java

Example 3: enqueue

import org.apache.hadoop.fs.LocatedFileStatus; // import the package/class the method depends on
private void enqueue(String path) throws IOException {
    Path filePath = new Path(path);
    if (!fs.exists(filePath) || fs.getFileStatus(filePath) == null) {
        log.info("Cannot enqueue file {} because it does not exist but got an event from the FS", filePath.toString());
        return;
    }

    RemoteIterator<LocatedFileStatus> it = fs.listFiles(filePath, false);
    while (it.hasNext()) {
        LocatedFileStatus status = it.next();
        // Only regular files whose names match the configured regexp are queued.
        if (!status.isFile() || !fileRegexp.matcher(status.getPath().getName()).find()) continue;
        fileQueue.offer(toMetadata(status));
    }
}
 
Developer ID: mmolimar, Project: kafka-connect-fs, Lines: 15, Source: HdfsFileWatcherPolicy.java

Example 4: isValidFile

import org.apache.hadoop.fs.LocatedFileStatus; // import the package/class the method depends on
/**
 * Checks whether the file is a valid candidate.
 * 
 * @param fileStatus
 *            file status holding file name
 * @return true when valid, false otherwise
 */
private final boolean isValidFile(LocatedFileStatus fileStatus) {
	if (fileStatus.isFile()) {
		return WHITELIST_REGEXP.matcher(fileStatus.getPath().getName()).matches();
	} else {
		return false;
	}
}
 
Developer ID: openaire, Project: iis, Lines: 15, Source: SequenceFileTextValueReader.java

Example 5: isValidFile

import org.apache.hadoop.fs.LocatedFileStatus; // import the package/class the method depends on
/**
 * Checks whether the file is valid.
 * 
 * @param fileStatus file status holding the file name
 * @return true when valid, false otherwise
 */
private boolean isValidFile(LocatedFileStatus fileStatus) {
	if (fileStatus.isFile()) {
		return whitelistPattern.matcher(
				fileStatus.getPath().getName()).matches();
	}
	// fallback: not a regular file
	return false;
}
 
Developer ID: openaire, Project: iis, Lines: 15, Source: AvroDataStoreReader.java
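Examples 4 and 5 share the same design: isFile() rules out directories before a filename whitelist is applied. Below is a minimal, self-contained sketch of how such a predicate typically drives a listing loop; the pattern, directory, and class name are illustrative, not taken from either project.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class WhitelistScan {
    // Hypothetical whitelist: keep only Avro files.
    private static final Pattern WHITELIST = Pattern.compile(".*\\.avro$");

    // Collects the paths of all regular files whose names match the whitelist,
    // mirroring the isFile() + regex guard used in Examples 4 and 5.
    public static List<Path> scan(FileSystem fs, Path dir) throws IOException {
        List<Path> matches = new ArrayList<>();
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            if (status.isFile() && WHITELIST.matcher(status.getPath().getName()).matches()) {
                matches.add(status.getPath());
            }
        }
        return matches;
    }
}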

Example 6: getBlockLocations

import org.apache.hadoop.fs.LocatedFileStatus; // import the package/class the method depends on
public LinkedList<BlockLocation> getBlockLocations(Path path) throws IOException {
	LOG.info("Collecting block locations...");
	
	LinkedList<BlockLocation> blockLocations = new LinkedList<BlockLocation>();
	RemoteIterator<LocatedFileStatus> statuses = listFiles(path, true);
	// hasNextCode encodes the iterator state: 0 ends the loop, 1 means a next
	// element is available, and anything greater skips the entry and retries.
	int hasNextCode = hasNextCode(statuses);
	while(hasNextCode > 0){
		if(hasNextCode > 1){
			hasNextCode = hasNextCode(statuses);
			continue;
		}
		
		LocatedFileStatus fileStatus = statuses.next();
		
		// Collect block locations for regular files only.
		if(fileStatus.isFile()){
			BlockLocation[] blockLocations_tmp = getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
			
			blockLocations.addAll(Arrays.asList(blockLocations_tmp));
		}
		
		int size = blockLocations.size();
		if(size > 0 && size % 5000 == 0)
			LOG.info("Collected " + size + " locations. Still in progress...");
		
		if(size >= MAX_NUMBER_OF_LOCATIONS){
			LOG.info("Reached max number of locations to collect. The amount will be representative enough.");
			break;
		}
		
		hasNextCode = hasNextCode(statuses);
	}
	LOG.info("Collected " + blockLocations.size() + " locations.");
	
	if(isHdfsBlocksMetadataEnabled()){
		BlockStorageLocation[] blockStorageLocations = getFileBlockStorageLocations(blockLocations);
		
		blockLocations.clear();
		blockLocations.addAll(Arrays.asList(blockStorageLocations));
	}else{
		LOG.error("VolumeId/DiskId cannot be collected since "
				+ "dfs.datanode.hdfs-blocks-metadata.enabled is not enabled.");
	}
	
	return blockLocations;
}
 
Developer ID: cerndb, Project: hdfs-metadata, Lines: 46, Source: DistributedFileSystemMetadata.java
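The hasNextCode helper is not shown in this snippet. Below is a plausible reconstruction, inferred purely from the control flow above (0 ends the loop, 1 lets next() be called, anything greater skips the entry) and assuming LOG is the class's logger; it is not the original project's code.

// Hypothetical reconstruction, not taken from the hdfs-metadata project:
// 0 = iterator exhausted, 1 = a next element is available,
// 2 = hasNext() threw, so the caller skips this entry and retries.
private int hasNextCode(RemoteIterator<LocatedFileStatus> statuses) {
	try {
		return statuses.hasNext() ? 1 : 0;
	} catch (IOException e) {
		LOG.warn("Error while checking for the next file status; skipping.", e);
		return 2;
	}
}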

Example 7: main

import org.apache.hadoop.fs.LocatedFileStatus; // import the package/class the method depends on
public static void main(String[] args) throws Exception {

    if (args.length != 1) {
      log.error("Usage: LoadHdfs <dataDir>");
      System.exit(1);
    }
    final String dataDir = args[0];
    IndexEnv.validateDataDir(dataDir);

    final String hadoopConfDir = IndexEnv.getHadoopConfDir();
    final int rateLimit = WebIndexConfig.load().getLoadRateLimit();

    List<String> loadPaths = new ArrayList<>();
    FileSystem hdfs = IndexEnv.getHDFS();
    RemoteIterator<LocatedFileStatus> listIter = hdfs.listFiles(new Path(dataDir), true);
    while (listIter.hasNext()) {
      LocatedFileStatus status = listIter.next();
      if (status.isFile()) {
        loadPaths.add(status.getPath().toString());
      }
    }

    log.info("Loading {} files into Fluo from {}", loadPaths.size(), dataDir);

    SparkConf sparkConf = new SparkConf().setAppName("webindex-load-hdfs");
    try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {

      JavaRDD<String> paths = ctx.parallelize(loadPaths, loadPaths.size());

      paths.foreachPartition(iter -> {
        final FluoConfiguration fluoConfig = new FluoConfiguration(new File("fluo.properties"));
        final RateLimiter rateLimiter = rateLimit > 0 ? RateLimiter.create(rateLimit) : null;
        FileSystem fs = IndexEnv.getHDFS(hadoopConfDir);
        try (FluoClient client = FluoFactory.newClient(fluoConfig);
            LoaderExecutor le = client.newLoaderExecutor()) {
          iter.forEachRemaining(path -> {
            Path filePath = new Path(path);
            try {
              if (fs.exists(filePath)) {
                FSDataInputStream fsin = fs.open(filePath);
                ArchiveReader reader = WARCReaderFactory.get(filePath.getName(), fsin, true);
                for (ArchiveRecord record : reader) {
                  Page page = ArchiveUtil.buildPageIgnoreErrors(record);
                  if (page.getOutboundLinks().size() > 0) {
                    log.info("Loading page {} with {} links", page.getUrl(), page
                        .getOutboundLinks().size());
                    if (rateLimiter != null) {
                      rateLimiter.acquire();
                    }
                    le.execute(PageLoader.updatePage(page));
                  }
                }
              }
            } catch (IOException e) {
              log.error("Exception while processing {}", path, e);
            }
          });
        }
      });
    }
  }
 
Developer ID: astralway, Project: webindex, Lines: 62, Source: LoadHdfs.java


Note: The org.apache.hadoop.fs.LocatedFileStatus.isFile examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by their developers; copyright in the code remains with the original authors, and distribution or use should follow each project's License. Do not reproduce without permission.