当前位置: 首页>>代码示例>>Java>>正文


Java DistCpUtils.sortListing方法代码示例

本文整理汇总了Java中org.apache.hadoop.tools.util.DistCpUtils.sortListing方法的典型用法代码示例。如果您正苦于以下问题:Java DistCpUtils.sortListing方法的具体用法?Java DistCpUtils.sortListing怎么用?Java DistCpUtils.sortListing使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.hadoop.tools.util.DistCpUtils的用法示例。


在下文中一共展示了DistCpUtils.sortListing方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: checkForDuplicates

import org.apache.hadoop.tools.util.DistCpUtils; //导入方法依赖的package包/类
/**
 * Validate the final resulting path listing to see if there are any duplicate entries
 *
 * @param pathToListFile - path listing build by doBuildListing
 * @throws IOException - Any issues while checking for duplicates and throws
 * @throws DuplicateFileException - if there are duplicates
 */
private void checkForDuplicates(Path pathToListFile)
    throws DuplicateFileException, IOException {

  Configuration config = getConf();
  FileSystem fs = pathToListFile.getFileSystem(config);

  Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);

  SequenceFile.Reader reader = new SequenceFile.Reader(
                        config, SequenceFile.Reader.file(sortedList));
  try {
    Text lastKey = new Text("*"); //source relative path can never hold *
    FileStatus lastFileStatus = new FileStatus();

    Text currentKey = new Text();
    while (reader.next(currentKey)) {
      if (currentKey.equals(lastKey)) {
        FileStatus currentFileStatus = new FileStatus();
        reader.getCurrentValue(currentFileStatus);
        throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and " +
            currentFileStatus.getPath() + " would cause duplicates. Aborting");
      }
      reader.getCurrentValue(lastFileStatus);
      lastKey.set(currentKey);
    }
  } finally {
    IOUtils.closeStream(reader);
  }
}
 
开发者ID:ict-carch,项目名称:hadoop-plus,代码行数:37,代码来源:CopyListing.java

示例2: deleteMissing

import org.apache.hadoop.tools.util.DistCpUtils; //导入方法依赖的package包/类
private void deleteMissing(Configuration conf) throws IOException {
  LOG.info("-delete option is enabled. About to remove entries from " +
      "target that are missing in source");

  // Sort the source-file listing alphabetically.
  Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
  FileSystem clusterFS = sourceListing.getFileSystem(conf);
  Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

  // Similarly, create the listing of target-files. Sort alphabetically.
  Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
  CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);

  List<Path> targets = new ArrayList<Path>(1);
  Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
  targets.add(targetFinalPath);
  DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));
  //
  // Set up options to be the same from the CopyListing.buildListing's perspective,
  // so to collect similar listings as when doing the copy
  //
  options.setOverwrite(overwrite);
  options.setSyncFolder(syncFolder);
  options.setTargetPathExists(targetPathExists);
  
  target.buildListing(targetListing, options);
  Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
  long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

  SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
                               SequenceFile.Reader.file(sortedSourceListing));
  SequenceFile.Reader targetReader = new SequenceFile.Reader(conf,
                               SequenceFile.Reader.file(sortedTargetListing));

  // Walk both source and target file listings.
  // Delete all from target that doesn't also exist on source.
  long deletedEntries = 0;
  try {
    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();
    CopyListingFileStatus trgtFileStatus = new CopyListingFileStatus();
    Text trgtRelPath = new Text();

    FileSystem targetFS = targetFinalPath.getFileSystem(conf);
    boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
    while (targetReader.next(trgtRelPath, trgtFileStatus)) {
      // Skip sources that don't exist on target.
      while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
        srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
      }

      if (srcAvailable && trgtRelPath.equals(srcRelPath)) continue;

      // Target doesn't exist at source. Delete.
      boolean result = (!targetFS.exists(trgtFileStatus.getPath()) ||
          targetFS.delete(trgtFileStatus.getPath(), true));
      if (result) {
        LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
        deletedEntries++;
      } else {
        throw new IOException("Unable to delete " + trgtFileStatus.getPath());
      }
      taskAttemptContext.progress();
      taskAttemptContext.setStatus("Deleting missing files from target. [" +
          targetReader.getPosition() * 100 / totalLen + "%]");
    }
  } finally {
    IOUtils.closeStream(sourceReader);
    IOUtils.closeStream(targetReader);
  }
  LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
 
开发者ID:naver,项目名称:hadoop,代码行数:73,代码来源:CopyCommitter.java

示例3: deleteMissing

import org.apache.hadoop.tools.util.DistCpUtils; //导入方法依赖的package包/类
private void deleteMissing(Configuration conf) throws IOException {
  LOG.info("-delete option is enabled. About to remove entries from " +
      "target that are missing in source");

  // Sort the source-file listing alphabetically.
  Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
  FileSystem clusterFS = sourceListing.getFileSystem(conf);
  Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

  // Similarly, create the listing of target-files. Sort alphabetically.
  Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
  CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);

  List<Path> targets = new ArrayList<Path>(1);
  Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
  targets.add(targetFinalPath);
  DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

  target.buildListing(targetListing, options);
  Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
  long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

  SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
                               SequenceFile.Reader.file(sortedSourceListing));
  SequenceFile.Reader targetReader = new SequenceFile.Reader(conf,
                               SequenceFile.Reader.file(sortedTargetListing));

  // Walk both source and target file listings.
  // Delete all from target that doesn't also exist on source.
  long deletedEntries = 0;
  try {
    FileStatus srcFileStatus = new FileStatus();
    Text srcRelPath = new Text();
    FileStatus trgtFileStatus = new FileStatus();
    Text trgtRelPath = new Text();

    FileSystem targetFS = targetFinalPath.getFileSystem(conf);
    boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
    while (targetReader.next(trgtRelPath, trgtFileStatus)) {
      // Skip sources that don't exist on target.
      while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
        srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
      }

      if (srcAvailable && trgtRelPath.equals(srcRelPath)) continue;

      // Target doesn't exist at source. Delete.
      boolean result = (!targetFS.exists(trgtFileStatus.getPath()) ||
          targetFS.delete(trgtFileStatus.getPath(), true));
      if (result) {
        LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
        deletedEntries++;
      } else {
        throw new IOException("Unable to delete " + trgtFileStatus.getPath());
      }
      taskAttemptContext.progress();
      taskAttemptContext.setStatus("Deleting missing files from target. [" +
          targetReader.getPosition() * 100 / totalLen + "%]");
    }
  } finally {
    IOUtils.closeStream(sourceReader);
    IOUtils.closeStream(targetReader);
  }
  LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
 
开发者ID:ict-carch,项目名称:hadoop-plus,代码行数:66,代码来源:CopyCommitter.java

示例4: deleteMissing

import org.apache.hadoop.tools.util.DistCpUtils; //导入方法依赖的package包/类
private void deleteMissing(Configuration conf) throws IOException {
  LOG.info("-delete option is enabled. About to remove entries from " +
      "target that are missing in source");

  // Sort the source-file listing alphabetically.
  Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
  FileSystem clusterFS = sourceListing.getFileSystem(conf);
  Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

  // Similarly, create the listing of target-files. Sort alphabetically.
  Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
  CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);

  List<Path> targets = new ArrayList<Path>(1);
  Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
  targets.add(targetFinalPath);
  Path resultNonePath = Path.getPathWithoutSchemeAndAuthority(targetFinalPath)
      .toString().startsWith(DistCpConstants.HDFS_RESERVED_RAW_DIRECTORY_NAME)
      ? DistCpConstants.RAW_NONE_PATH : DistCpConstants.NONE_PATH;
  DistCpOptions options = new DistCpOptions(targets, resultNonePath);
  //
  // Set up options to be the same from the CopyListing.buildListing's perspective,
  // so to collect similar listings as when doing the copy
  //
  options.setOverwrite(overwrite);
  options.setSyncFolder(syncFolder);
  options.setTargetPathExists(targetPathExists);
  
  target.buildListing(targetListing, options);
  Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
  long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

  SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
                               SequenceFile.Reader.file(sortedSourceListing));
  SequenceFile.Reader targetReader = new SequenceFile.Reader(conf,
                               SequenceFile.Reader.file(sortedTargetListing));

  // Walk both source and target file listings.
  // Delete all from target that doesn't also exist on source.
  long deletedEntries = 0;
  try {
    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();
    CopyListingFileStatus trgtFileStatus = new CopyListingFileStatus();
    Text trgtRelPath = new Text();

    FileSystem targetFS = targetFinalPath.getFileSystem(conf);
    boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
    while (targetReader.next(trgtRelPath, trgtFileStatus)) {
      // Skip sources that don't exist on target.
      while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
        srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
      }

      if (srcAvailable && trgtRelPath.equals(srcRelPath)) continue;

      // Target doesn't exist at source. Delete.
      boolean result = (!targetFS.exists(trgtFileStatus.getPath()) ||
          targetFS.delete(trgtFileStatus.getPath(), true));
      if (result) {
        LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
        deletedEntries++;
      } else {
        throw new IOException("Unable to delete " + trgtFileStatus.getPath());
      }
      taskAttemptContext.progress();
      taskAttemptContext.setStatus("Deleting missing files from target. [" +
          targetReader.getPosition() * 100 / totalLen + "%]");
    }
  } finally {
    IOUtils.closeStream(sourceReader);
    IOUtils.closeStream(targetReader);
  }
  LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
 
开发者ID:hopshadoop,项目名称:hops,代码行数:76,代码来源:CopyCommitter.java


注:本文中的org.apache.hadoop.tools.util.DistCpUtils.sortListing方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。