当前位置: 首页>>代码示例>>Java>>正文


Java DistCpUtils类代码示例

本文整理汇总了Java中org.apache.hadoop.tools.util.DistCpUtils的典型用法代码示例。如果您正苦于以下问题:Java DistCpUtils类的具体用法?Java DistCpUtils怎么用?Java DistCpUtils使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。


DistCpUtils类属于org.apache.hadoop.tools.util包,在下文中一共展示了DistCpUtils类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: writeToFileListing

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
private void writeToFileListing(SequenceFile.Writer fileListWriter,
                                CopyListingFileStatus fileStatus,
                                Path sourcePathRoot,
                                DistCpOptions options) throws IOException {
  // Records a single copy-listing entry, keyed by the path relative to the
  // source root, and updates the aggregate byte/path counters.
  final Path fullPath = fileStatus.getPath();

  if (LOG.isDebugEnabled()) {
    LOG.debug("REL PATH: "
        + DistCpUtils.getRelativePath(sourcePathRoot, fullPath)
        + ", FULL PATH: " + fullPath);
  }

  // Honour the copy filter before emitting anything.
  if (!shouldCopy(fullPath, options)) {
    return;
  }

  Text relativePath =
      new Text(DistCpUtils.getRelativePath(sourcePathRoot, fullPath));
  fileListWriter.append(relativePath, fileStatus);
  fileListWriter.sync();

  // Directories contribute a path, but no bytes, to the copy workload.
  if (!fileStatus.isDirectory()) {
    totalBytesToCopy += fileStatus.getLen();
  }
  totalPaths++;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:25,代码来源:SimpleCopyListing.java

示例2: compareCheckSums

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
private void compareCheckSums(FileSystem sourceFS, Path source,
    FileChecksum sourceChecksum, FileSystem targetFS, Path target)
    throws IOException {
  // Verifies that source and target checksums agree; throws IOException
  // (with a diagnostic message) on mismatch.
  if (DistCpUtils.checksumsAreEqual(sourceFS, source, sourceChecksum,
      targetFS, target)) {
    return;
  }

  String message =
      "Check-sum mismatch between " + source + " and " + target + ".";
  // Differing block sizes change the composite checksum, so point the
  // user at -pb / -skipCrc when that is the likely cause.
  if (sourceFS.getFileStatus(source).getBlockSize()
      != targetFS.getFileStatus(target).getBlockSize()) {
    message += " Source and target differ in block-size."
        + " Use -pb to preserve block-sizes during copy."
        + " Alternatively, skip checksum-checks altogether, using -skipCrc."
        + " (NOTE: By skipping checksums, one runs the risk of masking data-corruption during file-transfer.)";
  }
  throw new IOException(message);
}
 
开发者ID:naver,项目名称:hadoop,代码行数:17,代码来源:RetriableFileCopyCommand.java

示例3: preserveFileAttributesForDirectories

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
/**
 * Re-applies the requested attributes to every DIRECTORY entry in the
 * source listing. File attributes are set by the map-tasks at copy time;
 * directory entries are fixed up here, after the copy.
 *
 * @param conf Job configuration holding the preserve-spec and listing path.
 * @throws IOException on failure to read the listing or preserve attributes.
 */
private void preserveFileAttributesForDirectories(Configuration conf) throws IOException {
  String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
  final boolean syncOrOverwrite = syncFolder || overwrite;

  LOG.info("About to preserve attributes: " + attrSymbols);

  EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);
  final boolean preserveRawXattrs =
      conf.getBoolean(DistCpConstants.CONF_LABEL_PRESERVE_RAWXATTRS, false);

  Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
  FileSystem clusterFS = sourceListing.getFileSystem(conf);
  SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
                                    SequenceFile.Reader.file(sourceListing));
  // Listing-file length is used only to report % progress below.
  long totalLen = clusterFS.getFileStatus(sourceListing).getLen();

  Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));

  long preservedEntries = 0;
  try {
    CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
    Text srcRelPath = new Text();

    // Iterate over every source path that was copied.
    while (sourceReader.next(srcRelPath, srcFileStatus)) {
      // File-attributes for files are set at the time of copy,
      // in the map-task.
      if (! srcFileStatus.isDirectory()) continue;

      Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);
      //
      // Skip the root folder when syncOrOverwrite is true.
      //
      if (targetRoot.equals(targetFile) && syncOrOverwrite) continue;

      FileSystem targetFS = targetFile.getFileSystem(conf);
      DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes,
          preserveRawXattrs);
      // BUG FIX: this counter was declared and logged but never incremented,
      // so the summary line below always reported 0 preserved entries.
      preservedEntries++;

      taskAttemptContext.progress();
      taskAttemptContext.setStatus("Preserving status on directory entries. [" +
          sourceReader.getPosition() * 100 / totalLen + "%]");
    }
  } finally {
    IOUtils.closeStream(sourceReader);
  }
  LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
}
 
开发者ID:naver,项目名称:hadoop,代码行数:49,代码来源:CopyCommitter.java

示例4: setup

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
/**
 * Implementation of the Mapper::setup() method. This extracts the DistCp-
 * options specified in the Job's configuration, to set up the Job.
 * @param context Mapper's context.
 * @throws IOException On IO failure.
 * @throws InterruptedException If the job is interrupted.
 */
@Override
public void setup(Context context) throws IOException, InterruptedException {
  conf = context.getConfiguration();

  // Pull the DistCp behaviour flags out of the job configuration.
  ignoreFailures = conf.getBoolean(
      DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false);
  syncFolders = conf.getBoolean(
      DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false);
  overWrite = conf.getBoolean(
      DistCpOptionSwitch.OVERWRITE.getConfigLabel(), false);
  append = conf.getBoolean(
      DistCpOptionSwitch.APPEND.getConfigLabel(), false);
  skipCrc = conf.getBoolean(
      DistCpOptionSwitch.SKIP_CRC.getConfigLabel(), false);
  preserve = DistCpUtils.unpackAttributes(
      conf.get(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel()));

  targetWorkPath = new Path(
      conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
  Path finalPath = new Path(
      conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
  targetFS = finalPath.getFileSystem(conf);

  // When the target is an existing plain file, force overwrite mode.
  if (targetFS.exists(finalPath) && targetFS.isFile(finalPath)) {
    overWrite = true;
  }

  if (conf.get(DistCpConstants.CONF_LABEL_SSL_CONF) != null) {
    initializeSSLConf(context);
  }
}
 
开发者ID:naver,项目名称:hadoop,代码行数:33,代码来源:CopyMapper.java

示例5: canSkip

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
private boolean canSkip(FileSystem sourceFS, FileStatus source, 
    FileStatus target) throws IOException {
  // Decides whether an already-present target file makes the copy
  // unnecessary. Outside sync-folder mode, any existing target suffices.
  if (!syncFolders) {
    return true;
  }
  // Length must always match.
  if (target.getLen() != source.getLen()) {
    return false;
  }
  // Block size must match only when BLOCKSIZE preservation was requested.
  if (preserve.contains(FileAttribute.BLOCKSIZE)
      && source.getBlockSize() != target.getBlockSize()) {
    return false;
  }
  // Finally, compare checksums unless CRC checking is disabled.
  return skipCrc
      || DistCpUtils.checksumsAreEqual(sourceFS, source.getPath(), null,
             targetFS, target.getPath());
}
 
开发者ID:naver,项目名称:hadoop,代码行数:17,代码来源:CopyMapper.java

示例6: createSplits

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
private List<InputSplit> createSplits(JobContext jobContext,
                                      List<DynamicInputChunk> chunks)
        throws IOException {
  // Builds one FileSplit per chunk, capped at the configured map-task count,
  // and publishes the resulting split count to the configuration.
  final Configuration configuration = jobContext.getConfiguration();
  final int nSplits = Math.min(getNumMapTasks(configuration), chunks.size());

  List<InputSplit> splits = new ArrayList<InputSplit>(nSplits);
  for (int i = 0; i < nSplits; ++i) {
    DynamicInputChunk chunk = chunks.get(i);
    chunk.assignTo(new TaskID(jobContext.getJobID(), TaskType.MAP, i));
    // Setting non-zero length for FileSplit size, to avoid a possible
    // future when 0-sized file-splits are considered "empty" and skipped
    // over.
    splits.add(new FileSplit(chunk.getPath(), 0,
        getMinRecordsPerChunk(configuration), null));
  }

  DistCpUtils.publish(configuration, CONF_LABEL_NUM_SPLITS, splits.size());
  return splits;
}
 
开发者ID:naver,项目名称:hadoop,代码行数:23,代码来源:DynamicInputFormat.java

示例7: appendToConf

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
/**
 * Add options to configuration. These will be used in the Mapper/committer.
 *
 * @param conf - Configuration object to which the options need to be added
 */
public void appendToConf(Configuration conf) {
  appendSwitch(conf, DistCpOptionSwitch.ATOMIC_COMMIT, atomicCommit);
  appendSwitch(conf, DistCpOptionSwitch.IGNORE_FAILURES, ignoreFailures);
  appendSwitch(conf, DistCpOptionSwitch.SYNC_FOLDERS, syncFolder);
  appendSwitch(conf, DistCpOptionSwitch.DELETE_MISSING, deleteMissing);
  appendSwitch(conf, DistCpOptionSwitch.OVERWRITE, overwrite);
  appendSwitch(conf, DistCpOptionSwitch.APPEND, append);
  appendSwitch(conf, DistCpOptionSwitch.DIFF, useDiff);
  appendSwitch(conf, DistCpOptionSwitch.SKIP_CRC, skipCRC);
  appendSwitch(conf, DistCpOptionSwitch.BANDWIDTH, mapBandwidth);
  // Preserve-status is packed into its symbol string rather than stringified.
  DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.PRESERVE_STATUS,
      DistCpUtils.packAttributes(preserveStatus));
}

/** Publishes a single option value, in string form, under its config label. */
private static void appendSwitch(Configuration conf,
    DistCpOptionSwitch option, Object value) {
  DistCpOptionSwitch.addToConf(conf, option, String.valueOf(value));
}
 
开发者ID:naver,项目名称:hadoop,代码行数:28,代码来源:DistCpOptions.java

示例8: testCopyingExistingFiles

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
/**
 * Maps each path in {@code pathList} again (after a prior copy) and asserts
 * that every one of the {@code nFiles} entries is counted under the SKIP
 * counter.
 */
private void testCopyingExistingFiles(FileSystem fs, CopyMapper copyMapper,
    Mapper<Text, CopyListingFileStatus, Text, Text>.Context context) {
  try {
    for (Path path : pathList) {
      copyMapper.map(new Text(DistCpUtils.getRelativePath(new Path(SOURCE_PATH), path)),
              new CopyListingFileStatus(fs.getFileStatus(path)), context);
    }

    Assert.assertEquals(nFiles,
            context.getCounter(CopyMapper.Counter.SKIP).getValue());
  }
  catch (Exception exception) {
    // FIX: Assert.fail(...) instead of the assertTrue(msg, false)
    // anti-pattern — it states the intent (unconditional failure) directly.
    Assert.fail("Caught unexpected exception:" + exception.getMessage());
  }
}
 
开发者ID:naver,项目名称:hadoop,代码行数:17,代码来源:TestCopyMapper.java

示例9: addToFileListing

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
/**
 * Write a single file/directory to the sequence file.
 * @throws IOException
 */
private void addToFileListing(SequenceFile.Writer fileListWriter,
    Path sourceRoot, Path path, DistCpOptions options) throws IOException {
  // Normalize both paths before resolving the file status.
  sourceRoot = getPathWithSchemeAndAuthority(sourceRoot);
  path = makeQualified(getPathWithSchemeAndAuthority(path));

  FileSystem sourceFS = sourceRoot.getFileSystem(getConf());
  // Capture ACLs/XAttrs in the listing entry only when preservation of the
  // corresponding attribute was requested.
  CopyListingFileStatus rootCopyListingStatus =
      DistCpUtils.toCopyListingFileStatus(sourceFS,
          sourceFS.getFileStatus(path),
          options.shouldPreserve(FileAttribute.ACL),
          options.shouldPreserve(FileAttribute.XATTR),
          options.shouldPreserveRawXattrs());

  writeToFileListingRoot(fileListWriter, rootCopyListingStatus,
      sourceRoot, options);
}
 
开发者ID:aliyun-beta,项目名称:aliyun-oss-hadoop-fs,代码行数:23,代码来源:SimpleCopyListing.java

示例10: writeToFileListing

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
private void writeToFileListing(SequenceFile.Writer fileListWriter,
                                CopyListingFileStatus fileStatus,
                                Path sourcePathRoot) throws IOException {
  // Appends one listing entry (relative path -> status) and maintains the
  // running totals used for progress/statistics reporting.
  final Path fullPath = fileStatus.getPath();

  if (LOG.isDebugEnabled()) {
    LOG.debug("REL PATH: "
        + DistCpUtils.getRelativePath(sourcePathRoot, fullPath)
        + ", FULL PATH: " + fullPath);
  }

  if (!shouldCopy(fullPath)) {
    return;
  }

  fileListWriter.append(
      new Text(DistCpUtils.getRelativePath(sourcePathRoot, fullPath)),
      fileStatus);
  fileListWriter.sync();

  // Files add to the byte total; directories are counted separately.
  if (fileStatus.isDirectory()) {
    totalDirs++;
  } else {
    totalBytesToCopy += fileStatus.getLen();
  }
  totalPaths++;
  maybePrintStats();
}
 
开发者ID:aliyun-beta,项目名称:aliyun-oss-hadoop-fs,代码行数:25,代码来源:SimpleCopyListing.java

示例11: appendToConf

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
/**
 * Add options to configuration. These will be used in the Mapper/committer.
 *
 * @param conf - Configuration object to which the options need to be added
 */
public void appendToConf(Configuration conf) {
  publishOption(conf, DistCpOptionSwitch.ATOMIC_COMMIT, atomicCommit);
  publishOption(conf, DistCpOptionSwitch.IGNORE_FAILURES, ignoreFailures);
  publishOption(conf, DistCpOptionSwitch.SYNC_FOLDERS, syncFolder);
  publishOption(conf, DistCpOptionSwitch.DELETE_MISSING, deleteMissing);
  publishOption(conf, DistCpOptionSwitch.OVERWRITE, overwrite);
  publishOption(conf, DistCpOptionSwitch.APPEND, append);
  publishOption(conf, DistCpOptionSwitch.SKIP_CRC, skipCRC);
  publishOption(conf, DistCpOptionSwitch.BANDWIDTH, mapBandwidth);
  // Preserve-status is packed into its symbol string rather than stringified.
  DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.PRESERVE_STATUS,
      DistCpUtils.packAttributes(preserveStatus));
}

/** Publishes a single option value, in string form, under its config label. */
private static void publishOption(Configuration conf,
    DistCpOptionSwitch option, Object value) {
  DistCpOptionSwitch.addToConf(conf, option, String.valueOf(value));
}
 
开发者ID:Nextzero,项目名称:hadoop-2.6.0-cdh5.4.3,代码行数:26,代码来源:DistCpOptions.java

示例12: writeToFileListing

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
private void writeToFileListing(SequenceFile.Writer fileListWriter,
                                FileStatus fileStatus, Path sourcePathRoot,
                                boolean localFile) throws IOException {
  // Appends one listing entry (relative path -> status); the source root
  // directory itself is never listed.
  final Path fullPath = fileStatus.getPath();

  if (fullPath.equals(sourcePathRoot) && fileStatus.isDirectory()) {
    return; // Skip the root-paths.
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("REL PATH: "
        + DistCpUtils.getRelativePath(sourcePathRoot, fullPath)
        + ", FULL PATH: " + fullPath);
  }

  // Local files get their status re-resolved via getFileStatus
  // (NOTE(review): presumably to make the entry cluster-readable — confirm).
  FileStatus status = localFile ? getFileStatus(fileStatus) : fileStatus;

  fileListWriter.append(
      new Text(DistCpUtils.getRelativePath(sourcePathRoot, fullPath)), status);
  fileListWriter.sync();

  if (!fileStatus.isDirectory()) {
    totalBytesToCopy += fileStatus.getLen();
  }
  totalPaths++;
}
 
开发者ID:ict-carch,项目名称:hadoop-plus,代码行数:26,代码来源:SimpleCopyListing.java

示例13: setup

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
/**
 * Implementation of the Mapper::setup() method. This extracts the DistCp-
 * options specified in the Job's configuration, to set up the Job.
 * @param context Mapper's context.
 * @throws IOException On IO failure.
 * @throws InterruptedException If the job is interrupted.
 */
@Override
public void setup(Context context) throws IOException, InterruptedException {
  conf = context.getConfiguration();

  // Read the DistCp behaviour flags published in the job configuration.
  ignoreFailures = conf.getBoolean(
      DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false);
  syncFolders = conf.getBoolean(
      DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false);
  overWrite = conf.getBoolean(
      DistCpOptionSwitch.OVERWRITE.getConfigLabel(), false);
  skipCrc = conf.getBoolean(
      DistCpOptionSwitch.SKIP_CRC.getConfigLabel(), false);
  preserve = DistCpUtils.unpackAttributes(
      conf.get(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel()));

  targetWorkPath = new Path(
      conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
  Path finalPath = new Path(
      conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
  targetFS = finalPath.getFileSystem(conf);

  // When the target is an existing plain file, force overwrite mode.
  if (targetFS.exists(finalPath) && targetFS.isFile(finalPath)) {
    overWrite = true;
  }

  if (conf.get(DistCpConstants.CONF_LABEL_SSL_CONF) != null) {
    initializeSSLConf(context);
  }
}
 
开发者ID:ict-carch,项目名称:hadoop-plus,代码行数:32,代码来源:CopyMapper.java

示例14: createSplits

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
private List<InputSplit> createSplits(JobContext jobContext,
                                      List<DynamicInputChunk> chunks)
        throws IOException {
  // Builds one FileSplit per chunk, capped at the configured map-task count,
  // then publishes the final split count to the configuration.
  final Configuration configuration = jobContext.getConfiguration();
  final int nSplits = Math.min(getNumMapTasks(configuration), chunks.size());
  List<InputSplit> splits = new ArrayList<InputSplit>(nSplits);

  int index = 0;
  while (index < nSplits) {
    DynamicInputChunk chunk = chunks.get(index);
    chunk.assignTo(new TaskID(jobContext.getJobID(), TaskType.MAP, index));
    // Setting non-zero length for FileSplit size, to avoid a possible
    // future when 0-sized file-splits are considered "empty" and skipped
    // over.
    splits.add(new FileSplit(chunk.getPath(), 0, MIN_RECORDS_PER_CHUNK, null));
    ++index;
  }

  DistCpUtils.publish(configuration, CONF_LABEL_NUM_SPLITS, splits.size());
  return splits;
}
 
开发者ID:ict-carch,项目名称:hadoop-plus,代码行数:23,代码来源:DynamicInputFormat.java

示例15: appendToConf

import org.apache.hadoop.tools.util.DistCpUtils; //导入依赖的package包/类
/**
 * Add options to configuration. These will be used in the Mapper/committer.
 *
 * @param conf - Configuration object to which the options need to be added
 */
public void appendToConf(Configuration conf) {
  addOption(conf, DistCpOptionSwitch.ATOMIC_COMMIT, atomicCommit);
  addOption(conf, DistCpOptionSwitch.IGNORE_FAILURES, ignoreFailures);
  addOption(conf, DistCpOptionSwitch.SYNC_FOLDERS, syncFolder);
  addOption(conf, DistCpOptionSwitch.DELETE_MISSING, deleteMissing);
  addOption(conf, DistCpOptionSwitch.OVERWRITE, overwrite);
  addOption(conf, DistCpOptionSwitch.SKIP_CRC, skipCRC);
  addOption(conf, DistCpOptionSwitch.BANDWIDTH, mapBandwidth);
  // Preserve-status is packed into its symbol string rather than stringified.
  DistCpOptionSwitch.addToConf(conf, DistCpOptionSwitch.PRESERVE_STATUS,
      DistCpUtils.packAttributes(preserveStatus));
}

/** Publishes a single option value, in string form, under its config label. */
private static void addOption(Configuration conf,
    DistCpOptionSwitch option, Object value) {
  DistCpOptionSwitch.addToConf(conf, option, String.valueOf(value));
}
 
开发者ID:ict-carch,项目名称:hadoop-plus,代码行数:24,代码来源:DistCpOptions.java


注:本文中的org.apache.hadoop.tools.util.DistCpUtils类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。