本文整理汇总了Java中org.apache.hadoop.tools.util.DistCpUtils.sortListing方法的典型用法代码示例。如果您正苦于以下问题:Java DistCpUtils.sortListing方法的具体用法?Java DistCpUtils.sortListing怎么用?Java DistCpUtils.sortListing使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.hadoop.tools.util.DistCpUtils
的用法示例。
在下文中一共展示了DistCpUtils.sortListing方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: checkForDuplicates
import org.apache.hadoop.tools.util.DistCpUtils; //导入方法依赖的package包/类
/**
* Validate the final resulting path listing to see if there are any duplicate entries
*
* @param pathToListFile - path listing build by doBuildListing
* @throws IOException - Any issues while checking for duplicates and throws
* @throws DuplicateFileException - if there are duplicates
*/
private void checkForDuplicates(Path pathToListFile)
throws DuplicateFileException, IOException {
Configuration config = getConf();
FileSystem fs = pathToListFile.getFileSystem(config);
Path sortedList = DistCpUtils.sortListing(fs, config, pathToListFile);
SequenceFile.Reader reader = new SequenceFile.Reader(
config, SequenceFile.Reader.file(sortedList));
try {
Text lastKey = new Text("*"); //source relative path can never hold *
FileStatus lastFileStatus = new FileStatus();
Text currentKey = new Text();
while (reader.next(currentKey)) {
if (currentKey.equals(lastKey)) {
FileStatus currentFileStatus = new FileStatus();
reader.getCurrentValue(currentFileStatus);
throw new DuplicateFileException("File " + lastFileStatus.getPath() + " and " +
currentFileStatus.getPath() + " would cause duplicates. Aborting");
}
reader.getCurrentValue(lastFileStatus);
lastKey.set(currentKey);
}
} finally {
IOUtils.closeStream(reader);
}
}
示例2: deleteMissing
import org.apache.hadoop.tools.util.DistCpUtils; //导入方法依赖的package包/类
private void deleteMissing(Configuration conf) throws IOException {
LOG.info("-delete option is enabled. About to remove entries from " +
"target that are missing in source");
// Sort the source-file listing alphabetically.
Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
FileSystem clusterFS = sourceListing.getFileSystem(conf);
Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);
// Similarly, create the listing of target-files. Sort alphabetically.
Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);
List<Path> targets = new ArrayList<Path>(1);
Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
targets.add(targetFinalPath);
DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));
//
// Set up options to be the same from the CopyListing.buildListing's perspective,
// so to collect similar listings as when doing the copy
//
options.setOverwrite(overwrite);
options.setSyncFolder(syncFolder);
options.setTargetPathExists(targetPathExists);
target.buildListing(targetListing, options);
Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();
SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
SequenceFile.Reader.file(sortedSourceListing));
SequenceFile.Reader targetReader = new SequenceFile.Reader(conf,
SequenceFile.Reader.file(sortedTargetListing));
// Walk both source and target file listings.
// Delete all from target that doesn't also exist on source.
long deletedEntries = 0;
try {
CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
Text srcRelPath = new Text();
CopyListingFileStatus trgtFileStatus = new CopyListingFileStatus();
Text trgtRelPath = new Text();
FileSystem targetFS = targetFinalPath.getFileSystem(conf);
boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
while (targetReader.next(trgtRelPath, trgtFileStatus)) {
// Skip sources that don't exist on target.
while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
}
if (srcAvailable && trgtRelPath.equals(srcRelPath)) continue;
// Target doesn't exist at source. Delete.
boolean result = (!targetFS.exists(trgtFileStatus.getPath()) ||
targetFS.delete(trgtFileStatus.getPath(), true));
if (result) {
LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
deletedEntries++;
} else {
throw new IOException("Unable to delete " + trgtFileStatus.getPath());
}
taskAttemptContext.progress();
taskAttemptContext.setStatus("Deleting missing files from target. [" +
targetReader.getPosition() * 100 / totalLen + "%]");
}
} finally {
IOUtils.closeStream(sourceReader);
IOUtils.closeStream(targetReader);
}
LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
示例3: deleteMissing
import org.apache.hadoop.tools.util.DistCpUtils; //导入方法依赖的package包/类
private void deleteMissing(Configuration conf) throws IOException {
LOG.info("-delete option is enabled. About to remove entries from " +
"target that are missing in source");
// Sort the source-file listing alphabetically.
Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
FileSystem clusterFS = sourceListing.getFileSystem(conf);
Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);
// Similarly, create the listing of target-files. Sort alphabetically.
Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);
List<Path> targets = new ArrayList<Path>(1);
Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
targets.add(targetFinalPath);
DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));
target.buildListing(targetListing, options);
Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();
SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
SequenceFile.Reader.file(sortedSourceListing));
SequenceFile.Reader targetReader = new SequenceFile.Reader(conf,
SequenceFile.Reader.file(sortedTargetListing));
// Walk both source and target file listings.
// Delete all from target that doesn't also exist on source.
long deletedEntries = 0;
try {
FileStatus srcFileStatus = new FileStatus();
Text srcRelPath = new Text();
FileStatus trgtFileStatus = new FileStatus();
Text trgtRelPath = new Text();
FileSystem targetFS = targetFinalPath.getFileSystem(conf);
boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
while (targetReader.next(trgtRelPath, trgtFileStatus)) {
// Skip sources that don't exist on target.
while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
}
if (srcAvailable && trgtRelPath.equals(srcRelPath)) continue;
// Target doesn't exist at source. Delete.
boolean result = (!targetFS.exists(trgtFileStatus.getPath()) ||
targetFS.delete(trgtFileStatus.getPath(), true));
if (result) {
LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
deletedEntries++;
} else {
throw new IOException("Unable to delete " + trgtFileStatus.getPath());
}
taskAttemptContext.progress();
taskAttemptContext.setStatus("Deleting missing files from target. [" +
targetReader.getPosition() * 100 / totalLen + "%]");
}
} finally {
IOUtils.closeStream(sourceReader);
IOUtils.closeStream(targetReader);
}
LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
示例4: deleteMissing
import org.apache.hadoop.tools.util.DistCpUtils; //导入方法依赖的package包/类
private void deleteMissing(Configuration conf) throws IOException {
LOG.info("-delete option is enabled. About to remove entries from " +
"target that are missing in source");
// Sort the source-file listing alphabetically.
Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
FileSystem clusterFS = sourceListing.getFileSystem(conf);
Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);
// Similarly, create the listing of target-files. Sort alphabetically.
Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
CopyListing target = new GlobbedCopyListing(new Configuration(conf), null);
List<Path> targets = new ArrayList<Path>(1);
Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
targets.add(targetFinalPath);
Path resultNonePath = Path.getPathWithoutSchemeAndAuthority(targetFinalPath)
.toString().startsWith(DistCpConstants.HDFS_RESERVED_RAW_DIRECTORY_NAME)
? DistCpConstants.RAW_NONE_PATH : DistCpConstants.NONE_PATH;
DistCpOptions options = new DistCpOptions(targets, resultNonePath);
//
// Set up options to be the same from the CopyListing.buildListing's perspective,
// so to collect similar listings as when doing the copy
//
options.setOverwrite(overwrite);
options.setSyncFolder(syncFolder);
options.setTargetPathExists(targetPathExists);
target.buildListing(targetListing, options);
Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();
SequenceFile.Reader sourceReader = new SequenceFile.Reader(conf,
SequenceFile.Reader.file(sortedSourceListing));
SequenceFile.Reader targetReader = new SequenceFile.Reader(conf,
SequenceFile.Reader.file(sortedTargetListing));
// Walk both source and target file listings.
// Delete all from target that doesn't also exist on source.
long deletedEntries = 0;
try {
CopyListingFileStatus srcFileStatus = new CopyListingFileStatus();
Text srcRelPath = new Text();
CopyListingFileStatus trgtFileStatus = new CopyListingFileStatus();
Text trgtRelPath = new Text();
FileSystem targetFS = targetFinalPath.getFileSystem(conf);
boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
while (targetReader.next(trgtRelPath, trgtFileStatus)) {
// Skip sources that don't exist on target.
while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
}
if (srcAvailable && trgtRelPath.equals(srcRelPath)) continue;
// Target doesn't exist at source. Delete.
boolean result = (!targetFS.exists(trgtFileStatus.getPath()) ||
targetFS.delete(trgtFileStatus.getPath(), true));
if (result) {
LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
deletedEntries++;
} else {
throw new IOException("Unable to delete " + trgtFileStatus.getPath());
}
taskAttemptContext.progress();
taskAttemptContext.setStatus("Deleting missing files from target. [" +
targetReader.getPosition() * 100 / totalLen + "%]");
}
} finally {
IOUtils.closeStream(sourceReader);
IOUtils.closeStream(targetReader);
}
LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}