This article collects typical usage examples of the Java class org.apache.hadoop.io.SequenceFile.Writer. If you have been wondering what SequenceFile.Writer is for, or how it is used in practice, the curated code samples below may help. You can also explore the enclosing class, org.apache.hadoop.io.SequenceFile, for further usage.
The following presents 15 code examples of SequenceFile.Writer, ordered by popularity.
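Before working through the examples, here is a minimal, self-contained sketch of writing a SequenceFile and reading it back with the option-based factory API that several of the examples below rely on. The path and the key/value types are arbitrary choices for illustration, not taken from any of the examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/demo.seq"); // hypothetical location
    // Write a few records using the non-deprecated option-based factory.
    SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(path),
        SequenceFile.Writer.keyClass(IntWritable.class),
        SequenceFile.Writer.valueClass(Text.class));
    try {
      for (int i = 0; i < 3; i++) {
        writer.append(new IntWritable(i), new Text("value-" + i));
      }
    } finally {
      writer.close();
    }
    // Read the records back to verify the file contents.
    SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
    try {
      IntWritable key = new IntWritable();
      Text value = new Text();
      while (reader.next(key, value)) {
        System.out.println(key.get() + "\t" + value);
      }
    } finally {
      reader.close();
    }
  }
}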
Example 1: doBuildListing
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
 * Collect the list of <sourceRelativePath, sourceFileStatus> entries to be copied and write them to the sequence
 * file. In essence, any file or directory that needs to be copied or synced is written as an entry to the sequence
 * file, with the possible exception of the source root: when either the -update (sync) or -overwrite switch is
 * specified, and if the source root is a directory, then the source root entry is not written to the sequence file,
 * because only the contents of the source directory need to be copied in this case. See
 * {@link com.hotels.bdp.circustrain.s3mapreducecp.util.PathUtil#getRelativePath} for how the relative path is
 * computed. See the computeSourceRootPath method for how the root path of the source is computed.
 *
 * @param fileListWriter the writer to which the copy listing entries are appended
 * @param options options describing the copy job
 * @throws IOException if writing to the sequence file fails
 */
@VisibleForTesting
public void doBuildListing(SequenceFile.Writer fileListWriter, S3MapReduceCpOptions options) throws IOException {
List<Path> globbedPaths = new ArrayList<>(options.getSources().size());
for (Path sourcePath : options.getSources()) {
FileSystem fs = sourcePath.getFileSystem(getConf());
FileStatus sourceFileStatus = fs.getFileStatus(sourcePath);
if (sourceFileStatus.isFile()) {
LOG.debug("Adding path {}", sourceFileStatus.getPath());
globbedPaths.add(sourceFileStatus.getPath());
} else {
FileStatus[] inputs = fs.globStatus(sourcePath);
if (inputs != null && inputs.length > 0) {
for (FileStatus onePath : inputs) {
LOG.debug("Adding path {}", onePath.getPath());
globbedPaths.add(onePath.getPath());
}
} else {
throw new InvalidInputException("Source path " + sourcePath + " doesn't exist");
}
}
}
doBuildListing(fileListWriter, options, globbedPaths);
}
Example 2: traverseNonEmptyDirectory
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private void traverseNonEmptyDirectory(
SequenceFile.Writer fileListWriter,
FileStatus sourceStatus,
Path sourcePathRoot,
S3MapReduceCpOptions options)
throws IOException {
FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
Stack<FileStatus> pathStack = new Stack<>();
pathStack.push(sourceStatus);
while (!pathStack.isEmpty()) {
for (FileStatus child : getChildren(sourceFS, pathStack.pop())) {
if (child.isFile()) {
LOG.debug("Recording source-path: {} for copy.", sourceStatus.getPath());
CopyListingFileStatus childCopyListingStatus = new CopyListingFileStatus(child);
writeToFileListing(fileListWriter, childCopyListingStatus, sourcePathRoot, options);
}
if (isDirectoryAndNotEmpty(sourceFS, child)) {
LOG.debug("Traversing non-empty source dir: {}", sourceStatus.getPath());
pathStack.push(child);
}
}
}
}
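The getChildren and isDirectoryAndNotEmpty helpers are not included in this listing. A plausible sketch of them, assuming they simply delegate to FileSystem.listStatus (the real implementations may differ), looks like this:

// Hypothetical sketches of the two helpers used above.
private FileStatus[] getChildren(FileSystem fileSystem, FileStatus parent) throws IOException {
  return fileSystem.listStatus(parent.getPath());
}

private boolean isDirectoryAndNotEmpty(FileSystem fileSystem, FileStatus fileStatus) throws IOException {
  return fileStatus.isDirectory() && getChildren(fileSystem, fileStatus).length > 0;
}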
Example 3: writeToFileListing
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private void writeToFileListing(
SequenceFile.Writer fileListWriter,
CopyListingFileStatus fileStatus,
Path sourcePathRoot,
S3MapReduceCpOptions options)
throws IOException {
LOG.debug("REL PATH: {}, FULL PATH: {}", PathUtil.getRelativePath(sourcePathRoot, fileStatus.getPath()),
fileStatus.getPath());
FileStatus status = fileStatus;
if (!shouldCopy(fileStatus.getPath(), options)) {
return;
}
fileListWriter.append(new Text(PathUtil.getRelativePath(sourcePathRoot, fileStatus.getPath())), status);
fileListWriter.sync();
if (!fileStatus.isDirectory()) {
totalBytesToCopy += fileStatus.getLen();
}
totalPaths++;
}
Example 4: writeToFileListing
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private void writeToFileListing(SequenceFile.Writer fileListWriter,
CopyListingFileStatus fileStatus,
Path sourcePathRoot,
DistCpOptions options) throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("REL PATH: " + DistCpUtils.getRelativePath(sourcePathRoot,
fileStatus.getPath()) + ", FULL PATH: " + fileStatus.getPath());
}
FileStatus status = fileStatus;
if (!shouldCopy(fileStatus.getPath(), options)) {
return;
}
fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
fileStatus.getPath())), status);
fileListWriter.sync();
if (!fileStatus.isDirectory()) {
totalBytesToCopy += fileStatus.getLen();
}
totalPaths++;
}
Example 5: createFiles
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private static void createFiles(int length, int numFiles, Random random,
Job job) throws IOException {
Range[] ranges = createRanges(length, numFiles, random);
for (int i = 0; i < numFiles; i++) {
Path file = new Path(workDir, "test_" + i + ".seq");
// create a file with length entries
@SuppressWarnings("deprecation")
SequenceFile.Writer writer =
SequenceFile.createWriter(localFs, job.getConfiguration(), file,
IntWritable.class, BytesWritable.class);
Range range = ranges[i];
try {
for (int j = range.start; j < range.end; j++) {
IntWritable key = new IntWritable(j);
byte[] data = new byte[random.nextInt(10)];
random.nextBytes(data);
BytesWritable value = new BytesWritable(data);
writer.append(key, value);
}
} finally {
writer.close();
}
}
}
Example 6: createWriters
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private static SequenceFile.Writer[] createWriters(Path testdir,
Configuration conf, int srcs, Path[] src) throws IOException {
for (int i = 0; i < srcs; ++i) {
src[i] = new Path(testdir, Integer.toString(i + 10, 36));
}
SequenceFile.Writer[] out = new SequenceFile.Writer[srcs];
for (int i = 0; i < srcs; ++i) {
out[i] = new SequenceFile.Writer(testdir.getFileSystem(conf), conf,
src[i], IntWritable.class, IntWritable.class);
}
return out;
}
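Note that the SequenceFile.Writer constructor used above is deprecated. A sketch of an equivalent loop body using the option-based factory instead (same key and value classes; not verified against this test code) would be:

out[i] = SequenceFile.createWriter(conf,
    SequenceFile.Writer.file(src[i]),
    SequenceFile.Writer.keyClass(IntWritable.class),
    SequenceFile.Writer.valueClass(IntWritable.class));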
Example 7: createControlFiles
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
* Create control files before a test run.
* The number of files created equals the number of maps specified.
*
* @throws IOException on error
*/
private static void createControlFiles() throws IOException {
FileSystem tempFS = FileSystem.get(config);
LOG.info("Creating " + numberOfMaps + " control files");
for (int i = 0; i < numberOfMaps; i++) {
String strFileName = "NNBench_Controlfile_" + i;
Path filePath = new Path(new Path(baseDir, CONTROL_DIR_NAME),
strFileName);
SequenceFile.Writer writer = null;
try {
writer = SequenceFile.createWriter(tempFS, config, filePath, Text.class,
LongWritable.class, CompressionType.NONE);
writer.append(new Text(strFileName), new LongWritable(0L));
} finally {
if (writer != null) {
writer.close();
}
}
}
}
Example 8: createFiles
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private static void createFiles(int length, int numFiles, Random random)
throws IOException {
Range[] ranges = createRanges(length, numFiles, random);
for (int i = 0; i < numFiles; i++) {
Path file = new Path(workDir, "test_" + i + ".seq");
// create a file with length entries
@SuppressWarnings("deprecation")
SequenceFile.Writer writer =
SequenceFile.createWriter(localFs, conf, file,
IntWritable.class, BytesWritable.class);
Range range = ranges[i];
try {
for (int j = range.start; j < range.end; j++) {
IntWritable key = new IntWritable(j);
byte[] data = new byte[random.nextInt(10)];
random.nextBytes(data);
BytesWritable value = new BytesWritable(data);
writer.append(key, value);
}
} finally {
writer.close();
}
}
}
Example 9: writeSimpleSrc
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private static Path[] writeSimpleSrc(Path testdir, JobConf conf,
int srcs) throws IOException {
SequenceFile.Writer[] out = null;
Path[] src = new Path[srcs];
try {
out = createWriters(testdir, conf, srcs, src);
final int capacity = srcs * 2 + 1;
Text key = new Text();
key.set("ignored");
Text val = new Text();
for (int k = 0; k < capacity; ++k) {
for (int i = 0; i < srcs; ++i) {
val.set(Integer.toString(k % srcs == 0 ? k * srcs : k * srcs + i) +
"\t" + Integer.toString(10 * k + i));
out[i].append(key, val);
if (i == k) {
// add duplicate key
out[i].append(key, val);
}
}
}
} finally {
if (out != null) {
for (int i = 0; i < srcs; ++i) {
if (out[i] != null)
out[i].close();
}
}
}
return src;
}
Example 10: getWriter
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
FileSystem fs = pathToListFile.getFileSystem(getConf());
if (fs.exists(pathToListFile)) {
fs.delete(pathToListFile, false);
}
return SequenceFile.createWriter(getConf(), SequenceFile.Writer.file(pathToListFile),
SequenceFile.Writer.keyClass(Text.class), SequenceFile.Writer.valueClass(CopyListingFileStatus.class),
SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
}
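For context, here is a sketch of how a caller might use and close such a writer; the listing path, source path, and entry key are made up for illustration:

Path pathToListFile = new Path("/tmp/copy-listing.seq"); // hypothetical path
SequenceFile.Writer fileListWriter = getWriter(pathToListFile);
try {
  FileSystem fs = pathToListFile.getFileSystem(getConf());
  FileStatus status = fs.getFileStatus(new Path("/some/source/file")); // hypothetical source
  fileListWriter.append(new Text("/relative/child"), new CopyListingFileStatus(status));
} finally {
  fileListWriter.close();
}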
Example 11: writePartitions
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
* Write out a {@link SequenceFile} that can be read by
* {@link TotalOrderPartitioner} that contains the split points in startKeys.
*/
@SuppressWarnings("deprecation")
private static void writePartitions(Configuration conf, Path partitionsPath,
List<ImmutableBytesWritable> startKeys) throws IOException {
LOG.info("Writing partition information to " + partitionsPath);
if (startKeys.isEmpty()) {
throw new IllegalArgumentException("No regions passed");
}
// We're generating a list of split points, and we don't ever
// have keys < the first region (which has an empty start key)
// so we need to remove it. Otherwise we would end up with an
// empty reducer with index 0
TreeSet<ImmutableBytesWritable> sorted =
new TreeSet<ImmutableBytesWritable>(startKeys);
ImmutableBytesWritable first = sorted.first();
if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
throw new IllegalArgumentException(
"First region of table should have empty start key. Instead has: "
+ Bytes.toStringBinary(first.get()));
}
sorted.remove(first);
// Write the actual file
FileSystem fs = partitionsPath.getFileSystem(conf);
SequenceFile.Writer writer = SequenceFile.createWriter(
fs, conf, partitionsPath, ImmutableBytesWritable.class,
NullWritable.class);
try {
for (ImmutableBytesWritable startKey : sorted) {
writer.append(startKey, NullWritable.get());
}
} finally {
writer.close();
}
}
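The partitions file written above is only useful once a job is pointed at it. A sketch of the typical hook-up, assuming a mapreduce-API Job instance named job is in scope:

// Wire the partitions file into the job (sketch).
job.setPartitionerClass(TotalOrderPartitioner.class);
TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);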
Example 12: getWriter
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private SequenceFile.Writer getWriter(Path pathToListFile) throws IOException {
FileSystem fs = pathToListFile.getFileSystem(getConf());
if (fs.exists(pathToListFile)) {
fs.delete(pathToListFile, false);
}
return SequenceFile.createWriter(getConf(),
SequenceFile.Writer.file(pathToListFile),
SequenceFile.Writer.keyClass(Text.class),
SequenceFile.Writer.valueClass(CopyListingFileStatus.class),
SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
}
Example 13: createControlFile
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private static void createControlFile(
FileSystem fs,
int fileSize, // in MB
int nrFiles
) throws IOException {
LOG.info("creating control file: "+fileSize+" mega bytes, "+nrFiles+" files");
fs.delete(CONTROL_DIR, true);
for(int i=0; i < nrFiles; i++) {
String name = getFileName(i);
Path controlFile = new Path(CONTROL_DIR, "in_file_" + name);
SequenceFile.Writer writer = null;
try {
writer = SequenceFile.createWriter(fs, fsConfig, controlFile,
Text.class, LongWritable.class,
CompressionType.NONE);
writer.append(new Text(name), new LongWritable(fileSize));
} catch (Exception e) {
  throw new IOException(e);
} finally {
if (writer != null)
writer.close();
writer = null;
}
}
LOG.info("created control files for: "+nrFiles+" files");
}
Example 14: getRecordWriter
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
public RecordWriter<K, V> getRecordWriter(
FileSystem ignored, JobConf job,
String name, Progressable progress)
throws IOException {
// get the path of the temporary output file
Path file = FileOutputFormat.getTaskOutputPath(job, name);
FileSystem fs = file.getFileSystem(job);
CompressionCodec codec = null;
CompressionType compressionType = CompressionType.NONE;
if (getCompressOutput(job)) {
// find the kind of compression to do
compressionType = getOutputCompressionType(job);
// find the right codec
Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(job,
DefaultCodec.class);
codec = ReflectionUtils.newInstance(codecClass, job);
}
final SequenceFile.Writer out =
SequenceFile.createWriter(fs, job, file,
job.getOutputKeyClass(),
job.getOutputValueClass(),
compressionType,
codec,
progress);
return new RecordWriter<K, V>() {
public void write(K key, V value)
throws IOException {
out.append(key, value);
}
public void close(Reporter reporter) throws IOException { out.close(); }
};
}
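On the driver side, compression for this output format is typically enabled before the record writer above is ever created. A sketch using the old mapred API (job setup details omitted; names assumed for illustration):

JobConf job = new JobConf();
job.setOutputFormat(SequenceFileOutputFormat.class);
// Compress records in blocks rather than individually.
FileOutputFormat.setCompressOutput(job, true);
SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);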
Example 15: cleanup
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
* Reduce task done, write output to a file.
*/
@Override
public void cleanup(Context context) throws IOException {
//write output to a file
Configuration conf = context.getConfiguration();
Path outDir = new Path(conf.get(FileOutputFormat.OUTDIR));
Path outFile = new Path(outDir, "reduce-out");
FileSystem fileSys = FileSystem.get(conf);
SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf,
outFile, LongWritable.class, LongWritable.class,
CompressionType.NONE);
writer.append(new LongWritable(numInside), new LongWritable(numOutside));
writer.close();
}