This article collects typical usage examples of the Java class org.apache.hadoop.io.SequenceFile. If you are wondering what the SequenceFile class does, how to use it, or are looking for concrete examples, the curated code samples below may help.
The SequenceFile class belongs to the org.apache.hadoop.io package. A total of 15 code examples of the SequenceFile class are shown below, sorted by popularity by default.
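Before looking at the real-world examples, here is a minimal, self-contained sketch of writing and then reading a SequenceFile. The class name, the /tmp path, and the IntWritable/Text key and value types are illustrative assumptions rather than part of any example below; the Writer.Option/Reader.Option factory methods are the same ones several of the later examples rely on.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path path = new Path("/tmp/example.seq"); // hypothetical path, resolved against the default file system

    // Write a few key/value pairs using the Writer.Option-based factory method.
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(path),
        SequenceFile.Writer.keyClass(IntWritable.class),
        SequenceFile.Writer.valueClass(Text.class),
        SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE))) {
      for (int i = 0; i < 3; i++) {
        writer.append(new IntWritable(i), new Text("value-" + i));
      }
    }

    // Read the pairs back in insertion order.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
        SequenceFile.Reader.file(path))) {
      IntWritable key = new IntWritable();
      Text value = new Text();
      while (reader.next(key, value)) {
        System.out.println(key + " -> " + value);
      }
    }
  }
}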
Example 1: open
import org.apache.hadoop.io.SequenceFile; // import the required package/class
protected void open(Path dstPath, CompressionCodec codeC,
    CompressionType compType, Configuration conf, FileSystem hdfs)
    throws IOException {
  if (useRawLocalFileSystem) {
    if (hdfs instanceof LocalFileSystem) {
      hdfs = ((LocalFileSystem) hdfs).getRaw();
    } else {
      logger.warn("useRawLocalFileSystem is set to true but file system " +
          "is not of type LocalFileSystem: " + hdfs.getClass().getName());
    }
  }
  if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
    outStream = hdfs.append(dstPath);
  } else {
    outStream = hdfs.create(dstPath);
  }
  writer = SequenceFile.createWriter(conf, outStream,
      serializer.getKeyClass(), serializer.getValueClass(), compType, codeC);
  registerCurrentStream(outStream, hdfs, dstPath);
}
Example 2: SeqFileAppendable
import org.apache.hadoop.io.SequenceFile; // import the required package/class
public SeqFileAppendable(FileSystem fs, Path path, int osBufferSize,
    String compress, int minBlkSize) throws IOException {
  Configuration conf = new Configuration();
  CompressionCodec codec = null;

  if ("lzo".equals(compress)) {
    codec = Compression.Algorithm.LZO.getCodec();
  } else if ("gz".equals(compress)) {
    codec = Compression.Algorithm.GZ.getCodec();
  } else if (!"none".equals(compress)) {
    throw new IOException("Codec not supported.");
  }

  this.fsdos = fs.create(path, true, osBufferSize);

  if (!"none".equals(compress)) {
    writer = SequenceFile.createWriter(conf, fsdos, BytesWritable.class,
        BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
  } else {
    writer = SequenceFile.createWriter(conf, fsdos, BytesWritable.class,
        BytesWritable.class, SequenceFile.CompressionType.NONE, null);
  }
}
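A note on the compression choice above: SequenceFile supports three compression types. CompressionType.NONE stores records uncompressed, CompressionType.RECORD compresses each value on its own, and CompressionType.BLOCK buffers many keys and values and compresses them together, which usually gives a better ratio for small records. The constructor above therefore pairs BLOCK compression with the selected codec and falls back to NONE when no codec is requested.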
Example 3: doBuildListing
import org.apache.hadoop.io.SequenceFile; // import the required package/class
/**
 * Collect the list of <sourceRelativePath, sourceFileStatus> entries to be copied and write them to the sequence
 * file. In essence, any file or directory that needs to be copied or synced is written as an entry to the sequence
 * file, with the possible exception of the source root: when either the -update (sync) or -overwrite switch is
 * specified, and the source root is a directory, the source root entry is not written to the sequence file, because
 * only the contents of the source directory need to be copied in this case. See
 * {@link com.hotels.bdp.circustrain.s3mapreducecp.util.ConfigurationUtil#getRelativePath} for how the relative path
 * is computed, and the computeSourceRootPath method for how the root path of the source is computed.
 *
 * @param fileListWriter writer for the sequence file that holds the copy listing
 * @param options options governing which source paths are listed
 * @throws IOException on failure to read the source paths or write the listing
 */
@VisibleForTesting
public void doBuildListing(SequenceFile.Writer fileListWriter, S3MapReduceCpOptions options) throws IOException {
  List<Path> globbedPaths = new ArrayList<>(options.getSources().size());

  for (Path sourcePath : options.getSources()) {
    FileSystem fs = sourcePath.getFileSystem(getConf());
    FileStatus sourceFileStatus = fs.getFileStatus(sourcePath);
    if (sourceFileStatus.isFile()) {
      LOG.debug("Adding path {}", sourceFileStatus.getPath());
      globbedPaths.add(sourceFileStatus.getPath());
    } else {
      FileStatus[] inputs = fs.globStatus(sourcePath);
      if (inputs != null && inputs.length > 0) {
        for (FileStatus onePath : inputs) {
          LOG.debug("Adding path {}", onePath.getPath());
          globbedPaths.add(onePath.getPath());
        }
      } else {
        throw new InvalidInputException("Source path " + sourcePath + " doesn't exist");
      }
    }
  }

  doBuildListing(fileListWriter, options, globbedPaths);
}
Example 4: traverseNonEmptyDirectory
import org.apache.hadoop.io.SequenceFile; // import the required package/class
private void traverseNonEmptyDirectory(
    SequenceFile.Writer fileListWriter,
    FileStatus sourceStatus,
    Path sourcePathRoot,
    S3MapReduceCpOptions options)
    throws IOException {
  FileSystem sourceFS = sourcePathRoot.getFileSystem(getConf());
  Stack<FileStatus> pathStack = new Stack<>();
  pathStack.push(sourceStatus);

  while (!pathStack.isEmpty()) {
    for (FileStatus child : getChildren(sourceFS, pathStack.pop())) {
      if (child.isFile()) {
        LOG.debug("Recording source-path: {} for copy.", child.getPath());
        CopyListingFileStatus childCopyListingStatus = new CopyListingFileStatus(child);
        writeToFileListing(fileListWriter, childCopyListingStatus, sourcePathRoot, options);
      }
      if (isDirectoryAndNotEmpty(sourceFS, child)) {
        LOG.debug("Traversing non-empty source dir: {}", child.getPath());
        pathStack.push(child);
      }
    }
  }
}
Example 5: writeToFileListing
import org.apache.hadoop.io.SequenceFile; // import the required package/class
private void writeToFileListing(
    SequenceFile.Writer fileListWriter,
    CopyListingFileStatus fileStatus,
    Path sourcePathRoot,
    S3MapReduceCpOptions options)
    throws IOException {
  LOG.debug("REL PATH: {}, FULL PATH: {}", PathUtil.getRelativePath(sourcePathRoot, fileStatus.getPath()),
      fileStatus.getPath());

  FileStatus status = fileStatus;

  if (!shouldCopy(fileStatus.getPath(), options)) {
    return;
  }

  fileListWriter.append(new Text(PathUtil.getRelativePath(sourcePathRoot, fileStatus.getPath())), status);
  fileListWriter.sync(); // write a sync marker so the listing file can be split and read from arbitrary offsets

  if (!fileStatus.isDirectory()) {
    totalBytesToCopy += fileStatus.getLen();
  }
  totalPaths++;
}
Example 6: writeToFileListing
import org.apache.hadoop.io.SequenceFile; // import the required package/class
private void writeToFileListing(SequenceFile.Writer fileListWriter,
                                CopyListingFileStatus fileStatus,
                                Path sourcePathRoot,
                                DistCpOptions options) throws IOException {
  if (LOG.isDebugEnabled()) {
    LOG.debug("REL PATH: " + DistCpUtils.getRelativePath(sourcePathRoot,
        fileStatus.getPath()) + ", FULL PATH: " + fileStatus.getPath());
  }

  FileStatus status = fileStatus;

  if (!shouldCopy(fileStatus.getPath(), options)) {
    return;
  }

  fileListWriter.append(new Text(DistCpUtils.getRelativePath(sourcePathRoot,
      fileStatus.getPath())), status);
  fileListWriter.sync();

  if (!fileStatus.isDirectory()) {
    totalBytesToCopy += fileStatus.getLen();
  }
  totalPaths++;
}
Example 7: skipFlagFiles
import org.apache.hadoop.io.SequenceFile; // import the required package/class
@Test(timeout = 10000)
public void skipFlagFiles() throws Exception {
  FileSystem fs = cluster.getFileSystem();
  Path source = new Path("/tmp/in4");
  URI target = URI.create("s3://bucket/tmp/out4/");
  createFile(fs, new Path(source, "1/_SUCCESS"));
  createFile(fs, new Path(source, "1/file"));
  createFile(fs, new Path(source, "2"));
  Path listingFile = new Path("/tmp/list4");
  listing.buildListing(listingFile, options(source, target));
  assertThat(listing.getNumberOfPaths(), is(2L));

  try (SequenceFile.Reader reader = new SequenceFile.Reader(CONFIG, SequenceFile.Reader.file(listingFile))) {
    CopyListingFileStatus fileStatus = new CopyListingFileStatus();
    Text relativePath = new Text();
    assertThat(reader.next(relativePath, fileStatus), is(true));
    assertThat(relativePath.toString(), is("/1/file"));
    assertThat(reader.next(relativePath, fileStatus), is(true));
    assertThat(relativePath.toString(), is("/2"));
    assertThat(reader.next(relativePath, fileStatus), is(false));
  }
}
Example 8: failOnCloseError
import org.apache.hadoop.io.SequenceFile; // import the required package/class
@Test
public void failOnCloseError() throws IOException {
  File inFile = File.createTempFile("TestCopyListingIn", null);
  inFile.deleteOnExit();
  File outFile = File.createTempFile("TestCopyListingOut", null);
  outFile.deleteOnExit();
  Path source = new Path(inFile.toURI());

  Exception expectedEx = new IOException("boom");
  SequenceFile.Writer writer = mock(SequenceFile.Writer.class);
  doThrow(expectedEx).when(writer).close();

  SimpleCopyListing listing = new SimpleCopyListing(CONFIG, CREDENTIALS);
  Exception actualEx = null;
  try {
    listing.doBuildListing(writer, options(source, outFile.toURI()));
  } catch (Exception e) {
    actualEx = e;
  }
  Assert.assertNotNull("close writer didn't fail", actualEx);
  Assert.assertEquals(expectedEx, actualEx);
}
Example 9: testEventCountingRoller
import org.apache.hadoop.io.SequenceFile; // import the required package/class
@Test
public void testEventCountingRoller() throws IOException, InterruptedException {
  int maxEvents = 100;
  MockHDFSWriter hdfsWriter = new MockHDFSWriter();
  BucketWriter bucketWriter = new BucketWriter(
      0, 0, maxEvents, 0, ctx, "/tmp", "file", "", ".tmp", null, null,
      SequenceFile.CompressionType.NONE, hdfsWriter, timedRollerPool, proxy,
      new SinkCounter("test-bucket-writer-" + System.currentTimeMillis()), 0, null, null, 30000,
      Executors.newSingleThreadExecutor(), 0, 0);

  Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
  // 1000 three-byte events with a 100-event roll limit should produce 10 files.
  for (int i = 0; i < 1000; i++) {
    bucketWriter.append(e);
  }

  logger.info("Number of events written: {}", hdfsWriter.getEventsWritten());
  logger.info("Number of bytes written: {}", hdfsWriter.getBytesWritten());
  logger.info("Number of files opened: {}", hdfsWriter.getFilesOpened());

  Assert.assertEquals("events written", 1000, hdfsWriter.getEventsWritten());
  Assert.assertEquals("bytes written", 3000, hdfsWriter.getBytesWritten());
  Assert.assertEquals("files opened", 10, hdfsWriter.getFilesOpened());
}
Example 10: testSizeRoller
import org.apache.hadoop.io.SequenceFile; // import the required package/class
@Test
public void testSizeRoller() throws IOException, InterruptedException {
  int maxBytes = 300;
  MockHDFSWriter hdfsWriter = new MockHDFSWriter();
  BucketWriter bucketWriter = new BucketWriter(
      0, maxBytes, 0, 0, ctx, "/tmp", "file", "", ".tmp", null, null,
      SequenceFile.CompressionType.NONE, hdfsWriter, timedRollerPool, proxy,
      new SinkCounter("test-bucket-writer-" + System.currentTimeMillis()), 0, null, null, 30000,
      Executors.newSingleThreadExecutor(), 0, 0);

  Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
  // 1000 three-byte events with a 300-byte roll limit should produce 10 files of 100 events each.
  for (int i = 0; i < 1000; i++) {
    bucketWriter.append(e);
  }

  logger.info("Number of events written: {}", hdfsWriter.getEventsWritten());
  logger.info("Number of bytes written: {}", hdfsWriter.getBytesWritten());
  logger.info("Number of files opened: {}", hdfsWriter.getFilesOpened());

  Assert.assertEquals("events written", 1000, hdfsWriter.getEventsWritten());
  Assert.assertEquals("bytes written", 3000, hdfsWriter.getBytesWritten());
  Assert.assertEquals("files opened", 10, hdfsWriter.getFilesOpened());
}
Example 11: verifyContents
import org.apache.hadoop.io.SequenceFile; // import the required package/class
private void verifyContents(Path listingPath) throws Exception {
  Text key = new Text();
  CopyListingFileStatus value = new CopyListingFileStatus();
  Map<String, String> actualValues = new HashMap<String, String>();

  try (SequenceFile.Reader reader = new SequenceFile.Reader(cluster.getFileSystem(),
      listingPath, new Configuration())) {
    while (reader.next(key, value)) {
      if (value.isDirectory() && key.toString().equals("")) {
        // ignore the root entry with an empty relPath, which exists only
        // to preserve root attributes etc.
        continue;
      }
      actualValues.put(value.getPath().toString(), key.toString());
    }
  }

  Assert.assertEquals(expectedValues.size(), actualValues.size());
  for (Map.Entry<String, String> entry : actualValues.entrySet()) {
    Assert.assertEquals(entry.getValue(), expectedValues.get(entry.getKey()));
  }
}
Example 12: testInUsePrefix
import org.apache.hadoop.io.SequenceFile; // import the required package/class
@Test
public void testInUsePrefix() throws IOException, InterruptedException {
  final int ROLL_INTERVAL = 1000; // seconds; make sure it doesn't change over the course of the test
  final String PREFIX = "BRNO_IS_CITY_IN_CZECH_REPUBLIC";
  MockHDFSWriter hdfsWriter = new MockHDFSWriter();
  HDFSTextSerializer formatter = new HDFSTextSerializer();
  BucketWriter bucketWriter = new BucketWriter(
      ROLL_INTERVAL, 0, 0, 0, ctx, "/tmp", "file", PREFIX, ".tmp", null, null,
      SequenceFile.CompressionType.NONE, hdfsWriter, timedRollerPool, proxy,
      new SinkCounter("test-bucket-writer-" + System.currentTimeMillis()), 0, null, null, 30000,
      Executors.newSingleThreadExecutor(), 0, 0);

  Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
  bucketWriter.append(e);

  Assert.assertTrue("Incorrect in use prefix", hdfsWriter.getOpenedFilePath().contains(PREFIX));
}
Example 13: testInUseSuffix
import org.apache.hadoop.io.SequenceFile; // import the required package/class
@Test
public void testInUseSuffix() throws IOException, InterruptedException {
  final int ROLL_INTERVAL = 1000; // seconds; make sure it doesn't change over the course of the test
  final String SUFFIX = "WELCOME_TO_THE_HELLMOUNTH";
  MockHDFSWriter hdfsWriter = new MockHDFSWriter();
  HDFSTextSerializer serializer = new HDFSTextSerializer();
  BucketWriter bucketWriter = new BucketWriter(
      ROLL_INTERVAL, 0, 0, 0, ctx, "/tmp", "file", "", SUFFIX, null, null,
      SequenceFile.CompressionType.NONE, hdfsWriter, timedRollerPool, proxy,
      new SinkCounter("test-bucket-writer-" + System.currentTimeMillis()), 0, null, null, 30000,
      Executors.newSingleThreadExecutor(), 0, 0);

  Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
  bucketWriter.append(e);

  Assert.assertTrue("Incorrect in use suffix", hdfsWriter.getOpenedFilePath().contains(SUFFIX));
}
Example 14: testCallbackOnClose
import org.apache.hadoop.io.SequenceFile; // import the required package/class
@Test
public void testCallbackOnClose() throws IOException, InterruptedException {
  final int ROLL_INTERVAL = 1000; // seconds; make sure it doesn't change over the course of the test
  final String SUFFIX = "WELCOME_TO_THE_EREBOR";
  final AtomicBoolean callbackCalled = new AtomicBoolean(false);
  MockHDFSWriter hdfsWriter = new MockHDFSWriter();
  BucketWriter bucketWriter = new BucketWriter(
      ROLL_INTERVAL, 0, 0, 0, ctx, "/tmp", "file", "", SUFFIX, null, null,
      SequenceFile.CompressionType.NONE, hdfsWriter, timedRollerPool, proxy,
      new SinkCounter("test-bucket-writer-" + System.currentTimeMillis()), 0,
      new HDFSEventSink.WriterCallback() {
        @Override
        public void run(String filePath) {
          callbackCalled.set(true);
        }
      }, "blah", 30000, Executors.newSingleThreadExecutor(), 0, 0);

  Event e = EventBuilder.withBody("foo", Charsets.UTF_8);
  bucketWriter.append(e);
  bucketWriter.close(true);

  Assert.assertTrue(callbackCalled.get());
}
Example 15: testGzipDurability
import org.apache.hadoop.io.SequenceFile; // import the required package/class
@Test
public void testGzipDurability() throws Exception {
  Context context = new Context();
  HDFSCompressedDataStream writer = new HDFSCompressedDataStream();
  writer.configure(context);
  writer.open(fileURI, factory.getCodec(new Path(fileURI)),
      SequenceFile.CompressionType.BLOCK);

  String[] bodies = { "yarf!" };
  writeBodies(writer, bodies);

  byte[] buf = new byte[256];
  GZIPInputStream cmpIn = new GZIPInputStream(new FileInputStream(file));
  int len = cmpIn.read(buf);
  String result = new String(buf, 0, len, Charsets.UTF_8);
  result = result.trim(); // BodyTextEventSerializer adds a newline

  Assert.assertEquals("input and output must match", bodies[0], result);
}