This article collects typical usage examples of the Java method org.apache.hadoop.io.SequenceFile.createWriter. If you are unsure how SequenceFile.createWriter is used in practice, the curated code examples below may help. You can also explore further usage examples of the enclosing class, org.apache.hadoop.io.SequenceFile.
The following presents 15 code examples of SequenceFile.createWriter, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
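Before the individual examples, here is a minimal, self-contained sketch using the Option-based createWriter overload. The output path and record contents are illustrative assumptions, not taken from the examples below; the older (fs, conf, path, ...) overloads seen in several of those examples are deprecated in Hadoop 2.x but still functional.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileWriteSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical output path; any Hadoop-supported filesystem URI works here.
    Path path = new Path("/tmp/demo.seq");
    SequenceFile.Writer writer = null;
    try {
      // Option-based factory method: file location, key/value classes, compression.
      writer = SequenceFile.createWriter(conf,
          SequenceFile.Writer.file(path),
          SequenceFile.Writer.keyClass(IntWritable.class),
          SequenceFile.Writer.valueClass(Text.class),
          SequenceFile.Writer.compression(SequenceFile.CompressionType.NONE));
      for (int i = 0; i < 10; i++) {
        writer.append(new IntWritable(i), new Text("value-" + i));
      }
    } finally {
      if (writer != null) {
        writer.close();
      }
    }
  }
}
The try/finally close-on-exit pattern here mirrors the one used throughout the collected examples below.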
Example 1: open
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
protected void open(Path dstPath, CompressionCodec codeC,
CompressionType compType, Configuration conf, FileSystem hdfs)
throws IOException {
if(useRawLocalFileSystem) {
if(hdfs instanceof LocalFileSystem) {
hdfs = ((LocalFileSystem)hdfs).getRaw();
} else {
logger.warn("useRawLocalFileSystem is set to true but file system " +
"is not of type LocalFileSystem: " + hdfs.getClass().getName());
}
}
if (conf.getBoolean("hdfs.append.support", false) && hdfs.isFile(dstPath)) {
outStream = hdfs.append(dstPath);
} else {
outStream = hdfs.create(dstPath);
}
writer = SequenceFile.createWriter(conf, outStream,
serializer.getKeyClass(), serializer.getValueClass(), compType, codeC);
registerCurrentStream(outStream, hdfs, dstPath);
}
Example 2: createTempFile
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
@SuppressWarnings("deprecation")
void createTempFile(Path p, Configuration conf) throws IOException {
SequenceFile.Writer writer = null;
try {
writer = SequenceFile.createWriter(fs, conf, p,
Text.class, Text.class,
CompressionType.NONE);
writer.append(new Text("text"), new Text("moretext"));
} catch(Exception e) {
throw new IOException(e.getLocalizedMessage());
} finally {
if (writer != null) {
writer.close();
}
writer = null;
}
LOG.info("created: " + p);
}
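As a complement to Example 2 (not part of the collected examples), here is a minimal sketch that reads such a file back with SequenceFile.Reader. The path is a hypothetical placeholder, and Text keys and values are assumed.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class SequenceFileReadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical path of a SequenceFile with Text keys and Text values.
    Path path = new Path("/tmp/demo.seq");
    try (SequenceFile.Reader reader =
             new SequenceFile.Reader(conf, SequenceFile.Reader.file(path))) {
      Text key = new Text();
      Text value = new Text();
      // next() fills key and value in place and returns false at end of file.
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
    }
  }
}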
Example 3: createControlFiles
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
* Create control files before a test run.
* Number of files created is equal to the number of maps specified
*
* @throws IOException on error
*/
private static void createControlFiles() throws IOException {
FileSystem tempFS = FileSystem.get(config);
LOG.info("Creating " + numberOfMaps + " control files");
for (int i = 0; i < numberOfMaps; i++) {
String strFileName = "NNBench_Controlfile_" + i;
Path filePath = new Path(new Path(baseDir, CONTROL_DIR_NAME),
strFileName);
SequenceFile.Writer writer = null;
try {
writer = SequenceFile.createWriter(tempFS, config, filePath, Text.class,
LongWritable.class, CompressionType.NONE);
writer.append(new Text(strFileName), new LongWritable(0L));
} finally {
if (writer != null) {
writer.close();
}
}
}
}
Example 4: createInputFile
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private void createInputFile(String rootName) throws IOException {
cleanup(); // clean up if previous run failed
Path inputFile = new Path(MAP_INPUT_DIR, "in_file");
SequenceFile.Writer writer =
SequenceFile.createWriter(fs, fsConfig, inputFile,
Text.class, LongWritable.class, CompressionType.NONE);
try {
nrFiles = 0;
listSubtree(new Path(rootName), writer);
} finally {
writer.close();
}
LOG.info("Created map input files.");
}
Example 5: createFiles
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private static void createFiles(int length, int numFiles, Random random,
Job job) throws IOException {
Range[] ranges = createRanges(length, numFiles, random);
for (int i = 0; i < numFiles; i++) {
Path file = new Path(workDir, "test_" + i + ".seq");
// create a file with length entries
@SuppressWarnings("deprecation")
SequenceFile.Writer writer =
SequenceFile.createWriter(localFs, job.getConfiguration(), file,
IntWritable.class, BytesWritable.class);
Range range = ranges[i];
try {
for (int j = range.start; j < range.end; j++) {
IntWritable key = new IntWritable(j);
byte[] data = new byte[random.nextInt(10)];
random.nextBytes(data);
BytesWritable value = new BytesWritable(data);
writer.append(key, value);
}
} finally {
writer.close();
}
}
}
Example 6: writePartitionFile
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
private static <T extends WritableComparable<?>> Path writePartitionFile(
String testname, Configuration conf, T[] splits) throws IOException {
final FileSystem fs = FileSystem.getLocal(conf);
final Path testdir = new Path(System.getProperty("test.build.data", "/tmp")
).makeQualified(fs);
Path p = new Path(testdir, testname + "/_partition.lst");
TotalOrderPartitioner.setPartitionFile(conf, p);
conf.setInt(MRJobConfig.NUM_REDUCES, splits.length + 1);
SequenceFile.Writer w = null;
try {
w = SequenceFile.createWriter(fs, conf, p,
splits[0].getClass(), NullWritable.class,
SequenceFile.CompressionType.NONE);
for (int i = 0; i < splits.length; ++i) {
w.append(splits[i], NullWritable.get());
}
} finally {
if (null != w)
w.close();
}
return p;
}
Example 7: testSequenceFileSync
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/** Test hsync via SequenceFiles */
@Test
public void testSequenceFileSync() throws Exception {
Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
final FileSystem fs = cluster.getFileSystem();
final Path p = new Path("/testSequenceFileSync/foo");
final int len = 1 << 16;
FSDataOutputStream out = fs.create(p, FsPermission.getDefault(),
EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE, CreateFlag.SYNC_BLOCK),
4096, (short) 1, len, null);
Writer w = SequenceFile.createWriter(new Configuration(),
Writer.stream(out),
Writer.keyClass(RandomDatum.class),
Writer.valueClass(RandomDatum.class),
Writer.compression(CompressionType.NONE, new DefaultCodec()));
w.hflush();
checkSyncMetric(cluster, 0);
w.hsync();
checkSyncMetric(cluster, 1);
int seed = new Random().nextInt();
RandomDatum.Generator generator = new RandomDatum.Generator(seed);
generator.next();
w.append(generator.getKey(), generator.getValue());
w.hsync();
checkSyncMetric(cluster, 2);
w.close();
checkSyncMetric(cluster, 2);
out.close();
checkSyncMetric(cluster, 3);
cluster.shutdown();
}
Example 8: createSequenceFile
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
* Create a data file in SequenceFile format that gets exported to the db.
* @param fileNum the number of the file (for multi-file export).
* @param numRecords how many records to write to the file.
* @param className the table class name to instantiate and populate
* for each record.
*/
private void createSequenceFile(int fileNum, int numRecords, String className)
throws IOException {
try {
// Instantiate the value record object via reflection.
Class cls = Class.forName(className, true,
Thread.currentThread().getContextClassLoader());
SqoopRecord record = (SqoopRecord) ReflectionUtils.newInstance(
cls, new Configuration());
// Create the SequenceFile.
Configuration conf = new Configuration();
if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
}
FileSystem fs = FileSystem.get(conf);
Path tablePath = getTablePath();
Path filePath = new Path(tablePath, "part" + fileNum);
fs.mkdirs(tablePath);
SequenceFile.Writer w = SequenceFile.createWriter(
fs, conf, filePath, LongWritable.class, cls);
// Now write the data.
int startId = fileNum * numRecords;
for (int i = 0; i < numRecords; i++) {
record.parse(getRecordLine(startId + i));
w.append(new LongWritable(startId + i), record);
}
w.close();
} catch (ClassNotFoundException cnfe) {
throw new IOException(cnfe);
} catch (RecordParser.ParseError pe) {
throw new IOException(pe);
}
}
Example 9: createControlFile
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
public static void createControlFile(FileSystem fs,
long megaBytes, int numFiles,
long seed) throws Exception {
LOG.info("creating control file: "+megaBytes+" bytes, "+numFiles+" files");
Path controlFile = new Path(CONTROL_DIR, "files");
fs.delete(controlFile, true);
Random random = new Random(seed);
SequenceFile.Writer writer =
SequenceFile.createWriter(fs, conf, controlFile,
Text.class, LongWritable.class, CompressionType.NONE);
long totalSize = 0;
long maxSize = ((megaBytes / numFiles) * 2) + 1;
try {
while (totalSize < megaBytes) {
Text name = new Text(Long.toString(random.nextLong()));
long size = random.nextLong();
if (size < 0)
size = -size;
size = size % maxSize;
//LOG.info(" adding: name="+name+" size="+size);
writer.append(name, new LongWritable(size));
totalSize += size;
}
} finally {
writer.close();
}
LOG.info("created control file for: "+totalSize+" bytes");
}
Example 10: createControlFile
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
@SuppressWarnings("deprecation")
private void createControlFile(FileSystem fs,
long nrBytes, // in bytes
int nrFiles
) throws IOException {
LOG.info("creating control file: "+nrBytes+" bytes, "+nrFiles+" files");
Path controlDir = getControlDir(config);
fs.delete(controlDir, true);
for(int i=0; i < nrFiles; i++) {
String name = getFileName(i);
Path controlFile = new Path(controlDir, "in_file_" + name);
SequenceFile.Writer writer = null;
try {
writer = SequenceFile.createWriter(fs, config, controlFile,
Text.class, LongWritable.class,
CompressionType.NONE);
writer.append(new Text(name), new LongWritable(nrBytes));
} catch(Exception e) {
throw new IOException(e.getLocalizedMessage());
} finally {
if (writer != null)
writer.close();
writer = null;
}
}
LOG.info("created control files for: "+nrFiles+" files");
}
Example 11: writePartitionFile
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
void writePartitionFile(Configuration conf, Path path) throws IOException {
FileSystem fs = path.getFileSystem(conf);
@SuppressWarnings("deprecation")
SequenceFile.Writer writer = SequenceFile.createWriter(
fs, conf, path, ImmutableBytesWritable.class, NullWritable.class);
for (int i = 0; i < partitions.size(); i++) {
writer.append(partitions.get(i), NullWritable.get());
}
writer.close();
}
Example 12: writePartitionFile
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
* Write a partition file for the given job, using the Sampler provided.
* Queries the sampler for a sample keyset, sorts by the output key
* comparator, selects the keys for each rank, and writes to the destination
* returned from {@link TotalOrderPartitioner#getPartitionFile}.
*/
@SuppressWarnings("unchecked") // getInputFormat, getOutputKeyComparator
public static <K,V> void writePartitionFile(Job job, Sampler<K,V> sampler)
throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = job.getConfiguration();
final InputFormat inf =
ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
int numPartitions = job.getNumReduceTasks();
K[] samples = (K[])sampler.getSample(inf, job);
LOG.info("Using " + samples.length + " samples");
RawComparator<K> comparator =
(RawComparator<K>) job.getSortComparator();
Arrays.sort(samples, comparator);
Path dst = new Path(TotalOrderPartitioner.getPartitionFile(conf));
FileSystem fs = dst.getFileSystem(conf);
if (fs.exists(dst)) {
fs.delete(dst, false);
}
SequenceFile.Writer writer = SequenceFile.createWriter(fs,
conf, dst, job.getMapOutputKeyClass(), NullWritable.class);
NullWritable nullValue = NullWritable.get();
float stepSize = samples.length / (float) numPartitions;
int last = -1;
for(int i = 1; i < numPartitions; ++i) {
int k = Math.round(stepSize * i);
while (last >= k && comparator.compare(samples[last], samples[k]) == 0) {
++k;
}
writer.append(samples[k], nullValue);
last = k;
}
writer.close();
}
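For context (not one of the collected examples), this is roughly how the sampler-driven writePartitionFile above is invoked when setting up a total-order sort. The job name, paths, key type, and sampler parameters are all illustrative assumptions.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderSortSetup {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "total-order-sort"); // illustrative job name
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.addInputPath(job, new Path("/tmp/sort-input")); // hypothetical input
    job.setMapOutputKeyClass(Text.class); // must match the input key type being sampled
    job.setNumReduceTasks(4);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    // Destination of the partition file, read later by TotalOrderPartitioner.
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
        new Path("/tmp/_partitions.lst")); // hypothetical path
    // Sample roughly 10% of input keys, at most 1000 samples from at most 10 splits.
    InputSampler.Sampler<Text, Text> sampler =
        new InputSampler.RandomSampler<>(0.1, 1000, 10);
    // Calls the method shown above to write the reducer boundary keys.
    InputSampler.writePartitionFile(job, sampler);
  }
}
TotalOrderPartitioner then loads the partition file at task startup and routes each map output key to the reducer whose key range contains it.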
Example 13: writeSkippedRec
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
@SuppressWarnings("unchecked")
private void writeSkippedRec(K key, V value) throws IOException{
if(skipWriter==null) {
Path skipDir = SkipBadRecords.getSkipOutputPath(conf);
Path skipFile = new Path(skipDir, getTaskID().toString());
skipWriter =
SequenceFile.createWriter(
skipFile.getFileSystem(conf), conf, skipFile,
(Class<K>) createKey().getClass(),
(Class<V>) createValue().getClass(),
CompressionType.BLOCK, getTaskReporter());
}
skipWriter.append(key, value);
}
Example 14: configure
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
@Before
public void configure() throws Exception {
Path testdir = new Path(TEST_DIR.getAbsolutePath());
Path inDir = new Path(testdir, "in");
Path outDir = new Path(testdir, "out");
FileSystem fs = FileSystem.get(conf);
fs.delete(testdir, true);
conf.setInt(JobContext.IO_SORT_MB, 1);
conf.setInputFormat(SequenceFileInputFormat.class);
FileInputFormat.setInputPaths(conf, inDir);
FileOutputFormat.setOutputPath(conf, outDir);
conf.setMapperClass(TextGen.class);
conf.setReducerClass(TextReduce.class);
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
conf.set(MRConfig.FRAMEWORK_NAME, MRConfig.LOCAL_FRAMEWORK_NAME);
conf.setOutputFormat(SequenceFileOutputFormat.class);
if (!fs.mkdirs(testdir)) {
throw new IOException("Mkdirs failed to create " + testdir.toString());
}
if (!fs.mkdirs(inDir)) {
throw new IOException("Mkdirs failed to create " + inDir.toString());
}
Path inFile = new Path(inDir, "part0");
SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, inFile,
Text.class, Text.class);
writer.append(new Text("rec: 1"), new Text("Hello"));
writer.close();
jc = new JobClient(conf);
}
Example 15: writeDistCacheFilesList
import org.apache.hadoop.io.SequenceFile; // import the package/class this method depends on
/**
* Write the list of distributed cache files in the decreasing order of
* file sizes into the sequence file. This file will be input to the job
* {@link GenerateDistCacheData}.
* Also validates if -generate option is missing and distributed cache files
* are missing.
* @return exit code
* @throws IOException
*/
private int writeDistCacheFilesList()
throws IOException {
// Sort the distributed cache files in the decreasing order of file sizes.
List dcFiles = new ArrayList(distCacheFiles.entrySet());
Collections.sort(dcFiles, new Comparator() {
public int compare(Object dc1, Object dc2) {
return ((Comparable) ((Map.Entry) (dc2)).getValue())
.compareTo(((Map.Entry) (dc1)).getValue());
}
});
// write the sorted distributed cache files to the sequence file
FileSystem fs = FileSystem.get(conf);
Path distCacheFilesList = new Path(distCachePath, "_distCacheFiles.txt");
conf.set(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_LIST,
distCacheFilesList.toString());
SequenceFile.Writer src_writer = SequenceFile.createWriter(fs, conf,
distCacheFilesList, LongWritable.class, BytesWritable.class,
SequenceFile.CompressionType.NONE);
// Total number of unique distributed cache files
int fileCount = dcFiles.size();
long byteCount = 0; // Total size of all distributed cache files
long bytesSync = 0; // Bytes since the previous sync; used to decide when to add a sync marker
for (Iterator it = dcFiles.iterator(); it.hasNext();) {
Map.Entry entry = (Map.Entry)it.next();
LongWritable fileSize =
new LongWritable(Long.parseLong(entry.getValue().toString()));
BytesWritable filePath =
new BytesWritable(
entry.getKey().toString().getBytes(charsetUTF8));
byteCount += fileSize.get();
bytesSync += fileSize.get();
if (bytesSync > AVG_BYTES_PER_MAP) {
src_writer.sync();
bytesSync = fileSize.get();
}
src_writer.append(fileSize, filePath);
}
if (src_writer != null) {
src_writer.close();
}
// Set delete on exit for 'dist cache files list' as it is not needed later.
fs.deleteOnExit(distCacheFilesList);
conf.setInt(GenerateDistCacheData.GRIDMIX_DISTCACHE_FILE_COUNT, fileCount);
conf.setLong(GenerateDistCacheData.GRIDMIX_DISTCACHE_BYTE_COUNT, byteCount);
LOG.info("Number of HDFS based distributed cache files to be generated is "
+ fileCount + ". Total size of HDFS based distributed cache files "
+ "to be generated is " + byteCount);
if (!shouldGenerateDistCacheData() && fileCount > 0) {
LOG.error("Missing " + fileCount + " distributed cache files under the "
+ " directory\n" + distCachePath + "\nthat are needed for gridmix"
+ " to emulate distributed cache load. Either use -generate\noption"
+ " to generate distributed cache data along with input data OR "
+ "disable\ndistributed cache emulation by configuring '"
+ DistributedCacheEmulator.GRIDMIX_EMULATE_DISTRIBUTEDCACHE
+ "' to false.");
return Gridmix.MISSING_DIST_CACHE_FILES_ERROR;
}
return 0;
}