This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter. If you are wondering what FileOutputCommitter is for, how to use it, or what it looks like in practice, the curated class code examples below should help.
The FileOutputCommitter class belongs to the org.apache.hadoop.mapreduce.lib.output package. 15 code examples of the class are shown below, ordered by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
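Before the examples, here is a minimal sketch of the commit protocol that the MapReduce framework drives through this class; the output path is an illustrative placeholder and the record-writing step is elided:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter;

void sketchCommitProtocol(TaskAttemptContext context) throws Exception {
  Path outputPath = new Path("/tmp/job-output"); // illustrative final output directory
  FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
  committer.setupJob(context);             // creates the temporary job attempt directory
  committer.setupTask(context);            // a no-op here; the work dir is created lazily
  Path workPath = committer.getWorkPath(); // tasks write their files under this temporary dir
  // ... write output files under workPath ...
  if (committer.needsTaskCommit(context)) {
    committer.commitTask(context);         // promotes this attempt's files
  }
  committer.commitJob(context);            // moves results to outputPath and writes _SUCCESS
}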
Example 1: initializeMemberVariables
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
@SuppressWarnings("deprecation")
@Override
public void initializeMemberVariables() {
xmlFilename = "mapred-default.xml";
configurationClasses = new Class[] { MRJobConfig.class, MRConfig.class,
JHAdminConfig.class, ShuffleHandler.class, FileOutputFormat.class,
FileInputFormat.class, Job.class, NLineInputFormat.class,
JobConf.class, FileOutputCommitter.class };
// Initialize used variables
configurationPropsToSkipCompare = new HashSet<String>();
// Set error modes
errorIfMissingConfigProps = true;
errorIfMissingXmlProps = false;
// Ignore deprecated MR1 properties in JobConf
configurationPropsToSkipCompare
.add(JobConf.MAPRED_JOB_MAP_MEMORY_MB_PROPERTY);
configurationPropsToSkipCompare
.add(JobConf.MAPRED_JOB_REDUCE_MEMORY_MB_PROPERTY);
}
Example 2: getRecordWriter
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(
TaskAttemptContext context) throws IOException {
// Get the path of the temporary output file
final Path outputPath = FileOutputFormat.getOutputPath(context);
final Path outputDir = new FileOutputCommitter(outputPath, context).getWorkPath();
final Configuration conf = context.getConfiguration();
final FileSystem fs = outputDir.getFileSystem(conf);
int blockSize = conf.getInt(Constants.HFILE_BLOCKSIZE, 16384);
// Default to snappy.
Compression.Algorithm compressionAlgorithm = getAlgorithm(
conf.get(Constants.HFILE_COMPRESSION));
final StoreFile.Writer writer =
new StoreFile.WriterBuilder(conf, new CacheConfig(conf), fs, blockSize)
.withFilePath(hfilePath(outputPath, context.getTaskAttemptID().getTaskID().getId()))
.withCompression(compressionAlgorithm)
.build();
return new HFileRecordWriter(writer);
}
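The hfilePath(...) helper called above is not shown in the snippet. A hypothetical reconstruction (the real project may name or lay out the file differently) would simply derive a per-task file name under the given path:

// Hypothetical reconstruction of the helper used above; illustrative only.
private static Path hfilePath(Path outputPath, int taskId) {
  return new Path(outputPath, String.format("part-%05d", taskId));
}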
Example 3: createRecordWriter
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
/**
* Creates a new {@link RecordWriter} to output temporary data.
* @param <V> value type
* @param context current context
* @param name output name
* @param dataType value type
* @return the created writer
* @throws IOException if failed to create a new {@link RecordWriter}
* @throws InterruptedException if interrupted
*/
public <V> RecordWriter<NullWritable, V> createRecordWriter(
TaskAttemptContext context,
String name,
Class<V> dataType) throws IOException, InterruptedException {
Configuration conf = context.getConfiguration();
FileOutputCommitter committer = (FileOutputCommitter) getOutputCommitter(context);
Path file = new Path(
committer.getWorkPath(),
FileOutputFormat.getUniqueFile(context, name, "")); //$NON-NLS-1$
ModelOutput<V> out = TemporaryStorage.openOutput(conf, dataType, file);
return new RecordWriter<NullWritable, V>() {
@Override
public void write(NullWritable key, V value) throws IOException {
out.write(value);
}
@Override
public void close(TaskAttemptContext ignored) throws IOException {
out.close();
}
@Override
public String toString() {
return String.format("TemporaryOutput(%s)", file); //$NON-NLS-1$
}
};
}
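A hedged usage sketch of the method above; format stands for an instance of the enclosing output format class (its name is not shown in the snippet), and the output name "part" and the Text value type are illustrative:

RecordWriter<NullWritable, Text> writer = format.createRecordWriter(context, "part", Text.class);
writer.write(NullWritable.get(), new Text("example record")); // keys are ignored
writer.close(context);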
Example 4: open
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
@Override
public void open(String uId) throws Exception {
this.hash = uId.hashCode();
Job job = ((ConfigurableHDFSFileSink<K, V>) getWriteOperation().getSink()).jobInstance();
FileOutputFormat.setOutputPath(job, new Path(path));
// Each Writer is responsible for writing one bundle of elements and is represented by one
// unique Hadoop task based on uId/hash. All tasks share the same job ID. Since Dataflow
// handles retrying of failed bundles, each task has one attempt only.
JobID jobId = job.getJobID();
TaskID taskId = new TaskID(jobId, TaskType.REDUCE, hash);
configure(job);
context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID(taskId, 0));
FileOutputFormat<K, V> outputFormat = formatClass.newInstance();
recordWriter = outputFormat.getRecordWriter(context);
outputCommitter = (FileOutputCommitter) outputFormat.getOutputCommitter(context);
}
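Example 4 shows only the open side of the sink writer. The matching close path, sketched here under the assumption that the fields initialized above are still in scope (the actual sink API's close method may have a different signature or return value), would flush the record writer and run the task half of the commit protocol:

void closeAndCommit() throws Exception {
  // Sketch of the counterpart to open(); field names follow the example above.
  recordWriter.close(context);
  if (outputCommitter.needsTaskCommit(context)) {
    outputCommitter.commitTask(context); // promote this bundle's files
  }
}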
Example 5: getOutputFileURIs
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
/**
* Queries the file system for the URIs of all files in the base output directory that are not
* directories and whose name isn't {@link FileOutputCommitter#SUCCEEDED_FILE_NAME}.
*
* @return a list of all URIs in the form of strings.
* @throws IOException if unable to query for the files in the base output directory.
*/
protected List<String> getOutputFileURIs() throws IOException {
// Enumerate over all files in the output path.
FileStatus[] outputFiles = outputFileSystem.listStatus(outputPath);
ArrayList<String> sourceUris = new ArrayList<String>(outputFiles.length);
for (int i = 0; i < outputFiles.length; i++) {
FileStatus fileStatus = outputFiles[i];
// Skip the success file and directories as they're not relevant to BigQuery.
if (!fileStatus.isDir()
&& !fileStatus.getPath().getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
sourceUris.add(fileStatus.getPath().toString());
}
}
return sourceUris;
}
Author: GoogleCloudPlatform, Project: bigdata-interop, Lines: 25, Source: ForwardingBigQueryFileOutputCommitter.java
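For reference, FileOutputCommitter.SUCCEEDED_FILE_NAME resolves to the "_SUCCESS" marker that commitJob() writes into the output directory. The same exclusion can be expressed as a PathFilter, as a small sketch:

// Accept everything except the "_SUCCESS" job marker.
PathFilter partFilesOnly =
    path -> !path.getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME);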
Example 6: init
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
@Override
public void init() throws IOException {
super.init();
Configuration taskConf = new Configuration();
Path stagingResultDir = new Path(stagingDir, TajoConstants.RESULT_DIR_NAME);
taskConf.set(FileOutputFormat.OUTDIR, stagingResultDir.toString());
ExecutionBlockId ebId = taskAttemptId.getTaskId().getExecutionBlockId();
writerContext = new TaskAttemptContextImpl(taskConf,
new TaskAttemptID(ebId.getQueryId().toString(), ebId.getId(), TaskType.MAP,
taskAttemptId.getTaskId().getId(), taskAttemptId.getId()));
HFileOutputFormat2 hFileOutputFormat2 = new HFileOutputFormat2();
try {
writer = hFileOutputFormat2.getRecordWriter(writerContext);
committer = new FileOutputCommitter(FileOutputFormat.getOutputPath(writerContext), writerContext);
workingFilePath = committer.getWorkPath();
} catch (InterruptedException e) {
throw new IOException(e.getMessage(), e);
}
LOG.info("Created hbase file writer: " + workingFilePath);
}
Example 7: populateMRSettingsToRetain
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
private static void populateMRSettingsToRetain() {
// FileInputFormat
mrSettingsToRetain.add(FileInputFormat.INPUT_DIR);
mrSettingsToRetain.add(FileInputFormat.SPLIT_MAXSIZE);
mrSettingsToRetain.add(FileInputFormat.SPLIT_MINSIZE);
mrSettingsToRetain.add(FileInputFormat.PATHFILTER_CLASS);
mrSettingsToRetain.add(FileInputFormat.NUM_INPUT_FILES);
mrSettingsToRetain.add(FileInputFormat.INPUT_DIR_RECURSIVE);
// FileOutputFormat
mrSettingsToRetain.add(MRConfiguration.OUTPUT_BASENAME);
mrSettingsToRetain.add(FileOutputFormat.COMPRESS);
mrSettingsToRetain.add(FileOutputFormat.COMPRESS_CODEC);
mrSettingsToRetain.add(FileOutputFormat.COMPRESS_TYPE);
mrSettingsToRetain.add(FileOutputFormat.OUTDIR);
mrSettingsToRetain.add(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER);
}
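The SUCCESSFUL_JOB_OUTPUT_DIR_MARKER constant retained above is the configuration key "mapreduce.fileoutputcommitter.marksuccessfuljobs". Setting it to false suppresses the _SUCCESS marker file, for example:

Configuration conf = new Configuration();
// Disable creation of the _SUCCESS file in the job output directory.
conf.setBoolean(FileOutputCommitter.SUCCESSFUL_JOB_OUTPUT_DIR_MARKER, false);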
Example 8: testNewAPI_TextOutputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
@Test(timeout = 5000)
public void testNewAPI_TextOutputFormat() throws Exception {
String outputPath = "/tmp/output";
Configuration conf = new Configuration();
conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
DataSinkDescriptor dataSink = MROutput
.createConfigBuilder(conf, TextOutputFormat.class, outputPath)
.build();
OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
MROutput output = new MROutput(outputContext, 2);
output.initialize();
assertEquals(true, output.isMapperOutput);
assertEquals(true, output.useNewApi);
assertEquals(TextOutputFormat.class, output.newOutputFormat.getClass());
assertNull(output.oldOutputFormat);
assertNotNull(output.newApiTaskAttemptContext);
assertNull(output.oldApiTaskAttemptContext);
assertNotNull(output.newRecordWriter);
assertNull(output.oldRecordWriter);
assertEquals(FileOutputCommitter.class, output.committer.getClass());
}
Example 9: testOldAPI_TextOutputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
@Test(timeout = 5000)
public void testOldAPI_TextOutputFormat() throws Exception {
String outputPath = "/tmp/output";
Configuration conf = new Configuration();
conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
DataSinkDescriptor dataSink = MROutput
.createConfigBuilder(conf, org.apache.hadoop.mapred.TextOutputFormat.class, outputPath)
.build();
OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
MROutput output = new MROutput(outputContext, 2);
output.initialize();
assertEquals(false, output.isMapperOutput);
assertEquals(false, output.useNewApi);
assertEquals(org.apache.hadoop.mapred.TextOutputFormat.class, output.oldOutputFormat.getClass());
assertNull(output.newOutputFormat);
assertNotNull(output.oldApiTaskAttemptContext);
assertNull(output.newApiTaskAttemptContext);
assertNotNull(output.oldRecordWriter);
assertNull(output.newRecordWriter);
assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
Example 10: testNewAPI_SequenceFileOutputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
@Test(timeout = 5000)
public void testNewAPI_SequenceFileOutputFormat() throws Exception {
String outputPath = "/tmp/output";
JobConf conf = new JobConf();
conf.setOutputKeyClass(NullWritable.class);
conf.setOutputValueClass(Text.class);
DataSinkDescriptor dataSink = MROutput
.createConfigBuilder(conf, SequenceFileOutputFormat.class, outputPath)
.build();
OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
MROutput output = new MROutput(outputContext, 2);
output.initialize();
assertEquals(true, output.useNewApi);
assertEquals(SequenceFileOutputFormat.class, output.newOutputFormat.getClass());
assertNull(output.oldOutputFormat);
assertEquals(NullWritable.class, output.newApiTaskAttemptContext.getOutputKeyClass());
assertEquals(Text.class, output.newApiTaskAttemptContext.getOutputValueClass());
assertNull(output.oldApiTaskAttemptContext);
assertNotNull(output.newRecordWriter);
assertNull(output.oldRecordWriter);
assertEquals(FileOutputCommitter.class, output.committer.getClass());
}
Example 11: testOldAPI_SequenceFileOutputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
@Test(timeout = 5000)
public void testOldAPI_SequenceFileOutputFormat() throws Exception {
String outputPath = "/tmp/output";
JobConf conf = new JobConf();
conf.setOutputKeyClass(NullWritable.class);
conf.setOutputValueClass(Text.class);
DataSinkDescriptor dataSink = MROutput
.createConfigBuilder(conf, org.apache.hadoop.mapred.SequenceFileOutputFormat.class, outputPath)
.build();
OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
MROutput output = new MROutput(outputContext, 2);
output.initialize();
assertEquals(false, output.useNewApi);
assertEquals(org.apache.hadoop.mapred.SequenceFileOutputFormat.class, output.oldOutputFormat.getClass());
assertNull(output.newOutputFormat);
assertEquals(NullWritable.class, output.oldApiTaskAttemptContext.getOutputKeyClass());
assertEquals(Text.class, output.oldApiTaskAttemptContext.getOutputValueClass());
assertNull(output.newApiTaskAttemptContext);
assertNotNull(output.oldRecordWriter);
assertNull(output.newRecordWriter);
assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
Example 12: testNewAPI_WorkOutputPathOutputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
@Test(timeout = 5000)
public void testNewAPI_WorkOutputPathOutputFormat() throws Exception {
String outputPath = "/tmp/output";
Configuration conf = new Configuration();
conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, true);
DataSinkDescriptor dataSink = MROutput
.createConfigBuilder(conf, NewAPI_WorkOutputPathReadingOutputFormat.class, outputPath)
.build();
OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
MROutput output = new MROutput(outputContext, 2);
output.initialize();
assertEquals(true, output.isMapperOutput);
assertEquals(true, output.useNewApi);
assertEquals(NewAPI_WorkOutputPathReadingOutputFormat.class, output.newOutputFormat.getClass());
assertNull(output.oldOutputFormat);
assertNotNull(output.newApiTaskAttemptContext);
assertNull(output.oldApiTaskAttemptContext);
assertNotNull(output.newRecordWriter);
assertNull(output.oldRecordWriter);
assertEquals(FileOutputCommitter.class, output.committer.getClass());
}
Example 13: testOldAPI_WorkOutputPathOutputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
@Test(timeout = 5000)
public void testOldAPI_WorkOutputPathOutputFormat() throws Exception {
String outputPath = "/tmp/output";
Configuration conf = new Configuration();
conf.setBoolean(MRConfig.IS_MAP_PROCESSOR, false);
DataSinkDescriptor dataSink = MROutput
.createConfigBuilder(conf, OldAPI_WorkOutputPathReadingOutputFormat.class, outputPath)
.build();
OutputContext outputContext = createMockOutputContext(dataSink.getOutputDescriptor().getUserPayload());
MROutput output = new MROutput(outputContext, 2);
output.initialize();
assertEquals(false, output.isMapperOutput);
assertEquals(false, output.useNewApi);
assertEquals(OldAPI_WorkOutputPathReadingOutputFormat.class, output.oldOutputFormat.getClass());
assertNull(output.newOutputFormat);
assertNotNull(output.oldApiTaskAttemptContext);
assertNull(output.newApiTaskAttemptContext);
assertNotNull(output.oldRecordWriter);
assertNull(output.newRecordWriter);
assertEquals(org.apache.hadoop.mapred.FileOutputCommitter.class, output.committer.getClass());
}
Example 14: testRandomWriter
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
@Test (timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException,
ClassNotFoundException {
LOG.info("\n\n\nStarting testRandomWriter().");
if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR
+ " not found. Not running test.");
return;
}
RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
mrCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
mrCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
Job job = randomWriterJob.createJob(mrCluster.getConfig());
Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
FileOutputFormat.setOutputPath(job, outputDir);
job.setSpeculativeExecution(false);
job.addFileToClassPath(APP_JAR); // The AppMaster jar itself.
job.setJarByClass(RandomTextWriterJob.class);
job.setMaxMapAttempts(1); // speed up failures
job.submit();
String trackingUrl = job.getTrackingURL();
String jobId = job.getJobID().toString();
boolean succeeded = job.waitForCompletion(true);
Assert.assertTrue(succeeded);
Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
Assert.assertTrue("Tracking URL was " + trackingUrl +
" but didn't Match Job ID " + jobId ,
trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
// Make sure there are three files in the output-dir
RemoteIterator<FileStatus> iterator =
FileContext.getFileContext(mrCluster.getConfig()).listStatus(
outputDir);
int count = 0;
while (iterator.hasNext()) {
FileStatus file = iterator.next();
if (!file.getPath().getName()
.equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
count++;
}
}
Assert.assertEquals("Number of part files is wrong!", 3, count);
verifyRandomWriterCounters(job);
// TODO later: add explicit "isUber()" checks of some sort
}
Example 15: testMr
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter; // import the required package/class
@Test
public void testMr() throws InterruptedException, IOException, ClassNotFoundException {
String dirInput = "/tmp/wordcount/input";
String dirOutput = "/tmp/wordcount/output";
String fileInput = new Path(dirInput, "file1.txt").toString();
BufferedWriter writer = new BufferedWriter(
new OutputStreamWriter(getDfsServer().getFileSystem().create(getDfsServer().getPath(fileInput))));
writer.write("a a a a a\n");
writer.write("b b\n");
writer.close();
Job job = Job.getInstance(getMrServer().getConf());
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setMapperClass(MapClass.class);
job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);
FileInputFormat.setInputPaths(job, getDfsServer().getPathUri(dirInput));
FileOutputFormat.setOutputPath(job, new Path(getDfsServer().getPathUri(dirOutput)));
assertTrue(job.waitForCompletion(true));
Path[] outputFiles = FileUtil.stat2Paths(getDfsServer().getFileSystem().listStatus(getDfsServer().getPath(dirOutput), path -> !path
.getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)));
assertEquals(1, outputFiles.length);
InputStream in = getDfsServer().getFileSystem().open(outputFiles[0]);
BufferedReader reader = new BufferedReader(new InputStreamReader(in));
assertEquals("a\t5", reader.readLine());
assertEquals("b\t2", reader.readLine());
assertNull(reader.readLine());
reader.close();
}
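The MapClass and Reduce classes referenced in the test are not shown. Given the expected output ("a\t5" and "b\t2"), they are presumably a standard word count, roughly along these lines (a hypothetical reconstruction):

public static class MapClass extends Mapper<LongWritable, Text, Text, IntWritable> {
  private static final IntWritable ONE = new IntWritable(1);
  private final Text word = new Text();
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    for (String token : value.toString().split("\\s+")) {
      word.set(token);
      context.write(word, ONE); // emit (word, 1) for each token
    }
  }
}

public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
  @Override
  protected void reduce(Text key, Iterable<IntWritable> values, Context context)
      throws IOException, InterruptedException {
    int sum = 0;
    for (IntWritable v : values) {
      sum += v.get();
    }
    context.write(key, new IntWritable(sum)); // total count per word
  }
}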