This article collects typical usage examples of the Java class org.apache.hadoop.mapred.lib.MultipleOutputs. If you are wondering what MultipleOutputs is for, how to use it, or what real-world usage looks like, the curated class code examples below may help.
The MultipleOutputs class belongs to the org.apache.hadoop.mapred.lib package. A total of 13 code examples of the MultipleOutputs class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Java code examples.
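Before the individual examples, the following minimal sketch shows the lifecycle of the old-API (org.apache.hadoop.mapred) MultipleOutputs that all of them share: register a named output on the JobConf at job-setup time, create one MultipleOutputs instance per task in configure(), write through getCollector(...), and release the writers in close(). The class name TaggedReducer and the named output "matched" are illustrative assumptions, not code from the examples below.

import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.MultipleOutputs;

// Illustrative reducer (assumption, not from the examples below): copies every
// record to the default output and additionally to the "matched" named output.
public class TaggedReducer extends MapReduceBase
        implements Reducer<Text, Text, Text, Text> {

    private MultipleOutputs mos;

    @Override
    public void configure(JobConf conf) {
        // The driver must have registered the named output at setup time, e.g.:
        // MultipleOutputs.addNamedOutput(conf, "matched",
        //         TextOutputFormat.class, Text.class, Text.class);
        mos = new MultipleOutputs(conf);
    }

    @Override
    public void reduce(Text key, Iterator<Text> values,
            OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
        while (values.hasNext()) {
            Text value = values.next();
            output.collect(key, value);                                  // default output
            mos.getCollector("matched", reporter).collect(key, value);   // named output
        }
    }

    @Override
    public void close() throws IOException {
        mos.close(); // flushes and closes all named-output record writers
    }
}

The examples below use both addNamedOutput (one fixed output per name, as in Examples 5 and 12) and addMultiNamedOutput (the concrete file name is chosen at runtime through the two-argument getCollector, as in Examples 2, 4 and 9).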
Example 1: getMultipleConvertedOutputs
import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
public static CollectMultipleConvertedOutputs getMultipleConvertedOutputs(JobConf job)
{
    byte[] resultIndexes = MRJobConfiguration.getResultIndexes(job);
    Converter[] outputConverters = new Converter[resultIndexes.length];
    MatrixCharacteristics[] stats = new MatrixCharacteristics[resultIndexes.length];
    HashMap<Byte, ArrayList<Integer>> tagMapping = new HashMap<>();
    for(int i = 0; i < resultIndexes.length; i++)
    {
        byte output = resultIndexes[i];
        ArrayList<Integer> vec = tagMapping.get(output);
        if(vec == null)
        {
            vec = new ArrayList<>();
            tagMapping.put(output, vec);
        }
        vec.add(i);
        outputConverters[i] = getOuputConverter(job, i);
        stats[i] = MRJobConfiguration.getMatrixCharacteristicsForOutput(job, output);
    }
    MultipleOutputs multipleOutputs = new MultipleOutputs(job);
    return new CollectMultipleConvertedOutputs(outputConverters, stats, multipleOutputs);
}
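Example 1 only constructs the helper; as a hedged sketch (the surrounding SystemML reducer classes are not shown on this page), a task would typically obtain the collector once in configure() and keep it for the whole task:

// Hedged sketch: cache the converted-outputs collector in a task field.
private CollectMultipleConvertedOutputs collectFinalMultipleOutputs;

@Override
public void configure(JobConf job) {
    collectFinalMultipleOutputs = MRJobConfiguration.getMultipleConvertedOutputs(job);
}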
Example 2: createJobConf
import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
/**
 * Create a job configuration
 */
@SuppressWarnings("rawtypes")
public static JobConf createJobConf(String name, String topic, Props props, Class classobj)
        throws Exception {
    JobConf conf = getJobConf(name, props, classobj);
    conf.set("topic", topic);
    // input format
    conf.setInputFormat(KafkaETLInputFormat.class);
    // turn off mapper speculative execution
    conf.setMapSpeculativeExecution(false);
    // set up multiple outputs
    MultipleOutputs.addMultiNamedOutput(conf, "offsets", SequenceFileOutputFormat.class,
            KafkaETLKey.class, BytesWritable.class);
    return conf;
}
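Because "offsets" is registered with addMultiNamedOutput, a record is later written by passing a second, runtime-chosen name to getCollector. A hedged sketch of such a write; mos, attemptId, key (a KafkaETLKey) and value (a BytesWritable) are assumed to exist in the surrounding class, roughly as in Examples 7 and 8:

// Hedged sketch: write one record into the "offsets" multi-named output.
OutputCollector offsetsOut = mos.getCollector("offsets", attemptId, reporter);
offsetsOut.collect(key, value);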
Example 3: configure
import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
@Override
public void configure(JobConf conf) {
    multipleOutputs = new MultipleOutputs(conf);
    currentTable = conf.get("org.acacia.partitioner.hbase.table");
    zookeeperhost = getZookeeperLocation();
    contactHost = conf.get("org.acacia.partitioner.index.contacthost");
    totalVertexCount = Long.parseLong(conf.get("vert-count"));
    initalPartitionID = Integer.parseInt(conf.get("initpartition-id"));
    zeroFlag = Boolean.parseBoolean(conf.get("zero-flag"));
    loadIndex();
}
Example 4: main
import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
@SuppressWarnings("unused")
public static void main(String[] args) throws IOException {
JobConf conf = new JobConf(EdgelistPartitioner.class);
if (conf == null) {
return;
}
String dir1 = "/user/miyuru/merged";
String dir2 = "/user/miyuru/merged-out";
// We first delete the temporary directories if they exist on the HDFS
FileSystem fs1 = FileSystem.get(new JobConf());
// only delete dir2 because dir1 is uploaded externally.
if (fs1.exists(new Path(dir2))) {
fs1.delete(new Path(dir2), true);
}
conf.setInputFormat(WholeFileInputFormat.class);
conf.setOutputFormat(TextOutputFormat.class);
WholeFileInputFormat.setInputPaths(conf, new Path(dir1));
SequenceFileOutputFormat.setOutputPath(conf, new Path(dir2));
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(Text.class);
conf.setMapperClass(SequenceFileMapper.class);
conf.setReducerClass(MultipleOutputsInvertedReducer.class);
conf.setOutputFormat(NullOutputFormat.class);
conf.setJobName("EdgelistPartitioner");
MultipleOutputs.addMultiNamedOutput(conf, "partition",
TextOutputFormat.class, NullWritable.class, Text.class);
JobClient.runJob(conf);
}
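The reducer registered here, MultipleOutputsInvertedReducer, is not shown on this page; the sketch below is a hedged illustration (not the actual Acacia implementation) of how such a reducer could fan records out to one file per partition by varying the second getCollector argument:

// Hedged sketch of a multi-named write inside reduce(); `multipleOutputs` is the
// field created in configure(), and the partition id is assumed to be derived
// from the key and to be a plain alphanumeric token.
String partitionId = key.toString();
multipleOutputs.getCollector("partition", partitionId, reporter)
        .collect(NullWritable.get(), value);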
Example 5: setUpMultipleOutputs
import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
public static void setUpMultipleOutputs(JobConf job, byte[] resultIndexes, byte[] resultDimsUnknown, String[] outputs,
        OutputInfo[] outputInfos, boolean inBlockRepresentation, boolean mayContainCtable)
    throws Exception
{
    if(resultIndexes.length != outputs.length)
        throw new Exception("number of outputs and result indexes does not match");
    if(outputs.length != outputInfos.length)
        throw new Exception("number of outputs and outputInfos indexes does not match");
    job.set(RESULT_INDEXES_CONFIG, MRJobConfiguration.getIndexesString(resultIndexes));
    job.set(RESULT_DIMS_UNKNOWN_CONFIG, MRJobConfiguration.getIndexesString(resultDimsUnknown));
    job.setStrings(OUTPUT_MATRICES_DIRS_CONFIG, outputs);
    job.setOutputCommitter(MultipleOutputCommitter.class);
    for(int i = 0; i < outputs.length; i++)
    {
        MapReduceTool.deleteFileIfExistOnHDFS(new Path(outputs[i]), job);
        if( mayContainCtable && resultDimsUnknown[i] == (byte) 1 ) {
            setOutputInfo(job, i, outputInfos[i], false);
        }
        else {
            setOutputInfo(job, i, outputInfos[i], inBlockRepresentation);
        }
        MultipleOutputs.addNamedOutput(job, Integer.toString(i),
                outputInfos[i].outputFormatClass, outputInfos[i].outputKeyClass,
                outputInfos[i].outputValueClass);
    }
    job.setOutputFormat(NullOutputFormat.class);
    // configure temp output
    Path tempOutputPath = new Path(constructTempOutputFilename());
    FileOutputFormat.setOutputPath(job, tempOutputPath);
    MapReduceTool.deleteFileIfExistOnHDFS(tempOutputPath, job);
}
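Each matrix output is registered under its index ("0", "1", ...), so the task side can look the collector up with the same string. A hedged sketch of that lookup (in SystemML the real write path goes through CollectMultipleConvertedOutputs, Example 6):

// Hedged sketch: fetch the collector for result index i and emit one record.
// `multipleOutputs`, `reporter`, `outKey` and `outValue` are assumed fields/locals.
OutputCollector collector = multipleOutputs.getCollector(Integer.toString(i), reporter);
collector.collect(outKey, outValue);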
Example 6: CollectMultipleConvertedOutputs
import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
public CollectMultipleConvertedOutputs(Converter[] convts, MatrixCharacteristics[] stats,
        MultipleOutputs outputs)
{
    outputConverters = convts;
    multipleOutputs = outputs;
    matrixStats = stats;
}
Example 7: KafkaETLContext
import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
/**
 * Construct using input string
 */
@SuppressWarnings("unchecked")
public KafkaETLContext(JobConf job, Props props, Reporter reporter,
        MultipleOutputs mos, int index, String input)
    throws Exception {
    _bufferSize = getClientBufferSize(props);
    _timeout = getClientTimeout(props);
    System.out.println("bufferSize=" + _bufferSize);
    System.out.println("timeout=" + _timeout);
    _reporter = reporter;
    _mos = mos;
    // read topic and current offset from input
    _index = index;
    _input = input;
    _request = new KafkaETLRequest(input.trim());
    // read data from queue
    URI uri = _request.getURI();
    _consumer = new SimpleConsumer(uri.getHost(), uri.getPort(), _timeout, _bufferSize);
    // get available offset range
    _offsetRange = getOffsetRange();
    System.out.println("Connected to node " + uri
            + " beginning reading at offset " + _offsetRange[0]
            + " latest offset=" + _offsetRange[1]);
    _offset = _offsetRange[0];
    _count = 0;
    _requestTime = 0;
    _retry = 0;
    _startTime = System.currentTimeMillis();
}
Example 8: KafkaETLRecordReader
import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
public KafkaETLRecordReader(InputSplit split, JobConf job, Reporter reporter)
        throws IOException {
    super(job, (FileSplit) split);
    _props = KafkaETLUtils.getPropsFromJob(job);
    _contextList = new ArrayList<KafkaETLContext>();
    _job = job;
    _reporter = reporter;
    _contextIndex = -1;
    _mos = new MultipleOutputs(job);
    try {
        _limit = _props.getInt("kafka.request.limit", -1);
        /* get attempt id */
        String taskId = _job.get("mapred.task.id");
        if (taskId == null) {
            throw new IllegalArgumentException(
                    "Configuration does not contain the property mapred.task.id");
        }
        String[] parts = taskId.split("_");
        if (parts.length != 6 || !parts[0].equals("attempt")
                || (!"m".equals(parts[3]) && !"r".equals(parts[3]))) {
            throw new IllegalArgumentException(
                    "TaskAttemptId string : " + taskId + " is not properly formed");
        }
        _attemptId = parts[4] + parts[3];
    } catch (Exception e) {
        throw new IOException(e);
    }
}
Example 9: main
import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
public static void main(String[] args) throws IOException,
        InterruptedException, ClassNotFoundException {
    String dir1 = "/user/miyuru/input";
    String dir2 = "/user/miyuru/edgedistributed-out";
    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }
    // First job scans through the edge list and splits the edges in to
    // separate files based on the partitioned vertex files.
    JobConf conf = new JobConf(EdgeDistributor.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[0]);
    conf.set("org.acacia.partitioner.hbase.table", args[1]);
    conf.set("org.acacia.partitioner.index.contacthost", args[2]);
    conf.set("vert-count", args[3]);
    conf.set("initpartition-id", args[4]);
    conf.set("zero-flag", args[5]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(FileMapper.class);
    conf.setReducerClass(FileReducer.class);
    // conf.setInputFormat(TextInputFormat.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setNumReduceTasks(96); // Need to specify the number of reduce
                                // tasks explicitly. Otherwise it creates
                                // only one reduce task.
    FileInputFormat.setInputPaths(conf, new Path(dir1));
    FileOutputFormat.setOutputPath(conf, new Path(dir2));
    MultipleOutputs.addMultiNamedOutput(conf, "partition",
            TextOutputFormat.class, NullWritable.class, Text.class);
    Job job = new Job(conf, "EdgeDistributor");
    job.waitForCompletion(true);
    System.out.println("Done job EdgeDistribution");
}
Example 10: configure
import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
@Override
public void configure(JobConf conf) {
    multipleOutputs = new MultipleOutputs(conf);
}
Example 11: MultipleOutputsCloseableAdapter
import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
public MultipleOutputsCloseableAdapter(MultipleOutputs mo) {
    this.mo = mo;
}
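Only the constructor is shown; the point of such an adapter is presumably to expose MultipleOutputs.close() through java.io.Closeable. A hedged sketch of the missing method (not taken from the original class):

// Hedged sketch: delegate close() to the wrapped MultipleOutputs so that all
// named-output record writers are flushed and closed.
@Override
public void close() throws IOException {
    if (mo != null) {
        mo.close();
    }
}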
Example 12: run
import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(RedirectStep.class);
    DumpExtractor.configureJob(conf, args);
    conf.setJobName("WM: resolve redirects");
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(DbIntList.class);
    conf.setMapperClass(Step2Mapper.class);
    conf.setCombinerClass(Step2Reducer.class);
    conf.setReducerClass(Step2Reducer.class);
    // set up input
    conf.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(conf.get(DumpExtractor.KEY_OUTPUT_DIR) + "/" + DumpExtractor.getDirectoryName(ExtractionStep.page) + "/" + PageStep.Output.tempRedirect + "*"));
    // set up output
    conf.setOutputFormat(RedirectOutputFormat.class);
    FileOutputFormat.setOutputPath(conf, new Path(conf.get(DumpExtractor.KEY_OUTPUT_DIR) + "/" + DumpExtractor.getDirectoryName(ExtractionStep.redirect)));
    // set up distributed cache
    DistributedCache.addCacheFile(new Path(conf.get(DumpExtractor.KEY_OUTPUT_DIR) + "/" + DumpExtractor.OUTPUT_SITEINFO).toUri(), conf);
    DistributedCache.addCacheFile(new Path(conf.get(DumpExtractor.KEY_LANG_FILE)).toUri(), conf);
    // cache page files created in previous step, so we can look up pages by title
    Path pageStepPath = new Path(conf.get(DumpExtractor.KEY_OUTPUT_DIR) + "/" + DumpExtractor.getDirectoryName(ExtractionStep.page));
    for (FileStatus fs : FileSystem.get(conf).listStatus(pageStepPath)) {
        if (fs.getPath().getName().startsWith(PageStep.Output.tempPage.name())) {
            Logger.getLogger(RedirectStep.class).info("Cached page file " + fs.getPath());
            DistributedCache.addCacheFile(fs.getPath().toUri(), conf);
        }
    }
    MultipleOutputs.addNamedOutput(conf, Output.redirectTargetsBySource.name(), TextOutputFormat.class,
            IntWritable.class, IntWritable.class);
    conf.set("mapred.textoutputformat.separator", ",");
    // run job
    JobClient.runJob(conf);
    return 0;
}
Example 13: configure
import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
@Override
public void configure(JobConf job) {
    HashSet<PageType> pageTypesToCache = new HashSet<PageType>();
    pageTypesToCache.add(PageType.article);
    pageTypesToCache.add(PageType.redirect);
    pageTypesToCache.add(PageType.disambiguation);
    try {
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(job);
        for (Path cf : cacheFiles) {
            if (cf.getName().equals(new Path(DumpExtractor.OUTPUT_SITEINFO).getName())) {
                si = new SiteInfo(cf);
            }
            if (cf.getName().equals(new Path(job.get(DumpExtractor.KEY_LANG_FILE)).getName())) {
                lc = new LanguageConfiguration(job.get(DumpExtractor.KEY_LANG_CODE), cf);
            }
            if (cf.getName().startsWith(PageStep.Output.tempPage.name())) {
                Logger.getLogger(Step2Mapper.class).info("Located cached page file " + cf.toString());
                pageFiles.add(cf);
            }
        }
        if (si == null)
            throw new Exception("Could not locate '" + DumpExtractor.OUTPUT_SITEINFO + "' in DistributedCache");
        if (lc == null)
            throw new Exception("Could not locate '" + job.get(DumpExtractor.KEY_LANG_FILE) + "' in DistributedCache");
        if (pageFiles.isEmpty())
            throw new Exception("Could not gather page summary files produced in step 1");
        mos = new MultipleOutputs(job);
    } catch (Exception e) {
        Logger.getLogger(Step2Mapper.class).error("Could not configure mapper", e);
        System.exit(1);
    }
}