Java MultipleOutputs Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.mapred.lib.MultipleOutputs. If you have been wondering what exactly MultipleOutputs does, how to use it, or what real-world usage looks like, the hand-picked examples below should help.


The MultipleOutputs class belongs to the org.apache.hadoop.mapred.lib package. Thirteen code examples of the class are presented below, sorted by popularity by default.
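Before the examples, here is a minimal, self-contained sketch of the typical MultipleOutputs life cycle in the old mapred API: register a named output on the JobConf, fetch a collector inside the task, and close the MultipleOutputs instance when the task finishes. StatsReducer and the named output name "stats" are illustrative, not taken from the examples below:

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.MultipleOutputs;

public class StatsReducer extends MapReduceBase
        implements Reducer<Text, LongWritable, Text, LongWritable> {

    // In the driver, before submitting the job:
    // MultipleOutputs.addNamedOutput(conf, "stats",
    //         TextOutputFormat.class, Text.class, LongWritable.class);

    private MultipleOutputs mos;

    @Override
    public void configure(JobConf conf) {
        // One instance per task attempt, built from the task's JobConf.
        mos = new MultipleOutputs(conf);
    }

    @Override
    @SuppressWarnings("unchecked")
    public void reduce(Text key, Iterator<LongWritable> values,
            OutputCollector<Text, LongWritable> output, Reporter reporter)
            throws IOException {
        long sum = 0;
        while (values.hasNext()) {
            sum += values.next().get();
        }
        // Write to the extra "stats" output instead of (or in addition to)
        // the job's default output collector.
        mos.getCollector("stats", reporter).collect(key, new LongWritable(sum));
    }

    @Override
    public void close() throws IOException {
        // Flushes and closes all named-output record writers;
        // skipping this silently drops buffered records.
        mos.close();
    }
}

Every example below is a variation on these three steps: addNamedOutput or addMultiNamedOutput at job-setup time, getCollector inside the task, and close() on teardown.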

Example 1: getMultipleConvertedOutputs

import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
public static CollectMultipleConvertedOutputs getMultipleConvertedOutputs(JobConf job)
{		
	byte[] resultIndexes=MRJobConfiguration.getResultIndexes(job);
	Converter[] outputConverters=new Converter[resultIndexes.length];
	MatrixCharacteristics[] stats=new MatrixCharacteristics[resultIndexes.length];
	HashMap<Byte, ArrayList<Integer>> tagMapping=new HashMap<>();
	for(int i=0; i<resultIndexes.length; i++)
	{
		byte output=resultIndexes[i];
		ArrayList<Integer> vec=tagMapping.get(output);
		if(vec==null)
		{
			vec=new ArrayList<>();
			tagMapping.put(output, vec);
		}
		vec.add(i);
		
		outputConverters[i]=getOuputConverter(job, i);
		stats[i]=MRJobConfiguration.getMatrixCharacteristicsForOutput(job, output);
	}
	
	MultipleOutputs multipleOutputs=new MultipleOutputs(job);
	
	return new CollectMultipleConvertedOutputs(outputConverters, stats, multipleOutputs);
	
}
 
Developer: apache, Project: systemml, Lines: 27, Source: MRJobConfiguration.java

Example 2: createJobConf

import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
/**
 * Create a job configuration
 */
@SuppressWarnings("rawtypes")
public static JobConf createJobConf(String name, String topic, Props props, Class classobj) 
throws Exception {
    JobConf conf = getJobConf(name, props, classobj);
    
    conf.set("topic", topic);
    
    // input format
    conf.setInputFormat(KafkaETLInputFormat.class);

    //turn off mapper speculative execution
    conf.setMapSpeculativeExecution(false);
    
    // setup multiple outputs
    MultipleOutputs.addMultiNamedOutput(conf, "offsets", SequenceFileOutputFormat.class, 
                KafkaETLKey.class, BytesWritable.class);


    return conf;
}
 
Developer: yanfang724, Project: hadoop-consumer, Lines: 24, Source: KafkaETLJob.java
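For context, records are written to the "offsets" multi-named output registered above through the three-argument getCollector. The helper below is a hedged sketch, not code from hadoop-consumer; OffsetWriter and the part argument are illustrative (the real project derives the part name from the task attempt id, as examples 7 and 8 show):

import java.io.IOException;

import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.MultipleOutputs;

public final class OffsetWriter {

    private OffsetWriter() {
    }

    /**
     * Writes one record to the "offsets" multi-named output registered in
     * createJobConf above. Because the output was added with
     * addMultiNamedOutput, getCollector takes an extra part name, and each
     * distinct part gets its own output file.
     */
    @SuppressWarnings("unchecked")
    public static void writeOffset(MultipleOutputs mos, String part,
            Object key, Object value, Reporter reporter) throws IOException {
        // key/value must match the classes registered for "offsets"
        // (KafkaETLKey and BytesWritable in createJobConf).
        OutputCollector collector = mos.getCollector("offsets", part, reporter);
        collector.collect(key, value);
    }
}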

Example 3: configure

import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
@Override
public void configure(JobConf conf) {
    multipleOutputs = new MultipleOutputs(conf);
    currentTable = conf.get("org.acacia.partitioner.hbase.table");
    zookeeperhost = getZookeeperLocation();
    contactHost = conf.get("org.acacia.partitioner.index.contacthost");
    totalVertexCount = Long.parseLong(conf.get("vert-count"));
    initalPartitionID = Integer.parseInt(conf.get("initpartition-id"));
    zeroFlag = Boolean.parseBoolean(conf.get("zero-flag"));
    loadIndex();
}
 
Developer: miyurud, Project: Acacia, Lines: 12, Source: EdgeDistributor.java

Example 4: main

import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
@SuppressWarnings("unused")
public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(EdgelistPartitioner.class);

    if (conf == null) {
        return;
    }
    String dir1 = "/user/miyuru/merged";
    String dir2 = "/user/miyuru/merged-out";

    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());
    // only delete dir2 because dir1 is uploaded externally.
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    conf.setInputFormat(WholeFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    WholeFileInputFormat.setInputPaths(conf, new Path(dir1));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(dir2));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(SequenceFileMapper.class);
    conf.setReducerClass(MultipleOutputsInvertedReducer.class);
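    // Note: this overrides the TextOutputFormat configured above; the job's
    // default output is suppressed, and records only reach disk through the
    // "partition" named output registered below.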
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setJobName("EdgelistPartitioner");

    MultipleOutputs.addMultiNamedOutput(conf, "partition",
            TextOutputFormat.class, NullWritable.class, Text.class);

    JobClient.runJob(conf);
}
 
Developer: miyurud, Project: Acacia, Lines: 38, Source: EdgelistPartitioner.java

Example 5: setUpMultipleOutputs

import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
public static void setUpMultipleOutputs(JobConf job, byte[] resultIndexes, byte[] resultDimsUnknown, String[] outputs, 
		OutputInfo[] outputInfos, boolean inBlockRepresentation, boolean mayContainCtable) 
throws Exception
{
	if(resultIndexes.length!=outputs.length)
		throw new Exception("number of outputs and result indexes does not match");
	if(outputs.length!=outputInfos.length)
		throw new Exception("number of outputs and outputInfos indexes does not match");
	
	job.set(RESULT_INDEXES_CONFIG, MRJobConfiguration.getIndexesString(resultIndexes));
	job.set(RESULT_DIMS_UNKNOWN_CONFIG, MRJobConfiguration.getIndexesString(resultDimsUnknown));
	job.setStrings(OUTPUT_MATRICES_DIRS_CONFIG, outputs);
	job.setOutputCommitter(MultipleOutputCommitter.class);
	
	for(int i=0; i<outputs.length; i++)
	{
		MapReduceTool.deleteFileIfExistOnHDFS(new Path(outputs[i]), job);
		if ( mayContainCtable && resultDimsUnknown[i] == (byte) 1 )  {
			setOutputInfo(job, i, outputInfos[i], false);
		}
		else {
			setOutputInfo(job, i, outputInfos[i], inBlockRepresentation);
		}
		MultipleOutputs.addNamedOutput(job, Integer.toString(i), 
				outputInfos[i].outputFormatClass, outputInfos[i].outputKeyClass, 
				outputInfos[i].outputValueClass);
	}
	job.setOutputFormat(NullOutputFormat.class);
	
	// configure temp output
	Path tempOutputPath = new Path( constructTempOutputFilename() );
	FileOutputFormat.setOutputPath(job, tempOutputPath);
	MapReduceTool.deleteFileIfExistOnHDFS(tempOutputPath, job);
}
 
Developer: apache, Project: systemml, Lines: 35, Source: MRJobConfiguration.java
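On the task side, the named outputs registered above are addressed by the same stringified indexes ("0", "1", ...). The following is a hedged sketch of how a record could be routed to result index i; IndexedOutputWriter is an illustrative name, while SystemML's actual CollectMultipleConvertedOutputs (examples 1 and 6) also converts each record before collecting it:

import java.io.IOException;

import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.MultipleOutputs;

public class IndexedOutputWriter {

    private final MultipleOutputs multipleOutputs;

    public IndexedOutputWriter(MultipleOutputs multipleOutputs) {
        this.multipleOutputs = multipleOutputs;
    }

    /** Routes one key/value pair to the named output for result index i. */
    @SuppressWarnings("unchecked")
    public void collect(int i, Object key, Object value, Reporter reporter)
            throws IOException {
        // setUpMultipleOutputs registered each output under Integer.toString(i),
        // so the same string addresses it here.
        OutputCollector out = multipleOutputs.getCollector(Integer.toString(i), reporter);
        out.collect(key, value);
    }

    /** Must be called from the task's close() to flush all record writers. */
    public void close() throws IOException {
        multipleOutputs.close();
    }
}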

Example 6: CollectMultipleConvertedOutputs

import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
public CollectMultipleConvertedOutputs(Converter[] convts, MatrixCharacteristics[] stats, 
		MultipleOutputs outputs)
{
	outputConverters=convts;
	multipleOutputs=outputs;
	matrixStats=stats;
}
 
Developer: apache, Project: systemml, Lines: 8, Source: CollectMultipleConvertedOutputs.java

Example 7: KafkaETLContext

import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
/**
 * construct using input string
 */
@SuppressWarnings("unchecked")
public KafkaETLContext(JobConf job, Props props, Reporter reporter, 
                                MultipleOutputs mos, int index, String input) 
throws Exception {
    
    _bufferSize = getClientBufferSize(props);
    _timeout = getClientTimeout(props);
    System.out.println("bufferSize=" +_bufferSize);
    System.out.println("timeout=" + _timeout);
    _reporter = reporter;
    _mos = mos;
    
    // read topic and current offset from input
    _index= index; 
    _input = input;
    _request = new KafkaETLRequest(input.trim());
    
    // read data from queue
    URI uri = _request.getURI();
    _consumer = new SimpleConsumer(uri.getHost(), uri.getPort(), _timeout, _bufferSize);
    
    // get available offset range
    _offsetRange = getOffsetRange();
    System.out.println("Connected to node " + uri 
            + " beginning reading at offset " + _offsetRange[0]
            + " latest offset=" + _offsetRange[1]);

    _offset = _offsetRange[0];
    _count = 0;
    _requestTime = 0;
    _retry = 0;
    
    _startTime = System.currentTimeMillis();
}
 
Developer: yanfang724, Project: hadoop-consumer, Lines: 38, Source: KafkaETLContext.java

Example 8: KafkaETLRecordReader

import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
public KafkaETLRecordReader(InputSplit split, JobConf job, Reporter reporter) 
throws IOException {
   super(job, (FileSplit) split);
   
   _props = KafkaETLUtils.getPropsFromJob(job);
   _contextList = new ArrayList<KafkaETLContext>();
   _job = job;
   _reporter = reporter;
   _contextIndex = -1;
   _mos = new MultipleOutputs(job);
   try {
       _limit = _props.getInt("kafka.request.limit", -1);
       
       /* get attempt id */
       String taskId = _job.get("mapred.task.id");
       if (taskId == null) {
           throw new IllegalArgumentException(
                             "Configuration does not contain the property mapred.task.id");
       }
       String[] parts = taskId.split("_");
       if (    parts.length != 6 || !parts[0].equals("attempt") 
            || (!"m".equals(parts[3]) && !"r".equals(parts[3]))) {
               throw new IllegalArgumentException(
                             "TaskAttemptId string : " + taskId + " is not properly formed");
       }
      _attemptId = parts[4]+parts[3];
   } catch (Exception e) {
       throw new IOException(e);
   }
}
 
Developer: yanfang724, Project: hadoop-consumer, Lines: 31, Source: KafkaETLRecordReader.java

Example 9: main

import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
public static void main(String[] args) throws IOException,
        InterruptedException, ClassNotFoundException {
    String dir1 = "/user/miyuru/input";
    String dir2 = "/user/miyuru/edgedistributed-out";

    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    // First job scans through the edge list and splits the edges in to
    // separate files based on the partitioned vertex files.

    JobConf conf = new JobConf(EdgeDistributor.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[0]);
    conf.set("org.acacia.partitioner.hbase.table", args[1]);
    conf.set("org.acacia.partitioner.index.contacthost", args[2]);
    conf.set("vert-count", args[3]);
    conf.set("initpartition-id", args[4]);
    conf.set("zero-flag", args[5]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(FileMapper.class);
    conf.setReducerClass(FileReducer.class);
    // conf.setInputFormat(TextInputFormat.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setNumReduceTasks(96); // Need to specify the number of reduce
                                // tasks explicitly. Otherwise it creates
                                // only one reduce task.

    FileInputFormat.setInputPaths(conf, new Path(dir1));
    FileOutputFormat.setOutputPath(conf, new Path(dir2));

    MultipleOutputs.addMultiNamedOutput(conf, "partition",
            TextOutputFormat.class, NullWritable.class, Text.class);

    Job job = new Job(conf, "EdgeDistributor");
    job.waitForCompletion(true);

    System.out.println("Done job EdgeDistribution");
}
 
Developer: miyurud, Project: Acacia, Lines: 44, Source: EdgeDistributor.java

Example 10: configure

import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
@Override
public void configure(JobConf conf) {
    multipleOutputs = new MultipleOutputs(conf);
}
 
Developer: miyurud, Project: Acacia, Lines: 5, Source: EdgelistPartitioner.java
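A configure(JobConf) like the one above needs a matching close() in the same task class; without it, the record writers behind the named outputs are never flushed. Below is a minimal sketch of the full pattern. PartitionReducer and its reduce body are illustrative, loosely modeled on the "partition" multi-named output of examples 4 and 9, not copied from Acacia:

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.lib.MultipleOutputs;

public class PartitionReducer extends MapReduceBase
        implements Reducer<Text, Text, NullWritable, Text> {

    private MultipleOutputs multipleOutputs;

    @Override
    public void configure(JobConf conf) {
        multipleOutputs = new MultipleOutputs(conf);
    }

    @Override
    @SuppressWarnings("unchecked")
    public void reduce(Text key, Iterator<Text> values,
            OutputCollector<NullWritable, Text> output, Reporter reporter)
            throws IOException {
        while (values.hasNext()) {
            // The key is used as the multi-name part, so each distinct key
            // ends up in its own output file under the "partition" name.
            multipleOutputs.getCollector("partition", key.toString(), reporter)
                    .collect(NullWritable.get(), values.next());
        }
    }

    @Override
    public void close() throws IOException {
        // Without this, output buffered in the named outputs is lost.
        multipleOutputs.close();
    }
}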

Example 11: MultipleOutputsCloseableAdapter

import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
public MultipleOutputsCloseableAdapter(MultipleOutputs mo) {
  this.mo = mo;
}
 
Developer: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 4, Source: IOUtils.java

Example 12: run

import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
@Override
public int run(String[] args) throws Exception {

	JobConf conf = new JobConf(RedirectStep.class);
	DumpExtractor.configureJob(conf, args) ;
	
	conf.setJobName("WM: resolve redirects");

	conf.setOutputKeyClass(IntWritable.class);
	conf.setOutputValueClass(DbIntList.class);		

	conf.setMapperClass(Step2Mapper.class);
	conf.setCombinerClass(Step2Reducer.class) ;
	conf.setReducerClass(Step2Reducer.class) ;

	// set up input

	conf.setInputFormat(TextInputFormat.class);
	FileInputFormat.setInputPaths(conf, new Path(conf.get(DumpExtractor.KEY_OUTPUT_DIR) + "/" + DumpExtractor.getDirectoryName(ExtractionStep.page) + "/" + PageStep.Output.tempRedirect + "*"));

	//set up output

	conf.setOutputFormat(RedirectOutputFormat.class);
	FileOutputFormat.setOutputPath(conf, new Path(conf.get(DumpExtractor.KEY_OUTPUT_DIR) + "/" + DumpExtractor.getDirectoryName(ExtractionStep.redirect)));

	//set up distributed cache

	DistributedCache.addCacheFile(new Path(conf.get(DumpExtractor.KEY_OUTPUT_DIR) + "/" + DumpExtractor.OUTPUT_SITEINFO).toUri(), conf);
	DistributedCache.addCacheFile(new Path(conf.get(DumpExtractor.KEY_LANG_FILE)).toUri(), conf);

	//cache page files created in previous step, so we can look up pages by title
	Path pageStepPath = new Path(conf.get(DumpExtractor.KEY_OUTPUT_DIR) + "/" + DumpExtractor.getDirectoryName(ExtractionStep.page)) ;
	for (FileStatus fs:FileSystem.get(conf).listStatus(pageStepPath)) {

		if (fs.getPath().getName().startsWith(PageStep.Output.tempPage.name())) {
			Logger.getLogger(RedirectStep.class).info("Cached page file " + fs.getPath()) ;
			DistributedCache.addCacheFile(fs.getPath().toUri(), conf);
		}
	}
	
	MultipleOutputs.addNamedOutput(conf, Output.redirectTargetsBySource.name(), TextOutputFormat.class,
			IntWritable.class, IntWritable.class);
	
	conf.set("mapred.textoutputformat.separator", ",");
	
	//run job
	JobClient.runJob(conf);
	return 0;
}
 
Developer: busk, Project: WikipediaMiner, Lines: 50, Source: RedirectStep.java

Example 13: configure

import org.apache.hadoop.mapred.lib.MultipleOutputs; // import the required package/class
@Override
public void configure(JobConf job) {

	HashSet<PageType> pageTypesToCache = new HashSet<PageType>() ;
	pageTypesToCache.add(PageType.article) ;
	pageTypesToCache.add(PageType.redirect) ;
	pageTypesToCache.add(PageType.disambiguation) ;

	try {

		Path[] cacheFiles = DistributedCache.getLocalCacheFiles(job);

		for (Path cf:cacheFiles) {

			if (cf.getName().equals(new Path(DumpExtractor.OUTPUT_SITEINFO).getName())) {
				si = new SiteInfo(cf) ;
			}

			if (cf.getName().equals(new Path(job.get(DumpExtractor.KEY_LANG_FILE)).getName())) {
				lc = new LanguageConfiguration(job.get(DumpExtractor.KEY_LANG_CODE), cf) ;
			}

			if (cf.getName().startsWith(PageStep.Output.tempPage.name())) {
				Logger.getLogger(Step2Mapper.class).info("Located cached page file " + cf.toString()) ;
				pageFiles.add(cf) ;
			}
		}

		if (si == null) 
			throw new Exception("Could not locate '" + DumpExtractor.OUTPUT_SITEINFO + "' in DistributedCache") ;

		if (lc == null) 
			throw new Exception("Could not locate '" + job.get(DumpExtractor.KEY_LANG_FILE) + "' in DistributedCache") ;

		if (pageFiles.isEmpty())
			throw new Exception("Could not gather page summary files produced in step 1") ;
		
		mos = new MultipleOutputs(job);

	} catch (Exception e) {
		Logger.getLogger(Step2Mapper.class).error("Could not configure mapper", e);
		System.exit(1) ;
	}
}
 
Developer: busk, Project: WikipediaMiner, Lines: 45, Source: RedirectStep.java


Note: the org.apache.hadoop.mapred.lib.MultipleOutputs class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright of the source code remains with the original authors. Refer to the corresponding project's license before distributing or using the code, and do not reproduce this article without permission.