

Java HashPartitioner Class Code Examples

This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner. If you are wondering how to use the HashPartitioner class, what it is for, or where to find working examples, the curated code samples below should help.


The HashPartitioner class belongs to the org.apache.hadoop.mapreduce.lib.partition package. The sections below show 15 code examples of the HashPartitioner class, sorted by popularity by default.
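For orientation before the examples: HashPartitioner assigns a record to a reduce partition by hashing its key and taking the result modulo the number of reduce tasks (essentially (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks), so equal keys always land in the same partition. A minimal standalone sketch of that contract, not taken from any of the projects below:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class HashPartitionerDemo {
    public static void main(String[] args) {
        HashPartitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
        int numReduceTasks = 4;
        // IntWritable.hashCode() is the wrapped int, so key 42 maps to 42 % 4 = partition 2.
        int partition = partitioner.getPartition(new IntWritable(42), new Text("ignored"), numReduceTasks);
        System.out.println("Key 42 -> partition " + partition);
    }
}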

Example 1: run

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
public int run(String[] args) throws Exception {
	if (args.length != 2) {
		JobBuilder.printUsage(this, "<path> <key>");
		return -1;
	}
	Path path = new Path(args[0]);
	IntWritable key = new IntWritable(Integer.parseInt(args[1]));

	Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
	Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
	Text val = new Text();
	Writable entry = MapFileOutputFormat.getEntry(readers, partitioner,
			key, val);
	if (entry == null) {
		System.err.println("Key not found: " + key);
		return -1;
	}
	NcdcRecordParser parser = new NcdcRecordParser();
	parser.parse(val.toString());
	System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
	return 0;
}
 
Author: lhfei, Project: hadoop-in-action, Lines of code: 24, Source file: LookupRecordByTemperature.java

Example 2: assertData

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
private void assertData(int totalShardCount) throws IOException {
  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  for (int i = 0; i < totalShardCount; i++) {
    HdfsDirectory directory = new HdfsDirectory(configuration, new Path(path, ShardUtil.getShardName(i)));
    DirectoryReader reader = DirectoryReader.open(directory);
    int numDocs = reader.numDocs();
    for (int d = 0; d < numDocs; d++) {
      Document document = reader.document(d);
      IndexableField field = document.getField("id");
      Integer id = (Integer) field.numericValue();
      int partition = partitioner.getPartition(new IntWritable(id), null, totalShardCount);
      assertEquals(i, partition);
    }
    reader.close();
  }
}
 
Author: apache, Project: incubator-blur, Lines of code: 17, Source file: TableShardCountCollapserTest.java

Example 3: createShard

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
    throws IOException {
  HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);
  IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);

  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
  assertEquals(i, partition);

  Document doc = getDoc(i);
  indexWriter.addDocument(doc);
  indexWriter.close();
}
 
Author: apache, Project: incubator-blur, Lines of code: 17, Source file: TableShardCountCollapserTest.java

Example 4: testMapOnlyJob

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
/**
 * Test {@link LembosMapReduceRunner#initJob(String[])} works as expected for a map only job.
 *
 * @throws Exception if anything goes wrong
 */
@Test
public void testMapOnlyJob() throws Exception {
    final String moduleName = "LembosMapReduceRunnerTest-testMapOnlyJob";
    final String modulePath = TestUtils.getModulePath(moduleName);
    final Job job = getJob(moduleName, modulePath, null, null);

    assertNotNull(job.getMapperClass());
    assertNull(job.getCombinerClass());
    // assertNull(job.getGroupingComparator()); // Throws an exception because our map output key is
                                                // WritableComparable and can't subclass itself
    assertEquals(HashPartitioner.class, job.getPartitionerClass());
    assertEquals(Reducer.class, job.getReducerClass()); // Defaults to the Hadoop Reducer
    // assertNull(job.getSortComparator()); // Throws an exception because our map output key is
                                            // WritableComparable and can't subclass itself

    assertNull(job.getConfiguration().get("boolean"));
    assertNull(job.getConfiguration().get("double"));
    assertNull(job.getConfiguration().get("float"));
    assertNull(job.getConfiguration().get("int"));
    assertNull(job.getConfiguration().get("long"));
    assertNull(job.getConfiguration().get("string"));
}
 
Author: apigee, Project: lembos, Lines of code: 28, Source file: LembosMapReduceRunnerTest.java

Example 5: getPartitionerClass

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
/**
 * Get the {@link Partitioner} class for the job.
 * 
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass() 
   throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>) 
    conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
 
Author: naver, Project: hadoop, Lines of code: 12, Source file: JobContextImpl.java
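Example 5 is why HashPartitioner shows up in jobs that never configure a partitioner: it is the fallback returned when PARTITIONER_CLASS_ATTR is unset. Below is a hypothetical sketch of checking that default and overriding it on a Job; the ModPartitioner class is illustrative and not part of any project above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Partitioner;

public class DefaultPartitionerDemo {

    // Hypothetical partitioner, defined only to show how the default gets replaced.
    public static class ModPartitioner extends Partitioner<IntWritable, Text> {
        @Override
        public int getPartition(IntWritable key, Text value, int numPartitions) {
            return (key.get() & Integer.MAX_VALUE) % numPartitions;
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "partitioner-demo");
        // Nothing configured yet, so the fallback from Example 5 applies.
        System.out.println(job.getPartitionerClass()); // ...lib.partition.HashPartitioner
        job.setPartitionerClass(ModPartitioner.class);
        System.out.println(job.getPartitionerClass()); // ...DefaultPartitionerDemo$ModPartitioner
    }
}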

Example 6: cleanup

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
protected void cleanup(Context context) throws IOException,
		InterruptedException {

	Configuration conf = context.getConfiguration();
	Path titlesDir = new Path(conf.get("pagerank.titles_dir"));

	MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
	Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
	IntWritable page = new IntWritable();
	Text title = new Text();

	float[] pageRanks = new float[topN.size()];
	String[] titles = new String[topN.size()];

	// The order of the entries is reversed. The priority queue is in
	// non-decreasing order and we want the highest PageRank first.
	for (int i = pageRanks.length - 1; i >= 0; i--) {
		Map.Entry<Float, Integer> entry = topN.poll();
		// Get the title of the page from the title index.
		page.set(entry.getValue());
		MapFileOutputFormat.getEntry(readers, partitioner, page, title);
		pageRanks[i] = entry.getKey();
		titles[i] = title.toString();
	}

	for (MapFile.Reader reader : readers) {
		reader.close();
	}

	for (int i = 0; i < pageRanks.length; i++) {
		context.write(new FloatWritable(pageRanks[i]), new Text(titles[i]));
	}
}
 
Author: yasserglez, Project: pagerank-hadoop, Lines of code: 35, Source file: PageRankTopNReducer.java

Example 7: cleanup

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
protected void cleanup(Context context) throws IOException,
		InterruptedException {

	Configuration conf = context.getConfiguration();
	Path titlesDir = new Path(conf.get("inlinks.titles_dir"));

	MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
	Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
	IntWritable page = new IntWritable();
	Text title = new Text();

	int[] inLinks = new int[topN.size()];
	String[] titles = new String[topN.size()];

	for (int i = inLinks.length - 1; i >= 0; i--) {
		Map.Entry<Integer, Integer> entry = topN.poll();
		page.set(entry.getValue());
		MapFileOutputFormat.getEntry(readers, partitioner, page, title);
		inLinks[i] = entry.getKey();
		titles[i] = title.toString();
	}

	for (MapFile.Reader reader : readers) {
		reader.close();
	}

	for (int i = 0; i < inLinks.length; i++) {
		context.write(new IntWritable(inLinks[i]), new Text(titles[i]));
	}
}
 
Author: yasserglez, Project: pagerank-hadoop, Lines of code: 32, Source file: InLinksTopNReducer.java

Example 8: run

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
public int run(String[] args) throws Exception {
	if (args.length != 2) {
		JobBuilder.printUsage(this, "<path> <key>");
		return -1;
	}
	Path path = new Path(args[0]);
	IntWritable key = new IntWritable(Integer.parseInt(args[1]));

	Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
	Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
	Text val = new Text();

	Reader reader = readers[partitioner.getPartition(key, val,
			readers.length)];
	Writable entry = reader.get(key, val);
	if (entry == null) {
		System.err.println("Key not found: " + key);
		return -1;
	}
	NcdcRecordParser parser = new NcdcRecordParser();
	IntWritable nextKey = new IntWritable();
	do {
		parser.parse(val.toString());
		System.out.printf("%s\t%s\n", parser.getStationId(),
				parser.getYear());
	} while (reader.next(nextKey, val) && key.equals(nextKey));
	return 0;
}
 
Author: lhfei, Project: hadoop-in-action, Lines of code: 30, Source file: LookupRecordsByTemperature.java

Example 9: run

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
public int run(String[] args) throws Exception {
	if (args.length != 2) {
		System.err.printf("Usage: %s [generic options] <input> <output>\n",
				getClass().getSimpleName());
		ToolRunner.printGenericCommandUsage(System.err);
		return -1;
	}

	Job job = new Job(getConf());
	job.setJarByClass(getClass());
	FileInputFormat.addInputPath(job, new Path(args[0]));
	FileOutputFormat.setOutputPath(job, new Path(args[1]));

	/* by default start */
	job.setInputFormatClass(TextInputFormat.class);

	job.setMapperClass(Mapper.class);

	job.setMapOutputKeyClass(LongWritable.class);
	job.setMapOutputValueClass(Text.class);

	job.setPartitionerClass(HashPartitioner.class);

	job.setNumReduceTasks(1);
	job.setReducerClass(Reducer.class);

	job.setOutputKeyClass(LongWritable.class);
	job.setOutputValueClass(Text.class);

	job.setOutputFormatClass(TextOutputFormat.class);
	/* by default end */

	return job.waitForCompletion(true) ? 0 : 1;
}
 
Author: willddy, Project: bigdata_pattern, Lines of code: 35, Source file: MinimalMapReduceWithDefaults.java

Example 10: reduce

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
/**
 * w/ reduce.
 * @throws Exception if failed
 */
@Test
public void reduce() throws Exception {
    FileEditor.put(new File(folder.getRoot(), "input/test.txt"), "Hello, world!");
    Path root = new Path(folder.getRoot().toURI());
    Path base = new Path(root, "output");
    ClassDescription client = new ClassDescription("com.example.StageClient");
    MapReduceStageInfo info = new MapReduceStageInfo(
            new StageInfo("simple", "simple", "simple"),
            Arrays.asList(new MapReduceStageInfo.Input(
                    new Path(root, "input/*.txt").toString(),
                    classOf(Text.class),
                    classOf(TextInputFormat.class),
                    classOf(Mapper.class),
                    Collections.emptyMap())),
            Arrays.asList(new MapReduceStageInfo.Output(
                    "out",
                    classOf(NullWritable.class),
                    classOf(Text.class),
                    classOf(TextOutputFormat.class),
                    Collections.emptyMap())),
            Collections.emptyList(),
            new MapReduceStageInfo.Shuffle(
                    classOf(LongWritable.class),
                    classOf(Text.class),
                    classOf(HashPartitioner.class),
                    null,
                    classOf(LongWritable.Comparator.class),
                    classOf(LongWritable.Comparator.class),
                    classOf(SimpleReducer.class)),
            base.toString());
    MapReduceStageEmitter.emit(client, info, javac);
    int status = MapReduceRunner.execute(
            new Configuration(),
            client,
            "testing",
            Collections.emptyMap(),
            javac.compile());
    assertThat("exit status code", status, is(0));
    assertThat(collect("output"), contains("Hello, world!"));
}
 
Author: asakusafw, Project: asakusafw-compiler, Lines of code: 45, Source file: MapReduceStageEmitterTest.java

Example 11: map

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
public void map(ShortArrayWritable inKey, MatrixBlockWritable inValue,
		Context context) throws IOException, InterruptedException {

	// This task gets each block M_{i,j}, loads the corresponding stripe j
	// of the vector v_{k-1} and produces the partial result of the stripe i
	// of the vector v_k.

	Configuration conf = context.getConfiguration();
	int iter = Integer.parseInt(conf.get("pagerank.iteration"));
	int numPages = Integer.parseInt(conf.get("pagerank.num_pages"));
	short blockSize = Short.parseShort(conf.get("pagerank.block_size"));

	Writable[] blockIndexes = inKey.get();
	short i = ((ShortWritable) blockIndexes[0]).get();
	short j = ((ShortWritable) blockIndexes[1]).get();

	int vjSize = (j > numPages / blockSize) ? (numPages % blockSize) : blockSize;
	FloatWritable[] vj = new FloatWritable[vjSize];

	if (iter == 1) {
		// Initial PageRank vector with 1/n for all pages.
		for (int k = 0; k < vj.length; k++) {
			vj[k] = new FloatWritable(1.0f / numPages);
		}
	} else {
		// Load the stripe j of the vector v_{k-1} from the MapFiles.
		Path outputDir = MapFileOutputFormat.getOutputPath(context).getParent();
		Path vjDir = new Path(outputDir, "v" + (iter - 1));
		MapFile.Reader[] readers = MapFileOutputFormat.getReaders(vjDir, conf);
		Partitioner<ShortWritable, FloatArrayWritable> partitioner =
				new HashPartitioner<ShortWritable, FloatArrayWritable>();
		ShortWritable key = new ShortWritable(j);
		FloatArrayWritable value = new FloatArrayWritable();
		MapFileOutputFormat.getEntry(readers, partitioner, key, value);
		Writable[] writables = value.get();
		for (int k = 0; k < vj.length; k++) {
			vj[k] = (FloatWritable) writables[k];
		}
		for (MapFile.Reader reader : readers) {
			reader.close();
		}
	}

	// Initialize the partial result i of the vector v_k.
	int viSize = (i > numPages / blockSize) ? (numPages % blockSize) : blockSize;
	FloatWritable[] vi = new FloatWritable[viSize];
	for (int k = 0; k < vi.length; k++) {
		vi[k] = new FloatWritable(0);
	}

	// Multiply M_{i,j} by the stripe j of the vector v_{k-1} to obtain the
	// partial result i of the vector v_k.
	Writable[][] blockColumns = inValue.get();
	for (int k = 0; k < blockColumns.length; k++) {
		Writable[] blockColumn = blockColumns[k];
		if (blockColumn.length > 0) {
			int vDegree = ((ShortWritable) blockColumn[0]).get();
			for (int columnIndex = 1; columnIndex < blockColumn.length; columnIndex++) {
				int l = ((ShortWritable) blockColumn[columnIndex]).get();
				vi[l].set(vi[l].get() +  (1.0f / vDegree) * vj[k].get());
			}
		}
	}

	context.write(new ShortWritable(i), new FloatArrayWritable(vi));
}
 
Author: yasserglez, Project: pagerank-hadoop, Lines of code: 68, Source file: PageRankIterationMapper.java

Example 12: visitRank

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
public void visitRank(PORank op) throws VisitorException {
    try{
        // Rank implementation has 3 vertices
        // Vertex 1 has POCounterTez produce output tuples and send to Vertex 3 via 1-1 edge.
        // Vertex 1 also sends the count of tuples of each task in Vertex 1 to Vertex 2 which is a single reducer.
        // Vertex 3 has PORankTez which consumes from Vertex 2 as broadcast input and also tuples from Vertex 1 and
        // produces tuples with updated ranks based on the count of tuples from Vertex 2.
        // This is different from MR implementation where POCounter updates job counters, and that is
        // copied by JobControlCompiler into the PORank job's jobconf.

        // Previous operator is always POCounterTez (Vertex 1)
        TezOperator counterOper = curTezOp;
        POCounterTez counterTez = (POCounterTez) counterOper.plan.getLeaves().get(0);

        //Construct Vertex 2
        TezOperator statsOper = getTezOp();
        tezPlan.add(statsOper);
        POCounterStatsTez counterStatsTez = new POCounterStatsTez(OperatorKey.genOpKey(scope));
        statsOper.plan.addAsLeaf(counterStatsTez);
        statsOper.setRequestedParallelism(1);
        statsOper.setDontEstimateParallelism(true);

        //Construct Vertex 3
        TezOperator rankOper = getTezOp();
        tezPlan.add(rankOper);
        PORankTez rankTez = new PORankTez(op);
        rankOper.plan.addAsLeaf(rankTez);
        curTezOp = rankOper;

        // Connect counterOper vertex to rankOper vertex by 1-1 edge
        rankOper.setRequestedParallelismByReference(counterOper);
        TezEdgeDescriptor edge = TezCompilerUtil.connect(tezPlan, counterOper, rankOper);
        rankOper.setUseMRMapSettings(counterOper.isUseMRMapSettings());
        TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.ONE_TO_ONE);
        counterTez.setTuplesOutputKey(rankOper.getOperatorKey().toString());
        rankTez.setTuplesInputKey(counterOper.getOperatorKey().toString());

        // Connect counterOper vertex to statsOper vertex by Shuffle edge
        edge = TezCompilerUtil.connect(tezPlan, counterOper, statsOper);
        // Task id
        edge.setIntermediateOutputKeyClass(IntWritable.class.getName());
        edge.partitionerClass = HashPartitioner.class;
        // Number of records in that task
        edge.setIntermediateOutputValueClass(LongWritable.class.getName());
        counterTez.setStatsOutputKey(statsOper.getOperatorKey().toString());
        counterStatsTez.setInputKey(counterOper.getOperatorKey().toString());

        // Connect statsOper vertex to rankOper vertex by Broadcast edge
        edge = TezCompilerUtil.connect(tezPlan, statsOper, rankOper);
        // Map of task id, offset count based on total number of records is in the value
        TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.BROADCAST);
        counterStatsTez.setOutputKey(rankOper.getOperatorKey().toString());
        rankTez.setStatsInputKey(statsOper.getOperatorKey().toString());

        phyToTezOpMap.put(op, rankOper);
    } catch (Exception e) {
        int errCode = 2034;
        String msg = "Error compiling operator " + op.getClass().getSimpleName();
        throw new TezCompilerException(msg, errCode, PigException.BUG, e);
    }
}
 
Author: sigmoidanalytics, Project: spork, Lines of code: 63, Source file: TezCompiler.java

Example 13: groupWithComparator

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
protected final GroupingOptions groupWithComparator(
    Class<? extends RawComparator<?>> comparator) {
  return groupingOptions(HashPartitioner.class, comparator);
}
 
Author: apsaltis, Project: oryx, Lines of code: 5, Source file: JobStep.java

Example 14: run

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException
{

	addInputOption();
	addOutputOption();
	addOption("numberOfColumns", "r", "Number of columns in the input matrix");
	addOption("similarityClassname", "s", "Name of distributed similarity class to instantiate, alternatively use "
			+ "one of the predefined similarities (" + SimilarityType.listEnumNames() + ')');
	addOption("maxSimilaritiesPerRow", "m", "Number of maximum similarities per row (default: "
			+ DEFAULT_MAX_SIMILARITIES_PER_ROW + ')', String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));

	Map<String, String> parsedArgs = parseArguments(args);
	if (parsedArgs == null)
	{
		return -1;
	}

	int numberOfColumns = Integer.parseInt(parsedArgs.get("--numberOfColumns"));
	String similarityClassnameArg = parsedArgs.get("--similarityClassname");
	String distributedSimilarityClassname;
	try
	{
		distributedSimilarityClassname = SimilarityType.valueOf(similarityClassnameArg)
				.getSimilarityImplementationClassName();
	}
	catch (IllegalArgumentException iae)
	{
		distributedSimilarityClassname = similarityClassnameArg;
	}

	int maxSimilaritiesPerRow = Integer.parseInt(parsedArgs.get("--maxSimilaritiesPerRow"));

	Path inputPath = getInputPath();
	Path outputPath = getOutputPath();
	Path tempDirPath = new Path(parsedArgs.get("--tempDir"));

	Path weightsPath = new Path(tempDirPath, "weights");
	Path pairwiseSimilarityPath = new Path(tempDirPath, "pairwiseSimilarity");

	AtomicInteger currentPhase = new AtomicInteger();

	if (shouldRunNextPhase(parsedArgs, currentPhase))
	{
		Job weights = prepareJob(inputPath, weightsPath, SequenceFileInputFormat.class, RowWeightMapper.class,
				VarIntWritable.class, WeightedOccurrence.class, WeightedOccurrencesPerColumnReducer.class,
				VarIntWritable.class, WeightedOccurrenceArray.class, SequenceFileOutputFormat.class);

		weights.getConfiguration().set(DISTRIBUTED_SIMILARITY_CLASSNAME, distributedSimilarityClassname);
		weights.waitForCompletion(true);
	}

	if (shouldRunNextPhase(parsedArgs, currentPhase))
	{
		Job pairwiseSimilarity = prepareJob(weightsPath, pairwiseSimilarityPath, SequenceFileInputFormat.class,
				CooccurrencesMapper.class, WeightedRowPair.class, Cooccurrence.class, SimilarityReducer.class,
				SimilarityMatrixEntryKey.class, DistributedRowMatrix.MatrixEntryWritable.class,
				SequenceFileOutputFormat.class);

		Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
		pairwiseConf.set(DISTRIBUTED_SIMILARITY_CLASSNAME, distributedSimilarityClassname);
		pairwiseConf.setInt(NUMBER_OF_COLUMNS, numberOfColumns);
		pairwiseSimilarity.waitForCompletion(true);
	}

	if (shouldRunNextPhase(parsedArgs, currentPhase))
	{
		Job asMatrix = prepareJob(pairwiseSimilarityPath, outputPath, SequenceFileInputFormat.class, Mapper.class,
				SimilarityMatrixEntryKey.class, DistributedRowMatrix.MatrixEntryWritable.class,
				EntriesToVectorsReducer.class, IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
		asMatrix.setPartitionerClass(HashPartitioner.class);
		asMatrix.setGroupingComparatorClass(SimilarityMatrixEntryKey.SimilarityMatrixEntryKeyGroupingComparator.class);
		asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
		asMatrix.waitForCompletion(true);
	}

	return 0;
}
 
Author: beeldengeluid, Project: zieook, Lines of code: 79, Source file: RowSimilarityZieOok.java

Example 15: main

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
public static void main(String[] args) throws Exception {
  if (args.length != 4) {
    System.out
        .println("USAGE: <Number of vertices> <Number of edges per vertex> <Number of partitions> <Outpath>");
    return;
  }
  System.out.println(Arrays.toString(args));
  Configuration conf = new Configuration();
  conf.setInt("hama.num.vertices", Integer.parseInt(args[0]));
  conf.setInt("hama.num.partitions", Integer.parseInt(args[2]));
  conf.setInt("number.edges", Integer.parseInt(args[1]));
  Job job = new Job(conf);

  Path generated = new Path(new Path(args[3]).getParent(), "generated");
  FileOutputFormat.setOutputPath(job, generated);
  FileSystem.get(conf).delete(generated, true);

  job.setJobName("RangeWriter");

  job.setJarByClass(SortGenMapper.class);
  job.setMapperClass(SortGenMapper.class);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(NullWritable.class);

  job.setInputFormatClass(RangeInputFormat.class);

  job.waitForCompletion(true);
  conf.setInt("max.id", Integer.valueOf(args[0]));
  job = new Job(conf);

  FileOutputFormat.setOutputPath(job, new Path(args[3]));
  FileSystem.get(conf).delete(new Path(args[3]), true);

  job.setJobName("Random Vertex Writer");

  FileInputFormat.addInputPath(job, generated);

  job.setJarByClass(RandomMapper.class);
  job.setMapperClass(RandomMapper.class);
  job.setReducerClass(Reducer.class);

  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Text.class);

  job.setNumReduceTasks(conf.getInt("hama.num.partitions", 2));
  job.setPartitionerClass(HashPartitioner.class);

  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(Text.class);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(TextOutputFormat.class);

  job.waitForCompletion(true);
}
 
Author: millecker, Project: applications, Lines of code: 58, Source file: RandomGraphGenerator.java


Note: The org.apache.hadoop.mapreduce.lib.partition.HashPartitioner examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are taken from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors. Please consult each project's license before distributing or using the code, and do not reproduce this article without permission.