This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.lib.partition.HashPartitioner. If you are wondering what the HashPartitioner class does, how to use it, or where to find working examples, the curated code samples below should help.
The HashPartitioner class belongs to the org.apache.hadoop.mapreduce.lib.partition package. Fifteen code examples that use it are shown below, ordered by popularity.
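As background for the examples that follow: HashPartitioner buckets records by the key's hashCode() alone, so records with equal keys always land in the same reduce partition. A minimal sketch of the partitioning logic, paraphrasing the stock Hadoop class (shown for orientation, not copied from any particular release):

public class HashPartitioner<K, V> extends Partitioner<K, V> {
    /** Partition on the key's hashCode() only; the mask keeps the result non-negative. */
    public int getPartition(K key, V value, int numReduceTasks) {
        return (key.hashCode() & Integer.MAX_VALUE) % numReduceTasks;
    }
}

Because the value plays no part in the computation, several examples below pass null or a scratch object as the value when they only need the partition index.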
Example 1: run
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        JobBuilder.printUsage(this, "<path> <key>");
        return -1;
    }
    Path path = new Path(args[0]);
    IntWritable key = new IntWritable(Integer.parseInt(args[1]));
    Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
    // Use the same partitioner that produced the MapFile output, so getEntry()
    // probes the part file that this key was originally routed to.
    Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    Text val = new Text();
    Writable entry = MapFileOutputFormat.getEntry(readers, partitioner, key, val);
    if (entry == null) {
        System.err.println("Key not found: " + key);
        return -1;
    }
    NcdcRecordParser parser = new NcdcRecordParser();
    parser.parse(val.toString());
    System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
    return 0;
}
Example 2: assertData
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
private void assertData(int totalShardCount) throws IOException {
    Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
    for (int i = 0; i < totalShardCount; i++) {
        HdfsDirectory directory = new HdfsDirectory(configuration, new Path(path, ShardUtil.getShardName(i)));
        DirectoryReader reader = DirectoryReader.open(directory);
        int numDocs = reader.numDocs();
        for (int d = 0; d < numDocs; d++) {
            Document document = reader.document(d);
            IndexableField field = document.getField("id");
            Integer id = (Integer) field.numericValue();
            int partition = partitioner.getPartition(new IntWritable(id), null, totalShardCount);
            assertEquals(i, partition);
        }
        reader.close();
    }
}
Example 3: createShard
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
        throws IOException {
    HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
    mergePolicy.setUseCompoundFile(false);
    IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);
    Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
    int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
    assertEquals(i, partition);
    Document doc = getDoc(i);
    indexWriter.addDocument(doc);
    indexWriter.close();
}
Example 4: testMapOnlyJob
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
/**
 * Test {@link LembosMapReduceRunner#initJob(String[])} works as expected for a map only job.
 *
 * @throws Exception if anything goes wrong
 */
@Test
public void testMapOnlyJob() throws Exception {
    final String moduleName = "LembosMapReduceRunnerTest-testMapOnlyJob";
    final String modulePath = TestUtils.getModulePath(moduleName);
    final Job job = getJob(moduleName, modulePath, null, null);
    assertNotNull(job.getMapperClass());
    assertNull(job.getCombinerClass());
    // assertNull(job.getGroupingComparator()); // Throws an exception because our map output key is
    // WritableComparable and can't subclass itself
    assertEquals(HashPartitioner.class, job.getPartitionerClass());
    assertEquals(Reducer.class, job.getReducerClass()); // Defaults to the Hadoop Reducer
    // assertNull(job.getSortComparator()); // Throws an exception because our map output key is
    // WritableComparable and can't subclass itself
    assertNull(job.getConfiguration().get("boolean"));
    assertNull(job.getConfiguration().get("double"));
    assertNull(job.getConfiguration().get("float"));
    assertNull(job.getConfiguration().get("int"));
    assertNull(job.getConfiguration().get("long"));
    assertNull(job.getConfiguration().get("string"));
}
Example 5: getPartitionerClass
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
/**
 * Get the {@link Partitioner} class for the job.
 *
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass()
        throws ClassNotFoundException {
    return (Class<? extends Partitioner<?,?>>)
        conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
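This getter is the read side of Job.setPartitionerClass: if nothing has been stored under PARTITIONER_CLASS_ATTR, HashPartitioner is the fallback. A small, hypothetical driver snippet (method name is mine) that exercises the default, assuming a standard Hadoop 2.x+ classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public static void showDefaultPartitioner() throws Exception {
    Job job = Job.getInstance(new Configuration(), "partitioner-default-demo");
    // No partitioner configured yet, so the lookup falls back to HashPartitioner.
    System.out.println(job.getPartitionerClass().getSimpleName()); // HashPartitioner
    job.setPartitionerClass(TotalOrderPartitioner.class);
    System.out.println(job.getPartitionerClass().getSimpleName()); // TotalOrderPartitioner
}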
Example 6: cleanup
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    Path titlesDir = new Path(conf.get("pagerank.titles_dir"));
    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
    Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    IntWritable page = new IntWritable();
    Text title = new Text();
    float[] pageRanks = new float[topN.size()];
    String[] titles = new String[topN.size()];
    // The order of the entries is reversed. The priority queue is in
    // non-decreasing order and we want the highest PageRank first.
    for (int i = pageRanks.length - 1; i >= 0; i--) {
        Map.Entry<Float, Integer> entry = topN.poll();
        // Get the title of the page from the title index.
        page.set(entry.getValue());
        MapFileOutputFormat.getEntry(readers, partitioner, page, title);
        pageRanks[i] = entry.getKey();
        titles[i] = title.toString();
    }
    for (MapFile.Reader reader : readers) {
        reader.close();
    }
    for (int i = 0; i < pageRanks.length; i++) {
        context.write(new FloatWritable(pageRanks[i]), new Text(titles[i]));
    }
}
Example 7: cleanup
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    Path titlesDir = new Path(conf.get("inlinks.titles_dir"));
    MapFile.Reader[] readers = MapFileOutputFormat.getReaders(titlesDir, conf);
    Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    IntWritable page = new IntWritable();
    Text title = new Text();
    int[] inLinks = new int[topN.size()];
    String[] titles = new String[topN.size()];
    for (int i = inLinks.length - 1; i >= 0; i--) {
        Map.Entry<Integer, Integer> entry = topN.poll();
        page.set(entry.getValue());
        MapFileOutputFormat.getEntry(readers, partitioner, page, title);
        inLinks[i] = entry.getKey();
        titles[i] = title.toString();
    }
    for (MapFile.Reader reader : readers) {
        reader.close();
    }
    for (int i = 0; i < inLinks.length; i++) {
        context.write(new IntWritable(inLinks[i]), new Text(titles[i]));
    }
}
Example 8: run
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        JobBuilder.printUsage(this, "<path> <key>");
        return -1;
    }
    Path path = new Path(args[0]);
    IntWritable key = new IntWritable(Integer.parseInt(args[1]));
    Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
    Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    Text val = new Text();
    // Pick the single MapFile that HashPartitioner would have routed this key to,
    // then iterate over all consecutive entries with the same key.
    Reader reader = readers[partitioner.getPartition(key, val, readers.length)];
    Writable entry = reader.get(key, val);
    if (entry == null) {
        System.err.println("Key not found: " + key);
        return -1;
    }
    NcdcRecordParser parser = new NcdcRecordParser();
    IntWritable nextKey = new IntWritable();
    do {
        parser.parse(val.toString());
        System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
    } while (reader.next(nextKey, val) && key.equals(nextKey));
    return 0;
}
Example 9: run
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }
    Job job = new Job(getConf());
    job.setJarByClass(getClass());
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    /* by default start */
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(Mapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setPartitionerClass(HashPartitioner.class);
    job.setNumReduceTasks(1);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    /* by default end */
    return job.waitForCompletion(true) ? 0 : 1;
}
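The calls bracketed by the "by default start/end" comments spell out settings Hadoop would apply anyway: TextInputFormat, the identity Mapper and Reducer, HashPartitioner with a single reduce task, LongWritable/Text output types, and TextOutputFormat. The job therefore just copies its input with each line keyed by its byte offset, making explicit that HashPartitioner is the partitioner you get even without calling setPartitionerClass at all.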
Example 10: reduce
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
/**
 * w/ reduce.
 * @throws Exception if failed
 */
@Test
public void reduce() throws Exception {
    FileEditor.put(new File(folder.getRoot(), "input/test.txt"), "Hello, world!");
    Path root = new Path(folder.getRoot().toURI());
    Path base = new Path(root, "output");
    ClassDescription client = new ClassDescription("com.example.StageClient");
    MapReduceStageInfo info = new MapReduceStageInfo(
            new StageInfo("simple", "simple", "simple"),
            Arrays.asList(new MapReduceStageInfo.Input(
                    new Path(root, "input/*.txt").toString(),
                    classOf(Text.class),
                    classOf(TextInputFormat.class),
                    classOf(Mapper.class),
                    Collections.emptyMap())),
            Arrays.asList(new MapReduceStageInfo.Output(
                    "out",
                    classOf(NullWritable.class),
                    classOf(Text.class),
                    classOf(TextOutputFormat.class),
                    Collections.emptyMap())),
            Collections.emptyList(),
            new MapReduceStageInfo.Shuffle(
                    classOf(LongWritable.class),
                    classOf(Text.class),
                    classOf(HashPartitioner.class),
                    null,
                    classOf(LongWritable.Comparator.class),
                    classOf(LongWritable.Comparator.class),
                    classOf(SimpleReducer.class)),
            base.toString());
    MapReduceStageEmitter.emit(client, info, javac);
    int status = MapReduceRunner.execute(
            new Configuration(),
            client,
            "testing",
            Collections.emptyMap(),
            javac.compile());
    assertThat("exit status code", status, is(0));
    assertThat(collect("output"), contains("Hello, world!"));
}
Example 11: map
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
public void map(ShortArrayWritable inKey, MatrixBlockWritable inValue, Context context)
        throws IOException, InterruptedException {
    // This task gets each block M_{i,j}, loads the corresponding stripe j
    // of the vector v_{k-1} and produces the partial result of the stripe i
    // of the vector v_k.
    Configuration conf = context.getConfiguration();
    int iter = Integer.parseInt(conf.get("pagerank.iteration"));
    int numPages = Integer.parseInt(conf.get("pagerank.num_pages"));
    short blockSize = Short.parseShort(conf.get("pagerank.block_size"));
    Writable[] blockIndexes = inKey.get();
    short i = ((ShortWritable) blockIndexes[0]).get();
    short j = ((ShortWritable) blockIndexes[1]).get();
    int vjSize = (j > numPages / blockSize) ? (numPages % blockSize) : blockSize;
    FloatWritable[] vj = new FloatWritable[vjSize];
    if (iter == 1) {
        // Initial PageRank vector with 1/n for all pages.
        for (int k = 0; k < vj.length; k++) {
            vj[k] = new FloatWritable(1.0f / numPages);
        }
    } else {
        // Load the stripe j of the vector v_{k-1} from the MapFiles.
        Path outputDir = MapFileOutputFormat.getOutputPath(context).getParent();
        Path vjDir = new Path(outputDir, "v" + (iter - 1));
        MapFile.Reader[] readers = MapFileOutputFormat.getReaders(vjDir, conf);
        Partitioner<ShortWritable, FloatArrayWritable> partitioner =
                new HashPartitioner<ShortWritable, FloatArrayWritable>();
        ShortWritable key = new ShortWritable(j);
        FloatArrayWritable value = new FloatArrayWritable();
        MapFileOutputFormat.getEntry(readers, partitioner, key, value);
        Writable[] writables = value.get();
        for (int k = 0; k < vj.length; k++) {
            vj[k] = (FloatWritable) writables[k];
        }
        for (MapFile.Reader reader : readers) {
            reader.close();
        }
    }
    // Initialize the partial result i of the vector v_k.
    int viSize = (i > numPages / blockSize) ? (numPages % blockSize) : blockSize;
    FloatWritable[] vi = new FloatWritable[viSize];
    for (int k = 0; k < vi.length; k++) {
        vi[k] = new FloatWritable(0);
    }
    // Multiply M_{i,j} by the stripe j of the vector v_{k-1} to obtain the
    // partial result i of the vector v_k.
    Writable[][] blockColumns = inValue.get();
    for (int k = 0; k < blockColumns.length; k++) {
        Writable[] blockColumn = blockColumns[k];
        if (blockColumn.length > 0) {
            int vDegree = ((ShortWritable) blockColumn[0]).get();
            for (int columnIndex = 1; columnIndex < blockColumn.length; columnIndex++) {
                int l = ((ShortWritable) blockColumn[columnIndex]).get();
                vi[l].set(vi[l].get() + (1.0f / vDegree) * vj[k].get());
            }
        }
    }
    context.write(new ShortWritable(i), new FloatArrayWritable(vi));
}
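In matrix terms (my notation, not from the original source), each map call above computes one block-stripe product contributing to the next PageRank vector: $v_k^{(i)} \mathrel{+}= M_{i,j}\,v_{k-1}^{(j)}$, where $(M_{i,j})_{l,m} = 1/\deg(m)$ if page $m$ of stripe $j$ links to page $l$ of stripe $i$, and $0$ otherwise; the partial stripes emitted for the same $i$ are presumably summed on the reduce side.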
Example 12: visitRank
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
public void visitRank(PORank op) throws VisitorException {
    try {
        // Rank implementation has 3 vertices
        // Vertex 1 has POCounterTez produce output tuples and send to Vertex 3 via 1-1 edge.
        // Vertex 1 also sends the count of tuples of each task in Vertex 1 to Vertex 2 which is a single reducer.
        // Vertex 3 has PORankTez which consumes from Vertex 2 as broadcast input and also tuples from Vertex 1 and
        // produces tuples with updated ranks based on the count of tuples from Vertex 2.
        // This is different from MR implementation where POCounter updates job counters, and that is
        // copied by JobControlCompiler into the PORank job's jobconf.

        // Previous operator is always POCounterTez (Vertex 1)
        TezOperator counterOper = curTezOp;
        POCounterTez counterTez = (POCounterTez) counterOper.plan.getLeaves().get(0);

        // Construct Vertex 2
        TezOperator statsOper = getTezOp();
        tezPlan.add(statsOper);
        POCounterStatsTez counterStatsTez = new POCounterStatsTez(OperatorKey.genOpKey(scope));
        statsOper.plan.addAsLeaf(counterStatsTez);
        statsOper.setRequestedParallelism(1);
        statsOper.setDontEstimateParallelism(true);

        // Construct Vertex 3
        TezOperator rankOper = getTezOp();
        tezPlan.add(rankOper);
        PORankTez rankTez = new PORankTez(op);
        rankOper.plan.addAsLeaf(rankTez);
        curTezOp = rankOper;

        // Connect counterOper vertex to rankOper vertex by 1-1 edge
        rankOper.setRequestedParallelismByReference(counterOper);
        TezEdgeDescriptor edge = TezCompilerUtil.connect(tezPlan, counterOper, rankOper);
        rankOper.setUseMRMapSettings(counterOper.isUseMRMapSettings());
        TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.ONE_TO_ONE);
        counterTez.setTuplesOutputKey(rankOper.getOperatorKey().toString());
        rankTez.setTuplesInputKey(counterOper.getOperatorKey().toString());

        // Connect counterOper vertex to statsOper vertex by Shuffle edge
        edge = TezCompilerUtil.connect(tezPlan, counterOper, statsOper);
        // Task id
        edge.setIntermediateOutputKeyClass(IntWritable.class.getName());
        edge.partitionerClass = HashPartitioner.class;
        // Number of records in that task
        edge.setIntermediateOutputValueClass(LongWritable.class.getName());
        counterTez.setStatsOutputKey(statsOper.getOperatorKey().toString());
        counterStatsTez.setInputKey(counterOper.getOperatorKey().toString());

        // Connect statsOper vertex to rankOper vertex by Broadcast edge
        edge = TezCompilerUtil.connect(tezPlan, statsOper, rankOper);
        // Map of task id, offset count based on total number of records is in the value
        TezCompilerUtil.configureValueOnlyTupleOutput(edge, DataMovementType.BROADCAST);
        counterStatsTez.setOutputKey(rankOper.getOperatorKey().toString());
        rankTez.setStatsInputKey(statsOper.getOperatorKey().toString());

        phyToTezOpMap.put(op, rankOper);
    } catch (Exception e) {
        int errCode = 2034;
        String msg = "Error compiling operator " + op.getClass().getSimpleName();
        throw new TezCompilerException(msg, errCode, PigException.BUG, e);
    }
}
Example 13: groupWithComparator
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
protected final GroupingOptions groupWithComparator(
        Class<? extends RawComparator<?>> comparator) {
    return groupingOptions(HashPartitioner.class, comparator);
}
Example 14: run
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException
{
    addInputOption();
    addOutputOption();
    addOption("numberOfColumns", "r", "Number of columns in the input matrix");
    addOption("similarityClassname", "s", "Name of distributed similarity class to instantiate, alternatively use "
        + "one of the predefined similarities (" + SimilarityType.listEnumNames() + ')');
    addOption("maxSimilaritiesPerRow", "m", "Number of maximum similarities per row (default: "
        + DEFAULT_MAX_SIMILARITIES_PER_ROW + ')', String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null)
    {
        return -1;
    }

    int numberOfColumns = Integer.parseInt(parsedArgs.get("--numberOfColumns"));
    String similarityClassnameArg = parsedArgs.get("--similarityClassname");
    String distributedSimilarityClassname;
    try
    {
        distributedSimilarityClassname = SimilarityType.valueOf(similarityClassnameArg)
            .getSimilarityImplementationClassName();
    }
    catch (IllegalArgumentException iae)
    {
        distributedSimilarityClassname = similarityClassnameArg;
    }

    int maxSimilaritiesPerRow = Integer.parseInt(parsedArgs.get("--maxSimilaritiesPerRow"));
    Path inputPath = getInputPath();
    Path outputPath = getOutputPath();
    Path tempDirPath = new Path(parsedArgs.get("--tempDir"));
    Path weightsPath = new Path(tempDirPath, "weights");
    Path pairwiseSimilarityPath = new Path(tempDirPath, "pairwiseSimilarity");
    AtomicInteger currentPhase = new AtomicInteger();

    if (shouldRunNextPhase(parsedArgs, currentPhase))
    {
        Job weights = prepareJob(inputPath, weightsPath, SequenceFileInputFormat.class, RowWeightMapper.class,
            VarIntWritable.class, WeightedOccurrence.class, WeightedOccurrencesPerColumnReducer.class,
            VarIntWritable.class, WeightedOccurrenceArray.class, SequenceFileOutputFormat.class);
        weights.getConfiguration().set(DISTRIBUTED_SIMILARITY_CLASSNAME, distributedSimilarityClassname);
        weights.waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase))
    {
        Job pairwiseSimilarity = prepareJob(weightsPath, pairwiseSimilarityPath, SequenceFileInputFormat.class,
            CooccurrencesMapper.class, WeightedRowPair.class, Cooccurrence.class, SimilarityReducer.class,
            SimilarityMatrixEntryKey.class, DistributedRowMatrix.MatrixEntryWritable.class,
            SequenceFileOutputFormat.class);
        Configuration pairwiseConf = pairwiseSimilarity.getConfiguration();
        pairwiseConf.set(DISTRIBUTED_SIMILARITY_CLASSNAME, distributedSimilarityClassname);
        pairwiseConf.setInt(NUMBER_OF_COLUMNS, numberOfColumns);
        pairwiseSimilarity.waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase))
    {
        Job asMatrix = prepareJob(pairwiseSimilarityPath, outputPath, SequenceFileInputFormat.class, Mapper.class,
            SimilarityMatrixEntryKey.class, DistributedRowMatrix.MatrixEntryWritable.class,
            EntriesToVectorsReducer.class, IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
        asMatrix.setPartitionerClass(HashPartitioner.class);
        asMatrix.setGroupingComparatorClass(SimilarityMatrixEntryKey.SimilarityMatrixEntryKeyGroupingComparator.class);
        asMatrix.getConfiguration().setInt(MAX_SIMILARITIES_PER_ROW, maxSimilaritiesPerRow);
        asMatrix.waitForCompletion(true);
    }

    return 0;
}
Example 15: main
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; // import the required package/class
public static void main(String[] args) throws Exception {
    if (args.length != 4) {
        System.out.println("USAGE: <Number of vertices> <Number of edges per vertex> <Number of partitions> <Outpath>");
        return;
    }
    System.out.println(Arrays.toString(args));

    Configuration conf = new Configuration();
    conf.setInt("hama.num.vertices", Integer.parseInt(args[0]));
    conf.setInt("hama.num.partitions", Integer.parseInt(args[2]));
    conf.setInt("number.edges", Integer.parseInt(args[1]));
    Job job = new Job(conf);

    Path generated = new Path(new Path(args[3]).getParent(), "generated");
    FileOutputFormat.setOutputPath(job, generated);
    FileSystem.get(conf).delete(generated, true);

    job.setJobName("RangeWriter");
    job.setJarByClass(SortGenMapper.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.waitForCompletion(true);

    conf.setInt("max.id", Integer.valueOf(args[0]));
    job = new Job(conf);

    FileOutputFormat.setOutputPath(job, new Path(args[3]));
    FileSystem.get(conf).delete(new Path(args[3]), true);

    job.setJobName("Random Vertex Writer");
    FileInputFormat.addInputPath(job, generated);
    job.setJarByClass(RandomMapper.class);
    job.setMapperClass(RandomMapper.class);
    job.setReducerClass(Reducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(conf.getInt("hama.num.partitions", 2));
    job.setPartitionerClass(HashPartitioner.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.waitForCompletion(true);
}