This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.Partitioner. If you are unsure what the Partitioner class is for or how to use it, the curated class examples below may help.
The Partitioner class belongs to the org.apache.hadoop.mapreduce package. Fifteen code examples of the class are shown below, ordered by popularity by default.
Example 1: initMapperJob
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/**
 * Initializes the Mapper and sets the input parameters for the job. All of
 * the records in the dataStore are used as the input. If you want to
 * include only a specific subset, use one of the overloaded methods that
 * takes a query parameter.
 * @param job the job to set the properties for
 * @param dataStoreClass the datastore class
 * @param inKeyClass Map input key class
 * @param inValueClass Map input value class
 * @param outKeyClass Map output key class
 * @param outValueClass Map output value class
 * @param mapperClass the mapper class extending GoraMapper
 * @param partitionerClass optional partitioner class
 * @param reuseObjects whether to reuse objects in serialization
 */
@SuppressWarnings("rawtypes")
public static <K1, V1 extends Persistent, K2, V2> void initMapperJob(
    Job job,
    Class<? extends DataStore<K1,V1>> dataStoreClass,
    Class<K1> inKeyClass,
    Class<V1> inValueClass,
    Class<K2> outKeyClass,
    Class<V2> outValueClass,
    Class<? extends GoraMapper> mapperClass,
    Class<? extends Partitioner> partitionerClass,
    boolean reuseObjects) throws IOException {
  // set the input via GoraInputFormat
  GoraInputFormat.setInput(job, dataStoreClass, inKeyClass, inValueClass, reuseObjects);
  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(outKeyClass);
  job.setMapOutputValueClass(outValueClass);
  if (partitionerClass != null) {
    job.setPartitionerClass(partitionerClass);
  }
}
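For context, the sketch below shows the kind of class that could be supplied as the optional partitionerClass argument. DomainPartitioner and its Text/LongWritable key-value types are illustrative assumptions, not part of the Gora code above; the only fixed contract is extending org.apache.hadoop.mapreduce.Partitioner and overriding getPartition.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/** Hypothetical partitioner: routes all URLs of one host to the same reducer. */
public class DomainPartitioner extends Partitioner<Text, LongWritable> {
  @Override
  public int getPartition(Text key, LongWritable value, int numPartitions) {
    String url = key.toString();
    int schemeEnd = url.indexOf("://");
    int hostStart = schemeEnd >= 0 ? schemeEnd + 3 : 0;
    int slash = url.indexOf('/', hostStart);
    String host = slash > 0 ? url.substring(0, slash) : url;
    // Mask the sign bit so the result is always a valid, non-negative partition index.
    return (host.hashCode() & Integer.MAX_VALUE) % numPartitions;
  }
}

Passing such a class (or null to keep the framework default) is all initMapperJob needs; the null check in the method leaves the default partitioner in place otherwise.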
Example 2: getTeraSortTotalOrderPartitioner
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/**
 * Extracts package-private TeraSort total order partitioner class.
 *
 * @return The class.
 */
@SuppressWarnings("unchecked")
private Class<? extends Partitioner> getTeraSortTotalOrderPartitioner() {
  Class[] classes = TeraSort.class.getDeclaredClasses();
  Class<? extends Partitioner> totalOrderPartitionerCls = null;
  for (Class<?> x : classes) {
    if ("TotalOrderPartitioner".equals(x.getSimpleName())) {
      totalOrderPartitionerCls = (Class<? extends Partitioner>) x;
      break;
    }
  }
  if (totalOrderPartitionerCls == null)
    throw new IllegalStateException("Failed to find TeraSort total order partitioner class.");
  return totalOrderPartitionerCls;
}
Example 3: run
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    JobBuilder.printUsage(this, "<path> <key>");
    return -1;
  }
  Path path = new Path(args[0]);
  IntWritable key = new IntWritable(Integer.parseInt(args[1]));
  Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
  Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
  Text val = new Text();
  Writable entry = MapFileOutputFormat.getEntry(readers, partitioner, key, val);
  if (entry == null) {
    System.err.println("Key not found: " + key);
    return -1;
  }
  NcdcRecordParser parser = new NcdcRecordParser();
  parser.parse(val.toString());
  System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
  return 0;
}
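The lookup in this example works because HashPartitioner is deterministic: it maps a key to (key.hashCode() & Integer.MAX_VALUE) % numPartitions, the same computation that chose the reducer, and therefore the output part file, when the MapFile output was written. Below is a small standalone sketch of that behaviour; the key value and partition count are arbitrary demo numbers.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class PartitionLookupDemo {
  public static void main(String[] args) {
    HashPartitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    IntWritable key = new IntWritable(1234); // arbitrary demo key
    int numPartitions = 30;                  // arbitrary demo reducer count
    // HashPartitioner returns (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    // the value argument is ignored, which is why some callers pass null.
    int part = partitioner.getPartition(key, new Text(), numPartitions);
    System.out.printf("key %s maps to part-r-%05d%n", key, part);
  }
}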
Example 4: initMapperJob
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/**
 * Initializes the Mapper and sets the input parameters for the job. All of
 * the records in the dataStore are used as the input. If you want to
 * include only a specific subset, use one of the overloaded methods that
 * takes a query parameter.
 *
 * @param job the job to set the properties for
 * @param dataStoreClass the datastore class
 * @param inKeyClass Map input key class
 * @param inValueClass Map input value class
 * @param outKeyClass Map output key class
 * @param outValueClass Map output value class
 * @param mapperClass the mapper class extending GoraMapper
 * @param partitionerClass optional partitioner class
 * @param reuseObjects whether to reuse objects in serialization
 * @param <K1> Map input key class
 * @param <V1> Map input value class
 * @param <K2> Map output key class
 * @param <V2> Map output value class
 * @throws IOException if there is an error initializing the Map job
 */
@SuppressWarnings("rawtypes")
public static <K1, V1 extends Persistent, K2, V2> void initMapperJob(
    Job job,
    Class<? extends DataStore<K1,V1>> dataStoreClass,
    Class<K1> inKeyClass,
    Class<V1> inValueClass,
    Class<K2> outKeyClass,
    Class<V2> outValueClass,
    Class<? extends GoraMapper> mapperClass,
    Class<? extends Partitioner> partitionerClass,
    boolean reuseObjects) throws IOException {
  // set the input via GoraInputFormat
  GoraInputFormat.setInput(job, dataStoreClass, inKeyClass, inValueClass, reuseObjects);
  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(outKeyClass);
  job.setMapOutputValueClass(outValueClass);
  if (partitionerClass != null) {
    job.setPartitionerClass(partitionerClass);
  }
}
Example 5: assertData
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
private void assertData(int totalShardCount) throws IOException {
  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  for (int i = 0; i < totalShardCount; i++) {
    HdfsDirectory directory = new HdfsDirectory(configuration, new Path(path, ShardUtil.getShardName(i)));
    DirectoryReader reader = DirectoryReader.open(directory);
    int numDocs = reader.numDocs();
    for (int d = 0; d < numDocs; d++) {
      Document document = reader.document(d);
      IndexableField field = document.getField("id");
      Integer id = (Integer) field.numericValue();
      int partition = partitioner.getPartition(new IntWritable(id), null, totalShardCount);
      assertEquals(i, partition);
    }
    reader.close();
  }
}
Example 6: createShard
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
    throws IOException {
  HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);
  IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);
  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
  assertEquals(i, partition);
  Document doc = getDoc(i);
  indexWriter.addDocument(doc);
  indexWriter.close();
}
Example 7: groupingOptions
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
protected final GroupingOptions groupingOptions(
    Class<? extends Partitioner> partitionerClass,
    Class<? extends RawComparator<?>> groupingComparator,
    Class<? extends RawComparator<?>> sortComparator) {
  GroupingOptions.Builder b = GroupingOptions.builder()
      .partitionerClass(partitionerClass)
      .numReducers(getNumReducers());
  if (groupingComparator != null) {
    b.groupingComparatorClass(groupingComparator);
  }
  if (sortComparator != null) {
    b.sortComparatorClass(sortComparator);
  }
  return b.build();
}
Example 8: getEntry
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers,
        Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
Example 9: getPartitionerClass
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/**
 * Get the {@link Partitioner} class for the job.
 *
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass()
    throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>)
      conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
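For context, a brief sketch of the round trip this getter completes: Job.setPartitionerClass() stores the class under PARTITIONER_CLASS_ATTR in the job configuration, and the getter falls back to HashPartitioner when nothing has been set. The classes used below are standard Hadoop API; the demo only prints the resolved class names and needs no running cluster.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class PartitionerConfigDemo {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    // Nothing configured yet, so the getter falls back to HashPartitioner.
    System.out.println(job.getPartitionerClass().getName());
    // setPartitionerClass() records the class in the configuration; the getter now returns it.
    job.setPartitionerClass(TotalOrderPartitioner.class);
    System.out.println(job.getPartitionerClass().getName());
  }
}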
Example 10: getEntry
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers,
        Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int readerLength = readers.length;
  int part;
  if (readerLength <= 1) {
    part = 0;
  } else {
    part = partitioner.getPartition(key, value, readers.length);
  }
  return readers[part].get(key, value);
}
Example 11: createJob
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
public Job createJob(
    Configuration configuration,
    int numberOfNodes,
    long currentGenerationNumber,
    String generationNameFormat,
    Path currentGenerationsBlockReportsFolderPath,
    Schema individualWrapperSchema
) throws IOException {
  // Creates a job.
  Job job = super.createJob(configuration, numberOfNodes, currentGenerationNumber, currentGenerationNumber,
      (currentGenerationNumber - 1L), currentGenerationNumber, generationNameFormat,
      currentGenerationsBlockReportsFolderPath, individualWrapperSchema,
      GlobalMapper.class, Partitioner.class, Reducer.class);
  // Sets the input.
  NodesInputFormat.setInputPopulationFolderPath(job, this.getInputFolderPath());
  NodesInputFormat.activateInitialisation(job, false);
  // Configures the fitness value class.
  job.getConfiguration().setClass(Constants.CONFIGURATION_FITNESS_VALUE_CLASS, this.fitnessValueClass,
      FitnessValue.class);
  // Configures the Fitness Evaluation phase.
  job.getConfiguration().setClass(Constants.CONFIGURATION_FITNESS_EVALUATION_CLASS, this.fitnessEvaluationClass,
      FitnessEvaluation.class);
  // Disables the reducer.
  job.setNumReduceTasks(0);
  // Returns the job.
  return job;
}
Example 12: getTotalOrderPartitionerClass
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/**
 * On Hadoop releases newer than 0.20 we want to use Hadoop's own TotalOrderPartitioner;
 * on 0.20.x we have to use the TotalOrderPartitioner bundled with HBase under
 * hadoopbackport. This method exists so that HBase can run on different versions
 * of Hadoop.
 * @return the TotalOrderPartitioner class to use
 * @throws ClassNotFoundException if no TotalOrderPartitioner class can be found
 */
private static Class<? extends Partitioner> getTotalOrderPartitionerClass()
    throws ClassNotFoundException {
  Class<? extends Partitioner> clazz = null;
  try {
    clazz = (Class<? extends Partitioner>)
        Class.forName("org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner");
  } catch (ClassNotFoundException e) {
    clazz = (Class<? extends Partitioner>)
        Class.forName("org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner");
  }
  return clazz;
}
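On any recent Hadoop release the first Class.forName branch succeeds, so outside of the HBase compatibility constraint the class can simply be referenced directly. Below is a minimal sketch, assuming a partition (split-points) file has already been written, of how a total order partitioner is typically wired into a job; this illustrates the standard TotalOrderPartitioner API and is not part of the HBase method above.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderSetupSketch {
  /** Points the job at an existing partition file and enables total-order partitioning. */
  public static void configure(Job job, Path partitionFile) {
    // The partitioner reads its split points from this side file at task startup.
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
    job.setPartitionerClass(TotalOrderPartitioner.class);
  }
}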
Example 13: setupAccumuloPartitionerWithGivenPartitioner
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
private void setupAccumuloPartitionerWithGivenPartitioner(final Class<? extends Partitioner> partitioner) throws IOException {
  // Given
  final JobConf localConf = createLocalConf();
  final FileSystem fs = FileSystem.getLocal(localConf);
  fs.mkdirs(new Path(outputDir));
  fs.mkdirs(new Path(splitsDir));
  try (final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(splitsFile), true)))) {
    writer.write("1");
  }
  final AccumuloAddElementsFromHdfsJobFactory factory = new AccumuloAddElementsFromHdfsJobFactory();
  final Job job = mock(Job.class);
  final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
      .outputPath(outputDir)
      .partitioner(partitioner)
      .useProvidedSplits(true)
      .splitsFilePath(splitsFile)
      .build();
  final AccumuloStore store = mock(AccumuloStore.class);
  given(job.getConfiguration()).willReturn(localConf);

  // When
  factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);

  // Then
  if (NoPartitioner.class.equals(partitioner)) {
    verify(job, never()).setNumReduceTasks(Mockito.anyInt());
    verify(job, never()).setPartitionerClass(Mockito.any(Class.class));
    assertNull(job.getConfiguration().get(GafferRangePartitioner.class.getName() + ".cutFile"));
  } else {
    verify(job).setNumReduceTasks(2);
    verify(job).setPartitionerClass(GafferKeyRangePartitioner.class);
    assertEquals(splitsFile, job.getConfiguration().get(GafferRangePartitioner.class.getName() + ".cutFile"));
  }
}
Example 14: shouldJSONSerialiseAndDeserialise
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
@Test
public void shouldJSONSerialiseAndDeserialise() throws SerialisationException {
  // Given
  final Map<String, String> inputMapperPairs = new HashMap<>();
  inputMapperPairs.put("inputPath", MapperGenerator.class.getName());
  final AddElementsFromHdfs addElements = new AddElementsFromHdfs.Builder()
      .inputMapperPairs(inputMapperPairs)
      .outputPath("outputPath")
      .failurePath("failurePath")
      .jobInitialiser(new TextJobInitialiser())
      .partitioner(Partitioner.class)
      .mappers(5)
      .reducers(10)
      .splitsFilePath("/path/to/splits/file")
      .useProvidedSplits(false)
      .build();

  // When
  String json = new String(JSONSerialiser.serialise(addElements, true));

  // Then
  JsonAssert.assertEquals(String.format("{%n" +
      " \"class\" : \"uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs\",%n" +
      " \"failurePath\" : \"failurePath\",%n" +
      " \"validate\" : true,%n" +
      " \"inputMapperPairs\" : { \"inputPath\" :\"uk.gov.gchq.gaffer.hdfs.operation.mapper.generator.MapperGenerator\"},%n" +
      " \"outputPath\" : \"outputPath\",%n" +
      " \"jobInitialiser\" : {%n" +
      " \"class\" : \"uk.gov.gchq.gaffer.hdfs.operation.handler.job.initialiser.TextJobInitialiser\"%n" +
      " },%n" +
      " \"numMapTasks\" : 5,%n" +
      " \"numReduceTasks\" : 10,%n" +
      " \"splitsFilePath\" : \"/path/to/splits/file\",%n" +
      " \"partitioner\" : \"org.apache.hadoop.mapreduce.Partitioner\"%n" +
      "}"), json);
}
Example 15: validate
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/**
 * Validates that the partition hfiles were written with the expected partitioning
 * function. For each non-empty partition it reads the first key and recomputes the
 * partition using the partitioning function the client specified; if the computed
 * partition number differs from the actual partition index, an exception is thrown.
 * If all partition hfiles are empty, an exception is also thrown.
 *
 * @param parts full absolute paths of all partitions
 * @param partitionerType type of the partitioning function
 * @param numShards total number of partitions
 * @throws IOException if something goes wrong when reading the hfiles
 * @throws IllegalArgumentException if the partitioner type is wrong or all partitions are empty
 */
public void validate(List<Path> parts, PartitionerType partitionerType, int numShards)
    throws IOException {
  boolean hasNonEmptyPartition = false;
  HColumnDescriptor columnDescriptor = new HColumnDescriptor();
  // Disable the block cache to ensure the actual file content is read.
  columnDescriptor.setBlockCacheEnabled(false);
  for (int shardIndex = 0; shardIndex < parts.size(); shardIndex++) {
    Path fileToBeValidated = parts.get(shardIndex);
    HFile.Reader reader = null;
    try {
      FileSystem fs = FileSystem.newInstance(fileToBeValidated.toUri(), conf);
      CacheConfig cc = new CacheConfig(conf, columnDescriptor);
      reader = HFile.createReader(fs, fileToBeValidated, cc);
      Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
      byte[] rowKey = reader.getFirstRowKey();
      if (rowKey == null) {
        LOG.warn(String.format("empty partition %s", fileToBeValidated.toString()));
        reader.close();
        continue;
      }
      hasNonEmptyPartition = true;
      BytesWritable key = new BytesWritable(rowKey);
      int partition = partitioner.getPartition(key, null, numShards);
      if (partition != shardIndex) {
        throw new IllegalArgumentException(
            String.format("wrong partition type %s for key %s in partition %d, expected %d",
                partitionerType.toString(), new String(key.getBytes()), shardIndex, partition)
        );
      }
    } finally {
      if (reader != null) {
        reader.close();
      }
    }
  }
  if (!hasNonEmptyPartition) {
    throw new IllegalArgumentException("all partitions are empty");
  }
}