This article collects typical usage examples of the Java class org.apache.hadoop.mapreduce.Partitioner. If you are unsure what the Partitioner class is for or how to use it, the curated class examples below may help.
The Partitioner class belongs to the org.apache.hadoop.mapreduce package. Fifteen code examples of the class are shown below, ordered by popularity by default.
Example 1: initMapperJob
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/**
 * Initializes the Mapper and sets the input parameters for the job. All of
 * the records in the dataStore are used as the input. If you want to
 * include only a specific subset, use one of the overloaded methods that
 * takes a query parameter.
 * @param job the job to set the properties for
 * @param dataStoreClass the datastore class
 * @param inKeyClass Map input key class
 * @param inValueClass Map input value class
 * @param outKeyClass Map output key class
 * @param outValueClass Map output value class
 * @param mapperClass the mapper class extending GoraMapper
 * @param partitionerClass optional partitioner class
 * @param reuseObjects whether to reuse objects in serialization
 */
@SuppressWarnings("rawtypes")
public static <K1, V1 extends Persistent, K2, V2> void initMapperJob(
    Job job,
    Class<? extends DataStore<K1,V1>> dataStoreClass,
    Class<K1> inKeyClass,
    Class<V1> inValueClass,
    Class<K2> outKeyClass,
    Class<V2> outValueClass,
    Class<? extends GoraMapper> mapperClass,
    Class<? extends Partitioner> partitionerClass,
    boolean reuseObjects) throws IOException {
  // set the input via GoraInputFormat
  GoraInputFormat.setInput(job, dataStoreClass, inKeyClass, inValueClass, reuseObjects);
  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(outKeyClass);
  job.setMapOutputValueClass(outValueClass);
  if (partitionerClass != null) {
    job.setPartitionerClass(partitionerClass);
  }
}
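For context, the sketch below shows the kind of class that could be supplied as the optional partitionerClass argument. DomainPartitioner and its Text/LongWritable key-value types are illustrative assumptions, not part of the Gora code above; the only fixed contract is extending org.apache.hadoop.mapreduce.Partitioner and overriding getPartition.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

/** Hypothetical partitioner: routes all URLs of one host to the same reducer. */
public class DomainPartitioner extends Partitioner<Text, LongWritable> {
  @Override
  public int getPartition(Text key, LongWritable value, int numPartitions) {
    String url = key.toString();
    int schemeEnd = url.indexOf("://");
    int hostStart = schemeEnd >= 0 ? schemeEnd + 3 : 0;
    int slash = url.indexOf('/', hostStart);
    String host = slash > 0 ? url.substring(0, slash) : url;
    // Mask the sign bit so the result is always a valid, non-negative partition index.
    return (host.hashCode() & Integer.MAX_VALUE) % numPartitions;
  }
}

Passing such a class (or null to keep the framework default) is all initMapperJob needs; the null check in the method leaves the default partitioner in place otherwise.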
Example 2: getTeraSortTotalOrderPartitioner
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/**
 * Extracts package-private TeraSort total order partitioner class.
 *
 * @return The class.
 */
@SuppressWarnings("unchecked")
private Class<? extends Partitioner> getTeraSortTotalOrderPartitioner() {
  Class[] classes = TeraSort.class.getDeclaredClasses();
  Class<? extends Partitioner> totalOrderPartitionerCls = null;
  for (Class<?> x : classes) {
    if ("TotalOrderPartitioner".equals(x.getSimpleName())) {
      totalOrderPartitionerCls = (Class<? extends Partitioner>) x;
      break;
    }
  }
  if (totalOrderPartitionerCls == null)
    throw new IllegalStateException("Failed to find TeraSort total order partitioner class.");
  return totalOrderPartitionerCls;
}
Example 3: run
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
@Override
public int run(String[] args) throws Exception {
  if (args.length != 2) {
    JobBuilder.printUsage(this, "<path> <key>");
    return -1;
  }
  Path path = new Path(args[0]);
  IntWritable key = new IntWritable(Integer.parseInt(args[1]));
  Reader[] readers = MapFileOutputFormat.getReaders(path, getConf());
  Partitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
  Text val = new Text();
  Writable entry = MapFileOutputFormat.getEntry(readers, partitioner, key, val);
  if (entry == null) {
    System.err.println("Key not found: " + key);
    return -1;
  }
  NcdcRecordParser parser = new NcdcRecordParser();
  parser.parse(val.toString());
  System.out.printf("%s\t%s\n", parser.getStationId(), parser.getYear());
  return 0;
}
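The lookup in this example works because HashPartitioner is deterministic: it maps a key to (key.hashCode() & Integer.MAX_VALUE) % numPartitions, the same computation that chose the reducer, and therefore the output part file, when the MapFile output was written. Below is a small standalone sketch of that behaviour; the key value and partition count are arbitrary demo numbers.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class PartitionLookupDemo {
  public static void main(String[] args) {
    HashPartitioner<IntWritable, Text> partitioner = new HashPartitioner<IntWritable, Text>();
    IntWritable key = new IntWritable(1234); // arbitrary demo key
    int numPartitions = 30;                  // arbitrary demo reducer count
    // HashPartitioner returns (key.hashCode() & Integer.MAX_VALUE) % numPartitions;
    // the value argument is ignored, which is why some callers pass null.
    int part = partitioner.getPartition(key, new Text(), numPartitions);
    System.out.printf("key %s maps to part-r-%05d%n", key, part);
  }
}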
Example 4: initMapperJob
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/**
 * Initializes the Mapper and sets the input parameters for the job. All of
 * the records in the dataStore are used as the input. If you want to
 * include only a specific subset, use one of the overloaded methods that
 * takes a query parameter.
 *
 * @param job the job to set the properties for
 * @param dataStoreClass the datastore class
 * @param inKeyClass Map input key class
 * @param inValueClass Map input value class
 * @param outKeyClass Map output key class
 * @param outValueClass Map output value class
 * @param mapperClass the mapper class extending GoraMapper
 * @param partitionerClass optional partitioner class
 * @param reuseObjects whether to reuse objects in serialization
 * @param <K1> Map input key class
 * @param <V1> Map input value class
 * @param <K2> Map output key class
 * @param <V2> Map output value class
 * @throws IOException if there is an error initializing the Map job
 */
@SuppressWarnings("rawtypes")
public static <K1, V1 extends Persistent, K2, V2> void initMapperJob(
    Job job,
    Class<? extends DataStore<K1,V1>> dataStoreClass,
    Class<K1> inKeyClass,
    Class<V1> inValueClass,
    Class<K2> outKeyClass,
    Class<V2> outValueClass,
    Class<? extends GoraMapper> mapperClass,
    Class<? extends Partitioner> partitionerClass,
    boolean reuseObjects) throws IOException {
  // set the input via GoraInputFormat
  GoraInputFormat.setInput(job, dataStoreClass, inKeyClass, inValueClass, reuseObjects);
  job.setMapperClass(mapperClass);
  job.setMapOutputKeyClass(outKeyClass);
  job.setMapOutputValueClass(outValueClass);
  if (partitionerClass != null) {
    job.setPartitionerClass(partitionerClass);
  }
}
Example 5: assertData
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
private void assertData(int totalShardCount) throws IOException {
  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  for (int i = 0; i < totalShardCount; i++) {
    HdfsDirectory directory = new HdfsDirectory(configuration, new Path(path, ShardUtil.getShardName(i)));
    DirectoryReader reader = DirectoryReader.open(directory);
    int numDocs = reader.numDocs();
    for (int d = 0; d < numDocs; d++) {
      Document document = reader.document(d);
      IndexableField field = document.getField("id");
      Integer id = (Integer) field.numericValue();
      int partition = partitioner.getPartition(new IntWritable(id), null, totalShardCount);
      assertEquals(i, partition);
    }
    reader.close();
  }
}
Example 6: createShard
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
    throws IOException {
  HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);
  IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);
  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
  assertEquals(i, partition);
  Document doc = getDoc(i);
  indexWriter.addDocument(doc);
  indexWriter.close();
}
Example 7: groupingOptions
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
protected final GroupingOptions groupingOptions(
    Class<? extends Partitioner> partitionerClass,
    Class<? extends RawComparator<?>> groupingComparator,
    Class<? extends RawComparator<?>> sortComparator) {
  GroupingOptions.Builder b = GroupingOptions.builder()
      .partitionerClass(partitionerClass)
      .numReducers(getNumReducers());
  if (groupingComparator != null) {
    b.groupingComparatorClass(groupingComparator);
  }
  if (sortComparator != null) {
    b.sortComparatorClass(sortComparator);
  }
  return b.build();
}
Example 8: getEntry
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers,
        Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int part = partitioner.getPartition(key, value, readers.length);
  return readers[part].get(key, value);
}
Example 9: getPartitionerClass
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/**
 * Get the {@link Partitioner} class for the job.
 *
 * @return the {@link Partitioner} class for the job.
 */
@SuppressWarnings("unchecked")
public Class<? extends Partitioner<?,?>> getPartitionerClass()
    throws ClassNotFoundException {
  return (Class<? extends Partitioner<?,?>>)
      conf.getClass(PARTITIONER_CLASS_ATTR, HashPartitioner.class);
}
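For context, a brief sketch of the round trip this getter completes: Job.setPartitionerClass() stores the class under PARTITIONER_CLASS_ATTR in the job configuration, and the getter falls back to HashPartitioner when nothing has been set. The classes used below are standard Hadoop API; the demo only prints the resolved class names and needs no running cluster.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class PartitionerConfigDemo {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration());
    // Nothing configured yet, so the getter falls back to HashPartitioner.
    System.out.println(job.getPartitionerClass().getName());
    // setPartitionerClass() records the class in the configuration; the getter now returns it.
    job.setPartitionerClass(TotalOrderPartitioner.class);
    System.out.println(job.getPartitionerClass().getName());
  }
}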
Example 10: getEntry
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/** Get an entry from output generated by this class. */
public static <K extends WritableComparable<?>, V extends Writable>
    Writable getEntry(MapFile.Reader[] readers,
        Partitioner<K, V> partitioner, K key, V value) throws IOException {
  int readerLength = readers.length;
  int part;
  if (readerLength <= 1) {
    part = 0;
  } else {
    part = partitioner.getPartition(key, value, readers.length);
  }
  return readers[part].get(key, value);
}
Example 11: createJob
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
public Job createJob(
    Configuration configuration,
    int numberOfNodes,
    long currentGenerationNumber,
    String generationNameFormat,
    Path currentGenerationsBlockReportsFolderPath,
    Schema individualWrapperSchema
) throws IOException {
  // Creates a job.
  Job job = super.createJob(configuration, numberOfNodes, currentGenerationNumber, currentGenerationNumber,
      (currentGenerationNumber - 1L), currentGenerationNumber, generationNameFormat,
      currentGenerationsBlockReportsFolderPath, individualWrapperSchema,
      GlobalMapper.class, Partitioner.class, Reducer.class);
  // Sets the input.
  NodesInputFormat.setInputPopulationFolderPath(job, this.getInputFolderPath());
  NodesInputFormat.activateInitialisation(job, false);
  // Configures the fitness value class.
  job.getConfiguration().setClass(Constants.CONFIGURATION_FITNESS_VALUE_CLASS, this.fitnessValueClass,
      FitnessValue.class);
  // Configures the Fitness Evaluation phase.
  job.getConfiguration().setClass(Constants.CONFIGURATION_FITNESS_EVALUATION_CLASS, this.fitnessEvaluationClass,
      FitnessEvaluation.class);
  // Disables the reducer.
  job.setNumReduceTasks(0);
  // Returns the job.
  return job;
}
Example 12: getTotalOrderPartitionerClass
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/**
 * On Hadoop releases newer than 0.20 we want to use Hadoop's own TotalOrderPartitioner;
 * on 0.20.x we have to use the TotalOrderPartitioner bundled with HBase under
 * hadoopbackport. This method exists so that HBase can run on different versions
 * of Hadoop.
 * @return the TotalOrderPartitioner class to use
 * @throws ClassNotFoundException if no TotalOrderPartitioner class can be found
 */
private static Class<? extends Partitioner> getTotalOrderPartitionerClass()
    throws ClassNotFoundException {
  Class<? extends Partitioner> clazz = null;
  try {
    clazz = (Class<? extends Partitioner>)
        Class.forName("org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner");
  } catch (ClassNotFoundException e) {
    clazz = (Class<? extends Partitioner>)
        Class.forName("org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner");
  }
  return clazz;
}
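On any recent Hadoop release the first Class.forName branch succeeds, so outside of the HBase compatibility constraint the class can simply be referenced directly. Below is a minimal sketch, assuming a partition (split-points) file has already been written, of how a total order partitioner is typically wired into a job; this illustrates the standard TotalOrderPartitioner API and is not part of the HBase method above.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class TotalOrderSetupSketch {
  /** Points the job at an existing partition file and enables total-order partitioning. */
  public static void configure(Job job, Path partitionFile) {
    // The partitioner reads its split points from this side file at task startup.
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
    job.setPartitionerClass(TotalOrderPartitioner.class);
  }
}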
Example 13: setupAccumuloPartitionerWithGivenPartitioner
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
private void setupAccumuloPartitionerWithGivenPartitioner(final Class<? extends Partitioner> partitioner) throws IOException {
  // Given
  final JobConf localConf = createLocalConf();
  final FileSystem fs = FileSystem.getLocal(localConf);
  fs.mkdirs(new Path(outputDir));
  fs.mkdirs(new Path(splitsDir));
  try (final BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fs.create(new Path(splitsFile), true)))) {
    writer.write("1");
  }
  final AccumuloAddElementsFromHdfsJobFactory factory = new AccumuloAddElementsFromHdfsJobFactory();
  final Job job = mock(Job.class);
  final AddElementsFromHdfs operation = new AddElementsFromHdfs.Builder()
      .outputPath(outputDir)
      .partitioner(partitioner)
      .useProvidedSplits(true)
      .splitsFilePath(splitsFile)
      .build();
  final AccumuloStore store = mock(AccumuloStore.class);
  given(job.getConfiguration()).willReturn(localConf);

  // When
  factory.setupJob(job, operation, TextMapperGeneratorImpl.class.getName(), store);

  // Then
  if (NoPartitioner.class.equals(partitioner)) {
    verify(job, never()).setNumReduceTasks(Mockito.anyInt());
    verify(job, never()).setPartitionerClass(Mockito.any(Class.class));
    assertNull(job.getConfiguration().get(GafferRangePartitioner.class.getName() + ".cutFile"));
  } else {
    verify(job).setNumReduceTasks(2);
    verify(job).setPartitionerClass(GafferKeyRangePartitioner.class);
    assertEquals(splitsFile, job.getConfiguration().get(GafferRangePartitioner.class.getName() + ".cutFile"));
  }
}
Example 14: shouldJSONSerialiseAndDeserialise
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
@Test
public void shouldJSONSerialiseAndDeserialise() throws SerialisationException {
  // Given
  final Map<String, String> inputMapperPairs = new HashMap<>();
  inputMapperPairs.put("inputPath", MapperGenerator.class.getName());
  final AddElementsFromHdfs addElements = new AddElementsFromHdfs.Builder()
      .inputMapperPairs(inputMapperPairs)
      .outputPath("outputPath")
      .failurePath("failurePath")
      .jobInitialiser(new TextJobInitialiser())
      .partitioner(Partitioner.class)
      .mappers(5)
      .reducers(10)
      .splitsFilePath("/path/to/splits/file")
      .useProvidedSplits(false)
      .build();

  // When
  String json = new String(JSONSerialiser.serialise(addElements, true));

  // Then
  JsonAssert.assertEquals(String.format("{%n" +
      " \"class\" : \"uk.gov.gchq.gaffer.hdfs.operation.AddElementsFromHdfs\",%n" +
      " \"failurePath\" : \"failurePath\",%n" +
      " \"validate\" : true,%n" +
      " \"inputMapperPairs\" : { \"inputPath\" :\"uk.gov.gchq.gaffer.hdfs.operation.mapper.generator.MapperGenerator\"},%n" +
      " \"outputPath\" : \"outputPath\",%n" +
      " \"jobInitialiser\" : {%n" +
      " \"class\" : \"uk.gov.gchq.gaffer.hdfs.operation.handler.job.initialiser.TextJobInitialiser\"%n" +
      " },%n" +
      " \"numMapTasks\" : 5,%n" +
      " \"numReduceTasks\" : 10,%n" +
      " \"splitsFilePath\" : \"/path/to/splits/file\",%n" +
      " \"partitioner\" : \"org.apache.hadoop.mapreduce.Partitioner\"%n" +
      "}"), json);
}
Example 15: validate
import org.apache.hadoop.mapreduce.Partitioner; // import the required package/class
/**
 * Validates that the partition hfiles were written with the expected partitioning
 * function. For each non-empty partition it reads the first key and recomputes the
 * partition using the partitioning function the client specified; if the computed
 * partition number differs from the actual partition index, an exception is thrown.
 * If all partition hfiles are empty, an exception is also thrown.
 *
 * @param parts full absolute paths of all partitions
 * @param partitionerType type of the partitioning function
 * @param numShards total number of partitions
 * @throws IOException if something goes wrong when reading the hfiles
 * @throws IllegalArgumentException if the partitioner type is wrong or all partitions are empty
 */
public void validate(List<Path> parts, PartitionerType partitionerType, int numShards)
    throws IOException {
  boolean hasNonEmptyPartition = false;
  HColumnDescriptor columnDescriptor = new HColumnDescriptor();
  // Disable the block cache to ensure the actual file content is read.
  columnDescriptor.setBlockCacheEnabled(false);
  for (int shardIndex = 0; shardIndex < parts.size(); shardIndex++) {
    Path fileToBeValidated = parts.get(shardIndex);
    HFile.Reader reader = null;
    try {
      FileSystem fs = FileSystem.newInstance(fileToBeValidated.toUri(), conf);
      CacheConfig cc = new CacheConfig(conf, columnDescriptor);
      reader = HFile.createReader(fs, fileToBeValidated, cc);
      Partitioner partitioner = PartitionerFactory.getPartitioner(partitionerType);
      byte[] rowKey = reader.getFirstRowKey();
      if (rowKey == null) {
        LOG.warn(String.format("empty partition %s", fileToBeValidated.toString()));
        reader.close();
        continue;
      }
      hasNonEmptyPartition = true;
      BytesWritable key = new BytesWritable(rowKey);
      int partition = partitioner.getPartition(key, null, numShards);
      if (partition != shardIndex) {
        throw new IllegalArgumentException(
            String.format("wrong partition type %s for key %s in partition %d, expected %d",
                partitionerType.toString(), new String(key.getBytes()), shardIndex, partition)
        );
      }
    } finally {
      if (reader != null) {
        reader.close();
      }
    }
  }
  if (!hasNonEmptyPartition) {
    throw new IllegalArgumentException("all partitions are empty");
  }
}