This article collects typical usage examples of the Java class org.apache.spark.Partitioner. If you are wondering what exactly the Partitioner class does, how it is used, or what real-world examples look like, the curated code examples below may help.
The Partitioner class belongs to the org.apache.spark package. Eight code examples of the class are shown below, sorted by popularity by default.
Example 1: createPartitionPruningRDD
import org.apache.spark.Partitioner; //import the required package/class
/**
* Wraps the input RDD into a PartitionPruningRDD, which acts as a filter
* of required partitions. The distinct set of required partitions is determined
* via the partitioner of the input RDD.
*
* @param in input matrix as {@code JavaPairRDD<MatrixIndexes,MatrixBlock>}
* @param filter partition filter
* @return matrix as {@code JavaPairRDD<MatrixIndexes,MatrixBlock>}
*/
private static JavaPairRDD<MatrixIndexes,MatrixBlock> createPartitionPruningRDD(
        JavaPairRDD<MatrixIndexes,MatrixBlock> in, List<MatrixIndexes> filter )
{
    //build hashset of required partition ids
    HashSet<Integer> flags = new HashSet<>();
    Partitioner partitioner = in.rdd().partitioner().get();
    for( MatrixIndexes key : filter )
        flags.add(partitioner.getPartition(key));

    //create partition pruning rdd
    Function1<Object,Object> f = new PartitionPruningFunction(flags);
    PartitionPruningRDD<Tuple2<MatrixIndexes, MatrixBlock>> ppRDD =
        PartitionPruningRDD.create(in.rdd(), f);

    //wrap output into java pair rdd
    return new JavaPairRDD<>(ppRDD,
        ClassManifestFactory.fromClass(MatrixIndexes.class),
        ClassManifestFactory.fromClass(MatrixBlock.class));
}
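The PartitionPruningFunction used above is a project-specific helper that is not shown in this excerpt. A minimal sketch of such a filter, assuming it does nothing more than keep the partition ids collected in flags (everything except the constructor argument is illustrative), could look like this:

import java.io.Serializable;
import java.util.Set;
import scala.runtime.AbstractFunction1;

//Hypothetical sketch: PartitionPruningRDD calls the filter with a partition index
//and keeps the partition only when the function returns true.
public class PartitionPruningFunction extends AbstractFunction1<Object, Object> implements Serializable {
    private static final long serialVersionUID = 1L;
    private final Set<Integer> _flags;

    public PartitionPruningFunction(Set<Integer> flags) {
        _flags = flags;
    }

    @Override
    public Object apply(Object partitionIndex) {
        //keep the partition iff its index was marked as required
        return _flags.contains((Integer) partitionIndex);
    }
}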
Example 2: getPartitioner
import org.apache.spark.Partitioner; //import the required package/class
private Partitioner getPartitioner(JavaPairRDD<Row, Row> keyedArriving) {
    if (hasPartitioner()) {
        Config partitionerConfig = config.getConfig("partitioner");
        return PartitionerFactory.create(partitionerConfig, keyedArriving);
    }
    else {
        return new HashPartitioner(keyedArriving.getNumPartitions());
    }
}
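For context, here is a hypothetical sketch of the configuration this method reads; the nested "partitioner" block comes from the snippet above and the "type" values mirror the factory tests below, while everything else is an assumption:

import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

//Hypothetical usage: build a config whose "partitioner" block getPartitioner() would consume.
Config config = ConfigFactory.parseString("partitioner { type = \"range\" }");
Config partitionerConfig = config.getConfig("partitioner");
//partitionerConfig is what would then be handed to PartitionerFactory.create(...)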
Example 3: testHash
import org.apache.spark.Partitioner; //import the required package/class
@Test
public void testHash() throws Exception {
    Map<String, Object> configMap = Maps.newHashMap();
    configMap.put("type", "hash");
    JavaPairRDD<Row, Row> base = getDummyRDD(10);
    Config config = ConfigFactory.parseMap(configMap);
    Partitioner p = PartitionerFactory.create(config, base);
    assertTrue(p instanceof HashPartitioner);
    assertEquals(p.numPartitions(), 10);
}
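The getDummyRDD helper called by this and the following tests is not part of the excerpt. A minimal sketch, assuming the test setup provides a JavaSparkContext field named jsc and that the helper only has to return a pair RDD with the requested number of partitions, might look like:

import java.util.ArrayList;
import java.util.List;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.RowFactory;
import scala.Tuple2;

//Hypothetical helper; the real getDummyRDD of the test class is not shown on this page.
private JavaPairRDD<Row, Row> getDummyRDD(int numPartitions) {
    List<Integer> values = new ArrayList<>();
    for (int i = 0; i < numPartitions; i++) {
        values.add(i);
    }
    //jsc is an assumed JavaSparkContext created in the test setup
    return jsc.parallelize(values, numPartitions)
        .mapToPair(i -> new Tuple2<>(RowFactory.create(i), RowFactory.create(i)));
}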
Example 4: testRange
import org.apache.spark.Partitioner; //import the required package/class
@Test
public void testRange() throws Exception {
    Map<String, Object> configMap = Maps.newHashMap();
    configMap.put("type", "range");
    JavaPairRDD<Row, Row> base = getDummyRDD(10);
    Config config = ConfigFactory.parseMap(configMap);
    Partitioner p = PartitionerFactory.create(config, base);
    assertTrue(p instanceof RangePartitioner);
    assertEquals(p.numPartitions(), 10);
}
Example 5: testUUID
import org.apache.spark.Partitioner; //import the required package/class
@Test
public void testUUID() throws Exception {
    Map<String, Object> configMap = Maps.newHashMap();
    configMap.put("type", "uuid");
    JavaPairRDD<Row, Row> base = getDummyRDD(10);
    Config config = ConfigFactory.parseMap(configMap);
    Partitioner p = PartitionerFactory.create(config, base);
    assertTrue(p instanceof UUIDPartitioner);
    assertEquals(p.numPartitions(), 10);
}
Example 6: groupByKeyOnly
import org.apache.spark.Partitioner; //import the required package/class
/**
* An implementation of
* {@link org.apache.beam.runners.core.GroupByKeyViaGroupByKeyOnly.GroupByKeyOnly}
* for the Spark runner.
*/
public static <K, V> JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<V>>>>> groupByKeyOnly(
    JavaRDD<WindowedValue<KV<K, V>>> rdd,
    Coder<K> keyCoder,
    WindowedValueCoder<V> wvCoder) {
  // we use coders to convert objects in the PCollection to byte arrays, so they
  // can be transferred over the network for the shuffle.
  JavaPairRDD<ByteArray, byte[]> pairRDD =
      rdd
          .map(new ReifyTimestampsAndWindowsFunction<K, V>())
          .map(WindowingHelpers.<KV<K, WindowedValue<V>>>unwindowFunction())
          .mapToPair(TranslationUtils.<K, WindowedValue<V>>toPairFunction())
          .mapToPair(CoderHelpers.toByteFunction(keyCoder, wvCoder));
  // use a default parallelism HashPartitioner.
  Partitioner partitioner = new HashPartitioner(rdd.rdd().sparkContext().defaultParallelism());
  // using mapPartitions allows to preserve the partitioner
  // and avoid unnecessary shuffle downstream.
  return pairRDD
      .groupByKey(partitioner)
      .mapPartitionsToPair(
          TranslationUtils.pairFunctionToPairFlatMapFunction(
              CoderHelpers.fromByteFunctionIterable(keyCoder, wvCoder)),
          true)
      .mapPartitions(
          TranslationUtils.<K, Iterable<WindowedValue<V>>>fromPairFlatMapFunction(), true)
      .mapPartitions(
          TranslationUtils.functionToFlatMapFunction(
              WindowingHelpers.<KV<K, Iterable<WindowedValue<V>>>>windowFunction()),
          true);
}
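As a side note (not part of the original snippet), the boolean passed to mapPartitionsToPair and mapPartitions above is Spark's preservesPartitioning flag. A small hypothetical illustration of its effect with the Spark 2.x Java API, where the flat-map function returns an Iterator:

import org.apache.spark.HashPartitioner;
import org.apache.spark.api.java.JavaPairRDD;

//Hypothetical illustration; pairs is an assumed JavaPairRDD<String, Integer>.
JavaPairRDD<String, Iterable<Integer>> grouped = pairs.groupByKey(new HashPartitioner(8));
//identity per-partition transform: preservesPartitioning=true keeps grouped's HashPartitioner,
//so a later join or reduceByKey with the same partitioner avoids another shuffle
JavaPairRDD<String, Iterable<Integer>> still = grouped.mapPartitionsToPair(it -> it, true);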
Example 7: partitionForAccumulo
import org.apache.spark.Partitioner; //import the required package/class
private JavaPairRDD<RowColumn, Bytes> partitionForAccumulo(JavaPairRDD<RowColumn, Bytes> data,
    String accumuloTable, BulkImportOptions opts) {
  // partition and sort data so that one file is created per Accumulo tablet
  Partitioner accumuloPartitioner;
  try {
    accumuloPartitioner = new AccumuloRangePartitioner(
        chooseConnector(opts).tableOperations().listSplits(accumuloTable));
  } catch (TableNotFoundException | AccumuloSecurityException | AccumuloException e) {
    throw new IllegalStateException(e);
  }
  return data.repartitionAndSortWithinPartitions(accumuloPartitioner);
}
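AccumuloRangePartitioner is a project-specific class rather than part of Spark and is not shown on this page. As a rough, hypothetical sketch of how such a split-based partitioner can be written against org.apache.spark.Partitioner (keys simplified to String; the real class works on RowColumn keys and Accumulo split points):

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.spark.Partitioner;

//Hypothetical sketch of a range partitioner driven by pre-computed, sorted split points.
public class SimpleRangePartitioner extends Partitioner {
  private final List<String> splits;   //tablet boundaries, one end row per entry

  public SimpleRangePartitioner(List<String> splits) {
    this.splits = new ArrayList<>(splits);
    Collections.sort(this.splits);
  }

  @Override
  public int numPartitions() {
    return splits.size() + 1;          //n split points define n + 1 ranges
  }

  @Override
  public int getPartition(Object key) {
    int pos = Collections.binarySearch(splits, (String) key);
    //a key equal to a split point falls into the range that ends at that split;
    //otherwise binarySearch returns (-(insertionPoint) - 1)
    return pos >= 0 ? pos : -(pos + 1);
  }
}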
Example 8: partitioner
import org.apache.spark.Partitioner; //import the required package/class
@Override
public Option<Partitioner> partitioner() {
  // setting the partitioner helps to "keep" the same partitioner in the following
  // mapWithState read for Read.Unbounded, preventing a post-mapWithState shuffle.
  return scala.Some.apply(partitioner);
}
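For context (not part of the snippet): partitioner() is how an RDD advertises how its output is keyed, and downstream code inspects the returned Option to decide whether a shuffle is needed. A small hypothetical example of consuming it from Java:

import org.apache.spark.Partitioner;
import scala.Option;

//Hypothetical illustration; pairRDD is an assumed JavaPairRDD.
Option<Partitioner> maybePartitioner = pairRDD.rdd().partitioner();
if (maybePartitioner.isDefined()) {
    Partitioner p = maybePartitioner.get();
    System.out.println("known partitioner with " + p.numPartitions() + " partitions");
} else {
    //no partitioner known; a downstream groupByKey or join would have to shuffle
}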