This article collects typical usage examples of the Java class org.apache.spark.api.java.function.PairFunction. If you have been wondering what the Java PairFunction class is for, how to use it, or where to find examples of it in action, the curated code samples below should help.
The PairFunction class belongs to the org.apache.spark.api.java.function package. Fifteen code examples of PairFunction are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code samples.
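Before the examples, a quick orientation: PairFunction<T, K, V> is the functional interface Spark's Java API uses to turn an element of type T into a scala.Tuple2<K, V>; it is the argument type of mapToPair on RDDs and DStreams. A minimal, self-contained sketch (the local master and sample data are illustrative, not from any example below):

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.PairFunction;
import scala.Tuple2;
import java.util.Arrays;

public class PairFunctionSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setMaster("local").setAppName("PairFunctionSketch");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // Turn each word into a (word, length) pair.
            JavaPairRDD<String, Integer> pairs = sc.parallelize(Arrays.asList("spark", "pair", "function"))
                    .mapToPair((PairFunction<String, String, Integer>) s -> new Tuple2<>(s, s.length()));
            pairs.collect().forEach(t -> System.out.println(t._1() + " -> " + t._2()));
        }
    }
}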
Example 1: run
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
public void run() throws IOException {
    SparkConf conf = new SparkConf();
    conf.setAppName(getAppName());
    conf.set(SPARK_SERIALIZER, ORG_APACHE_SPARK_SERIALIZER_KRYO_SERIALIZER);
    JavaSparkUtil.packProjectJars(conf);
    setupSparkConf(conf);

    JavaStreamingContext ssc = new JavaStreamingContext(conf, getDuration());
    List<JavaDStream<T>> streamsList = getStreamsList(ssc);

    // Union all the streams if there is more than one stream
    JavaDStream<T> streams = unionStreams(ssc, streamsList);

    // Key each element by the row id of its converted RowMutation.
    JavaPairDStream<String, RowMutation> pairDStream = streams.mapToPair(new PairFunction<T, String, RowMutation>() {
        public Tuple2<String, RowMutation> call(T t) {
            RowMutation rowMutation = convert(t);
            return new Tuple2<String, RowMutation>(rowMutation.getRowId(), rowMutation);
        }
    });

    pairDStream.foreachRDD(getFunction());
    ssc.start();
    ssc.awaitTermination();
}
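Since PairFunction is a single-method interface, the anonymous class above collapses to a lambda on Java 8+. An equivalent sketch, reusing convert and RowMutation from the surrounding class:

JavaPairDStream<String, RowMutation> pairDStream = streams.mapToPair(t -> {
    RowMutation rowMutation = convert(t);
    return new Tuple2<>(rowMutation.getRowId(), rowMutation);
});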
Example 2: tokenizeData
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
public JavaPairRDD<String, List<String>> tokenizeData(JavaPairRDD<String, String> datasetsContentRDD, String splitter) throws Exception {
    return datasetsContentRDD.mapToPair(new PairFunction<Tuple2<String, String>, String, List<String>>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Tuple2<String, List<String>> call(Tuple2<String, String> arg) throws Exception {
            // Keep the dataset id, replace the raw content with its token list.
            String content = arg._2;
            List<String> tokens = getTokens(content, splitter);
            return new Tuple2<>(arg._1, tokens);
        }
    });
}
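A hedged call-site sketch for tokenizeData, assuming an instance of the enclosing class and a JavaSparkContext sc; the dataset id, content, and whitespace splitter are illustrative:

JavaPairRDD<String, String> docs = JavaPairRDD.fromJavaRDD(
        sc.parallelize(Arrays.asList(new Tuple2<>("doc-1", "spark pair function example"))));
JavaPairRDD<String, List<String>> tokenized = tokenizeData(docs, "\\s+");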
Example 3: main
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaTeraSort <HDFS_INPUT> <HDFS_OUTPUT>");
        System.exit(1);
    }
    SparkConf sparkConf = new SparkConf().setAppName("JavaTeraSort");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);
    JavaRDD<String> lines = ctx.textFile(args[0], 1);
    Integer parallel = sparkConf.getInt("spark.default.parallelism", ctx.defaultParallelism());
    Integer reducer = Integer.parseInt(IOCommon.getProperty("hibench.default.shuffle.parallelism").get());
    // TeraSort records: the first 10 characters are the key, the remainder is the value.
    JavaPairRDD<String, String> words = lines.mapToPair(new PairFunction<String, String, String>() {
        @Override
        public Tuple2<String, String> call(String s) throws Exception {
            return new Tuple2<String, String>(s.substring(0, 10), s.substring(10));
        }
    });
    JavaPairRDD<String, String> sorted = words.sortByKey(true, reducer);
    JavaRDD<String> result = sorted.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> e) throws Exception {
            return e._1() + e._2();
        }
    });
    result.saveAsTextFile(args[1]);
    ctx.stop();
}
Example 4: parallizeData
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
private JavaPairRDD<String, String> parallizeData(SparkDriver spark, List<Tuple2<String, String>> datasetContent) {
    JavaRDD<Tuple2<String, String>> datasetContentRDD = spark.sc.parallelize(datasetContent);
    // Identity PairFunction: converts JavaRDD<Tuple2<...>> into JavaPairRDD<...>.
    return datasetContentRDD.mapToPair(new PairFunction<Tuple2<String, String>, String, String>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Tuple2<String, String> call(Tuple2<String, String> term) throws Exception {
            return term;
        }
    });
}
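The PairFunction here is a pure identity whose only job is the JavaRDD-to-JavaPairRDD conversion. Spark's Java API ships a direct conversion that skips the identity map; an equivalent sketch for the same datasetContentRDD:

return JavaPairRDD.fromJavaRDD(datasetContentRDD);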
Example 5: addNewElement
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
private void addNewElement(JavaPairRDD newPair, JavaPairRDD timeStamp) {
    item2ReadCount = item2ReadCount
            .union(newPair)
            .coalesce(numPartitions, false)
            .reduceByKey((v1, v2) -> (Long) v1 + (Long) v2, numPartitions)
            // Sort by value: swap to (count, item), sort descending, swap back.
            .mapToPair((PairFunction<Tuple2<Long, Long>, Long, Long>) Tuple2::swap)
            .sortByKey(false, numPartitions)
            .mapToPair((PairFunction<Tuple2<Long, Long>, Long, Long>) Tuple2::swap);

    item2timeStampData = item2timeStampData
            .union(timeStamp)
            .coalesce(numPartitions, false)
            .reduceByKey(replaceValues)
            // Same swap trick, sorted ascending by timestamp.
            .mapToPair((PairFunction<Tuple2<Long, Long>, Long, Long>) Tuple2::swap)
            .sortByKey(true, numPartitions)
            .mapToPair((PairFunction<Tuple2<Long, Long>, Long, Long>) Tuple2::swap);
}
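The swap / sortByKey / swap sequence above is the standard way to order a pair RDD by value, since the Java API only provides sortByKey. A generic sketch of the pattern, assuming counts is a JavaPairRDD<String, Long> of (word, count) pairs:

// (word, count) -> (count, word), sort descending by count, then swap back.
JavaPairRDD<String, Long> byCountDesc = counts
        .mapToPair((PairFunction<Tuple2<String, Long>, Long, String>) Tuple2::swap)
        .sortByKey(false)
        .mapToPair((PairFunction<Tuple2<Long, String>, String, Long>) Tuple2::swap);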
Example 6: pairFunctionToPairFlatMapFunction
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
/**
 * A utility method that adapts a {@link PairFunction} to a {@link PairFlatMapFunction} with an
 * {@link Iterator} input. This is particularly useful because it allows functions written for
 * mapToPair to be reused in flatMapToPair-style operations.
 *
 * @param pairFunction the {@link PairFunction} to adapt.
 * @param <T> the input type.
 * @param <K> the output key type.
 * @param <V> the output value type.
 * @return a {@link PairFlatMapFunction} that accepts an {@link Iterator} as input and applies
 *     the {@link PairFunction} to every element.
 */
public static <T, K, V> PairFlatMapFunction<Iterator<T>, K, V> pairFunctionToPairFlatMapFunction(
        final PairFunction<T, K, V> pairFunction) {
    return new PairFlatMapFunction<Iterator<T>, K, V>() {
        @Override
        public Iterator<Tuple2<K, V>> call(Iterator<T> itr) throws Exception {
            // Lazily apply the PairFunction to each element, wrapping checked exceptions.
            final Iterator<Tuple2<K, V>> outputItr =
                    Iterators.transform(
                            itr,
                            new com.google.common.base.Function<T, Tuple2<K, V>>() {
                                @Override
                                public Tuple2<K, V> apply(T t) {
                                    try {
                                        return pairFunction.call(t);
                                    } catch (Exception e) {
                                        throw new RuntimeException(e);
                                    }
                                }
                            });
            return outputItr;
        }
    };
}
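Because the adapted function consumes an Iterator, it slots directly into Spark's per-partition operations. A hedged usage sketch; the words RDD of strings is an assumption:

JavaPairRDD<String, Integer> ones = words.mapPartitionsToPair(
        pairFunctionToPairFlatMapFunction(
                (PairFunction<String, String, Integer>) s -> new Tuple2<>(s, 1)));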
Example 7: fromByteFunctionIterable
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
/**
 * A function wrapper for converting a byte-array pair to a key-value pair, where
 * the values are {@link Iterable}.
 *
 * @param keyCoder Coder to deserialize keys.
 * @param valueCoder Coder to deserialize values.
 * @param <K> The type of the key being deserialized.
 * @param <V> The type of the value being deserialized.
 * @return A function that accepts a pair of byte arrays and returns a key-value pair.
 */
public static <K, V> PairFunction<Tuple2<ByteArray, Iterable<byte[]>>, K, Iterable<V>>
        fromByteFunctionIterable(final Coder<K> keyCoder, final Coder<V> valueCoder) {
    return new PairFunction<Tuple2<ByteArray, Iterable<byte[]>>, K, Iterable<V>>() {
        @Override
        public Tuple2<K, Iterable<V>> call(Tuple2<ByteArray, Iterable<byte[]>> tuple) {
            // Decode the key eagerly; decode the grouped values lazily on iteration.
            return new Tuple2<>(fromByteArray(tuple._1().getValue(), keyCoder),
                    Iterables.transform(tuple._2(), new com.google.common.base.Function<byte[], V>() {
                        @Override
                        public V apply(byte[] bytes) {
                            return fromByteArray(bytes, valueCoder);
                        }
                    }));
        }
    };
}
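A typical call site for this wrapper is right after grouping byte-serialized data, to decode keys and grouped values back into domain types. A hedged sketch, assuming grouped is a JavaPairRDD<ByteArray, Iterable<byte[]>> holding UTF-8 string keys and varint-long values:

JavaPairRDD<String, Iterable<Long>> decoded = grouped.mapToPair(
        fromByteFunctionIterable(StringUtf8Coder.of(), VarLongCoder.of()));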
Example 8: publishToNats
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
protected void publishToNats(final String subject1, final String subject2, final int partitionsNb) {
    final JavaDStream<String> lines = ssc.textFileStream(tempDir.getAbsolutePath()).repartition(partitionsNb);

    // Tag every line with each of the two NATS subjects.
    JavaPairDStream<String, String> stream1 =
            lines.mapToPair((PairFunction<String, String, String>) str -> new Tuple2<String, String>(subject1, str));
    JavaPairDStream<String, String> stream2 =
            lines.mapToPair((PairFunction<String, String, String>) str -> new Tuple2<String, String>(subject2, str));
    final JavaPairDStream<String, String> stream = stream1.union(stream2);

    if (logger.isDebugEnabled()) {
        stream.print();
    }

    SparkToNatsConnectorPool
            .newPool()
            .withNatsURL(NATS_SERVER_URL)
            .withConnectionTimeout(Duration.ofSeconds(2))
            .publishToNatsAsKeyValue(stream);
}
Developer ID: Logimethods; Project: nats-connector-spark; Lines of code: 24; Source file: KeyValueSparkToStandardNatsConnectorLifecycleTest.java
Example 9: writeText
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
private static <T> TransformEvaluator<TextIO.Write.Bound<T>> writeText() {
    return new TransformEvaluator<TextIO.Write.Bound<T>>() {
        @Override
        public void evaluate(TextIO.Write.Bound<T> transform, EvaluationContext context) {
            // Pair each unwindowed element with a null value for the Hadoop output format.
            @SuppressWarnings("unchecked")
            JavaPairRDD<T, Void> last =
                    ((JavaRDDLike<WindowedValue<T>, ?>) context.getInputRDD(transform))
                            .map(WindowingHelpers.<T>unwindowFunction())
                            .mapToPair(new PairFunction<T, T, Void>() {
                                @Override
                                public Tuple2<T, Void> call(T t) throws Exception {
                                    return new Tuple2<>(t, null);
                                }
                            });
            ShardTemplateInformation shardTemplateInfo =
                    new ShardTemplateInformation(transform.getNumShards(),
                            transform.getShardTemplate(), transform.getFilenamePrefix(),
                            transform.getFilenameSuffix());
            writeHadoopFile(last, new Configuration(), shardTemplateInfo, Text.class,
                    NullWritable.class, TemplatedTextOutputFormat.class);
        }
    };
}
Example 10: fromByteFunctionIterable
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
/**
 * A function wrapper for converting a byte-array pair to a key-value pair, where
 * the values are {@link Iterable}.
 *
 * @param keyCoder Coder to deserialize keys.
 * @param valueCoder Coder to deserialize values.
 * @param <K> The type of the key being deserialized.
 * @param <V> The type of the value being deserialized.
 * @return A function that accepts a pair of byte arrays and returns a key-value pair.
 */
static <K, V> PairFunction<Tuple2<ByteArray, Iterable<byte[]>>, K, Iterable<V>>
        fromByteFunctionIterable(final Coder<K> keyCoder, final Coder<V> valueCoder) {
    return new PairFunction<Tuple2<ByteArray, Iterable<byte[]>>, K, Iterable<V>>() {
        @Override
        public Tuple2<K, Iterable<V>> call(Tuple2<ByteArray, Iterable<byte[]>> tuple) {
            return new Tuple2<>(fromByteArray(tuple._1().getValue(), keyCoder),
                    Iterables.transform(tuple._2(), new com.google.common.base.Function<byte[], V>() {
                        @Override
                        public V apply(byte[] bytes) {
                            return fromByteArray(bytes, valueCoder);
                        }
                    }));
        }
    };
}
Example 11: execute
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
@SuppressWarnings("serial")
@Override
public SortedCounts<String> execute(final JavaSparkContext spark) {
    final JavaRDD<String> textFile = spark.textFile(inputFile);
    // Parse each raw JSON line as a tweet and split its text into words.
    final JavaRDD<String> words = textFile.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterable<String> call(final String rawJSON) throws TwitterException {
            final Status tweet = TwitterObjectFactory.createStatus(rawJSON);
            String text = tweet.getText();
            return Arrays.asList(text.split(" "));
        }
    });
    final JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(final String s) {
            return new Tuple2<String, Integer>(s.toLowerCase(), 1);
        }
    });
    final JavaPairRDD<String, Integer> counts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(final Integer a, final Integer b) {
            return a + b;
        }
    });
    return SortedCounts.create(counts);
}
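On Java 8+, the mapToPair / reduceByKey core of this word count collapses to two lambdas. An equivalent sketch over the same words RDD:

final JavaPairRDD<String, Integer> counts = words
        .mapToPair(s -> new Tuple2<>(s.toLowerCase(), 1))
        .reduceByKey((a, b) -> a + b);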
Example 12: spatialPartitioningWithoutDuplicates
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
/**
 * Spatial partitioning without duplicates.
 *
 * @return true, if successful
 * @throws Exception the exception
 */
private boolean spatialPartitioningWithoutDuplicates() throws Exception {
    this.distributedRasterColorMatrix = this.distributedRasterColorMatrix.mapToPair(new PairFunction<Tuple2<Pixel, Integer>, Pixel, Integer>() {
        @Override
        public Tuple2<Pixel, Integer> call(Tuple2<Pixel, Integer> pixelDoubleTuple2) throws Exception {
            // Rebuild the pixel, mark it as non-duplicate, and assign its target partition id.
            Pixel newPixel = new Pixel(pixelDoubleTuple2._1().getX(), pixelDoubleTuple2._1().getY(), resolutionX, resolutionY);
            newPixel.setDuplicate(false);
            newPixel.setCurrentPartitionId(VisualizationPartitioner.CalculatePartitionId(resolutionX, resolutionY,
                    partitionX, partitionY, pixelDoubleTuple2._1.getX(), pixelDoubleTuple2._1.getY()));
            return new Tuple2<Pixel, Integer>(newPixel, pixelDoubleTuple2._2());
        }
    });
    this.distributedRasterColorMatrix = this.distributedRasterColorMatrix.partitionBy(
            new VisualizationPartitioner(this.resolutionX, this.resolutionY, this.partitionX, this.partitionY));
    return true;
}
Example 13: main
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local");
    sparkConf.setAppName("TestSpark");
    JavaSparkContext sc = new JavaSparkContext(sparkConf);
    // 'data' is a List<String> field defined elsewhere in the class.
    JavaRDD<String> input = sc.parallelize(data);
    // Key each line by its first whitespace-delimited token.
    JavaPairRDD<String, String> inputPair = input.mapToPair(
            new PairFunction<String, String, String>() {
                @Override
                public Tuple2<String, String> call(String x) throws Exception {
                    return new Tuple2<String, String>(x.split(" ")[0], x);
                }
            }
    );
    System.out.println(inputPair.take(100));
}
Example 14: before
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
@Before
public void before() throws Exception {
    queryExecutor = new QueryExecutor(deepContext, deepConnectionHandler);

    // Stubs
    when(deepConnectionHandler.getConnection(CLUSTERNAME_CONSTANT.getName())).thenReturn(deepConnection);
    when(deepConnection.getExtractorConfig()).thenReturn(extractorConfig);
    when(extractorConfig.clone()).thenReturn(extractorConfig);
    when(deepContext.createJavaRDD(any(ExtractorConfig.class))).thenReturn(singleRdd);
    when(deepContext.createHDFSRDD(any(ExtractorConfig.class))).thenReturn(rdd);
    when(rdd.toJavaRDD()).thenReturn(singleRdd);
    when(singleRdd.collect()).thenReturn(generateListOfCells(3));
    when(singleRdd.filter(any(Function.class))).thenReturn(singleRdd);
    when(singleRdd.map(any(FilterColumns.class))).thenReturn(singleRdd);
    when(singleRdd.mapToPair(any(PairFunction.class))).thenReturn(pairRdd);
    when(singleRdd.keyBy(any(Function.class))).thenReturn(pairRdd);
    when(pairRdd.join(pairRdd)).thenReturn(joinedRdd);
    when(pairRdd.reduceByKey(any(Function2.class))).thenReturn(pairRdd);
    when(pairRdd.map(any(Function.class))).thenReturn(singleRdd);
    when(joinedRdd.map(any(JoinCells.class))).thenReturn(singleRdd);
}
Example 15: convertToKeyValue
import org.apache.spark.api.java.function.PairFunction; // import the required package/class
@SuppressWarnings("serial")
private static final PairFunction<Integer, Integer, Integer> convertToKeyValue() {
    // Convert to key-value [key (integer) : value (integer * integer)]
    return new PairFunction<Integer, Integer, Integer>() {
        @Override
        public final Tuple2<Integer, Integer> call(final Integer integer) throws Exception {
            /* Tuple : key (integer) : value (integer * integer) */
            return new Tuple2<Integer, Integer>(integer, integer * integer);
        }
    };
}
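A hedged usage sketch for the factory above; the JavaSparkContext sc and the sample numbers are assumptions:

// Produces (1,1), (2,4), (3,9), (4,16).
JavaPairRDD<Integer, Integer> squares =
        sc.parallelize(Arrays.asList(1, 2, 3, 4)).mapToPair(convertToKeyValue());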