This article collects typical usage examples of the Java class org.apache.spark.api.java.function.Function2. If you are wondering what the Function2 class is for, how to use it, or are looking for working examples, the curated code samples below may help.
The Function2 class belongs to the org.apache.spark.api.java.function package. Fifteen code examples of the Function2 class are shown below, sorted by popularity by default.
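Before the project examples, here is a minimal, self-contained sketch written for this overview (the class and application names are illustrative and not taken from any project below). It shows the two usual ways a Function2 is supplied to an RDD operation such as reduce: as an anonymous inner class and as a lambda cast to the interface.
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;

public class Function2Sketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("Function2Sketch").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));

        // Anonymous-class form: Function2<T1, T2, R> declares a single method R call(T1 v1, T2 v2).
        int sum = numbers.reduce(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer a, Integer b) {
                return a + b;
            }
        });

        // Lambda form: equivalent, because Function2 is a functional interface; the explicit
        // cast is only needed where the target type would otherwise be ambiguous.
        int sameSum = numbers.reduce((Function2<Integer, Integer, Integer>) (a, b) -> a + b);

        System.out.println(sum + " " + sameSum); // prints "15 15"
        sc.stop();
    }
}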
Example 1: main
import org.apache.spark.api.java.function.Function2; // import the required package/class
public static void main(String[] args) {
if (args.length != 2) {
System.err.println("Usage:");
System.err.println(" SparkWordCount <sourceFile> <targetFile>");
System.exit(1);
}
SparkConf conf = new SparkConf()
.setAppName("Word Count");
JavaSparkContext sc = new JavaSparkContext(conf);
JavaRDD<String> textFile = sc.textFile(args[0]);
JavaRDD<String> words = textFile.flatMap(LineIterator::new);
JavaPairRDD<String, Long> pairs =
words.mapToPair(s -> new Tuple2<>(s, 1L));
JavaPairRDD<String, Long> counts =
pairs.reduceByKey((Function2<Long, Long, Long>) (a, b) -> a + b);
System.out.println("Starting task..");
long t = System.currentTimeMillis();
counts.saveAsTextFile(args[1] + "_" + t);
System.out.println("Time=" + (System.currentTimeMillis() - t));
}
Example 2: checkByRateInParallel
import org.apache.spark.api.java.function.Function2; // import the required package/class
void checkByRateInParallel() throws InterruptedException, IOException {
JavaRDD<String> userRDD = getUserRDD(this.httpType);
LOG.info("Original User count: {}", userRDD.count());
int userCount = 0;
userCount = userRDD.mapPartitions((FlatMapFunction<Iterator<String>, Integer>) iterator -> {
ESDriver tmpES = new ESDriver(props);
tmpES.createBulkProcessor();
List<Integer> realUserNums = new ArrayList<>();
while (iterator.hasNext()) {
String s = iterator.next();
Integer realUser = checkByRate(tmpES, s);
realUserNums.add(realUser);
}
tmpES.destroyBulkProcessor();
tmpES.close();
return realUserNums.iterator();
}).reduce((Function2<Integer, Integer, Integer>) (a, b) -> a + b);
LOG.info("User count: {}", Integer.toString(userCount));
}
Example 3: main
import org.apache.spark.api.java.function.Function2; // import the required package/class
public static void main(String[] args) {
SparkConf conf = new SparkConf().setAppName("Big Apple").setMaster("local");
JavaSparkContext sc = new JavaSparkContext(conf);
class GetLength implements Function<String, Integer> {
public Integer call(String s) {
return s.length();
}
}
class Sum implements Function2<Integer, Integer, Integer> {
public Integer call(Integer a, Integer b) {
return a + b;
}
}
JavaRDD<String> lines = sc.textFile("src/main/resources/compressed.gz");
JavaRDD<Integer> lineLengths = lines.map(new GetLength());
// Printing an RDD
lineLengths.foreach(x -> System.out.println(x));
int totalLength = lineLengths.reduce(new Sum());
System.out.println(totalLength);
}
Example 4: startContext
import org.apache.spark.api.java.function.Function2; // import the required package/class
/**
 * Starts the Spark context given a valid configuration and runs a test
 * map-reduce job so that all Spark workers can fetch dependencies in advance.
 */
private static void startContext(int numOfWorkers) {
JavaSparkContext sc = SharedService.getContext();
for (int i=0; i<numOfWorkers;i++) {
final int threadnumber = i;
new Thread(){
@Override
public void run() {
ImmutableList<Integer> range =
ContiguousSet.create(Range.closed(1, 5), DiscreteDomain.integers()).asList();
JavaRDD<Integer> data = sc.parallelize(range).repartition(numOfWorkers);
Integer result = data.reduce((Function2<Integer, Integer, Integer>)
(v1, v2) -> v1 + v2);
if (result == 15)
    log.info("successfully tested worker " + threadnumber);
else
    log.warn("worker " + threadnumber + " yielded a false result: "
            + result + " (should be 15)");
}
}.start();
}
}
Example 5: numCharacters
import org.apache.spark.api.java.function.Function2; // import the required package/class
/**
 * Counts the number of non-space characters in this data set. This utility method
 * is used to check the tokenization result.
 * @param lines the input lines
 * @return the number of characters
 */
int numCharacters(JavaRDD<String> lines) {
JavaRDD<Integer> lengths = lines.map(new Function<String, Integer>() {
private static final long serialVersionUID = -2189399343462982586L;
@Override
public Integer call(String line) throws Exception {
line = line.replaceAll("[\\s_]+", "");
return line.length();
}
});
return lengths.reduce(new Function2<Integer, Integer, Integer>() {
private static final long serialVersionUID = -8438072946884289401L;
@Override
public Integer call(Integer e0, Integer e1) throws Exception {
return e0 + e1;
}
});
}
Example 6: getRDDCountSum
import org.apache.spark.api.java.function.Function2; // import the required package/class
private Long getRDDCountSum(JavaPairRDD<ByteArray, Object[]> rdd, final int countMeasureIndex) {
final ByteArray ONE = new ByteArray();
Long count = rdd.mapValues(new Function<Object[], Long>() {
@Override
public Long call(Object[] objects) throws Exception {
return (Long) objects[countMeasureIndex];
}
}).reduce(new Function2<Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>>() {
@Override
public Tuple2<ByteArray, Long> call(Tuple2<ByteArray, Long> longTuple2, Tuple2<ByteArray, Long> longTuple22)
throws Exception {
return new Tuple2<>(ONE, longTuple2._2() + longTuple22._2());
}
})._2();
return count;
}
Example 7: execute
import org.apache.spark.api.java.function.Function2; // import the required package/class
@SuppressWarnings("serial")
@Override
public SortedCounts<String> execute(final JavaSparkContext spark) {
final JavaRDD<String> textFile = spark.textFile(inputFile);
final JavaRDD<String> words = textFile.flatMap(new FlatMapFunction<String, String>() {
@Override
public Iterable<String> call(final String rawJSON) throws TwitterException {
final Status tweet = TwitterObjectFactory.createStatus(rawJSON);
String text = tweet.getText();
return Arrays.asList(text.split(" "));
}
});
final JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
@Override
public Tuple2<String, Integer> call(final String s) {
return new Tuple2<String, Integer>(s.toLowerCase(), 1);
}
});
final JavaPairRDD<String, Integer> counts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
@Override
public Integer call(final Integer a, final Integer b) {
return a + b;
}
});
return SortedCounts.create(counts);
}
Example 8: PolygonUnion
import org.apache.spark.api.java.function.Function2; // import the required package/class
/**
* Polygon union.
*
* @return the polygon
*/
public Polygon PolygonUnion() {
Polygon result = this.rawSpatialRDD.reduce(new Function2<Polygon, Polygon, Polygon>() {
public Polygon call(Polygon v1, Polygon v2) {
//Reduce precision in JTS to avoid TopologyException
PrecisionModel pModel = new PrecisionModel();
GeometryPrecisionReducer pReducer = new GeometryPrecisionReducer(pModel);
Geometry p1 = pReducer.reduce(v1);
Geometry p2 = pReducer.reduce(v2);
//Union two polygons
Geometry polygonGeom = p1.union(p2);
Coordinate[] coordinates = polygonGeom.getCoordinates();
ArrayList<Coordinate> coordinateList = new ArrayList<Coordinate>(Arrays.asList(coordinates));
// Append the first coordinate again so that the ring is explicitly closed
Coordinate firstCoordinate = coordinateList.get(0);
coordinateList.add(firstCoordinate);
Coordinate[] coordinatesClosed = new Coordinate[coordinateList.size()];
coordinatesClosed = coordinateList.toArray(coordinatesClosed);
GeometryFactory fact = new GeometryFactory();
LinearRing linear = new GeometryFactory().createLinearRing(coordinatesClosed);
Polygon polygon = new Polygon(linear, null, fact);
// Return the union of the two polygons
return polygon;
}
});
return result;
}
Example 9: countGeometriesByKey
import org.apache.spark.api.java.function.Function2; // import the required package/class
private static <U extends Geometry, T extends Geometry> JavaPairRDD<U, Long> countGeometriesByKey(JavaPairRDD<U, T> input) {
return input.aggregateByKey(
0L,
new Function2<Long, T, Long>() {
@Override
public Long call(Long count, T t) throws Exception {
return count + 1;
}
},
new Function2<Long, Long, Long>() {
@Override
public Long call(Long count1, Long count2) throws Exception {
return count1 + count2;
}
});
}
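Example 9 above uses Function2 in both roles expected by aggregateByKey: a sequence function that folds one value into the per-partition accumulator, and a combiner that merges accumulators coming from different partitions. The following standalone sketch repeats the same pattern on plain string pairs; the data and class name are hypothetical and not part of the original project.
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import scala.Tuple2;

public class AggregateByKeySketch {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext(
                new SparkConf().setAppName("AggregateByKeySketch").setMaster("local[*]"));
        JavaPairRDD<String, String> pairs = sc.parallelizePairs(Arrays.asList(
                new Tuple2<>("a", "x"), new Tuple2<>("a", "y"), new Tuple2<>("b", "z")));

        JavaPairRDD<String, Long> counts = pairs.aggregateByKey(
                0L,
                // seqFunc: folds one value (String) into the running Long accumulator
                new Function2<Long, String, Long>() {
                    @Override
                    public Long call(Long count, String value) {
                        return count + 1;
                    }
                },
                // combFunc: merges two partial Long accumulators from different partitions
                new Function2<Long, Long, Long>() {
                    @Override
                    public Long call(Long c1, Long c2) {
                        return c1 + c2;
                    }
                });

        counts.collect().forEach(System.out::println); // (a,2) and (b,1)
        sc.stop();
    }
}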
Example 10: getBoundBox
import org.apache.spark.api.java.function.Function2; // import the required package/class
/**
 * Reads and merges the bounding boxes of all the shapefiles in the user input;
 * returns null if there are none.
 */
public BoundBox getBoundBox(JavaSparkContext sc, String inputPath){
// read bound boxes into memory
JavaPairRDD<Long, BoundBox> bounds = sc.newAPIHadoopFile(
inputPath,
BoundaryInputFormat.class,
Long.class,
BoundBox.class,
sc.hadoopConfiguration()
);
// merge all into one
bounds = bounds.reduceByKey(new Function2<BoundBox, BoundBox, BoundBox>(){
@Override
public BoundBox call(BoundBox box1, BoundBox box2) throws Exception {
return BoundBox.mergeBoundBox(box1, box2);
}
});
// if there is a merged result, return it; otherwise return null
if (bounds.count() > 0) {
    return new BoundBox(bounds.collect().get(0)._2());
} else {
    return null;
}
}
Example 11: readBoundBox
import org.apache.spark.api.java.function.Function2; // import the required package/class
/**
 * Reads and merges the bounding boxes of all the shapefiles in the user input;
 * returns null if there are none.
 */
public static BoundBox readBoundBox(JavaSparkContext sc, String inputPath){
// read bound boxes into memory
JavaPairRDD<Long, BoundBox> bounds = sc.newAPIHadoopFile(
inputPath,
BoundaryInputFormat.class,
Long.class,
BoundBox.class,
sc.hadoopConfiguration()
);
// merge all into one
bounds = bounds.reduceByKey(new Function2<BoundBox, BoundBox, BoundBox>(){
@Override
public BoundBox call(BoundBox box1, BoundBox box2) throws Exception {
return BoundBox.mergeBoundBox(box1, box2);
}
});
// if there is a merged result, return it; otherwise return null
if (bounds.count() > 0) {
    return new BoundBox(bounds.collect().get(0)._2());
} else {
    return null;
}
}
Example 12: before
import org.apache.spark.api.java.function.Function2; // import the required package/class
@Before
public void before() throws Exception {
queryExecutor = new QueryExecutor(deepContext, deepConnectionHandler);
// Stubs
when(deepConnectionHandler.getConnection(CLUSTERNAME_CONSTANT.getName())).thenReturn(deepConnection);
when(deepConnection.getExtractorConfig()).thenReturn(extractorConfig);
when(extractorConfig.clone()).thenReturn(extractorConfig);
when(deepContext.createJavaRDD(any(ExtractorConfig.class))).thenReturn(singleRdd);
when(deepContext.createHDFSRDD(any(ExtractorConfig.class))).thenReturn(rdd);
when(rdd.toJavaRDD()).thenReturn(singleRdd);
when(singleRdd.collect()).thenReturn(generateListOfCells(3));
when(singleRdd.filter(any(Function.class))).thenReturn(singleRdd);
when(singleRdd.map(any(FilterColumns.class))).thenReturn(singleRdd);
when(singleRdd.mapToPair(any(PairFunction.class))).thenReturn(pairRdd);
when(singleRdd.keyBy(any(Function.class))).thenReturn(pairRdd);
when(pairRdd.join(pairRdd)).thenReturn(joinedRdd);
when(pairRdd.reduceByKey(any(Function2.class))).thenReturn(pairRdd);
when(pairRdd.map(any(Function.class))).thenReturn(singleRdd);
when(joinedRdd.map(any(JoinCells.class))).thenReturn(singleRdd);
}
Example 13: getFunction
import org.apache.spark.api.java.function.Function2; // import the required package/class
@Override
protected Function2<JavaPairRDD<String, RowMutation>, Time, Void> getFunction() {
return new Function2<JavaPairRDD<String, RowMutation>, Time, Void>() {
// Blur Thrift Client
@Override
public Void call(JavaPairRDD<String, RowMutation> rdd, Time time) throws Exception {
Iface client = getBlurClient();
for (Tuple2<String, RowMutation> tuple : rdd.collect()) {
if (tuple != null) {
try {
RowMutation rm = tuple._2;
// Index using enqueue mutate call
client.enqueueMutate(rm);
} catch (Exception ex) {
LOG.error("Unknown error while trying to call enqueueMutate.", ex);
throw ex;
}
}
}
return null;
}
};
}
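The Function2<JavaPairRDD<String, RowMutation>, Time, Void> returned in Example 13 matches the callback shape used by the older (Spark 1.x era) foreachRDD overloads on Java DStreams, which hand each batch RDD to the function together with its batch time. Below is a small compile-only sketch of a callback with that shape; the placeholder types and class name are illustrative and not part of the original Blur code.
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.streaming.Time;

public class BatchCallbackSketch {
    // Receives one batch RDD plus its batch time; returning Void mirrors the
    // Function2<R, Time, Void> shape shown in Example 13.
    static Function2<JavaPairRDD<String, String>, Time, Void> printBatch =
            new Function2<JavaPairRDD<String, String>, Time, Void>() {
                @Override
                public Void call(JavaPairRDD<String, String> rdd, Time time) throws Exception {
                    System.out.println("Batch at " + time + " contains " + rdd.count() + " records");
                    return null;
                }
            };
}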
Example 14: bounds
import org.apache.spark.api.java.function.Function2; // import the required package/class
@Override public Rectangle2D bounds() {
final JavaRDD<Rectangle2D> rects;
if (partitions) {
rects = base.mapPartitions(
new FlatMapFunction<Iterator<Glyph<G,I>>,Rectangle2D>() {
public Iterable<Rectangle2D> call(Iterator<Glyph<G, I>> glyphs) throws Exception {
ArrayList<Glyph<G,I>> glyphList = Lists.newArrayList(new IterableIterator<>(glyphs));
return Arrays.asList(Util.bounds(glyphList));
}});
} else {
rects = base.map(new Function<Glyph<G,I>,Rectangle2D>() {
public Rectangle2D call(Glyph<G,I> glyph) throws Exception {
return Util.boundOne(glyph.shape());
}});
}
return rects.reduce(new Function2<Rectangle2D, Rectangle2D,Rectangle2D>() {
public Rectangle2D call(Rectangle2D left, Rectangle2D right) throws Exception {
return Util.bounds(left, right);
}
});
}
Example 15: bulidDataQueryRDD
import org.apache.spark.api.java.function.Function2; // import the required package/class
/**
 * bulidDataQueryRDD: converts a click-stream list into (dataset, queries) pairs.
 *
 * @param clickstreamRDD
 *            click stream data
 * @param downloadWeight
 *            weight of the download behavior
 * @return JavaPairRDD whose key is the short name of a dataset and whose values are queries
 */
public JavaPairRDD<String, List<String>> bulidDataQueryRDD(JavaRDD<ClickStream> clickstreamRDD, int downloadWeight) {
return clickstreamRDD.mapToPair(new PairFunction<ClickStream, String, List<String>>() {
/**
*
*/
private static final long serialVersionUID = 1L;
@Override
public Tuple2<String, List<String>> call(ClickStream click) throws Exception {
List<String> query = new ArrayList<>();
// important! download behavior is given a higher weight
// than viewing behavior
boolean download = click.isDownload();
int weight = 1;
if (download) {
weight = downloadWeight;
}
for (int i = 0; i < weight; i++) {
query.add(click.getKeyWords());
}
return new Tuple2<>(click.getViewDataset(), query);
}
}).reduceByKey(new Function2<List<String>, List<String>, List<String>>() {
/**
*
*/
private static final long serialVersionUID = 1L;
@Override
public List<String> call(List<String> v1, List<String> v2) throws Exception {
List<String> list = new ArrayList<>();
list.addAll(v1);
list.addAll(v2);
return list;
}
});
}