This article collects typical usage examples of the Java method org.apache.spark.api.java.JavaPairRDD.flatMapToPair. If you have been wondering how JavaPairRDD.flatMapToPair is actually used in practice, the curated examples below may help; you can also explore the enclosing class, org.apache.spark.api.java.JavaPairRDD, for more context.
Two code examples of JavaPairRDD.flatMapToPair are shown below, ordered by popularity.
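Before the examples, here is a minimal, self-contained sketch of what flatMapToPair does on a JavaPairRDD: each input pair may expand into zero or more output pairs. The data and the word-splitting logic are purely illustrative and not taken from the examples below.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

public class FlatMapToPairSketch {
    public static void main(String[] args) {
        JavaSparkContext sc = new JavaSparkContext(
                new SparkConf().setAppName("flatMapToPair-sketch").setMaster("local[*]"));

        // (docId, line) pairs; the content is made up for illustration.
        JavaPairRDD<Integer, String> lines = sc.parallelizePairs(Arrays.asList(
                new Tuple2<>(1, "spark makes rdds"),
                new Tuple2<>(2, "rdds are resilient")));

        // Each (docId, line) pair expands into one (word, docId) pair per word.
        JavaPairRDD<String, Integer> wordToDoc = lines.flatMapToPair(pair -> {
            List<Tuple2<String, Integer>> out = new ArrayList<>();
            for (String word : pair._2().split(" ")) {
                out.add(new Tuple2<>(word, pair._1()));
            }
            return out.iterator(); // zero or more output pairs per input pair
        });

        wordToDoc.collect().forEach(t -> System.out.println(t._1() + " -> " + t._2()));
        sc.close();
    }
}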
Example 1: getMapOutput
import org.apache.spark.api.java.JavaPairRDD; // import the class the method depends on
import it.unimi.dsi.fastutil.ints.IntArrayList; // assumed to be fastutil's IntArrayList
import scala.Tuple2;
import java.util.ArrayList;
import java.util.List;
/**
 * For each entity in a block, get a tuple like (eId, [entitiesFromTheOtherCollectionInThisBlock]).
 * @param blocksFromEI the blocks after block filtering, in the form: blockId, [entityIds]
 * @return for each entity in a block, a tuple like (eId, [entitiesFromTheOtherCollectionInThisBlock])
 */
public static JavaPairRDD<Integer, IntArrayList> getMapOutput(JavaPairRDD<Integer, IntArrayList> blocksFromEI) {
    return blocksFromEI.flatMapToPair(block -> {
        // Split the block's entities by sign: negative ids belong to one input
        // collection, non-negative ids to the other (faster than streaming block._2() twice).
        IntArrayList positives = new IntArrayList();
        IntArrayList negatives = new IntArrayList();
        for (int entityId : block._2()) {
            if (entityId < 0) {
                negatives.add(entityId);
            } else {
                positives.add(entityId);
            }
        }
        List<Tuple2<Integer, IntArrayList>> mapResults = new ArrayList<>();
        if (positives.isEmpty() || negatives.isEmpty()) {
            // Empty result on purpose, to avoid returning null and having to filter out nulls later.
            return mapResults.iterator();
        }
        // Emit the array of all negative entities for each positive entity...
        for (int positiveId : positives) {
            mapResults.add(new Tuple2<>(positiveId, negatives));
        }
        // ...and the array of all positive entities for each negative entity.
        for (int negativeId : negatives) {
            mapResults.add(new Tuple2<>(negativeId, positives));
        }
        return mapResults.iterator();
    });
}
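A hedged usage sketch for getMapOutput: the block ids and entity ids below are invented, sc is assumed to be an existing JavaSparkContext (with java.util.Arrays imported), and the convention that entities of one collection carry negative ids follows the comments in the method above.

IntArrayList block0 = new IntArrayList(new int[]{1, 2, -3}); // mixed block: emits pairs
IntArrayList block1 = new IntArrayList(new int[]{4, 5});     // one-sided block: emits nothing

JavaPairRDD<Integer, IntArrayList> blocksFromEI = sc.parallelizePairs(Arrays.asList(
        new Tuple2<>(0, block0),
        new Tuple2<>(1, block1)));

// Expected output: (1, [-3]), (2, [-3]), (-3, [1, 2]); block 1 is skipped entirely.
getMapOutput(blocksFromEI).collect()
        .forEach(t -> System.out.println(t._1() + " -> " + t._2()));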
Example 2: interleaveReads
import org.apache.spark.api.java.JavaPairRDD; // import the class the method depends on
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.seqdoop.hadoop_bam.FastqInputFormat;   // FastqInputFormat and SequencedFragment
import org.seqdoop.hadoop_bam.SequencedFragment;  // are assumed to come from Hadoop-BAM
import scala.Tuple2;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
private static JavaPairRDD<Text, SequencedFragment> interleaveReads(String fastq, String fastq2, int splitlen, JavaSparkContext sc) throws IOException {
    FileSystem fs = FileSystem.get(new Configuration());
    FileStatus fst = fs.getFileStatus(new Path(fastq));
    FileStatus fst2 = fs.getFileStatus(new Path(fastq2));

    // Split both FASTQ files into line-based chunks of the same size, so that
    // the i-th split of one file lines up with the i-th split of its mate.
    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);

    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
    JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);

    return zips.flatMapToPair(splits -> {
        // Read the two paired splits in lockstep and emit read 1 followed by read 2.
        FastqInputFormat.FastqRecordReader fqreader = new FastqInputFormat.FastqRecordReader(new Configuration(), splits._1);
        FastqInputFormat.FastqRecordReader fqreader2 = new FastqInputFormat.FastqRecordReader(new Configuration(), splits._2);

        ArrayList<Tuple2<Text, SequencedFragment>> reads = new ArrayList<>();
        while (fqreader.nextKeyValue()) {
            // Keep only the read name before the first space, which both mates share.
            String key = fqreader.getCurrentKey().toString().split(" ")[0];
            SequencedFragment sf = new SequencedFragment();
            sf.setQuality(new Text(fqreader.getCurrentValue().getQuality().toString()));
            sf.setSequence(new Text(fqreader.getCurrentValue().getSequence().toString()));

            if (fqreader2.nextKeyValue()) {
                String key2 = fqreader2.getCurrentKey().toString().split(" ")[0];
                SequencedFragment sf2 = new SequencedFragment();
                sf2.setQuality(new Text(fqreader2.getCurrentValue().getQuality().toString()));
                sf2.setSequence(new Text(fqreader2.getCurrentValue().getSequence().toString()));

                reads.add(new Tuple2<>(new Text(key), sf));
                reads.add(new Tuple2<>(new Text(key2), sf2));
            }
        }
        fqreader.close();
        fqreader2.close();
        return reads.iterator();
    });
}
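A hedged driver sketch for interleaveReads: the input paths, output path, and split length are placeholders, and writing the result back out as FASTQ assumes Hadoop-BAM's FastqOutputFormat (org.seqdoop.hadoop_bam.FastqOutputFormat) and org.apache.spark.SparkConf are on the classpath and imported.

// Placeholder paths; splitlen should cover whole 4-line FASTQ records.
JavaSparkContext sc = new JavaSparkContext(new SparkConf().setAppName("interleave-reads"));

JavaPairRDD<Text, SequencedFragment> interleaved =
        interleaveReads("reads_1.fastq", "reads_2.fastq", 4000, sc);

// FastqOutputFormat writes (Text, SequencedFragment) pairs back out as FASTQ records.
interleaved.saveAsNewAPIHadoopFile("interleaved_out", Text.class,
        SequencedFragment.class, FastqOutputFormat.class, sc.hadoopConfiguration());

sc.close();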