本文整理汇总了Java中org.apache.spark.api.java.JavaRDD.mapPartitions方法的典型用法代码示例。如果您正苦于以下问题：Java JavaRDD.mapPartitions方法的具体用法？Java JavaRDD.mapPartitions怎么用？Java JavaRDD.mapPartitions使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.spark.api.java.JavaRDD的用法示例。
在下文中一共展示了JavaRDD.mapPartitions方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: setPartitionHeaders
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Hands the broadcast SAM header to {@code BAMHeaderOutputFormat} once per partition
 * and passes the partition's records through untouched.
 *
 * @param reads  RDD whose partitions are visited
 * @param header broadcast wrapper around the header given to
 *               {@code BAMHeaderOutputFormat.setHeader}
 * @return the RDD produced by {@code mapPartitions}; each partition yields the same
 *         iterator it received
 */
public static JavaRDD<SAMRecord> setPartitionHeaders(final JavaRDD<SAMRecord> reads, final Broadcast<SAMFileHeader> header) {
    return reads.mapPartitions(partitionRecords -> {
        // The only work done per partition is this setHeader call; no record is modified.
        final SAMFileHeader samHeader = header.getValue();
        BAMHeaderOutputFormat.setHeader(samHeader);
        return partitionRecords;
    });
}
示例2: alignmentsToSAM
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Parses SAM text lines into {@code SAMRecord}s, one partition at a time.
 *
 * <p>Each line has {@code "\r\n"}, {@code "\n"} and the platform line separator removed
 * before it is handed to a {@code SAMLineParser} built with
 * {@code ValidationStringency.SILENT}. Lines that raise {@code SAMFormatException} are
 * reported on standard output and left out of the result.
 *
 * @param alignmentRDD RDD of SAM-formatted text lines
 * @param header       header passed to the {@code SAMLineParser} constructor
 * @return RDD of the records that parsed successfully
 */
private static JavaRDD<SAMRecord> alignmentsToSAM(JavaRDD<String> alignmentRDD, SAMFileHeader header) {
    return alignmentRDD.mapPartitions(alns -> {
        final List<SAMRecord> records = new ArrayList<>();
        // One parser is created per partition and reused for every line in it.
        final SAMLineParser samLP = new SAMLineParser(new DefaultSAMRecordFactory(), ValidationStringency.SILENT, header, null, null);
        // Loop-invariant, so it is looked up once instead of once per line.
        final String lineSeparator = System.lineSeparator();
        while (alns.hasNext()) {
            final String aln = alns.next().replace("\r\n", "").replace("\n", "").replace(lineSeparator, "");
            try {
                records.add(samLP.parseLine(aln));
            } catch (SAMFormatException e) {
                // Deliberate best effort: a malformed line is reported and skipped.
                // getMessage() may be null; println copes with that, whereas calling
                // toString() on it would throw from inside this handler.
                System.out.println(e.getMessage());
            }
        }
        return records.iterator();
    });
}
示例3: GetLU_COORD
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Builds a coordinate matrix from {@code A} in which every entry on the main diagonal
 * (row index equal to column index) is replaced by an entry with value {@code 0.0};
 * all other entries are carried over unchanged.
 *
 * @param A matrix whose entries are read
 * @return a new {@code CoordinateMatrix} built from the rewritten entries
 */
private static CoordinateMatrix GetLU_COORD(CoordinateMatrix A) {
    JavaRDD<MatrixEntry> entries = A.entries().toJavaRDD().cache();
    JavaRDD<MatrixEntry> offDiagonalEntries = entries.mapPartitions(new FlatMapFunction<Iterator<MatrixEntry>, MatrixEntry>() {
        @Override
        public Iterator<MatrixEntry> call(Iterator<MatrixEntry> partition) throws Exception {
            List<MatrixEntry> rewritten = new ArrayList<MatrixEntry>();
            while (partition.hasNext()) {
                MatrixEntry entry = partition.next();
                boolean onDiagonal = entry.i() == entry.j();
                // Diagonal positions are kept but zeroed, so the entry count is unchanged.
                rewritten.add(onDiagonal ? new MatrixEntry(entry.i(), entry.j(), 0.0) : entry);
            }
            return rewritten.iterator();
        }
    });
    return new CoordinateMatrix(offDiagonalEntries.rdd());
}
示例4: GetD_COORD
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Builds a coordinate matrix from {@code A} that keeps only the main diagonal: every
 * entry whose row and column indices differ is replaced by an entry with value
 * {@code 0.0}. Diagonal entries are kept as they are, or replaced by
 * {@code 1.0 / value} when {@code inverseValues} is set.
 *
 * @param A             matrix whose entries are read
 * @param inverseValues when {@code true}, diagonal values are replaced by their reciprocal
 * @param jsc           context used to broadcast {@code inverseValues} to the partitions
 * @return a new {@code CoordinateMatrix} built from the rewritten entries
 */
private static CoordinateMatrix GetD_COORD(CoordinateMatrix A, boolean inverseValues, JavaSparkContext jsc) {
    JavaRDD<MatrixEntry> entries = A.entries().toJavaRDD().cache();
    final Broadcast<Boolean> invertFlag = jsc.broadcast(inverseValues);
    JavaRDD<MatrixEntry> diagonalEntries = entries.mapPartitions(new FlatMapFunction<Iterator<MatrixEntry>, MatrixEntry>() {
        @Override
        public Iterator<MatrixEntry> call(Iterator<MatrixEntry> partition) throws Exception {
            List<MatrixEntry> rewritten = new ArrayList<MatrixEntry>();
            // Read the broadcast flag once per partition rather than once per entry.
            boolean invert = invertFlag.getValue();
            while (partition.hasNext()) {
                MatrixEntry entry = partition.next();
                MatrixEntry replacement;
                if (entry.i() != entry.j()) {
                    replacement = new MatrixEntry(entry.i(), entry.j(), 0.0);
                } else if (invert) {
                    replacement = new MatrixEntry(entry.i(), entry.j(), 1.0 / entry.value());
                } else {
                    replacement = entry;
                }
                rewritten.add(replacement);
            }
            return rewritten.iterator();
        }
    });
    return new CoordinateMatrix(diagonalEntries.rdd());
}
示例5: getClickStreamListInParallel
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Collects the session entries of every log index whose name starts with the configured
 * {@code MudrodConstants.LOG_INDEX} prefix, parallelizes them with a slice count of 16,
 * and turns each entry into click streams inside {@code mapPartitions}.
 *
 * <p>Every partition creates its own {@code ESDriver} and bulk processor. Both are now
 * released in {@code finally} blocks, so they are also released when processing an
 * entry throws.
 *
 * <p>Each session entry is split on commas and must contain at least three fields;
 * they are passed to {@code Session.getClickStreamList} in the order
 * {@code [1], [2], [0]}.
 *
 * @param props configuration passed to {@code getSessions}, {@code ESDriver} and {@code Session}
 * @param spark driver whose {@code sc} is used to parallelize the session entries
 * @param es    driver used on the calling side to list indices and fetch sessions
 * @return RDD of the click streams produced for all session entries
 */
protected JavaRDD<ClickStream> getClickStreamListInParallel(Properties props, SparkDriver spark, ESDriver es) {
    List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
    LOG.info("Retrieved {}", logIndexList.toString());

    List<String> sessionIdList = new ArrayList<>();
    for (String logIndex : logIndexList) {
        sessionIdList.addAll(this.getSessions(props, es, logIndex));
    }

    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessionIdList, 16);
    JavaRDD<ClickStream> clickStreamRDD = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, ClickStream>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Iterator<ClickStream> call(Iterator<String> arg0) throws Exception {
            ESDriver tmpES = new ESDriver(props);
            try {
                tmpES.createBulkProcessor();
                try {
                    Session session = new Session(props, tmpES);
                    List<ClickStream> clickstreams = new ArrayList<>();
                    while (arg0.hasNext()) {
                        String[] sArr = arg0.next().split(",");
                        clickstreams.addAll(session.getClickStreamList(sArr[1], sArr[2], sArr[0]));
                    }
                    // The list is fully built before the driver is released below.
                    return clickstreams.iterator();
                } finally {
                    // Only reached once createBulkProcessor() has succeeded.
                    tmpES.destroyBulkProcessor();
                }
            } finally {
                // Runs even if the bulk processor could not be created or destroyed.
                tmpES.close();
            }
        }
    });

    // NOTE(review): count() evaluates the partitions here just for this log line, and the
    // returned RDD is not cached - confirm that a later recomputation is acceptable.
    LOG.info("Clickstream number: {}", clickStreamRDD.count());
    return clickStreamRDD;
}
示例6: extractRankingTrainDataInParallel
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Collects the session entries of every log index whose name starts with the configured
 * {@code MudrodConstants.LOG_INDEX} prefix, parallelizes them with a slice count of 16,
 * and turns each entry into ranking training data inside {@code mapPartitions}.
 *
 * <p>Every partition creates its own {@code ESDriver} and bulk processor. Both are now
 * released in {@code finally} blocks, so they are also released when processing an
 * entry throws.
 *
 * <p>Each session entry is split on commas and must contain at least three fields;
 * they are passed to {@code Session.getRankingTrainData} in the order
 * {@code [1], [2], [0]}.
 *
 * @param props configuration passed to {@code getSessions}, {@code ESDriver} and {@code Session}
 * @param spark driver whose {@code sc} is used to parallelize the session entries
 * @param es    driver used on the calling side to list indices and fetch sessions
 * @return RDD of the ranking training data produced for all session entries
 */
protected JavaRDD<RankingTrainData> extractRankingTrainDataInParallel(Properties props, SparkDriver spark, ESDriver es) {
    List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
    LOG.info(logIndexList.toString());

    List<String> sessionIdList = new ArrayList<>();
    for (String logIndex : logIndexList) {
        sessionIdList.addAll(this.getSessions(props, es, logIndex));
    }

    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessionIdList, 16);
    JavaRDD<RankingTrainData> clickStreamRDD = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, RankingTrainData>() {
        private static final long serialVersionUID = 1L;

        @Override
        public Iterator<RankingTrainData> call(Iterator<String> arg0) throws Exception {
            ESDriver tmpES = new ESDriver(props);
            try {
                tmpES.createBulkProcessor();
                try {
                    Session session = new Session(props, tmpES);
                    List<RankingTrainData> clickstreams = new ArrayList<>();
                    while (arg0.hasNext()) {
                        String[] sArr = arg0.next().split(",");
                        clickstreams.addAll(session.getRankingTrainData(sArr[1], sArr[2], sArr[0]));
                    }
                    // The list is fully built before the driver is released below.
                    return clickstreams.iterator();
                } finally {
                    // Only reached once createBulkProcessor() has succeeded.
                    tmpES.destroyBulkProcessor();
                }
            } finally {
                // Runs even if the bulk processor could not be created or destroyed.
                tmpES.close();
            }
        }
    });

    // NOTE(review): count() evaluates the partitions here just for this log line, and the
    // returned RDD is not cached - confirm that a later recomputation is acceptable.
    LOG.info("Clickstream number: {}", clickStreamRDD.count());
    return clickStreamRDD;
}