

Java JavaRDD.mapPartitions Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.api.java.JavaRDD.mapPartitions. If you are wondering what JavaRDD.mapPartitions does, how to call it, or what real-world uses look like, the curated examples below should help. You can also explore further usage examples of the enclosing class, org.apache.spark.api.java.JavaRDD.


Six code examples of JavaRDD.mapPartitions are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Java code examples.
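
Before the project examples, here is a minimal, self-contained sketch of the general mapPartitions pattern (assuming the Spark 2.x Java API, where the function passed to mapPartitions returns an Iterator); the application name, input data, and element types are illustrative only:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class MapPartitionsSketch {
    public static void main(String[] args) {
        JavaSparkContext jsc = new JavaSparkContext(new SparkConf().setAppName("mapPartitionsSketch").setMaster("local[*]"));
        JavaRDD<String> lines = jsc.parallelize(Arrays.asList("a", "bb", "ccc"), 2);

        // mapPartitions is invoked once per partition with an Iterator over that partition's
        // elements, and must return an Iterator over the output elements.
        JavaRDD<Integer> lengths = lines.mapPartitions((Iterator<String> it) -> {
            List<Integer> out = new ArrayList<>();   // per-partition state is created once here
            while (it.hasNext()) {
                out.add(it.next().length());
            }
            return out.iterator();
        });

        System.out.println(lengths.collect());       // prints the per-line lengths
        jsc.stop();
    }
}

Unlike map, this lets per-partition setup and teardown happen once per partition rather than once per element, which is exactly how the examples below use it.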

Example 1: setPartitionHeaders

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
public static JavaRDD<SAMRecord> setPartitionHeaders(final JavaRDD<SAMRecord> reads, final Broadcast<SAMFileHeader> header) {
    // Apply the broadcast SAM header once per partition, then pass the partition's records through unchanged.
    return reads.mapPartitions(records -> {
        BAMHeaderOutputFormat.setHeader(header.getValue());
        return records;
    });
}
 
Developer: NGSeq, Project: ViraPipe, Lines: 11, Source: HDFSWriter.java

Example 2: alignmentsToSAM

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
private static JavaRDD<SAMRecord> alignmentsToSAM(JavaRDD<String> alignmentRDD, SAMFileHeader header) {
    // Parse raw SAM text lines into SAMRecord objects, using one SAMLineParser per partition.
    return alignmentRDD.mapPartitions(alns -> {

        List<SAMRecord> records = new ArrayList<SAMRecord>();

        final SAMLineParser samLP = new SAMLineParser(new DefaultSAMRecordFactory(), ValidationStringency.SILENT, header, null, null);
        while (alns.hasNext()) {

            String aln = alns.next().replace("\r\n", "").replace("\n", "").replace(System.lineSeparator(), "");
            SAMRecord record = null;
            try{
                record = samLP.parseLine(aln);
                records.add(record);
            }catch(SAMFormatException e){
                System.out.println(e.getMessage());
            }
        }
        return records.iterator();
    });
}
 
Developer: NGSeq, Project: ViraPipe, Lines: 21, Source: HDFSWriter.java

Example 3: GetLU_COORD

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
// Returns a CoordinateMatrix holding the off-diagonal (strictly lower and upper triangular) entries of A,
// with the diagonal written as explicit zeros.
private static CoordinateMatrix GetLU_COORD(CoordinateMatrix A) {

        JavaRDD<MatrixEntry> rows = A.entries().toJavaRDD().cache();

        JavaRDD<MatrixEntry> LUEntries = rows.mapPartitions(new FlatMapFunction<Iterator<MatrixEntry>, MatrixEntry>() {
            @Override
            public Iterator<MatrixEntry> call(Iterator<MatrixEntry> matrixEntryIterator) throws Exception {
                List<MatrixEntry> newLowerEntries = new ArrayList<MatrixEntry>();


                while(matrixEntryIterator.hasNext()) {
                    MatrixEntry currentEntry = matrixEntryIterator.next();

                    if(currentEntry.i() != currentEntry.j()) {
                        newLowerEntries.add(currentEntry);
                    }
                    else {
                        newLowerEntries.add(new MatrixEntry(currentEntry.i(), currentEntry.j(), 0.0));
                    }

                }

                return newLowerEntries.iterator();
            }
        });

        CoordinateMatrix newMatrix = new CoordinateMatrix(LUEntries.rdd());

        return newMatrix;
    }
 
Developer: jmabuin, Project: BLASpark, Lines: 31, Source: OtherOperations.java
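
For comparison, under the same Spark 2.x assumption the anonymous FlatMapFunction in Example 3 could be written as a lambda. This is only a sketch with a hypothetical method name, not code from the BLASpark project; the imports are the same as in Example 3 (java.util.ArrayList, java.util.List, and the CoordinateMatrix/MatrixEntry classes from org.apache.spark.mllib.linalg.distributed):

private static CoordinateMatrix getLuCoordWithLambda(CoordinateMatrix A) {
    // Keep off-diagonal entries as-is and replace diagonal entries with explicit zeros.
    JavaRDD<MatrixEntry> luEntries = A.entries().toJavaRDD().mapPartitions(it -> {
        List<MatrixEntry> out = new ArrayList<>();
        while (it.hasNext()) {
            MatrixEntry e = it.next();
            out.add(e.i() != e.j() ? e : new MatrixEntry(e.i(), e.j(), 0.0));
        }
        return out.iterator();
    });
    return new CoordinateMatrix(luEntries.rdd());
}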

Example 4: GetD_COORD

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
// Returns a CoordinateMatrix holding only the diagonal of A (optionally inverted entrywise),
// with all off-diagonal positions written as explicit zeros.
private static CoordinateMatrix GetD_COORD(CoordinateMatrix A, boolean inverseValues, JavaSparkContext jsc) {

        JavaRDD<MatrixEntry> rows = A.entries().toJavaRDD().cache();

        final Broadcast<Boolean> inverseValuesBC = jsc.broadcast(inverseValues);

        JavaRDD<MatrixEntry> LUEntries = rows.mapPartitions(new FlatMapFunction<Iterator<MatrixEntry>, MatrixEntry>() {
            @Override
            public Iterator<MatrixEntry> call(Iterator<MatrixEntry> matrixEntryIterator) throws Exception {
                List<MatrixEntry> newLowerEntries = new ArrayList<MatrixEntry>();

                boolean inverseValuesValue = inverseValuesBC.getValue().booleanValue();

                while(matrixEntryIterator.hasNext()) {
                    MatrixEntry currentEntry = matrixEntryIterator.next();

                    if(currentEntry.i() == currentEntry.j()) {
                        if(inverseValuesValue) {
                            newLowerEntries.add(new MatrixEntry(currentEntry.i(), currentEntry.j(), 1.0/currentEntry.value()));
                        }
                        else {
                            newLowerEntries.add(currentEntry);
                        }

                    }
                    else {
                        newLowerEntries.add(new MatrixEntry(currentEntry.i(), currentEntry.j(), 0.0));
                    }

                }

                return newLowerEntries.iterator();
            }
        });

        CoordinateMatrix newMatrix = new CoordinateMatrix(LUEntries.rdd());

        return newMatrix;
    }
 
Developer: jmabuin, Project: BLASpark, Lines: 40, Source: OtherOperations.java

Example 5: getClickStreamListInParallel

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
protected JavaRDD<ClickStream> getClickStreamListInParallel(Properties props, SparkDriver spark, ESDriver es) {

    List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));

    LOG.info("Retrieved {}", logIndexList.toString());

    List<String> sessionIdList = new ArrayList<>();
    for (int n = 0; n < logIndexList.size(); n++) {
      String logIndex = logIndexList.get(n);
      List<String> tmpsessionList = this.getSessions(props, es, logIndex);
      sessionIdList.addAll(tmpsessionList);
    }

    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessionIdList, 16);

    JavaRDD<ClickStream> clickStreamRDD = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, ClickStream>() {
      private static final long serialVersionUID = 1L;

      @Override
      public Iterator<ClickStream> call(Iterator<String> arg0) throws Exception {
        ESDriver tmpES = new ESDriver(props);
        tmpES.createBulkProcessor();

        Session session = new Session(props, tmpES);
        List<ClickStream> clickstreams = new ArrayList<>();
        while (arg0.hasNext()) {
          String s = arg0.next();
          String[] sArr = s.split(",");
          List<ClickStream> clicks = session.getClickStreamList(sArr[1], sArr[2], sArr[0]);
          clickstreams.addAll(clicks);
        }
        tmpES.destroyBulkProcessor();
        tmpES.close();
        return clickstreams.iterator();
      }
    });

    LOG.info("Clickstream number: {}", clickStreamRDD.count());

    return clickStreamRDD;
  }
 
Developer: apache, Project: incubator-sdap-mudrod, Lines: 45, Source: SessionExtractor.java
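
Examples 5 and 6 rely on mapPartitions mainly so that the ESDriver and its bulk processor are created and torn down once per partition rather than once per session ID. Stripped of the Mudrod-specific types, the shape of that pattern is roughly the sketch below; Client, Result, and processOne are hypothetical placeholders rather than part of the project's API:

JavaRDD<Result> results = inputRDD.mapPartitions((Iterator<String> it) -> {
    Client client = new Client(props);          // hypothetical: expensive connection setup, once per partition
    List<Result> out = new ArrayList<>();
    while (it.hasNext()) {
        out.add(processOne(client, it.next())); // hypothetical per-record work reusing the shared client
    }
    client.close();                             // teardown before the partition's results are returned
    return out.iterator();
});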

Example 6: extractRankingTrainDataInParallel

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
protected JavaRDD<RankingTrainData> extractRankingTrainDataInParallel(Properties props, SparkDriver spark, ESDriver es) {

    List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));

    LOG.info(logIndexList.toString());

    List<String> sessionIdList = new ArrayList<>();
    for (int n = 0; n < logIndexList.size(); n++) {
      String logIndex = logIndexList.get(n);
      List<String> tmpsessionList = this.getSessions(props, es, logIndex);
      sessionIdList.addAll(tmpsessionList);
    }

    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessionIdList, 16);

    JavaRDD<RankingTrainData> clickStreamRDD = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, RankingTrainData>() {
      private static final long serialVersionUID = 1L;

      @Override
      public Iterator<RankingTrainData> call(Iterator<String> arg0) throws Exception {
        ESDriver tmpES = new ESDriver(props);
        tmpES.createBulkProcessor();

        Session session = new Session(props, tmpES);
        List<RankingTrainData> clickstreams = new ArrayList<>();
        while (arg0.hasNext()) {
          String s = arg0.next();
          String[] sArr = s.split(",");
          List<RankingTrainData> clicks = session.getRankingTrainData(sArr[1], sArr[2], sArr[0]);
          clickstreams.addAll(clicks);
        }
        tmpES.destroyBulkProcessor();
        tmpES.close();
        return clickstreams.iterator();
      }
    });

    LOG.info("Clickstream number: {}", clickStreamRDD.count());

    return clickStreamRDD;
  }
 
Developer: apache, Project: incubator-sdap-mudrod, Lines: 45, Source: SessionExtractor.java


Note: The org.apache.spark.api.java.JavaRDD.mapPartitions examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets are taken from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors. Please follow each project's License when distributing or using the code; do not reproduce this article without permission.