當前位置: 首頁>>代碼示例>>Java>>正文


Java JavaRDD.mapPartitions方法代碼示例

本文整理匯總了Java中org.apache.spark.api.java.JavaRDD.mapPartitions方法的典型用法代碼示例。如果您正苦於以下問題:Java JavaRDD.mapPartitions方法的具體用法?Java JavaRDD.mapPartitions怎麼用?Java JavaRDD.mapPartitions使用的例子?那麼，這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在org.apache.spark.api.java.JavaRDD的用法示例。


在下文中一共展示了JavaRDD.mapPartitions方法的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚，您的評價將有助於系統推薦出更棒的Java代碼示例。

示例1: setPartitionHeaders

import org.apache.spark.api.java.JavaRDD; //導入方法依賴的package包/類
/**
 * Hands the broadcast SAM header to {@code BAMHeaderOutputFormat} from within each
 * partition and passes the reads through untouched.
 *
 * @param reads  the reads to pass through
 * @param header broadcast wrapper around the SAM file header
 * @return an RDD whose partitions yield exactly the iterators they were given
 */
public static JavaRDD<SAMRecord> setPartitionHeaders(final JavaRDD<SAMRecord> reads, final Broadcast<SAMFileHeader> header) {

        return reads.mapPartitions(partition -> {
            // Unwrap the broadcast inside the partition function, then register it;
            // the record iterator itself is returned as-is.
            final SAMFileHeader samHeader = header.getValue();
            BAMHeaderOutputFormat.setHeader(samHeader);
            return partition;
        });
    }
 
開發者ID:NGSeq,項目名稱:ViraPipe,代碼行數:11,代碼來源:HDFSWriter.java

示例2: alignmentsToSAM

import org.apache.spark.api.java.JavaRDD; //導入方法依賴的package包/類
/**
 * Parses text alignment lines into {@link SAMRecord}s, partition by partition.
 *
 * <p>Lines that the parser rejects with a {@link SAMFormatException} are reported on
 * standard output and skipped, so the result may contain fewer records than the input
 * has lines. All parsed records of a partition are collected in memory before the
 * partition's iterator is returned.
 *
 * @param alignmentRDD one alignment line per element
 * @param header       header handed to the line parser
 * @return the successfully parsed records
 */
private static JavaRDD<SAMRecord> alignmentsToSAM(JavaRDD<String> alignmentRDD, SAMFileHeader header) {
    return alignmentRDD.mapPartitions(alns -> {

        final List<SAMRecord> records = new ArrayList<>();

        // One parser instance is created per partition invocation and reused for every line in it.
        final SAMLineParser samLP = new SAMLineParser(new DefaultSAMRecordFactory(), ValidationStringency.SILENT, header, null, null);
        while (alns.hasNext()) {

            // Strip any embedded line terminators before handing the text to the parser.
            final String aln = alns.next().replace("\r\n", "").replace("\n", "").replace(System.lineSeparator(), "");
            try {
                records.add(samLP.parseLine(aln));
            } catch (SAMFormatException e) {
                // Best effort: report and skip the malformed line. println(String) accepts a
                // null message, whereas calling toString() on it would throw and fail the
                // whole partition.
                System.out.println(e.getMessage());
            }
        }
        return records.iterator();
    });
}
 
開發者ID:NGSeq,項目名稱:ViraPipe,代碼行數:21,代碼來源:HDFSWriter.java

示例3: GetLU_COORD

import org.apache.spark.api.java.JavaRDD; //導入方法依賴的package包/類
/**
 * Builds a copy of {@code A} whose diagonal entries are replaced by explicit zeros.
 *
 * <p>Entries with {@code i != j} are carried over unchanged; entries with {@code i == j}
 * are re-created with value {@code 0.0} (they are kept as entries, not dropped).
 *
 * @param A the source coordinate matrix
 * @return a new coordinate matrix built from the transformed entries
 */
private static CoordinateMatrix GetLU_COORD(CoordinateMatrix A) {

        JavaRDD<MatrixEntry> entries = A.entries().toJavaRDD().cache();

        JavaRDD<MatrixEntry> offDiagonal = entries.mapPartitions(partition -> {
            List<MatrixEntry> out = new ArrayList<MatrixEntry>();

            while (partition.hasNext()) {
                MatrixEntry entry = partition.next();
                boolean onDiagonal = entry.i() == entry.j();

                // Diagonal positions are zeroed; everything else is passed through as-is.
                out.add(onDiagonal ? new MatrixEntry(entry.i(), entry.j(), 0.0) : entry);
            }

            return out.iterator();
        });

        return new CoordinateMatrix(offDiagonal.rdd());
    }
 
開發者ID:jmabuin,項目名稱:BLASpark,代碼行數:31,代碼來源:OtherOperations.java

示例4: GetD_COORD

import org.apache.spark.api.java.JavaRDD; //導入方法依賴的package包/類
/**
 * Builds a copy of {@code A} that keeps only its diagonal values.
 *
 * <p>Entries with {@code i != j} are re-created with value {@code 0.0}. Entries with
 * {@code i == j} are either passed through unchanged or, when {@code inverseValues} is
 * set, replaced by {@code 1.0 / value} (no check is made for a zero value).
 *
 * @param A             the source coordinate matrix
 * @param inverseValues whether diagonal values are replaced by their reciprocal
 * @param jsc           context used to broadcast {@code inverseValues}
 * @return a new coordinate matrix built from the transformed entries
 */
private static CoordinateMatrix GetD_COORD(CoordinateMatrix A, boolean inverseValues, JavaSparkContext jsc) {

        JavaRDD<MatrixEntry> entries = A.entries().toJavaRDD().cache();

        final Broadcast<Boolean> invertFlag = jsc.broadcast(inverseValues);

        JavaRDD<MatrixEntry> diagonalOnly = entries.mapPartitions(partition -> {
            List<MatrixEntry> out = new ArrayList<MatrixEntry>();

            // Read the broadcast flag once per partition rather than once per entry.
            boolean invert = invertFlag.getValue().booleanValue();

            while (partition.hasNext()) {
                MatrixEntry entry = partition.next();

                if (entry.i() != entry.j()) {
                    // Off-diagonal: keep the position, zero the value.
                    out.add(new MatrixEntry(entry.i(), entry.j(), 0.0));
                    continue;
                }

                out.add(invert ? new MatrixEntry(entry.i(), entry.j(), 1.0 / entry.value()) : entry);
            }

            return out.iterator();
        });

        return new CoordinateMatrix(diagonalOnly.rdd());
    }
 
開發者ID:jmabuin,項目名稱:BLASpark,代碼行數:40,代碼來源:OtherOperations.java

示例5: getClickStreamListInParallel

import org.apache.spark.api.java.JavaRDD; //導入方法依賴的package包/類
/**
 * Collects the session descriptors of every log index matching the configured prefix
 * and resolves them to click streams in parallel.
 *
 * <p>The descriptors are gathered up front via {@code getSessions}, distributed over
 * 16 partitions, and each partition opens its own {@code ESDriver} to look the
 * sessions up. The resulting RDD is counted once for logging before it is returned.
 *
 * @param props configuration; supplies the log index prefix and is passed to the
 *              per-partition {@code ESDriver} and {@code Session}
 * @param spark driver whose context parallelizes the session list
 * @param es    driver used to list the indices and fetch the sessions
 * @return the click streams of all sessions
 */
protected JavaRDD<ClickStream> getClickStreamListInParallel(Properties props, SparkDriver spark, ESDriver es) {

    List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));

    LOG.info("Retrieved {}", logIndexList.toString());

    List<String> sessionIdList = new ArrayList<>();
    for (String logIndex : logIndexList) {
      sessionIdList.addAll(this.getSessions(props, es, logIndex));
    }

    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessionIdList, 16);

    JavaRDD<ClickStream> clickStreamRDD = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, ClickStream>() {
      private static final long serialVersionUID = 1L;

      @Override
      public Iterator<ClickStream> call(Iterator<String> arg0) throws Exception {
        List<ClickStream> clickstreams = new ArrayList<>();

        ESDriver tmpES = new ESDriver(props);
        try {
          tmpES.createBulkProcessor();
          try {
            Session session = new Session(props, tmpES);
            while (arg0.hasNext()) {
              // Each element is a comma-separated descriptor with at least three fields;
              // fields 1, 2 and 0 are passed on in that order.
              // NOTE(review): field meanings are not visible here - confirm against getSessions.
              String[] sArr = arg0.next().split(",");
              clickstreams.addAll(session.getClickStreamList(sArr[1], sArr[2], sArr[0]));
            }
          } finally {
            // Tear the bulk processor down even when a lookup fails mid-partition.
            tmpES.destroyBulkProcessor();
          }
        } finally {
          // Always release the per-partition driver, including on failure.
          tmpES.close();
        }
        return clickstreams.iterator();
      }
    });

    LOG.info("Clickstream number: {}", clickStreamRDD.count());

    return clickStreamRDD;
  }
 
開發者ID:apache,項目名稱:incubator-sdap-mudrod,代碼行數:45,代碼來源:SessionExtractor.java

示例6: extractRankingTrainDataInParallel

import org.apache.spark.api.java.JavaRDD; //導入方法依賴的package包/類
/**
 * Collects the session descriptors of every log index matching the configured prefix
 * and resolves them to ranking training data in parallel.
 *
 * <p>The descriptors are gathered up front via {@code getSessions}, distributed over
 * 16 partitions, and each partition opens its own {@code ESDriver} to look the
 * sessions up. The resulting RDD is counted once for logging before it is returned.
 *
 * @param props configuration; supplies the log index prefix and is passed to the
 *              per-partition {@code ESDriver} and {@code Session}
 * @param spark driver whose context parallelizes the session list
 * @param es    driver used to list the indices and fetch the sessions
 * @return the ranking training data of all sessions
 */
protected JavaRDD<RankingTrainData> extractRankingTrainDataInParallel(Properties props, SparkDriver spark, ESDriver es) {

    List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));

    LOG.info(logIndexList.toString());

    List<String> sessionIdList = new ArrayList<>();
    for (String logIndex : logIndexList) {
      sessionIdList.addAll(this.getSessions(props, es, logIndex));
    }

    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessionIdList, 16);

    JavaRDD<RankingTrainData> clickStreamRDD = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, RankingTrainData>() {
      private static final long serialVersionUID = 1L;

      @Override
      public Iterator<RankingTrainData> call(Iterator<String> arg0) throws Exception {
        List<RankingTrainData> clickstreams = new ArrayList<>();

        ESDriver tmpES = new ESDriver(props);
        try {
          tmpES.createBulkProcessor();
          try {
            Session session = new Session(props, tmpES);
            while (arg0.hasNext()) {
              // Each element is a comma-separated descriptor with at least three fields;
              // fields 1, 2 and 0 are passed on in that order.
              // NOTE(review): field meanings are not visible here - confirm against getSessions.
              String[] sArr = arg0.next().split(",");
              clickstreams.addAll(session.getRankingTrainData(sArr[1], sArr[2], sArr[0]));
            }
          } finally {
            // Tear the bulk processor down even when a lookup fails mid-partition.
            tmpES.destroyBulkProcessor();
          }
        } finally {
          // Always release the per-partition driver, including on failure.
          tmpES.close();
        }
        return clickstreams.iterator();
      }
    });

    // The log label says "Clickstream" although the count is of ranking training records;
    // the message text is left as-is because it is runtime output.
    LOG.info("Clickstream number: {}", clickStreamRDD.count());

    return clickStreamRDD;
  }
 
開發者ID:apache,項目名稱:incubator-sdap-mudrod,代碼行數:45,代碼來源:SessionExtractor.java


注:本文中的org.apache.spark.api.java.JavaRDD.mapPartitions方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。