

Java JavaRDD.rdd Method Code Examples

This article collects typical usage examples of the Java method org.apache.spark.api.java.JavaRDD.rdd. If you are wondering what JavaRDD.rdd does, how to call it, or what real-world uses look like, the curated code examples below may help. You can also explore further usage examples of the enclosing class, org.apache.spark.api.java.JavaRDD.


Fifteen code examples of the JavaRDD.rdd method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better Java code examples.
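Before the project-sourced examples, here is a minimal, self-contained sketch (written for this article, not taken from any project below) of what JavaRDD.rdd does: it unwraps a JavaRDD and returns the underlying Scala RDD, which Scala-oriented Spark APIs such as the MLlib distributed matrices expect. The class name and the local[*] master are illustrative assumptions.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.RDD;

public class JavaRddToRddExample {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("JavaRDD.rdd example").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // A Java-friendly RDD of integers.
        JavaRDD<Integer> javaRdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));

        // rdd() returns the Scala RDD wrapped by the JavaRDD; no data is copied.
        RDD<Integer> scalaRdd = javaRdd.rdd();

        System.out.println("Number of elements: " + scalaRdd.count());
        sc.stop();
    }
}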

Example 1: buildScan

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
@Override
public RDD<Row> buildScan() {
    log.debug("-> buildScan()");
    schema();

    // I have isolated the work to a method to keep the plumbing code as simple as
    // possible.
    List<PhotoMetadata> table = collectData();

    @SuppressWarnings("resource")
    JavaSparkContext sparkContext = new JavaSparkContext(sqlContext.sparkContext());
    JavaRDD<Row> rowRDD = sparkContext.parallelize(table)
            .map(photo -> SparkBeanUtils.getRowFromBean(schema, photo));

    return rowRDD.rdd();
}
 
Developer: jgperrin, Project: net.jgp.labs.spark.datasources, Lines: 17, Source: ExifDirectoryRelation.java

Example 2: getSVDMatrix

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
/**
 * GetSVDMatrix: create an SVD matrix CSV file from the original CSV file.
 *
 * @param csvFileName       input CSV file; each row is a term and each column is a document.
 * @param svdDimention      dimension of the SVD matrix
 * @param svdMatrixFileName CSV file name of the SVD matrix
 */
public void getSVDMatrix(String csvFileName, int svdDimention, String svdMatrixFileName) {

  JavaPairRDD<String, Vector> importRDD = MatrixUtil.loadVectorFromCSV(spark, csvFileName, 1);
  JavaRDD<Vector> vectorRDD = importRDD.values();
  RowMatrix wordDocMatrix = new RowMatrix(vectorRDD.rdd());
  RowMatrix tfidfMatrix = MatrixUtil.createTFIDFMatrix(wordDocMatrix);
  RowMatrix svdMatrix = MatrixUtil.buildSVDMatrix(tfidfMatrix, svdDimention);

  List<String> rowKeys = importRDD.keys().collect();
  List<String> colKeys = new ArrayList<>();
  for (int i = 0; i < svdDimention; i++) {
    colKeys.add("dimension" + i);
  }
  MatrixUtil.exportToCSV(svdMatrix, rowKeys, colKeys, svdMatrixFileName);
}
 
Developer: apache, Project: incubator-sdap-mudrod, Lines: 23, Source: SVDAnalyzer.java
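The rdd() call above is the bridge between the Java pair RDD loaded from the CSV and MLlib's RowMatrix. Below is a minimal, self-contained sketch of just that step, with inline test data standing in for MatrixUtil.loadVectorFromCSV; the class name and the data are assumptions made for this article.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.mllib.linalg.distributed.RowMatrix;
import scala.Tuple2;

public class RowMatrixFromPairRddSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("RowMatrix from JavaPairRDD").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Stand-in for the loaded CSV: one (term, count vector) pair per row.
        List<Tuple2<String, Vector>> rows = Arrays.asList(
                new Tuple2<>("term1", Vectors.dense(1.0, 0.0, 2.0)),
                new Tuple2<>("term2", Vectors.dense(0.0, 3.0, 1.0)));
        JavaPairRDD<String, Vector> importRDD = sc.parallelizePairs(rows);

        // The step getSVDMatrix relies on: drop the keys and hand the
        // underlying Scala RDD of vectors to the RowMatrix constructor.
        JavaRDD<Vector> vectorRDD = importRDD.values();
        RowMatrix wordDocMatrix = new RowMatrix(vectorRDD.rdd());

        System.out.println("rows=" + wordDocMatrix.numRows() + ", cols=" + wordDocMatrix.numCols());
        sc.stop();
    }
}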

Example 3: GetLU_COORD

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
private static CoordinateMatrix GetLU_COORD(CoordinateMatrix A) {

        JavaRDD<MatrixEntry> rows = A.entries().toJavaRDD().cache();

        JavaRDD<MatrixEntry> LUEntries = rows.mapPartitions(new FlatMapFunction<Iterator<MatrixEntry>, MatrixEntry>() {
            @Override
            public Iterator<MatrixEntry> call(Iterator<MatrixEntry> matrixEntryIterator) throws Exception {
                List<MatrixEntry> newLowerEntries = new ArrayList<MatrixEntry>();


                while(matrixEntryIterator.hasNext()) {
                    MatrixEntry currentEntry = matrixEntryIterator.next();

                    if(currentEntry.i() != currentEntry.j()) {
                        newLowerEntries.add(currentEntry);
                    }
                    else {
                        newLowerEntries.add(new MatrixEntry(currentEntry.i(), currentEntry.j(), 0.0));
                    }

                }

                return newLowerEntries.iterator();
            }
        });

        CoordinateMatrix newMatrix = new CoordinateMatrix(LUEntries.rdd());

        return newMatrix;
    }
 
Developer: jmabuin, Project: BLASpark, Lines: 31, Source: OtherOperations.java
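The final step of GetLU_COORD, constructing a CoordinateMatrix from a JavaRDD of MatrixEntry via rdd(), can be exercised on its own. A minimal sketch with hand-built entries follows; the class name and the entries are illustrative assumptions.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.linalg.distributed.CoordinateMatrix;
import org.apache.spark.mllib.linalg.distributed.MatrixEntry;

public class CoordinateMatrixSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("CoordinateMatrix from JavaRDD.rdd").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // A small sparse 3x3 matrix given as (row, column, value) entries.
        JavaRDD<MatrixEntry> entries = sc.parallelize(Arrays.asList(
                new MatrixEntry(0, 0, 1.0),
                new MatrixEntry(1, 1, 2.0),
                new MatrixEntry(2, 0, 3.0)));

        // Same construction as GetLU_COORD above: the CoordinateMatrix
        // constructor expects the Scala RDD, so the JavaRDD is unwrapped with rdd().
        CoordinateMatrix matrix = new CoordinateMatrix(entries.rdd());

        System.out.println("rows=" + matrix.numRows() + ", cols=" + matrix.numCols());
        sc.stop();
    }
}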

Example 4: GetD_COORD

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
private static CoordinateMatrix GetD_COORD(CoordinateMatrix A, boolean inverseValues, JavaSparkContext jsc) {

        JavaRDD<MatrixEntry> rows = A.entries().toJavaRDD().cache();

        final Broadcast<Boolean> inverseValuesBC = jsc.broadcast(inverseValues);

        JavaRDD<MatrixEntry> LUEntries = rows.mapPartitions(new FlatMapFunction<Iterator<MatrixEntry>, MatrixEntry>() {
            @Override
            public Iterator<MatrixEntry> call(Iterator<MatrixEntry> matrixEntryIterator) throws Exception {
                List<MatrixEntry> newLowerEntries = new ArrayList<MatrixEntry>();

                boolean inverseValuesValue = inverseValuesBC.getValue().booleanValue();

                while(matrixEntryIterator.hasNext()) {
                    MatrixEntry currentEntry = matrixEntryIterator.next();

                    if(currentEntry.i() == currentEntry.j()) {
                        if(inverseValuesValue) {
                            newLowerEntries.add(new MatrixEntry(currentEntry.i(), currentEntry.j(), 1.0/currentEntry.value()));
                        }
                        else {
                            newLowerEntries.add(currentEntry);
                        }

                    }
                    else {
                        newLowerEntries.add(new MatrixEntry(currentEntry.i(), currentEntry.j(), 0.0));
                    }

                }

                return newLowerEntries.iterator();
            }
        });

        CoordinateMatrix newMatrix = new CoordinateMatrix(LUEntries.rdd());

        return newMatrix;
    }
 
Developer: jmabuin, Project: BLASpark, Lines: 40, Source: OtherOperations.java

Example 5: buildIndexRowMatrix

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
/**
 * Convert a vector RDD to an indexed row matrix.
 *
 * @param vecs
 *          vector RDD
 * @return IndexedRowMatrix
 */
public static IndexedRowMatrix buildIndexRowMatrix(JavaRDD<Vector> vecs) {
  JavaRDD<IndexedRow> indexrows = vecs.zipWithIndex().map(new Function<Tuple2<Vector, Long>, IndexedRow>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public IndexedRow call(Tuple2<Vector, Long> docId) {
      return new IndexedRow(docId._2, docId._1);
    }
  });
  return new IndexedRowMatrix(indexrows.rdd());
}
 
Developer: apache, Project: incubator-sdap-mudrod, Lines: 22, Source: MatrixUtil.java
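For comparison, the same conversion that buildIndexRowMatrix performs, written as a small standalone program with a lambda in place of the anonymous Function. The class name and test vectors are assumptions made for this article.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.mllib.linalg.distributed.IndexedRow;
import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;

public class IndexedRowMatrixSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("IndexedRowMatrix from JavaRDD.rdd").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaRDD<Vector> vecs = sc.parallelize(Arrays.asList(
                Vectors.dense(1.0, 2.0),
                Vectors.dense(3.0, 4.0)));

        // Pair each vector with its position, wrap it in an IndexedRow, then
        // hand the unwrapped Scala RDD to the IndexedRowMatrix constructor.
        JavaRDD<IndexedRow> indexedRows =
                vecs.zipWithIndex().map(t -> new IndexedRow(t._2, t._1));
        IndexedRowMatrix matrix = new IndexedRowMatrix(indexedRows.rdd());

        System.out.println("rows=" + matrix.numRows() + ", cols=" + matrix.numCols());
        sc.stop();
    }
}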

Example 6: DmXV

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
public DmXV(GeneralOptions DmXV_Options, JavaSparkContext ctx ) {

        this.ctx = ctx;

        this.ctx.getConf().setAppName("BLASpark - Example DmXV");

        long numPartitions = DmXV_Options.getNumPartitions();

        this.inputVectorPath = DmXV_Options.getInputVectorPath();
        this.inputMatrixPath = DmXV_Options.getInputMatrixPath();
        this.outputVectorPath = DmXV_Options.getOutputVectorPath();

        this.alpha = DmXV_Options.getAlpha();
        this.beta = DmXV_Options.getBeta();

        // Read MATRIX input data
        JavaRDD<IndexedRow> inputMatrixData;

        if(numPartitions != 0) {
            inputMatrixData = ctx.newAPIHadoopFile(inputMatrixPath, RowPerLineInputFormat.class,
                    Long.class, double[].class, ctx.hadoopConfiguration())
                    .map(new Array2IndexedRow())
                    .repartition((int)numPartitions);
        }
        else {
            inputMatrixData = ctx.newAPIHadoopFile(inputMatrixPath, RowPerLineInputFormat.class,
                    Long.class, double[].class, ctx.hadoopConfiguration())
                    .map(new Array2IndexedRow());
        }


        this.tmpMatrix = new IndexedRowMatrix(inputMatrixData.rdd());

        if(DmXV_Options.getMatrixFormat() == GeneralOptions.MatrixFormat.COORDINATE) {
            LOG.info("The matrix format is CoordinateMatrix");
            this.matrix = this.tmpMatrix.toCoordinateMatrix();
            ((CoordinateMatrix)this.matrix).entries().cache();
        }
        else if(DmXV_Options.getMatrixFormat() == GeneralOptions.MatrixFormat.BLOCK) {
            LOG.info("The matrix format is BlockMatrix. Nrows: "+DmXV_Options.getRowsPerlBlock()+". Ncols: "+DmXV_Options.getColsPerBlock());
            this.matrix = this.tmpMatrix.toBlockMatrix(DmXV_Options.getRowsPerlBlock(), DmXV_Options.getColsPerBlock());
            ((BlockMatrix)this.matrix).blocks().cache();
        }
        else {
            //this.tmpMatrix.rows().cache();
            LOG.info("The matrix format is IndexedRowMatrix");
            this.matrix = this.tmpMatrix;
            ((IndexedRowMatrix)this.matrix).rows().cache();
        }


        // Read VECTOR input data
        this.vector = IO.readVectorFromFileInHDFS(this.inputVectorPath, this.ctx.hadoopConfiguration());

        this.outputVector = Vectors.zeros(this.vector.size()).toDense();





    }
 
Developer: jmabuin, Project: BLASpark, Lines: 62, Source: DmXV.java

Example 7: GetD_IRW

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
private static IndexedRowMatrix GetD_IRW(IndexedRowMatrix A, boolean inverseValues, JavaSparkContext jsc) {

        JavaRDD<IndexedRow> rows = A.rows().toJavaRDD().cache();

        final Broadcast<Boolean> inverseValuesBC = jsc.broadcast(inverseValues);
        JavaRDD<IndexedRow> LURows = rows.map(new Function<IndexedRow, IndexedRow>() {

            @Override
            public IndexedRow call(IndexedRow indexedRow) throws Exception {
                long index = indexedRow.index();
                DenseVector vect = indexedRow.vector().toDense();

                boolean inverseValuesValue = inverseValuesBC.getValue().booleanValue();

                double newValues[] = new double[vect.size()];


                for(int i = 0; i< vect.size(); i++) {

                    if( i == index) {
                        if(inverseValuesValue) {
                            newValues[i] = 1.0/vect.apply(i);
                        }
                        else {
                            newValues[i] = vect.apply(i);
                        }
                    }
                    else {
                        newValues[i] = 0.0;
                    }

                }

                DenseVector newVector = new DenseVector(newValues);

                return new IndexedRow(index, newVector);

            }
        });

        IndexedRowMatrix newMatrix = new IndexedRowMatrix(LURows.rdd());

        return newMatrix;
    }
 
Developer: jmabuin, Project: BLASpark, Lines: 45, Source: OtherOperations.java

Example 8: buildModel

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
@Override
public PMML buildModel(JavaSparkContext sparkContext,
                       JavaRDD<String> trainData,
                       List<?> hyperParameters,
                       Path candidatePath) {
  int features = (Integer) hyperParameters.get(0);
  double lambda = (Double) hyperParameters.get(1);
  double alpha = (Double) hyperParameters.get(2);
  double epsilon = Double.NaN;
  if (logStrength) {
    epsilon = (Double) hyperParameters.get(3);
  }
  Preconditions.checkArgument(features > 0);
  Preconditions.checkArgument(lambda >= 0.0);
  Preconditions.checkArgument(alpha > 0.0);
  if (logStrength) {
    Preconditions.checkArgument(epsilon > 0.0);
  }

  JavaRDD<String[]> parsedRDD = trainData.map(MLFunctions.PARSE_FN);
  parsedRDD.cache();

  Map<String,Integer> userIDIndexMap = buildIDIndexMapping(parsedRDD, true);
  Map<String,Integer> itemIDIndexMap = buildIDIndexMapping(parsedRDD, false);

  log.info("Broadcasting ID-index mappings for {} users, {} items",
           userIDIndexMap.size(), itemIDIndexMap.size());

  Broadcast<Map<String,Integer>> bUserIDToIndex = sparkContext.broadcast(userIDIndexMap);
  Broadcast<Map<String,Integer>> bItemIDToIndex = sparkContext.broadcast(itemIDIndexMap);

  JavaRDD<Rating> trainRatingData = parsedToRatingRDD(parsedRDD, bUserIDToIndex, bItemIDToIndex);
  trainRatingData = aggregateScores(trainRatingData, epsilon);
  ALS als = new ALS()
      .setRank(features)
      .setIterations(iterations)
      .setLambda(lambda)
      .setCheckpointInterval(5);
  if (implicit) {
    als = als.setImplicitPrefs(true).setAlpha(alpha);
  }

  RDD<Rating> trainingRatingDataRDD = trainRatingData.rdd();
  trainingRatingDataRDD.cache();
  MatrixFactorizationModel model = als.run(trainingRatingDataRDD);
  trainingRatingDataRDD.unpersist(false);

  bUserIDToIndex.unpersist();
  bItemIDToIndex.unpersist();

  parsedRDD.unpersist();

  Broadcast<Map<Integer,String>> bUserIndexToID = sparkContext.broadcast(invertMap(userIDIndexMap));
  Broadcast<Map<Integer,String>> bItemIndexToID = sparkContext.broadcast(invertMap(itemIDIndexMap));

  PMML pmml = mfModelToPMML(model,
                            features,
                            lambda,
                            alpha,
                            epsilon,
                            implicit,
                            logStrength,
                            candidatePath,
                            bUserIndexToID,
                            bItemIndexToID);
  unpersist(model);

  bUserIndexToID.unpersist();
  bItemIndexToID.unpersist();

  return pmml;
}
 
Developer: oncewang, Project: oryx2, Lines: 73, Source: ALSUpdate.java
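The key use of rdd() in buildModel is unwrapping the JavaRDD of Rating objects so the Scala RDD can be cached, passed to ALS.run, and unpersisted afterwards. Below is a minimal, self-contained sketch of that pattern with toy ratings; the class name, data, and hyperparameters are illustrative assumptions, not values from the project above.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.recommendation.ALS;
import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
import org.apache.spark.mllib.recommendation.Rating;
import org.apache.spark.rdd.RDD;

public class AlsRddSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("ALS on JavaRDD.rdd").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaRDD<Rating> ratings = sc.parallelize(Arrays.asList(
                new Rating(1, 10, 4.0),
                new Rating(1, 20, 3.0),
                new Rating(2, 10, 5.0),
                new Rating(2, 30, 1.0)));

        // As in the example above: unwrap to the Scala RDD so it can be cached
        // (and unpersisted) independently of the JavaRDD wrapper before training.
        RDD<Rating> ratingRDD = ratings.rdd();
        ratingRDD.cache();

        MatrixFactorizationModel model = new ALS()
                .setRank(2)
                .setIterations(5)
                .setLambda(0.01)
                .run(ratingRDD);

        System.out.println("Predicted rating for (2, 20): " + model.predict(2, 20));

        ratingRDD.unpersist(false);
        sc.stop();
    }
}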

Example 9: SCAL_IRW

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
private static IndexedRowMatrix SCAL_IRW(double alpha, IndexedRowMatrix A, IndexedRowMatrix B, JavaSparkContext jsc) {

        JavaRDD<IndexedRow> rows = A.rows().toJavaRDD();

        final Broadcast<Double> alphaBC = jsc.broadcast(alpha);

        JavaRDD<IndexedRow> newRows = rows.map(new Function<IndexedRow, IndexedRow>() {
            @Override
            public IndexedRow call(IndexedRow indexedRow) throws Exception {

                double alphaValue = alphaBC.getValue().doubleValue();

                long index = indexedRow.index();

                double values[] = new double[indexedRow.vector().size()];

                for(int i = 0; i< values.length; i++) {
                    values[i] = indexedRow.vector().apply(i) * alphaValue;
                }

                return new IndexedRow(index, new DenseVector(values));

            }
        });

        B = new IndexedRowMatrix(newRows.rdd());

        return B;

    }
 
Developer: jmabuin, Project: BLASpark, Lines: 31, Source: L3.java

Example 10: SCAL_BCK

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
private static BlockMatrix SCAL_BCK(double alpha, BlockMatrix A, BlockMatrix B, JavaSparkContext jsc) {

        JavaRDD<Tuple2<Tuple2<Object, Object>, Matrix>> blocks = A.blocks().toJavaRDD();

        final Broadcast<Double> alphaBC = jsc.broadcast(alpha);

        JavaRDD<Tuple2<Tuple2<Object, Object>, Matrix>> newBlocks = blocks.map(new Function<Tuple2<Tuple2<Object, Object>, Matrix>, Tuple2<Tuple2<Object, Object>, Matrix>>() {
            @Override
            public Tuple2<Tuple2<Object, Object>, Matrix> call(Tuple2<Tuple2<Object, Object>, Matrix> block) throws Exception {

                double alphaBCRec = alphaBC.getValue().doubleValue();

                Integer row = (Integer)block._1._1; //Integer.parseInt(block._1._1.toString());
                Integer col = (Integer)block._1._2;
                Matrix matrixBlock = block._2;

                for(int i = 0; i< matrixBlock.numRows(); i++) {

                    for(int j = 0; j< matrixBlock.numCols(); j++) {
                        matrixBlock.update(i,j, matrixBlock.apply(i,j) * alphaBCRec);
                    }

                }

                return new Tuple2<Tuple2<Object, Object>, Matrix>(new Tuple2<Object, Object>(row, col), matrixBlock);

            }
        });

        B = new BlockMatrix(newBlocks.rdd(), A.rowsPerBlock(), A.colsPerBlock());

        return B;

    }
 
Developer: jmabuin, Project: BLASpark, Lines: 35, Source: L3.java

Example 11: GetLU_IRW

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
private static IndexedRowMatrix GetLU_IRW(IndexedRowMatrix A) {

        JavaRDD<IndexedRow> rows = A.rows().toJavaRDD().cache();

        JavaRDD<IndexedRow> LURows = rows.map(new Function<IndexedRow, IndexedRow>() {

            @Override
            public IndexedRow call(IndexedRow indexedRow) throws Exception {
                long index = indexedRow.index();
                DenseVector vect = indexedRow.vector().toDense();

                double newValues[] = new double[vect.size()];


                for(int i = 0; i< vect.size(); i++) {

                    if( i != index) {
                        newValues[i] = vect.apply(i);
                    }
                    else {
                        newValues[i] = 0.0;
                    }

                }

                DenseVector newVector = new DenseVector(newValues);

                return new IndexedRow(index, newVector);

            }
        });

        IndexedRowMatrix newMatrix = new IndexedRowMatrix(LURows.rdd());

        return newMatrix;
    }
 
Developer: jmabuin, Project: BLASpark, Lines: 37, Source: OtherOperations.java

Example 12: DGER_IRW

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
private static IndexedRowMatrix DGER_IRW(IndexedRowMatrix A, double alpha, DenseVector x, DenseVector y, JavaSparkContext jsc) {

        final Broadcast<Double> AlphaBC = jsc.broadcast(alpha);
        final Broadcast<DenseVector> BCVector_X = jsc.broadcast(x);
        final Broadcast<DenseVector> BCVector_Y = jsc.broadcast(y);

        JavaRDD<IndexedRow> rows = A.rows().toJavaRDD();

        JavaRDD<IndexedRow> resultRows = rows.map(new Function<IndexedRow, IndexedRow>() {
            @Override
            public IndexedRow call(IndexedRow indexedRow) throws Exception {

                DenseVector Vector_X = BCVector_X.getValue();
                DenseVector Vector_Y = BCVector_Y.getValue();
                double alphaBCRec = AlphaBC.getValue().doubleValue();

                DenseVector row = indexedRow.vector().toDense();

                double[] resultArray = new double[row.size()];

                long i = indexedRow.index();

                for( int j = 0; j< Vector_Y.size(); j++) {
                    resultArray[j] = alphaBCRec * Vector_X.apply((int)i) * Vector_Y.apply(j) + row.apply(j);
                }

                DenseVector result = new DenseVector(resultArray);

                return new IndexedRow(indexedRow.index(), result);

            }
        });

        IndexedRowMatrix newMatrix = new IndexedRowMatrix(resultRows.rdd(), x.size(), y.size());

        return newMatrix;
    }
 
Developer: jmabuin, Project: BLASpark, Lines: 38, Source: L2.java

Example 13: buildSVDMatrix

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
/**
 * buildSVDMatrix: generate an SVD matrix from a vector RDD.
 *
 * @param vecRDD
 *          vectors of terms in the feature space
 * @param dimension
 *          number of columns of the SVD matrix
 * @return RowMatrix; each row is a term, each column is a dimension in the
 *         feature space, and each cell is the value of the term in the
 *         corresponding dimension.
 */
public static RowMatrix buildSVDMatrix(JavaRDD<Vector> vecRDD, int dimension) {
  RowMatrix tfidfMatrix = new RowMatrix(vecRDD.rdd());
  SingularValueDecomposition<RowMatrix, Matrix> svd = tfidfMatrix.computeSVD(dimension, true, 1.0E-9d);
  RowMatrix u = svd.U();
  Vector s = svd.s();
  return u.multiply(Matrices.diag(s));
}
 
Developer: apache, Project: incubator-sdap-mudrod, Lines: 19, Source: MatrixUtil.java
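A minimal, self-contained sketch of the steps buildSVDMatrix performs: wrap the unwrapped Scala RDD in a RowMatrix, compute a truncated SVD, and multiply U by the diagonal of the singular values. The class name and test vectors are assumptions made for this article.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.linalg.Matrices;
import org.apache.spark.mllib.linalg.Matrix;
import org.apache.spark.mllib.linalg.SingularValueDecomposition;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.mllib.linalg.distributed.RowMatrix;

public class SvdSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("SVD from JavaRDD.rdd").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);

        JavaRDD<Vector> vecRDD = sc.parallelize(Arrays.asList(
                Vectors.dense(1.0, 0.0, 0.0),
                Vectors.dense(0.0, 2.0, 0.0),
                Vectors.dense(0.0, 0.0, 3.0)));

        // Wrap the Scala RDD in a RowMatrix, compute a rank-2 SVD with U,
        // and project U onto the singular values, as buildSVDMatrix does.
        RowMatrix matrix = new RowMatrix(vecRDD.rdd());
        SingularValueDecomposition<RowMatrix, Matrix> svd = matrix.computeSVD(2, true, 1.0E-9d);
        RowMatrix projected = svd.U().multiply(Matrices.diag(svd.s()));

        System.out.println("projected rows=" + projected.numRows() + ", cols=" + projected.numCols());
        sc.stop();
    }
}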

Example 14: createTFIDFMatrix

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
/**
 * Create a TF-IDF matrix from a word-document matrix.
 *
 * @param wordDocMatrix
 *          matrix in which each row is a term, each column is a document,
 *          and each cell is the count of the term in the corresponding
 *          document.
 * @return RowMatrix; each row is a term, each column is a document, and
 *         each cell is the TF-IDF value of the term in the corresponding
 *         document.
 */
public static RowMatrix createTFIDFMatrix(RowMatrix wordDocMatrix) {
  JavaRDD<Vector> newcountRDD = wordDocMatrix.rows().toJavaRDD();
  IDFModel idfModel = new IDF().fit(newcountRDD);
  JavaRDD<Vector> idf = idfModel.transform(newcountRDD);
  return new RowMatrix(idf.rdd());
}
 
Developer: apache, Project: incubator-sdap-mudrod, Lines: 17, Source: MatrixUtil.java
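A minimal, self-contained sketch of the steps createTFIDFMatrix performs: fit an IDF model on the count vectors, transform them, and rebuild a RowMatrix from the unwrapped Scala RDD. The class name and count vectors are assumptions made for this article.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.feature.IDF;
import org.apache.spark.mllib.feature.IDFModel;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.mllib.linalg.distributed.RowMatrix;

public class TfIdfSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("TF-IDF from JavaRDD.rdd").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Two small raw count vectors as stand-in input data.
        JavaRDD<Vector> counts = sc.parallelize(Arrays.asList(
                Vectors.dense(1.0, 0.0, 3.0),
                Vectors.dense(0.0, 2.0, 1.0)));

        // Fit an IDF model on the counts, transform them, and rebuild a
        // RowMatrix from the unwrapped Scala RDD, as createTFIDFMatrix does.
        IDFModel idfModel = new IDF().fit(counts);
        JavaRDD<Vector> tfidf = idfModel.transform(counts);
        RowMatrix tfidfMatrix = new RowMatrix(tfidf.rdd());

        System.out.println("rows=" + tfidfMatrix.numRows() + ", cols=" + tfidfMatrix.numCols());
        sc.stop();
    }
}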

Example 15: ConjugateGradientExample

import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
public ConjugateGradientExample(GeneralOptions CG_Options, JavaSparkContext ctx ) {

		this.ctx = ctx;

		this.ctx.getConf().setAppName("BLASpark - Example CG");

		this.iterationNumber = CG_Options.getIterationNumber();
		long numPartitions = CG_Options.getNumPartitions();


		this.inputVectorPath = CG_Options.getInputVectorPath();
		this.inputMatrixPath = CG_Options.getInputMatrixPath();
		this.outputVectorPath = CG_Options.getOutputVectorPath();

		// Read MATRIX input data
		this.matrixFormat = CG_Options.getMatrixFormat();

		if(this.matrixFormat == GeneralOptions.MatrixFormat.PAIRLINE) {
			JavaRDD<IndexedRow> inputMatrixData;

			if(numPartitions != 0) {
				inputMatrixData = ctx.newAPIHadoopFile(inputMatrixPath, RowPerLineInputFormat.class,
						Long.class, double[].class, ctx.hadoopConfiguration())
						.map(new Array2IndexedRow())
						.repartition((int)numPartitions);
			}
			else {
				inputMatrixData = ctx.newAPIHadoopFile(inputMatrixPath, RowPerLineInputFormat.class,
						Long.class, double[].class, ctx.hadoopConfiguration())
						.map(new Array2IndexedRow());
			}


			this.matrix = new IndexedRowMatrix(inputMatrixData.rdd());
			((IndexedRowMatrix)this.matrix).rows().cache();

		}


		// Read VECTOR input data
		this.vector = IO.readVectorFromFileInHDFS(this.inputVectorPath, this.ctx.hadoopConfiguration());

		this.outputVector = Vectors.zeros(this.vector.size()).toDense();





	}
 
Developer: jmabuin, Project: BLASpark, Lines: 50, Source: ConjugateGradientExample.java


Note: The org.apache.spark.api.java.JavaRDD.rdd method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by their respective developers, and copyright of the source code remains with the original authors. Please consult each project's license before distributing or using the code, and do not reproduce this article without permission.