本文整理汇总了Java中org.apache.spark.api.java.JavaRDD.rdd方法的典型用法代码示例。如果您正苦于以下问题：Java JavaRDD.rdd方法的具体用法？Java JavaRDD.rdd怎么用？Java JavaRDD.rdd使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.spark.api.java.JavaRDD的用法示例。
在下文中一共展示了JavaRDD.rdd方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: buildScan
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Produces the content of this relation as a Scala {@code RDD} of rows.
 *
 * <p>The beans returned by {@code collectData()} are distributed with a
 * {@code JavaSparkContext} wrapped around the SQL context's Spark context, and each
 * bean is converted to a {@code Row} by {@code SparkBeanUtils.getRowFromBean}.
 *
 * @return the RDD underlying the mapped {@code JavaRDD<Row>}
 */
@Override
public RDD<Row> buildScan() {
    log.debug("-> buildScan()");
    // Return value is discarded; presumably this initializes the `schema` used by the
    // mapping below -- confirm against schema()'s implementation.
    schema();

    // Data collection is kept in its own method so that only the plumbing remains here.
    List<PhotoMetadata> photos = collectData();

    @SuppressWarnings("resource")
    JavaSparkContext javaContext = new JavaSparkContext(sqlContext.sparkContext());
    JavaRDD<PhotoMetadata> photoRDD = javaContext.parallelize(photos);
    JavaRDD<Row> rows = photoRDD.map(bean -> SparkBeanUtils.getRowFromBean(schema, bean));
    return rows.rdd();
}
示例2: getSVDMatrix
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Creates an SVD matrix CSV file from an original CSV file.
 *
 * <p>The input is loaded as (key, vector) pairs, turned into a TF-IDF matrix, reduced
 * with {@code MatrixUtil.buildSVDMatrix} and exported with the original row keys and
 * generated column names {@code dimension0 .. dimension(n-1)}.
 *
 * @param csvFileName input CSV file; each row is a term, and each column is a document
 * @param svdDimention dimension of the SVD matrix (also the number of generated column names)
 * @param svdMatrixFileName CSV file name the SVD matrix is exported to
 */
public void getSVDMatrix(String csvFileName, int svdDimention, String svdMatrixFileName) {
    JavaPairRDD<String, Vector> keyedVectors = MatrixUtil.loadVectorFromCSV(spark, csvFileName, 1);

    RowMatrix termDocMatrix = new RowMatrix(keyedVectors.values().rdd());
    RowMatrix weightedMatrix = MatrixUtil.createTFIDFMatrix(termDocMatrix);
    RowMatrix reducedMatrix = MatrixUtil.buildSVDMatrix(weightedMatrix, svdDimention);

    // Row labels are the keys of the loaded pairs, pulled back to the driver.
    List<String> rowLabels = keyedVectors.keys().collect();

    List<String> columnLabels = new ArrayList<>();
    int column = 0;
    while (column < svdDimention) {
        columnLabels.add("dimension" + column);
        column++;
    }

    MatrixUtil.exportToCSV(reducedMatrix, rowLabels, columnLabels, svdMatrixFileName);
}
示例3: GetLU_COORD
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Builds a coordinate matrix equal to {@code A} with its diagonal entries set to zero.
 *
 * <p>Off-diagonal entries are passed through unchanged; every entry with
 * {@code i == j} is replaced by an explicit entry of value {@code 0.0}.
 *
 * @param A source matrix; its entries RDD is marked for caching by this call
 * @return a new matrix built from the transformed entries
 */
private static CoordinateMatrix GetLU_COORD(CoordinateMatrix A) {
    FlatMapFunction<Iterator<MatrixEntry>, MatrixEntry> zeroDiagonal =
            new FlatMapFunction<Iterator<MatrixEntry>, MatrixEntry>() {
                @Override
                public Iterator<MatrixEntry> call(Iterator<MatrixEntry> partition) throws Exception {
                    // The whole partition is buffered before being handed back as an iterator.
                    List<MatrixEntry> transformed = new ArrayList<MatrixEntry>();
                    while (partition.hasNext()) {
                        MatrixEntry entry = partition.next();
                        boolean onDiagonal = entry.i() == entry.j();
                        transformed.add(onDiagonal
                                ? new MatrixEntry(entry.i(), entry.j(), 0.0)
                                : entry);
                    }
                    return transformed.iterator();
                }
            };

    JavaRDD<MatrixEntry> withoutDiagonal = A.entries().toJavaRDD().cache().mapPartitions(zeroDiagonal);
    return new CoordinateMatrix(withoutDiagonal.rdd());
}
示例4: GetD_COORD
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Builds a coordinate matrix holding only the diagonal of {@code A}.
 *
 * <p>Entries with {@code i != j} become explicit {@code 0.0} entries. Diagonal entries
 * are kept as they are, or replaced by their reciprocal when {@code inverseValues} is set.
 *
 * @param A source matrix; its entries RDD is marked for caching by this call
 * @param inverseValues whether diagonal values are replaced by {@code 1.0 / value}
 * @param jsc context used to broadcast the {@code inverseValues} flag
 * @return a new matrix built from the transformed entries
 */
private static CoordinateMatrix GetD_COORD(CoordinateMatrix A, boolean inverseValues, JavaSparkContext jsc) {
    JavaRDD<MatrixEntry> sourceEntries = A.entries().toJavaRDD().cache();
    final Broadcast<Boolean> inverseValuesBC = jsc.broadcast(inverseValues);

    FlatMapFunction<Iterator<MatrixEntry>, MatrixEntry> keepDiagonal =
            new FlatMapFunction<Iterator<MatrixEntry>, MatrixEntry>() {
                @Override
                public Iterator<MatrixEntry> call(Iterator<MatrixEntry> partition) throws Exception {
                    List<MatrixEntry> transformed = new ArrayList<MatrixEntry>();
                    // The broadcast flag is read once per partition.
                    boolean invert = inverseValuesBC.getValue().booleanValue();
                    while (partition.hasNext()) {
                        MatrixEntry entry = partition.next();
                        if (entry.i() != entry.j()) {
                            transformed.add(new MatrixEntry(entry.i(), entry.j(), 0.0));
                            continue;
                        }
                        // A zero diagonal value is not special-cased: 1.0 / 0.0 is Infinity in Java.
                        transformed.add(invert
                                ? new MatrixEntry(entry.i(), entry.j(), 1.0 / entry.value())
                                : entry);
                    }
                    return transformed.iterator();
                }
            };

    JavaRDD<MatrixEntry> diagonalEntries = sourceEntries.mapPartitions(keepDiagonal);
    return new CoordinateMatrix(diagonalEntries.rdd());
}
示例5: buildIndexRowMatrix
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Converts an RDD of vectors into an indexed row matrix.
 *
 * <p>Each vector is paired with the index assigned by {@code zipWithIndex()}, and that
 * index becomes the row index of the resulting {@code IndexedRow}.
 *
 * @param vecs vector RDD
 * @return an {@code IndexedRowMatrix} with one row per input vector
 */
public static IndexedRowMatrix buildIndexRowMatrix(JavaRDD<Vector> vecs) {
    Function<Tuple2<Vector, Long>, IndexedRow> toIndexedRow =
            new Function<Tuple2<Vector, Long>, IndexedRow>() {
                /** Fixed serialization id for this function object. */
                private static final long serialVersionUID = 1L;

                @Override
                public IndexedRow call(Tuple2<Vector, Long> vectorWithIndex) {
                    Vector vector = vectorWithIndex._1();
                    long position = vectorWithIndex._2();
                    return new IndexedRow(position, vector);
                }
            };

    return new IndexedRowMatrix(vecs.zipWithIndex().map(toIndexedRow).rdd());
}
示例6: DmXV
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Sets up the DmXV example: copies the run options into fields, loads the input matrix
 * in the requested distributed format, and reads the input vector.
 *
 * @param DmXV_Options options supplying the partition count, the input/output paths,
 *                     alpha, beta, the matrix format and the block dimensions
 * @param ctx Spark context used to read the matrix and to obtain the Hadoop configuration
 */
public DmXV(GeneralOptions DmXV_Options, JavaSparkContext ctx) {
    this.ctx = ctx;
    // NOTE(review): getConf() may hand back a copy of the configuration, in which case
    // this rename has no effect on the running context -- confirm.
    this.ctx.getConf().setAppName("BLASpark - Example DmXV");

    long numPartitions = DmXV_Options.getNumPartitions();
    this.inputVectorPath = DmXV_Options.getInputVectorPath();
    this.inputMatrixPath = DmXV_Options.getInputMatrixPath();
    this.outputVectorPath = DmXV_Options.getOutputVectorPath();
    this.alpha = DmXV_Options.getAlpha();
    this.beta = DmXV_Options.getBeta();

    // Read MATRIX input data: records are mapped to IndexedRow objects.
    JavaRDD<IndexedRow> inputMatrixData = ctx
            .newAPIHadoopFile(inputMatrixPath, RowPerLineInputFormat.class,
                    Long.class, double[].class, ctx.hadoopConfiguration())
            .map(new Array2IndexedRow());
    // A partition count of 0 means "keep the partitioning produced by the read".
    if (numPartitions != 0) {
        inputMatrixData = inputMatrixData.repartition((int) numPartitions);
    }
    this.tmpMatrix = new IndexedRowMatrix(inputMatrixData.rdd());

    // Convert to the requested representation and mark its backing RDD for caching.
    if (DmXV_Options.getMatrixFormat() == GeneralOptions.MatrixFormat.COORDINATE) {
        LOG.info("The matrix format is CoordinateMatrix");
        CoordinateMatrix coordinateMatrix = this.tmpMatrix.toCoordinateMatrix();
        this.matrix = coordinateMatrix;
        coordinateMatrix.entries().cache();
    } else if (DmXV_Options.getMatrixFormat() == GeneralOptions.MatrixFormat.BLOCK) {
        int rowsPerBlock = DmXV_Options.getRowsPerlBlock();
        int colsPerBlock = DmXV_Options.getColsPerBlock();
        LOG.info("The matrix format is BlockMatrix. Nrows: " + rowsPerBlock + ". Ncols: " + colsPerBlock);
        BlockMatrix blockMatrix = this.tmpMatrix.toBlockMatrix(rowsPerBlock, colsPerBlock);
        this.matrix = blockMatrix;
        blockMatrix.blocks().cache();
    } else {
        // Any other format falls back to the indexed-row representation as loaded.
        LOG.info("The matrix format is IndexedRowMatrix");
        this.matrix = this.tmpMatrix;
        this.tmpMatrix.rows().cache();
    }

    // Read VECTOR input data; the output vector starts as zeros of the same size.
    this.vector = IO.readVectorFromFileInHDFS(this.inputVectorPath, this.ctx.hadoopConfiguration());
    this.outputVector = Vectors.zeros(this.vector.size()).toDense();
}
示例7: GetD_IRW
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Builds an indexed row matrix holding only the diagonal of {@code A}.
 *
 * <p>In every row, the element whose column equals the row index is kept (or replaced
 * by its reciprocal when {@code inverseValues} is set); all other elements are zero.
 *
 * @param A source matrix; its rows RDD is marked for caching by this call
 * @param inverseValues whether the diagonal value is replaced by {@code 1.0 / value}
 * @param jsc context used to broadcast the {@code inverseValues} flag
 * @return a new matrix built from the transformed rows
 */
private static IndexedRowMatrix GetD_IRW(IndexedRowMatrix A, boolean inverseValues, JavaSparkContext jsc) {
    JavaRDD<IndexedRow> sourceRows = A.rows().toJavaRDD().cache();
    final Broadcast<Boolean> inverseValuesBC = jsc.broadcast(inverseValues);

    Function<IndexedRow, IndexedRow> keepDiagonal = new Function<IndexedRow, IndexedRow>() {
        @Override
        public IndexedRow call(IndexedRow indexedRow) throws Exception {
            long rowIndex = indexedRow.index();
            DenseVector dense = indexedRow.vector().toDense();
            boolean invert = inverseValuesBC.getValue().booleanValue();

            // A fresh double[] is zero-filled, so only the diagonal slot has to be written.
            double[] diagonalOnly = new double[dense.size()];
            if (rowIndex >= 0 && rowIndex < diagonalOnly.length) {
                int diagonal = (int) rowIndex;
                double value = dense.apply(diagonal);
                // A zero diagonal value is not special-cased: 1.0 / 0.0 is Infinity in Java.
                diagonalOnly[diagonal] = invert ? 1.0 / value : value;
            }
            return new IndexedRow(rowIndex, new DenseVector(diagonalOnly));
        }
    };

    return new IndexedRowMatrix(sourceRows.map(keepDiagonal).rdd());
}
示例8: buildModel
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Trains an ALS matrix-factorization model on the given data and converts it to PMML.
 *
 * <p>Steps, in order: unpack and validate the hyperparameters, parse the input lines,
 * build and broadcast the ID-to-index mappings, convert and aggregate the ratings,
 * run ALS, release the training-time RDDs and broadcasts, then broadcast the inverted
 * mappings for the PMML conversion and release them again.
 *
 * @param sparkContext context used for broadcasting the ID mappings
 * @param trainData raw training lines, parsed with {@code MLFunctions.PARSE_FN}
 * @param hyperParameters positional values: features (Integer), lambda (Double),
 *                        alpha (Double) and, only when {@code logStrength} is set,
 *                        epsilon (Double)
 * @param candidatePath passed through to {@code mfModelToPMML}
 * @return the PMML produced by {@code mfModelToPMML}
 * @throws IllegalArgumentException (via {@code Preconditions.checkArgument}) when
 *         features &lt;= 0, lambda &lt; 0, alpha &lt;= 0, or epsilon &lt;= 0 with logStrength set
 */
@Override
public PMML buildModel(JavaSparkContext sparkContext,
                       JavaRDD<String> trainData,
                       List<?> hyperParameters,
                       Path candidatePath) {
    int features = (Integer) hyperParameters.get(0);
    double lambda = (Double) hyperParameters.get(1);
    double alpha = (Double) hyperParameters.get(2);
    // epsilon is only supplied when logStrength is enabled; NaN marks it as absent.
    double epsilon = logStrength ? (Double) hyperParameters.get(3) : Double.NaN;

    Preconditions.checkArgument(features > 0);
    Preconditions.checkArgument(lambda >= 0.0);
    Preconditions.checkArgument(alpha > 0.0);
    Preconditions.checkArgument(!logStrength || epsilon > 0.0);

    // The parsed input is read several times below, so it is cached first.
    JavaRDD<String[]> parsedRDD = trainData.map(MLFunctions.PARSE_FN);
    parsedRDD.cache();

    Map<String,Integer> userIDIndexMap = buildIDIndexMapping(parsedRDD, true);
    Map<String,Integer> itemIDIndexMap = buildIDIndexMapping(parsedRDD, false);

    log.info("Broadcasting ID-index mappings for {} users, {} items",
             userIDIndexMap.size(), itemIDIndexMap.size());

    Broadcast<Map<String,Integer>> bUserIDToIndex = sparkContext.broadcast(userIDIndexMap);
    Broadcast<Map<String,Integer>> bItemIDToIndex = sparkContext.broadcast(itemIDIndexMap);

    JavaRDD<Rating> ratings =
        aggregateScores(parsedToRatingRDD(parsedRDD, bUserIDToIndex, bItemIDToIndex), epsilon);

    ALS als = new ALS()
        .setRank(features)
        .setIterations(iterations)
        .setLambda(lambda)
        .setCheckpointInterval(5);
    if (implicit) {
      als = als.setImplicitPrefs(true).setAlpha(alpha);
    }

    // ALS runs on the Scala RDD, which is cached only for the duration of training.
    RDD<Rating> ratingsForTraining = ratings.rdd();
    ratingsForTraining.cache();
    MatrixFactorizationModel model = als.run(ratingsForTraining);
    ratingsForTraining.unpersist(false);

    // Training-time broadcasts and the parsed input are no longer needed.
    bUserIDToIndex.unpersist();
    bItemIDToIndex.unpersist();
    parsedRDD.unpersist();

    // The PMML conversion receives the reverse (index -> ID) mappings.
    Broadcast<Map<Integer,String>> bUserIndexToID = sparkContext.broadcast(invertMap(userIDIndexMap));
    Broadcast<Map<Integer,String>> bItemIndexToID = sparkContext.broadcast(invertMap(itemIDIndexMap));

    PMML pmml = mfModelToPMML(model, features, lambda, alpha, epsilon, implicit, logStrength,
                              candidatePath, bUserIndexToID, bItemIndexToID);
    unpersist(model);

    bUserIndexToID.unpersist();
    bItemIndexToID.unpersist();

    return pmml;
}
示例9: SCAL_IRW
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Returns a new indexed row matrix whose elements are those of {@code A} multiplied by
 * {@code alpha}.
 *
 * @param alpha scaling factor, broadcast to the workers
 * @param A matrix whose rows are scaled
 * @param B not read; the result is returned instead of being stored in the caller's variable
 * @param jsc context used to broadcast {@code alpha}
 * @return the scaled matrix, with every row stored as a dense vector
 */
private static IndexedRowMatrix SCAL_IRW(double alpha, IndexedRowMatrix A, IndexedRowMatrix B, JavaSparkContext jsc) {
    JavaRDD<IndexedRow> sourceRows = A.rows().toJavaRDD();
    final Broadcast<Double> alphaBC = jsc.broadcast(alpha);

    Function<IndexedRow, IndexedRow> scaleRow = new Function<IndexedRow, IndexedRow>() {
        @Override
        public IndexedRow call(IndexedRow indexedRow) throws Exception {
            double factor = alphaBC.getValue().doubleValue();
            long rowIndex = indexedRow.index();
            double[] scaled = new double[indexedRow.vector().size()];
            int column = 0;
            while (column < scaled.length) {
                scaled[column] = indexedRow.vector().apply(column) * factor;
                column++;
            }
            return new IndexedRow(rowIndex, new DenseVector(scaled));
        }
    };

    return new IndexedRowMatrix(sourceRows.map(scaleRow).rdd());
}
示例10: SCAL_BCK
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Returns a block matrix whose elements are those of {@code A} multiplied by
 * {@code alpha}, using the same block dimensions as {@code A}.
 *
 * <p>NOTE(review): every element is scaled with {@code update} on the very
 * {@code Matrix} object taken from {@code A}'s block RDD, so the source blocks are
 * modified in place as well. If {@code A.blocks()} is cached this may leave {@code A}
 * scaled too -- confirm whether callers rely on that before changing it.
 *
 * @param alpha scaling factor, broadcast to the workers
 * @param A matrix whose blocks are scaled
 * @param B not read; the result is returned instead of being stored in the caller's variable
 * @param jsc context used to broadcast {@code alpha}
 * @return a new {@code BlockMatrix} over the scaled blocks
 */
private static BlockMatrix SCAL_BCK(double alpha, BlockMatrix A, BlockMatrix B, JavaSparkContext jsc) {
    JavaRDD<Tuple2<Tuple2<Object, Object>, Matrix>> sourceBlocks = A.blocks().toJavaRDD();
    final Broadcast<Double> alphaBC = jsc.broadcast(alpha);

    Function<Tuple2<Tuple2<Object, Object>, Matrix>, Tuple2<Tuple2<Object, Object>, Matrix>> scaleBlock =
            new Function<Tuple2<Tuple2<Object, Object>, Matrix>, Tuple2<Tuple2<Object, Object>, Matrix>>() {
                @Override
                public Tuple2<Tuple2<Object, Object>, Matrix> call(Tuple2<Tuple2<Object, Object>, Matrix> block) throws Exception {
                    double factor = alphaBC.getValue().doubleValue();
                    // Block coordinates are declared as Object in the Java view of the tuple.
                    Integer blockRow = (Integer) block._1._1;
                    Integer blockCol = (Integer) block._1._2;
                    Matrix content = block._2;

                    for (int r = 0; r < content.numRows(); r++) {
                        for (int c = 0; c < content.numCols(); c++) {
                            double scaledValue = content.apply(r, c) * factor;
                            content.update(r, c, scaledValue);
                        }
                    }

                    Tuple2<Object, Object> coordinates = new Tuple2<Object, Object>(blockRow, blockCol);
                    return new Tuple2<Tuple2<Object, Object>, Matrix>(coordinates, content);
                }
            };

    JavaRDD<Tuple2<Tuple2<Object, Object>, Matrix>> scaledBlocks = sourceBlocks.map(scaleBlock);
    return new BlockMatrix(scaledBlocks.rdd(), A.rowsPerBlock(), A.colsPerBlock());
}
示例11: GetLU_IRW
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Builds an indexed row matrix equal to {@code A} with its diagonal set to zero.
 *
 * <p>In every row, the element whose column equals the row index becomes {@code 0.0};
 * all other elements are copied from the dense form of the row.
 *
 * @param A source matrix; its rows RDD is marked for caching by this call
 * @return a new matrix built from the transformed rows
 */
private static IndexedRowMatrix GetLU_IRW(IndexedRowMatrix A) {
    Function<IndexedRow, IndexedRow> zeroDiagonal = new Function<IndexedRow, IndexedRow>() {
        @Override
        public IndexedRow call(IndexedRow indexedRow) throws Exception {
            long rowIndex = indexedRow.index();
            DenseVector dense = indexedRow.vector().toDense();
            double[] offDiagonal = new double[dense.size()];
            for (int column = 0; column < dense.size(); column++) {
                offDiagonal[column] = (column == rowIndex) ? 0.0 : dense.apply(column);
            }
            return new IndexedRow(rowIndex, new DenseVector(offDiagonal));
        }
    };

    JavaRDD<IndexedRow> withoutDiagonal = A.rows().toJavaRDD().cache().map(zeroDiagonal);
    return new IndexedRowMatrix(withoutDiagonal.rdd());
}
示例12: DGER_IRW
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Computes, row by row, {@code alpha * x[i] * y[j] + A[i][j]} and returns the result
 * as a new indexed row matrix with {@code x.size()} rows and {@code y.size()} columns.
 *
 * <p>NOTE(review): only the first {@code y.size()} columns of each row are written,
 * into an array sized by the row's own length; this presumably assumes every row of
 * {@code A} has exactly {@code y.size()} elements -- confirm.
 *
 * @param A matrix added to the outer product
 * @param alpha scalar factor, broadcast to the workers
 * @param x vector indexed by row index, broadcast to the workers
 * @param y vector indexed by column, broadcast to the workers
 * @param jsc context used for the broadcasts
 * @return the resulting matrix
 */
private static IndexedRowMatrix DGER_IRW(IndexedRowMatrix A, double alpha, DenseVector x, DenseVector y, JavaSparkContext jsc) {
    final Broadcast<Double> AlphaBC = jsc.broadcast(alpha);
    final Broadcast<DenseVector> BCVector_X = jsc.broadcast(x);
    final Broadcast<DenseVector> BCVector_Y = jsc.broadcast(y);

    Function<IndexedRow, IndexedRow> rankOneUpdate = new Function<IndexedRow, IndexedRow>() {
        @Override
        public IndexedRow call(IndexedRow indexedRow) throws Exception {
            DenseVector xValues = BCVector_X.getValue();
            DenseVector yValues = BCVector_Y.getValue();
            double factor = AlphaBC.getValue().doubleValue();

            DenseVector currentRow = indexedRow.vector().toDense();
            double[] updated = new double[currentRow.size()];
            // The row index selects the matching element of x.
            int xPosition = (int) indexedRow.index();

            int column = 0;
            while (column < yValues.size()) {
                updated[column] = factor * xValues.apply(xPosition) * yValues.apply(column) + currentRow.apply(column);
                column++;
            }
            return new IndexedRow(indexedRow.index(), new DenseVector(updated));
        }
    };

    JavaRDD<IndexedRow> updatedRows = A.rows().toJavaRDD().map(rankOneUpdate);
    return new IndexedRowMatrix(updatedRows.rdd(), x.size(), y.size());
}
示例13: buildSVDMatrix
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Generates an SVD matrix from a vector RDD.
 *
 * <p>The vectors are wrapped in a {@code RowMatrix}, decomposed with
 * {@code computeSVD(dimension, true, 1.0E-9d)}, and the returned matrix is
 * {@code U} multiplied by the diagonal matrix of the singular values.
 *
 * @param vecRDD vectors of terms in feature space
 * @param dimension column number of the SVD matrix (number of singular values requested)
 * @return RowMatrix in which each row is a term, each column is a dimension in the
 *         feature space, and each cell is the value of the term in that dimension
 */
public static RowMatrix buildSVDMatrix(JavaRDD<Vector> vecRDD, int dimension) {
    SingularValueDecomposition<RowMatrix, Matrix> decomposition =
            new RowMatrix(vecRDD.rdd()).computeSVD(dimension, true, 1.0E-9d);
    // Weight each column of U by its singular value: U * diag(s).
    return decomposition.U().multiply(Matrices.diag(decomposition.s()));
}
示例14: createTFIDFMatrix
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Creates a TF-IDF matrix from a word-document matrix.
 *
 * <p>An {@code IDF} model is fitted on the rows of the input and then applied to the
 * same rows.
 *
 * @param wordDocMatrix matrix in which each row is a term, each column is a document
 *                      name, and each cell is the count of the term in that document
 * @return RowMatrix in which each row is a term, each column is a document name, and
 *         each cell is the TF-IDF value of the term in that document
 */
public static RowMatrix createTFIDFMatrix(RowMatrix wordDocMatrix) {
    JavaRDD<Vector> termCounts = wordDocMatrix.rows().toJavaRDD();
    JavaRDD<Vector> weighted = new IDF().fit(termCounts).transform(termCounts);
    return new RowMatrix(weighted.rdd());
}
示例15: ConjugateGradientExample
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Sets up the conjugate-gradient example: copies the run options into fields, loads the
 * input matrix when the format is {@code PAIRLINE}, and reads the input vector.
 *
 * @param CG_Options options supplying the iteration count, the partition count, the
 *                   input/output paths and the matrix format
 * @param ctx Spark context used to read the matrix and to obtain the Hadoop configuration
 */
public ConjugateGradientExample(GeneralOptions CG_Options, JavaSparkContext ctx) {
    this.ctx = ctx;
    // NOTE(review): getConf() may hand back a copy of the configuration, in which case
    // this rename has no effect on the running context -- confirm.
    this.ctx.getConf().setAppName("BLASpark - Example CG");

    this.iterationNumber = CG_Options.getIterationNumber();
    long numPartitions = CG_Options.getNumPartitions();
    this.inputVectorPath = CG_Options.getInputVectorPath();
    this.inputMatrixPath = CG_Options.getInputMatrixPath();
    this.outputVectorPath = CG_Options.getOutputVectorPath();

    // Read MATRIX input data; for any format other than PAIRLINE no matrix is loaded here.
    this.matrixFormat = CG_Options.getMatrixFormat();
    if (GeneralOptions.MatrixFormat.PAIRLINE == this.matrixFormat) {
        JavaRDD<IndexedRow> inputMatrixData = ctx
                .newAPIHadoopFile(inputMatrixPath, RowPerLineInputFormat.class,
                        Long.class, double[].class, ctx.hadoopConfiguration())
                .map(new Array2IndexedRow());
        // A partition count of 0 means "keep the partitioning produced by the read".
        if (numPartitions != 0) {
            inputMatrixData = inputMatrixData.repartition((int) numPartitions);
        }

        IndexedRowMatrix rowMatrix = new IndexedRowMatrix(inputMatrixData.rdd());
        this.matrix = rowMatrix;
        rowMatrix.rows().cache();
    }

    // Read VECTOR input data; the output vector starts as zeros of the same size.
    this.vector = IO.readVectorFromFileInHDFS(this.inputVectorPath, this.ctx.hadoopConfiguration());
    this.outputVector = Vectors.zeros(this.vector.size()).toDense();
}