This article collects typical usage examples of the JavaRDD.collect method from the Java class org.apache.spark.api.java.JavaRDD. If you are unsure how to use JavaRDD.collect in Java, or are looking for concrete examples of it, the curated code samples here may help. You can also explore further usage examples of the enclosing class, org.apache.spark.api.java.JavaRDD.
Three code examples of the JavaRDD.collect method are shown below, sorted by popularity by default.
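Before the examples, a minimal sketch of the pattern they share, assuming a local Spark setup that is not part of the collected snippets: collect() materializes all elements of a distributed RDD as a java.util.List on the driver, so it should only be called when the result comfortably fits in driver memory.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class CollectSketch {
  public static void main(String[] args) {
    // Local master for illustration only; real jobs would run on a cluster.
    SparkConf conf = new SparkConf().setAppName("collect-sketch").setMaster("local[*]");
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
      // collect() pulls every element of the RDD back to the driver as a List.
      List<Integer> values = rdd.collect();
      System.out.println(values); // prints [1, 2, 3, 4]
    }
  }
}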
Example 1: sparkTrain
import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
public boolean sparkTrain(JavaRDD<String> rdd) {
  // Repartition the input into slaveNum partitions.
  JavaRDD<String> repartition = rdd.repartition(slaveNum);
  // trainFunc runs once per partition and emits a Boolean success flag.
  JavaRDD<Boolean> partRDD = repartition.mapPartitionsWithIndex(trainFunc, true);
  // collect() brings the per-partition flags back to the driver.
  List<Boolean> res = partRDD.collect();
  for (boolean result : res) {
    if (!result) {
      return false;
    }
  }
  return true;
}
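The snippet assumes slaveNum and trainFunc fields defined elsewhere in the class. A hypothetical sketch of what such a partition-level training function could look like, assuming one success flag per partition (the body is illustrative, not the project's actual logic):

import java.util.Collections;
import java.util.Iterator;

import org.apache.spark.api.java.function.Function2;

// Hypothetical: emit one Boolean per partition indicating whether local training succeeded.
Function2<Integer, Iterator<String>, Iterator<Boolean>> trainFunc =
    (partitionIndex, records) -> {
      boolean ok = true;
      while (records.hasNext()) {
        // Placeholder for the real per-record training step.
        ok = ok && !records.next().isEmpty();
      }
      return Collections.singletonList(ok).iterator();
    };

Because Function2 extends java.io.Serializable, Spark can ship this function to the executors.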
Example 2: writeMatrixToFileInHDFS
import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
public static void writeMatrixToFileInHDFS(String file, DistributedMatrix matrix, Configuration conf) {
  try {
    List<IndexedRow> localRows;
    long numRows = 0;
    long numCols = 0;
    FileSystem fs = FileSystem.get(conf);
    Path pt = new Path(file);
    BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fs.create(pt, true)));

    // Convert the supported DistributedMatrix implementations to a JavaRDD of indexed rows.
    JavaRDD<IndexedRow> rows;
    if (matrix.getClass() == IndexedRowMatrix.class) {
      rows = ((IndexedRowMatrix) matrix).rows().toJavaRDD();
    } else if (matrix.getClass() == CoordinateMatrix.class) {
      rows = ((CoordinateMatrix) matrix).toIndexedRowMatrix().rows().toJavaRDD();
    } else if (matrix.getClass() == BlockMatrix.class) {
      rows = ((BlockMatrix) matrix).toIndexedRowMatrix().rows().toJavaRDD();
    } else {
      rows = null;
    }
    if (rows == null) {
      LOG.error("Unsupported DistributedMatrix type: " + matrix.getClass().getName());
      bw.close();
      return;
    }

    // collect() materializes every row on the driver, so the matrix must fit in driver memory.
    localRows = rows.collect();
    Vector[] vectors = new Vector[localRows.size()];
    for (int i = 0; i < localRows.size(); i++) {
      vectors[(int) localRows.get(i).index()] = localRows.get(i).vector();
    }

    // Write a MatrixMarket-style header, then one line per row with comma-separated values.
    numRows = matrix.numRows();
    numCols = matrix.numCols();
    bw.write("%%MatrixMarket matrix array real general");
    bw.newLine();
    bw.write(numRows + " " + numCols + " " + (numRows * numCols));
    bw.newLine();
    for (int i = 0; i < vectors.length; i++) {
      bw.write(i + ":");
      for (int j = 0; j < vectors[i].size(); j++) {
        bw.write(String.valueOf(vectors[i].apply(j)) + ",");
      }
      bw.newLine();
    }
    bw.close();
    // fs.close(); // not closed: FileSystem.get() returns a cached, shared instance
  } catch (IOException e) {
    LOG.error("Error in " + IO.class.getName() + ": " + e.getMessage());
    e.printStackTrace();
    System.exit(1);
  }
}
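A hypothetical call site for this method, assuming it lives in the IO class referenced by the log statement and that sc is an existing JavaSparkContext; the matrix contents and output path are placeholders:

import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.mllib.linalg.distributed.IndexedRow;
import org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix;

// Build a tiny 2 x 2 IndexedRowMatrix and write it to HDFS (illustrative values only).
JavaRDD<IndexedRow> rowRdd = sc.parallelize(Arrays.asList(
    new IndexedRow(0, Vectors.dense(1.0, 2.0)),
    new IndexedRow(1, Vectors.dense(3.0, 4.0))));
IndexedRowMatrix matrix = new IndexedRowMatrix(rowRdd.rdd());
IO.writeMatrixToFileInHDFS("hdfs:///tmp/matrix.txt", matrix, new Configuration());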
Example 3: calTermSimfromMatrix
import org.apache.spark.api.java.JavaRDD; // import the package/class the method depends on
/**
 * Calculate term similarity from a CSV matrix.
 *
 * @param csvFileName CSV file of the matrix; each row is a term and each column is a
 *        dimension in the feature space
 * @param simType the type of similarity calculation to execute, e.g.
 *        <ul>
 *        <li>{@link org.apache.sdap.mudrod.utils.SimilarityUtil#SIM_COSINE} - 3,</li>
 *        <li>{@link org.apache.sdap.mudrod.utils.SimilarityUtil#SIM_HELLINGER} - 2,</li>
 *        <li>{@link org.apache.sdap.mudrod.utils.SimilarityUtil#SIM_PEARSON} - 1</li>
 *        </ul>
 * @param skipRow number of rows to skip in the input CSV file, e.g. a header row
 * @return linkage triple list
 */
public List<LinkageTriple> calTermSimfromMatrix(String csvFileName, int simType, int skipRow) {
  JavaPairRDD<String, Vector> importRDD = MatrixUtil.loadVectorFromCSV(spark, csvFileName, skipRow);
  if (importRDD.values().first().size() == 0) {
    return null;
  }
  JavaRDD<LinkageTriple> triples = SimilarityUtil.calculateSimilarityFromVector(importRDD, simType);
  // collect() returns the distributed similarity triples as a local List on the driver.
  return triples.collect();
}
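A hypothetical invocation of the method above; linkageCalculator stands for an instance of the enclosing class, the CSV path and skipRow value are placeholders, and SIM_COSINE is one of the constants listed in the Javadoc:

// Skip one header row and use cosine similarity (hypothetical call).
List<LinkageTriple> triples =
    linkageCalculator.calTermSimfromMatrix("term_matrix.csv", SimilarityUtil.SIM_COSINE, 1);
if (triples != null) {
  for (LinkageTriple triple : triples) {
    System.out.println(triple);
  }
}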