This article collects typical usage examples of the org.apache.spark.mllib.linalg.Vectors class in Java. If you are unsure how the Vectors class is used in practice, or are looking for concrete examples of Vectors in action, the hand-picked code samples below may help.
The Vectors class belongs to the org.apache.spark.mllib.linalg package. Fifteen code examples of the class are shown below, sorted by popularity by default.
Example 1: parseLabeledPoint
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
public JavaRDD<LabeledPoint> parseLabeledPoint(JavaRDD<String> data, String sep, int targetClassIdx) {
    JavaRDD<LabeledPoint> parsedData = data.map(line -> {
        String[] features = line.split(sep);
        double[] v = new double[features.length - 1];
        // Supports negative indices (e.g. -1 for the last column) for the target class.
        int targetIdx = (features.length + targetClassIdx) % features.length;
        int idx = 0;
        for (int i = 0; i < features.length; i++) {
            if (i == targetIdx) {
                continue;
            } else {
                v[idx] = Double.parseDouble(features[i]);
                idx += 1;
            }
        }
        return new LabeledPoint(Double.parseDouble(features[targetIdx]), Vectors.dense(v));
    });
    return parsedData;
}
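For context, a minimal sketch of how parseLabeledPoint might be called is shown below. The SparkContext setup, the input file path, and the parser instance are assumptions made for illustration; they are not part of the original example.
// Hypothetical usage sketch; `parser` is assumed to be an instance of the class defining parseLabeledPoint.
SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("ParseLabeledPointDemo");
JavaSparkContext jsc = new JavaSparkContext(conf);
JavaRDD<String> csvLines = jsc.textFile("data/iris.csv");                     // assumed input file
JavaRDD<LabeledPoint> points = parser.parseLabeledPoint(csvLines, ",", -1);   // -1: last column is the label
points.take(5).forEach(p -> System.out.println(p.label() + " -> " + p.features()));
jsc.close();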
Example 2: comp
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
/**
 * Compares two search results using the trained classifier.
 *
 * @param o1 search result 1
 * @param o2 search result 2
 * @return 1 if o1 is greater than o2, 0 otherwise
 */
public int comp(SResult o1, SResult o2) {
    List<Double> instList = new ArrayList<>();
    for (int i = 0; i < SResult.rlist.length; i++) {
        double o2Score = SResult.get(o2, SResult.rlist[i]);
        double o1Score = SResult.get(o1, SResult.rlist[i]);
        instList.add(o2Score - o1Score);
    }
    double[] ins = instList.stream().mapToDouble(i -> i).toArray();
    // The label (99.0) is a dummy value; only the feature vector is used for classification.
    LabeledPoint insPoint = new LabeledPoint(99.0, Vectors.dense(ins));
    double prediction = le.classify(insPoint);
    if (equalComp(prediction, 1)) { // different from Weka, where the return value is 1 or 2
        return 0;
    } else {
        return 1;
    }
}
Example 3: sparseVectorTimesMatrix
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
static org.apache.spark.mllib.linalg.Vector sparseVectorTimesMatrix(org.apache.spark.mllib.linalg.Vector sparseVector, Matrix matrix) {
    int matrixCols = matrix.numCols();
    int[] indices;
    ArrayList<Tuple2<Integer, Double>> tupleList = new ArrayList<Tuple2<Integer, Double>>();
    for (int col = 0; col < matrixCols; col++) {
        indices = ((SparseVector) sparseVector).indices();
        int index = 0, i = 0;
        double value = 0;
        double dotRes = 0;
        // Dot product of the sparse vector with column `col` of the matrix.
        for (i = 0; i < indices.length; i++) {
            index = indices[i];
            value = sparseVector.apply(index);
            dotRes += matrix.getQuick(index, col) * value;
        }
        // Only keep non-zero entries in the resulting sparse vector.
        if (dotRes != 0) {
            Tuple2<Integer, Double> tuple = new Tuple2<Integer, Double>(col, dotRes);
            tupleList.add(tuple);
        }
    }
    org.apache.spark.mllib.linalg.Vector sparkVector = Vectors.sparse(matrixCols, tupleList);
    return sparkVector;
}
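The result is assembled with the Java-friendly Vectors.sparse(int, Iterable<Tuple2<Integer, Double>>) overload. A small self-contained sketch of that overload, independent of the matrix code above:
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import scala.Tuple2;
import java.util.ArrayList;
import java.util.List;

public class SparseFromTuplesDemo {
    public static void main(String[] args) {
        // Build a length-5 sparse vector with non-zeros at positions 1 and 3.
        List<Tuple2<Integer, Double>> entries = new ArrayList<>();
        entries.add(new Tuple2<>(1, 2.5));
        entries.add(new Tuple2<>(3, -1.0));
        Vector v = Vectors.sparse(5, entries);
        System.out.println(v);          // (5,[1,3],[2.5,-1.0])
        System.out.println(v.apply(3)); // -1.0
    }
}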
Example 4: transform
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
public LabeledPoint transform(Tuple2<Double, Multiset<String>> doc) {
    double label = doc._1();
    List<Tuple2<Integer, Double>> vector = new ArrayList<>();
    for (Multiset.Entry<String> entry : doc._2().entrySet()) {
        String word = entry.getElement();
        int tf = entry.getCount();
        Tuple2<Integer, Long> wordInfo = idf.get(word);
        if (wordInfo != null) {
            // Assumes the tuple holds (term index, document frequency);
            // the original snippet read _2() for both fields, which looks like a copy-paste slip.
            int index = wordInfo._1();
            int numDocs = (int) this.newsCount;
            int df = wordInfo._2().intValue();
            double tfidf = this.calculate(tf, df, numDocs);
            vector.add(new Tuple2<>(index, tfidf));
        }
    }
    Vector features = Vectors.sparse((int) featuresCount, vector);
    return new LabeledPoint(label, features);
}
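The calculate(tf, df, numDocs) helper is not shown in this snippet. As an illustration only, a plain smoothed TF-IDF weighting could look like the sketch below; the exact formula used by the original project may differ.
// Hypothetical TF-IDF helper; the original project's formula is not shown and may differ.
private double calculate(int tf, int df, int numDocs) {
    double idf = Math.log((numDocs + 1.0) / (df + 1.0)) + 1.0; // smoothed inverse document frequency
    return tf * idf;
}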
Example 5: tokensToSparseVector
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
public Vector tokensToSparseVector(String[] tokens) {
    List<Integer> indices = new ArrayList<>();
    for (String token : tokens) {
        String stem = Stemmer.stemWord(token);
        if (!noiseWords.contains(stem) && validWord(stem)) {
            if (!wordMap.containsKey(stem)) {
                wordMap.put(stem, startingWordIndex++);
            }
            indices.add(wordMap.get(stem));
        }
    }
    // Size the index/value arrays to the number of observed words; the vector itself keeps size MAX_WORDS.
    int[] ind = new int[indices.size()];
    double[] vals = new double[indices.size()];
    for (int i = 0, len = indices.size(); i < len; i++) {
        int index = indices.get(i);
        ind[i] = index;
        vals[i] = 1d;
    }
    Vector ret = Vectors.sparse(MAX_WORDS, ind, vals);
    return ret;
}
Example 6: convertRealMatrixToSparkRowMatrix
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
/**
 * Create a distributed matrix given an Apache Commons RealMatrix.
 *
 * @param sc Never {@code null}
 * @param realMat Apache Commons RealMatrix. Never {@code null}
 * @param numSlices Number of partitions for the resulting RDD of rows
 * @return A distributed Spark matrix
 */
public static RowMatrix convertRealMatrixToSparkRowMatrix(JavaSparkContext sc, RealMatrix realMat, int numSlices) {
    logger.info("Converting matrix to distributed Spark matrix...");
    final double[][] dataArray = realMat.getData();
    final LinkedList<Vector> rowsList = new LinkedList<>();
    for (final double[] i : dataArray) {
        final Vector currentRow = Vectors.dense(i);
        rowsList.add(currentRow);
    }
    // We may want to swap out this static value for something dynamic (as shown below), but this seems to slow it down.
    // final int totalSpace = realMat.getColumnDimension() * realMat.getRowDimension() * Double.BYTES;
    // // Want the partitions to be ~100KB of space
    // final int slices = totalSpace / 100000;
    final JavaRDD<Vector> rows = sc.parallelize(rowsList, numSlices);
    // Create a RowMatrix from JavaRDD<Vector>.
    final RowMatrix mat = new RowMatrix(rows.rdd());
    logger.info("Done converting matrix to distributed Spark matrix...");
    return mat;
}
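A hedged usage sketch: converting a small Apache Commons Math RealMatrix and asking Spark for its dimensions. The JavaSparkContext variable sc is assumed to already exist; it is not created by this snippet.
// Assumes a JavaSparkContext `sc` is already available.
// Array2DRowRealMatrix comes from org.apache.commons.math3.linear.
RealMatrix realMat = new Array2DRowRealMatrix(new double[][] {
        {1.0, 2.0},
        {3.0, 4.0},
        {5.0, 6.0}
});
RowMatrix distributed = convertRealMatrixToSparkRowMatrix(sc, realMat, 2);
System.out.println(distributed.numRows() + " x " + distributed.numCols()); // 3 x 2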
Example 7: call
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
@Override
public Vector call(String[] tokens) throws MLModelBuilderException {
    try {
        double[] features = new double[indices.size()];
        int i = 0;
        for (int j : indices) {
            // Non-numeric tokens are left at the default value 0.0.
            if (NumberUtils.isNumber(tokens[j])) {
                features[i] = Double.parseDouble(tokens[j]);
            }
            i++;
        }
        return Vectors.dense(features);
    } catch (Exception e) {
        throw new MLModelBuilderException(
                "An error occurred while converting tokens to vectors: " + e.getMessage(), e);
    }
}
Example 8: main
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setMaster("local").setAppName("SparkStreamsSampleTrainingApplication");
    JavaSparkContext jsc = new JavaSparkContext(conf);
    JavaRDD<String> lines = jsc.textFile("data/random_2d_training.csv");
    JavaRDD<Vector> parsedData = lines.map(
            new Function<String, Vector>() {
                @Override
                public Vector call(String s) {
                    String[] sarray = s.split(",");
                    double[] values = new double[sarray.length];
                    for (int i = 0; i < sarray.length; i++) {
                        values[i] = Double.parseDouble(sarray[i]);
                    }
                    return Vectors.dense(values);
                }
            }
    );
    parsedData.cache();
    int numClusters = 10;
    int numIterations = 20;
    KMeansModel clusters = KMeans.train(parsedData.rdd(), numClusters, numIterations);
    clusters.save(jsc.sc(), "etc/kmeans_model");
    jsc.close();
}
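Once the model has been saved, it can be loaded back and used for prediction in a separate job; a minimal sketch is below. The scoring application name and the test point are made up, while the model path reuses the one above.
// Hypothetical follow-up job: reload the saved model and score one made-up 2-D point.
SparkConf scoringConf = new SparkConf().setMaster("local").setAppName("KMeansScoringDemo");
JavaSparkContext scoringContext = new JavaSparkContext(scoringConf);
KMeansModel model = KMeansModel.load(scoringContext.sc(), "etc/kmeans_model");
int clusterId = model.predict(Vectors.dense(0.25, -1.3));
System.out.println("Assigned cluster: " + clusterId);
scoringContext.close();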
Example 9: predict
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
@Override
public Double predict(double[] point) throws DDFException {
    MLClassMethods.PredictMethod predictMethod = new MLClassMethods.PredictMethod(this.getRawModel(), MLClassMethods.DEFAULT_PREDICT_METHOD_NAME,
            new Class<?>[]{Vector.class});
    if (predictMethod.getMethod() == null) {
        throw new DDFException(String.format("Cannot locate method specified by %s", MLClassMethods.DEFAULT_PREDICT_METHOD_NAME));
    }
    Object prediction = predictMethod.instanceInvoke(Vectors.dense(point));
    if (prediction instanceof Double) {
        return (Double) prediction;
    } else if (prediction instanceof Integer) {
        return ((Integer) prediction).doubleValue();
    } else {
        throw new DDFException(String.format("Error getting prediction from model %s", this.getRawModel().getClass().getName()));
    }
}
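The reflection indirection above lets the wrapper work with any MLlib model that exposes a predict method. When the concrete model type is known, the direct equivalent is simply the model's own predict(Vector) call; the sketch below picks KMeansModel purely as an illustrative assumption.
// Direct, non-reflective equivalent when the concrete model type is known
// (KMeansModel is an assumed example; any model with predict(Vector) works the same way).
KMeansModel rawModel = (KMeansModel) this.getRawModel();
double prediction = rawModel.predict(Vectors.dense(point)); // int result widens to double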
Example 10: call
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
@Override
public LabeledPoint call(Tuple2<WritableComparable, HCatRecord> tuple) throws Exception {
    HCatRecord record = tuple._2();
    if (record == null) {
        log.info("@@@ Null record");
        return defaultLabeledPoint;
    }
    double[] features = new double[numFeatures];
    for (int i = 0; i < numFeatures; i++) {
        int featurePos = featurePositions[i];
        features[i] = featureValueMappers[i].call(record.get(featurePos));
    }
    double label = featureValueMappers[labelColumnPos].call(record.get(labelColumnPos));
    return new LabeledPoint(label, Vectors.dense(features));
}
Example 11: pointOf
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
/**
 * Returns a labeled point built from the writables,
 * where the final item is the label and the rest of the items are
 * features.
 * @param writables the writables
 * @return the labeled point
 */
public static LabeledPoint pointOf(Collection<Writable> writables) {
    double[] ret = new double[writables.size() - 1];
    int count = 0;
    double target = 0;
    for (Writable w : writables) {
        if (count < writables.size() - 1)
            ret[count++] = Float.parseFloat(w.toString());
        else
            target = Float.parseFloat(w.toString());
    }
    if (target < 0)
        throw new IllegalStateException("Target must be >= 0");
    return new LabeledPoint(target, Vectors.dense(ret));
}
Example 12: DGEMV
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
public static DenseVector DGEMV(double alpha, DistributedMatrix A, DenseVector x, double beta, DenseVector y, JavaSparkContext jsc) {
    // First form y := beta*y.
    if (beta != 1.0) {
        if (beta == 0.0) {
            y = Vectors.zeros(y.size()).toDense();
        } else {
            BLAS.scal(beta, y);
        }
    }
    if (alpha == 0.0) {
        return y;
    }
    DenseVector tmpVector = Vectors.zeros(y.size()).toDense();
    // Form y := alpha*A*x + y.
    // Case of IndexedRowMatrix
    if (A.getClass() == IndexedRowMatrix.class) {
        tmpVector = L2.DGEMV_IRW((IndexedRowMatrix) A, alpha, x, jsc);
    } else if (A.getClass() == CoordinateMatrix.class) {
        tmpVector = L2.DGEMV_COORD((CoordinateMatrix) A, alpha, x, jsc);
    } else if (A.getClass() == BlockMatrix.class) {
        tmpVector = L2.DGEMV_BCK((BlockMatrix) A, alpha, x, jsc);
    } else {
        tmpVector = null;
    }
    BLAS.axpy(1.0, tmpVector, y);
    return y;
}
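The routine mirrors the BLAS level-2 operation y := alpha*A*x + beta*y, only with a distributed matrix A. For intuition, the same update on plain local arrays is sketched below; this is illustrative only and not part of the library.
// Local, single-machine illustration of y := alpha*A*x + beta*y.
static double[] localDgemv(double alpha, double[][] A, double[] x, double beta, double[] y) {
    double[] out = new double[y.length];
    for (int row = 0; row < A.length; row++) {
        double dot = 0.0;
        for (int col = 0; col < x.length; col++) {
            dot += A[row][col] * x[col];
        }
        out[row] = alpha * dot + beta * y[row];
    }
    return out;
}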
Example 13: main
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setMaster("local[4]").setAppName("K-means Example");
    JavaSparkContext sc = new JavaSparkContext(conf);
    // Load and parse data
    String path = "data/km-data.txt";
    JavaRDD<String> data = sc.textFile(path);
    JavaRDD<Vector> parsedData = data.map(
            new Function<String, Vector>() {
                public Vector call(String s) {
                    String[] sarray = s.split(" ");
                    double[] values = new double[sarray.length];
                    for (int i = 0; i < sarray.length; i++)
                        values[i] = Double.parseDouble(sarray[i]);
                    return Vectors.dense(values);
                }
            }
    );
    parsedData.cache();
    // Cluster the data into two classes using KMeans
    int numClusters = 2;
    int numIterations = 20;
    KMeansModel clusters = KMeans.train(parsedData.rdd(), numClusters, numIterations);
    // Evaluate clustering by computing Within Set Sum of Squared Errors
    double WSSSE = clusters.computeCost(parsedData.rdd());
    System.out.println("Within Set Sum of Squared Errors = " + WSSSE);
}
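The trained model can also be inspected or applied back to the data. A short continuation sketch using the same variables as the main method above:
// Print the learned cluster centers and assign each parsed point to a cluster.
for (Vector center : clusters.clusterCenters()) {
    System.out.println("Cluster center: " + center);
}
JavaRDD<Integer> assignments = clusters.predict(parsedData);
System.out.println("First assignments: " + assignments.take(5));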
Example 14: postProcessing
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
Vector postProcessing(HashMap<String, Object> value) {
    org.apache.spark.mllib.linalg.Vector normedForVal;
    double[] values = new double[numberOfTargetValue];
    for (int j = 0; j < numberOfTargetValue; j++) {
        values[j] = 0;
        HashMap<String, Object> features = (HashMap<String, Object>) value.get(AthenaFeatureField.FEATURE);
        if (features.containsKey(listOfTargetFeatures.get(j).getValue())) {
            Object obj = features.get(listOfTargetFeatures.get(j).getValue());
            if (obj instanceof Long) {
                values[j] = (Long) obj;
            } else if (obj instanceof Double) {
                values[j] = (Double) obj;
            } else if (obj instanceof Boolean) {
                values[j] = (Boolean) obj ? 1 : 0;
            } else {
                return null;
            }
            // check weight
            if (weight.containsKey(listOfTargetFeatures.get(j))) {
                values[j] *= weight.get(listOfTargetFeatures.get(j));
            }
            // check absolute
            if (isAbsolute) {
                values[j] = Math.abs(values[j]);
            }
        }
    }
    if (isNormalization) {
        normedForVal = normalizer.transform(Vectors.dense(values));
    } else {
        normedForVal = Vectors.dense(values);
    }
    return normedForVal;
}
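The normalizer field is presumably an org.apache.spark.mllib.feature.Normalizer, judging by the transform(Vector) call; that is an assumption, since its declaration is not shown. A standalone sketch of what such a normalization step does:
// Assumed: `normalizer` behaves like org.apache.spark.mllib.feature.Normalizer.
Normalizer l2Normalizer = new Normalizer(2.0);         // p = 2 -> scale to unit L2 norm
Vector raw = Vectors.dense(3.0, 4.0);
Vector scaled = l2Normalizer.transform(raw);
System.out.println(scaled);                            // [0.6, 0.8]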
Example 15: parsedToVectorRDD
import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
private JavaRDD<Vector> parsedToVectorRDD(JavaRDD<String[]> parsedRDD) {
    return parsedRDD.map(data -> {
        try {
            return Vectors.dense(KMeansUtils.featuresFromTokens(data, inputSchema));
        } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
            log.warn("Bad input: {}", Arrays.toString(data));
            throw e;
        }
    });
}