

Java Vectors Class Code Examples

This article collects typical usage examples of the Java class org.apache.spark.mllib.linalg.Vectors. If you are wrestling with questions such as what exactly the Vectors class does, how to use it, or what real code that uses it looks like, the curated examples below should help.


The Vectors class belongs to the org.apache.spark.mllib.linalg package. Fifteen code examples of the class are shown below, ordered by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Java code examples.
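
Before diving into the examples, here is a minimal self-contained sketch of the factory methods that recur below: Vectors.dense for dense vectors and Vectors.sparse for sparse ones, including the Java-friendly overload taking (index, value) tuples that Examples 3 and 4 rely on. The class name VectorsPrimer is illustrative; only Spark MLlib is assumed on the classpath.

import java.util.Arrays;
import java.util.List;

import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;

import scala.Tuple2;

public class VectorsPrimer {  // illustrative class name
    public static void main(String[] args) {
        // Dense vector: every component is stored explicitly.
        Vector dense = Vectors.dense(1.0, 0.0, 3.0);

        // Sparse vector of size 3 with non-zeros at indices 0 and 2;
        // indices must be strictly increasing.
        Vector sparse = Vectors.sparse(3, new int[]{0, 2}, new double[]{1.0, 3.0});

        // Java-friendly overload taking (index, value) tuples.
        List<Tuple2<Integer, Double>> entries =
                Arrays.asList(new Tuple2<>(0, 1.0), new Tuple2<>(2, 3.0));
        Vector sparse2 = Vectors.sparse(3, entries);

        System.out.println(dense + " | " + sparse + " | " + sparse2);
    }
}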

Example 1: parseLabeledPoint

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
public JavaRDD<LabeledPoint> parseLabeledPoint(JavaRDD<String> data, String sep, int targetClassIdx){
    JavaRDD<LabeledPoint> parsedData = data.map(line -> {
      String[] features = line.split(sep);
      double[] v = new double[features.length-1];
      // Wrap negative indices so that, e.g., targetClassIdx = -1 selects the last column.
      int targetIdx = (features.length + targetClassIdx) % features.length;
      int idx = 0;
      for (int i = 0; i < features.length; i++) {
        if (i == targetIdx) {
          continue;
        }
        v[idx] = Double.parseDouble(features[i]);
        idx += 1;
      }
      return new LabeledPoint(Double.parseDouble(features[targetIdx]), Vectors.dense(v));
    });
    return parsedData;
}
 
Author: Chih-Ling-Hsu, Project: Spark-Machine-Learning-Modules, Lines: 20, Source: ParsingUnit.java
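
A hypothetical driver for this method (the ParsingUnit constructor, CSV path, and label index -1 are illustrative assumptions, not taken from the original project):

SparkConf conf = new SparkConf().setMaster("local").setAppName("ParseDemo");
JavaSparkContext jsc = new JavaSparkContext(conf);
JavaRDD<String> raw = jsc.textFile("data/labeled.csv");  // assumed path
// targetClassIdx = -1 wraps around to the last column, which becomes the label.
JavaRDD<LabeledPoint> points = new ParsingUnit().parseLabeledPoint(raw, ",", -1);
points.take(3).forEach(System.out::println);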

Example 2: comp

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
/**
 * Compares two search results.
 *
 * @param o1 search result 1
 * @param o2 search result 2
 * @return 1 if o1 is greater than o2, 0 otherwise
 */
public int comp(SResult o1, SResult o2) {
  List<Double> instList = new ArrayList<>();
  for (int i = 0; i < SResult.rlist.length; i++) {
    double o2Score = SResult.get(o2, SResult.rlist[i]);
    double o1Score = SResult.get(o1, SResult.rlist[i]);
    instList.add(o2Score - o1Score);
  }

  double[] ins = instList.stream().mapToDouble(i -> i).toArray();
  // The label value (99.0) appears to be a placeholder; only the feature vector matters for classification.
  LabeledPoint insPoint = new LabeledPoint(99.0, Vectors.dense(ins));
  double prediction = le.classify(insPoint);
  if (equalComp(prediction, 1)) { //different from weka where the return value is 1 or 2
    return 0;
  } else {
    return 1;
  }
}
 
Author: apache, Project: incubator-sdap-mudrod, Lines: 25, Source: Ranker.java

Example 3: sparseVectorTimesMatrix

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
static org.apache.spark.mllib.linalg.Vector sparseVectorTimesMatrix(org.apache.spark.mllib.linalg.Vector sparseVector, Matrix matrix) {
    int matrixCols = matrix.numCols();
    // The non-zero indices of the sparse vector are the same for every column, so look them up once.
    int[] indices = ((SparseVector) sparseVector).indices();
    ArrayList<Tuple2<Integer, Double>> tupleList = new ArrayList<>();
    for (int col = 0; col < matrixCols; col++) {
        double dotRes = 0;
        for (int i = 0; i < indices.length; i++) {
            int index = indices[i];
            dotRes += matrix.getQuick(index, col) * sparseVector.apply(index);
        }
        if (dotRes != 0) {
            tupleList.add(new Tuple2<Integer, Double>(col, dotRes));
        }
    }
    return Vectors.sparse(matrixCols, tupleList);
}
 
Author: SiddharthMalhotra, Project: sPCA, Lines: 26, Source: PCAUtils.java

Example 4: transform

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
public LabeledPoint transform(Tuple2<Double, Multiset<String>> doc) {
	double label = doc._1();
	List<Tuple2<Integer, Double>> vector = new ArrayList<>();
	for (Multiset.Entry<String> entry : doc._2().entrySet()) {
		String word = entry.getElement();
		int tf = entry.getCount();

		Tuple2<Integer, Long> wordInfo = idf.get(word);
		if (wordInfo != null) {
			// wordInfo holds (index in the vocabulary, document frequency).
			int index = wordInfo._1();
			int numDocs = (int) this.newsCount;
			int df = wordInfo._2().intValue();

			double tfidf = this.calculate(tf, df, numDocs);

			vector.add(new Tuple2<>(index, tfidf));
		}
	}
	Vector features = Vectors.sparse((int) featuresCount, vector);

	return new LabeledPoint(label, features);
}
 
Author: mhardalov, Project: news-credibility, Lines: 23, Source: TFIDFTransform.java

Example 5: tokensToSparseVector

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
public Vector tokensToSparseVector(String[] tokens) {
  List<Integer> indices = new ArrayList<>();
  for (String token : tokens) {
    String stem = Stemmer.stemWord(token);
    if (!noiseWords.contains(stem) && validWord(stem)) {
      if (!wordMap.containsKey(stem)) {
        wordMap.put(stem, startingWordIndex++);
      }
      indices.add(wordMap.get(stem));
    }
  }
  // Size the parallel arrays to the number of hits; padding them out to MAX_WORDS
  // would add spurious (0, 0.0) entries to the sparse vector.
  int[] ind = new int[indices.size()];
  double[] vals = new double[indices.size()];
  for (int i = 0, len = indices.size(); i < len; i++) {
    ind[i] = indices.get(i);
    vals[i] = 1d;
  }
  return Vectors.sparse(MAX_WORDS, ind, vals);
}
 
Author: mark-watson, Project: power-java, Lines: 23, Source: TextToSparseVector.java
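
A hypothetical call (the class name TextToSparseVector comes from the source-file metadata above; its no-argument constructor and the token array are assumptions):

TextToSparseVector vectorizer = new TextToSparseVector();  // assumed constructor
Vector v = vectorizer.tokensToSparseVector(new String[]{"running", "jumped", "quickly"});
System.out.println(v);  // sparse vector with 1.0 at each mapped stem index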

Example 6: convertRealMatrixToSparkRowMatrix

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
/**
 * Create a distributed matrix given an Apache Commons RealMatrix.
 *
 * @param sc Spark context.  Never {@code null}
 * @param realMat Apache Commons RealMatrix.  Never {@code null}
 * @param numSlices Number of partitions to use for the row RDD
 * @return A distributed Spark matrix
 */
public static RowMatrix convertRealMatrixToSparkRowMatrix(JavaSparkContext sc, RealMatrix realMat, int numSlices) {
    logger.info("Converting matrix to distributed Spark matrix...");
    final double [][] dataArray = realMat.getData();
    final LinkedList<Vector> rowsList = new LinkedList<>();
    for (final double [] i : dataArray) {
        final Vector currentRow = Vectors.dense(i);
        rowsList.add(currentRow);
    }

    // We may want to swap out this static value for something dynamic (as shown below), but this seems to slow it down.
    // final int totalSpace = realMat.getColumnDimension() * realMat.getRowDimension() * Double.BYTES;
    // // Want the partitions to be ~100KB of space
    // final int slices = totalSpace/100000;
    final JavaRDD<Vector> rows = sc.parallelize(rowsList, numSlices);

    // Create a RowMatrix from JavaRDD<Vector>.
    final RowMatrix mat = new RowMatrix(rows.rdd());
    logger.info("Done converting matrix to distributed Spark matrix...");
    return mat;
}
 
Author: broadinstitute, Project: gatk-protected, Lines: 28, Source: SparkConverter.java
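
A minimal usage sketch, assuming a live local JavaSparkContext named sc and Apache Commons Math on the classpath (the 2x2 matrix and slice count are illustrative):

import org.apache.commons.math3.linear.Array2DRowRealMatrix;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.spark.mllib.linalg.distributed.RowMatrix;

RealMatrix realMat = new Array2DRowRealMatrix(new double[][]{{1.0, 2.0}, {3.0, 4.0}});
RowMatrix distributed = SparkConverter.convertRealMatrixToSparkRowMatrix(sc, realMat, 2);
System.out.println(distributed.numRows() + " x " + distributed.numCols());  // 2 x 2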

Example 7: call

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
@Override
public Vector call(String[] tokens) throws MLModelBuilderException {
    try {
        double[] features = new double[indices.size()];
        int i = 0;
        for (int j : indices) {
            // Non-numeric tokens are skipped, leaving the default 0.0 at that position.
            if (NumberUtils.isNumber(tokens[j])) {
                features[i] = Double.parseDouble(tokens[j]);
            }
            i++;
        }
        return Vectors.dense(features);
    } catch (Exception e) {
        throw new MLModelBuilderException(
                "An error occurred while converting tokens to vectors: " + e.getMessage(), e);
    }
}
 
Author: wso2-attic, Project: carbon-ml, Lines: 18, Source: TokensToVectors.java

Example 8: main

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setMaster("local").setAppName("SparkStreamsSampleTrainingApplication");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    JavaRDD<String> lines = jsc.textFile("data/random_2d_training.csv");
    JavaRDD<Vector> parsedData = lines.map(
        new Function<String, Vector>() {
            @Override
            public Vector call(String s) {
                String[] sarray = s.split(",");
                double[] values = new double[sarray.length];
                for (int i = 0; i < sarray.length; i++) {
                    values[i] = Double.parseDouble(sarray[i]);
                }
                return Vectors.dense(values);
            }
        }
    );
    parsedData.cache();

    int numClusters = 10;
    int numIterations = 20;
    KMeansModel clusters = KMeans.train(parsedData.rdd(), numClusters, numIterations);
    clusters.save(jsc.sc(), "etc/kmeans_model");
    jsc.close();
}
 
Author: IBMStreams, Project: streamsx.sparkMLLib, Lines: 27, Source: JavaTrainingApplication.java
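
A scoring application can later reload the saved model; a minimal sketch (the path matches the save call above, and jsc is a fresh JavaSparkContext):

KMeansModel model = KMeansModel.load(jsc.sc(), "etc/kmeans_model");
int cluster = model.predict(Vectors.dense(1.5, -0.3));  // assign a new 2-D point to its nearest centroid
System.out.println("assigned to cluster " + cluster);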

Example 9: predict

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
@Override
public Double predict(double[] point) throws DDFException {
  MLClassMethods.PredictMethod predictMethod = new MLClassMethods.PredictMethod(this.getRawModel(),
      MLClassMethods.DEFAULT_PREDICT_METHOD_NAME, new Class<?>[]{Vector.class});
  if (predictMethod.getMethod() == null) {
    throw new DDFException(String.format("Cannot locate method specified by %s", MLClassMethods.DEFAULT_PREDICT_METHOD_NAME));
  }
  Object prediction = predictMethod.instanceInvoke(Vectors.dense(point));
  if(prediction instanceof Double) {
    return (Double) prediction;
  } else if (prediction instanceof Integer) {
    return ((Integer) prediction).doubleValue();
  } else {
    throw new DDFException(String.format("Error getting prediction from model %s", this.getRawModel().getClass().getName()));
  }
}
 
Author: ddf-project, Project: DDF, Lines: 18, Source: Model.java

Example 10: call

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
@Override
public LabeledPoint call(Tuple2<WritableComparable, HCatRecord> tuple) throws Exception {
  HCatRecord record = tuple._2();

  if (record == null) {
    log.info("@@@ Null record");
    return defaultLabeledPoint;
  }

  double[] features = new double[numFeatures];

  for (int i = 0; i < numFeatures; i++) {
    int featurePos = featurePositions[i];
    features[i] = featureValueMappers[i].call(record.get(featurePos));
  }

  double label = featureValueMappers[labelColumnPos].call(record.get(labelColumnPos));
  return new LabeledPoint(label, Vectors.dense(features));
}
 
Author: apache, Project: lens, Lines: 20, Source: ColumnFeatureFunction.java

Example 11: pointOf

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
/**
 * Returns a labeled point built from the given writables, where the
 * final item is the target (label) and the preceding items are the
 * features
 * @param writables the writables
 * @return the labeled point
 */
public static LabeledPoint pointOf(Collection<Writable> writables) {
    double[] ret = new double[writables.size() - 1];
    int count = 0;
    double target = 0;
    for (Writable w : writables) {
        if (count < writables.size() - 1)
            ret[count++] = Float.parseFloat(w.toString());
        else
            target = Float.parseFloat(w.toString());
    }

    if (target < 0)
        throw new IllegalStateException("Target must be >= 0");
    return new LabeledPoint(target, Vectors.dense(ret));
}
 
Author: deeplearning4j, Project: deeplearning4j, Lines: 23, Source: MLLibUtil.java
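
A hypothetical call using DataVec writables (the DoubleWritable import path and the values are assumptions; older versions of the project used Canova's equivalent types):

List<Writable> row = Arrays.asList(
    new DoubleWritable(0.1), new DoubleWritable(0.7),  // features
    new DoubleWritable(1.0));                          // target in the last position
LabeledPoint lp = MLLibUtil.pointOf(row);
System.out.println(lp);  // label 1.0 with features [0.1, 0.7]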

Example 12: DGEMV

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
public static DenseVector DGEMV(double alpha, DistributedMatrix A, DenseVector x, double beta, DenseVector y, JavaSparkContext jsc) {
    // First form y := beta*y.
    if (beta != 1.0) {
        if (beta == 0.0) {
            y = Vectors.zeros(y.size()).toDense();
        } else {
            BLAS.scal(beta, y);
        }
    }

    if (alpha == 0.0) {
        return y;
    }

    // Form y := alpha*A*x + y, dispatching on the concrete distributed-matrix type.
    DenseVector tmpVector;
    if (A.getClass() == IndexedRowMatrix.class) {
        tmpVector = L2.DGEMV_IRW((IndexedRowMatrix) A, alpha, x, jsc);
    } else if (A.getClass() == CoordinateMatrix.class) {
        tmpVector = L2.DGEMV_COORD((CoordinateMatrix) A, alpha, x, jsc);
    } else if (A.getClass() == BlockMatrix.class) {
        tmpVector = L2.DGEMV_BCK((BlockMatrix) A, alpha, x, jsc);
    } else {
        // Failing fast beats returning null, which would NPE inside BLAS.axpy below.
        throw new IllegalArgumentException("Unsupported DistributedMatrix type: " + A.getClass().getName());
    }

    BLAS.axpy(1.0, tmpVector, y);
    return y;
}
 
Author: jmabuin, Project: BLASpark, Lines: 40, Source: L2.java

Example 13: main

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setMaster("local[4]").setAppName("K-means Example");
    JavaSparkContext sc = new JavaSparkContext(conf);

    // Load and parse data
    String path = "data/km-data.txt";
    JavaRDD<String> data = sc.textFile(path);
    JavaRDD<Vector> parsedData = data.map(
        new Function<String, Vector>() {
            public Vector call(String s) {
                String[] sarray = s.split(" ");
                double[] values = new double[sarray.length];
                for (int i = 0; i < sarray.length; i++) {
                    values[i] = Double.parseDouble(sarray[i]);
                }
                return Vectors.dense(values);
            }
        }
    );
    parsedData.cache();

    // Cluster the data into two classes using KMeans
    int numClusters = 2;
    int numIterations = 20;
    KMeansModel clusters = KMeans.train(parsedData.rdd(), numClusters, numIterations);

    // Evaluate clustering by computing Within Set Sum of Squared Errors
    double WSSSE = clusters.computeCost(parsedData.rdd());
    System.out.println("Within Set Sum of Squared Errors = " + WSSSE);
}
 
Author: PacktPublishing, Project: Java-Data-Science-Cookbook, Lines: 33, Source: KMeansClusteringMlib.java
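
Once trained, the model assigns new points to clusters; a short sketch continuing from the code above (the query point is illustrative):

Vector query = Vectors.dense(0.5, 0.5);
int clusterId = clusters.predict(query);  // index of the nearest centroid
System.out.println("point " + query + " -> cluster " + clusterId
    + " centered at " + clusters.clusterCenters()[clusterId]);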

Example 14: postProcessing

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
Vector postProcessing(HashMap<String, Object> value) {
    org.apache.spark.mllib.linalg.Vector normedForVal;

    // The feature map is the same for every target value, so fetch it once.
    HashMap<String, Object> features = (HashMap<String, Object>) value.get(AthenaFeatureField.FEATURE);

    double[] values = new double[numberOfTargetValue];
    for (int j = 0; j < numberOfTargetValue; j++) {
        values[j] = 0;
        if (features.containsKey(listOfTargetFeatures.get(j).getValue())) {
            Object obj = features.get(listOfTargetFeatures.get(j).getValue());
            if (obj instanceof Long) {
                values[j] = (Long) obj;
            } else if (obj instanceof Double) {
                values[j] = (Double) obj;
            } else if (obj instanceof Boolean) {
                values[j] = (Boolean) obj ? 1 : 0;
            } else {
                return null;
            }

            // check weight
            if (weight.containsKey(listOfTargetFeatures.get(j))) {
                values[j] *= weight.get(listOfTargetFeatures.get(j));
            }
            // check absolute
            if (isAbsolute) {
                values[j] = Math.abs(values[j]);
            }
        }
    }

    if (isNormalization) {
        normedForVal = normalizer.transform(Vectors.dense(values));
    } else {
        normedForVal = Vectors.dense(values);
    }

    return normedForVal;
}
 
Author: shlee89, Project: athena, Lines: 41, Source: OnlineFeatureHandler.java

Example 15: parsedToVectorRDD

import org.apache.spark.mllib.linalg.Vectors; // import the required package/class
private JavaRDD<Vector> parsedToVectorRDD(JavaRDD<String[]> parsedRDD) {
  return parsedRDD.map(data -> {
    try {
      return Vectors.dense(KMeansUtils.featuresFromTokens(data, inputSchema));
    } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) {
      log.warn("Bad input: {}", Arrays.toString(data));
      throw e;
    }
  });
}
 
Author: oncewang, Project: oryx2, Lines: 11, Source: KMeansUpdate.java


Note: The org.apache.spark.mllib.linalg.Vectors class examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors, so consult the corresponding project's license before distributing or using it. Please do not republish without permission.