本文整理汇总了Java中org.apache.mahout.math.Vector类的典型用法代码示例。如果您正苦于以下问题:Java Vector类的具体用法?Java Vector怎么用?Java Vector使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
Vector类属于org.apache.mahout.math包,在下文中一共展示了Vector类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: produceSamples
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Generates sample points for every configured cluster and appends them to {@code target}.
 *
 * <p>{@code genParams[k][d]} must hold exactly two values for dimension {@code d} of cluster
 * {@code k}; they are handed, in order, to the {@code GaussianGenerator} constructor (per the
 * error messages these are presumably the mean and the standard deviation). Every cluster
 * receives {@code ceil(numSamples / clusterCount)} points, so the total produced can be larger
 * than {@code numSamples}.
 *
 * @param target list that receives the generated points
 * @return the number of points per cluster multiplied by the number of clusters
 * @throws Exception if a cluster does not describe exactly {@code dimension} dimensions, or a
 *     dimension does not carry exactly two parameters
 */
public long produceSamples(List<Vector> target) throws Exception {
  long numTotal = this.numSamples;
  int centroidNum = genParams.length;
  int numPerCluster = (int) Math.ceil((double) numTotal / (double) centroidNum);
  LOG.info("Cluster number=" + centroidNum + " numbers per cluster=" + numPerCluster);

  // One generator per dimension; the array is refilled for every cluster.
  GaussianGenerator[] gg = new GaussianGenerator[dimension];
  for (int k = 0; k < genParams.length; k++) {
    if (genParams[k].length != dimension) {
      throw new Exception("The dimension of mean vector or std vector does not match desired dimension!");
    }
    for (int d = 0; d < dimension; d++) {
      if (genParams[k][d].length != 2) {
        throw new Exception("The dimension of mean vector or std vector does not match desired dimension");
      }
      gg[d] = new GaussianGenerator(genParams[k][d][0], genParams[k][d][1], rng);
    }

    // Scratch buffer reused for every point; its contents are handed to assign() each time.
    double[] vec = new double[dimension];
    for (int i = 0; i < numPerCluster; i++) {
      for (int d = 0; d < dimension; d++) {
        vec[d] = gg[d].nextValue();
      }
      Vector p = new RandomAccessSparseVector(dimension);
      p.assign(vec);
      target.add(p);
    }
  }

  // Widen before multiplying: an int * int product would overflow silently before being
  // converted to the long return type.
  return (long) numPerCluster * centroidNum;
}
示例2: AtBMapper
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Accumulates the mapper-side term {@code Sum(Yi' x (Xi-Xm))} into {@code resMatrix}.
 *
 * <pre>
 * Mi = (Yi-Ym)' x (Xi-Xm) = Yi' x (Xi-Xm) - Ym' x (Xi-Xm)
 *
 * M = Sum(Mi) = Sum(Yi' x (Xi-Xm)) - Ym' x (Sum(Xi)-N*Xm)
 * </pre>
 *
 * The first part is done in mapper and the second in the combiner.
 *
 * @param yi row of Y; only its non-zero entries are visited
 * @param ym not read by this method
 * @param xi row of X; its size determines how many columns are updated
 * @param xm vector subtracted element-wise from {@code xi}
 * @param resMatrix accumulator updated in place at {@code (index of yi entry, column)}
 */
private void AtBMapper(Vector yi, Vector ym, Vector xi, Vector xm,
    DenseMatrix resMatrix) {
  final int columns = xi.size();
  for (Vector.Element entry : yi.nonZeroes()) {
    final int row = entry.index();
    final double factor = entry.get();
    for (int col = 0; col < columns; col++) {
      final double centered = xi.getQuick(col) - xm.getQuick(col);
      final double updated = resMatrix.getQuick(row, col) + centered * factor;
      resMatrix.setQuick(row, col, updated);
    }
  }
}
示例3: observe
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Folds one weighted observation into the running sums {@code s0}, {@code s1} and {@code s2}.
 *
 * <p>{@code s0} accumulates the weights, {@code s1} accumulates {@code x.times(weight)} and
 * {@code s2} accumulates {@code x.times(x).times(weight)}. The vector sums are created from the
 * first observation and updated in place afterwards.
 *
 * @param x the observed vector
 * @param weight the weight applied to this observation
 */
@Override
public void observe(Vector x, double weight) {
  s0 += weight;

  Vector firstOrder = x.times(weight);
  if (s1 != null) {
    s1.assign(firstOrder, Functions.PLUS);
  } else {
    s1 = firstOrder;
  }

  // presumably times(Vector) is Mahout's element-wise product, making this the weighted square
  Vector secondOrder = x.times(x).times(weight);
  if (s2 != null) {
    s2.assign(secondOrder, Functions.PLUS);
  } else {
    s2 = secondOrder;
  }
}
示例4: reduce
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Sums all vectors received for {@code key}, optionally normalizes the sum, and writes it out.
 *
 * <p>The first value is cloned and every later value is added into that clone. When
 * {@code normPower} differs from {@code PartialVectorMerger.NO_NORMALIZING} the sum is replaced
 * by {@code logNormalize(normPower)} or {@code normalize(normPower)}, depending on
 * {@code logNormalize}.
 *
 * @param key the key shared by all {@code values}; written unchanged
 * @param values the partial vectors to merge
 * @param context receives the single merged output record
 * @throws IOException propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void reduce(WritableComparable<?> key, Iterable<VectorWritable> values, Context context)
    throws IOException, InterruptedException {
  // NOTE(review): stays null if values yields nothing; the code below does not guard that case.
  Vector merged = null;
  for (VectorWritable value : values) {
    if (merged != null) {
      merged.assign(value.get(), Functions.PLUS);
    } else {
      // Clone so the accumulation never mutates the vector held by the incoming writable.
      merged = value.get().clone();
    }
  }

  if (normPower != PartialVectorMerger.NO_NORMALIZING) {
    merged = logNormalize ? merged.logNormalize(normPower) : merged.normalize(normPower);
  }

  context.write(key, new VectorWritable(merged));
}
示例5: vectorAssign
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Updates {@code vector} in place with {@code function(vector[i], other[i])}, visiting only the
 * positions reported by {@code other.nonZeroes()}.
 *
 * <p>Positions that {@code other} does not report are left untouched; presumably that is only
 * valid because a {@code ZeroIndifferentFunc} leaves its first argument unchanged when the second
 * one is zero (confirm against that type's contract).
 *
 * @param vector
 *          the vector to be updated
 * @param other
 *          the other vector
 * @param function
 *          the function that operates on elements of the two vectors
 * @return the modified vector (the same instance as {@code vector})
 * @throws CardinalityException if the two vectors differ in size
 */
public static Vector vectorAssign(Vector vector, Vector other, ZeroIndifferentFunc function) {
  if (vector.size() != other.size()) {
    throw new CardinalityException(vector.size(), other.size());
  }

  Iterator<Element> nonZeros = other.nonZeroes().iterator();
  while (nonZeros.hasNext()) {
    Element entry = nonZeros.next();
    if (entry == null) {
      // A null element ends the traversal.
      break;
    }
    double current = vector.getQuick(entry.index());
    double combined = function.apply(current, entry.get());
    vector.setQuick(entry.index(), combined);
  }
  return vector;
}
示例6: printSequenceFile
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Dumps a summary of a sequence file of {@code IntWritable} keys and {@code VectorWritable}
 * values to standard output.
 *
 * <p>For every record one line is printed with the key, the number of entries reported by
 * {@code nonZeroes()} and the vector's {@code zSum()}. Afterwards the vector whose key equals
 * {@code printRow} is printed; if no record had that key, the key and value objects as left by
 * the last read are printed instead.
 *
 * @param inputStr path of the sequence file to read
 * @param printRow key of the row to print in full after the summary
 * @throws IOException if the file cannot be opened or read
 */
private static void printSequenceFile(String inputStr, int printRow) throws IOException {
  Configuration conf = new Configuration();
  Path finalNumberFile = new Path(inputStr);
  IntWritable key = new IntWritable();
  VectorWritable value = new VectorWritable();
  Vector printVector = null;

  SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf),
      finalNumberFile, conf);
  try {
    while (reader.next(key, value)) {
      if (key.get() == printRow) {
        printVector = value.get();
      }
      int cnt = 0;
      for (Iterator<Element> iter = value.get().nonZeroes().iterator(); iter.hasNext(); iter.next()) {
        cnt++;
      }
      System.out.println("# "+ key + " " + cnt + " " + value.get().zSum());
    }
  } finally {
    // Close even when reading fails so the underlying stream is not leaked.
    reader.close();
  }

  if (printVector != null) {
    System.out.println("##### "+ printRow + " " + printVector);
  } else {
    System.out.println("##### "+ key + " " + value.get());
  }
}
示例7: clusterPoints
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Reference mean-shift implementation: seeds canopies from the input points and then iterates
 * until the clusterer reports convergence or {@code numIter} iterations have run.
 *
 * @param points
 *          the input points
 * @param measure
 *          the DistanceMeasure to use
 * @param aKernelProfileDerivative
 *          the kernel profile passed to the clusterer
 * @param convergenceThreshold
 *          the convergence threshold passed to the clusterer
 * @param t1
 *          the T1 threshold passed to the clusterer
 * @param t2
 *          the T2 threshold passed to the clusterer
 * @param numIter
 *          the maximum number of iterations
 * @return the canopies returned by the last iteration, or the seeded canopies if none ran
 */
public static List<MeanShiftCanopy> clusterPoints(Iterable<Vector> points,
    DistanceMeasure measure, IKernelProfile aKernelProfileDerivative,
    double convergenceThreshold, double t1, double t2, int numIter) {
  MeanShiftCanopyClusterer clusterer = new MeanShiftCanopyClusterer(measure,
      aKernelProfileDerivative, t1, t2, convergenceThreshold, true);

  // Seed phase: every point becomes a canopy that is merged into the working list.
  List<MeanShiftCanopy> current = Lists.newArrayList();
  int canopyId = 0;
  for (Vector point : points) {
    MeanShiftCanopy seed = new MeanShiftCanopy(point, canopyId, measure);
    canopyId++;
    clusterer.mergeCanopy(seed, current);
  }

  // The clusterer reports convergence through this single-element out-parameter.
  boolean[] converged = { false };
  int round = 0;
  while (!converged[0] && round < numIter) {
    current = clusterer.iterate(current, converged);
    round++;
  }
  return current;
}
示例8: sampleFromPrior
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Creates {@code howMany} {@code GaussianCluster} models with randomly drawn parameters.
 *
 * <p>For each model, a vector shaped like the model prototype is filled with
 * {@code rNorm(0, 1)} draws and used as the first constructor argument, then a second such
 * vector is filled with {@code rNorm(1, 1)} draws and used as the second argument. The model's
 * position in the result array is passed as the third argument.
 *
 * @param howMany number of models to sample
 * @return an array of {@code howMany} newly created models
 */
@Override
public Model<VectorWritable>[] sampleFromPrior(int howMany) {
  Model<VectorWritable>[] samples = new GaussianCluster[howMany];
  int id = 0;
  while (id < howMany) {
    Vector prototype = getModelPrototype().get();

    // All mean components are drawn before any sd component; keep this order so the sequence
    // of random draws is unchanged.
    Vector mean = prototype.like();
    for (int pos = 0; pos < prototype.size(); pos++) {
      mean.set(pos, UncommonDistributions.rNorm(0, 1));
    }

    Vector sd = prototype.like();
    for (int pos = 0; pos < prototype.size(); pos++) {
      sd.set(pos, UncommonDistributions.rNorm(1, 1));
    }

    samples[id] = new GaussianCluster(mean, sd, id);
    id++;
  }
  return samples;
}
示例9: updateXtXAndYtx
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Applies the second term of the centered product to {@code realCentralYtx} in place.
 *
 * <pre>
 * Mi = (Yi-Ym)' x (Xi-Xm) = Yi' x (Xi-Xm) - Ym' x (Xi-Xm)
 *
 * M = Sum(Mi) = Sum(Yi' x (Xi-Xm)) - Ym' x (Sum(Xi)-N*Xm)
 * </pre>
 *
 * The second part, {@code Ym' x (Sum(Xi)-N*Xm)}, is subtracted by this function.
 *
 * @param realCentralYtx matrix updated in place, indexed as {@code (row of ym, column of sumX)}
 * @param realCentralSumX the {@code Sum(Xi)} vector
 * @param ym the {@code Ym} vector; its size is the number of rows updated
 * @param xm the {@code Xm} vector
 * @param nRows the {@code N} factor applied to {@code xm}
 * @return {@code realCentralYtx}, the same instance that was passed in
 */
public static Matrix updateXtXAndYtx(Matrix realCentralYtx,
    Vector realCentralSumX, Vector ym, Vector xm, int nRows) {
  for (int row = 0; row < ym.size(); row++) {
    final double factor = ym.getQuick(row);
    for (int col = 0; col < realCentralSumX.size(); col++) {
      final double centered = realCentralSumX.getQuick(col) - nRows * xm.getQuick(col);
      final double updated = realCentralYtx.getQuick(row, col) - centered * factor;
      realCentralYtx.setQuick(row, col, updated);
    }
  }
  return realCentralYtx;
}
示例10: sparseVectorTimesMatrix
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Multiplies a sparse row vector by {@code matrix} and returns the product as a sparse vector.
 *
 * <p>For each matrix column the dot product with the vector's stored entries is computed;
 * columns whose result is exactly zero are omitted from the output.
 *
 * @param sparseVector the left operand; it is cast to {@code SparseVector}, so any other
 *     implementation causes a {@code ClassCastException}
 * @param matrix the right operand, read as {@code matrix.getQuick(vectorIndex, column)}
 * @return a sparse vector of size {@code matrix.numCols()} holding the non-zero dot products
 */
static org.apache.spark.mllib.linalg.Vector sparseVectorTimesMatrix(
    org.apache.spark.mllib.linalg.Vector sparseVector, Matrix matrix) {
  int matrixCols = matrix.numCols();

  // The cast and the backing arrays do not depend on the column, so fetch them once.
  // values[i] is the entry stored for indices[i], which avoids a lookup per element.
  SparseVector sparse = (SparseVector) sparseVector;
  int[] indices = sparse.indices();
  double[] values = sparse.values();

  ArrayList<Tuple2<Integer, Double>> tupleList = new ArrayList<Tuple2<Integer, Double>>();
  for (int col = 0; col < matrixCols; col++) {
    double dotRes = 0;
    for (int i = 0; i < indices.length; i++) {
      dotRes += matrix.getQuick(indices[i], col) * values[i];
    }
    if (dotRes != 0) {
      tupleList.add(new Tuple2<Integer, Double>(col, dotRes));
    }
  }
  return Vectors.sparse(matrixCols, tupleList);
}
示例11: createModelDistribution
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Creates and configures an {@code AbstractVectorModelDistribution}.
 *
 * <p>The distribution class is named by {@code modelFactory}. Its prototype is an instance of
 * the class named by {@code modelPrototype}, built through a single-{@code int} constructor with
 * {@code prototypeSize}. If the distribution is a {@code DistanceMeasureClusterDistribution}, a
 * measure of the class named by {@code distanceMeasure} is instantiated, configured with
 * {@code conf} and installed on it.
 *
 * @param conf configuration handed to the distance measure, when one is created
 * @return the configured model distribution
 */
public ModelDistribution<VectorWritable> createModelDistribution(Configuration conf) {
  AbstractVectorModelDistribution distribution =
      ClassUtils.instantiateAs(modelFactory, AbstractVectorModelDistribution.class);

  Class<?>[] constructorTypes = {int.class};
  Object[] constructorArgs = {prototypeSize};
  Vector prototype =
      ClassUtils.instantiateAs(modelPrototype, Vector.class, constructorTypes, constructorArgs);
  distribution.setModelPrototype(new VectorWritable(prototype));

  if (!(distribution instanceof DistanceMeasureClusterDistribution)) {
    return distribution;
  }

  DistanceMeasure measure = ClassUtils.instantiateAs(distanceMeasure, DistanceMeasure.class);
  measure.configure(conf);
  ((DistanceMeasureClusterDistribution) distribution).setMeasure(measure);
  return distribution;
}
示例12: addPointToCanopies
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Adds a point to the existing canopies, creating a new canopy when none binds it strongly.
 *
 * <p>This is the same algorithm as the reference but inverted to iterate over existing canopies
 * instead of the points. Every canopy whose center is closer than {@code t1} observes the point.
 * If no canopy center is closer than {@code t2}, a new canopy centered on the point is appended
 * with the next canopy id.
 * <p/>
 * This method is used by the CanopyMapper, CanopyReducer and CanopyDriver.
 *
 * @param point
 *          the point to be added
 * @param canopies
 *          the collection of canopies to check and, possibly, append to
 */
public void addPointToCanopies(Vector point, Collection<Canopy> canopies) {
  boolean stronglyBound = false;
  for (Canopy canopy : canopies) {
    Vector center = canopy.getCenter();
    double dist = measure.distance(center.getLengthSquared(), canopy.getCenter(), point);
    if (dist < t1) {
      if (log.isDebugEnabled()) {
        log.debug("Added point: {} to canopy: {}", AbstractCluster.formatVector(point, null), canopy.getIdentifier());
      }
      canopy.observe(point);
    }
    if (dist < t2) {
      stronglyBound = true;
    }
  }

  if (stronglyBound) {
    return;
  }

  if (log.isDebugEnabled()) {
    log.debug("Created new Canopy:{} at center:{}", nextCanopyId, AbstractCluster.formatVector(point, null));
  }
  canopies.add(new Canopy(point, nextCanopyId++, measure));
}
示例13: iterate
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Trains a prior ClusterClassifier on the data for a fixed number of passes.
 *
 * <p>Within a pass, for every input vector the policy is updated from the classifier, the
 * vector is classified, the policy selects weights from the resulting probabilities, and the
 * classifier is trained on the vector for every model with a non-zero weight. The classifier is
 * closed at the end of each pass.
 *
 * @param data
 *          the input vectors; traversed once per pass
 * @param classifier
 *          a prior ClusterClassifier
 * @param numIterations
 *          the int number of iterations to perform
 *
 * @return the posterior ClusterClassifier (the same instance as {@code classifier})
 */
public static ClusterClassifier iterate(Iterable<Vector> data, ClusterClassifier classifier, int numIterations) {
  ClusteringPolicy policy = classifier.getPolicy();
  int pass = 1;
  while (pass <= numIterations) {
    for (Vector point : data) {
      // refresh the policy from the current state of the classifier
      policy.update(classifier);
      // classification yields probabilities; the policy turns them into per-model weights
      Vector weights = policy.select(classifier.classify(point));
      // every model with a non-zero weight observes the point
      Iterator<Vector.Element> weighted = weights.iterateNonZero();
      while (weighted.hasNext()) {
        int model = weighted.next().index();
        classifier.train(model, point, weights.get(model));
      }
    }
    // compute the posterior models
    classifier.close();
    pass++;
  }
  return classifier;
}
示例14: distance
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Computes {@code 1 - (v . centroid) / (|centroid| * |v|)} using a precomputed squared length
 * for the centroid.
 *
 * @param centroidLengthSquare the squared length of {@code centroid}
 * @param centroid the centroid vector
 * @param v the vector to compare against the centroid
 * @return the distance; {@code 1} when both the denominator and the dot product are zero
 */
@Override
public double distance(double centroidLengthSquare, Vector centroid, Vector v) {
  final double vLengthSquared = v.getLengthSquared();
  final double dot = v.dot(centroid);
  final double normProduct = Math.sqrt(centroidLengthSquare) * Math.sqrt(vLengthSquared);

  // correct for floating-point rounding errors: the denominator must not fall below the dot
  // product (a plain comparison is used so NaN handling matches a simple '<' test)
  final double denominator = normProduct < dot ? dot : normProduct;

  // correct for zero-vector corner case
  if (denominator == 0 && dot == 0) {
    return 1;
  }
  return 1.0 - dot / denominator;
}
示例15: main
import org.apache.mahout.math.Vector; //导入依赖的package包/类
/**
 * Scores every data line of {@code inputFile} with the logistic model stored in
 * {@code modelFile} and prints the results to standard output.
 *
 * <p>The first line of the input is passed to {@code csv.firstLine}; every following line is
 * converted to a feature vector and scored. One CSV row (target, model output, log-likelihood)
 * is printed per line, followed by the AUC, the confusion matrix and the entropy matrix from the
 * collector.
 *
 * @param args not read by this method
 * @throws Exception if the model or the input cannot be loaded or read
 */
public static void main(String[] args) throws Exception {
  showAuc = true;
  showConfusion = true;
  Auc collector = new Auc();
  LogisticModelParameters lmp = LogisticModelParameters.loadFrom(new File(modelFile));
  CsvRecordFactory csv = lmp.getCsvRecordFactory();
  OnlineLogisticRegression lr = lmp.createRegression();
  PrintWriter output = new PrintWriter(new OutputStreamWriter(System.out, Charsets.UTF_8), true);

  BufferedReader in = OnlineLogisticRegressionTest.open(inputFile);
  try {
    String line = in.readLine();
    csv.firstLine(line);
    line = in.readLine();
    output.println("\"target\",\"model-output\",\"log-likelihood\"");
    while (line != null) {
      // progress trace written straight to System.out
      System.out.println("-----" + line);
      Vector v = new SequentialAccessSparseVector(lmp.getNumFeatures());
      int target = csv.processLine(line, v);
      double score = lr.classifyScalarNoLink(v);
      output.printf(Locale.ENGLISH, "%d,%.3f,%.6f%n", target, score, lr.logLikelihood(target, v));
      collector.add(target, score);
      line = in.readLine();
      System.out.println("I am here");
    }
  } finally {
    // Release the input reader even when parsing or scoring fails.
    in.close();
  }

  output.printf(Locale.ENGLISH, "AUC = %.2f%n", collector.auc());
  Matrix m = collector.confusion();
  output.printf(Locale.ENGLISH, "confusion: [[%.1f, %.1f], [%.1f, %.1f]]%n",
      m.get(0, 0), m.get(1, 0), m.get(0, 1), m.get(1, 1));
  m = collector.entropy();
  output.printf(Locale.ENGLISH, "entropy: [[%.1f, %.1f], [%.1f, %.1f]]%n",
      m.get(0, 0), m.get(1, 0), m.get(0, 1), m.get(1, 1));
}
开发者ID:PacktPublishing,项目名称:Java-Data-Science-Cookbook,代码行数:32,代码来源:OnlineLogisticRegressionTest.java