本文整理汇总了Java中org.apache.mahout.math.Matrix类的典型用法代码示例。如果您正苦于以下问题：Java Matrix类的具体用法？Java Matrix怎么用？Java Matrix使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
Matrix类属于org.apache.mahout.math包，在下文中一共展示了Matrix类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: runSequential
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * PPCA: sequential PPCA based on the paper from Tipping and Bishop.
 *
 * <p>Reads every vector found under {@code input} into a dense {@code nRows x nCols}
 * matrix (one vector per row, in iteration order), draws random initial values from
 * {@code PCACommon}, and hands everything to the in-memory {@code runSequential}
 * overload.
 *
 * @param conf
 *          the configuration
 * @param input
 *          the path to the input matrix Y
 * @param output
 *          the output path (not used currently)
 * @param nRows
 *          number of rows in Y
 * @param nCols
 *          number of columns in Y
 * @param nPCs
 *          number of desired principal components
 * @return the error returned by the in-memory run, or 0 when nothing is listed
 *         under {@code input}
 * @throws Exception
 *           if any of the underlying calls fails
 */
double runSequential(Configuration conf, Path input, Path output,
    final int nRows, final int nCols, final int nPCs) throws Exception {
  Matrix centralY = new DenseMatrix(nRows, nCols);

  FileSystem fs = FileSystem.get(input.toUri(), conf);
  boolean nothingListed = fs.listStatus(input).length == 0;
  if (nothingListed) {
    System.err.println("No file under " + input);
    return 0;
  }

  // Copy the input vectors into consecutive rows of the dense matrix.
  SequenceFileDirValueIterable<VectorWritable> inputRows =
      new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, null, conf);
  int nextRow = 0;
  for (VectorWritable rowVector : inputRows) {
    centralY.assignRow(nextRow++, rowVector.get());
  }

  // Random starting point: an nCols x nPCs matrix plus a random ss value.
  Matrix centralC = PCACommon.randomMatrix(nCols, nPCs);
  double ss = PCACommon.randSS();
  InitialValues initVal = new InitialValues(centralC, ss);

  // A variant that first ran on sample(centralY) is currently disabled:
  // Matrix sampledYe = sample(centralY);
  // runSequential(conf, sampledYe, initVal, 100);
  return runSequential(conf, centralY, initVal, 100);
}
示例2: runSequential_JacobVersion
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * PPCA: sequential PPCA based on the matlab implementation of Jacob Verbeek.
 *
 * <p>Materializes the vectors stored under {@code input} as the rows of a dense
 * {@code nRows x nCols} matrix, creates random initial values via {@code PCACommon},
 * and delegates to the in-memory {@code runSequential_JacobVersion} overload.
 *
 * @param conf
 *          the configuration
 * @param input
 *          the path to the input matrix Y
 * @param output
 *          the output path (not used currently)
 * @param nRows
 *          number of rows in Y
 * @param nCols
 *          number of columns in Y
 * @param nPCs
 *          number of desired principal components
 * @return the error returned by the in-memory run, or 0 when nothing is listed
 *         under {@code input}
 * @throws Exception
 *           if any of the underlying calls fails
 */
double runSequential_JacobVersion(Configuration conf, Path input,
    Path output, final int nRows, final int nCols, final int nPCs) throws Exception {
  Matrix centralY = new DenseMatrix(nRows, nCols);

  FileSystem fileSystem = FileSystem.get(input.toUri(), conf);
  if (fileSystem.listStatus(input).length == 0) {
    System.err.println("No file under " + input);
    return 0;
  }

  // Fill the matrix row by row, in the order the vectors are iterated.
  int rowIndex = 0;
  for (VectorWritable writable : new SequenceFileDirValueIterable<VectorWritable>(
      input, PathType.LIST, null, conf)) {
    centralY.assignRow(rowIndex, writable.get());
    rowIndex += 1;
  }

  // Random nCols x nPCs starting matrix and random ss value.
  Matrix initialC = PCACommon.randomMatrix(nCols, nPCs);
  double initialSs = PCACommon.randSS();
  InitialValues initVal = new InitialValues(initialC, initialSs);

  return runSequential_JacobVersion(conf, centralY, initVal, 100);
}
示例3: pTopicGivenTerm
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * Fills {@code termTopicDist} with un-normalized {@code p(topic x|term a, document i)}
 * values for the given document: {@code pTGT[x][a]} is the (un-normalized)
 * {@code p(x|a,i)}, or, when {@code docTopics} is {@code null}, the (also un-normalized)
 * {@code p(a|x)}.
 *
 * <p>Only the columns of terms with a non-zero weight in {@code document} are written.
 *
 * @param document doc-term vector encoding {@code w(term a|document i)}.
 * @param docTopics {@code docTopics[x]} is the overall weight of topic {@code x} in the
 *   given document. If {@code null}, a topic weight of {@code 1.0} is used for all topics.
 * @param termTopicDist storage for the output {@code p(x|a,i)} distributions.
 */
private void pTopicGivenTerm(Vector document, Vector docTopics, Matrix termTopicDist) {
  for (int topic = 0; topic < numTopics; topic++) {
    // p(topic | document), falling back to 1.0 when no per-document weights are given
    double topicWeight;
    if (docTopics == null) {
      topicWeight = 1.0;
    } else {
      topicWeight = docTopics.get(topic);
    }
    // w(term | topic) for every term
    Vector countsForTopic = topicTermCounts.viewRow(topic);
    // \sum_a w(term a | topic)
    double topicSum = topicSums.get(topic);
    // output row for this topic
    Vector outputRow = termTopicDist.viewRow(topic);

    // visit only the terms that carry a non-zero weight in the document
    for (Iterator<Vector.Element> nonZeroTerms = document.iterateNonZero(); nonZeroTerms.hasNext();) {
      int term = nonZeroTerms.next().index();
      // un-normalized p(topic | term, document), smoothed by eta and alpha
      double likelihood =
          (countsForTopic.get(term) + eta) * (topicWeight + alpha) / (topicSum + eta * numTerms);
      outputRow.set(term, likelihood);
    }
  }
}
示例4: writeMatrix
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * Writes the rows of {@code origMatrix} to a sequence file named
 * {@code singleSliceMatrix} inside the directory
 * {@code outPath/<label><numRows>x<numCols>}. Each record is keyed by the row index
 * ({@code IntWritable}) and carries the row as a {@code VectorWritable}.
 *
 * <p>If that directory already exists nothing is written and a warning is logged.
 *
 * @param origMatrix the matrix whose rows are written
 * @param outPath parent path of the output directory
 * @param tmpPath not used by this method
 * @param label prefix of the output directory name
 * @throws IOException if file-system access or writing fails
 */
static void writeMatrix(Matrix origMatrix,
    Path outPath, Path tmpPath, String label) throws IOException {
  Configuration conf = new Configuration();
  String dirName = label + origMatrix.numRows() + "x" + origMatrix.numCols();
  Path outputDir = new Path(outPath, dirName);
  FileSystem fs = FileSystem.get(outputDir.toUri(), conf);

  if (fs.exists(outputDir)) {
    log.warn("----------- Skip matrix " + outputDir + " - already exists");
    return;
  }

  Path outputFile = new Path(outputDir, "singleSliceMatrix");
  SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf,
      outputFile, IntWritable.class, VectorWritable.class);
  // One VectorWritable is reused as the holder for every row.
  VectorWritable rowHolder = new VectorWritable();
  try {
    int rowIndex = 0;
    while (rowIndex < origMatrix.numRows()) {
      rowHolder.set(origMatrix.viewRow(rowIndex));
      writer.append(new IntWritable(rowIndex), rowHolder);
      rowIndex++;
    }
  } finally {
    writer.close();
  }
}
示例5: setCovarianceMatrix
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * Computes the inverse covariance matrix from the given covariance matrix and stores
 * it in {@code inverseCovarianceMatrix}.
 *
 * <p>The inverse is built from a singular value decomposition of {@code m}: every
 * diagonal entry of {@code S} is replaced by its reciprocal and the result is
 * {@code U * S^-1 * U'}.
 *
 * @param m A covariance matrix; it must be square.
 * @throws CardinalityException if {@code m} is not square.
 * @throws IllegalStateException if a diagonal entry of {@code S} that is not strictly
 *   positive is found.
 */
public void setCovarianceMatrix(Matrix m) {
  if (m.numRows() != m.numCols()) {
    throw new CardinalityException(m.numRows(), m.numCols());
  }
  // See http://www.mlahanas.de/Math/svd.htm for details,
  // which specifically details the case of covariance matrix inversion
  // Complexity: O(min(n*m^2, m*n^2))
  SingularValueDecomposition svd = new SingularValueDecomposition(m);
  Matrix sInv = svd.getS();
  // Invert the diagonal elements in place; a non-positive entry cannot be inverted.
  for (int i = 0; i < sInv.numRows(); i++) {
    double diagElem = sInv.get(i, i);
    if (diagElem > 0.0) {
      sInv.set(i, i, 1 / diagElem);
    } else {
      throw new IllegalStateException("Eigen Value equals to 0 found.");
    }
  }
  // Fetch U once and reuse it for both factors of U * S^-1 * U'.
  Matrix u = svd.getU();
  inverseCovarianceMatrix = u.times(sInv.times(u.transpose()));
  Preconditions.checkArgument(inverseCovarianceMatrix != null, "inverseCovarianceMatrix not initialized");
}
示例6: getMatrix
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * Builds a dense square matrix holding a copy of {@code confusionMatrix}, with a copy
 * of {@code labelMap} bound as both the row labels and the column labels.
 *
 * @return the newly created, labelled matrix
 */
public Matrix getMatrix() {
  int size = confusionMatrix.length;
  Matrix result = new DenseMatrix(size, size);
  for (int row = 0; row < size; row++) {
    for (int col = 0; col < size; col++) {
      result.set(row, col, confusionMatrix[row][col]);
    }
  }

  // Bind a private copy of the label map so the matrix does not share labelMap itself.
  Map<String,Integer> labelBindings = Maps.newHashMap();
  labelBindings.putAll(labelMap);
  result.setRowLabelBindings(labelBindings);
  result.setColumnLabelBindings(labelBindings);
  return result;
}
示例7: updateXtXAndYtx
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * Applies the mean correction to an accumulated Y'X matrix.
 *
 * <pre>
 * Mi = (Yi-Ym)' x (Xi-Xm) = Yi' x (Xi-Xm) - Ym' x (Xi-Xm)
 *
 * M = Sum(Mi) = Sum(Yi' x (Xi-Xm)) - Ym' x (Sum(Xi)-N*Xm)
 * </pre>
 *
 * The second part is done in this function: for every cell {@code (yRow, xCol)} the
 * value {@code ym[yRow] * (realCentralSumX[xCol] - nRows * xm[xCol])} is subtracted
 * from {@code realCentralYtx}, in place.
 *
 * @param realCentralYtx matrix that is updated in place
 * @param realCentralSumX the Sum(Xi) vector
 * @param ym the Ym vector
 * @param xm the Xm vector
 * @param nRows the N of the formula
 * @return the same {@code realCentralYtx} instance that was passed in
 */
public static Matrix updateXtXAndYtx(Matrix realCentralYtx,
    Vector realCentralSumX, Vector ym, Vector xm, int nRows) {
  for (int r = 0; r < ym.size(); r++) {
    double ymValue = ym.getQuick(r);
    for (int c = 0; c < realCentralSumX.size(); c++) {
      // Sum(Xi) - N * Xm for this column
      double sumMinusMean = realCentralSumX.getQuick(c) - nRows * xm.getQuick(c);
      double updated = realCentralYtx.getQuick(r, c) - sumMinusMean * ymValue;
      realCentralYtx.setQuick(r, c, updated);
    }
  }
  return realCentralYtx;
}
示例8: sparseVectorTimesMatrix
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * Multiplies a sparse row vector by a matrix and stores the product in {@code resArray}.
 *
 * <p>For every matrix column {@code col}, {@code resArray[col]} receives the sum over
 * the vector's stored entries of {@code value * matrix(index, col)}.
 *
 * @param sparseVector the left operand; it is cast to {@code SparseVector}, so passing
 *   any other vector implementation fails with a {@code ClassCastException}
 * @param matrix the right operand
 * @param resArray output array; its first {@code matrix.numCols()} entries are overwritten
 */
static void sparseVectorTimesMatrix(org.apache.spark.mllib.linalg.Vector sparseVector, Matrix matrix,
    double[] resArray) {
  // The stored indices and their values do not depend on the column, so fetch them once
  // instead of once per column (and avoid a per-entry lookup through apply()).
  SparseVector sparse = (SparseVector) sparseVector;
  int[] indices = sparse.indices();
  double[] values = sparse.values();

  int matrixCols = matrix.numCols();
  for (int col = 0; col < matrixCols; col++) {
    double dotRes = 0;
    for (int i = 0; i < indices.length; i++) {
      dotRes += matrix.getQuick(indices[i], col) * values[i];
    }
    resArray[col] = dotRes;
  }
}
示例9: convertMahoutToSparkMatrix
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * Converts an {@code org.apache.mahout.math.Matrix} into a dense
 * {@code org.apache.spark.mllib.linalg.Matrix} to be used in Spark programs.
 *
 * <p>The values are copied column by column into a flat array that is then handed to
 * {@code Matrices.dense}.
 *
 * @param mahoutMatrix the matrix to copy
 * @return a Spark matrix with the same dimensions and values
 */
public static org.apache.spark.mllib.linalg.Matrix convertMahoutToSparkMatrix(Matrix mahoutMatrix)
{
  final int rowCount = mahoutMatrix.numRows();
  final int colCount = mahoutMatrix.numCols();
  double[] columnMajor = new double[rowCount * colCount];

  // Walk the columns in the outer loop so consecutive slots hold one column's values.
  int offset = 0;
  for (int col = 0; col < colCount; col++) {
    for (int row = 0; row < rowCount; row++) {
      columnMajor[offset++] = mahoutMatrix.get(row, col);
    }
  }
  return Matrices.dense(rowCount, colCount, columnMajor);
}
示例10: printMatrixToFile
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * Writes the matrix to the file {@code PCs.txt} under {@code outputPath}, using the
 * writer that corresponds to the requested format. Formats other than {@code DENSE},
 * {@code LIL} and {@code COO} write nothing.
 *
 * @param m the matrix to write
 * @param format the output format selecting the writer
 * @param outputPath directory path that is prefixed to the file name
 */
public static void printMatrixToFile(org.apache.spark.mllib.linalg.Matrix m, OutputFormat format, String outputPath) {
  final String target = outputPath + File.separator + "PCs.txt";
  switch (format) {
    case DENSE:
      printMatrixInDenseTextFormat(m, target);
      break;
    case LIL:
      printMatrixInListOfListsFormat(m, target);
      break;
    case COO:
      printMatrixInCoordinateFormat(m, target);
      break;
    default:
      // no writer for any other format
      break;
  }
}
示例11: printMatrixInDenseTextFormat
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * Writes the matrix in a dense text format: one line per row, every value followed by
 * a single space.
 *
 * <p>Failures are logged, not thrown. The writer is closed on every path, including
 * when writing fails part-way.
 *
 * @param m the matrix to write
 * @param outputPath path of the file to create or overwrite
 */
public static void printMatrixInDenseTextFormat(org.apache.spark.mllib.linalg.Matrix m, String outputPath) {
  PrintWriter printWriter = null;
  try {
    printWriter = new PrintWriter(new FileWriter(outputPath));
    for (int i = 0; i < m.numRows(); i++) {
      for (int j = 0; j < m.numCols(); j++) {
        printWriter.print(m.apply(i, j) + " ");
      }
      printWriter.println();
    }
    // PrintWriter never throws on write errors; it only records them.
    if (printWriter.checkError()) {
      Log.error("Failed to write output file " + outputPath);
    }
  } catch (Exception e) {
    // Best effort: report the real cause instead of assuming the file was not found.
    Log.error("Failed to write output file " + outputPath + ": " + e);
  } finally {
    if (printWriter != null) {
      // Closing the PrintWriter also closes the wrapped FileWriter.
      printWriter.close();
    }
  }
}
示例12: printMatrixInCoordinateFormat
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * Writes the matrix in a coordinate list (COO) format: one {@code row,col,value} line
 * for every entry whose value is not 0.
 *
 * <p>Failures are logged, not thrown. The writer is closed on every path, including
 * when writing fails part-way.
 *
 * @param m the matrix to write
 * @param outputPath path of the file to create or overwrite
 */
public static void printMatrixInCoordinateFormat(org.apache.spark.mllib.linalg.Matrix m, String outputPath) {
  PrintWriter printWriter = null;
  try {
    printWriter = new PrintWriter(new FileWriter(outputPath));
    for (int i = 0; i < m.numRows(); i++) {
      for (int j = 0; j < m.numCols(); j++) {
        double val = m.apply(i, j);
        if (val != 0) {
          printWriter.println(i + "," + j + "," + val);
        }
      }
    }
    // PrintWriter never throws on write errors; it only records them.
    if (printWriter.checkError()) {
      Log.error("Failed to write output file " + outputPath);
    }
  } catch (Exception e) {
    // Best effort: report the real cause instead of assuming the file was not found.
    Log.error("Failed to write output file " + outputPath + ": " + e);
  } finally {
    if (printWriter != null) {
      // Closing the PrintWriter also closes the wrapped FileWriter.
      printWriter.close();
    }
  }
}
示例13: TopicModel
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
public TopicModel(Matrix topicTermCounts, Vector topicSums, double eta, double alpha,
String[] dictionary, int numThreads, double modelWeight) {
this.dictionary = dictionary;
this.topicTermCounts = topicTermCounts;
this.topicSums = topicSums;
this.numTopics = topicSums.size();
this.numTerms = topicTermCounts.numCols();
this.eta = eta;
this.alpha = alpha;
this.sampler = new Sampler(RandomUtils.getRandom());
this.numThreads = numThreads;
if (modelWeight != 1) {
topicSums.assign(Functions.mult(modelWeight));
for (int x = 0; x < numTopics; x++) {
topicTermCounts.viewRow(x).assign(Functions.mult(modelWeight));
}
}
initializeThreadPool();
}
示例14: trainDocTopicModel
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * Updates the per-document topic distribution {@code topics} and the per-document
 * term-topic matrix {@code docTopicModel} from the document {@code original}.
 *
 * <p>Both {@code topics} and {@code docTopicModel} are overwritten in place. The final
 * step divides by the L1 norm of {@code topics} without checking it for zero.
 *
 * @param original the document's term vector
 * @param topics in: current topic weights of the document; out: re-estimated weights
 *   that sum to 1
 * @param docTopicModel storage for the weighted term-topic values of this document
 */
public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel) {
  // Step 1: p(topic|term,document) for every term of the document and every topic,
  // from p(term|topic) and p(topic|doc), normalized per term.
  pTopicGivenTerm(original, topics, docTopicModel);
  normalizeByTopic(docTopicModel);

  // Step 2: weight each term column by the term's weight in the document, giving the
  // weighted distribution of term-topic pairs from this document.
  for (Iterator<Vector.Element> terms = original.iterateNonZero(); terms.hasNext();) {
    Vector.Element term = terms.next();
    for (int topic = 0; topic < numTopics; topic++) {
      Vector topicRow = docTopicModel.viewRow(topic);
      double weighted = topicRow.getQuick(term.index()) * term.get();
      topicRow.setQuick(term.index(), weighted);
    }
  }

  // Step 3: p(topic|doc) is the summed contribution of each topic row.
  topics.assign(0.0);
  for (int topic = 0; topic < numTopics; topic++) {
    double rowMass = docTopicModel.viewRow(topic).norm(1);
    topics.set(topic, rowMass);
  }

  // Step 4: renormalize so that sum_x(p(x|doc)) = 1.
  double totalMass = topics.norm(1);
  topics.assign(Functions.mult(1 / totalMass));
}
示例15: normalizeByTopic
import org.apache.mahout.math.Matrix; //导入依赖的package包/类
/**
 * Normalizes the given matrix per term so that, for every visited term column, the
 * values over all topics sum to 1: {@code sum_x(p(x|t,d)) = 1}.
 *
 * <p>The term columns visited are those that are non-zero in row 0. The matrix is
 * modified in place.
 *
 * @param perTopicSparseDistributions matrix with one row per topic
 */
private void normalizeByTopic(Matrix perTopicSparseDistributions) {
  Iterator<Vector.Element> firstRowTerms =
      perTopicSparseDistributions.viewRow(0).iterateNonZero();
  while (firstRowTerms.hasNext()) {
    int term = firstRowTerms.next().index();

    // total over all topics for this term
    double columnTotal = 0;
    for (int topic = 0; topic < numTopics; topic++) {
      columnTotal += perTopicSparseDistributions.viewRow(topic).get(term);
    }

    // divide every topic's entry for this term by that total
    for (int topic = 0; topic < numTopics; topic++) {
      Vector topicRow = perTopicSparseDistributions.viewRow(topic);
      topicRow.set(term, topicRow.get(term) / columnTotal);
    }
  }
}