本文整理汇总了Java中org.apache.mahout.math.Vector.iterateNonZero方法的典型用法代码示例。如果您正苦于以下问题:Java Vector.iterateNonZero方法的具体用法?Java Vector.iterateNonZero怎么用?Java Vector.iterateNonZero使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.mahout.math.Vector的用法示例。
在下文中一共展示了Vector.iterateNonZero方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: trainDocTopicModel
import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Updates {@code docTopicModel} and {@code topics} in place for a single document.
 *
 * <p>The matrix is first filled by {@code pTopicGivenTerm} and passed through
 * {@code normalizeByTopic}; every entry in a column that corresponds to a non-zero term of
 * {@code original} is then scaled by that term's weight. Finally {@code topics} is overwritten
 * with the L1 norm of each matrix row and rescaled by the reciprocal of its own L1 norm.
 *
 * @param original document vector; only its non-zero entries are visited
 * @param topics per-topic weights for the document; overwritten by this call
 * @param docTopicModel matrix with one row per topic; overwritten by this call
 */
public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel) {
  // Start from p(topic|term,document), computed from p(term|topic) and p(topic|doc).
  pTopicGivenTerm(original, topics, docTopicModel);
  normalizeByTopic(docTopicModel);

  // Weight each term-topic entry by how strongly the term occurs in this document.
  for (Iterator<Vector.Element> terms = original.iterateNonZero(); terms.hasNext();) {
    Vector.Element term = terms.next();
    for (int topic = 0; topic < numTopics; topic++) {
      Vector row = docTopicModel.viewRow(topic);
      double weighted = row.getQuick(term.index()) * term.get();
      row.setQuick(term.index(), weighted);
    }
  }

  // Rebuild p(topic|doc) from the row totals of the weighted matrix.
  topics.assign(0.0);
  for (int topic = 0; topic < numTopics; topic++) {
    Vector row = docTopicModel.viewRow(topic);
    topics.set(topic, row.norm(1));
  }

  // Rescale so that the topic weights sum to 1.
  double total = topics.norm(1);
  topics.assign(Functions.mult(1 / total));
}
示例2: pTopicGivenTerm
import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Fills {@code termTopicDist} with un-normalized {@code p(topic x|term a, document i)} values
 * for every topic {@code x} and every term {@code a} that has a non-zero entry in
 * {@code document}. When {@code docTopics} is {@code null} a weight of {@code 1.0} is used for
 * every topic.
 *
 * @param document doc-term vector; only the indices of its non-zero entries are used.
 * @param docTopics {@code docTopics[x]} is the weight of topic {@code x} in the document, or
 *                  {@code null} to use {@code 1.0} for all topics.
 * @param termTopicDist output storage; row {@code x} receives the values for topic {@code x}.
 */
private void pTopicGivenTerm(Vector document, Vector docTopics, Matrix termTopicDist) {
  for (int topic = 0; topic < numTopics; topic++) {
    // p(topic | document), defaulting to 1.0 when no document-topic weights are supplied
    double topicWeight;
    if (docTopics == null) {
      topicWeight = 1.0;
    } else {
      topicWeight = docTopics.get(topic);
    }
    // w(term | topic) for this topic, and its total over all terms
    Vector countsForTopic = topicTermCounts.viewRow(topic);
    double topicTotal = topicSums.get(topic);
    // row of the output that belongs to this topic
    Vector output = termTopicDist.viewRow(topic);

    for (Iterator<Vector.Element> terms = document.iterateNonZero(); terms.hasNext();) {
      int termIndex = terms.next().index();
      // smoothed, un-normalized p(topic | term, document)
      double smoothedCount = countsForTopic.get(termIndex) + eta;
      double likelihood = smoothedCount * (topicWeight + alpha) / (topicTotal + eta * numTerms);
      output.set(termIndex, likelihood);
    }
  }
}
示例3: iterate
import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Iterate over data using a prior-trained ClusterClassifier, for a number of iterations.
 *
 * <p>For every input vector the policy is updated from the classifier, the vector is
 * classified, the policy selects per-model weights from the resulting probabilities, and each
 * model with a non-zero weight is trained on the vector. The classifier is closed once at the
 * end of every pass over {@code data}.
 *
 * @param data
 *          an {@code Iterable<Vector>} of input vectors; it is traversed once per iteration
 * @param classifier
 *          a prior ClusterClassifier; it is trained in place
 * @param numIterations
 *          the int number of iterations to perform
 *
 * @return the same {@code classifier} instance, now holding the posterior models
 */
public static ClusterClassifier iterate(Iterable<Vector> data, ClusterClassifier classifier, int numIterations) {
  ClusteringPolicy policy = classifier.getPolicy();
  for (int iteration = 1; iteration <= numIterations; iteration++) {
    for (Vector vector : data) {
      // update the policy based upon the prior
      policy.update(classifier);
      // classification yields probabilities
      Vector probabilities = classifier.classify(vector);
      // policy selects weights for models given those probabilities
      Vector weights = policy.select(probabilities);
      // training causes all models with a non-zero weight to observe the data
      for (Iterator<Vector.Element> it = weights.iterateNonZero(); it.hasNext();) {
        Vector.Element weight = it.next();
        // read the weight from the element itself instead of looking it up again by index
        classifier.train(weight.index(), vector, weight.get());
      }
    }
    // compute the posterior models
    classifier.close();
  }
  return classifier;
}
示例4: distance
import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Returns the largest absolute component of {@code v1 - v2}, or {@code 0.0} when the
 * difference has no non-zero entries.
 *
 * @param v1 first vector
 * @param v2 second vector
 * @return the maximum absolute coordinate difference
 * @throws CardinalityException if the two vectors differ in size
 */
@Override
public double distance(Vector v1, Vector v2) {
  if (v1.size() != v2.size()) {
    throw new CardinalityException(v1.size(), v2.size());
  }
  Vector difference = v1.minus(v2);
  double largest = 0.0;
  for (Iterator<Vector.Element> entries = difference.iterateNonZero(); entries.hasNext();) {
    double magnitude = Math.abs(entries.next().get());
    // explicit comparison: a candidate is only adopted when it is strictly greater
    largest = magnitude > largest ? magnitude : largest;
  }
  return largest;
}
示例5: writeAllAboveThreshold
import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Visits each non-zero entry of {@code pdfPerCluster} and, for entries whose value is at least
 * {@code clusterClassificationThreshold}, hands a {@code WeightedVectorWritable} (that value
 * paired with the vector held by {@code vw}) to {@code write} together with the entry's index.
 *
 * @param clusterModels cluster models, passed through to {@code write}
 * @param clusterClassificationThreshold minimum value an entry must reach to be written
 * @param writer destination writer, passed through to {@code write}
 * @param vw wrapper holding the vector being classified
 * @param pdfPerCluster per-cluster values for the vector, indexed by cluster
 * @throws IOException declared for the delegated {@code write} call
 */
private static void writeAllAboveThreshold(List<Cluster> clusterModels, Double clusterClassificationThreshold,
    SequenceFile.Writer writer, VectorWritable vw, Vector pdfPerCluster) throws IOException {
  for (Iterator<Element> pdfs = pdfPerCluster.iterateNonZero(); pdfs.hasNext();) {
    Element pdf = pdfs.next();
    double weight = pdf.get();
    if (weight >= clusterClassificationThreshold) {
      write(clusterModels, writer, new WeightedVectorWritable(weight, vw.get()), pdf.index());
    }
  }
}
示例6: vectorToSortedString
import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Renders the non-zero entries of {@code vector} as {@code {label:value,label:value,...}},
 * ordered by descending value and truncated to the first 25 entries.
 *
 * <p>An empty vector yields {@code "{}"}; the braces are always balanced.
 *
 * @param vector the vector to render
 * @param dictionary optional labels indexed by element index; when {@code null} the numeric
 *                   index itself is used as the label
 * @return the formatted string
 */
public static String vectorToSortedString(Vector vector, String[] dictionary) {
  List<Pair<String,Double>> vectorValues =
      new ArrayList<Pair<String, Double>>(vector.getNumNondefaultElements());
  Iterator<Vector.Element> it = vector.iterateNonZero();
  while (it.hasNext()) {
    Vector.Element e = it.next();
    vectorValues.add(Pair.of(dictionary != null ? dictionary[e.index()] : String.valueOf(e.index()),
        e.get()));
  }
  // largest values first
  Collections.sort(vectorValues, new Comparator<Pair<String, Double>>() {
    @Override public int compare(Pair<String, Double> x, Pair<String, Double> y) {
      return y.getSecond().compareTo(x.getSecond());
    }
  });
  StringBuilder bldr = new StringBuilder(2048);
  bldr.append('{');
  // Emit separators between entries rather than patching a trailing comma afterwards, so the
  // closing brace is written even when there are no entries at all.
  int limit = Math.min(25, vectorValues.size());
  for (int i = 0; i < limit; i++) {
    if (i > 0) {
      bldr.append(',');
    }
    Pair<String,Double> p = vectorValues.get(i);
    bldr.append(p.getFirst());
    bldr.append(':');
    bldr.append(p.getSecond());
  }
  bldr.append('}');
  return bldr.toString();
}
示例7: distance
import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Returns the sum of the absolute components of {@code v1 - v2}.
 *
 * @param v1 first vector
 * @param v2 second vector
 * @return the summed absolute coordinate differences
 * @throws CardinalityException if the two vectors differ in size
 */
@Override
public double distance(Vector v1, Vector v2) {
  if (v1.size() != v2.size()) {
    throw new CardinalityException(v1.size(), v2.size());
  }
  // only the coordinates where the two vectors differ show up as non-zero entries
  Vector difference = v1.minus(v2);
  double total = 0;
  for (Iterator<Vector.Element> entries = difference.iterateNonZero(); entries.hasNext();) {
    total += Math.abs(entries.next().get());
  }
  return total;
}
示例8: map
import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Classifies the incoming vector, lets the policy select weights from the resulting
 * probabilities, and trains every model with a non-zero selection weight on that vector.
 *
 * @param key input key; not used
 * @param value wrapper around the vector to classify and train on
 * @param context mapper context; not used
 */
@Override
protected void map(WritableComparable<?> key, VectorWritable value, Context context) throws IOException,
    InterruptedException {
  Vector selections = policy.select(classifier.classify(value.get()));
  Iterator<Element> selected = selections.iterateNonZero();
  while (selected.hasNext()) {
    Element selection = selected.next();
    classifier.train(selection.index(), value.get(), selection.get());
  }
}
示例9: map
import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Emits {@code (index, ONE)} for every non-zero entry of the incoming vector, followed by a
 * single {@code (TOTAL_COUNT, ONE)} record for the vector itself.
 *
 * @param key input key; not used
 * @param value wrapper around the vector whose non-zero indices are emitted
 * @param context mapper context that receives the output records
 */
@Override
protected void map(WritableComparable<?> key, VectorWritable value, Context context)
    throws IOException, InterruptedException {
  for (Iterator<Vector.Element> entries = value.get().iterateNonZero(); entries.hasNext();) {
    int index = entries.next().index();
    context.write(new IntWritable(index), ONE);
  }
  // one extra record per input vector
  context.write(TOTAL_COUNT, ONE);
}
示例10: reduce
import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Writes a copy of the first vector received for {@code key}; nothing is written when there
 * are no values. When {@code maxDf > -1}, entries whose index is missing from
 * {@code dictionary}, or whose dictionary value exceeds {@code maxDf}, are set to zero in the
 * copy before it is written.
 *
 * @param key output key, written unchanged
 * @param values vectors grouped under {@code key}; only the first one is read
 * @param context reducer context that receives the pruned vector
 */
@Override
protected void reduce(WritableComparable<?> key, Iterable<VectorWritable> values, Context context)
    throws IOException, InterruptedException {
  Iterator<VectorWritable> incoming = values.iterator();
  if (!incoming.hasNext()) {
    return;
  }
  Vector source = incoming.next().get();
  // prune a clone so the vector being iterated is never modified
  Vector pruned = source.clone();
  if (maxDf > -1) {
    for (Iterator<Vector.Element> entries = source.iterateNonZero(); entries.hasNext();) {
      int termIndex = entries.next().index();
      boolean drop = !dictionary.containsKey(termIndex);
      if (!drop) {
        long documentFrequency = dictionary.get(termIndex);
        drop = documentFrequency > maxDf;
      }
      if (drop) {
        pruned.setQuick(termIndex, 0.0);
      }
    }
  }
  context.write(key, new VectorWritable(pruned));
}
示例11: distance
import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Raises the absolute value of each non-zero component of {@code v1 - v2} to the power
 * {@code exponent}, sums the results, and returns that sum raised to {@code 1.0 / exponent}.
 *
 * <p>Math.pow is clever about integer-valued doubles.
 *
 * @param v1 first vector
 * @param v2 second vector
 * @return the exponent-based distance between the two vectors
 */
@Override
public double distance(Vector v1, Vector v2) {
  Vector difference = v1.minus(v2);
  double accumulated = 0.0;
  for (Iterator<Element> entries = difference.iterateNonZero(); entries.hasNext();) {
    double magnitude = Math.abs(entries.next().get());
    accumulated += Math.pow(magnitude, exponent);
  }
  return Math.pow(accumulated, 1.0 / exponent);
}
示例12: infer
import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Builds a vector shaped like {@code original} in which every term that is non-zero in
 * {@code original} is assigned {@code sum_x (p(a|x) * p(x|i))}: the topic-term count divided
 * by the topic total, weighted by {@code docTopics[x]} and summed over all topics.
 *
 * @param original document vector; determines which term indices are filled in
 * @param docTopics per-topic weights for the document
 * @return a new vector holding the computed value for each non-zero term of {@code original}
 */
public Vector infer(Vector original, Vector docTopics) {
  Vector termProbabilities = original.like();
  for (Iterator<Vector.Element> terms = original.iterateNonZero(); terms.hasNext();) {
    int term = terms.next().index();
    // p(a) = sum_x (p(a|x) * p(x|i))
    double probability = 0;
    for (int topic = 0; topic < numTopics; topic++) {
      double termGivenTopic = topicTermCounts.viewRow(topic).get(term) / topicSums.get(topic);
      probability += termGivenTopic * docTopics.get(topic);
    }
    termProbabilities.set(term, probability);
  }
  return termProbabilities;
}