当前位置: 首页>>代码示例>>Java>>正文


Java Vector.iterateNonZero方法代码示例

本文整理汇总了Java中org.apache.mahout.math.Vector.iterateNonZero方法的典型用法代码示例。如果您正苦于以下问题：Java Vector.iterateNonZero方法的具体用法？Java Vector.iterateNonZero怎么用？Java Vector.iterateNonZero使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.mahout.math.Vector的用法示例。


在下文中一共展示了Vector.iterateNonZero方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: trainDocTopicModel

import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Updates the per-document term/topic matrix and the topic vector for one document.
 *
 * <p>The matrix is first filled by {@code pTopicGivenTerm} and {@code normalizeByTopic}; each
 * cell is then multiplied by the document's value for the corresponding term. Finally
 * {@code topics} is overwritten with the L1 norm of each matrix row and divided by the L1 norm
 * of the resulting vector.
 *
 * @param original document vector; only its non-zero entries are visited
 * @param topics topic vector, passed to {@code pTopicGivenTerm} and then overwritten in place
 * @param docTopicModel matrix whose rows are indexed by topic; modified in place
 */
public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel) {
  // Compute p(topic|term,document) from p(term|topic) and p(topic|doc), then normalize.
  pTopicGivenTerm(original, topics, docTopicModel);
  normalizeByTopic(docTopicModel);

  // Scale each (topic, term) cell by the document's value for that term.
  for (Iterator<Vector.Element> terms = original.iterateNonZero(); terms.hasNext();) {
    Vector.Element term = terms.next();
    for (int topic = 0; topic < numTopics; topic++) {
      Vector row = docTopicModel.viewRow(topic);
      double scaled = row.getQuick(term.index()) * term.get();
      row.setQuick(term.index(), scaled);
    }
  }

  // Rebuild p(topic|doc) from the row totals of the weighted matrix.
  topics.assign(0.0);
  for (int topic = 0; topic < numTopics; topic++) {
    double rowTotal = docTopicModel.viewRow(topic).norm(1);
    topics.set(topic, rowTotal);
  }

  // Divide by the overall L1 norm so the topic weights are renormalized.
  double total = topics.norm(1);
  topics.assign(Functions.mult(1 / total));
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:24,代码来源:TopicModel.java

示例2: pTopicGivenTerm

import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Fills {@code termTopicDist} with un-normalized {@code p(topic x|term a, document i)} values
 * for every topic {@code x} and every term {@code a} that has a non-zero weight in the document.
 * When {@code docTopics} is {@code null}, a weight of {@code 1.0} is used for each topic.
 *
 * @param document doc-term vector encoding {@code w(term a|document i)}; only non-zero entries
 *          are visited.
 * @param docTopics {@code docTopics[x]} is the weight of topic {@code x} in the document, or
 *          {@code null} to use {@code 1.0} for all topics.
 * @param termTopicDist output storage; row {@code x} receives the values for topic {@code x}.
 */
private void pTopicGivenTerm(Vector document, Vector docTopics, Matrix termTopicDist) {
  for (int topic = 0; topic < numTopics; topic++) {
    // p(topic | document), defaulting to 1.0 when no topic vector is supplied
    double topicWeight;
    if (docTopics == null) {
      topicWeight = 1.0;
    } else {
      topicWeight = docTopics.get(topic);
    }
    // w(term | topic) for every term, and its sum over all terms
    Vector termCounts = topicTermCounts.viewRow(topic);
    double topicTotal = topicSums.get(topic);
    // row of the output matrix that belongs to this topic
    Vector output = termTopicDist.viewRow(topic);

    // the smoothed denominator does not depend on the term
    double denominator = topicTotal + eta * numTerms;

    for (Iterator<Vector.Element> terms = document.iterateNonZero(); terms.hasNext();) {
      int termIndex = terms.next().index();
      // un-normalized p(topic | term, document)
      double numerator = (termCounts.get(termIndex) + eta) * (topicWeight + alpha);
      output.set(termIndex, numerator / denominator);
    }
  }
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigData-Max,代码行数:35,代码来源:TopicModel.java

示例3: iterate

import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Trains a prior-trained ClusterClassifier on the given data for a fixed number of passes.
 *
 * <p>For every vector in every pass the policy is updated from the classifier, the vector is
 * classified, the policy selects weights from the resulting probabilities, and each model with
 * a non-zero weight is trained on the vector. {@code classifier.close()} is called at the end
 * of each pass.
 *
 * @param data
 *          an {@code Iterable<Vector>} of input vectors
 * @param classifier
 *          a prior ClusterClassifier
 * @param numIterations
 *          the int number of passes over the data to perform
 *
 * @return the same ClusterClassifier instance after training
 */
public static ClusterClassifier iterate(Iterable<Vector> data, ClusterClassifier classifier, int numIterations) {
  ClusteringPolicy policy = classifier.getPolicy();
  for (int pass = 0; pass < numIterations; pass++) {
    for (Vector vector : data) {
      // refresh the policy from the current state of the classifier
      policy.update(classifier);
      // classify the vector, then let the policy turn probabilities into weights
      Vector weights = policy.select(classifier.classify(vector));
      // train each model that received a non-zero weight
      Iterator<Vector.Element> selected = weights.iterateNonZero();
      while (selected.hasNext()) {
        int model = selected.next().index();
        classifier.train(model, vector, weights.get(model));
      }
    }
    // finish the pass so the posterior models are computed
    classifier.close();
  }
  return classifier;
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:34,代码来源:ClusterIterator.java

示例4: distance

import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Returns the largest absolute component of {@code v1 - v2}, or {@code 0.0} when the
 * difference has no non-zero entries.
 *
 * @param v1 first vector
 * @param v2 second vector
 * @return the maximum absolute component-wise difference
 * @throws CardinalityException if the two vectors differ in size
 */
@Override
public double distance(Vector v1, Vector v2) {
  if (v1.size() != v2.size()) {
    throw new CardinalityException(v1.size(), v2.size());
  }
  double largest = 0.0;
  for (Iterator<Vector.Element> diffs = v1.minus(v2).iterateNonZero(); diffs.hasNext();) {
    double magnitude = Math.abs(diffs.next().get());
    // explicit comparison: a NaN magnitude never replaces the running maximum
    largest = magnitude > largest ? magnitude : largest;
  }
  return largest;
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigData-Max,代码行数:18,代码来源:ChebyshevDistanceMeasure.java

示例5: writeAllAboveThreshold

import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Writes the vector once for every cluster whose non-zero pdf value is at least the threshold.
 *
 * @param clusterModels cluster list forwarded to {@code write}
 * @param clusterClassificationThreshold minimum pdf value (inclusive) required for a write
 * @param writer sequence file writer forwarded to {@code write}
 * @param vw holder of the vector being classified
 * @param pdfPerCluster pdf values indexed by cluster; only non-zero entries are visited
 * @throws IOException declared for the delegated {@code write} call
 */
private static void writeAllAboveThreshold(List<Cluster> clusterModels, Double clusterClassificationThreshold,
    SequenceFile.Writer writer, VectorWritable vw, Vector pdfPerCluster) throws IOException {
  for (Iterator<Element> pdfs = pdfPerCluster.iterateNonZero(); pdfs.hasNext();) {
    Element pdf = pdfs.next();
    boolean meetsThreshold = pdf.get() >= clusterClassificationThreshold;
    if (!meetsThreshold) {
      continue;
    }
    // the pdf value becomes the weight; the element index identifies the cluster
    write(clusterModels, writer, new WeightedVectorWritable(pdf.get(), vw.get()), pdf.index());
  }
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:13,代码来源:ClusterClassificationDriver.java

示例6: vectorToSortedString

import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Renders the largest non-zero entries of a vector as {@code {name:value,name:value,...}}.
 *
 * <p>Entries are sorted by descending value and at most 25 are printed. A vector without
 * non-zero entries is rendered as {@code {}}.
 *
 * @param vector the vector to render; only its non-zero entries are considered
 * @param dictionary maps an element index to its display name; when {@code null} the index
 *          itself is printed
 * @return the formatted string, always with balanced braces
 */
public static String vectorToSortedString(Vector vector, String[] dictionary) {
  List<Pair<String,Double>> vectorValues =
      new ArrayList<Pair<String, Double>>(vector.getNumNondefaultElements());
  Iterator<Vector.Element> it = vector.iterateNonZero();
  while (it.hasNext()) {
    Vector.Element e = it.next();
    vectorValues.add(Pair.of(dictionary != null ? dictionary[e.index()] : String.valueOf(e.index()),
                             e.get()));
  }
  // largest values first
  Collections.sort(vectorValues, new Comparator<Pair<String, Double>>() {
    @Override public int compare(Pair<String, Double> x, Pair<String, Double> y) {
      return y.getSecond().compareTo(x.getSecond());
    }
  });
  StringBuilder bldr = new StringBuilder(2048);
  bldr.append('{');
  int limit = Math.min(vectorValues.size(), 25);
  for (int i = 0; i < limit; i++) {
    // the separator goes before every entry but the first, so no trailing comma has to be
    // patched and an empty list still ends with a closing brace
    if (i > 0) {
      bldr.append(',');
    }
    Pair<String,Double> p = vectorValues.get(i);
    bldr.append(p.getFirst());
    bldr.append(':');
    bldr.append(p.getSecond());
  }
  bldr.append('}');
  return bldr.toString();
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigData-Max,代码行数:32,代码来源:TopicModel.java

示例7: distance

import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Returns the sum of the absolute components of {@code v1 - v2}.
 *
 * @param v1 first vector
 * @param v2 second vector
 * @return the sum of absolute component-wise differences
 * @throws CardinalityException if the two vectors differ in size
 */
@Override
public double distance(Vector v1, Vector v2) {
  if (v1.size() != v2.size()) {
    throw new CardinalityException(v1.size(), v2.size());
  }
  double total = 0;
  // zero components of the difference add nothing, so only non-zero ones are visited
  for (Iterator<Vector.Element> diffs = v1.minus(v2).iterateNonZero(); diffs.hasNext();) {
    total += Math.abs(diffs.next().get());
  }
  return total;
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigData-Max,代码行数:16,代码来源:ManhattanDistanceMeasure.java

示例8: map

import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Classifies the input vector, lets the policy select weights from the resulting
 * probabilities, and trains every model that received a non-zero weight on the vector.
 *
 * @param key input key; not used
 * @param value holder of the vector to classify and train on
 * @param context mapper context; not used
 */
@Override
protected void map(WritableComparable<?> key, VectorWritable value, Context context) throws IOException,
    InterruptedException {
  Vector selections = policy.select(classifier.classify(value.get()));
  Iterator<Element> selected = selections.iterateNonZero();
  while (selected.hasNext()) {
    Element choice = selected.next();
    // the element index names the model, the element value is its training weight
    classifier.train(choice.index(), value.get(), choice.get());
  }
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:11,代码来源:CIMapper.java

示例9: map

import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Emits {@code (index, ONE)} for every non-zero element of the input vector, followed by a
 * single {@code (TOTAL_COUNT, ONE)} record for the vector itself.
 *
 * @param key input key; not used
 * @param value holder of the vector to scan
 * @param context receives the emitted records
 */
@Override
protected void map(WritableComparable<?> key, VectorWritable value, Context context)
  throws IOException, InterruptedException {
  for (Iterator<Vector.Element> terms = value.get().iterateNonZero(); terms.hasNext();) {
    int termIndex = terms.next().index();
    context.write(new IntWritable(termIndex), ONE);
  }
  // one extra record per input vector
  context.write(TOTAL_COUNT, ONE);
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:13,代码来源:TermDocumentCountMapper.java

示例10: reduce

import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Writes a pruned copy of the first vector received for the key; nothing is written when
 * there are no values.
 *
 * <p>When {@code maxDf} is greater than {@code -1}, every non-zero element whose index is
 * missing from {@code dictionary}, or whose dictionary value exceeds {@code maxDf}, is set to
 * zero in the copy. Only the first value of the iterable is read.
 *
 * @param key key written unchanged with the output vector
 * @param values input vectors for the key
 * @param context receives the single output record
 */
@Override
protected void reduce(WritableComparable<?> key, Iterable<VectorWritable> values, Context context)
        throws IOException, InterruptedException {
  Iterator<VectorWritable> incoming = values.iterator();
  if (incoming.hasNext()) {
    Vector source = incoming.next().get();
    // edits go to a copy so the vector being iterated is never modified
    Vector pruned = source.clone();
    if (maxDf > -1) {
      for (Iterator<Vector.Element> terms = source.iterateNonZero(); terms.hasNext();) {
        int termIndex = terms.next().index();
        boolean drop;
        if (dictionary.containsKey(termIndex)) {
          long df = dictionary.get(termIndex);
          drop = df > maxDf;
        } else {
          drop = true;
        }
        if (drop) {
          pruned.setQuick(termIndex, 0.0);
        }
      }
    }
    context.write(key, new VectorWritable(pruned));
  }
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigData-Max,代码行数:28,代码来源:WordsPrunerReducer.java

示例11: distance

import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Sums {@code |d|^exponent} over the non-zero components {@code d} of {@code v1 - v2} and
 * returns that sum raised to the power {@code 1.0 / exponent}.
 *
 * <p>Original author's note: Math.pow is clever about integer-valued doubles.
 *
 * @param v1 first vector
 * @param v2 second vector
 * @return the distance computed with this measure's {@code exponent}
 */
@Override
public double distance(Vector v1, Vector v2) {
  double total = 0.0;
  for (Iterator<Element> diffs = v1.minus(v2).iterateNonZero(); diffs.hasNext();) {
    double magnitude = Math.abs(diffs.next().get());
    total += Math.pow(magnitude, exponent);
  }
  return Math.pow(total, 1.0 / exponent);
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigDataCS,代码行数:15,代码来源:MinkowskiDistanceMeasure.java

示例12: infer

import org.apache.mahout.math.Vector; //导入方法依赖的package包/类
/**
 * Builds a vector holding, for every term with a non-zero entry in {@code original}, the sum
 * over all topics of {@code (topicTermCounts[x][term] / topicSums[x]) * docTopics[x]}.
 *
 * @param original document vector; only its non-zero entries are visited and its values are
 *          not read
 * @param docTopics per-topic weights for the document
 * @return a new vector created with {@code original.like()}, set at each visited term index
 */
public Vector infer(Vector original, Vector docTopics) {
  Vector pTerm = original.like();
  for (Iterator<Vector.Element> terms = original.iterateNonZero(); terms.hasNext();) {
    int term = terms.next().index();
    // p(a) = sum_x (p(a|x) * p(x|i))
    double probability = 0;
    int topic = 0;
    while (topic < numTopics) {
      double termGivenTopic = topicTermCounts.viewRow(topic).get(term) / topicSums.get(topic);
      probability += termGivenTopic * docTopics.get(topic);
      topic++;
    }
    pTerm.set(term, probability);
  }
  return pTerm;
}
 
开发者ID:saradelrio,项目名称:Chi-FRBCS-BigData-Ave,代码行数:16,代码来源:TopicModel.java


注:本文中的org.apache.mahout.math.Vector.iterateNonZero方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。