本文整理汇总了Java中org.apache.mahout.clustering.ModelDistribution类的典型用法代码示例。如果您正苦于以下问题:Java ModelDistribution类的具体用法?Java ModelDistribution怎么用?Java ModelDistribution使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
ModelDistribution类属于org.apache.mahout.clustering包,在下文中一共展示了ModelDistribution类的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: createModelDistribution
import org.apache.mahout.clustering.ModelDistribution; // import the required package/class
/**
 * Builds the model distribution described by this object's settings.
 *
 * <p>The distribution is instantiated from {@code modelFactory} through
 * {@code ClassUtils.instantiateAs}; its prototype is a {@code Vector} instantiated from
 * {@code modelPrototype} with a single {@code int} constructor argument, {@code prototypeSize}.
 * When the resulting distribution is a {@code DistanceMeasureClusterDistribution}, a
 * {@code DistanceMeasure} is additionally instantiated from {@code distanceMeasure}, configured
 * with {@code conf}, and attached to the distribution.
 *
 * @param conf
 *          the configuration handed to the distance measure's {@code configure} call; only used
 *          when the distribution is a {@code DistanceMeasureClusterDistribution}
 * @return the newly created distribution with its model prototype (and, where applicable, its
 *         distance measure) set
 */
public ModelDistribution<VectorWritable> createModelDistribution(Configuration conf) {
  AbstractVectorModelDistribution distribution =
      ClassUtils.instantiateAs(modelFactory, AbstractVectorModelDistribution.class);

  // The prototype vector is built through its single-int constructor.
  Class<?>[] ctorParamTypes = {int.class};
  Object[] ctorArgs = {prototypeSize};
  Vector prototypeVector =
      ClassUtils.instantiateAs(modelPrototype, Vector.class, ctorParamTypes, ctorArgs);
  distribution.setModelPrototype(new VectorWritable(prototypeVector));

  // Only distance-measure based distributions need the extra wiring below.
  if (!(distribution instanceof DistanceMeasureClusterDistribution)) {
    return distribution;
  }

  DistanceMeasureClusterDistribution measureBased = (DistanceMeasureClusterDistribution) distribution;
  DistanceMeasure configuredMeasure = ClassUtils.instantiateAs(distanceMeasure, DistanceMeasure.class);
  configuredMeasure.configure(conf);
  measureBased.setMeasure(configuredMeasure);
  return distribution;
}
示例2: buildClusters
import org.apache.mahout.clustering.ModelDistribution; // import the required package/class
/**
 * Writes a prior set of clusters sampled from the described model distribution and then runs
 * {@code ClusterIterator} over the input, either via {@code iterateSeq} or {@code iterateMR}.
 *
 * <p>The prior is written to the {@code Cluster.INITIAL_CLUSTERS_DIR} child of {@code output};
 * that location is then passed to the iterator as its starting clusters.
 *
 * @param conf
 *          the hadoop configuration
 * @param input
 *          the directory Path for input points
 * @param output
 *          the directory Path for output points
 * @param description
 *          model distribution parameters; used to create the {@code ModelDistribution}
 * @param numClusters
 *          the number of models to sample from the prior and iterate over
 * @param maxIterations
 *          the maximum number of iterations
 * @param alpha0
 *          the alpha_0 value passed to the {@code DirichletClusteringPolicy}
 * @param runSequential
 *          if true use {@code ClusterIterator.iterateSeq}, otherwise {@code ClusterIterator.iterateMR}
 *
 * @return the {@code output} Path exactly as it was passed in
 */
public static Path buildClusters(Configuration conf, Path input, Path output, DistributionDescription description,
    int numClusters, int maxIterations, double alpha0, boolean runSequential) throws IOException,
    ClassNotFoundException, InterruptedException {
  Path priorPath = new Path(output, Cluster.INITIAL_CLUSTERS_DIR);
  ModelDistribution<VectorWritable> distribution = description.createModelDistribution(conf);

  // Collect the sampled models as Clusters so they can seed the classifier.
  List<Cluster> priorClusters = Lists.newArrayList();
  for (Model<VectorWritable> sampled : distribution.sampleFromPrior(numClusters)) {
    Cluster asCluster = (Cluster) sampled;
    priorClusters.add(asCluster);
  }

  DirichletClusteringPolicy policy = new DirichletClusteringPolicy(numClusters, alpha0);
  ClusterClassifier priorClassifier = new ClusterClassifier(priorClusters, policy);
  priorClassifier.writeToSeqFiles(priorPath);

  if (!runSequential) {
    ClusterIterator.iterateMR(conf, input, priorPath, output, maxIterations);
  } else {
    ClusterIterator.iterateSeq(conf, input, priorPath, output, maxIterations);
  }
  return output;
}