本文整理汇总了Java中org.apache.mahout.clustering.Cluster.CLUSTERS_DIR属性的典型用法代码示例。如果您正苦于以下问题:Java Cluster.CLUSTERS_DIR属性的具体用法?Java Cluster.CLUSTERS_DIR怎么用?Java Cluster.CLUSTERS_DIR使用的例子?那么恭喜您,这里精选的属性代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在类org.apache.mahout.clustering.Cluster的用法示例。
在下文中一共展示了Cluster.CLUSTERS_DIR属性的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: buildClustersSeq
/**
 * Builds a directory of Canopy clusters from the input vectors, running
 * sequentially in the calling process.
 *
 * <p>Every input vector is offered to a {@code CanopyClusterer}; the resulting
 * canopies are then written as {@code Text -> ClusterWritable} pairs to the
 * file {@code part-r-00000} inside the returned directory. A canopy is written
 * only when its number of observations is strictly greater than
 * {@code clusterFilter}.
 *
 * @param input
 *          the Path to the directory containing input vectors
 * @param output
 *          the Path for all output directories
 * @param measure
 *          the DistanceMeasure
 * @param t1
 *          the double T1 distance metric
 * @param t2
 *          the double T2 distance metric
 * @param clusterFilter
 *          the int size threshold; canopies whose observation count does not
 *          exceed it are not written
 * @return the canopy output directory Path
 * @throws IOException
 *           if the file system cannot be obtained, a canopy cannot be
 *           appended, or the writer cannot be closed after a successful write
 */
private static Path buildClustersSeq(Path input, Path output,
    DistanceMeasure measure, double t1, double t2, int clusterFilter)
    throws IOException {
  CanopyClusterer clusterer = new CanopyClusterer(measure, t1, t2);
  Collection<Canopy> canopies = Lists.newArrayList();
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(input.toUri(), conf);
  for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(
      input, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
    clusterer.addPointToCanopies(vw.get(), canopies);
  }

  Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0' + Cluster.FINAL_ITERATION_SUFFIX);
  Path path = new Path(canopyOutputDir, "part-r-00000");
  // Create the reusable value holder first so that nothing can fail between
  // opening the writer and entering the try block that closes it.
  ClusterWritable clusterWritable = new ClusterWritable();
  SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
      Text.class, ClusterWritable.class);
  // 'threw' stays true unless the whole write loop completes. A failure to
  // close is swallowed only when an earlier exception is already propagating;
  // after a clean write it is reported, because it may mean lost output.
  boolean threw = true;
  try {
    for (Canopy canopy : canopies) {
      canopy.computeParameters();
      if (log.isDebugEnabled()) {
        log.debug("Writing Canopy:{} center:{} numPoints:{} radius:{}",
            new Object[] { canopy.getIdentifier(),
                AbstractCluster.formatVector(canopy.getCenter(), null),
                canopy.getNumObservations(),
                AbstractCluster.formatVector(canopy.getRadius(), null) });
      }
      if (canopy.getNumObservations() > clusterFilter) {
        clusterWritable.setValue(canopy);
        writer.append(new Text(canopy.getIdentifier()), clusterWritable);
      }
    }
    threw = false;
  } finally {
    Closeables.close(writer, threw);
  }
  return canopyOutputDir;
}
示例2: buildClustersMR
/**
 * Builds a directory of Canopy clusters from the input vectors by running a
 * single-reducer mapreduce job.
 *
 * <p>The distance measure class name, the four thresholds and the cluster
 * filter are stored in {@code conf} under the {@code CanopyConfigKeys} keys
 * before the job is created from that configuration.
 *
 * @param conf
 *          the Configuration
 * @param input
 *          the Path to the directory containing input vectors
 * @param output
 *          the Path for all output directories
 * @param measure
 *          the DistanceMeasure
 * @param t1
 *          the double T1 distance metric
 * @param t2
 *          the double T2 distance metric
 * @param t3
 *          the reducer's double T1 distance metric
 * @param t4
 *          the reducer's double T2 distance metric
 * @param clusterFilter
 *          the int minimum size of canopies produced
 * @return the canopy output directory Path
 * @throws InterruptedException
 *           if the job reports that it did not complete successfully
 */
private static Path buildClustersMR(Configuration conf, Path input,
    Path output, DistanceMeasure measure, double t1, double t2, double t3,
    double t4, int clusterFilter) throws IOException, InterruptedException,
    ClassNotFoundException {
  // Publish the clustering parameters through the job configuration.
  String measureClassName = measure.getClass().getName();
  conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, measureClassName);
  conf.set(CanopyConfigKeys.T1_KEY, Double.toString(t1));
  conf.set(CanopyConfigKeys.T2_KEY, Double.toString(t2));
  conf.set(CanopyConfigKeys.T3_KEY, Double.toString(t3));
  conf.set(CanopyConfigKeys.T4_KEY, Double.toString(t4));
  conf.set(CanopyConfigKeys.CF_KEY, Integer.toString(clusterFilter));

  String jobName = "Canopy Driver running buildClusters over input: " + input;
  Job canopyJob = new Job(conf, jobName);
  canopyJob.setJarByClass(CanopyDriver.class);

  // Input side: sequence files read by the canopy mapper.
  canopyJob.setInputFormatClass(SequenceFileInputFormat.class);
  canopyJob.setMapperClass(CanopyMapper.class);
  canopyJob.setMapOutputKeyClass(Text.class);
  canopyJob.setMapOutputValueClass(VectorWritable.class);

  // Output side: one reducer writing Text -> ClusterWritable sequence files.
  canopyJob.setReducerClass(CanopyReducer.class);
  canopyJob.setNumReduceTasks(1);
  canopyJob.setOutputKeyClass(Text.class);
  canopyJob.setOutputValueClass(ClusterWritable.class);
  canopyJob.setOutputFormatClass(SequenceFileOutputFormat.class);

  Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0' + Cluster.FINAL_ITERATION_SUFFIX);
  FileInputFormat.addInputPath(canopyJob, input);
  FileOutputFormat.setOutputPath(canopyJob, canopyOutputDir);

  boolean succeeded = canopyJob.waitForCompletion(true);
  if (succeeded) {
    return canopyOutputDir;
  }
  throw new InterruptedException("Canopy Job failed processing " + input);
}
示例3: buildClustersMR
/**
 * Builds new Mean Shift clusters using Hadoop, iterating until convergence
 * is signalled or {@code maxIterations} iterations have run.
 *
 * <p>Each iteration writes to its own directory
 * ({@code Cluster.CLUSTERS_DIR + iteration}) under {@code output}, and that
 * directory becomes the input of the next iteration. Convergence is detected
 * by the existence of the {@code CONTROL_CONVERGED} path under {@code output}
 * after an iteration. Finally, the directory
 * {@code Cluster.CLUSTERS_DIR + (last iteration number)} is renamed by
 * appending {@code Cluster.FINAL_ITERATION_SUFFIX}.
 *
 * @param conf
 *          the Configuration; its {@code MAPRED_REDUCE_TASKS} entry is
 *          lowered by one after every iteration while it is greater than 1
 * @param clustersIn
 *          the Path holding the input clusters of the first iteration
 * @param output
 *          the Path under which the per-iteration directories are created
 * @param measure
 *          the DistanceMeasure; its class name is passed to each iteration
 * @param aKernelProfile
 *          the IKernelProfile; its class name is passed to each iteration
 * @param t1
 *          the double T1 value forwarded to each iteration
 * @param t2
 *          the double T2 value forwarded to each iteration
 * @param convergenceDelta
 *          the double convergence value forwarded to each iteration
 * @param maxIterations
 *          the int maximum number of iterations to run
 * @param runClustering
 *          flag forwarded unchanged to each iteration
 * @return the Path of the renamed, final clusters directory
 * @throws IOException
 *           if the file system cannot be accessed or the final clusters
 *           directory cannot be renamed
 */
private static Path buildClustersMR(Configuration conf, Path clustersIn,
    Path output, DistanceMeasure measure, IKernelProfile aKernelProfile,
    double t1, double t2, double convergenceDelta, int maxIterations,
    boolean runClustering) throws IOException, InterruptedException,
    ClassNotFoundException {
  // The convergence marker location does not change between iterations.
  Path controlOut = new Path(output, CONTROL_CONVERGED);
  // iterate until the clusters converge
  boolean converged = false;
  int iteration = 1;
  while (!converged && iteration <= maxIterations) {
    int numReducers = Integer.parseInt(conf.get(MAPRED_REDUCE_TASKS, "1"));
    log.info("Mean Shift Iteration: {}, numReducers {}", new Object[] {
        iteration, numReducers });
    // point the output to a new directory per iteration
    Path clustersOut = new Path(output, Cluster.CLUSTERS_DIR + iteration);
    runIterationMR(conf, clustersIn, clustersOut, controlOut, measure
        .getClass().getName(), aKernelProfile.getClass().getName(), t1, t2,
        convergenceDelta, runClustering);
    converged = FileSystem.get(controlOut.toUri(), conf).exists(controlOut);
    // now point the input to the old output directory
    clustersIn = clustersOut;
    iteration++;
    // decrease the number of reducers if it is > 1 to cross-pollinate
    // map sets
    if (numReducers > 1) {
      numReducers--;
      conf.set(MAPRED_REDUCE_TASKS, String.valueOf(numReducers));
    }
  }

  int lastIteration = iteration - 1;
  Path fromPath = new Path(output, Cluster.CLUSTERS_DIR + lastIteration);
  Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + lastIteration + Cluster.FINAL_ITERATION_SUFFIX);
  // rename() reports failure through its return value; surface it instead of
  // handing the caller a path that was never created.
  if (!FileSystem.get(fromPath.toUri(), conf).rename(fromPath, finalClustersIn)) {
    throw new IOException("Unable to rename " + fromPath + " to " + finalClustersIn);
  }
  return finalClustersIn;
}
示例4: iterateSeq
/**
 * Iterate over data using a prior-trained ClusterClassifier, for a number of iterations using a sequential
 * implementation.
 *
 * <p>After each pass over the input the classifier is closed, its policy is updated, and it is written to
 * {@code Cluster.CLUSTERS_DIR + iteration} under {@code outPath}. Iteration stops early as soon as
 * {@code isConverged} reports true for the directory just written. The directory of the last completed
 * iteration is then renamed by appending {@code Cluster.FINAL_ITERATION_SUFFIX}.
 *
 * @param conf
 *          the Configuration
 * @param inPath
 *          a Path to input VectorWritables
 * @param priorPath
 *          a Path to the prior classifier
 * @param outPath
 *          a Path of output directory
 * @param numIterations
 *          the int number of iterations to perform; must be at least 1
 * @throws IllegalArgumentException
 *           if {@code numIterations} is less than 1
 * @throws IOException
 *           if reading or writing fails, or the final clusters directory cannot be renamed
 */
public static void iterateSeq(Configuration conf, Path inPath, Path priorPath, Path outPath, int numIterations)
    throws IOException {
  // With no iterations there would be no output directory to finalize; fail clearly before doing any work.
  if (numIterations < 1) {
    throw new IllegalArgumentException("numIterations must be at least 1, but was " + numIterations);
  }
  ClusterClassifier classifier = new ClusterClassifier();
  classifier.readFromSeqFiles(conf, priorPath);
  // All per-iteration directories live under outPath, so one FileSystem lookup serves the whole run.
  FileSystem fs = FileSystem.get(outPath.toUri(), conf);
  Path clustersOut = null;
  int lastIteration = 0;
  for (int iteration = 1; iteration <= numIterations; iteration++) {
    for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(inPath, PathType.LIST,
        PathFilters.logsCRCFilter(), conf)) {
      Vector vector = vw.get();
      // classification yields probabilities
      Vector probabilities = classifier.classify(vector);
      // policy selects weights for models given those probabilities
      Vector weights = classifier.getPolicy().select(probabilities);
      // training causes all models to observe data
      for (Iterator<Vector.Element> it = weights.iterateNonZero(); it.hasNext();) {
        int index = it.next().index();
        classifier.train(index, vector, weights.get(index));
      }
    }
    // compute the posterior models
    classifier.close();
    // update the policy
    classifier.getPolicy().update(classifier);
    // output the classifier
    clustersOut = new Path(outPath, Cluster.CLUSTERS_DIR + iteration);
    classifier.writeToSeqFiles(clustersOut);
    lastIteration = iteration;
    if (isConverged(clustersOut, conf, fs)) {
      break;
    }
  }
  Path finalClustersIn = new Path(outPath, Cluster.CLUSTERS_DIR + lastIteration + Cluster.FINAL_ITERATION_SUFFIX);
  // rename() signals failure through its return value; do not let it pass unnoticed.
  if (!fs.rename(clustersOut, finalClustersIn)) {
    throw new IOException("Unable to rename " + clustersOut + " to " + finalClustersIn);
  }
}
示例5: iterateMR
/**
 * Iterate over data using a prior-trained ClusterClassifier, for a number of iterations using a mapreduce
 * implementation.
 *
 * <p>Each iteration runs one job whose prior path is published in {@code conf} under {@code PRIOR_PATH_KEY}
 * and whose output goes to {@code Cluster.CLUSTERS_DIR + iteration} under {@code outPath}; the policy read
 * from the initial prior is written into every output directory. The output of one iteration is the prior
 * of the next. Iteration stops early as soon as {@code isConverged} reports true for the directory just
 * written. The directory of the last completed iteration is then renamed by appending
 * {@code Cluster.FINAL_ITERATION_SUFFIX}.
 *
 * @param conf
 *          the Configuration
 * @param inPath
 *          a Path to input VectorWritables
 * @param priorPath
 *          a Path to the prior classifier
 * @param outPath
 *          a Path of output directory
 * @param numIterations
 *          the int number of iterations to perform; must be at least 1
 * @throws IllegalArgumentException
 *           if {@code numIterations} is less than 1
 * @throws InterruptedException
 *           if an iteration's job reports that it did not complete successfully
 * @throws IOException
 *           if the file system cannot be accessed or the final clusters directory cannot be renamed
 */
public static void iterateMR(Configuration conf, Path inPath, Path priorPath, Path outPath, int numIterations)
    throws IOException, InterruptedException, ClassNotFoundException {
  // With no iterations there would be no output directory to finalize; fail clearly before doing any work.
  if (numIterations < 1) {
    throw new IllegalArgumentException("numIterations must be at least 1, but was " + numIterations);
  }
  ClusteringPolicy policy = ClusterClassifier.readPolicy(priorPath);
  // All per-iteration directories live under outPath, so one FileSystem lookup serves the whole run.
  FileSystem fs = FileSystem.get(outPath.toUri(), conf);
  // The prior consumed by the iteration about to run; advanced only after that iteration succeeds so
  // that the failure message names the prior that was actually being processed.
  Path currentPrior = priorPath;
  Path clustersOut = null;
  int lastIteration = 0;
  for (int iteration = 1; iteration <= numIterations; iteration++) {
    conf.set(PRIOR_PATH_KEY, currentPrior.toString());
    String jobName = "Cluster Iterator running iteration " + iteration + " over priorPath: " + currentPrior;
    Job job = new Job(conf, jobName);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(ClusterWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(ClusterWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(CIMapper.class);
    job.setReducerClass(CIReducer.class);
    FileInputFormat.addInputPath(job, inPath);
    clustersOut = new Path(outPath, Cluster.CLUSTERS_DIR + iteration);
    FileOutputFormat.setOutputPath(job, clustersOut);
    job.setJarByClass(ClusterIterator.class);
    if (!job.waitForCompletion(true)) {
      throw new InterruptedException("Cluster Iteration " + iteration + " failed processing " + currentPrior);
    }
    ClusterClassifier.writePolicy(policy, clustersOut);
    // this iteration's output is the next iteration's prior
    currentPrior = clustersOut;
    lastIteration = iteration;
    if (isConverged(clustersOut, conf, fs)) {
      break;
    }
  }
  Path finalClustersIn = new Path(outPath, Cluster.CLUSTERS_DIR + lastIteration + Cluster.FINAL_ITERATION_SUFFIX);
  // rename() signals failure through its return value; do not let it pass unnoticed.
  if (!fs.rename(clustersOut, finalClustersIn)) {
    throw new IOException("Unable to rename " + clustersOut + " to " + finalClustersIn);
  }
}