

Java Cluster.CLUSTERS_DIR Field Code Examples

This article compiles typical usage examples of the Java field org.apache.mahout.clustering.Cluster.CLUSTERS_DIR. If you have been wondering what Cluster.CLUSTERS_DIR is for and how to use it, the curated examples below should help. You can also explore further usage examples of org.apache.mahout.clustering.Cluster itself.


Five code examples of the Cluster.CLUSTERS_DIR field are shown below, sorted by popularity by default.
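For context: CLUSTERS_DIR is a String constant on the org.apache.mahout.clustering.Cluster interface that prefixes the per-iteration output directories, and FINAL_ITERATION_SUFFIX marks the last iteration's directory. A minimal sketch of how the examples below compose their output paths (the output root is hypothetical; the constant values in the comments match the Mahout 0.x source, but verify against your version):

import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.Cluster;

public class ClustersDirDemo {
  public static void main(String[] args) {
    Path output = new Path("/user/hadoop/output"); // hypothetical output root
    // Cluster.CLUSTERS_DIR is "clusters-", so this is .../clusters-3
    Path iterationDir = new Path(output, Cluster.CLUSTERS_DIR + 3);
    // Cluster.FINAL_ITERATION_SUFFIX is "-final", so this is .../clusters-3-final
    Path finalDir = new Path(output, Cluster.CLUSTERS_DIR + 3 + Cluster.FINAL_ITERATION_SUFFIX);
    System.out.println(iterationDir);
    System.out.println(finalDir);
  }
}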

Example 1: buildClustersSeq

/**
 * Build a directory of Canopy clusters from the input vectors and other
 * arguments, using sequential execution.
 * 
 * @param input
 *          the Path to the directory containing input vectors
 * @param output
 *          the Path for all output directories
 * @param measure
 *          the DistanceMeasure
 * @param t1
 *          the double T1 distance threshold
 * @param t2
 *          the double T2 distance threshold
 * @param clusterFilter
 *          the int minimum size of canopies produced
 * @return the canopy output directory Path
 */
private static Path buildClustersSeq(Path input, Path output,
    DistanceMeasure measure, double t1, double t2, int clusterFilter)
    throws IOException {
  CanopyClusterer clusterer = new CanopyClusterer(measure, t1, t2);
  Collection<Canopy> canopies = Lists.newArrayList();
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(input.toUri(), conf);

  for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(
      input, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
    clusterer.addPointToCanopies(vw.get(), canopies);
  }

  Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0' + Cluster.FINAL_ITERATION_SUFFIX);
  Path path = new Path(canopyOutputDir, "part-r-00000");
  SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
      Text.class, ClusterWritable.class);
  ClusterWritable clusterWritable = new ClusterWritable();
  try {
    for (Canopy canopy : canopies) {
      canopy.computeParameters();
      if (log.isDebugEnabled()) {
        log.debug("Writing Canopy:{} center:{} numPoints:{} radius:{}",
            new Object[] { canopy.getIdentifier(),
                AbstractCluster.formatVector(canopy.getCenter(), null),
                canopy.getNumObservations(),
                AbstractCluster.formatVector(canopy.getRadius(), null) });
      }
      if (canopy.getNumObservations() > clusterFilter) {
        clusterWritable.setValue(canopy);
        writer.append(new Text(canopy.getIdentifier()), clusterWritable);
      }
    }
  } finally {
    Closeables.closeQuietly(writer);
  }
  return canopyOutputDir;
}
 
Developer ID: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 56, Source: CanopyDriver.java
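Not part of the original project, but as a usage sketch: the canopies written above can be read back with the same SequenceFile iterables the driver uses for its input. The directory name mirrors the one built in buildClustersSeq; the "output" root is hypothetical, and the imports assume a Mahout 0.7-era package layout.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.common.iterator.sequencefile.PathFilters;
import org.apache.mahout.common.iterator.sequencefile.PathType;
import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterable;

public class ReadCanopies {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Same directory name buildClustersSeq constructs: output/clusters-0-final
    Path canopyDir = new Path(new Path("output"),
        Cluster.CLUSTERS_DIR + '0' + Cluster.FINAL_ITERATION_SUFFIX);
    for (ClusterWritable cw : new SequenceFileDirValueIterable<ClusterWritable>(
        canopyDir, PathType.LIST, PathFilters.partFilter(), conf)) {
      Cluster canopy = cw.getValue();
      System.out.println(canopy.getId() + " center: "
          + AbstractCluster.formatVector(canopy.getCenter(), null));
    }
  }
}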

Example 2: buildClustersMR

/**
 * Build a directory of Canopy clusters from the input vectors and other
 * arguments, running as a MapReduce job.
 * 
 * @param conf
 *          the Configuration
 * @param input
 *          the Path to the directory containing input vectors
 * @param output
 *          the Path for all output directories
 * @param measure
 *          the DistanceMeasure
 * @param t1
 *          the double T1 distance threshold
 * @param t2
 *          the double T2 distance threshold
 * @param t3
 *          the reducer's double T1 distance threshold
 * @param t4
 *          the reducer's double T2 distance threshold
 * @param clusterFilter
 *          the int minimum size of canopies produced
 * @return the canopy output directory Path
 */
private static Path buildClustersMR(Configuration conf, Path input,
    Path output, DistanceMeasure measure, double t1, double t2, double t3,
    double t4, int clusterFilter) throws IOException, InterruptedException,
    ClassNotFoundException {
  conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass()
      .getName());
  conf.set(CanopyConfigKeys.T1_KEY, String.valueOf(t1));
  conf.set(CanopyConfigKeys.T2_KEY, String.valueOf(t2));
  conf.set(CanopyConfigKeys.T3_KEY, String.valueOf(t3));
  conf.set(CanopyConfigKeys.T4_KEY, String.valueOf(t4));
  conf.set(CanopyConfigKeys.CF_KEY, String.valueOf(clusterFilter));

  Job job = new Job(conf, "Canopy Driver running buildClusters over input: "
      + input);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapperClass(CanopyMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(VectorWritable.class);
  job.setReducerClass(CanopyReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(ClusterWritable.class);
  job.setNumReduceTasks(1);
  job.setJarByClass(CanopyDriver.class);

  FileInputFormat.addInputPath(job, input);
  Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0' + Cluster.FINAL_ITERATION_SUFFIX);
  FileOutputFormat.setOutputPath(job, canopyOutputDir);
  if (!job.waitForCompletion(true)) {
    throw new InterruptedException("Canopy Job failed processing " + input);
  }
  return canopyOutputDir;
}
 
Developer ID: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 57, Source: CanopyDriver.java
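Here the reducer gets its own T3/T4 thresholds. When a caller has no reason to use different values on the reduce side, a thin wrapper like the following (hypothetical, not present in the original driver) would simply reuse t1/t2:

// Hypothetical convenience overload, not in the original CanopyDriver:
// reuse the mapper's t1/t2 as the reducer's t3/t4.
private static Path buildClustersMR(Configuration conf, Path input, Path output,
    DistanceMeasure measure, double t1, double t2, int clusterFilter)
    throws IOException, InterruptedException, ClassNotFoundException {
  return buildClustersMR(conf, input, output, measure, t1, t2, t1, t2, clusterFilter);
}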

Example 3: buildClustersMR

/**
 * Build new clusters using Hadoop, iterating until the clusters converge or
 * maxIterations is reached.
 */
private static Path buildClustersMR(Configuration conf, Path clustersIn,
    Path output, DistanceMeasure measure, IKernelProfile aKernelProfile,
    double t1, double t2, double convergenceDelta, int maxIterations,
    boolean runClustering) throws IOException, InterruptedException,
    ClassNotFoundException {
  // iterate until the clusters converge
  boolean converged = false;
  int iteration = 1;
  while (!converged && iteration <= maxIterations) {
    int numReducers = Integer.parseInt(conf.get(MAPRED_REDUCE_TASKS, "1"));
    log.info("Mean Shift Iteration: {}, numReducers {}", new Object[] {
        iteration, numReducers });
    // point the output to a new directory per iteration
    Path clustersOut = new Path(output, Cluster.CLUSTERS_DIR + iteration);
    Path controlOut = new Path(output, CONTROL_CONVERGED);
    runIterationMR(conf, clustersIn, clustersOut, controlOut, measure
        .getClass().getName(), aKernelProfile.getClass().getName(), t1, t2,
        convergenceDelta, runClustering);
    converged = FileSystem.get(controlOut.toUri(), conf).exists(controlOut);
    // now point the input to the old output directory
    clustersIn = clustersOut;
    iteration++;
    // decrease the number of reducers if it is > 1 to cross-pollinate
    // map sets
    if (numReducers > 1) {
      numReducers--;
      conf.set(MAPRED_REDUCE_TASKS, String.valueOf(numReducers));
    }
  }
  Path fromPath = new Path(output, Cluster.CLUSTERS_DIR + (iteration - 1));
  Path finalClustersIn = new Path(output, Cluster.CLUSTERS_DIR + (iteration - 1) + Cluster.FINAL_ITERATION_SUFFIX);
  FileSystem.get(fromPath.toUri(), conf).rename(fromPath, finalClustersIn);
  return finalClustersIn;
}
 
Developer ID: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 38, Source: MeanShiftCanopyDriver.java
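The structure here (write each iteration to clusters-N, feed it into the next iteration, then rename the last directory with the -final suffix) recurs in the remaining two examples. A distilled sketch of just that convention, with runIteration standing in as a placeholder for the algorithm-specific work:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.Cluster;

public class IterationDirConvention {
  /** Sketch of the shared iterate-then-rename pattern; not project code. */
  static Path iterateAndFinalize(Configuration conf, Path input, Path output,
      int maxIterations) throws IOException {
    Path clustersIn = input;
    boolean converged = false;
    int iteration = 1;
    while (!converged && iteration <= maxIterations) {
      Path clustersOut = new Path(output, Cluster.CLUSTERS_DIR + iteration); // clusters-1, clusters-2, ...
      converged = runIteration(conf, clustersIn, clustersOut); // placeholder for the real work
      clustersIn = clustersOut; // the next iteration reads the previous output
      iteration++;
    }
    // Rename clusters-(N) to clusters-(N)-final so readers can find the last iteration
    Path from = new Path(output, Cluster.CLUSTERS_DIR + (iteration - 1));
    Path to = new Path(output, Cluster.CLUSTERS_DIR + (iteration - 1) + Cluster.FINAL_ITERATION_SUFFIX);
    FileSystem.get(from.toUri(), conf).rename(from, to);
    return to;
  }

  // Placeholder: run one clustering iteration and report convergence.
  static boolean runIteration(Configuration conf, Path in, Path out) {
    throw new UnsupportedOperationException("algorithm-specific");
  }
}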

Example 4: iterateSeq

/**
 * Iterate over the data using a prior-trained ClusterClassifier for a given number of iterations, using a
 * sequential implementation.
 * 
 * @param conf
 *          the Configuration
 * @param inPath
 *          a Path to input VectorWritables
 * @param priorPath
 *          a Path to the prior classifier
 * @param outPath
 *          a Path to the output directory
 * @param numIterations
 *          the int number of iterations to perform
 */
public static void iterateSeq(Configuration conf, Path inPath, Path priorPath, Path outPath, int numIterations)
  throws IOException {
  ClusterClassifier classifier = new ClusterClassifier();
  classifier.readFromSeqFiles(conf, priorPath);
  Path clustersOut = null;
  int iteration = 1;
  while (iteration <= numIterations) {
    for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(inPath, PathType.LIST,
        PathFilters.logsCRCFilter(), conf)) {
      Vector vector = vw.get();
      // classification yields probabilities
      Vector probabilities = classifier.classify(vector);
      // policy selects weights for models given those probabilities
      Vector weights = classifier.getPolicy().select(probabilities);
      // training causes all models to observe data
      for (Iterator<Vector.Element> it = weights.iterateNonZero(); it.hasNext();) {
        int index = it.next().index();
        classifier.train(index, vector, weights.get(index));
      }
    }
    // compute the posterior models
    classifier.close();
    // update the policy
    classifier.getPolicy().update(classifier);
    // output the classifier
    clustersOut = new Path(outPath, Cluster.CLUSTERS_DIR + iteration);
    classifier.writeToSeqFiles(clustersOut);
    FileSystem fs = FileSystem.get(outPath.toUri(), conf);
    iteration++;
    if (isConverged(clustersOut, conf, fs)) {
      break;
    }
  }
  Path finalClustersIn = new Path(outPath, Cluster.CLUSTERS_DIR + (iteration - 1) + Cluster.FINAL_ITERATION_SUFFIX);
  FileSystem.get(clustersOut.toUri(), conf).rename(clustersOut, finalClustersIn);
}
 
Developer ID: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 51, Source: ClusterIterator.java
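A usage sketch for the static method above (the paths are hypothetical, and the import assumes the Mahout 0.7-era org.apache.mahout.clustering.iterator package):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.iterator.ClusterIterator;

public class IterateSeqDemo {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    Path data  = new Path("/user/hadoop/vectors");  // SequenceFile(s) of VectorWritable
    Path prior = new Path("/user/hadoop/prior");    // a ClusterClassifier written with writeToSeqFiles
    Path out   = new Path("/user/hadoop/clusters"); // clusters-1, clusters-2, ... are created here
    ClusterIterator.iterateSeq(conf, data, prior, out, 10);
    // On return, the last iteration's directory has been renamed clusters-N-final.
  }
}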

Example 5: iterateMR

/**
 * Iterate over the data using a prior-trained ClusterClassifier for a given number of iterations, using a
 * MapReduce implementation.
 * 
 * @param conf
 *          the Configuration
 * @param inPath
 *          a Path to input VectorWritables
 * @param priorPath
 *          a Path to the prior classifier
 * @param outPath
 *          a Path to the output directory
 * @param numIterations
 *          the int number of iterations to perform
 */
public static void iterateMR(Configuration conf, Path inPath, Path priorPath, Path outPath, int numIterations)
  throws IOException, InterruptedException, ClassNotFoundException {
  ClusteringPolicy policy = ClusterClassifier.readPolicy(priorPath);
  Path clustersOut = null;
  int iteration = 1;
  while (iteration <= numIterations) {
    conf.set(PRIOR_PATH_KEY, priorPath.toString());
    
    String jobName = "Cluster Iterator running iteration " + iteration + " over priorPath: " + priorPath;
    Job job = new Job(conf, jobName);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(ClusterWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(ClusterWritable.class);
    
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(CIMapper.class);
    job.setReducerClass(CIReducer.class);
    
    FileInputFormat.addInputPath(job, inPath);
    clustersOut = new Path(outPath, Cluster.CLUSTERS_DIR + iteration);
    priorPath = clustersOut;
    FileOutputFormat.setOutputPath(job, clustersOut);
    
    job.setJarByClass(ClusterIterator.class);
    if (!job.waitForCompletion(true)) {
      throw new InterruptedException("Cluster Iteration " + iteration + " failed processing " + priorPath);
    }
    ClusterClassifier.writePolicy(policy, clustersOut);
    FileSystem fs = FileSystem.get(outPath.toUri(), conf);
    iteration++;
    if (isConverged(clustersOut, conf, fs)) {
      break;
    }
  }
  Path finalClustersIn = new Path(outPath, Cluster.CLUSTERS_DIR + (iteration - 1) + Cluster.FINAL_ITERATION_SUFFIX);
  FileSystem.get(clustersOut.toUri(), conf).rename(clustersOut, finalClustersIn);
}
 
Developer ID: saradelrio, Project: Chi-FRBCS-BigDataCS, Lines: 54, Source: ClusterIterator.java
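Both iterate methods leave their results in a directory whose name ends in Cluster.FINAL_ITERATION_SUFFIX, but the iteration count N is only known at runtime. A small hedged helper (not part of the project) for locating that directory afterwards:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.Cluster;

public class FinalClustersLocator {
  /** Returns the clusters-N-final directory under output, or null if absent. */
  static Path findFinalClustersDir(Configuration conf, Path output) throws IOException {
    FileSystem fs = FileSystem.get(output.toUri(), conf);
    for (FileStatus status : fs.listStatus(output)) {
      // isDir() works on Hadoop 1.x; newer Hadoop prefers isDirectory()
      if (status.isDir() && status.getPath().getName().endsWith(Cluster.FINAL_ITERATION_SUFFIX)) {
        return status.getPath();
      }
    }
    return null;
  }
}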


Note: The org.apache.mahout.clustering.Cluster.CLUSTERS_DIR examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are selected from open-source projects contributed by their authors; copyright remains with the original authors, and distribution or use should follow each project's License. Do not reproduce without permission.