This article collects typical usage examples of the Java class org.apache.mahout.clustering.iterator.ClusterWritable. If you are wondering what ClusterWritable is for, how to use it, or where to find working examples, the curated snippets below should help.
The ClusterWritable class belongs to the org.apache.mahout.clustering.iterator package. Fifteen code examples of the class are presented below, ordered by popularity.
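Before the examples, a quick orientation: ClusterWritable wraps a single Cluster model (Kluster, SoftCluster, Canopy, MeanShiftCanopy, ...) so it can be serialized as a Hadoop Writable, typically as the value type of a SequenceFile. The minimal sketch below is illustrative rather than taken from the examples, and assumes Mahout 0.7/0.8-era APIs:

import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.clustering.kmeans.Kluster;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class ClusterWritableRoundTrip {
  public static void main(String[] args) {
    // Wrap a concrete cluster model so it can travel through a SequenceFile or an MR shuffle.
    Vector center = new DenseVector(new double[] { 1.0, 2.0 });
    Kluster kluster = new Kluster(center, 0, new EuclideanDistanceMeasure());
    ClusterWritable cw = new ClusterWritable();
    cw.setValue(kluster);
    // getValue() hands back the wrapped Cluster with its concrete type preserved.
    Kluster restored = (Kluster) cw.getValue();
    System.out.println(restored.getIdentifier());
  }
}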
Example 1: reduce
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
@Override
protected void reduce(Text key, Iterable<VectorWritable> values, Context context)
    throws IOException, InterruptedException {
  for (VectorWritable value : values) {
    Vector point = value.get();
    canopyClusterer.addPointToCanopies(point, canopies);
  }
  for (Canopy canopy : canopies) {
    ClusterWritable clusterWritable = new ClusterWritable();
    canopy.computeParameters();
    if (canopy.getNumObservations() > clusterFilter) {
      clusterWritable.setValue(canopy);
      context.write(new Text(canopy.getIdentifier()), clusterWritable);
    }
  }
}
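This reducer depends on fields (canopyClusterer, canopies, clusterFilter) initialized outside the snippet. A hedged sketch of what a matching setup() could look like, assuming the CanopyConfigKeys values written in Example 10 and the CanopyClusterer constructor shown in Example 9; the actual Mahout CanopyReducer may differ:

@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  // T3/T4 are the reducer-side T1/T2 thresholds (see Example 10).
  double t1 = Double.parseDouble(conf.get(CanopyConfigKeys.T3_KEY));
  double t2 = Double.parseDouble(conf.get(CanopyConfigKeys.T4_KEY));
  clusterFilter = Integer.parseInt(conf.get(CanopyConfigKeys.CF_KEY));
  DistanceMeasure measure = ClassUtils.instantiateAs(
      conf.get(CanopyConfigKeys.DISTANCE_MEASURE_KEY), DistanceMeasure.class);
  canopyClusterer = new CanopyClusterer(measure, t1, t2);
  canopies = Lists.newArrayList();
}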
Example 2: configureWithClusterInfo
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Create a list of SoftClusters from whatever type is passed in as the prior
 *
 * @param conf
 *          the Configuration
 * @param clusterPath
 *          the path to the prior Clusters
 * @param clusters
 *          a List<Cluster> to put values into
 */
public static void configureWithClusterInfo(Configuration conf, Path clusterPath, List<Cluster> clusters) {
  for (Writable value : new SequenceFileDirValueIterable<Writable>(clusterPath, PathType.LIST,
      PathFilters.partFilter(), conf)) {
    Class<? extends Writable> valueClass = value.getClass();
    if (valueClass.equals(ClusterWritable.class)) {
      ClusterWritable clusterWritable = (ClusterWritable) value;
      value = clusterWritable.getValue();
      valueClass = value.getClass();
    }
    if (valueClass.equals(Kluster.class)) {
      // get the cluster info
      Kluster cluster = (Kluster) value;
      clusters.add(new SoftCluster(cluster.getCenter(), cluster.getId(), cluster.getMeasure()));
    } else if (valueClass.equals(SoftCluster.class)) {
      // get the cluster info
      clusters.add((SoftCluster) value);
    } else if (valueClass.equals(Canopy.class)) {
      // get the cluster info
      Canopy canopy = (Canopy) value;
      clusters.add(new SoftCluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
    } else {
      throw new IllegalStateException("Bad value class: " + valueClass);
    }
  }
}
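A hedged usage sketch of this helper (the cluster path is an assumption; in Mahout this method lives in FuzzyKMeansUtil, and Example 3 shows the analogous k-means variant):

Configuration conf = new Configuration();
List<Cluster> priorClusters = Lists.newArrayList();
// Loads any prior model type (Kluster, SoftCluster, Canopy) as SoftClusters.
configureWithClusterInfo(conf, new Path("output/clusters-0-final"), priorClusters);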
Example 3: configureWithClusterInfo
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Create a list of Klusters from whatever Cluster type is passed in as the prior
 *
 * @param conf
 *          the Configuration
 * @param clusterPath
 *          the path to the prior Clusters
 * @param clusters
 *          a Collection<Cluster> to put values into
 */
public static void configureWithClusterInfo(Configuration conf, Path clusterPath, Collection<Cluster> clusters) {
  for (Writable value : new SequenceFileDirValueIterable<Writable>(clusterPath, PathType.LIST,
      PathFilters.partFilter(), conf)) {
    Class<? extends Writable> valueClass = value.getClass();
    if (valueClass.equals(ClusterWritable.class)) {
      ClusterWritable clusterWritable = (ClusterWritable) value;
      value = clusterWritable.getValue();
      valueClass = value.getClass();
    }
    log.debug("Read 1 Cluster from {}", clusterPath);
    if (valueClass.equals(Kluster.class)) {
      // get the cluster info
      clusters.add((Kluster) value);
    } else if (valueClass.equals(Canopy.class)) {
      // get the cluster info
      Canopy canopy = (Canopy) value;
      clusters.add(new Kluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
    } else {
      throw new IllegalStateException("Bad value class: " + valueClass);
    }
  }
}
Example 4: writeToSeqFiles
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
public void writeToSeqFiles(Path path) throws IOException {
  writePolicy(policy, path);
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(path.toUri(), config);
  SequenceFile.Writer writer = null;
  ClusterWritable cw = new ClusterWritable();
  for (int i = 0; i < models.size(); i++) {
    try {
      Cluster cluster = models.get(i);
      cw.setValue(cluster);
      writer = new SequenceFile.Writer(fs, config,
          new Path(path, "part-" + String.format(Locale.ENGLISH, "%05d", i)), IntWritable.class,
          ClusterWritable.class);
      Writable key = new IntWritable(i);
      writer.append(key, cw);
    } finally {
      Closeables.closeQuietly(writer);
    }
  }
}
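These writeToSeqFiles/readFromSeqFiles snippets (see also Example 12) appear to come from Mahout's ClusterClassifier; treating that attribution as an assumption, a hedged usage sketch that persists a prior model and reads it back:

// 'center' is a prepared Vector; path names are illustrative.
List<Cluster> prior = Lists.newArrayList();
prior.add(new Kluster(center, 0, new EuclideanDistanceMeasure()));
ClusterClassifier classifier = new ClusterClassifier(prior, new KMeansClusteringPolicy());
classifier.writeToSeqFiles(new Path("prior"));        // Example 4
classifier.readFromSeqFiles(conf, new Path("prior")); // Example 12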
Example 5: createCanopyFromVectorsSeq
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Convert vectors to MeanShiftCanopies sequentially
 *
 * @param input
 *          the Path to the input VectorWritable data
 * @param output
 *          the Path to the initial clusters directory
 * @param measure
 *          the DistanceMeasure
 */
private static void createCanopyFromVectorsSeq(Path input, Path output,
    DistanceMeasure measure) throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(input.toUri(), conf);
  FileStatus[] status = fs.listStatus(input, PathFilters.logsCRCFilter());
  int part = 0;
  int id = 0;
  for (FileStatus s : status) {
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(
        output, "part-m-" + part++), Text.class, ClusterWritable.class);
    try {
      for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(
          s.getPath(), conf)) {
        MeanShiftCanopy initialCanopy = MeanShiftCanopy.initialCanopy(value.get(),
            id++, measure);
        ClusterWritable clusterWritable = new ClusterWritable();
        clusterWritable.setValue(initialCanopy);
        writer.append(new Text(), clusterWritable);
      }
    } finally {
      Closeables.closeQuietly(writer);
    }
  }
}
Example 6: createCanopyFromVectorsMR
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Convert vectors to MeanShiftCanopies using Hadoop
 */
private static void createCanopyFromVectorsMR(Configuration conf, Path input,
    Path output, DistanceMeasure measure) throws IOException,
    InterruptedException, ClassNotFoundException {
  conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(MeanShiftCanopyDriver.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(ClusterWritable.class);
  job.setMapperClass(MeanShiftCanopyCreatorMapper.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(job, input);
  FileOutputFormat.setOutputPath(job, output);
  if (!job.waitForCompletion(true)) {
    throw new InterruptedException(
        "Mean Shift createCanopyFromVectorsMR failed on input " + input);
  }
}
Example 7: populateClusterModels
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Populates a list with clusters present in the clusters-*-final directory.
 *
 * @param clusterOutputPath
 *          The output path of the clustering.
 * @param conf
 *          The Hadoop Configuration
 * @return The list of clusters found by the clustering.
 * @throws IOException
 */
private static List<Cluster> populateClusterModels(Path clusterOutputPath,
    Configuration conf) throws IOException {
  List<Cluster> clusterModels = Lists.newArrayList();
  Path finalClustersPath = finalClustersPath(conf, clusterOutputPath);
  Iterator<?> it = new SequenceFileDirValueIterator<Writable>(
      finalClustersPath, PathType.LIST, PathFilters.partFilter(),
      null, false, conf);
  while (it.hasNext()) {
    ClusterWritable next = (ClusterWritable) it.next();
    Cluster cluster = next.getValue();
    cluster.configure(conf);
    clusterModels.add(cluster);
  }
  return clusterModels;
}
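The finalClustersPath helper is not included in this snippet; a sketch consistent with the inline version shown later in Example 13:

private static Path finalClustersPath(Configuration conf, Path clusterOutputPath) throws IOException {
  FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
  // The final iteration is written to a clusters-*-final directory.
  FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
  return clusterFiles[0].getPath();
}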
Example 8: readClusters
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
public static List<List<Cluster>> readClusters(Configuration conf, Path output)
    throws IOException {
  List<List<Cluster>> allClusters = Lists.newArrayList();
  FileSystem fs = FileSystem.get(output.toUri(), conf);
  for (FileStatus s : fs.listStatus(output, new ClustersFilter())) {
    List<Cluster> clusters = Lists.newArrayList();
    for (ClusterWritable value : new SequenceFileDirValueIterable<ClusterWritable>(
        s.getPath(), PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
      Cluster cluster = value.getValue();
      clusters.add(cluster);
    }
    allClusters.add(clusters);
  }
  return allClusters;
}
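A short usage sketch for readClusters; the output path and the printing are illustrative assumptions:

List<List<Cluster>> iterations = readClusters(conf, new Path("output"));
for (int i = 0; i < iterations.size(); i++) {
  for (Cluster cluster : iterations.get(i)) {
    // asFormatString(null) prints the cluster center without term bindings.
    System.out.println("iteration " + i + ": " + cluster.asFormatString(null));
  }
}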
Example 9: buildClustersSeq
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Build a directory of Canopy clusters from the input vectors and other
 * arguments. Run sequential execution
 *
 * @param input
 *          the Path to the directory containing input vectors
 * @param output
 *          the Path for all output directories
 * @param measure
 *          the DistanceMeasure
 * @param t1
 *          the double T1 distance metric
 * @param t2
 *          the double T2 distance metric
 * @param clusterFilter
 *          the int minimum size of canopies produced
 * @return the canopy output directory Path
 */
private static Path buildClustersSeq(Path input, Path output,
    DistanceMeasure measure, double t1, double t2, int clusterFilter)
    throws IOException {
  CanopyClusterer clusterer = new CanopyClusterer(measure, t1, t2);
  Collection<Canopy> canopies = Lists.newArrayList();
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(input.toUri(), conf);
  for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(
      input, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
    clusterer.addPointToCanopies(vw.get(), canopies);
  }
  Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0' + Cluster.FINAL_ITERATION_SUFFIX);
  Path path = new Path(canopyOutputDir, "part-r-00000");
  SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
      Text.class, ClusterWritable.class);
  ClusterWritable clusterWritable = new ClusterWritable();
  try {
    for (Canopy canopy : canopies) {
      canopy.computeParameters();
      if (log.isDebugEnabled()) {
        log.debug("Writing Canopy:{} center:{} numPoints:{} radius:{}",
            new Object[] { canopy.getIdentifier(),
                AbstractCluster.formatVector(canopy.getCenter(), null),
                canopy.getNumObservations(),
                AbstractCluster.formatVector(canopy.getRadius(), null) });
      }
      if (canopy.getNumObservations() > clusterFilter) {
        clusterWritable.setValue(canopy);
        writer.append(new Text(canopy.getIdentifier()), clusterWritable);
      }
    }
  } finally {
    Closeables.closeQuietly(writer);
  }
  return canopyOutputDir;
}
Example 10: buildClustersMR
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Build a directory of Canopy clusters from the input vectors and other
 * arguments. Run mapreduce execution
 *
 * @param conf
 *          the Configuration
 * @param input
 *          the Path to the directory containing input vectors
 * @param output
 *          the Path for all output directories
 * @param measure
 *          the DistanceMeasure
 * @param t1
 *          the double T1 distance metric
 * @param t2
 *          the double T2 distance metric
 * @param t3
 *          the reducer's double T1 distance metric
 * @param t4
 *          the reducer's double T2 distance metric
 * @param clusterFilter
 *          the int minimum size of canopies produced
 * @return the canopy output directory Path
 */
private static Path buildClustersMR(Configuration conf, Path input,
    Path output, DistanceMeasure measure, double t1, double t2, double t3,
    double t4, int clusterFilter) throws IOException, InterruptedException,
    ClassNotFoundException {
  conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
  conf.set(CanopyConfigKeys.T1_KEY, String.valueOf(t1));
  conf.set(CanopyConfigKeys.T2_KEY, String.valueOf(t2));
  conf.set(CanopyConfigKeys.T3_KEY, String.valueOf(t3));
  conf.set(CanopyConfigKeys.T4_KEY, String.valueOf(t4));
  conf.set(CanopyConfigKeys.CF_KEY, String.valueOf(clusterFilter));
  Job job = new Job(conf, "Canopy Driver running buildClusters over input: "
      + input);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapperClass(CanopyMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(VectorWritable.class);
  job.setReducerClass(CanopyReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(ClusterWritable.class);
  job.setNumReduceTasks(1);
  job.setJarByClass(CanopyDriver.class);
  FileInputFormat.addInputPath(job, input);
  Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0' + Cluster.FINAL_ITERATION_SUFFIX);
  FileOutputFormat.setOutputPath(job, canopyOutputDir);
  if (!job.waitForCompletion(true)) {
    throw new InterruptedException("Canopy Job failed processing " + input);
  }
  return canopyOutputDir;
}
Example 11: populateClusterModels
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Populates a list with clusters present in the clusters-*-final directory.
 *
 * @param clusterOutputPath
 *          The output path of the clustering.
 * @param conf
 *          The Hadoop Configuration
 * @return The list of clusters found by the clustering.
 * @throws IOException
 */
private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
  List<Cluster> clusterModels = new ArrayList<Cluster>();
  Path finalClustersPath = finalClustersPath(conf, clusterOutputPath);
  Iterator<?> it = new SequenceFileDirValueIterator<Writable>(finalClustersPath, PathType.LIST,
      PathFilters.partFilter(), null, false, conf);
  while (it.hasNext()) {
    ClusterWritable next = (ClusterWritable) it.next();
    Cluster cluster = next.getValue();
    cluster.configure(conf);
    clusterModels.add(cluster);
  }
  return clusterModels;
}
Example 12: readFromSeqFiles
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
public void readFromSeqFiles(Configuration conf, Path path) throws IOException {
  // Note: a fresh Configuration is used to read the SequenceFiles, while the
  // passed-in conf is what each restored cluster is configured with.
  Configuration config = new Configuration();
  List<Cluster> clusters = Lists.newArrayList();
  for (ClusterWritable cw : new SequenceFileDirValueIterable<ClusterWritable>(path, PathType.LIST,
      PathFilters.logsCRCFilter(), config)) {
    Cluster cluster = cw.getValue();
    cluster.configure(conf);
    clusters.add(cluster);
  }
  this.models = clusters;
  modelClass = models.get(0).getClass().getName();
  this.policy = readPolicy(path);
}
Example 13: populateClusterModels
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
  List<Cluster> clusters = new ArrayList<Cluster>();
  FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
  FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
  Iterator<?> it = new SequenceFileDirValueIterator<Writable>(
      clusterFiles[0].getPath(), PathType.LIST, PathFilters.partFilter(),
      null, false, conf);
  while (it.hasNext()) {
    ClusterWritable next = (ClusterWritable) it.next();
    Cluster cluster = next.getValue();
    cluster.configure(conf);
    clusters.add(cluster);
  }
  return clusters;
}
Example 14: map
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
@Override
protected void map(WritableComparable<?> key, VectorWritable point, Context context)
    throws IOException, InterruptedException {
  MeanShiftCanopy canopy = MeanShiftCanopy.initialCanopy(point.get(), nextCanopyId++, measure);
  ClusterWritable clusterWritable = new ClusterWritable();
  clusterWritable.setValue(canopy);
  context.write(new Text(key.toString()), clusterWritable);
}
Example 15: map
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
@Override
protected void map(WritableComparable<?> key, ClusterWritable clusterWritable, Context context)
    throws IOException, InterruptedException {
  // canopies use canopyIds assigned when input vectors are processed as vectorIds too
  MeanShiftCanopy canopy = (MeanShiftCanopy) clusterWritable.getValue();
  int vectorId = canopy.getId();
  for (MeanShiftCanopy msc : canopies) {
    for (int containedId : msc.getBoundPoints().toList()) {
      if (vectorId == containedId) {
        context.write(new IntWritable(msc.getId()),
            new WeightedVectorWritable(1, canopy.getCenter()));
      }
    }
  }
}