This article collects typical usage examples of the Java class org.apache.mahout.clustering.iterator.ClusterWritable. If you are wondering what ClusterWritable is for, how to use it, or where to find working examples, the curated snippets below should help.
The ClusterWritable class belongs to the org.apache.mahout.clustering.iterator package. Fifteen code examples of the class are presented below, ordered by popularity.
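Before the examples, a quick orientation: ClusterWritable wraps a single Cluster model (Kluster, SoftCluster, Canopy, MeanShiftCanopy, ...) so it can be serialized as a Hadoop Writable, typically as the value type of a SequenceFile. The minimal sketch below is illustrative rather than taken from the examples, and assumes Mahout 0.7/0.8-era APIs:

import org.apache.mahout.clustering.iterator.ClusterWritable;
import org.apache.mahout.clustering.kmeans.Kluster;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class ClusterWritableRoundTrip {
  public static void main(String[] args) {
    // Wrap a concrete cluster model so it can travel through a SequenceFile or an MR shuffle.
    Vector center = new DenseVector(new double[] { 1.0, 2.0 });
    Kluster kluster = new Kluster(center, 0, new EuclideanDistanceMeasure());
    ClusterWritable cw = new ClusterWritable();
    cw.setValue(kluster);
    // getValue() hands back the wrapped Cluster with its concrete type preserved.
    Kluster restored = (Kluster) cw.getValue();
    System.out.println(restored.getIdentifier());
  }
}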
Example 1: reduce
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
@Override
protected void reduce(Text key, Iterable<VectorWritable> values, Context context)
    throws IOException, InterruptedException {
  for (VectorWritable value : values) {
    Vector point = value.get();
    canopyClusterer.addPointToCanopies(point, canopies);
  }
  for (Canopy canopy : canopies) {
    ClusterWritable clusterWritable = new ClusterWritable();
    canopy.computeParameters();
    if (canopy.getNumObservations() > clusterFilter) {
      clusterWritable.setValue(canopy);
      context.write(new Text(canopy.getIdentifier()), clusterWritable);
    }
  }
}
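This reducer depends on fields (canopyClusterer, canopies, clusterFilter) initialized outside the snippet. A hedged sketch of what a matching setup() could look like, assuming the CanopyConfigKeys values written in Example 10 and the CanopyClusterer constructor shown in Example 9; the actual Mahout CanopyReducer may differ:

@Override
protected void setup(Context context) throws IOException, InterruptedException {
  super.setup(context);
  Configuration conf = context.getConfiguration();
  // T3/T4 are the reducer-side T1/T2 thresholds (see Example 10).
  double t1 = Double.parseDouble(conf.get(CanopyConfigKeys.T3_KEY));
  double t2 = Double.parseDouble(conf.get(CanopyConfigKeys.T4_KEY));
  clusterFilter = Integer.parseInt(conf.get(CanopyConfigKeys.CF_KEY));
  DistanceMeasure measure = ClassUtils.instantiateAs(
      conf.get(CanopyConfigKeys.DISTANCE_MEASURE_KEY), DistanceMeasure.class);
  canopyClusterer = new CanopyClusterer(measure, t1, t2);
  canopies = Lists.newArrayList();
}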
Example 2: configureWithClusterInfo
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Create a list of SoftClusters from whatever type is passed in as the prior
 *
 * @param conf
 *          the Configuration
 * @param clusterPath
 *          the path to the prior Clusters
 * @param clusters
 *          a List<Cluster> to put values into
 */
public static void configureWithClusterInfo(Configuration conf, Path clusterPath, List<Cluster> clusters) {
  for (Writable value : new SequenceFileDirValueIterable<Writable>(clusterPath, PathType.LIST,
      PathFilters.partFilter(), conf)) {
    Class<? extends Writable> valueClass = value.getClass();
    if (valueClass.equals(ClusterWritable.class)) {
      ClusterWritable clusterWritable = (ClusterWritable) value;
      value = clusterWritable.getValue();
      valueClass = value.getClass();
    }
    if (valueClass.equals(Kluster.class)) {
      // get the cluster info
      Kluster cluster = (Kluster) value;
      clusters.add(new SoftCluster(cluster.getCenter(), cluster.getId(), cluster.getMeasure()));
    } else if (valueClass.equals(SoftCluster.class)) {
      // get the cluster info
      clusters.add((SoftCluster) value);
    } else if (valueClass.equals(Canopy.class)) {
      // get the cluster info
      Canopy canopy = (Canopy) value;
      clusters.add(new SoftCluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
    } else {
      throw new IllegalStateException("Bad value class: " + valueClass);
    }
  }
}
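A hedged usage sketch of this helper (the cluster path is an assumption; in Mahout this method lives in FuzzyKMeansUtil, and Example 3 shows the analogous k-means variant):

Configuration conf = new Configuration();
List<Cluster> priorClusters = Lists.newArrayList();
// Loads any prior model type (Kluster, SoftCluster, Canopy) as SoftClusters.
configureWithClusterInfo(conf, new Path("output/clusters-0-final"), priorClusters);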
Example 3: configureWithClusterInfo
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Create a list of Klusters from whatever Cluster type is passed in as the prior
 *
 * @param conf
 *          the Configuration
 * @param clusterPath
 *          the path to the prior Clusters
 * @param clusters
 *          a Collection<Cluster> to put values into
 */
public static void configureWithClusterInfo(Configuration conf, Path clusterPath, Collection<Cluster> clusters) {
  for (Writable value : new SequenceFileDirValueIterable<Writable>(clusterPath, PathType.LIST,
      PathFilters.partFilter(), conf)) {
    Class<? extends Writable> valueClass = value.getClass();
    if (valueClass.equals(ClusterWritable.class)) {
      ClusterWritable clusterWritable = (ClusterWritable) value;
      value = clusterWritable.getValue();
      valueClass = value.getClass();
    }
    log.debug("Read 1 Cluster from {}", clusterPath);
    if (valueClass.equals(Kluster.class)) {
      // get the cluster info
      clusters.add((Kluster) value);
    } else if (valueClass.equals(Canopy.class)) {
      // get the cluster info
      Canopy canopy = (Canopy) value;
      clusters.add(new Kluster(canopy.getCenter(), canopy.getId(), canopy.getMeasure()));
    } else {
      throw new IllegalStateException("Bad value class: " + valueClass);
    }
  }
}
Example 4: writeToSeqFiles
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
public void writeToSeqFiles(Path path) throws IOException {
  writePolicy(policy, path);
  Configuration config = new Configuration();
  FileSystem fs = FileSystem.get(path.toUri(), config);
  SequenceFile.Writer writer = null;
  ClusterWritable cw = new ClusterWritable();
  for (int i = 0; i < models.size(); i++) {
    try {
      Cluster cluster = models.get(i);
      cw.setValue(cluster);
      writer = new SequenceFile.Writer(fs, config,
          new Path(path, "part-" + String.format(Locale.ENGLISH, "%05d", i)), IntWritable.class,
          ClusterWritable.class);
      Writable key = new IntWritable(i);
      writer.append(key, cw);
    } finally {
      Closeables.closeQuietly(writer);
    }
  }
}
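These writeToSeqFiles/readFromSeqFiles snippets (see also Example 12) appear to come from Mahout's ClusterClassifier; treating that attribution as an assumption, a hedged usage sketch that persists a prior model and reads it back:

// 'center' is a prepared Vector; path names are illustrative.
List<Cluster> prior = Lists.newArrayList();
prior.add(new Kluster(center, 0, new EuclideanDistanceMeasure()));
ClusterClassifier classifier = new ClusterClassifier(prior, new KMeansClusteringPolicy());
classifier.writeToSeqFiles(new Path("prior"));        // Example 4
classifier.readFromSeqFiles(conf, new Path("prior")); // Example 12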
Example 5: createCanopyFromVectorsSeq
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Convert vectors to MeanShiftCanopies sequentially
 *
 * @param input
 *          the Path to the input VectorWritable data
 * @param output
 *          the Path to the initial clusters directory
 * @param measure
 *          the DistanceMeasure
 */
private static void createCanopyFromVectorsSeq(Path input, Path output,
    DistanceMeasure measure) throws IOException {
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(input.toUri(), conf);
  FileStatus[] status = fs.listStatus(input, PathFilters.logsCRCFilter());
  int part = 0;
  int id = 0;
  for (FileStatus s : status) {
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(
        output, "part-m-" + part++), Text.class, ClusterWritable.class);
    try {
      for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(
          s.getPath(), conf)) {
        MeanShiftCanopy initialCanopy = MeanShiftCanopy.initialCanopy(value.get(),
            id++, measure);
        ClusterWritable clusterWritable = new ClusterWritable();
        clusterWritable.setValue(initialCanopy);
        writer.append(new Text(), clusterWritable);
      }
    } finally {
      Closeables.closeQuietly(writer);
    }
  }
}
Example 6: createCanopyFromVectorsMR
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Convert vectors to MeanShiftCanopies using Hadoop
 */
private static void createCanopyFromVectorsMR(Configuration conf, Path input,
    Path output, DistanceMeasure measure) throws IOException,
    InterruptedException, ClassNotFoundException {
  conf.set(KMeansConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
  Job job = new Job(conf);
  job.setJarByClass(MeanShiftCanopyDriver.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(ClusterWritable.class);
  job.setMapperClass(MeanShiftCanopyCreatorMapper.class);
  job.setNumReduceTasks(0);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  FileInputFormat.setInputPaths(job, input);
  FileOutputFormat.setOutputPath(job, output);
  if (!job.waitForCompletion(true)) {
    throw new InterruptedException(
        "Mean Shift createCanopyFromVectorsMR failed on input " + input);
  }
}
Example 7: populateClusterModels
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Populates a list with clusters present in the clusters-*-final directory.
 *
 * @param clusterOutputPath
 *          The output path of the clustering.
 * @param conf
 *          The Hadoop Configuration
 * @return The list of clusters found by the clustering.
 * @throws IOException
 */
private static List<Cluster> populateClusterModels(Path clusterOutputPath,
    Configuration conf) throws IOException {
  List<Cluster> clusterModels = Lists.newArrayList();
  Path finalClustersPath = finalClustersPath(conf, clusterOutputPath);
  Iterator<?> it = new SequenceFileDirValueIterator<Writable>(
      finalClustersPath, PathType.LIST, PathFilters.partFilter(),
      null, false, conf);
  while (it.hasNext()) {
    ClusterWritable next = (ClusterWritable) it.next();
    Cluster cluster = next.getValue();
    cluster.configure(conf);
    clusterModels.add(cluster);
  }
  return clusterModels;
}
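The finalClustersPath helper is not included in this snippet; a sketch consistent with the inline version shown later in Example 13:

private static Path finalClustersPath(Configuration conf, Path clusterOutputPath) throws IOException {
  FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
  // The final iteration is written to a clusters-*-final directory.
  FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
  return clusterFiles[0].getPath();
}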
Example 8: readClusters
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
public static List<List<Cluster>> readClusters(Configuration conf, Path output)
    throws IOException {
  List<List<Cluster>> allClusters = Lists.newArrayList();
  FileSystem fs = FileSystem.get(output.toUri(), conf);
  for (FileStatus s : fs.listStatus(output, new ClustersFilter())) {
    List<Cluster> clusters = Lists.newArrayList();
    for (ClusterWritable value : new SequenceFileDirValueIterable<ClusterWritable>(
        s.getPath(), PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
      Cluster cluster = value.getValue();
      clusters.add(cluster);
    }
    allClusters.add(clusters);
  }
  return allClusters;
}
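A short usage sketch for readClusters; the output path and the printing are illustrative assumptions:

List<List<Cluster>> iterations = readClusters(conf, new Path("output"));
for (int i = 0; i < iterations.size(); i++) {
  for (Cluster cluster : iterations.get(i)) {
    // asFormatString(null) prints the cluster center without term bindings.
    System.out.println("iteration " + i + ": " + cluster.asFormatString(null));
  }
}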
Example 9: buildClustersSeq
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Build a directory of Canopy clusters from the input vectors and other
 * arguments. Run sequential execution
 *
 * @param input
 *          the Path to the directory containing input vectors
 * @param output
 *          the Path for all output directories
 * @param measure
 *          the DistanceMeasure
 * @param t1
 *          the double T1 distance metric
 * @param t2
 *          the double T2 distance metric
 * @param clusterFilter
 *          the int minimum size of canopies produced
 * @return the canopy output directory Path
 */
private static Path buildClustersSeq(Path input, Path output,
    DistanceMeasure measure, double t1, double t2, int clusterFilter)
    throws IOException {
  CanopyClusterer clusterer = new CanopyClusterer(measure, t1, t2);
  Collection<Canopy> canopies = Lists.newArrayList();
  Configuration conf = new Configuration();
  FileSystem fs = FileSystem.get(input.toUri(), conf);
  for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(
      input, PathType.LIST, PathFilters.logsCRCFilter(), conf)) {
    clusterer.addPointToCanopies(vw.get(), canopies);
  }
  Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0' + Cluster.FINAL_ITERATION_SUFFIX);
  Path path = new Path(canopyOutputDir, "part-r-00000");
  SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
      Text.class, ClusterWritable.class);
  ClusterWritable clusterWritable = new ClusterWritable();
  try {
    for (Canopy canopy : canopies) {
      canopy.computeParameters();
      if (log.isDebugEnabled()) {
        log.debug("Writing Canopy:{} center:{} numPoints:{} radius:{}",
            new Object[] { canopy.getIdentifier(),
                AbstractCluster.formatVector(canopy.getCenter(), null),
                canopy.getNumObservations(),
                AbstractCluster.formatVector(canopy.getRadius(), null) });
      }
      if (canopy.getNumObservations() > clusterFilter) {
        clusterWritable.setValue(canopy);
        writer.append(new Text(canopy.getIdentifier()), clusterWritable);
      }
    }
  } finally {
    Closeables.closeQuietly(writer);
  }
  return canopyOutputDir;
}
Example 10: buildClustersMR
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Build a directory of Canopy clusters from the input vectors and other
 * arguments. Run mapreduce execution
 *
 * @param conf
 *          the Configuration
 * @param input
 *          the Path to the directory containing input vectors
 * @param output
 *          the Path for all output directories
 * @param measure
 *          the DistanceMeasure
 * @param t1
 *          the double T1 distance metric
 * @param t2
 *          the double T2 distance metric
 * @param t3
 *          the reducer's double T1 distance metric
 * @param t4
 *          the reducer's double T2 distance metric
 * @param clusterFilter
 *          the int minimum size of canopies produced
 * @return the canopy output directory Path
 */
private static Path buildClustersMR(Configuration conf, Path input,
    Path output, DistanceMeasure measure, double t1, double t2, double t3,
    double t4, int clusterFilter) throws IOException, InterruptedException,
    ClassNotFoundException {
  conf.set(CanopyConfigKeys.DISTANCE_MEASURE_KEY, measure.getClass().getName());
  conf.set(CanopyConfigKeys.T1_KEY, String.valueOf(t1));
  conf.set(CanopyConfigKeys.T2_KEY, String.valueOf(t2));
  conf.set(CanopyConfigKeys.T3_KEY, String.valueOf(t3));
  conf.set(CanopyConfigKeys.T4_KEY, String.valueOf(t4));
  conf.set(CanopyConfigKeys.CF_KEY, String.valueOf(clusterFilter));
  Job job = new Job(conf, "Canopy Driver running buildClusters over input: "
      + input);
  job.setInputFormatClass(SequenceFileInputFormat.class);
  job.setOutputFormatClass(SequenceFileOutputFormat.class);
  job.setMapperClass(CanopyMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(VectorWritable.class);
  job.setReducerClass(CanopyReducer.class);
  job.setOutputKeyClass(Text.class);
  job.setOutputValueClass(ClusterWritable.class);
  job.setNumReduceTasks(1);
  job.setJarByClass(CanopyDriver.class);
  FileInputFormat.addInputPath(job, input);
  Path canopyOutputDir = new Path(output, Cluster.CLUSTERS_DIR + '0' + Cluster.FINAL_ITERATION_SUFFIX);
  FileOutputFormat.setOutputPath(job, canopyOutputDir);
  if (!job.waitForCompletion(true)) {
    throw new InterruptedException("Canopy Job failed processing " + input);
  }
  return canopyOutputDir;
}
Example 11: populateClusterModels
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
/**
 * Populates a list with clusters present in the clusters-*-final directory.
 *
 * @param clusterOutputPath
 *          The output path of the clustering.
 * @param conf
 *          The Hadoop Configuration
 * @return The list of clusters found by the clustering.
 * @throws IOException
 */
private static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
  List<Cluster> clusterModels = new ArrayList<Cluster>();
  Path finalClustersPath = finalClustersPath(conf, clusterOutputPath);
  Iterator<?> it = new SequenceFileDirValueIterator<Writable>(finalClustersPath, PathType.LIST,
      PathFilters.partFilter(), null, false, conf);
  while (it.hasNext()) {
    ClusterWritable next = (ClusterWritable) it.next();
    Cluster cluster = next.getValue();
    cluster.configure(conf);
    clusterModels.add(cluster);
  }
  return clusterModels;
}
Example 12: readFromSeqFiles
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
public void readFromSeqFiles(Configuration conf, Path path) throws IOException {
  // Note: a fresh Configuration is used to read the SequenceFiles, while the
  // passed-in conf is what each restored cluster is configured with.
  Configuration config = new Configuration();
  List<Cluster> clusters = Lists.newArrayList();
  for (ClusterWritable cw : new SequenceFileDirValueIterable<ClusterWritable>(path, PathType.LIST,
      PathFilters.logsCRCFilter(), config)) {
    Cluster cluster = cw.getValue();
    cluster.configure(conf);
    clusters.add(cluster);
  }
  this.models = clusters;
  modelClass = models.get(0).getClass().getName();
  this.policy = readPolicy(path);
}
Example 13: populateClusterModels
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
public static List<Cluster> populateClusterModels(Path clusterOutputPath, Configuration conf) throws IOException {
  List<Cluster> clusters = new ArrayList<Cluster>();
  FileSystem fileSystem = clusterOutputPath.getFileSystem(conf);
  FileStatus[] clusterFiles = fileSystem.listStatus(clusterOutputPath, PathFilters.finalPartFilter());
  Iterator<?> it = new SequenceFileDirValueIterator<Writable>(
      clusterFiles[0].getPath(), PathType.LIST, PathFilters.partFilter(),
      null, false, conf);
  while (it.hasNext()) {
    ClusterWritable next = (ClusterWritable) it.next();
    Cluster cluster = next.getValue();
    cluster.configure(conf);
    clusters.add(cluster);
  }
  return clusters;
}
Example 14: map
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
@Override
protected void map(WritableComparable<?> key, VectorWritable point, Context context)
    throws IOException, InterruptedException {
  MeanShiftCanopy canopy = MeanShiftCanopy.initialCanopy(point.get(), nextCanopyId++, measure);
  ClusterWritable clusterWritable = new ClusterWritable();
  clusterWritable.setValue(canopy);
  context.write(new Text(key.toString()), clusterWritable);
}
Example 15: map
import org.apache.mahout.clustering.iterator.ClusterWritable; // import the required package/class
@Override
protected void map(WritableComparable<?> key, ClusterWritable clusterWritable, Context context)
    throws IOException, InterruptedException {
  // canopies use canopyIds assigned when input vectors are processed as vectorIds too
  MeanShiftCanopy canopy = (MeanShiftCanopy) clusterWritable.getValue();
  int vectorId = canopy.getId();
  for (MeanShiftCanopy msc : canopies) {
    for (int containedId : msc.getBoundPoints().toList()) {
      if (vectorId == containedId) {
        context.write(new IntWritable(msc.getId()),
            new WeightedVectorWritable(1, canopy.getCenter()));
      }
    }
  }
}