This article collects typical usage examples of the org.apache.mahout.clustering.kmeans.KMeansDriver class in Java. If you are wondering what the KMeansDriver class does, how to use it, or where to find usage examples, the curated code samples below should help.
The KMeansDriver class belongs to the org.apache.mahout.clustering.kmeans package. Four code examples of the class are shown below, sorted by popularity by default.
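Before the examples, here is a minimal end-to-end sketch of how KMeansDriver is typically driven, distilled from the examples below. The class name, paths, and parameter values are placeholders for illustration, not part of any example; the overload with an explicit DistanceMeasure is the one used in Examples 2 and 3, while Examples 1 and 4 call an overload that omits the measure argument.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.apache.mahout.clustering.kmeans.RandomSeedGenerator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;

public class KMeansSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path vectors = new Path("kmeans/vectors");    // SequenceFile of VectorWritable values
        Path seedDir = new Path("kmeans/clusters-0"); // initial centroids are written here
        Path output = new Path("kmeans/output");
        DistanceMeasure measure = new EuclideanDistanceMeasure();

        // Pick k = 5 random input vectors as the initial cluster centers.
        Path seeds = RandomSeedGenerator.buildRandom(conf, vectors, seedDir, 5, measure);

        // Overload with an explicit DistanceMeasure, as in Examples 2 and 3 below.
        KMeansDriver.run(conf, vectors, seeds, output, measure,
                0.001, // convergenceDelta: stop once the centers move less than this
                10,    // maxIterations
                true,  // runClustering: also write clusteredPoints after the centers converge
                0.0,   // clusterClassificationThreshold: outlier cutoff; 0.0 keeps every point
                false  // runSequential: false uses the MapReduce implementation
        );
    }
}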
Example 1: run
import org.apache.mahout.clustering.kmeans.KMeansDriver; // import the required package/class
@Override
public void run() throws Exception {
    Path inputpath = new Path(input);
    Path outputpath = new Path(output);
    Path randomseedoutpath = new Path(randomseedoutput);
    Path seqoutpath = new Path(seqoutput);
    // Convert the raw input into a SequenceFile of RandomAccessSparseVector.
    InputDriver.runJob(inputpath, seqoutpath, "org.apache.mahout.math.RandomAccessSparseVector");
    Configuration conf = new Configuration();
    // Pick Constants.kmeans_k random vectors as the initial cluster centers.
    Path clustersSeeds = RandomSeedGenerator.buildRandom(conf, seqoutpath, randomseedoutpath,
        Constants.kmeans_k, Constants.mahout_distanceclass);
    Double convergenceDelta = Constants.kmeans_convergence_delta;
    int maxIterations = Constants.kmeans_max_iterations;
    boolean runClustering = Constants.kmeans_run_clustering;
    double clusterClassificationThreshold = Constants.kmeans_clusterClassificationThreshold;
    boolean runSequential = Constants.kmeans_run_sequential;
    KMeansDriver.run(conf, seqoutpath, clustersSeeds, outputpath, convergenceDelta, maxIterations,
        runClustering, clusterClassificationThreshold, runSequential);
}
Example 2: runClustering
import org.apache.mahout.clustering.kmeans.KMeansDriver; // import the required package/class
private static void runClustering(Configuration conf, ConfigFile configFile)
        throws IOException, ClassNotFoundException, InterruptedException {
    FileSystem fs = FileSystem.get(conf);
    Path clusters = new Path(BASE_DIR, new Path("initial-clusters"));
    fs.delete(DICTIONARY_DIR, true);
    fs.mkdirs(DICTIONARY_DIR);
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    int k = configFile.get("dictionarySize", 100);
    double convergenceDelta = configFile.get("dictionaryConvergenceDelta", 0.001);
    int maxIterations = configFile.get("dictionaryMaxIterations", 10);
    // Seed the run with k random descriptors as the initial cluster centers.
    clusters = RandomSeedGenerator.buildRandom(conf, DESCRIPTORS_DIR, clusters, k, measure);
    log.info("Random clusters generated, running K-Means, k=" + k + " maxIter=" + maxIterations);
    log.info("KMeansDriver.run(...");
    log.info(DESCRIPTORS_DIR.toString());
    log.info(clusters.toString());
    log.info(DICTIONARY_DIR.toString());
    log.info("....)");
    KMeansDriver.run(conf, DESCRIPTORS_DIR, clusters, DICTIONARY_DIR,
        measure, convergenceDelta, maxIterations, true, 0.0,
        VM.RunSequential());
    log.info("KMeans done");
}
Example 3: runClustering
import org.apache.mahout.clustering.kmeans.KMeansDriver; // import the required package/class
private static void runClustering(Configuration conf) throws IOException, ClassNotFoundException, InterruptedException {
    Path input = new Path("kmeans/toy1/in");
    Path clusters = new Path("kmeans/toy1/cl");
    Path output = new Path("kmeans/toy1/out");
    DistanceMeasure measure = new EuclideanDistanceMeasure();
    int k = 3;
    double convergenceDelta = 0.5;
    int maxIterations = 10;
    boolean runSequential = true;
    // delete the output and cluster dirs from any previous run
    FileSystem.get(conf).delete(output, true);
    FileSystem.get(conf).mkdirs(output);
    FileSystem.get(conf).delete(clusters, true);
    FileSystem.get(conf).mkdirs(clusters);
    // Pick k random input points as the initial cluster centers.
    log.info("Random clusters points....");
    clusters = RandomSeedGenerator.buildRandom(conf, input, clusters, k, measure);
    log.info(clusters.toString());
    log.info("Running KMeans");
    // TODO: set the -cl flag (classification?); see the note after this example
    log.info(input.toString());
    log.info(clusters.toString());
    log.info(output.toString());
    KMeansDriver.run(conf, input, clusters, output, measure, convergenceDelta,
        maxIterations, false, 0.0, runSequential);
    log.info("KMeans done");
}
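The TODO in Example 3 refers to the -cl option of the kmeans command line, which corresponds to the runClustering argument (passed as false above). When it is true, the driver also writes a clusteredPoints directory under the output path, mapping each input vector to its cluster id. The helper below is a rough sketch for inspecting one of those part files; it instantiates the key and value types straight from the SequenceFile header, so it does not depend on a particular Mahout writable class.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class ClusteredPointsDump {
    // Print every (cluster id, clustered point) pair from one clusteredPoints part file.
    public static void dump(Configuration conf, Path partFile) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, partFile, conf);
        try {
            Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
            while (reader.next(key, value)) {
                System.out.println(key + "\t" + value); // key = cluster id, value = weighted vector
            }
        } finally {
            reader.close();
        }
    }
}

A typical call would be dump(conf, new Path(output, "clusteredPoints/part-m-00000")), matching the part-file path used in Example 4 below.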
Example 4: main
import org.apache.mahout.clustering.kmeans.KMeansDriver; // import the required package/class
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String hdfsUrl = conf.get("fs.defaultFS");
    // part1---------------------------------------------------------------
    // Job job0 = Job.getInstance(conf, "siftKeywordsDimension");
    // Path output1Path=new Path(hdfsUrl + "/data/recommend/matrix1");
    // HadoopUtil.delete(conf, output1Path);
    // job0.setJarByClass(TFIDF.class);
    // job0.setMapperClass(Mapper_Part1.class);
    // job0.setReducerClass(Reduce_Part1.class);
    // job0.setMapOutputKeyClass(Text.class);
    // job0.setMapOutputValueClass(Text.class);
    // job0.setOutputKeyClass(Text.class);
    // job0.setOutputValueClass(Text.class);
    // job0.setPartitionerClass(CustomPartitioner.class);
    // FileInputFormat.addInputPath(job0, new Path(hdfsUrl + "/data/recommend/tfidf3"));
    // FileOutputFormat.setOutputPath(job0, output1Path);
    // job0.waitForCompletion(true);
    // part2---------------------------------------------------------------
    // FileSystem fsopen = FileSystem.get(conf);
    // FSDataInputStream in = fsopen.open(new Path(hdfsUrl + "/data/recommend/matrix1/part-r-00000"));
    // Scanner scan = new Scanner(in);
    // List<String> keywordList=new ArrayList<String>();
    // while (scan.hasNext()) {
    //     keywordList.add(scan.next());
    // }
    //// must be set before the job is created
    // conf.setStrings("keyword", keywordList.toArray(new String[keywordList.size()]));
    // Job job1 = Job.getInstance(conf, "generateMatrix");
    // Path output2Path=new Path(hdfsUrl + "/data/recommend/matrix2");
    // HadoopUtil.delete(conf, output2Path);
    // job1.setJarByClass(TFIDF.class);
    // job1.setMapperClass(Mapper_Part2.class);
    // job1.setReducerClass(Reduce_Part2.class);
    // job1.setMapOutputKeyClass(Text.class);
    // job1.setMapOutputValueClass(Text.class);
    // job1.setOutputKeyClass(Text.class);
    // job1.setOutputValueClass(NullWritable.class);
    //// job1.addCacheFile(new Path("/data/recommend/matrix1/part-r-00000").toUri());
    // FileInputFormat.addInputPath(job1, new Path(hdfsUrl + "/data/recommend/tfidf3"));
    // FileOutputFormat.setOutputPath(job1, output2Path);
    // job1.waitForCompletion(true);
    // part3 ------------------- cluster and print the results ---------------------------------
    Path output3Path = new Path(hdfsUrl + "/data/recommend/cluster2");
    HadoopUtil.delete(conf, output3Path);
    EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
    Path clusterInput = new Path(hdfsUrl + "/data/recommend/matrix2");
    Path clusterSeqInput = new Path(hdfsUrl + "/data/recommend/cluster1");
    Path clusterOutput = new Path(hdfsUrl + "/data/recommend/cluster2");
    int k = 10;
    int maxIter = 3;
    // Convert the data file into Mahout vector form (you have to write this yourself;
    // a sketch is given after this example)
    // InputDriver.runJob(clusterInput, clusterSeqInput, "org.apache.mahout.math.RandomAccessSparseVector");
    // Randomly choose k points as the initial cluster centers
    Path clusters = RandomSeedGenerator.buildRandom(conf, clusterSeqInput,
        new Path(clusterOutput, "clusters-0"), k, measure);
    KMeansDriver.run(conf, clusterSeqInput, clusters, clusterOutput, 0.01, maxIter, true, 0.0, false);
    // Call ClusterDumper's printClusters method to print the clustering result.
    ClusterDumper clusterDumper = new ClusterDumper(new Path(clusterOutput, "clusters-"
        + (maxIter - 1)), new Path(clusterOutput, "clusteredPoints"));
    clusterDumper.printClusters(null);
    clusterOutput(conf, new Path(hdfsUrl + "/data/recommend/cluster2/clusteredPoints/part-m-00000"));
    // clusterOutput2(conf0, new Path(hdfsUrl0 + "/data/recommend/cluster2/clusteredPoints/part-m-00000"));
    // matrix2Vector(conf0, new Path(hdfsUrl0 + "/data/recommend/cluster1/part-m-00000")); // not used at the moment
}
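Example 4 notes that converting the raw matrix file into Mahout vectors has to be written by hand instead of using InputDriver. The sketch below shows one plausible way to do it; the class and method names are hypothetical, and the input line format "docId<TAB>w1,w2,...,wn" is assumed for illustration, not taken from the example. It writes a SequenceFile of Text keys and VectorWritable values, which is the layout RandomSeedGenerator and KMeansDriver read.

import java.io.BufferedReader;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.mahout.math.NamedVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;

public class Matrix2Vectors {
    // Assumed input line format: "docId<TAB>w1,w2,...,wn" (one TF-IDF row per line).
    public static void convert(Configuration conf, Path textIn, Path seqOut) throws Exception {
        FileSystem fs = FileSystem.get(conf);
        SequenceFile.Writer writer =
                SequenceFile.createWriter(fs, conf, seqOut, Text.class, VectorWritable.class);
        BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(textIn)));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] parts = line.split("\t");
                String docId = parts[0];
                String[] weights = parts[1].split(",");
                Vector row = new RandomAccessSparseVector(weights.length);
                for (int i = 0; i < weights.length; i++) {
                    row.set(i, Double.parseDouble(weights[i]));
                }
                // NamedVector keeps the document id attached to the point in the cluster output.
                writer.append(new Text(docId), new VectorWritable(new NamedVector(row, docId)));
            }
        } finally {
            reader.close();
            writer.close();
        }
    }
}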