当前位置: 首页>>代码示例>>Java>>正文


Java KMeansDriver.run方法代码示例

本文整理汇总了Java中org.apache.mahout.clustering.kmeans.KMeansDriver.run方法的典型用法代码示例。如果您正苦于以下问题:Java KMeansDriver.run方法的具体用法?Java KMeansDriver.run怎么用?Java KMeansDriver.run使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.mahout.clustering.kmeans.KMeansDriver的用法示例。


在下文中一共展示了KMeansDriver.run方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: run

import org.apache.mahout.clustering.kmeans.KMeansDriver; //导入方法依赖的package包/类
@Override
public void run() throws Exception {

	// Resolve all HDFS locations for this job up front.
	Path inputPath = new Path(input);
	Path outputPath = new Path(output);
	Path randomSeedOutPath = new Path(randomseedoutput);
	Path seqOutPath = new Path(seqoutput);

	// Convert the raw input into Mahout sequence-file vectors.
	InputDriver.runJob(inputPath, seqOutPath, "org.apache.mahout.math.RandomAccessSparseVector");
	Configuration conf = new Configuration();

	// Randomly pick k seed points from the vector data as the initial cluster centers.
	Path clusterSeeds = RandomSeedGenerator.buildRandom(conf, seqOutPath, randomSeedOutPath,
			Constants.kmeans_k, Constants.mahout_distanceclass);

	// K-means parameters from the project-wide Constants holder.
	// Primitive double (was boxed Double) — KMeansDriver.run takes a primitive,
	// so boxing here only added a needless box/unbox round trip.
	double convergenceDelta = Constants.kmeans_convergence_delta;
	int maxIterations = Constants.kmeans_max_iterations;
	boolean runClustering = Constants.kmeans_run_clustering;
	double clusterClassificationThreshold = Constants.kmeans_clusterClassificationThreshold;
	boolean runSequential = Constants.kmeans_run_sequential;

	// Run the actual k-means job over the sequence-file vectors.
	KMeansDriver.run(conf, seqOutPath, clusterSeeds, outputPath, convergenceDelta,
			maxIterations, runClustering, clusterClassificationThreshold, runSequential);
}
 
开发者ID:bytegriffin,项目名称:recsys-offline,代码行数:21,代码来源:KmeansJob.java

示例2: runClustering

import org.apache.mahout.clustering.kmeans.KMeansDriver; //导入方法依赖的package包/类
/**
 * Builds a visual dictionary by k-means clustering the descriptor vectors.
 * Wipes and recreates the dictionary directory, seeds k random centers,
 * then runs KMeansDriver over the descriptors.
 */
private static void runClustering(Configuration conf, ConfigFile configFile)
		throws IOException, ClassNotFoundException, InterruptedException {

	// Clustering parameters, with defaults when absent from the config file.
	int k = configFile.get("dictionarySize",100);
	double convergenceDelta = configFile.get("dictionaryConvergenceDelta",0.001);
	int maxIterations = configFile.get("dictionaryMaxIterations",10);
	DistanceMeasure distance = new EuclideanDistanceMeasure();

	// Start from a clean dictionary directory.
	FileSystem hdfs = FileSystem.get(conf);
	hdfs.delete(DICTIONARY_DIR, true);
	hdfs.mkdirs(DICTIONARY_DIR);

	// Seed the initial centers with k randomly chosen descriptor vectors.
	Path seedClusters = RandomSeedGenerator.buildRandom(conf, DESCRIPTORS_DIR,
			new Path(BASE_DIR, new Path("initial-clusters")), k, distance);
	log.info("Random clusters generated, running K-Means, k="+k+" maxIter="+maxIterations);

	log.info("KMeansDriver.run(...");
	log.info(DESCRIPTORS_DIR.toString());
	log.info(seedClusters.toString());
	log.info(DICTIONARY_DIR.toString());
	log.info("....)");

	// Cluster descriptors into the dictionary; last flag picks sequential vs. MR mode.
	KMeansDriver.run(conf, DESCRIPTORS_DIR, seedClusters, DICTIONARY_DIR,
			distance, convergenceDelta, maxIterations, true, 0.0,
			VM.RunSequential());

	log.info("KMeans done");
}
 
开发者ID:pgorecki,项目名称:visearch,代码行数:32,代码来源:KMeans.java

示例3: runClustering

import org.apache.mahout.clustering.kmeans.KMeansDriver; //导入方法依赖的package包/类
/**
 * Toy k-means run over a fixed HDFS layout (kmeans/toy1/{in,cl,out}):
 * recreates the cluster/output dirs, seeds k random centers, runs KMeansDriver.
 */
private static void runClustering(Configuration conf) throws IOException, ClassNotFoundException, InterruptedException {
	Path input 		= new Path("kmeans/toy1/in");
	Path clusters 	= new Path("kmeans/toy1/cl");
	Path output 	= new Path("kmeans/toy1/out");

	DistanceMeasure measure = new EuclideanDistanceMeasure();
	int k = 3;
	double convergenceDelta = 0.5;
	int maxIterations = 10;
	boolean runSequential = true;

	// Resolve the FileSystem once instead of calling FileSystem.get(conf)
	// for every delete/mkdirs below.
	FileSystem fs = FileSystem.get(conf);

	// delete output dir
	fs.delete(output, true);
	fs.mkdirs(output);

	fs.delete(clusters, true);
	fs.mkdirs(clusters);

	// Random clusters: pick k random input points as initial centers.
	log.info("Random clusters points....");
	clusters = RandomSeedGenerator.buildRandom(conf, input, clusters, k, measure);
	log.info(clusters.toString());

    log.info("Running KMeans");

    // TODO: set the -cl flag (run the classification step?)
	log.info(input.toString());
	log.info(clusters.toString());
	log.info(output.toString());
    KMeansDriver.run(conf, input, clusters, output, measure, convergenceDelta,
        maxIterations, false, 0.0, runSequential);
    
    log.info("KMeans done");
}
 
开发者ID:pgorecki,项目名称:visearch,代码行数:35,代码来源:KMeansToy.java

示例4: main

import org.apache.mahout.clustering.kmeans.KMeansDriver; //导入方法依赖的package包/类
@SuppressWarnings("deprecation")
	/**
	 * Pipeline driver: (part1/part2, currently disabled) build a TF-IDF term
	 * matrix, then (part3) k-means cluster the matrix vectors and dump the result.
	 */
	public static void main(String[] args) throws Exception {
		Configuration conf = new Configuration();
		String hdfsUrl = conf.get("fs.defaultFS");

//		part1---------------------------------------------------------------
//		Job job0 = Job.getInstance(conf, "siftKeywordsDimension");
//		Path output1Path=new Path(hdfsUrl + "/data/recommend/matrix1");
//		HadoopUtil.delete(conf, output1Path);
//		job0.setJarByClass(TFIDF.class);
//		job0.setMapperClass(Mapper_Part1.class);
//		job0.setReducerClass(Reduce_Part1.class);
//		job0.setMapOutputKeyClass(Text.class);
//		job0.setMapOutputValueClass(Text.class);
//		job0.setOutputKeyClass(Text.class);
//		job0.setOutputValueClass(Text.class);
//		job0.setPartitionerClass(CustomPartitioner.class);
//		FileInputFormat.addInputPath(job0, new Path(hdfsUrl + "/data/recommend/tfidf3"));
//		FileOutputFormat.setOutputPath(job0, output1Path);
//		job0.waitForCompletion(true);

//		part2---------------------------------------------------------------
//		FileSystem fsopen = FileSystem.get(conf);
//		FSDataInputStream in = fsopen.open(new Path(hdfsUrl + "/data/recommend/matrix1/part-r-00000"));
//		Scanner scan = new Scanner(in);
//		List<String> keywordList=new ArrayList<String>();
//		while (scan.hasNext()) {
//			keywordList.add(scan.next());
//		}
////		must before job
//		conf.setStrings("keyword", keywordList.toArray(new String[keywordList.size()]));
//		Job job1 = Job.getInstance(conf, "generateMatrix");
//		Path output2Path=new Path(hdfsUrl + "/data/recommend/matrix2");
//		HadoopUtil.delete(conf, output2Path);
//		job1.setJarByClass(TFIDF.class);
//		job1.setMapperClass(Mapper_Part2.class);
//		job1.setReducerClass(Reduce_Part2.class);
//		job1.setMapOutputKeyClass(Text.class);
//		job1.setMapOutputValueClass(Text.class);
//		job1.setOutputKeyClass(Text.class);
//		job1.setOutputValueClass(NullWritable.class);
////		job1.addCacheFile(new Path("/data/recommend/matrix1/part-r-00000").toUri());
//		FileInputFormat.addInputPath(job1, new Path(hdfsUrl + "/data/recommend/tfidf3"));
//		FileOutputFormat.setOutputPath(job1, output2Path);
//		job1.waitForCompletion(true);
		
//		part3------------------- cluster and print the result --------------------------------------------
		// HDFS locations for the clustering step. clusterInput is only consumed by
		// the commented-out vector-conversion step below; kept for when it is re-enabled.
		Path clusterInput = new Path(hdfsUrl + "/data/recommend/matrix2");
		Path clusterSeqInput = new Path(hdfsUrl + "/data/recommend/cluster1");
		Path clusterOutput = new Path(hdfsUrl + "/data/recommend/cluster2");
		// Wipe any previous clustering output. (Previously a second Path, output3Path,
		// was built for the exact same location just to do this delete — deduplicated.)
		HadoopUtil.delete(conf, clusterOutput);
		EuclideanDistanceMeasure measure = new EuclideanDistanceMeasure();
		int k = 10;
		int maxIter = 3;
//		Convert the data files into Mahout vector representation (to be written by hand)
//		InputDriver.runJob(clusterInput, clusterSeqInput, "org.apache.mahout.math.RandomAccessSparseVector");
//		Randomly choose k points as the initial cluster centers
		Path clusters = RandomSeedGenerator.buildRandom(conf, clusterSeqInput, 
				new Path(clusterOutput,"clusters-0"), k, measure);
		KMeansDriver.run(conf,clusterSeqInput,clusters,clusterOutput,0.01,maxIter,true, 0.0, false);
		// Print the clustering result via ClusterDumper.printClusters.
		ClusterDumper clusterDumper = new ClusterDumper(new Path(clusterOutput, "clusters-"
				+ (maxIter - 1)), new Path(clusterOutput, "clusteredPoints"));
		clusterDumper.printClusters(null);

		
		clusterOutput(conf,new Path(hdfsUrl + "/data/recommend/cluster2/clusteredPoints/part-m-00000"));
//		clusterOutput2(conf0,new Path(hdfsUrl0 + "/data/recommend/cluster2/clusteredPoints/part-m-00000"));
//		matrix2Vector(conf0,new Path(hdfsUrl0 + "/data/recommend/cluster1/part-m-00000"));// not used for now

	}
 
开发者ID:hejy12,项目名称:newsRecommender,代码行数:73,代码来源:MatrixAndCluster.java


注:本文中的org.apache.mahout.clustering.kmeans.KMeansDriver.run方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。