本文整理汇总了Java中org.apache.flink.examples.java.clustering.util.KMeansData类的典型用法代码示例。如果您正苦于以下问题:Java KMeansData类的具体用法?Java KMeansData怎么用?Java KMeansData使用的例子?那么,这里精选的类代码示例或许可以为您提供帮助。
KMeansData类属于org.apache.flink.examples.java.clustering.util包,在下文中一共展示了KMeansData类的5个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: runKMeans
import org.apache.flink.examples.java.clustering.util.KMeansData; //导入依赖的package包/类
/**
 * Builds a k-means dataflow on the default point and centroid data sets and executes it.
 *
 * <p>The centroids are refined inside a bulk iteration that is capped at 20 rounds. Afterwards
 * every point is assigned against the centroids produced by the iteration, and that assignment is
 * handed to a {@code DiscardingOutputFormat} before the job is executed under the name
 * "KMeans Example".
 *
 * @param env execution environment the job is assembled on and executed with
 * @throws Exception propagated from assembling or executing the job
 */
private static void runKMeans(ExecutionEnvironment env) throws Exception {
    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();

    // input: default points and initial centroids, both rebalanced
    DataSet<KMeans.Point> points = KMeansData.getDefaultPointDataSet(env).rebalance();
    DataSet<KMeans.Centroid> initialCentroids = KMeansData.getDefaultCentroidDataSet(env).rebalance();

    // open the bulk iteration over the centroids (at most 20 rounds)
    IterativeDataSet<KMeans.Centroid> iteration = initialCentroids.iterate(20);

    // inside the loop: pair each point with its nearest centroid; the current
    // centroids are made available through the "centroids" broadcast set
    DataSet<Tuple2<Integer, KMeans.Point>> assignedInLoop = points
        .map(new KMeans.SelectNearestCenter())
        .withBroadcastSet(iteration, "centroids");

    // append a count, accumulate per group key (field 0), then average
    // the accumulated values into the next round's centroids
    DataSet<KMeans.Centroid> updatedCentroids = assignedInLoop
        .map(new KMeans.CountAppender())
        .groupBy(0)
        .reduce(new KMeans.CentroidAccumulator())
        .map(new KMeans.CentroidAverager());

    // close the loop: the updated centroids feed the next round
    DataSet<KMeans.Centroid> finalCentroids = iteration.closeWith(updatedCentroids);

    // after the loop: assign every point against the final centroids
    DataSet<Tuple2<Integer, KMeans.Point>> clusteredPoints = points
        .map(new KMeans.SelectNearestCenter())
        .withBroadcastSet(finalCentroids, "centroids");

    // the result is only needed to drive the job, so it goes to a discarding sink
    clusteredPoints.output(new DiscardingOutputFormat<Tuple2<Integer, KMeans.Point>>());

    env.execute("KMeans Example");
}
示例2: getCentroidDataSet
import org.apache.flink.examples.java.clustering.util.KMeansData; //导入依赖的package包/类
/**
 * Provides the centroid data set for the job.
 *
 * <p>If the {@code centroids} parameter is present, its value is read as a CSV file with a
 * single-space field delimiter and mapped onto the {@code id}, {@code x} and {@code y} fields of
 * {@code Centroid}. Otherwise a usage hint is printed to standard output and the default centroid
 * data set from {@code KMeansData} is returned.
 *
 * @param params parsed program parameters; consulted for the {@code centroids} key
 * @param env execution environment used to create the data set
 * @return the centroid data set, either file-backed or the default one
 */
private static DataSet<Centroid> getCentroidDataSet(ParameterTool params, ExecutionEnvironment env) {
    // no file given: announce the fallback and use the default data
    if (!params.has("centroids")) {
        System.out.println("Executing K-Means example with default centroid data set.");
        System.out.println("Use --centroids to specify file input.");
        return KMeansData.getDefaultCentroidDataSet(env);
    }

    // read centroids from the CSV file named by the parameter
    final String centroidsFile = params.get("centroids");
    return env.readCsvFile(centroidsFile)
        .fieldDelimiter(" ")
        .pojoType(Centroid.class, "id", "x", "y");
}
示例3: getPointDataSet
import org.apache.flink.examples.java.clustering.util.KMeansData; //导入依赖的package包/类
/**
 * Provides the point data set for the job.
 *
 * <p>If the {@code points} parameter is present, its value is read as a CSV file with a
 * single-space field delimiter and mapped onto the {@code x} and {@code y} fields of
 * {@code Point}. Otherwise a usage hint is printed to standard output and the default point data
 * set from {@code KMeansData} is returned.
 *
 * @param params parsed program parameters; consulted for the {@code points} key
 * @param env execution environment used to create the data set
 * @return the point data set, either file-backed or the default one
 */
private static DataSet<Point> getPointDataSet(ParameterTool params, ExecutionEnvironment env) {
    // no file given: announce the fallback and use the default data
    if (!params.has("points")) {
        System.out.println("Executing K-Means example with default point data set.");
        System.out.println("Use --points to specify file input.");
        return KMeansData.getDefaultPointDataSet(env);
    }

    // read points from the CSV file named by the parameter
    final String pointsFile = params.get("points");
    return env.readCsvFile(pointsFile)
        .fieldDelimiter(" ")
        .pojoType(Point.class, "x", "y");
}
示例4: getPointDataSet
import org.apache.flink.examples.java.clustering.util.KMeansData; //导入依赖的package包/类
/**
 * Provides the point data set for the job.
 *
 * <p>When {@code fileOutput} is set, {@code pointsPath} is read as a CSV file with a space
 * delimiter, two {@code Double} fields are parsed per record, and each record is passed through
 * {@code TuplePointConverter}. Otherwise the default point data set from {@code KMeansData} is
 * returned.
 *
 * <p>NOTE(review): the flag is named {@code fileOutput} but it gates file <em>input</em> here;
 * confirm against the code that sets it that this is intended.
 *
 * @param env execution environment used to create the data set
 * @return the point data set, either file-backed or the default one
 */
private static DataSet<Point> getPointDataSet(ExecutionEnvironment env) {
    // default data unless the file-based mode is enabled
    if (!fileOutput) {
        return KMeansData.getDefaultPointDataSet(env);
    }

    // read points from CSV file
    return env.readCsvFile(pointsPath)
        .fieldDelimiter(' ')
        .includeFields(true, true)
        .types(Double.class, Double.class)
        .map(new TuplePointConverter());
}
示例5: getCentroidDataSet
import org.apache.flink.examples.java.clustering.util.KMeansData; //导入依赖的package包/类
/**
 * Provides the centroid data set for the job.
 *
 * <p>When {@code fileOutput} is set, {@code centersPath} is read as a CSV file with a space
 * delimiter, one {@code Integer} and two {@code Double} fields are parsed per record, and each
 * record is passed through {@code TupleCentroidConverter}. Otherwise the default centroid data
 * set from {@code KMeansData} is returned.
 *
 * <p>NOTE(review): the flag is named {@code fileOutput} but it gates file <em>input</em> here;
 * confirm against the code that sets it that this is intended.
 *
 * @param env execution environment used to create the data set
 * @return the centroid data set, either file-backed or the default one
 */
private static DataSet<Centroid> getCentroidDataSet(ExecutionEnvironment env) {
    // default data unless the file-based mode is enabled
    if (!fileOutput) {
        return KMeansData.getDefaultCentroidDataSet(env);
    }

    // read centroids from CSV file
    return env.readCsvFile(centersPath)
        .fieldDelimiter(' ')
        .includeFields(true, true, true)
        .types(Integer.class, Double.class, Double.class)
        .map(new TupleCentroidConverter());
}