本文整理汇总了Python中pyspark.mllib.clustering.KMeans.fit方法的典型用法代码示例。如果您正苦于以下问题:Python KMeans.fit方法的具体用法?Python KMeans.fit怎么用?Python KMeans.fit使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyspark.mllib.clustering.KMeans
的用法示例。
在下文中一共展示了KMeans.fit方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: initializeModels
# 需要导入模块: from pyspark.mllib.clustering import KMeans [as 别名]
# 或者: from pyspark.mllib.clustering.KMeans import fit [as 别名]
def initializeModels(self):
    """Build and cache a Word2Vec + KMeans clustering model over bash command logs.

    Tokenizes each command in ``self.bashDF``, embeds the token lists with
    Word2Vec (100-dim vectors), clusters the embeddings with KMeans (k=650),
    persists the clustered DataFrame to HDFS as parquet, and stores the rows
    belonging to small clusters (fewer than 10 members) in
    ``self.outlierCmds``.  Idempotent: returns early if ``self.kmeansDF``
    was already populated by a previous call.
    """
    # Skip the expensive pipeline if a previous call already built the model.
    if getattr(self, 'kmeansDF', None) is not None:
        logger.info("Already loaded this DataFrame")
        return
    self.kmeansDF = None
    # Split each raw command string into whitespace-separated tokens so
    # Word2Vec can treat a command line as a "sentence" of words.
    commandsDF = self.bashDF.map(lambda row: Row(date=row.date,
                                                 source=row.source,
                                                 username=row.username,
                                                 exec_as=row.exec_as,
                                                 srcip=row.srcip,
                                                 command=row.command.split(" "))).toDF()
    commandsDF.cache()
    # Embed every tokenized command as a single 100-dimensional vector.
    # minCount=1 keeps even once-seen tokens in the vocabulary.
    word2Vec = Word2Vec(vectorSize=100, minCount=1, inputCol="command", outputCol="features")
    w2model = word2Vec.fit(commandsDF)
    resultDF = w2model.transform(commandsDF)
    resultDF.cache()
    kmeans = KMeans(k=650, seed=42, featuresCol="features", predictionCol="prediction", maxIter=10, initSteps=3)
    kmodel = kmeans.fit(resultDF)
    # BUG FIX: the original bound the result to a *local* ``kmeansDF``, so
    # ``self.kmeansDF`` stayed None after the first call and the cache
    # check above never reused (or rebuilt) the model.
    self.kmeansDF = kmodel.transform(resultDF)
    self.kmeansDF.cache()
    self.kmeansDF.coalesce(1).write.parquet('/user/jleaniz/ml/kmeans', mode='append')
    # Clusters with fewer than 10 members are treated as anomalies; join
    # them back to the full rows so the raw commands are available.
    outliers = self.kmeansDF.groupBy("prediction").count().filter('count < 10').withColumnRenamed("prediction", "cluster")
    self.outlierCmds = outliers.join(self.kmeansDF, self.kmeansDF.prediction == outliers.cluster)
示例2: 使用 KMeans.fit 对二维数据集进行聚类
# 需要导入模块: from pyspark.mllib.clustering import KMeans [as 别名]
# 或者: from pyspark.mllib.clustering.KMeans import fit [as 别名]
# In[2]:
# I. Generate a two-dimensional dataset with three cluster centroids.
# Seed NumPy so make_blobs draws the same sample every run.
np.random.seed(0)
centers = [[1, 1], [-1, -1], [1, -1]]
n_clusters = len(centers)
X, labels_true = make_blobs(n_samples=3000, centers=centers, cluster_std=0.7)
# In[3]:
# Clustering with KMeans (k-means++ seeding, best of 10 restarts).
# FIX: pass n_clusters instead of the hard-coded 3 so the model stays in
# sync with `centers` if centroids are added or removed.
K_means = KMeans(init='k-means++', n_clusters=n_clusters, n_init=10)
K_means.fit(X)
# Fitted attributes: per-sample labels, centroid coordinates, and the
# final within-cluster sum of squared distances (inertia).
K_means_labels = K_means.labels_
K_means_cluster_centers = K_means.cluster_centers_
K_means_inertia = K_means.inertia_
# Unique label values (0 .. n_clusters-1), used below when plotting.
K_means_labels_unique = np.unique(K_means_labels)
# In[4]:
# Plot result
# Fixed palette: one hex color per cluster (n_clusters == 3 above).
fig = plt.figure(figsize=(7, 7))
colors = ['#4EACC5', '#FF9C34', '#4E9A06']
# Single subplot filling the figure.
ax = fig.add_subplot(111)