当前位置: 首页>>代码示例>>Python>>正文


Python AgglomerativeClustering.fit方法代码示例

本文整理汇总了Python中sklearn.cluster.AgglomerativeClustering.fit方法的典型用法代码示例。如果您正苦于以下问题:Python AgglomerativeClustering.fit方法的具体用法?Python AgglomerativeClustering.fit怎么用?Python AgglomerativeClustering.fit使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.cluster.AgglomerativeClustering的用法示例。


在下文中一共展示了AgglomerativeClustering.fit方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: train_agglomerative

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
def train_agglomerative():
    """Fit an agglomerative clustering model and print the resulting labels.

    Reads ``num_clusters``, ``aggl_affinity``, ``aggl_linkage`` and ``X``
    from names defined outside this function. Nothing is returned.
    """
    # A parenthesised single-argument print behaves the same on Python 2
    # and is valid syntax on Python 3 (the bare statement form is not).
    print("starting agglomerative clustering...")
    model = AgglomerativeClustering(
        n_clusters=num_clusters,
        affinity=aggl_affinity,
        linkage=aggl_linkage,
    )
    model.fit(X)
    labels = model.labels_
    print(labels)
开发者ID:ahnqirage,项目名称:avenir,代码行数:9,代码来源:cluster.py

示例2: cluster_agg

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
def cluster_agg(cluster_data):
    """Cluster ``cluster_data`` into 11 Ward-linkage groups and return tiers.

    The fitted labels are stored in the ``tier`` column of ``df`` (a name
    defined outside this function), and the ``Player`` and ``tier`` columns
    of ``df`` are returned.
    """
    model = AgglomerativeClustering(linkage='ward', n_clusters=11)
    model.fit(cluster_data)

    # NOTE(review): presumably the rows of ``cluster_data`` line up with the
    # rows of ``df`` -- confirm against the caller.
    df['tier'] = model.labels_
    return df[['Player', 'tier']]
开发者ID:Sandy4321,项目名称:nba-analysis-2,代码行数:9,代码来源:cluster.py

示例3: eval_dist

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
def eval_dist(linkage='ward'):
    """Plot the adjusted Rand index and a cluster distance against k.

    For every ``k`` from 2 to 50 an agglomerative model with the given
    ``linkage`` is fitted on ``x`` and scored against ``y`` (both defined
    outside this function). ``calc_distance(k, labels)`` is also evaluated
    and its first three values are collected. Two side-by-side plots are
    shown: ARI versus k, and the second distance series versus k.

    Parameters
    ----------
    linkage : str
        Linkage criterion forwarded to ``AgglomerativeClustering``.
    """
    labels = ['Minimum', 'Maximum', 'Average']
    a_score = []
    idx = []
    d = [[] for _ in range(3)]
    for k in range(2, 50 + 1):
        # Parenthesised single-argument print works on Python 2 and 3.
        print('k={}'.format(k))
        est = AgglomerativeClustering(n_clusters=k, linkage=linkage)
        est.fit(x)
        ari_v = metrics.adjusted_rand_score(y, est.labels_)
        ds = calc_distance(k, est.labels_)
        for i in range(3):
            d[i].append(ds[i])
        print(ari_v)
        a_score.append(ari_v)
        idx.append(k)

    fig, axes = plt.subplots(nrows=1, ncols=2)
    axes[0].plot(idx, a_score)
    # ``bottom`` is the long-standing name of the lower limit; the ``ymin``
    # keyword alias is no longer accepted by recent matplotlib releases.
    axes[0].set_ylim(bottom=0)
    axes[0].set_ylabel('ARI')
    axes[0].set_xlabel('# of clusters')

    # Only the second series is drawn; give it its label so that legend()
    # has an entry to show instead of producing an empty legend.
    axes[1].plot(idx, d[1], label=labels[1])
    axes[1].legend()
    axes[1].set_ylabel('distance')
    axes[1].set_xlabel('# of clusters')
    plt.show()
开发者ID:harrylclc,项目名称:ist557,代码行数:35,代码来源:hcm.py

示例4: __generate_dummy_data

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
def __generate_dummy_data():
    """Fit a cosine/average-linkage model on a fixed 5x10 sample matrix.

    The merge tree is logged through ``DEBUG`` as a list of
    ``node_id``/``left``/``right`` dicts, with node ids counting up from the
    number of samples. Returns the fitted model and its labels.
    """
    import itertools
    from sklearn.cluster import AgglomerativeClustering

    # Three rows with values near zero followed by two rows near 100.
    X = np.array([
        [-5.27453240e-01, -6.14130238e-01, -1.63611427e+00, -9.26556498e-01,
         7.82296885e-01, -1.06286220e+00, -1.24368729e+00, -1.16151964e+00,
         -2.25816923e-01, -3.32354552e-02],
        [-2.01273137e-01, 5.25758359e-01, 1.37940072e+00, -7.63256657e-01,
         -1.27275323e+00, -1.31618084e+00, -7.00167331e-01, 2.21410669e+00,
         9.15456567e-01, 7.93076923e-01],
        [1.53249104e-01, -5.48642411e-01, -1.06559060e+00, -3.05253203e-01,
         -1.93393495e+00, 1.39827978e-01, 1.73359830e-01, 2.85576854e-02,
         -1.19427027e+00, 1.04395610e+00],
        [1.00595172e+02, 1.01661346e+02, 1.00115635e+02, 9.86884249e+01,
         9.86506406e+01, 1.02214982e+02, 1.01144087e+02, 1.00642778e+02,
         1.01635339e+02, 9.88981171e+01],
        [1.01506262e+02, 1.00525318e+02, 9.93021764e+01, 9.92514163e+01,
         1.01199015e+02, 1.01771241e+02, 1.00464097e+02, 9.97482396e+01,
         9.96888274e+01, 9.88297336e+01],
    ])

    model = AgglomerativeClustering(affinity="cosine", linkage="average")
    model.fit(X)

    # Merge nodes are numbered starting right after the last sample index.
    node_ids = itertools.count(X.shape[0])
    merges = [
        {'node_id': next(node_ids), 'left': pair[0], 'right': pair[1]}
        for pair in model.children_
    ]
    DEBUG(str(merges))
    return model, model.labels_
开发者ID:zbarzik,项目名称:ctwc_16s,代码行数:31,代码来源:ctwc__cluster_rank.py

示例5: knn_connectivity

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
 def knn_connectivity(self, X):
     """Compare linkages with and without a k-NN connectivity constraint.

     Two figures are drawn: one without connectivity and one using a
     30-nearest-neighbour graph of ``X``. Each figure holds one scatter
     plot of the first two columns of ``X`` per linkage ('average',
     'complete', 'ward'), coloured by cluster label and titled with the
     fit time. All figures are shown at the end.
     """
     knn_graph = kneighbors_graph(X, 30, include_self=False)
     n_clusters = 4

     for connectivity in (None, knn_graph):
         plt.figure(figsize=(10, 4))
         for index, linkage in enumerate(('average', 'complete', 'ward')):
             plt.subplot(1, 3, index + 1)
             model = AgglomerativeClustering(linkage=linkage,
                                             connectivity=connectivity,
                                             n_clusters=n_clusters)
             t0 = time.time()
             model.fit(X)
             elapsed_time = time.time() - t0
             # ``spectral`` was renamed to ``nipy_spectral``; the old
             # attribute no longer exists in current matplotlib.
             plt.scatter(X[:, 0], X[:, 1], c=model.labels_,
                         cmap=plt.cm.nipy_spectral)
             plt.title('linkage=%s (time %.2fs)' % (linkage, elapsed_time),
                       fontdict=dict(verticalalignment='top'))
             plt.axis('equal')
             plt.axis('off')

         # Figure-level layout and title do not depend on the linkage, so
         # they are applied once per figure rather than once per subplot.
         plt.subplots_adjust(bottom=0, top=.89, wspace=0,
                             left=0, right=1)
         plt.suptitle('n_cluster=%i, connectivity=%r' %
                      (n_clusters, connectivity is not None), size=17)

     plt.show()
开发者ID:ShriyaGupta,项目名称:CompBio,代码行数:30,代码来源:HeirarchichalClustering.py

示例6: wardHierarchical

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
def wardHierarchical(img):
    """Segment a downsampled copy of ``img`` with connectivity-constrained Ward clustering.

    The image is resized with ``sp.misc.imresize(img, 0.10)`` and divided by
    255, then its pixels are clustered into 15 regions using a grid
    connectivity graph. Timing and size statistics are printed, and the
    resized image is shown with one contour per region.

    Parameters
    ----------
    img : 2-D array
        Image to segment. NOTE(review): assumed to be a single-channel
        image, since its shape is unpacked into ``grid_to_graph`` -- confirm.
    """
    n_clusters = 15  # number of regions

    # Samples, connectivity graph and label image must all describe the
    # same pixel grid. Previously the samples and graph came from the
    # full-size ``img`` while the labels were reshaped to the resized
    # ``face.shape``, which fails because the element counts differ.
    face = sp.misc.imresize(img, 0.10) / 255.
    X = np.reshape(face, (-1, 1))
    connectivity = grid_to_graph(*face.shape)

    print("Compute structured hierarchical clustering...")
    st = time.time()
    ward = AgglomerativeClustering(n_clusters=n_clusters, linkage='ward',
                                   connectivity=connectivity)
    ward.fit(X)
    label = np.reshape(ward.labels_, face.shape)
    print("Elapsed time: ", time.time() - st)
    print("Number of pixels: ", label.size)
    print("Number of clusters: ", np.unique(label).size)

    plt.figure(figsize=(5, 5))
    plt.imshow(face, cmap=plt.cm.gray)
    for l in range(n_clusters):
        # ``spectral`` was renamed to ``nipy_spectral``; the old attribute
        # no longer exists in current matplotlib.
        plt.contour(label == l, contours=1,
                    colors=[plt.cm.nipy_spectral(l / float(n_clusters)), ])
    plt.xticks(())
    plt.yticks(())
    plt.show()
开发者ID:carlomt,项目名称:dicom_tools,代码行数:27,代码来源:wardHierarchical.py

示例7: plot_mfi

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
    def plot_mfi(self, outputfile='embeddings.pdf', nb_clusters=8, weights='NA'):
        """Save a 2-D t-SNE map of the embeddings of ``self.mfi`` words.

        Words in ``self.mfi`` that are present in ``self.w2v_model`` are
        embedded, reduced to two dimensions with t-SNE, grouped with Ward
        agglomerative clustering, and drawn as text coloured by cluster.

        Parameters
        ----------
        outputfile : str
            Path the figure is saved to.
        nb_clusters : int
            Number of clusters used to colour the words.
        weights : unused
            Kept for interface compatibility; not read by this method.
        """
        # Filter once so that the names stay aligned with the rows of X.
        # Using the unfiltered ``self.mfi`` as labels would shift every name
        # after the first word that is missing from the model.
        labels = [w for w in self.mfi if w in self.w2v_model]
        X = np.asarray([self.w2v_model[w] for w in labels], dtype='float32')
        # dimension reduction:
        tsne = TSNE(n_components=2)
        coor = tsne.fit_transform(X)

        plt.clf()
        sns.set_style('dark')
        # ``sns.plt`` was only a re-export of matplotlib.pyplot and is gone
        # from current seaborn; use ``plt`` directly.
        plt.rcParams['axes.linewidth'] = 0.4
        _, ax1 = plt.subplots()

        # invisible markers first, so the axes limits cover every point:
        x1, x2 = coor[:, 0], coor[:, 1]
        ax1.scatter(x1, x2, 100, edgecolors='none', facecolors='none')
        # clustering on top (add some colouring):
        clustering = AgglomerativeClustering(linkage='ward',
                            affinity='euclidean', n_clusters=nb_clusters)
        clustering.fit(coor)
        # add names:
        for x, y, name, cluster_label in zip(x1, x2, labels, clustering.labels_):
            # ``spectral`` was renamed to ``nipy_spectral`` in matplotlib.
            ax1.text(x, y, name, ha='center', va="center",
                     color=plt.cm.nipy_spectral(cluster_label / 10.),
                     fontdict={'family': 'Arial', 'size': 8})
        # control aesthetics:
        ax1.set_xlabel('')
        ax1.set_ylabel('')
        ax1.set_xticklabels([])
        ax1.set_xticks([])
        ax1.set_yticklabels([])
        ax1.set_yticks([])
        plt.savefig(outputfile, bbox_inches=0)
开发者ID:PonteIneptique,项目名称:pandora,代码行数:36,代码来源:pretraining.py

示例8: agglomerative_clusters

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
	def agglomerative_clusters(self, word_vectors):
		"""Group ``word_vectors`` into 100 clusters and return the labels.

		A BallTree is built first, a 1-nearest-neighbour connectivity graph
		is derived from it, and average-linkage agglomerative clustering is
		run under that constraint. The elapsed time of each stage is printed.
		"""
		# Stage 1: BallTree over the vectors.
		t0 = time.time()
		tree = BallTree(word_vectors, leaf_size=200, metric="minkowski")
		print("BallTree object in " + str(time.time() - t0))

		# Stage 2: nearest-neighbour connectivity graph built from the tree.
		t0 = time.time()
		graph_options = dict(
			n_neighbors=1,
			mode="connectivity",
			metric="minkowski",
			p=2,
			include_self=False,
			n_jobs=workers,
		)
		connectivity_graph = kneighbors_graph(tree, **graph_options)
		print("Pre-compute connectivity graph in " + str(time.time() - t0))

		# Stage 3: the clustering itself, constrained by the graph.
		t0 = time.time()
		model = AgglomerativeClustering(
			n_clusters=100,
			affinity="minkowski",
			connectivity=connectivity_graph,
			compute_full_tree=True,
			linkage="average",
		)
		model.fit(word_vectors)
		print("Agglomerative clustering in " + str(time.time() - t0))

		return model.labels_
开发者ID:jonathandunn,项目名称:c2xg,代码行数:36,代码来源:Word_Classes.py

示例9: clustering

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
def clustering(data, params):
    """Cluster ``data`` agglomeratively and return cluster means and labels.

    Parameters
    ----------
    data : array
        Samples to cluster; indexed with a boolean mask per cluster.
    params : mapping
        Must provide ``n_clusters``, ``affinity`` and ``linkage``, which are
        forwarded to ``AgglomerativeClustering``. Other keys are ignored.

    Returns
    -------
    list
        ``[cluster_centers, labels]`` where ``cluster_centers`` holds the
        mean of the samples in each cluster.

    Raises
    ------
    KeyError
        If one of the required keys is missing from ``params``.
    """
    # Read the settings directly. Building assignments as text and running
    # them through exec() cannot create function locals on Python 3, and it
    # would execute whatever text the parameter values contain.
    n_clusters = params['n_clusters']
    affinity = params['affinity']
    linkage = params['linkage']

    # apply Agglomerative Clustering to reduced data
    clusters = AgglomerativeClustering(n_clusters=n_clusters,
                                       affinity=affinity, linkage=linkage)
    clusters.fit(data)

    # Agglomerative Clustering does not give centers of clusters,
    # so use the mean of each cluster instead.
    cluster_centers = array([
        mean(data[clusters.labels_ == i], axis=0)
        for i in range(n_clusters)
    ])

    return [cluster_centers, clusters.labels_]
开发者ID:emilleishida,项目名称:MLSNeSpectra,代码行数:28,代码来源:AgglomerativeClustering.py

示例10: test_connectivity_propagation

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
def test_connectivity_propagation():
    """Fitting Ward with k-NN connectivity on near-duplicate points must not crash.

    Connectivity has to be propagated correctly while clusters are merged;
    if it is not, ``fit`` fails with an IndexError.
    """
    # 15 points, many of them exact duplicates.
    points = (
        [(0.014, 0.120), (0.014, 0.099), (0.014, 0.097)]
        + [(0.017, 0.153)] * 2
        + [(0.018, 0.153)] * 7
        + [(0.018, 0.152), (0.018, 0.149), (0.018, 0.144)]
    )
    X = np.array(points)
    connectivity = kneighbors_graph(X, 10, include_self=False)
    model = AgglomerativeClustering(
        n_clusters=4, connectivity=connectivity, linkage="ward"
    )
    # The test passes as long as this call returns without raising.
    model.fit(X)
开发者ID:albertotb,项目名称:scikit-learn,代码行数:29,代码来源:test_hierarchical.py

示例11: test_agglomerative_clustering_with_distance_threshold

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
def test_agglomerative_clustering_with_distance_threshold(linkage):
    """Check the clusters obtained when the tree is cut by ``distance_threshold``.

    For the given ``linkage``, with and without a grid connectivity graph,
    the estimator's labels are compared against a direct cut of the tree
    returned by the matching tree builder: both the number of clusters and
    the cluster assignment must agree.
    """
    rng = np.random.RandomState(0)
    # The builtin ``bool`` is used because the ``np.bool`` alias no longer
    # exists in current NumPy; the resulting dtype is the same.
    mask = np.ones([10, 10], dtype=bool)
    n_samples = 100
    X = rng.randn(n_samples, 50)
    connectivity = grid_to_graph(*mask.shape)
    # test when distance threshold is set to 10
    distance_threshold = 10
    for conn in [None, connectivity]:
        clustering = AgglomerativeClustering(
            n_clusters=None,
            distance_threshold=distance_threshold,
            connectivity=conn, linkage=linkage)
        clustering.fit(X)
        clusters_produced = clustering.labels_
        num_clusters_produced = len(np.unique(clustering.labels_))
        # test if the clusters produced match the point in the linkage tree
        # where the distance exceeds the threshold
        tree_builder = _TREE_BUILDERS[linkage]
        children, n_components, n_leaves, parent, distances = \
            tree_builder(X, connectivity=conn, n_clusters=None,
                         return_distance=True)
        # Each merge at or above the threshold is left undone, and every
        # undone merge adds one cluster to the single root cluster.
        num_clusters_at_threshold = np.count_nonzero(
            distances >= distance_threshold) + 1
        # test number of clusters produced
        assert num_clusters_at_threshold == num_clusters_produced
        # test clusters produced
        clusters_at_threshold = _hc_cut(n_clusters=num_clusters_produced,
                                        children=children,
                                        n_leaves=n_leaves)
        assert np.array_equiv(clusters_produced,
                              clusters_at_threshold)
开发者ID:kevin-coder,项目名称:scikit-learn-fork,代码行数:36,代码来源:test_hierarchical.py

示例12: classify_core

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
    def classify_core(self, N_CLUSTERS, clusterType, data_for_trial_type, begin_time, end_time):
        """Cluster the x-velocity traces of a time window with the chosen algorithm.

        Parameters
        ----------
        N_CLUSTERS : int
            Requested number of clusters; used by 'kmeans' and
            'AgglomerativeClustering', recomputed from the labels for
            'affinity_propagation' and 'DBSCAN'.
        clusterType : str
            One of 'kmeans', 'affinity_propagation', 'DBSCAN',
            'AgglomerativeClustering'.
        data_for_trial_type : array
            Indexed as ``[:, frame_range, self.griddy.VEL_X]``.
        begin_time, end_time : number
            Window bounds, scaled by ``self.griddy.TIME_GRID_SPACING`` to
            obtain the slice bounds.

        Returns
        -------
        tuple
            ``(labels, N_CLUSTERS)``; ``labels`` is ``None`` when
            ``clusterType`` is not recognized.
        """
        BEGIN_TIME_FRAME = begin_time*self.griddy.TIME_GRID_SPACING
        END_TIME_FRAME = end_time*self.griddy.TIME_GRID_SPACING

        data = data_for_trial_type[:,BEGIN_TIME_FRAME:END_TIME_FRAME,self.griddy.VEL_X]

        labels = None
        if clusterType == 'kmeans':
            kmeans = KMeans(n_clusters=N_CLUSTERS)
            kmeans.fit(data)
            labels = kmeans.labels_
        elif clusterType == 'affinity_propagation':
            ap = AffinityPropagation(damping=0.75)
            ap.fit(data)
            labels = ap.labels_
            # Count the clusters from the labels just computed, as the
            # DBSCAN branch does, not from the ``self.labels`` attribute.
            N_CLUSTERS = np.max(labels)+1
        elif clusterType == 'DBSCAN':
            dbscan = DBSCAN()
            dbscan.fit(data)
            labels = dbscan.labels_
            N_CLUSTERS = np.max(labels)+1
            print('N_CLUSTERS=' + str(N_CLUSTERS))
        elif clusterType == 'AgglomerativeClustering':
            ac = AgglomerativeClustering(n_clusters=N_CLUSTERS)
            ac.fit(data)
            labels = ac.labels_
        else:
            # Best-effort: report the problem and fall through with labels=None.
            print('ERROR: clusterType: ' + clusterType + ' is not recognized')

        return (labels, N_CLUSTERS)
开发者ID:SashaRayshubskiy,项目名称:osmotropotaxis_analysis_python,代码行数:33,代码来源:fly_trajectory_classifier.py

示例13: programmer_3

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
def programmer_3():
    """Cluster the standardized spreadsheet into 3 groups and save one plot per group.

    Reads ``data/standardized.xls``, fits Ward agglomerative clustering,
    attaches the label as an extra column, and for each cluster writes
    ``tmp/type_<n>.png`` with one line per member over the first four
    columns.
    """
    k = 3
    standardizedfile = "data/standardized.xls"
    pic_output = "tmp/type_"
    # One line style per cluster.
    style = ["ro-", "go-", "bo-"]
    xlabels = [u"工作日人均停留时间", u"凌晨人均停留时间", u"周末人均停留时间", u"日均人流量"]
    ticks = range(1, 5)

    data = pd.read_excel(standardizedfile, index_col=u"基站编号")

    # Hierarchical clustering.
    model = AgglomerativeClustering(linkage="ward", n_clusters=k)
    model.fit(data)

    # Original data together with the cluster each row was assigned to.
    cluster_column = pd.Series(model.labels_, index=data.index)
    r = pd.concat([data, cluster_column], axis=1)
    r.columns = list(data.columns) + [u"聚类类别"]

    # Draw every cluster in its own figure, using its own line style.
    for i, line_style in zip(range(k), style):
        plt.figure()
        members = r[r[u"聚类类别"] == i].iloc[:, :4]
        for row_pos in range(len(members)):
            plt.plot(ticks, members.iloc[row_pos], line_style)

        plt.xticks(ticks, xlabels, rotation=20)
        plt.title(u"商圈类别%s" % (i + 1))
        # Leave room at the bottom for the rotated tick labels.
        plt.subplots_adjust(bottom=0.15)
        plt.savefig(u"%s%s.png" % (pic_output, i + 1))
开发者ID:Ctipsy,项目名称:python_data_analysis_and_mining_action,代码行数:33,代码来源:code.py

示例14: test_compute_full_tree

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
def test_compute_full_tree():
    """Check how many merges are recorded for small and large ``n_clusters``."""
    rng = np.random.RandomState(0)

    # Small n_clusters: the full tree is expected, i.e. n_samples - 1 merges.
    X = rng.randn(10, 2)
    graph = kneighbors_graph(X, 5, include_self=False)
    model = AgglomerativeClustering(connectivity=graph, n_clusters=2)
    model.fit(X)
    assert_equal(model.children_.shape[0], X.shape[0] - 1)

    # Large n_clusters (above the max of 100 and 0.02 * n_samples):
    # merging is expected to stop once n_clusters clusters remain.
    n_clusters = 101
    X = rng.randn(200, 2)
    graph = kneighbors_graph(X, 10, include_self=False)
    model = AgglomerativeClustering(connectivity=graph,
                                    n_clusters=n_clusters)
    model.fit(X)
    assert_equal(model.children_.shape[0], X.shape[0] - n_clusters)
开发者ID:foresthz,项目名称:scikit-learn,代码行数:27,代码来源:test_hierarchical.py

示例15: clustering_tweets_hc

# 需要导入模块: from sklearn.cluster import AgglomerativeClustering [as 别名]
# 说明: fit 是 AgglomerativeClustering 的实例方法,无法通过 import 单独导入;请先导入上面的类,再调用其实例的 fit()
def clustering_tweets_hc(labeled_tweets, num_cluster):
    """Group tweets by Ward clustering and return one merged string per cluster.

    The tweets are vectorized with ``cst_vectorizer.StemmedTfidfVectorizer``
    (configured by ``param``, defined outside this function), clustered
    under a 1-nearest-neighbour connectivity graph, and the members of each
    cluster are concatenated.

    Returns a list of ``num_cluster`` strings. Within each string the tweets
    appear in reverse index order, each followed by a single space.
    """
    from sklearn.cluster import AgglomerativeClustering
    from sklearn.neighbors import kneighbors_graph

    vectorizer = cst_vectorizer.StemmedTfidfVectorizer(**param)
    tweet_vec = vectorizer.fit_transform(labeled_tweets).toarray()

    connectivity = kneighbors_graph(tweet_vec, 1, include_self=False)
    model = AgglomerativeClustering(n_clusters=num_cluster,
                                    connectivity=connectivity,
                                    linkage='ward')
    model.fit(tweet_vec)
    cluster_ids = model.labels_

    clustered_tweets = []
    for cluster in range(0, num_cluster):
        members = (cluster_ids == cluster).nonzero()[0]
        # Walking the members backwards and appending "<tweet> " gives the
        # same text as prepending each tweet in forward order.
        merged = ''.join(labeled_tweets[sid] + ' ' for sid in members[::-1])
        clustered_tweets.append(merged)
    return clustered_tweets
开发者ID:candlewill,项目名称:texts_sentiment_analysis,代码行数:29,代码来源:km_cluster.py


注:本文中的sklearn.cluster.AgglomerativeClustering.fit方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。