Python Ward.fit方法代码示例

本文整理汇总了Python中sklearn.cluster.Ward.fit方法的典型用法代码示例。如果您正苦于以下问题：Python Ward.fit方法的具体用法？Python Ward.fit怎么用？Python Ward.fit使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.cluster.Ward的用法示例。

在下文中一共展示了Ward.fit方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_connectivity_popagation

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
def test_connectivity_popagation():
    """Regression test: ward-tree connectivity must survive merging.

    Fits ``Ward`` on a small 2-D point set containing many duplicated
    coordinates, constrained by a 10-nearest-neighbours connectivity graph.
    The test passes as long as ``fit`` completes; when connectivity changes
    are not propagated correctly, ``fit`` crashes with an ``IndexError``.
    """
    from sklearn.neighbors import NearestNeighbors

    # 15 points in total; the repeated rows are spelled out as multiples.
    points = (
        [(0.014, 0.120), (0.014, 0.099), (0.014, 0.097)]
        + [(0.017, 0.153)] * 2
        + [(0.018, 0.153)] * 7
        + [(0.018, 0.152), (0.018, 0.149), (0.018, 0.144)]
    )
    X = np.array(points)

    neighbors = NearestNeighbors(n_neighbors=10)
    neighbors = neighbors.fit(X)
    graph = neighbors.kneighbors_graph(X)

    model = Ward(n_clusters=4, connectivity=graph)
    # Nothing to assert: completing without an exception is the check.
    model.fit(X)

开发者ID:VirgileFritsch，项目名称:scikit-learn，代码行数:34，代码来源:test_hierarchical.py

示例2: __hieclu

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
	def __hieclu(self):
		"""Cluster ``self.data_matrix`` into ``self.k`` groups with Ward linkage.

		Prints a progress message, then returns the result of
		``Ward.fit_predict`` on the data matrix (the per-sample cluster
		labels).
		"""
		# Parenthesised single-argument form prints the same text on
		# Python 2 and Python 3.
		print('using hierarchical clustering......')
		ac = Ward(n_clusters=self.k)
		# fit_predict() fits the estimator itself; a separate fit() call
		# beforehand only ran the whole clustering a second time.
		return ac.fit_predict(self.data_matrix)

开发者ID:chenzheng128，项目名称:evaluate_cluster，代码行数:9，代码来源:evaluate_class.py

示例3: hieclu

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
def hieclu(data_matrix, k):
	"""Cluster ``data_matrix`` into ``k`` groups with Ward linkage.

	Prints a progress message, then returns the result of
	``Ward.fit_predict`` on the data matrix (the per-sample cluster labels).
	"""
	# Parenthesised single-argument form prints the same text on
	# Python 2 and Python 3.
	print('using hierarchical clustering......')
	ac = Ward(n_clusters=k)
	# fit_predict() fits the estimator itself; a separate fit() call
	# beforehand only ran the whole clustering a second time.
	return ac.fit_predict(data_matrix)

开发者ID:chenzheng128，项目名称:evaluate_cluster，代码行数:9，代码来源:evaluate.py

示例4: test_ward_clustering

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
def test_ward_clustering():
    """
    Check that we obtain the correct number of clusters with Ward clustering.

    Random data with 100 samples is clustered under a 10x10 grid
    connectivity, and the fitted labels must contain exactly 10 distinct
    values.
    """
    rnd = np.random.RandomState(0)
    # Builtin ``bool`` instead of the ``np.bool`` alias, which NumPy 1.24
    # removed; the resulting dtype is the same.
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(100, 50)
    # Only the mask's shape is used: it defines the grid dimensions.
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)
    assert_true(np.size(np.unique(clustering.labels_)) == 10)

开发者ID:AlexLerman，项目名称:scikit-learn，代码行数:13，代码来源:test_hierarchical.py

示例5: test_connectivity_fixing_non_lil

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
def test_connectivity_fixing_non_lil():
    """Non-regression test for fixing a non item-assignable connectivity.

    Guards against a bug triggered when the connectivity that is provided
    does not support item assignment and has more than one component.  The
    test passes when ``fit`` completes without raising.
    """
    # Dummy two-sample data set.
    samples = np.array([[0, 0], [1, 1]])
    # Mask with several components, so that connectivity fixing is forced.
    component_mask = np.array([[True, False], [False, True]])
    graph = grid_to_graph(n_x=2, n_y=2, mask=component_mask)
    model = Ward(connectivity=graph)
    model.fit(samples)

开发者ID:bbabenko，项目名称:scikit-learn，代码行数:14，代码来源:test_hierarchical.py

示例6: test_ward_clustering

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
def test_ward_clustering():
    """
    Check that we obtain the correct number of clusters with Ward clustering.

    Also covers: fitting with a cache directory, early stopping of the tree
    building (``compute_full_tree = False``) giving the same labels, fitting
    without connectivity, and the errors raised for a dense or wrongly
    shaped connectivity matrix.
    """
    import shutil

    rnd = np.random.RandomState(0)
    # Builtin ``bool`` instead of the ``np.bool`` alias, which NumPy 1.24
    # removed; the resulting dtype is the same.
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(100, 50)
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)
    # test caching
    cache_dir = mkdtemp()
    try:
        clustering = Ward(n_clusters=10, connectivity=connectivity,
                          memory=cache_dir)
        clustering.fit(X)
        labels = clustering.labels_
    finally:
        # The cache is only needed for the fit above; remove it so the
        # test does not leave a temporary directory behind on every run.
        shutil.rmtree(cache_dir, ignore_errors=True)
    assert_true(np.size(np.unique(labels)) == 10)
    # Turn caching off now
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    # Check that we obtain the same solution with early-stopping of the
    # tree building
    clustering.compute_full_tree = False
    clustering.fit(X)
    np.testing.assert_array_equal(clustering.labels_, labels)
    clustering.connectivity = None
    clustering.fit(X)
    assert_true(np.size(np.unique(clustering.labels_)) == 10)
    # Check that we raise a TypeError on dense matrices
    clustering = Ward(n_clusters=10,
                      connectivity=connectivity.todense())
    assert_raises(TypeError, clustering.fit, X)
    # A sparse connectivity cut down to 10x10 no longer matches the 100
    # samples and must be rejected with a ValueError.
    clustering = Ward(n_clusters=10,
                      connectivity=sparse.lil_matrix(
                          connectivity.todense()[:10, :10]))
    assert_raises(ValueError, clustering.fit, X)

开发者ID:2011200799，项目名称:scikit-learn，代码行数:36，代码来源:test_hierarchical.py

示例7: spectral_cluster

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
def spectral_cluster(data, n_clusters, method='sl'):
    """Spectral clustering of ``data`` followed by Ward clustering and a plot.

    A Laplacian matrix is built according to ``method`` ('NJW',
    'self-tuning', or anything else for the default 'sl' variant), its
    eigenvectors are ordered by eigenvalue, the first ``n_clusters`` of them
    are normalised with ``normal_eigen``, and the rows of that matrix are
    clustered with ``Ward``.  The labels are passed to
    ``mat_plot_cluster_sample``; the function itself returns nothing.
    """
    # Build the Laplacian matrix.  'NJW' and 'self-tuning' order the
    # eigenvalues in descending order, the default branch in ascending order.
    if method == 'NJW':
        lap_matrix = get_lap_matrix_njw(data, 0.1)
        descending = True
    elif method == 'self-tuning':
        lap_matrix = get_lap_matrix_self_tuning(data)
        descending = True
    else:
        lap_matrix = get_lap_matrix_sl(data, 0.1)
        descending = False

    eigenvalues, eigenvectors = np.linalg.eig(lap_matrix)
    order = eigenvalues.argsort()
    if descending:
        order = order[::-1]
    eigenvalues = eigenvalues[order]
    eigenvectors = eigenvectors[:, order]

    #print(eigenvalues)
    # Keep the first n_clusters eigenvectors, then normalise that matrix.
    y_matrix = normal_eigen(eigenvectors[:, 0:n_clusters])

    # The two bare strings below hold disabled alternatives (hand-written
    # k_means, hand-written bi_k_means, sklearn KMeans); they are never run.
    """
    k_dist_dic, k_centers_dic, cluster_group = kmeans.k_means(y_matrix, n_clusters)
    mat_plot_cluster_sample(data, cluster_group, method)
    """
    """center_list, cluster_assign = bikmeans.exe_bi_k_means(y_matrix, n_clusters)
    labels = cluster_assign[:, 0]
    mat_plot_cluster_sample(data, labels. method)

    # 调用sklearn中的KMeans函数，效果比自己写的强了好多
    k_means = KMeans(n_clusters)
    k_means.fit(y_matrix)
    #k_centers = k_means.cluster_centers_
    #mat_plot_cluster_sample(data, k_means.labels_, method)
    """
    # Cluster with sklearn's hierarchical (Ward) method and plot the result.
    hie_cluster = Ward(n_clusters)
    hie_cluster.fit(y_matrix)
    mat_plot_cluster_sample(data, hie_cluster.labels_, method)

开发者ID:Yayong-guan，项目名称:mlcode，代码行数:51，代码来源:spectral_cluster.py

示例8: ward

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
 def ward(self, X, n_clusters, plot=True):
     """Cluster ``X`` with Ward linkage and optionally scatter-plot the result.

     Parameters
     ----------
     X:
         Data handed to ``Ward.fit`` and, when plotting, to a 2-component
         ``RandomizedPCA`` projection.
     n_clusters:
         Number of clusters requested from ``Ward``.
     plot:
         When true, draw every sample of the 2-D projection coloured by its
         cluster label and show the figure.

     Returns
     -------
     The ``labels_`` attribute of the fitted ``Ward`` estimator.
     """
     model = Ward(n_clusters=n_clusters, copy=False, compute_full_tree=True, memory="cache")
     model.fit(X)
     labels = model.labels_

     # The figure is reset whether or not plotting was requested.
     pl.close('all')
     pl.figure(1)
     pl.clf()

     if plot:
         colors = "rbgcmybgrcmybgrcmybgrcm" * 10
         X2d = RandomizedPCA(n_components=2).fit_transform(X)
         # zip() pairs each projected point with its label; unlike the
         # former xrange() index loop it also runs on Python 3.
         for point, label in zip(X2d, labels):
             color = colors[label]
             pl.plot(point[0], point[1], "o", markerfacecolor=color, markeredgecolor=color, alpha=0.035)
         pl.show()

     return labels

开发者ID:zaycev，项目名称:n7，代码行数:20，代码来源:cluster.py

示例9: cluster_ward

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
 def cluster_ward(self, calpha=True):
     '''
     Cluster the positively predicted residues using the Ward method.

     Only ``calpha=True`` is supported: the clustering runs on the ``ca``
     selection of ``self.positive_surface_residues``, constrained by a
     5-nearest-neighbours connectivity graph, with
     ``self.WARD_N_CLUSTERS`` clusters.

     Returns a dict mapping a rank (0 = largest cluster) to the array of
     residue numbers in that cluster.  Returns an empty dict, after
     printing the pdb id and the coordinate shape, when fewer than 4 atoms
     are available.

     Raises NotImplementedError when ``calpha`` is false.
     '''
     if not calpha:
         # The alternative selection was never enabled:
         #     data_atoms = self.positive_surface_residues.select('ca or sidechain').copy()
         # Without it this path used to fail with an UnboundLocalError on
         # ``data_atoms``; fail with an explicit message instead.
         raise NotImplementedError("cluster_ward only supports calpha=True")

     data_atoms = self.positive_surface_residues.ca
     # Fetch the coordinates once and reuse them below.
     coords = data_atoms.getCoords()
     if coords.shape[0] < 4:
         # Same text as the old ``print a, b`` statement, but valid on both
         # Python 2 and Python 3.
         print("%s %s" % (self.pdbid, coords.shape))
         return {}
     connectivity = kneighbors_graph(coords, 5)
     ward = Ward(n_clusters=self.WARD_N_CLUSTERS, connectivity=connectivity)
     ward.fit(coords)
     resnums = data_atoms.getResnums()
     reslabels = ward.labels_
     # One array of residue numbers per label, largest cluster first.
     clusters = sorted([resnums[reslabels == i] for i in set(reslabels)], key=len, reverse=True)
     return dict(enumerate(clusters))

开发者ID:asaladin，项目名称:peptalk，代码行数:22，代码来源:peptalk.py

示例10: compute_clusters

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
def compute_clusters(dataset, features_vector):
    """
    Apply the clustering method selected by ``cmd_options.clust_method``.

    The number of clusters is the number of distinct values in
    ``dataset.target``.

    Returns the fitted ``Ward`` estimator for "hclust" (fitted on the dense
    form of ``features_vector``) or the fitted ``KMeans`` estimator for
    "kmeans".  For any other method name nothing is fitted and the function
    returns None.
    """

    labels = dataset.target
    true_k = np.unique(labels).shape[0]

    # Run clustering method
    # The print calls below emit exactly what the former Python 2 print
    # statements did (including the double space before the method name and
    # the blank line) while also being valid Python 3.
    print("Performing clustering with method  %s" % cmd_options.clust_method.upper())
    print("")

    if cmd_options.clust_method == "hclust":
        # Ward is given a dense array, so the feature matrix is converted.
        result = features_vector.toarray()
        ward = Ward(n_clusters=true_k)
        ward.fit(result)

        return ward

    if cmd_options.clust_method == "kmeans":
        km = KMeans(n_clusters=true_k, init='k-means++', max_iter=1000, verbose=1)
        km.fit(features_vector)

        return km

开发者ID:arianpasquali，项目名称:textmining，代码行数:26，代码来源:text_clustering.py

示例11: ward

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
def ward(X, n_clust):
    """Fit a ``Ward`` clustering with ``n_clust`` clusters on ``X``.

    Returns the fitted estimator itself, not the labels.
    """
    # A distinct local name avoids shadowing this function inside its body.
    model = Ward(n_clusters=n_clust)
    model.fit(X)
    return model

开发者ID:lixiangchun，项目名称:MCP05，代码行数:8，代码来源:utils.py

示例12: Ward

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
from sklearn.cluster import Ward

# Benchmark: time Ward.fit against scipy's hierarchy.ward over a grid of
# sample counts and feature counts, then plot the log of the time ratio.
ward = Ward(n_clusters=3)

n_samples = np.logspace(.5, 3, 9)
n_features = np.logspace(1, 3.5, 7)
N_samples, N_features = np.meshgrid(n_samples,
                                    n_features)
scikits_time = np.zeros(N_samples.shape)
scipy_time = np.zeros(N_samples.shape)

for i, n in enumerate(n_samples):
    for j, p in enumerate(n_features):
        # logspace yields floats, but an array shape must be made of
        # integers; truncate like the tick labels below do.
        X = np.random.normal(size=(int(n), int(p)))
        t0 = time.time()
        ward.fit(X)
        scikits_time[j, i] = time.time() - t0
        t0 = time.time()
        hierarchy.ward(X)
        scipy_time[j, i] = time.time() - t0

ratio = scikits_time / scipy_time

pl.figure("scikit-learn Ward's method benchmark results")
pl.imshow(np.log(ratio), aspect='auto', origin="lower")
pl.colorbar()
# Contour where both implementations take the same time.
pl.contour(ratio, levels=[1, ], colors='k')
# Builtin ``int`` instead of the ``np.int`` alias, which NumPy 1.24 removed.
pl.yticks(range(len(n_features)), n_features.astype(int))
pl.ylabel('N features')
pl.xticks(range(len(n_samples)), n_samples.astype(int))
pl.xlabel('N samples')

开发者ID:2011200799，项目名称:scikit-learn，代码行数:33，代码来源:bench_plot_ward.py

示例13: print

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
# Report clustering quality for the k-means model fitted earlier in the script.
print("Homogeneity k-means: %0.3f" % metrics.homogeneity_score(labels, km.labels_))
print("Completeness k-means: %0.3f" % metrics.completeness_score(labels, km.labels_))
print("V-measure k-means: %0.3f" % metrics.v_measure_score(labels, km.labels_))
print("Silhouette Coefficient k-means: %0.3f" % metrics.silhouette_score(clustering, km.labels_, sample_size = 8000))

# DBSCAN (density-based clustering with default parameters)
db = DBSCAN()
db.fit(clustering)
# Call form of print, consistent with the other print calls in this script
# and valid on both Python 2 and Python 3.
print('DBSCAN clusters created..')

print("Homogeneity DBSCAN: %0.3f" % metrics.homogeneity_score(labels, db.labels_))
print("Completeness DBSCAN: %0.3f" % metrics.completeness_score(labels, db.labels_))
print("V-measure DBSCAN: %0.3f" % metrics.v_measure_score(labels, db.labels_))
print("Silhouette Coefficient DBSCAN: %0.3f" % metrics.silhouette_score(clustering, db.labels_, sample_size = 5000))

# Hierarchical (Ward) clustering with 9 clusters
ward = Ward(n_clusters = 9)
ward.fit(clustering)
print('Hierarchical clusters created..')

print("Homogeneity hierarchical: %0.3f" % metrics.homogeneity_score(labels, ward.labels_))
print("Completeness hierarchical: %0.3f" % metrics.completeness_score(labels, ward.labels_))
print("V-measure hierarchical: %0.3f" % metrics.v_measure_score(labels, ward.labels_))
print("Silhouette Coefficient hierarchical: %0.3f" % metrics.silhouette_score(clustering, ward.labels_, sample_size = 5000))

开发者ID:colibri17，项目名称:TextCategorization，代码行数:27，代码来源:main.py

示例14: encode

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
    def encode(self, interm_rep, neighborhood_size = 26,
               clust_ratio=10,
               encoding='geometrical',
               similarity_measure='pearson',
               threshold=0.3, n_jobs=1, **kwds):
        """
        Encode an intermediate representation as a graph of parcels.

        The part of the method shown here clusters the voxels with Ward
        (constrained by a k-nearest-neighbours graph on the xyz coordinates),
        averages the voxel data and coordinates within each cluster, and
        thresholds the pairwise similarity of the cluster means into a
        binary, symmetric adjacency matrix.

        Parameters
        ----------
        interm_rep: IntermRep
            IntermRep object containing the arr_xyz and arr_voxel matrices.
        neighborhood_size: int
            Number of neighbors each voxel will be connected to.
        clust_ratio: int
            The number of clusters will be equal to n/clust_ratio, where n is
            the number of voxels.
        encoding: string
            Type of encoding. 'geometrical' and 'functional' are allowed.
        similarity_measure: string
            Similarity measure used to compare the representative value of each
            parcel (cluster). 'pearson' or the measures available in scikit-learn
            are allowed.
        threshold: float
            Threshold applied to the similarity values in order to define the
            edges in the graph.
        n_jobs: int
            Forwarded as ``n_jobs`` to the pairwise kernel computation that
            is used when similarity_measure is not 'pearson'.
        **kwds:
            Not used in the portion of the method shown here.

        Returns
        -------
        g: Graph
            Networkx graph representing the graph encoding of the data.
        """

        #computing the connectivity matrix, each voxel is connected to
        #"neighborhood_size" neighbors.
        #
        conn = kneighbors_graph(interm_rep.arr_xyz, n_neighbors=neighborhood_size)
#        conn_n = kneighbors_graph(interm_rep.arr_xyz, n_neighbors=neighborhood_size)
#        conn_r = radius_neighbors_graph(interm_rep.arr_xyz, radius=10)
#        conn = conn_n * conn_r

        #Hierarchical clustering algorithm. The number of clusters is defined
        #according to the parameter "clust_ratio".
        # NOTE(review): "/" is integer division only on Python 2; on Python 3
        # this passes a float as n_clusters -- confirm the target interpreter.
        ward = Ward(n_clusters=len(interm_rep.arr_xyz)/clust_ratio, connectivity=conn)
        #ward = Ward(n_clusters=60, connectivity=conn)

        #Type of encoding: geometrical (only xyz data is used) or
        # functional (voxel time series is used).
        # NOTE(review): there is no else branch -- for any other value of
        # `encoding`, fit() is never called before labels_ is read below.
        if encoding=='geometrical':
            ward.fit(interm_rep.arr_xyz)
        elif encoding=='functional':
            ward.fit(interm_rep.arr_voxels)

        labels = ward.labels_

        #Plotting the voxels with the cluster labels.
        #pp.plot_clustering_intermediate_representation(interm_rep, labels*10)


        #Computing the unique cluster identifiers
        l_unique = np.unique(labels)

        # One row per cluster: mean voxel data and mean xyz coordinates.
        mean_voxels = np.zeros((len(l_unique), interm_rep.arr_voxels.shape[1]))
        mean_xyz = np.zeros((len(l_unique), interm_rep.arr_xyz.shape[1]))

        # `cont` is the output row index; it advances once per cluster label.
        cont = 0
        for i in l_unique:
            #Taking the positions corresponding to the same cluster.
            pos = np.where(labels == i)[0]
            #Taking data from these positions and computing the mean time series
            m_voxel = interm_rep.arr_voxels[pos].mean(0)
            #Taking the xyz from these positions and computing the mean value
            m_xyz = interm_rep.arr_xyz[pos].mean(0)

            mean_voxels[cont] = m_voxel
            mean_xyz[cont] = m_xyz

            cont += 1


        #plotting the voxels time series for each cluster
        #pp.plot_interm_representation_time_series(ir.IntermRep(mean_voxels, mean_xyz))

        #The new intermediate representation is given by mean_voxels and
        # mean_xyz.

        #Computing similarity matrix and applying the threshold
        adj_mat = np.zeros((len(mean_voxels), len(mean_voxels)),
                           dtype = np.byte)
        # Only pairs with j < k are visited; each edge is written in both
        # directions so the matrix stays symmetric.
        for j in range(len(mean_voxels) - 1):
            for k in range(j + 1, len(mean_voxels)):
                if similarity_measure == 'pearson':
                    # [0] selects the first element of pearsonr's result.
                    aux = st.pearsonr(mean_voxels[j], mean_voxels[k])[0]
                else:
                    # NOTE(review): if skpw is sklearn.metrics.pairwise, the
                    # public function there is `pairwise_kernels` -- confirm
                    # that `pairwise_kernel` exists.
                    aux = skpw.pairwise_kernel(mean_voxels[j], mean_voxels[k],
                                               metric = similarity_measure,
                                               n_jobs = n_jobs)
                if aux >= threshold:
                    adj_mat[j,k] = 1
                    adj_mat[k,j] = 1


#.........这里部分代码省略.........

开发者ID:svegapons，项目名称:PyBDGK，代码行数:103，代码来源:GE_NeighConst_HCA.py

示例15: KMeans

# 需要导入模块: from sklearn.cluster import Ward [as 别名]
# 或者: from sklearn.cluster.Ward import fit [as 别名]
        print i
        train = pd.concat([train, pd.get_dummies(raw_train[i])], axis=1)
        
# Aggregate the indicator columns per report.
freq = train.groupby('Report ID').sum()
# The axis is passed by keyword: pandas 2.0 no longer accepts it as a
# positional argument of drop(), while ``axis=1`` works on every version.
freq = freq.drop('Has Combined Queries', axis=1)


# Train Model #############################

num_cluster = 12

kmean = KMeans(n_clusters=num_cluster, max_iter=400, verbose = 0, n_jobs = 2, n_init=20, tol=1e-6)
model_kmean = kmean.fit(freq)

ward = Ward(n_clusters=num_cluster)
model_ward = ward.fit(freq)


from sklearn.neighbors import kneighbors_graph
# Built for the connectivity-constrained Ward variant, which is currently
# disabled below.
connectivity = kneighbors_graph(freq, n_neighbors=4)

#ward = Ward(n_clusters=num_cluster, connectivity = connectivity)
#model_ward = ward.fit(freq)

# Visualization #####################################################

import mpl_toolkits.mplot3d.axes3d as p3
import pylab as pl
from sklearn.datasets.samples_generator import make_friedman3

def plot(model, data, name):

开发者ID:simengy，项目名称:Report-Clustering，代码行数:33，代码来源:report.py

注：本文中的sklearn.cluster.Ward.fit方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。