当前位置: 首页>>代码示例>>Python>>正文


Python KMeans.n_clusters方法代码示例

本文整理汇总了Python中sklearn.cluster.KMeans.n_clusters属性的典型用法代码示例。如果您正苦于以下问题:Python KMeans.n_clusters的具体用法?Python KMeans.n_clusters怎么用?Python KMeans.n_clusters使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该属性所在的类sklearn.cluster.KMeans的用法示例。


在下文中一共展示了KMeans.n_clusters属性的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: estimate_clusters

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: n_clusters 是 KMeans 实例的属性(同时也是构造参数),不能通过 import 单独导入
def estimate_clusters(data):
    """Score KMeans for a range of cluster counts and tabulate the results.

    Parameters:
        data: a 3-item sequence ``(features, <ignored>, labels)``. The
            second item is unpacked and discarded.

    Returns:
        A pandas DataFrame with one row per candidate cluster count and the
        columns ``clusters``, ``score`` (mean cross-validated
        ``adjusted_rand_score``) and ``algo`` (always ``'kmeans'``).

    The candidate counts run from 1 up to, but not including,
    ``features.shape[1]``.
    """
    features, _, labels = data
    model = KMeans()

    def mean_cv_score(cluster_count):
        # The same estimator instance is reused; only n_clusters changes.
        model.n_clusters = cluster_count
        fold_scores = cross_val_score(
            model, features, labels, scoring='adjusted_rand_score')
        return np.mean(fold_scores)

    upper_bound = features.shape[1]
    rows = [[count, mean_cv_score(count)] for count in range(1, upper_bound)]

    result = pd.DataFrame.from_records(rows, columns=['clusters', 'score'])
    result['algo'] = 'kmeans'
    return result
开发者ID:babineaum,项目名称:bag-of-algorithms,代码行数:14,代码来源:kmeans_eval.py

示例2: clusterize_data

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: n_clusters 是 KMeans 实例的属性(同时也是构造参数),不能通过 import 单独导入
def clusterize_data(data, k=None, range_k=list(range(2,10)),algorithm='k-means'):
    """
    Cluster the data and return the labelling with the best silhouette index.

    If 'k' is given, the data is clustered once with that number of clusters.
    If 'k' is None, the clustering algorithm is run for every value in
    'range_k', each partition is evaluated with the silhouette index, and the
    partition with the highest index is returned.

    Parameters:

        data: array of floats [n_samples,n_features]

            The data to be clustered.

        k: integer, at least 2. default: None

            The number of clusters. If None, the number of clusters is chosen
            from 'range_k' by the silhouette index.

        range_k: iterable of integers. default [2,...,9]

            The candidate numbers of clusters. Ignored when 'k' is given.
            The lowest value can not be smaller than 2. The greatest value
            can not be greater than n_samples - 1. The default list is never
            mutated by this function.

        algorithm: string, default: 'k-means'

            The clustering algorithm to be used. Allowed: ['k-means','hierarchical-average','hierarchical-complete','hierarchical-single']

    Returns:

        labels: array of integers [n_samples,]

            The cluster assignment of each sample for the best partition.

    Raises:

        ValueError: if 'algorithm' is not allowed, if 'k' is smaller than 2,
            or if there is no candidate number of clusters to evaluate.
    """

    # Check the input algorithm
    allowed_algs = ['k-means','hierarchical-average','hierarchical-complete','hierarchical-single']
    if algorithm not in allowed_algs:
        raise ValueError('Algorithm not allowed: \'' + algorithm + '\'. Allowed ones: [' + ','.join(allowed_algs) + ']')

    # An explicit k overrides the search range.
    if k is not None:
        if k < 2:
            raise ValueError('Invalid value of "k". It should be at least 2')
        candidates = [k]
    else:
        # list() also makes the emptiness check below safe for numpy arrays.
        candidates = list(range_k)

    # Fail early with a clear message instead of letting np.argmax choke on
    # an empty score list at the very end.
    if not candidates:
        raise ValueError('"range_k" must contain at least one candidate number of clusters')

    use_kmeans = algorithm == 'k-means'
    km = None
    Z = None
    if use_kmeans:
        km = KMeans()
    else:
        # linkage() expects the *condensed* distance vector returned by
        # pdist(). A square (squareform) matrix would be interpreted as a
        # set of observation vectors and clustered as if it were raw data.
        linkage_method = algorithm.split('-')[1]
        Z = hierarchy.linkage(pdist(data), linkage_method)

    labels_k = []
    silhouette_k = []

    # Cluster once per candidate and score each partition by its silhouette.
    for n_clusters in candidates:
        if use_kmeans:
            km.n_clusters = n_clusters
            labels = km.fit_predict(data)
        else:
            labels = hierarchy.fcluster(Z, n_clusters, criterion='maxclust')

        score = silhouette_score(data, labels)
        labels_k.append(labels)
        silhouette_k.append(score)

    # Return the labels with the best [maximum] silhouette index.
    return labels_k[np.argmax(silhouette_k)]
开发者ID:pedropazzini,项目名称:factor_analysis,代码行数:75,代码来源:factor_analysis_script.py

示例3: cell_assemblies

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: n_clusters 是 KMeans 实例的属性(同时也是构造参数),不能通过 import 单独导入
def cell_assemblies(binned_counts, null_method = 'analytical', wts_method = 'pca', rm_diagonal=False):
    """Find significant eigen-components of the correlation of binned counts.

    Parameters:
        binned_counts: 2-D array whose shape is unpacked as (T, nn).
            Presumably T time bins by nn neurons -- confirm with the caller.
        null_method: 'analytical' or 'bin_shuffle'. Selects how the
            eigenvalue significance cutoff (null_cutoff) is derived. Any
            other value leaves null_cutoff undefined.
        wts_method: 'pca' or 'av'. Selects what is computed from the
            significant eigenvectors.
        rm_diagonal: if True, the diagonal of the correlation matrix is
            zeroed before the eigen-decomposition.

    Returns:
        For wts_method == 'pca': the tuple
        (R, vect_sig, w[eig_sig], 0, 0), where R has shape
        (T, number of significant eigenvalues).
        The 'av' branch continues past the end of this excerpt, so its
        return value is not documented here.

    Raises:
        Exception: if null_method == 'bin_shuffle' and wts_method == 'av',
            or if the bin_shuffle method finds no cutoff.
    """
    # The 'av' branch below needs lamb_max / lamb_min, which only the
    # analytical branch defines.
    if null_method == 'bin_shuffle' and wts_method == 'av':
        raise Exception('AV method needs analytical null_method')


    T, nn = binned_counts.shape

    # zsc is defined outside this excerpt; presumably a column-wise z-score
    # -- TODO confirm.
    zsc_binned_spikes = zsc(binned_counts)

    #Get correlation matrix:
    C = np.mat(zsc_binned_spikes).T*np.mat(zsc_binned_spikes) / float(zsc_binned_spikes.shape[0])

    # Analytical distribution of eigenvalues: 
    if null_method == 'analytical':
        q = T/float(nn) 
        sig_sq = 1 #Zscored
        # Upper and lower eigenvalue bounds of the analytical null.
        lamb_max = sig_sq*(1+np.sqrt(1/q))**2
        lamb_min = sig_sq*(1-np.sqrt(1/q))**2

        # lambda_vect / p_lambda are only used by the commented-out plot.
        lambda_vect = np.arange(lamb_min, lamb_max, .01)
        p_lambda = q/(2*np.pi) * (np.sqrt((lamb_max - lambda_vect)*(lambda_vect - lamb_min))/(lambda_vect))
        null_cutoff = lamb_max
        #plt.plot(lambda_vect, p_lambda)

    # Time shuffle method: 
    elif null_method == 'bin_shuffle':
        eigs = []
        bins = np.linspace(0., 1.5, 50)
        for sim in range(100):
            shuff_bin = np.zeros_like(zsc_binned_spikes)
            for n in range(nn):
                # NOTE(review): if zsc returns an ndarray, tmp is a view, so
                # the shuffle below permutes zsc_binned_spikes itself in
                # place -- confirm this is intended.
                tmp = zsc_binned_spikes[:, n]
                np.random.shuffle(tmp)
                shuff_bin[:, n] = tmp
            C_shuf = (shuff_bin.T * np.mat(shuff_bin))/T
            e = np.linalg.eigvalsh(C_shuf)
            # Rebinds n (the column index above) to the histogram counts.
            n, x  = np.histogram(e, bins)
            print 'sim: ', sim
            eigs.append(list(n))
        # Cumulative eigenvalue counts over all simulations, normalised to 1.
        tmp = np.cumsum(np.sum(np.vstack((eigs)), axis=0))
        tmp2 = tmp / float(np.max(tmp))
        # The cutoff is the first bin edge at which the cumulative fraction
        # reaches 1.
        null_ = np.nonzero(tmp2==1.)[0]
        if len(null_)> 0:
            null_cutoff = bins[null_[0]]
        else:
            raise Exception('No null cutoff, method: bin_shuffle')

        # plt.plot(bins[1:], np.mean(np.vstack((eigs)), axis=0))
        # plt.plot(lambda_vect, p_lambda, 'r-')


    C_adj = C.copy()
    if rm_diagonal:
        for i in range(C.shape[0]): C_adj[i,i]=0

    # Keep the eigenvectors whose eigenvalue exceeds the null cutoff.
    w, v = np.linalg.eig(C_adj)
    eig_sig = w > null_cutoff
    vect_sig = v[:, eig_sig]

    ############# PCA METHOD ################
    if wts_method== 'pca':

        #Calculate activation strength
        R = np.zeros((T, np.sum(eig_sig)))
        for t in range(T):
            for e in range(np.sum(eig_sig)):
                # z_t * (v_e v_e^T) * z_t^T for time bin t and component e.
                R[t, e] = zsc_binned_spikes[t,:]*vect_sig[:,e]*vect_sig[:,e].T*np.mat(zsc_binned_spikes[t,:]).T
        return R, vect_sig, w[eig_sig], 0, 0

    ############# AV METHOD ################
    elif wts_method == 'av':
        PAS = vect_sig*vect_sig.T
        N = PAS*C

        #Length of 'neuron vector':
        # N_len is not used in the visible part of this function.
        N_len = np.linalg.norm(N, axis=1)

        #Number of sig neurons: 
        sig_ix = np.nonzero(np.logical_or(w > lamb_max, w < lamb_min))[0]
        nn_sig = len(sig_ix)

        # Inner products between columns of N.
        M = np.zeros((nn, nn_sig))
        for i in range(nn):
            for ij, j in enumerate(sig_ix):
                M[i, ij] = N[:, i].T * N[:, j]

        # Split the flattened entries of M into two groups with KMeans.
        from sklearn.cluster import KMeans
        KM = KMeans()
        KM.n_clusters = 2
        KM.fit(M.reshape(1, -1).T)
        s = KM.predict(M.reshape(1, -1).T)
        #Check which is higher, group 1 or 0
        ix1 = np.nonzero(s==1.) 
        ix0 = np.nonzero(s==0.)

        # NOTE(review): this compares the means of the labels themselves
        # (s[ix1] is all 1, s[ix0] is all 0), not of the M values in each
        # group -- confirm whether the flattened M was meant here.
        if np.mean(s[ix1])>np.mean(s[ix0]):
            #Use ix1:
            ix_corr = ix1
        else:
            ix_corr = ix0
#......... part of the code is omitted here .........
开发者ID:pkhanna104,项目名称:fa_analysis,代码行数:103,代码来源:ganguly_analysis_w_sg_data.py

示例4: test

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: n_clusters 是 KMeans 实例的属性(同时也是构造参数),不能通过 import 单独导入
def test(board):
    """Fit an 8-cluster KMeans on ``norm(board)`` and return the estimator.

    ``norm`` is defined outside this excerpt; presumably it normalises the
    board features -- confirm against its definition.
    """
    model = KMeans(n_clusters=8)
    model.fit(norm(board))
    return model
开发者ID:powerresearch,项目名称:Poker-Bot-Reformed,代码行数:7,代码来源:make_toy_data.py


注:本文中的sklearn.cluster.KMeans.n_clusters方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。