当前位置: 首页>>代码示例>>Python>>正文


Python KMeans.fit_predict方法代码示例

本文整理汇总了Python中sklearn.cluster.KMeans.fit_predict方法的典型用法代码示例。如果您正苦于以下问题:Python KMeans.fit_predict方法的具体用法?Python KMeans.fit_predict怎么用?Python KMeans.fit_predict使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在sklearn.cluster.KMeans的用法示例。


在下文中一共展示了KMeans.fit_predict方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: kmeans

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
 def kmeans(self, concepts, labels, m, method = "kmeans"):
     """Cluster the embedding vectors of *concepts* and return cluster ids.

     The cluster count k is taken from the number of distinct gold labels,
     so the resulting partition is directly comparable with *labels*.

     :param concepts: iterable of keys to look up in the embedding map *m*
     :param labels: gold labels; only len(set(labels)) is used (sets k)
     :param m: mapping from concept key to embedding vector
     :param method: one of 'kmeans', 'agg-ward', 'agg-complete',
         'agg-average', 'agg_ward', 'agg_complete', 'agg_average', 'test'
     :returns: array of assigned cluster ids, one per concept
     :raises ValueError: for an unrecognized *method* (the original code
         left ``km`` unbound and crashed with NameError instead)
     :raises KeyError: if a concept is missing from *m*
     """
     k = len(set(labels))
     X = [m[concept] for concept in concepts]
     if method == 'kmeans':
         km = KMeans(n_clusters=k, random_state=0)
     elif method == 'agg-ward':
         # NOTE(review): sklearn's default linkage is 'ward', which only
         # supports euclidean affinity -- 'cosine' here may raise at fit
         # time; confirm against the sklearn version in use.
         km = AgglomerativeClustering(n_clusters=k, affinity='cosine')
     elif method == 'agg-complete':
         km = AgglomerativeClustering(n_clusters=k, affinity='cosine', linkage='complete')
     elif method == 'agg-average':
         km = AgglomerativeClustering(n_clusters=k, affinity='cosine', linkage='average')
     elif method == 'agg_ward':
         km = AgglomerativeClustering(n_clusters=k)
     elif method == 'agg_complete':
         km = AgglomerativeClustering(n_clusters=k, linkage='complete')
     elif method == 'agg_average':
         km = AgglomerativeClustering(n_clusters=k, linkage='average')
     elif method == 'test':
         km = AgglomerativeClustering(n_clusters=k, linkage='average', affinity='l2')
     else:
         # BUGFIX: previously an unknown method fell through and the
         # km.fit_predict call below raised a confusing NameError.
         raise ValueError("unknown clustering method: %r" % (method,))
     km.fit_predict(X)
     return km.labels_
开发者ID:pkumusic,项目名称:HCE,代码行数:28,代码来源:conceptcat_senna.py

示例2: clustering_by_kmeans

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
def clustering_by_kmeans(vectorizer, X, true_k):
    """Cluster X into true_k groups with K-means and print quality metrics.

    Python 2 code (statement-form print).

    :param vectorizer: fitted vectorizer; used only for get_feature_names()
    :param X: document-term matrix to cluster
    :param true_k: number of clusters
    Side effects: prints metrics and calls module-level measuring_kmeans().
    """
    print "Clustering in " + str(true_k) + " groups by K-means..."
    km = KMeans(n_clusters=true_k, init='k-means++', max_iter=500, n_init=1)
    km.fit_predict(X)

    print "Measuring..."

    # NOTE(review): these sklearn metrics expect ground-truth labels as the
    # first argument, but the module-level `documents` is passed instead --
    # verify that `documents` really holds per-document class labels.
    print("Homogeneity: %0.3f" % metrics.homogeneity_score(documents, km.labels_))
    print("Completeness: %0.3f" % metrics.completeness_score(documents, km.labels_))
    print("V-measure: %0.3f" % metrics.v_measure_score(documents, km.labels_))  #V-measure is an entropy-based measure which explicitly measures how successfully the criteria of homogeneity and completeness have been satisfied.
    print("Adjusted Rand-Index: %.3f"   % metrics.adjusted_rand_score(documents, km.labels_))
    print("Silhouette Coefficient: %0.3f" % metrics.silhouette_score(X, km.labels_, sample_size=1000))
    #print top terms per cluster clusters

    clusters = km.labels_.tolist()  # 0 iff term is in cluster0, 1 iff term is in cluster1 ...  (list of terms)
    #print "List of terms belonging to the clusters " + str(clusters)
    print "Total de " + str(len(km.labels_)) + " documents"

    #Example to get all documents in cluster 0
    #cluster_0 = np.where(clusters==0) # don't forget import numpy as np
    #print cluster_0
    #cluster_0 now contains all indices of the documents in this cluster, to get the actual documents you'd do:
    #X_cluster_0 = documents[cluster_0]
    # NOTE(review): `terms` is computed but never used below -- presumably
    # left over from a "top terms per cluster" printout; confirm before removing.
    terms = vectorizer.get_feature_names()

    #print terms
    measuring_kmeans(true_k,clusters)
开发者ID:eubr-bigsea,项目名称:Tweets-cluster,代码行数:29,代码来源:tf_idf.py

示例3: kmeans_predict_center_vectors

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
 def kmeans_predict_center_vectors(self, contexts, k):
     """Cluster the embedding vectors of *contexts* into k groups and
     return the cluster centers with their member counts.

     :param contexts: iterable of keys; only those present in self.m are used
     :param k: desired number of clusters
     :returns: ``[centers, counts]`` where centers[i] is a center vector and
         counts[i] its cluster size, ordered by descending cluster size;
         ``None`` if there is nothing to cluster; ``False`` if fewer than
         k usable vectors are available.
     """
     #print(contexts)
     if not contexts:
         return None
     # Keep only contexts that actually have a vector in the model
     # (some contexts may not exist in the dict, so filter them out).
     X = []
     for context in contexts:
         if context in self.m:
             ## TODO: to see if we need to normalize it.
             #X.append(self.normalize_vector(self.m[context]))
             X.append(self.m[context])
     # BUGFIX: the original re-tested `contexts` here, which is always
     # non-empty at this point; it is the *filtered* list X that can be
     # empty (all contexts unknown), which then crashed KMeans.fit_predict.
     if not X:
         return None
     #X = map(lambda x: self.m[x], contexts)
     if k < len(X):
         kmeans = KMeans(n_clusters = k, random_state = 1)
     else:
         # Not enough samples to form k clusters.
         #kmeans = KMeans(n_clusters = len(X), random_state = 1)
         return False
     kmeans.fit_predict(X)
     #print af
     cluster_centers = kmeans.cluster_centers_
     # most_common() orders (label, count) pairs by descending count.
     label_counter = Counter(kmeans.labels_).most_common()
     center_vectors = [[],[]]
     for (indice, count) in label_counter:
         center_vectors[0].append(cluster_centers[indice])
         center_vectors[1].append(count)
     #print center_vectors
     return center_vectors
开发者ID:pkumusic,项目名称:HCE,代码行数:31,代码来源:classification_af.py

示例4: kmeans_logistic

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
def kmeans_logistic():
    """Unsupervised pipeline: TF-IDF vectorize scraped article content,
    assign pseudo-labels with K-means, then train and evaluate a logistic
    regression on those pseudo-labels and pickle the model + vectorizer.

    Python 2 code (statement-form print). Relies on module-level helpers:
    getScrapedContent, getHistoricalVolatility, combineHistVolColumn,
    generate_tfidf, displayScore.
    """

    # Case 2: for unsupervised/semisupervised
    article_df2               = getScrapedContent(False)
    df2_content               = article_df2[['content','date']]
    # NOTE(review): df2_date and y_vol below are computed but never used
    # in this function -- presumably leftovers; confirm before removing.
    df2_date                  = article_df2['date']
    sp_df                     = getHistoricalVolatility()
    X, y_vol                  = combineHistVolColumn(df2_content, sp_df)

    # generate vectorized clfv
    tfidf, clfv = generate_tfidf(X['content'])

    # Cluster ids become the (pseudo) classification targets below.
    clf = KMeans(n_clusters=10, init='k-means++', max_iter=100) #, n_init=1)
    clf.fit_predict(clfv)
    labels = clf.labels_

    # with open ('kmeans_km_model.pkl', 'wb') as fid:
    #     cPickle.dump(clf, fid)

    print '\nSilouette score :', str(silhouette_score(clfv, labels, metric='euclidean')) + '\n'

    # 60/40 split of the TF-IDF matrix against the K-means pseudo-labels.
    X_train, X_test, y_train, y_test = train_test_split(clfv, labels, test_size=0.4, random_state=42)        

    clf_lr = LogisticRegression()
    clf_lr.fit(X_train, y_train)

    y_pred = clf_lr.predict(X_test)

    # Persist classifier and vectorizer together so inference can re-use
    # the exact same feature space.
    with open ('km_lr_tfi_model.pkl', 'wb') as fid:
        cPickle.dump((clf_lr, tfidf), fid)
    
    displayScore(clf_lr, X_train, y_train, X_test, y_test, y_pred)
开发者ID:ethancheung2013,项目名称:VolatilityPrediction,代码行数:34,代码来源:sentimentmodel.py

示例5: categorise_dataset

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
def categorise_dataset(contents):
    """Split iris rows by species, run K-means (k=3) on each species'
    four feature columns, and return the three centroid arrays.

    :param contents: iterable of 5-tuples (4 numeric features + species name)
    :returns: (setosa_centroids, versicolor_centroids, virginica_centroids),
        each a (3, 4) ndarray from KMeans.cluster_centers_
    """
    iris_setosa = []
    iris_versicolor = []
    iris_virginica = []
    for each_tuple in contents:
        if each_tuple[4] == 'Iris-virginica':
            iris_virginica.append(each_tuple[:4])
        elif each_tuple[4] == 'Iris-versicolor':
            iris_versicolor.append(each_tuple[:4])
        elif each_tuple[4] == 'Iris-setosa':
            iris_setosa.append(each_tuple[:4])

    kwargs = {
        'n_init': 5,
        # depends on number of cores in your machine.
        # NOTE(review): n_jobs was deprecated in sklearn 0.23 and removed
        # in 1.0 -- confirm the sklearn version this runs against.
        'n_jobs': 3,
        'n_clusters': 3,
    }
    # The same estimator is refit once per species; only cluster_centers_
    # is kept after each fit, so re-use is safe.
    kmeans = KMeans()
    kmeans.set_params(**kwargs)

    # fit_predict's per-sample labels were bound to unused variables in
    # the original; the return value is simply discarded here.
    kmeans.fit_predict(np.array(iris_setosa))
    iris_setosa_centroids = kmeans.cluster_centers_

    kmeans.fit_predict(np.array(iris_versicolor))
    iris_versicolor_centroids = kmeans.cluster_centers_

    kmeans.fit_predict(np.array(iris_virginica))
    iris_virginica_centroids = kmeans.cluster_centers_
    return (iris_setosa_centroids,
            iris_versicolor_centroids,
            iris_virginica_centroids)
开发者ID:sreeram-boyapati,项目名称:general-codes,代码行数:34,代码来源:iris_kmeans.py

示例6: cluster

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
    def cluster(D, k=3, verbose=False):
        """Cluster LDS's via Multi-Dimensional Scaling and KMeans.

        Strategy:
            1. Build NxN matrix of pairwise similarities
            2. Run MDS to embed data in R^2
            3. Run KMeans with k cluster centers
            4. Find samples closest to the k centers

        Parameters:
        ----------
        D: numpy.ndarray, shape = (N, N)
            Precomputed distance matrix.

        k: int (default: 3)
            Number of desired cluster centers.

        verbose: boolean
            Enable verbose output.

        Returns:
        --------
        eData: numpy.ndarray, shape (N, k)
            N d-dimensional samples embedded in R^d.

        ids: numpy.ndarray, shape = (k,)
            List of indices identifying the k representatives.
        """

        assert D.shape[0] == D.shape[1], "OOps (distance matrix not square)!"

        # build MDS for precomputed similarity matrix
        mds = MDS(metric=True, n_components=2, verbose=True,
                  dissimilarity="precomputed")

        def __symmetrize(A):
            # Mirror into a symmetric matrix; A + A.T double-counts the
            # diagonal, so it is subtracted once.
            return A + A.T - np.diag(A.diagonal())

        # run MDS on symmetrized similarity matrix
        eData = mds.fit(__symmetrize(D)).embedding_

        kmObj = KMeans(k)
        kmObj.fit_predict(eData)

        # BUGFIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
        # the builtin int is the exact equivalent dtype.
        ids = np.zeros((k,), dtype=int)
        for i in range(k):
            # sanity check: every cluster must have at least one member
            cDat = eData[np.where(kmObj.labels_ == i)[0],:]
            assert len(cDat) > 0, "Oops, empty cluster ..."

            # representative = embedded sample closest to this center
            kCen = kmObj.cluster_centers_[i,:]
            x = euclidean_distances(eData, kCen)
            ids[i] = int(np.argsort(x.ravel())[0])

        # return distance matrix and ID's of representative LDS's
        return (eData, ids)
开发者ID:KitwareMedical,项目名称:pydstk,代码行数:58,代码来源:system.py

示例7: showKMeans

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
def showKMeans(X, N):
    """Fit K-means on X for cluster counts from N/6 up to N/2 and plot the
    resulting KMeans.score curve (Python 2: xrange, integer division)."""
    score_curve = []
    for cluster_count in xrange(N / 6, N / 2):
        model = KMeans(n_clusters=cluster_count, max_iter=MAX_ITER,
                       n_init=N_INIT, n_jobs=N_JOBS)
        model.fit_predict(X)
        score_curve.append(model.score(X))
    plt.plot(score_curve)
    plt.xlabel(XLABEL)
    plt.ylabel(YLABEL)
    plt.show()
开发者ID:jeka3230,项目名称:Pattern-recognition,代码行数:12,代码来源:Clustering.py

示例8: Kmeans

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
    def Kmeans(self, fase):
        """Cluster project phases (rows of *fase*) into 5 K-means groups and
        return the per-group feature arrays zipped together.

        Python 2 code: zip() must return a list for np.array() to work.
        Columns of *fase* used here: 2 = phase CPI, 3 = project id,
        4/5/8/9 = accumulated actual/estimate features (per the original
        field-name comment below) -- TODO confirm against the caller.
        """
        # Original field-name legend (Portuguese identifiers kept as-is):
        # lista_duracao, lista_nome_fase, lista_cpi_fase, lista_id_projeto_fase, lista_real_acum_fase,
        # lista_est_acum_fase, lista_real_acum_projeto, lista_est_acum_projeto, lista_perfil_equipe_fase,
        # lista_num_atividades, lista_cpi_projeto
        # NOTE(review): the last zip argument `fase[:]` passes the WHOLE row
        # array as one feature -- this looks like a slip (a column index is
        # probably missing); confirm the intended column.
        cluster = np.array(zip(fase[:,4], fase[:,5], fase[:, 8], fase[:, 9], fase[:]))

        ap = KMeans(n_clusters= 5, init = 'k-means++')
        ap.fit_predict(cluster)

        # NOTE(review): despite the name, this holds center COORDINATES
        # (cluster_centers_), not indices.
        clusters_centers_indices = ap.cluster_centers_
        labels = ap.labels_
        n_clusters_ = len(clusters_centers_indices)

        # print(clusters_centers_indices)
        # print(labels)

        # print "Projeto"
        # agrupamento = list()
        # for j in range(0, n_clusters_):
        #     i = 0
        #     for label in labels:
        #         if(j == label):
        #            print str(cluster[i]) + " id " + str(fase[i][3]) + " label " + str (labels[i])+ " cpi " + str(fase[i][2])
        #         i+=1
        # Per-cluster accumulators: feature rows, project ids, and phase CPIs.
        cluster_0, cluster_1, cluster_2, cluster_3, cluster_4 = [], [], [], [], []
        lista_1, lista_2, lista_3, lista_4, lista_5  = [], [], [], [], []
        cpi_1, cpi_2, cpi_3, cpi_4, cpi_5 = [], [], [], [], []
        i=0
        # NOTE(review): each branch appends the ENTIRE `cluster` array, not
        # row `cluster[i]` -- likely a bug, but preserved as-is; confirm.
        for i in range(len(cluster)):
            if (labels[i] == 0):
                cluster_0.append(cluster)
                lista_1.append(fase[i][3])
                cpi_1.append(fase[i][2])
            elif (labels[i]== 1):
                cluster_1.append(cluster)
                lista_2.append(fase[i][3])
                cpi_2.append(fase[i][2])
            elif (labels[i]== 2):
                cluster_2.append(cluster)
                lista_3.append(fase[i][3])
                cpi_3.append(fase[i][2])
            elif (labels[i]== 3):
                cluster_3.append(cluster)
                lista_4.append(fase[i][3])
                cpi_4.append(fase[i][2])
            elif (labels[i]== 4):
                cluster_4.append(cluster)
                lista_5.append(fase[i][3])
                cpi_5.append(fase[i][2])
        # print "cluster 1"+str(lista_1)+ "cluster 2" +str(lista_2)+ "cluster 3" +str(lista_3)+ "cluster 4" +str(lista_4)+ "cluster 5" +str(lista_5)
        # print "cluster 1" + str(cpi_1) + "cluster 2" + str(cpi_2) + "cluster 3" + str(cpi_3) + "cluster 4" + str(cpi_4) + "cluster 5" + str(cpi_5)

        # zip the five per-cluster lists into one array (requires equal
        # lengths for a rectangular result -- not guaranteed here).
        cluster_fase = np.array(list(zip(cluster_0, cluster_1, cluster_2, cluster_3, cluster_4)))
        return cluster_fase
开发者ID:anaFernandes,项目名称:EVM,代码行数:56,代码来源:K_Means.py

示例9: cluster

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
def cluster():
    """K-means cluster the module-level `foods` table on its learning
    columns (max-scaled), attach the labels as a 'Cluster' column, and
    print the per-cluster feature means."""
    # Scale every learning column into [0, 1] by its column maximum so no
    # single feature dominates the euclidean distance.
    scaled = foods[learningColumns] / foods[learningColumns].max()

    model = KMeans()
    model.fit_predict(scaled)

    # Attach the cluster assignment to the original (unscaled) table.
    foods["Cluster"] = model.labels_

    # Inspect what each cluster looks like on average.
    byCluster = foods.groupby("Cluster")
    print(byCluster[learningColumns].mean())
开发者ID:lgaud,项目名称:NutrientDensity,代码行数:13,代码来源:Foods.py

示例10: set_obs

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
    def set_obs(self, pts, draw=False, n_resets=20):
        """Register the observed points *pts* to the mesh complex via a
        RANSAC-like search over affine maps, keeping the lowest-cost one.

        Each reset picks d+1 random K-means centroids of *pts* and d+1
        random simplex centers, solves the least-squares affine map between
        them (homogeneous coordinates), warps the points, and scores the
        warp by the summed squared distance from the warped complex to the
        points. The best map is re-applied at the end.

        Python 2 code: np.random.shuffle(range(...)) requires range() to
        return a list, and the progress output is a statement-form print.

        :param pts: (num_points, d) observation array
        :param draw: if True, dump a debug figure per reset and a final plot
        :param n_resets: number of random restarts
        Side effects: calls self._set_obs twice (trial + final) and
        temporarily overrides self.obs_sigma during the search.
        """

        d = pts.shape[1]
        # NOTE(review): cmplx_pts is computed but never used below --
        # possibly only for the discretization side effect; confirm.
        cmplx_pts = self.discretize_cmplx(pts_per_simplex=10)

        # Homogeneous coordinates (append a 1s column) for affine mapping.
        pts_h = np.c_[pts, np.ones(pts.shape[0])]

        kmeans = KMeans(init="k-means++", n_clusters=self.N)
        kmeans.fit_predict(pts)
        centroids = np.c_[kmeans.cluster_centers_, np.ones(self.N)]

        # Midpoint of every simplex, also homogenized.
        s_centers = []
        for s in self.cmplx.simplices.values():
            s_centers.append(s.global_coords([0.5]))
        s_centers = np.c_[np.array(s_centers), np.ones(self.N)]

        best_cost = np.inf
        best_R = None

        import time

        # Use a loose observation noise during the search; restored below.
        obs_sigma = self.obs_sigma
        self.obs_sigma = 1

        for n in range(n_resets):
            start = time.time()
            s_indices = range(self.N)
            k_indices = range(self.N)
            np.random.shuffle(s_indices)
            np.random.shuffle(k_indices)
            # d+1 point pairs determine an affine map in R^d.
            X = centroids[k_indices[:d+1]]
            Y = s_centers[s_indices[:d+1]]
            R = np.linalg.lstsq(X, Y)[0]
            # Warp all points and drop the homogeneous coordinate.
            pts_w = np.dot(pts_h, R)[:, :-1]
            self._set_obs(pts, pts_w)
            if draw:
                ## for debugging
                self.draw(block=True, show=False, outf='../figs/debug/registered_{}.png'.format(n))
            warped_cmplx = self.warp_cmplx()
            # Cost: for each complex sample, squared distance to its nearest
            # observed point, summed.
            distmat = ssd.cdist(warped_cmplx, pts, 'sqeuclidean')
            cost = np.sum(np.min(distmat, axis=1))
            # cost = self.gp_ll()
            if cost < best_cost:
                best_cost = cost
                best_R = R

            print n, best_cost, cost, time.time() - start, -self.gp_ll()

        # Re-apply the best map with the original noise level.
        pts_w = np.dot(pts_h, best_R)[:, :-1]
        self.obs_sigma = obs_sigma
        self._set_obs(pts, pts_w)

        if draw:
            self.draw(block=True)
开发者ID:dhadfieldmenell,项目名称:BayesGeom,代码行数:56,代码来源:bayesmesh.py

示例11: build_clusters

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
def build_clusters(df, n_clusters=4):
    """K-means cluster the rows of *df* on the 'count_entries_norm' and
    'max_norm' columns.

    :param df: DataFrame containing the two normalized feature columns
    :param n_clusters: number of K-means clusters (default 4)
    :returns: dict with 'labels' (per-row cluster ids), 'cluster_centers'
        (center coordinates), and 'clusters' (label -> list of df index
        values belonging to that cluster)
    """
    model = KMeans(n_clusters=n_clusters)
    model.fit_predict(df[['count_entries_norm', 'max_norm']])

    # Group the frame's index values by their assigned cluster.
    members = defaultdict(list)
    for row_pos, cluster_id in enumerate(model.labels_):
        members[cluster_id].append(df.index[row_pos])

    return {
        'labels': model.labels_,
        'cluster_centers': model.cluster_centers_,
        'clusters': members,
    }
开发者ID:evethandar,项目名称:habakkuk,代码行数:14,代码来源:topic_clustering.py

示例12: cluster_kmeans_signal

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
	def cluster_kmeans_signal(self, signal, nClusters, featureName):
		"""Fit a K-means model on *signal*, cache it per feature, and return
		the per-sample cluster labels.

		Not used.
		:param signal: matrix representing a signal, needs to be applied reshape(-1, 1)
		:param nClusters: number of clusters
		:param featureName: key under which the fitted model is stored in self.models
		:return: predicted labels
		"""
		#(TODO): save model for clustering for each feature
		km = KMeans(n_clusters=nClusters)
		km.fit_predict(signal)
		# Cache the fitted estimator so the same feature can be re-labeled later.
		self.models[featureName] = km
		return km.labels_
开发者ID:mascetti101,项目名称:behaviour_profiling,代码行数:14,代码来源:WordData.py

示例13: __init__

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
 def __init__(self,data,**kwargs):
     """
     Wrapper for kmeans clustering.

     Fits KMeans(**kwargs) on *data* and exposes:
       self.centroid -- cluster_centers_ from the fit
       self.labels   -- per-row cluster labels
       self.output   -- one row per cluster: column-wise medians of the
                        cluster's members plus a 'cluster_size' count
     """
     clusterer = KMeans(**kwargs)
     clusterer.fit_predict(data)
     self.centroid = clusterer.cluster_centers_
     self.labels = clusterer.labels_
     self.output = pd.DataFrame(columns=data.columns.tolist()+['cluster_size'])
     
     for k in np.unique(self.labels):
         self.output.loc[k]=data[self.labels==k].median(axis=0)
         # BUGFIX: the original used chained indexing
         # (self.output.loc[k]['cluster_size'] = ...), which assigns into a
         # temporary copy and leaves the column NaN; .loc[row, col] writes
         # into the frame itself.
         self.output.loc[k, 'cluster_size'] = np.sum(self.labels==k)
开发者ID:davidpng,项目名称:Erythroid-Maturation,代码行数:15,代码来源:clustering.py

示例14: summarize

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
    def summarize(self, document_path):
        """Extractive summary of the document at *document_path*.

        Builds a sentence x stemmed-term TF-IDF matrix, row-normalizes it,
        K-means clusters the sentences, selects one sentence per cluster
        (stopping once ~100 words are collected), and returns the selected
        Sentence objects in document order.

        :param document_path: path to a plain-text document
        :returns: list of Sentence objects in order of appearance
        """
        allwords = {}    # stemmed term -> column index in the matrix
        sentences = []

        # Split the file into Sentence objects on terminator tokens while
        # assigning each new stemmed term the next column index.
        with open(document_path, 'r') as f:
            index = 0
            for line in f:
                s = Util.tokenize(line, Summarizer.non_space)
                sentence = []
                for w in s:
                    sentence.append(w)
                    if Summarizer.sentence_terminator.search(w):
                        sent = Sentence(sentence, Summarizer.punctuation, Summarizer.stopwords, Summarizer.stemmer)
                        sentences.append(sent)
                        for t in sent.stemmed:
                            if t not in allwords:
                                allwords[t] = index
                                index += 1
                        sentence = []

        # TF-IDF weight for each (sentence, term) cell.
        matrix = np.zeros((len(sentences), len(allwords)))
        for i, sent in enumerate(sentences):
            for t in sent.stemmed:
                matrix[i, allwords[t]] = Util.tfidf(t, self.tf, self.df, Summarizer.NUM_DOCS)

        # Normalize
        # NOTE(review): a sentence with no indexed terms has norm 0, which
        # produces NaN rows here; np.nan_to_num below papers over that for
        # the fit, but `matrix` itself keeps the NaNs -- confirm intended.
        normalizer = np.reshape(np.sum(matrix**2, axis=1)**0.5, (len(matrix), 1))
        matrix /= normalizer

        model = KMeans(n_clusters=Cluster.NUM_CLUSTERS, tol=1e-9)
        model.fit_predict(np.nan_to_num(matrix))
        labels = model.labels_

        totalWords = 0
        selected = []

        # From each cluster, pick the sentence that is nearest to the cluster
        # centroid
        for i in range(Cluster.NUM_CLUSTERS):
            member_indices = np.where(labels == i)
            # Rows are unit-normalized, so this dot product is a cosine
            # SIMILARITY (larger = closer to the centroid).
            distances = np.dot(matrix[member_indices], model.cluster_centers_[i])
            # NOTE(review): argmin over a similarity selects the LEAST
            # similar member, contradicting the comment above -- this looks
            # like it should be argmax; flagged, not changed.
            closest_index = np.argmin(distances, 0)
            # 'closest_index' is the index into the member_indices array
            member_index = member_indices[0][closest_index]
            selected.append((member_index, sentences[member_index]))  # stash the index of the sentence as well
            totalWords += sentences[member_index].getLength()
            if totalWords > 100:
                break

        # return the selected sentences in their order of appearance in the document
        return [s[1] for s in sorted(selected, key=lambda x: x[0])]
开发者ID:ChenluJi,项目名称:cs224n-project,代码行数:53,代码来源:cluster.py

示例15: segment_labeling

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_predict [as 别名]
def segment_labeling(x, boundaries, c_method='kmeans', k=5):
    """Assign a cluster label to each segment of *x*.

    Features are beat/boundary-synchronized with librosa.feature.sync and
    then clustered.

    :param x: (frames, features) feature matrix
    :param boundaries: segment boundary frame indices for librosa.feature.sync
    :param c_method: 'agglomerative' for Ward-linkage hierarchical
        clustering with a distance cutoff of k * max merge distance;
        'kmeans' (or anything else) for K-means with k clusters
    :param k: cluster count (kmeans) or cutoff multiplier (agglomerative)
    :returns: array of per-segment labels
    """
    x_sync = librosa.feature.sync(x.T, boundaries)
    features = x_sync.T

    if c_method == 'agglomerative':
        linkage = hierarchy.linkage(features, method='ward')
        cutoff = k * np.max(linkage[:, 2])
        seg_labels = hierarchy.fcluster(linkage, t=cutoff, criterion='distance')
    else:
        # 'kmeans' and any unrecognized method both fall back to K-means
        # (the original duplicated this branch for the two cases).
        seg_labels = KMeans(n_clusters=k, n_init=100).fit_predict(features)

    return seg_labels
开发者ID:wangsix,项目名称:segment,代码行数:17,代码来源:segment.py


注:本文中的sklearn.cluster.KMeans.fit_predict方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。