当前位置: 首页>>代码示例>>Python>>正文


Python KMeans.fit_transform方法代码示例

本文整理汇总了Python中sklearn.cluster.KMeans.fit_transform方法的典型用法代码示例。如果您正苦于以下问题:Python KMeans.fit_transform方法的具体用法是什么?Python KMeans.fit_transform怎么用?有哪些使用Python KMeans.fit_transform的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.cluster.KMeans的用法示例。


在下文中一共展示了KMeans.fit_transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: mfcc_clustering

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
def mfcc_clustering(file_name, n_clusters):
    """
    Split an audio file into signals by k-means clustering the MFCCs of its
    spectral templates. Originally from Prem.

    :param file_name: path handed to ``librosa.load``; its last four
        characters are dropped to build the name of the saved MFCC image.
    :param n_clusters: number of k-means clusters, and of returned signals.
    :return: ``np.array`` with one inverse-STFT signal per cluster.
    """
    print(file_name)
    signal, sample_rate = librosa.load(file_name)
    spectrogram = librosa.stft(signal)
    n_frames = spectrogram.shape[1]
    comps, acts = find_template(spectrogram, sample_rate, 100, 101, 0, n_frames)

    # Only rows 1..13 of the MFCC matrix are used; row 0 is skipped.
    mfccs = librosa.feature.mfcc(S=comps)[1:14]
    save_mfcc_img(file_name[:-4] + "_mfcc.png", np.flipud(mfccs))

    # One sample per template: the MFCC matrix is transposed before fitting.
    model = KMeans(n_clusters=n_clusters)
    model.fit_transform(mfccs.T)
    assignments = model.labels_

    def _signal_for(cluster_index):
        # Rebuild the signal from the templates assigned to one cluster.
        members = np.where(assignments == cluster_index)[0]
        template, residual = extract_template(comps[:, members], spectrogram)
        return librosa.istft(template)

    return np.array([_signal_for(k) for k in range(n_clusters)])
开发者ID:ethman,项目名称:prediction,代码行数:28,代码来源:mfcc_exp1.py

示例2: get_domi_color_new_image

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
def get_domi_color_new_image(image, n_clusters=2):
    '''
    Return the darkest k-means colour cluster of an image.

    INPUT:
        image: numpy array; must have exactly three dimensions
        n_clusters: integer, number of colour clusters to fit (any value
            accepted by KMeans, not only 2)

    OUTPUT:
        domi_color: numpy array, the cluster centre with the lowest mean
            channel value; the integer -1 when `image` is not 3-dimensional
    '''
    if len(image.shape) != 3:
        # Sentinel kept for existing callers that test for -1.
        return -1

    image = transform.resize(image, (300, 300, 3))

    # Flatten the image matrix to one row per pixel (no Python-level loop):
    depth = image.shape[2]
    pixels = image.reshape(-1, depth)

    # Clustering the colors of each pixel:
    kmean = KMeans(n_clusters=n_clusters)
    kmean.fit_transform(pixels)
    domi_colors = kmean.cluster_centers_

    # Get the dominant color of the furniture (darker than the background):
    # pick the centre with the smallest mean over its channels, whatever the
    # number of clusters is.
    brightness = np.mean(domi_colors, axis=1)
    return domi_colors[int(np.argmin(brightness))]
开发者ID:harpik-p,项目名称:furniture,代码行数:32,代码来源:image_processing.py

示例3: run_kmeans

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
def run_kmeans(vector=None, links=None, iters=500, clusters=8):
    """Cluster `vector` with k-means and print the links of each cluster.

    :param vector: feature matrix passed to ``KMeans.fit_transform``.
    :param links: sequence aligned with the rows of `vector`; entry *i* is
        printed under the cluster that row *i* was assigned to. Defaults to
        no links.
    :param iters: maximum number of k-means iterations (``max_iter``).
    :param clusters: number of clusters to fit.
    :return: the fitted ``labels_`` array of the KMeans model.
    :raises ValueError: if `vector` is not supplied.
    """
    if vector is None:
        raise ValueError("run_kmeans requires a feature matrix in `vector`")
    links = [] if links is None else links

    km = KMeans(n_clusters=clusters, max_iter=iters)
    km.fit_transform(vector)

    grouped = defaultdict(list)
    for label, link in zip(km.labels_, links):
        grouped[label].append(link)
    for label in grouped:
        # %-formatting prints the same text on Python 2 and Python 3.
        print("%s %s" % (label, grouped[label]))
    return km.labels_
开发者ID:oleeson,项目名称:bigdata-final,代码行数:11,代码来源:functions.py

示例4: get_kmean_clusters

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
 def get_kmean_clusters(self,X):
     '''
     Fit a 5-cluster KMeans on X and return its cluster assignment.
     INPUTS: X = feature matrix handed to KMeans.fit_transform
     OUTPUTS: the fitted model's labels_ attribute (one label per row of X)
     '''
     model = KMeans(n_clusters=5)
     model.fit_transform(X)
     assignments = model.labels_
     return assignments
开发者ID:nhu2000,项目名称:carl_capstone,代码行数:11,代码来源:Modeler.py

示例5: wrapper_scikit

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
def wrapper_scikit(K):
    """Time a randomly initialised K-cluster KMeans fit on the global `pics`.

    Every entry of `pics` is flattened into one row of a
    ``(pics.shape[0], pics.shape[1] ** 2)`` matrix before fitting.

    :param K: number of clusters.
    :return: elapsed wall-clock time of ``fit_transform`` in milliseconds.
    """
    n_pics = pics.shape[0]
    row_length = np.power(pics.shape[1], 2)
    flat = np.empty((n_pics, row_length))
    for idx in range(n_pics):
        flat[idx, :] = pics[idx].ravel()

    # Only model construction and fitting are inside the timed window.
    started = time.time()
    model = KMeans(init='random', n_clusters=K)
    model.fit_transform(flat)
    finished = time.time()
    return (finished - started) * 1000.
开发者ID:cs207-project,项目名称:algo_paper,代码行数:11,代码来源:problem3.py

示例6: findElbow

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
def findElbow(features, n = 10):
    """Plot the k-means error for k = 1..n so the elbow can be read off.

    :param features: data passed to ``KMeans.fit_transform`` and to
        ``kmeansError``.
    :param n: largest number of clusters to try.
    :return: None; the curve is shown with ``plt.show()``.
    """
    # range() instead of xrange(): works on both Python 2 and Python 3.
    ks = list(range(1, n + 1))
    error = []
    for k in ks:
        km = KMeans(n_clusters=k)
        km.fit_transform(features)
        error.append(kmeansError(features, km))
    plt.figure(figsize=(10,10))
    # Same curve drawn twice: once as a line, once as point markers.
    plt.plot(ks, error, 'k', linewidth=10)
    plt.plot(ks, error, 'ko', markersize=25)
    plt.show()
开发者ID:kskk02,项目名称:evergreen_kaggle,代码行数:12,代码来源:evergreen.py

示例7: get_kmean_model

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
def get_kmean_model(X, true_k, n_init=10, verbose=False):
    """Fit a KMeans model on X and return the fitted estimator.

    The model uses 'k-means++' initialisation and at most 100 iterations.

    :param X: data passed to ``KMeans.fit_transform``.
    :param true_k: number of clusters.
    :param n_init: forwarded to KMeans as ``n_init``.
    :param verbose: forwarded to KMeans as ``verbose``.
    :return: the fitted KMeans object.
    """
    settings = dict(
        n_clusters=true_k,
        init='k-means++',
        max_iter=100,
        n_init=n_init,
        verbose=verbose,
    )
    model = KMeans(**settings)
    # The distance matrix returned by fit_transform is not needed here.
    model.fit_transform(X)
    return model
开发者ID:ferranc,项目名称:blogproject,代码行数:13,代码来源:my_functions.py

示例8: train_model

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
def train_model(texts, points, num_classses, model_dir, text_encoding='utf-8'):
	""" Cluster the points into num_classses groups and use the cluster ids as labels, then
	extract unigram TF-IDF features from the texts, train a classifier and save it in model_dir
	for future use.

	Args:
	texts -- an iterable (e.g. a list) of texts e.g. ['this is the first text', 'this is the second text'].
	points -- an iterable (e.g. a list) of tuples in the form of (lat, lon) where coordinates are of type float e.g. [(1.2343, -10.239834),(5.634534, -12.47563)]
	num_classses -- the number of desired clusters/labels/classes of the model (the parameter name keeps its historical spelling).
	model_dir -- the directory the model will be saved in; it must not exist yet and is created here.
	text_encoding -- encoding passed to the TfidfVectorizer.

	Exits the process with status -1 (sys.exit) when model_dir already exists.
	"""
	
	if os.path.exists(model_dir):
		logging.error("Model directory " + model_dir + " already exists, please try another address.")
		sys.exit(-1)
	else:
		os.mkdir(model_dir)
	
	from sklearn.cluster import KMeans
	from sklearn.feature_extraction.text import TfidfVectorizer
	# Public import path; the private module sklearn.linear_model.stochastic_gradient
	# is not available in newer scikit-learn releases.
	from sklearn.linear_model import SGDClassifier
	
	kmeans = KMeans(n_clusters=num_classses, random_state=0)
	points_arr = numpy.array(points)
	kmeans.fit_transform(points_arr)
	cluster_centers = kmeans.cluster_centers_
	sample_clusters = kmeans.labels_
	# Map each cluster id to the (lat, lon) of its centre.
	label_coordinate = {i: (center[0], center[1]) for i, center in enumerate(cluster_centers)}
	
	logging.info('extracting features from text...')
	vectorizer = TfidfVectorizer(encoding=text_encoding, stop_words='english', ngram_range=(1,1), max_df=0.5, min_df=0, binary=True, norm='l2', use_idf=True, smooth_idf=True, sublinear_tf=True)
	X_train = vectorizer.fit_transform(texts)
	Y_train = sample_clusters
	# Clear the fitted stop_words_ attribute before the vectorizer is dumped.
	vectorizer.stop_words_ = None
	logging.info('the number of samples is %d and the number of features is %d' % (X_train.shape[0], X_train.shape[1]))
	
	logging.info('training the classifier...')
	# logging.warning: logging.warn is a deprecated alias that newer Pythons no longer provide.
	logging.warning('Note that alpha (regularisation strength) should be tuned based on the performance on validation data.')
	# NOTE(review): loss='log' and n_iter=5 follow the scikit-learn API this code was written
	# against; confirm them against the installed version before upgrading.
	clf = SGDClassifier(loss='log', penalty='elasticnet', alpha=5e-5, l1_ratio=0.9, fit_intercept=True, n_iter=5, n_jobs=2, random_state=0, learning_rate="optimal")
	clf.fit(X_train, Y_train)
	# Store the coefficients as a sparse matrix.
	clf.coef_ = csr_matrix(clf.coef_)
	
	logging.info('retrieving address of the given points using geopy (requires internet access).')
	# list(...) so the helper receives a real list on Python 3 as it did on Python 2.
	coordinate_address = retrieve_location_from_coordinates(list(label_coordinate.values()))

	logging.info('dumping the the vectorizer, clf (trained model), label_coordinates and coordinate_locations into pickle files in ' + model_dir)
	dump_model(clf, vectorizer, coordinate_address, label_coordinate, model_dir)
开发者ID:afshinrahimi,项目名称:pigeo,代码行数:52,代码来源:pigeo.py

示例9: kmeans

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
def kmeans(embedding,n_components, mask):
    """Cluster the unmasked rows of `embedding` with k-means.

    :param embedding: 2-D array; rows are removed along axis 0 by `mask`.
    :param n_components: number of clusters.
    :param mask: row indices to exclude (passed to ``np.delete``).
    :return: whatever ``recort`` returns for the float cluster labels and the
        indices of the kept rows (the last argument passed is the constant 1).
    """
    import numpy as np
    from sklearn.cluster import KMeans

    n_vertices = embedding.shape[0]
    masked_embedding = np.delete(embedding, mask, 0)
    # Indices of the rows that survive the mask.
    cortex = np.delete(np.arange(n_vertices), mask)

    # NOTE(review): n_jobs is only accepted by older scikit-learn releases;
    # confirm against the installed version.
    est = KMeans(n_clusters=n_components, n_jobs=-2, init='k-means++', n_init=300)
    est.fit_transform(masked_embedding)
    # Builtin float: the np.float alias no longer exists in current NumPy and
    # was always the same type.
    kmeans_results = est.labels_.astype(float)
    kmeans_recort = recort(n_vertices, kmeans_results, cortex, 1)
    return kmeans_recort
开发者ID:margulies,项目名称:myelinconnect,代码行数:16,代码来源:clustering.py

示例10: best_lda_cluster_wine

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
 def best_lda_cluster_wine(self):
     """Write k-means and GMM cluster features of the wine LDA data to CSV.

     The data from ``data_helper().get_wine_data_lda_best()`` is scaled with
     a RobustScaler fitted on the training split. For each method, four
     header-less CSV files (x_train, x_test, y_train, y_test) are written
     under ``self.save_dir``:

     * k-means (4 clusters): the output of ``fit_transform`` / ``transform``;
     * GMM (4 components): the output of ``predict_proba``. The earlier code
       reused the k-means model here, so the GMM files duplicated the
       k-means ones.
     """
     dh = data_helper()
     X_train, X_test, y_train, y_test = dh.get_wine_data_lda_best()

     scl = RobustScaler()
     X_train_scl = scl.fit_transform(X_train)
     X_test_scl = scl.transform(X_test)

     def _save(tag, x_train, x_test):
         # Write the four CSV files belonging to one clustering method.
         parts = (('x_train', x_train), ('x_test', x_test),
                  ('y_train', y_train), ('y_test', y_test))
         for suffix, data in parts:
             filename = './' + self.save_dir + '/wine_' + tag + '_lda_' + suffix + '.txt'
             pd.DataFrame(data).to_csv(filename, header=False, index=False)

     ##
     ## K-Means
     ##
     km = KMeans(n_clusters=4, algorithm='full')
     X_train_transformed = km.fit_transform(X_train_scl)
     X_test_transformed = km.transform(X_test_scl)
     _save('kmeans', X_train_transformed, X_test_transformed)

     ##
     ## GMM
     ##
     gmm = GaussianMixture(n_components=4, covariance_type='full')
     gmm.fit(X_train_scl)
     # GaussianMixture has no transform(); the per-component probabilities
     # are used as the transformed features.
     X_train_transformed = gmm.predict_proba(X_train_scl)
     X_test_transformed = gmm.predict_proba(X_test_scl)
     _save('gmm', X_train_transformed, X_test_transformed)
开发者ID:rbaxter1,项目名称:CS7641,代码行数:50,代码来源:part3.py

示例11: run

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
def run(lines,vectorizerCls):
    """Vectorize `lines`, cluster them with k-means and list the top terms.

    The number of clusters is read from the name `n_clusters` in the
    enclosing scope.

    :param lines: iterable of documents passed to the vectorizer.
    :param vectorizerCls: vectorizer class; instantiated with a fixed list of
        French stop words and expected to offer ``fit_transform`` plus
        ``get_feature_names_out`` or ``get_feature_names``.
    :return: dict mapping each cluster index to the list of its (up to) 25
        highest-weighted terms.
    """
    print(TIMENOW(),'VECTORIZE','-'*42)
    vectorizer=vectorizerCls(stop_words=['le','de','la','les','je','un','une','des','est','et','il','elle','du','ai','au',])
    data =vectorizer.fit_transform(lines)
    num_samples, num_features = data.shape
    print("#samples: %d, #features: %d" % (num_samples, num_features)) #samples: 5, #features: 25 #samples: 2, #features: 37
    print(TIMENOW(),'KMEANS','-'*42)
    km   =KMeans(n_clusters=n_clusters)
    km.fit_transform(data)
    labels = km.labels_
    labels_shape = km.labels_.shape
    print ("labels : ", labels)
    print ("labels_shape : ", labels_shape)

    print(TIMENOW(),'DONE','-'*42)

    print("Top terms per cluster:")
    # Feature indices of every centroid, sorted by descending weight.
    order_centroids = km.cluster_centers_.argsort()[:, ::-1]
    # Newer scikit-learn only offers get_feature_names_out(); older releases
    # (and custom vectorizers) may only offer get_feature_names().
    names_out = getattr(vectorizer, 'get_feature_names_out', None)
    if names_out is not None:
        terms = list(names_out())
    else:
        terms = list(vectorizer.get_feature_names())
    result = dict()
    for i in range(n_clusters):
        result[i]=list()
        print("Cluster %d:" % i, end='')
        for ind in order_centroids[i, :25]:
            print(' %s' % terms[ind], end='\n')
            result[i].append(terms[ind])
        print()
    return result
开发者ID:imaspol,项目名称:Laura,代码行数:31,代码来源:MachineLearning.py

示例12: decompose_map

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
def decompose_map(map1, method, r=40, out='inter'):
    """Decompose `map1` in place with the named method.

    `map1.reset_solution()` is always called first. Depending on `method`
    the result is produced by `map1.decompose(...)`, by an external tool
    wrapper, or by assigning `map1.contact_group` directly.

    :param map1: contact-map object; the code uses its `reset_solution`,
        `decompose`, `get_locations`, `save`, `name`, `contact_map`,
        `contact_group` and `group_map` members.
    :param method: one of 'EIG', 'PCA', 'ICE', 'K-means', '3D-K-means',
        'NMF', 'BNMF', 'Random', 'Armatus', 'TAD'.
    :param r: number of dimensions / clusters / groups.
    :param out: accepted for compatibility; not read by this function.
    :raises ValueError: for any other `method`.
    """
    map1.reset_solution()
    if method in ('EIG', 'PCA', 'ICE'):
        map1.decompose(method, dim_num=r)
    elif method == 'K-means':
        from k_means_pdist import kmeanssample
        DIST = -np.array(map1.contact_map) ## simi to dist
        centres, xtoc, dist = kmeanssample(DIST, np.eye(DIST.shape[0]), r, nsample=0, delta=0.001, maxiter=20, verbose=0)
        map1.contact_group = -np.matrix(dist) ## dist to simi
    elif method == '3D-K-means':
        km = KMeans(n_clusters=r)
        dfile = 'pdb.txt'
        pb, vx = map1.get_locations(dfile, st=1, ch=0, po=1, nm=2, add=0)
        pb, vy = map1.get_locations(dfile, st=1, ch=0, po=1, nm=3, add=0)
        pb, vz = map1.get_locations(dfile, st=1, ch=0, po=1, nm=4, add=0)
        X = np.zeros((map1.contact_map.shape[0], 3))
        C = np.zeros(map1.contact_map.shape[0])
        # Accumulate coordinates per bin and count them, so the division
        # below yields the mean position of each bin. Plain assignment would
        # leave "last value / count" for bins that occur more than once.
        for i,x,y,z in zip(pb,vx,vy,vz):
            X[i,0] += x
            X[i,1] += y
            X[i,2] += z
            C[i] += 1
        C[C==0] = 1  # bins without any location: avoid dividing by zero
        X /= C[:,np.newaxis]
        map1.contact_group = -np.matrix(km.fit_transform(X))
    elif method == 'NMF':
        map1.decompose('NND', dim_num=r)
        map1.decompose('NMF-Gaussian', dim_num=r)
        map1.contact_group = np.dot(map1.contact_group, map1.group_map)
    elif method == 'BNMF':
        map1.decompose('NND', dim_num=r)
        map1.decompose('NMF-PoissonManifoldEqual', dim_num=r, par_lam=0)
        map1.contact_group = np.dot(map1.contact_group, map1.group_map)
    elif method == 'Random':
        # Despite the name this is deterministic: consecutive runs of `size`
        # rows are assigned to groups 0, 1, 2, ...
        n = map1.contact_map.shape[0]
        map1.contact_group = np.zeros((n,r))
        from math import ceil
        size = int(ceil(n/float(r)))
        rows = np.arange(n)
        # Floor division keeps the column index an integer on Python 3 too.
        map1.contact_group[rows, rows // size] = 1
    elif method == 'Armatus':
        from run_armatus import Armatus
        map1.save()
        map2 = Armatus('../tools/armatus2.1/armatus', name=map1.name)
        map2.load()
        map2.decompose()
        map1.contact_group = map2.contact_group
    elif method == 'TAD':
        from run_domaincall import DomainCall
        map1.save()
        map2 = DomainCall('../tools/domaincall/', name=map1.name)
        map2.load()
        map2.decompose()
        map1.contact_group = map2.contact_group
    else:
        raise ValueError('Unknow method name '+method)
开发者ID:huxihao,项目名称:BNMF,代码行数:62,代码来源:compare_cluster.py

示例13: KinKmeans

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
def KinKmeans(var, nk=False, tol=1e-4, n_init=100):
    '''
    Uses pseudo-F to estimate the best number of K in K-Means
    From MJCarvalho GapStatistics

    K-Means is fitted for k = 1, 2, 3, ... and a statistic f(k) is computed
    from the squared values returned by ``fit_transform``. When `nk` is not
    given, the loop stops once more than three local minima of f have been
    found.

    :param numpy var: Numpy array with input data
    :param int nk: When truthy, used as the loop bound (the default bound is
        3) and the search for local minima of f is skipped
    :param float tol: Tolerance for K-Means
    :param int n_init: Number of initializations for K-Means

    :return tuple: ``(kink[1], f)`` -- the second entry of the list of kink
        indices and the array of f values (length = size of `var` along
        axis 0)
    '''

    from sklearn.cluster import KMeans

    # Nd is the size of var along axis 0; S, f and alpha all have Nd entries
    # and entry i belongs to the fit with i+1 clusters.
    # NOTE(review): the loop below has no explicit bound on i, so i reaching
    # Nd would index past the end of these arrays -- confirm this cannot
    # happen for the inputs used.
    Nd = np.size(var, axis=0)
    S = np.zeros(Nd)
    f = np.zeros(Nd)
    alpha = np.zeros(Nd)

    if not nk:
        term = 3
    else:
        term = nk

    # kink starts with one entry, so the loop runs while it holds at most
    # `term` entries.
    kink = [0]
    i = 0
    while len(kink) <= term:
        # Fit K-Means with i+1 clusters.
        kmeans = KMeans(init='k-means++', n_clusters=i+1,
                        n_init=n_init, tol=tol)

        # T is the matrix returned by fit_transform; its squared entries are
        # summed per column (I) and then in total (S[i]), ignoring NaNs.
        T = kmeans.fit_transform(var, y=None)
        I = np.nansum(T**2, axis=0)
        S[i] = np.nansum(I, axis=0)
        # Weight alpha: alpha[0] keeps its initial 0 (it is not used, since
        # f[0] is set directly below).
        if i == 1:
            alpha[i] = 1.0 - (3.0/(4.0*Nd))
        elif i > 1:
            alpha[i] = alpha[i-1] + (1-alpha[i-1])/6.0
        # f(k): 1 for the first fit, otherwise the ratio of S to the
        # alpha-weighted S of the previous fit.
        if i == 0:
            f[i] = 1
        else:
            f[i] = S[i] / (alpha[i] * S[i-1])

        if not nk:
            # Indices of the local minima of f. `&` binds tighter than `|`,
            # so this reads (f[j] < f[j-1] and f[j] <= f[j+1]) or
            # (f[j] <= f[j-1] and f[j] < f[j+1]), with the missing neighbour
            # at either end treated as True. The whole f array is scanned,
            # including the entries that are still 0 because they have not
            # been computed yet.
            kink = np.arange(len(f))[
                np.r_[True, f[1:] < f[:-1]] &
                np.r_[f[:-1] <= f[1:], True] |
                np.r_[True, f[1:] <= f[:-1]] &
                np.r_[f[:-1] < f[1:], True]
            ]

        else:
            # With nk given, kink only serves as an iteration counter.
            # NOTE(review): every appended value is 0, so kink[1] returned
            # below is always 0 in this mode -- confirm that is intended.
            kink.append(0)
        i += 1

    return kink[1], f
开发者ID:jcmt,项目名称:climtools,代码行数:61,代码来源:climtools.py

示例14: clusterGoalies

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
def clusterGoalies(df, idx, numOfClusters):
	"""Group the entries of `idx` by their nearest k-means cluster.

	A KMeans model (n_init=20) is fitted on `df`; for every row of the
	matrix returned by fit_transform, the position of the smallest value
	(the first one on ties) selects the group.

	:param df: data passed to KMeans.fit_transform.
	:param idx: sequence aligned with the rows of `df`; entry i is what gets
		stored for row i.
	:param numOfClusters: number of clusters, and of returned groups.
	:return: list of numOfClusters lists holding the grouped `idx` entries.
	"""
	kmeans_model = KMeans(n_clusters=numOfClusters, n_init=20)
	distances = kmeans_model.fit_transform(df)
	groups = [[] for _ in range(numOfClusters)]
	for row_number, row in enumerate(distances):
		nearest = min(range(len(row)), key=row.__getitem__)
		groups[nearest].append(idx[row_number])
	return groups
开发者ID:2dvodcast,项目名称:Data-Science-1,代码行数:10,代码来源:goaltending.py

示例15: make_cluster

# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 注意: fit_transform 是 KMeans 的实例方法,不能单独导入;请先创建 KMeans 对象,再调用其 fit_transform(X)
def make_cluster(df):
    """Fit a 4-cluster KMeans and tabulate each sample's cluster and distance.

    :param df: accepted but not read by this function.
    :return: tuple ``(cluster_df, centers)`` where ``cluster_df`` has the
        columns "cluster" (the fitted labels) and "dist" (the smallest value
        in each row of the matrix returned by fit_transform), and
        ``centers`` is the model's ``cluster_centers_``.
    """
    # NOTE(review): the data that is clustered is the name
    # `cust_data_transform` from the enclosing scope, not the `df`
    # argument -- confirm this is intended.
    model = KMeans(n_clusters=4)
    distances = model.fit_transform(cust_data_transform)

    table = pd.DataFrame()
    table["cluster"] = model.labels_
    # Smallest entry of each row: the distance to the sample's own centre.
    table["dist"] = list(map(min, distances))
    return table, model.cluster_centers_
开发者ID:dbluiett,项目名称:more_than_gut,代码行数:10,代码来源:clusters.py


注:本文中的sklearn.cluster.KMeans.fit_transform方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。