本文整理汇总了Python中sklearn.cluster.KMeans.fit_transform方法的典型用法代码示例。如果您正苦于以下问题：Python KMeans.fit_transform方法的具体用法？Python KMeans.fit_transform怎么用？Python KMeans.fit_transform使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.cluster.KMeans的用法示例。
在下文中一共展示了KMeans.fit_transform方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: mfcc_clustering
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def mfcc_clustering(file_name, n_clusters):
    """
    Split an audio mixture into sources by clustering the MFCCs of its components.

    The file is loaded with librosa, its STFT is handed to ``find_template``,
    and rows 1..13 of the MFCCs of the returned components are clustered with
    k-means. For every cluster the matching components are passed to
    ``extract_template`` and the resulting template is converted back to a
    time-domain signal with ``librosa.istft``. An image of the MFCCs is saved
    next to the input file as ``<name without extension>_mfcc.png``.

    From Prem

    :param file_name: path of the audio file to load
    :param n_clusters: number of k-means clusters, i.e. number of sources
    :return: ``np.array`` holding one reconstructed signal per cluster
    """
    import os

    print(file_name)
    mix, sr = librosa.load(file_name)
    mix_stft = librosa.stft(mix)
    comps, _acts = find_template(mix_stft, sr, 100, 101, 0, mix_stft.shape[1])
    cluster_comps = librosa.feature.mfcc(S=comps)[1:14]

    # splitext removes the real extension whatever its length; slicing off a
    # fixed four characters mangled names such as "song.flac" or "song".
    image_stem = os.path.splitext(file_name)[0]
    save_mfcc_img(image_stem + "_mfcc.png", np.flipud(cluster_comps))

    # Only the cluster labels are used, so the distance matrix produced by
    # fit_transform does not have to be computed.
    labels = KMeans(n_clusters=n_clusters).fit_predict(cluster_comps.T)

    sources = []
    for cluster_index in range(n_clusters):
        indices = np.where(labels == cluster_index)[0]
        template, _residual = extract_template(comps[:, indices], mix_stft)
        sources.append(librosa.istft(template))
    return np.array(sources)
示例2: get_domi_color_new_image
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def get_domi_color_new_image(image, n_clusters=2):
    '''
    Return the darkest of the dominant colours of an image.

    The image is resized to 300x300x3, its pixels are clustered with k-means
    and the cluster centre with the lowest mean channel value is returned
    (the original author's assumption: the furniture is darker than the
    background).

    INPUT:
    image: numpy array with three dimensions
    n_clusters: integer, number of colour clusters to fit
    OUTPUT:
    domi_color: numpy array (one cluster centre), or -1 when `image` does not
    have exactly three dimensions
    '''
    if len(image.shape) != 3:
        # Sentinel kept for backward compatibility with existing callers.
        return -1
    image = transform.resize(image, (300, 300, 3))

    # Flatten the image matrix to one row per pixel (row-major, same order as
    # iterating rows then columns).
    pixels = np.asarray(image).reshape(-1, image.shape[2])

    # Clustering the colors of each pixel:
    kmean = KMeans(n_clusters=n_clusters)
    kmean.fit(pixels)
    domi_colors = kmean.cluster_centers_

    # Pick the darkest centre among ALL clusters instead of comparing only
    # centres 0 and 1, which failed for n_clusters == 1 and ignored the other
    # centres for n_clusters > 2. Searching the reversed array keeps the old
    # tie-breaking (the later centre wins when means are equal).
    brightness = domi_colors.mean(axis=1)
    darkest = len(brightness) - 1 - int(np.argmin(brightness[::-1]))
    return domi_colors[darkest]
示例3: run_kmeans
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def run_kmeans(vector=None, links=None, iters=500, clusters=8):
    """Cluster `vector` with k-means, print the links per cluster, return the labels.

    :param vector: feature matrix handed to ``KMeans.fit``
    :param links: sequence aligned with the rows of `vector`; each link is
        printed under the cluster its row was assigned to (defaults to empty)
    :param iters: maximum number of k-means iterations
    :param clusters: number of clusters
    :return: ``km.labels_``, the cluster label of every row of `vector`
    """
    links = [] if links is None else links

    # The keyword is `max_iter`; the former `max_iters` raised a TypeError.
    km = KMeans(n_clusters=clusters, max_iter=iters)
    # The former code fitted an undefined name `vec` instead of the argument.
    km.fit(vector)

    # Use a separate name so the `clusters` argument is not shadowed, and read
    # the fitted attribute `labels_` (there is no `labels` attribute).
    grouped = defaultdict(list)
    for label, link in zip(km.labels_, links):
        grouped[label].append(link)

    for label in grouped:
        # Single-argument print keeps the output identical on Python 2 and 3.
        print("%s %s" % (label, grouped[label]))
    return km.labels_
示例4: get_kmean_clusters
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def get_kmean_clusters(self, X, n_clusters=5):
    '''
    Returns labels of kmeans clustering
    INPUTS: X = feature matrix as 2d numpy float array
            n_clusters = number of clusters to fit (defaults to the
                         previously hard-coded value of 5)
    OUTPUTS: KMeans cluster labels as 1d numpy array of integer cluster
             indices, one per row of X
    '''
    kmeans = KMeans(n_clusters=n_clusters)
    # fit_predict returns the same labels as fitting and reading `labels_`,
    # without computing the unused distance matrix.
    return kmeans.fit_predict(X)
示例5: wrapper_scikit
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def wrapper_scikit(K):
    """Time scikit-learn's k-means on the module-level ``pics`` array.

    Every picture in ``pics`` is flattened to one feature row, then a
    randomly initialised KMeans with `K` clusters is built and run through
    ``fit_transform``. Only model construction and fitting are timed.

    :param K: number of clusters
    :return: elapsed wall-clock time in milliseconds
    """
    # -1 lets NumPy infer the row length, so pictures no longer have to be
    # square as the former `shape[1] ** 2` sizing required.
    pics_t = np.asarray(pics, dtype=float).reshape(pics.shape[0], -1)

    time1 = time.time()
    kmean = KMeans(init='random', n_clusters=K)
    kmean.fit_transform(pics_t)
    time2 = time.time()
    return (time2 - time1) * 1000.
示例6: findElbow
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def findElbow(features, n = 10):
    """Plot the k-means error for k = 1..n so the elbow can be read off.

    For every k a KMeans model is fitted on `features` and scored with the
    helper ``kmeansError``; the scores are drawn as a line plus markers and
    shown with matplotlib. Nothing is returned.

    :param features: data handed to ``KMeans.fit`` and to ``kmeansError``
    :param n: largest number of clusters to try
    """
    # `range` works on Python 2 and 3 (the former `xrange` is Python 2 only);
    # the list is reused as the x axis of both plots.
    ks = list(range(1, n + 1))

    error = []
    for k in ks:
        km = KMeans(n_clusters=k)
        km.fit(features)
        error.append(kmeansError(features, km))

    plt.figure(figsize=(10,10))
    plt.plot(ks, error, 'k', linewidth=10)
    plt.plot(ks, error, 'ko', markersize=25)
    plt.show()
示例7: get_kmean_model
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def get_kmean_model(X, true_k, n_init=10, verbose=False):
    """Fit a k-means++ initialised KMeans (at most 100 iterations) on X.

    :param X: data to cluster
    :param true_k: number of clusters
    :param n_init: number of initialisations forwarded to KMeans
    :param verbose: verbosity flag forwarded to KMeans
    :return: the fitted KMeans estimator
    """
    settings = dict(
        n_clusters=true_k,
        init='k-means++',
        max_iter=100,
        n_init=n_init,
        verbose=verbose,
    )
    model = KMeans(**settings)
    model.fit_transform(X)
    return model
示例8: train_model
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def train_model(texts, points, num_classses, model_dir, text_encoding='utf-8'):
    """ Cluster the points into labels, train a text classifier on them and save it.

    The (lat, lon) points are clustered with k-means into `num_classses` clusters and the cluster
    index of every sample becomes its label. TF-IDF unigram features are extracted from the texts,
    an SGD classifier is trained on them, the cluster centres are passed to
    `retrieve_location_from_coordinates`, and everything is handed to `dump_model`.

    If `model_dir` already exists an error is logged and the process exits with status -1;
    otherwise the directory is created.

    Args:
        texts -- an iterable (e.g. a list) of texts e.g. ['this is the first text', 'this is the second text'].
        points -- an iterable (e.g. a list) of tuples in the form of (lat, lon) where coordinates are of type float e.g. [(1.2343, -10.239834),(5.634534, -12.47563)]
        num_classses -- the number of desired clusters/labels/classes of the model (the spelling of
            the parameter name is kept so existing keyword callers keep working).
        model_dir -- the directory that is created and in which the model will be saved.
        text_encoding -- encoding passed to the TfidfVectorizer.
    """
    if os.path.exists(model_dir):
        logging.error("Model directory " + model_dir + " already exists, please try another address.")
        sys.exit(-1)
    else:
        os.mkdir(model_dir)

    from sklearn.cluster import KMeans
    from sklearn.feature_extraction.text import TfidfVectorizer
    # Import from the public package; the private module
    # `sklearn.linear_model.stochastic_gradient` is gone from newer releases.
    from sklearn.linear_model import SGDClassifier

    kmeans = KMeans(n_clusters=num_classses, random_state=0)
    points_arr = numpy.array(points)
    kmeans.fit(points_arr)
    cluster_centers = kmeans.cluster_centers_
    sample_clusters = kmeans.labels_

    # Map every cluster index to the (lat, lon) of its centre.
    label_coordinate = {
        i: (center[0], center[1]) for i, center in enumerate(cluster_centers)
    }

    logging.info('extracting features from text...')
    vectorizer = TfidfVectorizer(encoding=text_encoding, stop_words='english', ngram_range=(1,1), max_df=0.5, min_df=0, binary=True, norm='l2', use_idf=True, smooth_idf=True, sublinear_tf=True)
    X_train = vectorizer.fit_transform(texts)
    Y_train = sample_clusters
    # Cleared before the vectorizer is dumped -- presumably to keep the pickle small.
    vectorizer.stop_words_ = None
    logging.info('the number of samples is %d and the number of features is %d' % (X_train.shape[0], X_train.shape[1]))

    logging.info('training the classifier...')
    # logging.warn is a deprecated alias of logging.warning.
    logging.warning('Note that alpha (regularisation strength) should be tuned based on the performance on validation data.')
    # NOTE(review): newer scikit-learn releases replaced `n_iter` with
    # `max_iter` (and renamed loss 'log') -- confirm against the pinned version.
    clf = SGDClassifier(loss='log', penalty='elasticnet', alpha=5e-5, l1_ratio=0.9, fit_intercept=True, n_iter=5, n_jobs=2, random_state=0, learning_rate="optimal")
    clf.fit(X_train, Y_train)
    # Coefficients are stored as a sparse matrix before dumping.
    clf.coef_ = csr_matrix(clf.coef_)

    logging.info('retrieving address of the given points using geopy (requires internet access).')
    coordinate_address = retrieve_location_from_coordinates(label_coordinate.values())

    logging.info('dumping the the vectorizer, clf (trained model), label_coordinates and coordinate_locations into pickle files in ' + model_dir)
    dump_model(clf, vectorizer, coordinate_address, label_coordinate, model_dir)
示例9: kmeans
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def kmeans(embedding,n_components, mask):
    """Cluster the unmasked rows of `embedding` with k-means.

    Rows listed in `mask` are removed from `embedding`, the remaining rows
    are clustered, and the float-converted labels are handed to the helper
    ``recort`` together with the total row count and the indices that were
    kept.

    :param embedding: 2d array with one row per vertex
    :param n_components: number of k-means clusters
    :param mask: row indices to exclude from clustering
    :return: whatever ``recort`` returns for the labels
    """
    import numpy as np
    from sklearn.cluster import KMeans

    all_vertex = range(embedding.shape[0])
    masked_embedding = np.delete(embedding, mask, 0)
    # Indices of the rows that take part in the clustering.
    cortex = np.delete(all_vertex, mask)

    # NOTE(review): `n_jobs` is no longer accepted by KMeans in newer
    # scikit-learn releases -- confirm against the pinned version.
    est = KMeans(n_clusters=n_components, n_jobs=-2, init='k-means++', n_init=300)
    labels = est.fit_predict(masked_embedding)

    # `np.float` was only an alias of the builtin float and has been removed
    # from NumPy; the builtin yields the same float64 array.
    kmeans_results = labels.astype(float)
    kmeans_recort = recort(len(all_vertex), kmeans_results, cortex, 1)
    return kmeans_recort
示例10: best_lda_cluster_wine
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def best_lda_cluster_wine(self):
    """Write k-means and GMM cluster features of the LDA wine data to CSV files.

    The train/test split from ``data_helper().get_wine_data_lda_best()`` is
    scaled with a RobustScaler fitted on the training part. Two feature sets
    are then derived and saved under ``./<self.save_dir>/``:

    * ``wine_kmeans_lda_*``: distances to the 4 k-means centres
    * ``wine_gmm_lda_*``: membership probabilities of a 4-component
      Gaussian mixture

    Each set is written as four header-less CSV files (x_train, x_test,
    y_train, y_test). Nothing is returned.
    """
    dh = data_helper()
    X_train, X_test, y_train, y_test = dh.get_wine_data_lda_best()

    scl = RobustScaler()
    X_train_scl = scl.fit_transform(X_train)
    X_test_scl = scl.transform(X_test)

    def save(tag, x_train, x_test):
        # Write the four CSV files of one clustering method.
        outputs = (
            ('x_train', x_train),
            ('x_test', x_test),
            ('y_train', y_train),
            ('y_test', y_test),
        )
        for suffix, values in outputs:
            filename = './' + self.save_dir + '/wine_' + tag + '_lda_' + suffix + '.txt'
            pd.DataFrame(values).to_csv(filename, header=False, index=False)

    ##
    ## K-Means
    ##
    # NOTE(review): algorithm='full' was renamed in newer scikit-learn
    # releases -- confirm against the pinned version.
    km = KMeans(n_clusters=4, algorithm='full')
    save('kmeans', km.fit_transform(X_train_scl), km.transform(X_test_scl))

    ##
    ## GMM
    ##
    # The mixture model was previously created but never used: the k-means
    # model was refitted and its distances were saved under the gmm names.
    # GaussianMixture has no transform(), so the per-component membership
    # probabilities serve as the transformed features.
    gmm = GaussianMixture(n_components=4, covariance_type='full')
    gmm.fit(X_train_scl)
    save('gmm', gmm.predict_proba(X_train_scl), gmm.predict_proba(X_test_scl))
示例11: run
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def run(lines,vectorizerCls):
    """Vectorize `lines`, cluster them with k-means and report the top terms.

    The number of clusters is read from the module-level name ``n_clusters``.
    Progress, the labels and the top terms of each cluster are printed.

    :param lines: iterable of text documents
    :param vectorizerCls: vectorizer class; it is instantiated with a French
        stop-word list and must provide ``fit_transform`` plus
        ``get_feature_names_out`` or ``get_feature_names``
    :return: dict mapping each cluster index to the list of its (at most 25)
        highest-weighted terms
    """
    print(TIMENOW(),'VECTORIZE','-'*42)
    vectorizer = vectorizerCls(stop_words=['le','de','la','les','je','un','une','des','est','et','il','elle','du','ai','au',])
    data = vectorizer.fit_transform(lines)
    num_samples, num_features = data.shape
    print("#samples: %d, #features: %d" % (num_samples, num_features))

    print(TIMENOW(),'KMEANS','-'*42)
    km = KMeans(n_clusters=n_clusters)
    # The distance matrix returned by fit_transform was never used.
    km.fit(data)
    labels = km.labels_
    print ("labels : ", labels)
    print ("labels_shape : ", labels.shape)
    print(TIMENOW(),'DONE','-'*42)

    print("Top terms per cluster:")
    # Feature indices of every centre, sorted by descending weight.
    order_centroids = km.cluster_centers_.argsort()[:, ::-1]

    # Newer scikit-learn releases only offer get_feature_names_out, older
    # ones only get_feature_names; support both.
    get_names = getattr(vectorizer, 'get_feature_names_out', None)
    if get_names is None:
        get_names = vectorizer.get_feature_names
    terms = get_names()

    result = dict()
    for i in range(n_clusters):
        result[i] = list()
        print("Cluster %d:" % i, end='')
        for ind in order_centroids[i, :25]:
            print(' %s' % terms[ind], end='\n')
            result[i].append(terms[ind])
        print()
    return result
示例12: decompose_map
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def decompose_map(map1, method, r=40, out='inter'):
    """Decompose `map1` into `r` groups with the requested method.

    The solution stored on `map1` is reset first; the chosen method then
    either calls ``map1.decompose`` or assigns ``map1.contact_group``
    directly. Nothing is returned.

    :param map1: map object providing ``reset_solution``, ``decompose``,
        ``contact_map`` and related attributes
    :param method: one of 'EIG', 'PCA', 'ICE', 'K-means', '3D-K-means',
        'NMF', 'BNMF', 'Random', 'Armatus', 'TAD'
    :param r: number of groups / dimensions
    :param out: not used by this function
    :raises ValueError: if `method` is not one of the names above
    """
    map1.reset_solution()
    if method in ('EIG', 'PCA', 'ICE'):
        # These three are forwarded unchanged under their own name.
        map1.decompose(method, dim_num=r)
    elif method == 'K-means':
        from k_means_pdist import kmeanssample
        DIST = -np.array(map1.contact_map)  ## simi to dist
        centres, xtoc, dist = kmeanssample(DIST, np.eye(DIST.shape[0]), r, nsample=0, delta=0.001, maxiter=20, verbose=0)
        map1.contact_group = -np.matrix(dist)  ## dist to simi
    elif method == '3D-K-means':
        km = KMeans(n_clusters=r)
        dfile = 'pdb.txt'
        pb, vx = map1.get_locations(dfile, st=1, ch=0, po=1, nm=2, add=0)
        pb, vy = map1.get_locations(dfile, st=1, ch=0, po=1, nm=3, add=0)
        pb, vz = map1.get_locations(dfile, st=1, ch=0, po=1, nm=4, add=0)
        X = np.zeros((map1.contact_map.shape[0], 3))
        C = np.zeros(map1.contact_map.shape[0])
        # Sum the coordinates that fall on the same row and count them ...
        for i, x, y, z in zip(pb, vx, vy, vz):
            X[i, 0] = x
            X[i, 1] = y
            X[i, 2] = z
            C[i] += 1
        # ... then divide by the count (rows without data divide by 1).
        C[C == 0] = 1
        X /= C[:, np.newaxis]
        map1.contact_group = -np.matrix(km.fit_transform(X))
    elif method == 'NMF':
        map1.decompose('NND', dim_num=r)
        map1.decompose('NMF-Gaussian', dim_num=r)
        map1.contact_group = np.dot(map1.contact_group, map1.group_map)
    elif method == 'BNMF':
        map1.decompose('NND', dim_num=r)
        map1.decompose('NMF-PoissonManifoldEqual', dim_num=r, par_lam=0)
        map1.contact_group = np.dot(map1.contact_group, map1.group_map)
    elif method == 'Random':
        n = map1.contact_map.shape[0]
        map1.contact_group = np.zeros((n, r))
        from math import ceil
        size = int(ceil(n / float(r)))
        # Consecutive rows are assigned to groups in blocks of `size`. Floor
        # division keeps the column index an integer on Python 3, where `/`
        # yields a float that cannot be used as an index.
        for i in range(n):
            map1.contact_group[i, i // size] = 1
    elif method == 'Armatus':
        from run_armatus import Armatus
        map1.save()
        map2 = Armatus('../tools/armatus2.1/armatus', name=map1.name)
        map2.load()
        map2.decompose()
        map1.contact_group = map2.contact_group
    elif method == 'TAD':
        from run_domaincall import DomainCall
        map1.save()
        map2 = DomainCall('../tools/domaincall/', name=map1.name)
        map2.load()
        map2.decompose()
        map1.contact_group = map2.contact_group
    else:
        raise ValueError('Unknow method name '+method)
示例13: KinKmeans
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def KinKmeans(var, nk=False, tol=1e-4, n_init=100):
    '''
    Uses pseudo-F to estimate the best number of K in K-Means
    From MJCarvalho GapStatistics

    K-Means is fitted with 1, 2, 3, ... clusters. For every fit the squared
    entries of the ``fit_transform`` output are summed into ``S[i]`` and the
    ratio ``f[i] = S[i] / (alpha[i] * S[i-1])`` is stored (``f[0]`` is 1).

    When ``nk`` is falsy, ``kink`` is recomputed after every fit as the
    indices at which ``f`` has a local minimum, and the loop ends once more
    than three such indices exist. When ``nk`` is given, ``kink`` is only a
    list of zeros that grows by one per fit, so exactly ``nk`` fits are run
    and the first returned value is always 0.

    :param numpy var: Numpy array with input data
    :param int nk: Number of fits to run; if falsy, fits continue until more
        than three kinks are found
    :param float tol: Tolerance for K-Means
    :param int n_init: Number of initializations for K-Means
    :return: tuple ``(kink[1], f)``: the second entry of ``kink`` and the
        array of f statistics (length ``np.size(var, axis=0)``)
    '''
    from sklearn.cluster import KMeans
    # All work arrays are preallocated with zeros, one slot per row of var.
    Nd = np.size(var, axis=0)
    S = np.zeros(Nd)
    f = np.zeros(Nd)
    alpha = np.zeros(Nd)
    # term is the value len(kink) has to exceed for the loop to stop.
    if not nk:
        term = 3
    else:
        term = nk
    kink = [0]
    i = 0
    while len(kink) <= term:
        ## Kmeans with i+1 clusters
        kmeans = KMeans(init='k-means++', n_clusters=i+1,
                        n_init=n_init, tol=tol)
        T = kmeans.fit_transform(var, y=None)
        # Sum of the squared entries of T over rows, then over columns.
        # NOTE(review): this includes every column of T, not only the
        # smallest entry per row -- confirm this is the intended distortion.
        I = np.nansum(T**2, axis=0)
        S[i] = np.nansum(I, axis=0)
        ## Det. Alpha (alpha[0] stays 0 and is never used)
        if i == 1:
            alpha[i] = 1.0 - (3.0/(4.0*Nd))
        elif i > 1:
            alpha[i] = alpha[i-1] + (1-alpha[i-1])/6.0
        ## Det. f(k)
        if i == 0:
            f[i] = 1
        else:
            f[i] = S[i] / (alpha[i] * S[i-1])
        if not nk:
            # Indices where f is a local minimum: strictly below one
            # neighbour and not above the other. f is zero-initialised, so
            # slots not filled yet take part in the comparison as zeros.
            kink = np.arange(len(f))[
                np.r_[True, f[1:] < f[:-1]] &
                np.r_[f[:-1] <= f[1:], True] |
                np.r_[True, f[1:] <= f[:-1]] &
                np.r_[f[:-1] < f[1:], True]
            ]
        else:
            # Here kink only serves as an iteration counter.
            kink.append(0)
        i += 1
    return kink[1], f
示例14: clusterGoalies
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def clusterGoalies(df, idx, numOfClusters):
    """Group the entries of `idx` by the k-means cluster closest to their row.

    :param df: data to cluster, one row per entry of `idx`
    :param idx: sequence of identifiers aligned with the rows of `df`
    :param numOfClusters: number of clusters
    :return: list with one list per cluster holding the identifiers whose
        row has its smallest ``fit_transform`` distance in that cluster's column
    """
    model = KMeans(n_clusters=numOfClusters, n_init=20)
    distances = model.fit_transform(df)

    groups = []
    for _ in range(numOfClusters):
        groups.append([])

    for position, row in enumerate(distances):
        # Column of the first smallest distance in this row.
        nearest = min(range(len(row)), key=lambda column: row[column])
        groups[nearest].append(idx[position])
    return groups
示例15: make_cluster
# 需要导入模块: from sklearn.cluster import KMeans [as 别名]
# 或者: from sklearn.cluster.KMeans import fit_transform [as 别名]
def make_cluster(df):
    """Cluster the module-level ``cust_data_transform`` into 4 k-means groups.

    :param df: not read by this function
    :return: tuple of (DataFrame with a "cluster" column holding each row's
        label and a "dist" column holding the smallest value of its
        ``fit_transform`` row, array of cluster centres)
    """
    # NOTE(review): `df` is ignored and the global `cust_data_transform` is
    # clustered instead -- confirm whether the argument was meant to be used.
    model = KMeans(n_clusters=4)
    distances = model.fit_transform(cust_data_transform)

    result = pd.DataFrame()
    result["cluster"] = model.labels_

    # Smallest distance per row, i.e. the distance to the nearest centre.
    nearest_distances = []
    for row in distances:
        nearest_distances.append(min(row))
    result["dist"] = nearest_distances

    return result, model.cluster_centers_