This page collects typical usage examples of the Python method sklearn.cluster.MiniBatchKMeans.fit_transform. If you have been wondering what exactly MiniBatchKMeans.fit_transform does and how to use it, the curated examples below may help. You can also explore further usage of the containing class, sklearn.cluster.MiniBatchKMeans.
14 code examples of MiniBatchKMeans.fit_transform are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
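Before the examples, a quick note on what fit_transform returns: it fits the model on X and yields an array of shape (n_samples, n_clusters) holding each sample's Euclidean distance to every cluster center; hard assignments come from predict or labels_. A minimal sketch with synthetic data (the array shapes and parameter values here are illustrative assumptions, not taken from any example below):
import numpy as np
from sklearn.cluster import MiniBatchKMeans

# Illustrative synthetic data: 100 samples with 2 features.
X = np.random.RandomState(0).rand(100, 2)

estimator = MiniBatchKMeans(n_clusters=3, random_state=0)
distances = estimator.fit_transform(X)  # shape (100, 3): distance of each sample to each center
labels = estimator.predict(X)           # index of the nearest center per sample

# The row-wise minimum of the distance matrix is the distance
# to the sample's own (assigned) cluster center.
assert np.allclose(distances.min(axis=1), distances[np.arange(len(X)), labels])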
Example 1: processAttributes_surf
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def processAttributes_surf(filePattern):
    targets_data = []
    surf_features = []
    counter = 0
    for f in glob.glob(filePattern):
        counter += 1
        print 'Reading image: ', counter, f
        target = 1 if 'cat' in f else 0
        targets_data.append(target)
        image = mh.imread(f, as_grey=True)
        surf_features.append(surf.surf(image)[:, 5:])
    X_train_surf_features = np.concatenate(surf_features)
    # Clusters
    n_clusters = 300
    print 'Clustering', len(X_train_surf_features), 'features'
    estimator = MiniBatchKMeans(n_clusters=n_clusters)
    estimator.fit_transform(X_train_surf_features)
    x_data = []
    for instance in surf_features:
        clusters = estimator.predict(instance)
        features = np.bincount(clusters)
        if len(features) < n_clusters:
            features = np.append(features, np.zeros((1, n_clusters - len(features))))
        x_data.append(features)
    return x_data, targets_data
Example 2: clusterSurfFeatures
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def clusterSurfFeatures(surf_all_hist, n_clusters):
    # Gather the per-image SURF descriptor arrays
    all_hists = []
    for imagename in surf_all_hist:
        all_hists.append(surf_all_hist[imagename])
    # Stack all descriptors into a single training matrix
    X_train_surf_features = np.concatenate(all_hists)
    print 'Clustering', len(X_train_surf_features), 'features (k=' + str(n_clusters) + ')'
    estimator = MiniBatchKMeans(n_clusters=n_clusters)
    estimator.fit_transform(X_train_surf_features)
    # Build a bag-of-visual-words histogram for each image
    final_features = {}
    for imagename in surf_all_hist:
        instance = surf_all_hist[imagename]
        clusters = estimator.predict(instance)
        features = np.bincount(clusters)
        # Pad with zeros for clusters that never occur in this image
        if len(features) < n_clusters:
            features = np.append(features, np.zeros((1, n_clusters - len(features))))
        final_features[imagename] = features
    return final_features
Example 3: catsAnddogs
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def catsAnddogs():
    import numpy as np
    import mahotas as mh
    from mahotas.features import surf
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import classification_report, precision_score, recall_score, accuracy_score
    import glob
    from sklearn.cluster import MiniBatchKMeans
    all_instance_filenames = []
    all_instance_targets = []
    for f in glob.glob('./data/train/*.jpg'):
        target = 1 if 'cat' in f else 0
        all_instance_filenames.append(f)
        all_instance_targets.append(target)
    surf_features = []
    for f in all_instance_filenames:
        print 'reading image:', f
        image = mh.imread(f, as_grey=True)
        surf_features.append(surf.surf(image)[:, 5:])
    train_len = int(len(all_instance_filenames) * .6)
    X_train_surf_features = np.concatenate(surf_features[:train_len])
    X_test_surf_features = np.concatenate(surf_features[train_len:])
    y_train = all_instance_targets[:train_len]
    y_test = all_instance_targets[train_len:]
    n_clusters = 300
    print 'Clustering', len(X_train_surf_features), 'features'
    estimator = MiniBatchKMeans(n_clusters=n_clusters)
    estimator.fit_transform(X_train_surf_features)
    X_train = []
    for instance in surf_features[:train_len]:
        clusters = estimator.predict(instance)
        features = np.bincount(clusters)
        if len(features) < n_clusters:
            features = np.append(features, np.zeros((1, n_clusters - len(features))))
        X_train.append(features)
    X_test = []
    for instance in surf_features[train_len:]:
        clusters = estimator.predict(instance)
        features = np.bincount(clusters)
        if len(features) < n_clusters:
            features = np.append(features, np.zeros((1, n_clusters - len(features))))
        X_test.append(features)
    clf = LogisticRegression(C=0.001, penalty='l2')
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)
    print classification_report(y_test, predictions)
    print 'precision:', precision_score(y_test, predictions)
    print 'recall:', recall_score(y_test, predictions)
    print 'accuracy:', accuracy_score(y_test, predictions)
Example 4: correct_y
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def correct_y(X, Y):
    # Correct wrongly assigned ZIP codes
    print "Correcting wrong ZIP codes..."
    [N, Nfeats] = X.shape
    NZIP = 857
    # Use k-means clustering to make it faster
    cluster = MiniBatchKMeans(NZIP, init_size=2000, max_iter=500)
    cluster_distance = cluster.fit_transform(X)
    cluster_values = cluster.predict(X)
    clstr = np.zeros((N, 2))
    min_dist = 1000 * np.ones(NZIP)
    Y_min = np.zeros(NZIP)
    # clstr holds, for each row, its cluster index and the distance to that cluster's center
    for i in xrange(N):
        idx = int(cluster_values[i])
        clstr[i][0] = idx
        clstr[i][1] = cluster_distance[i][idx]
        if clstr[i][1] < min_dist[idx]:
            min_dist[idx] = clstr[i][1]
            Y_min[idx] = Y[i]
    counter = 0
    for i in xrange(N):
        idx = int(clstr[i][0])
        if (clstr[i][1] < 1.5) and (int(Y[i] / 1000) == int(Y_min[idx] / 1000)):
            Y[i] = Y_min[idx]
            counter += 1
    print "%s ZIP codes corrected." % counter
    return Y
Example 5: clusterSurfFeatures
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def clusterSurfFeatures(X_train_surf_features, n_clusters, all_instance_filenames):
    print "Clustering", len(X_train_surf_features), "features (k=" + str(n_clusters) + ")"
    estimator = MiniBatchKMeans(n_clusters=n_clusters)
    estimator.fit_transform(X_train_surf_features)
    # Build one bag-of-visual-words histogram per image. Note: `surf_features`
    # is not defined in this function; it is assumed to be a module-level list
    # of per-image descriptor arrays ordered like all_instance_filenames.
    instance_no = 0
    saved_features = {}
    for instance in surf_features:
        clusters = estimator.predict(instance)
        features = np.bincount(clusters)
        imagename = all_instance_filenames[instance_no]
        if len(features) < n_clusters:
            features = np.append(features, np.zeros((1, n_clusters - len(features))))
        imagename = os.path.basename(imagename)
        saved_features[imagename] = ";".join(str(x) for x in features)
        instance_no += 1
    return saved_features
Example 6: clusterSurfFeatures
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def clusterSurfFeatures(definitive_surf_features, n_clusters, all_instance_filenames):
    X_train_surf_features = np.concatenate(definitive_surf_features)
    print 'Clustering', len(X_train_surf_features), 'features (k=' + str(n_clusters) + ')'
    estimator = MiniBatchKMeans(n_clusters=n_clusters)
    estimator.fit_transform(X_train_surf_features)
    # Build one bag-of-visual-words histogram per image
    instance_no = 0
    saved_features = {}
    for instance in definitive_surf_features:
        clusters = estimator.predict(instance)
        features = np.bincount(clusters)
        imagename = all_instance_filenames[instance_no]
        if len(features) < n_clusters:
            features = np.append(features, np.zeros((1, n_clusters - len(features))))
        imagename = os.path.basename(imagename)
        saved_features[imagename] = ';'.join(str(x) for x in features)
        instance_no += 1
    return saved_features
Example 7: do_kmeans
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def do_kmeans(X, X_wrds):
    k_means = MiniBatchKMeans(n_clusters=K, n_init=2, batch_size=10000, init='random', max_iter=10000, verbose=1)
    print 'starting kmeans'
    Y = k_means.fit_transform(X)
    print 'done K-means'
    fw = codecs.open(kmeans_out_file, 'w', encoding='utf-8')
    # Group the words by their assigned cluster id
    cl = {}
    for i in range(len(X)):
        cl_id = int(k_means.predict(X[i]))
        if cl_id not in cl:
            cl[cl_id] = []
        cl[cl_id].append(X_wrds[i])
        if i % 10000 == 0:
            print 'done-', i
    # Write each cluster as one comma-separated line
    for cl_id in cl:
        line = ','.join(cl[cl_id])
        fw.write(line)
        fw.write('\n')
    fw.close()
Example 8: analyze
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def analyze(path, max_k, h5_file):
    category = os.path.basename(h5_file.replace('.h5', ''))
    print('Processing category {}'.format(category))
    data = pd.read_hdf(h5_file, 'data')
    x = range(1, max_k)
    y = []
    for k in x:
        kmeans = MiniBatchKMeans(n_clusters=k)
        try:
            distances = kmeans.fit_transform(np.vstack(data.state))
            # fit_transform() returns Euclidean distances to every centroid; the
            # k-means cost is the sum of squared distances to the nearest centroid.
            y.append(np.sum(np.min(distances, axis=1)**2))
        except ValueError:
            # Raised when the category has fewer samples than clusters
            print('Category {} has only {} samples, skipping rest of kmeans.'.format(category, len(data)))
            break
    plt.clf()
    plt.plot(x[:len(y)], y)
    plt.title(category)
    plt.savefig('{}/kmeans_distances_from_centroids_{}.png'.format(path, category), dpi=300)
Example 9: int
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
print("Explained variance of the SVD step: {}%".format(int(explained_variance * 100)))
print()
dimReFileName = './%s/%s_%sData_dimRe.txt' % (person, person, dataType)
json.dump(X.tolist(), open(dimReFileName, 'w'))
quit()
##################################################
# Do the actual clustering
if opts.minibatch:
    km = MiniBatchKMeans(n_clusters=true_k, init='k-means++', n_init=1,
                         init_size=1000, batch_size=1000, verbose=opts.verbose)
else:
    km = KMeans(n_clusters=true_k, init='k-means++', max_iter=100, n_init=1,
                verbose=opts.verbose)
print("Clustering sparse data with %s" % km)
t0 = time()
km.fit_transform(X)
if not opts.use_hashing:
    print("Top terms per cluster:")
    if opts.n_components:
        # Map the centroids back to the original term space before ranking
        original_space_centroids = svd.inverse_transform(km.cluster_centers_)
        order_centroids = original_space_centroids.argsort()[:, ::-1]
    else:
        order_centroids = km.cluster_centers_.argsort()[:, ::-1]
    global terms
    for i in range(true_k):
Example 10: str
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
import sys
import matplotlib.pyplot as plt
import matplotlib.colors as colors
from sklearn.cluster import MiniBatchKMeans
from numpy import *
from itertools import cycle

dataFn1 = str(sys.argv[1])
data = loadtxt(dataFn1)
noOfClusters1 = int(sys.argv[2])
labelFn1 = str(sys.argv[3])
mbk = MiniBatchKMeans(init='k-means++', n_clusters=noOfClusters1, batch_size=1000,
                      n_init=10, max_no_improvement=10, verbose=0, random_state=0)
mbk.fit_transform(data)
#print mbk.labels_
#print type(mbk.labels_)
# Write one cluster label per line
f = open(labelFn1, 'w')
for item in mbk.labels_:
    f.write('%s\n' % item)
f.close()
#mbk_means_labels_unique = unique(mbk.labels_)
#fig = plt.figure(figsize=(12, 4))
#fig.subplots_adjust(left=0.04, right=0.98, bottom=0.1, top=0.9)
#ax = fig.add_subplot(1,1,1)
# Use all colors that matplotlib provides by default.
Example 11: TruncatedSVD
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
if args.lsa:
    lsa = TruncatedSVD(args.lsa)
    data = lsa.fit_transform(data)
    data = Normalizer(copy=False).fit_transform(data)
print("Finished building matrix ({}x{}, {} elements). Time taken: {}".format(data.shape[0], data.shape[1], data.nnz, time.time() - t0))
t0 = time.time()
clustering_complete = False
n_clusters = args.n_clusters
while not clustering_complete:
    km = MiniBatchKMeans(n_clusters=n_clusters)
    res = km.fit_transform(data)
    clustering_complete = True
    if args.max_dist:
        # If any point lies farther than max_dist from its nearest centroid,
        # retry the clustering with one more cluster.
        for i in xrange(res.shape[0]):
            m = res.item(i, 0)
            for j in xrange(1, res.shape[1]):
                if res.item(i, j) < m:
                    m = res.item(i, j)
            if m > args.max_dist:
                clustering_complete = False
                n_clusters += 1
                print('Distance too big ({}). Increasing cluster number to {}'.format(m, n_clusters))
                break
print("Clustering complete. Time taken: {}".format(time.time() - t0))
Example 12: int
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
# Cache the raw SURF descriptors to disk
a = np.asarray(surf_features)
a.tofile('surf_features.csv', sep=',', format='%10.5f')

# 70/30 train/test split
train_len = int(len(all_instance_filenames) * .70)
X_train_surf_features = np.concatenate(surf_features[:train_len])
X_test_surf_features = np.concatenate(surf_features[train_len:])
y_train = all_instance_targets[:train_len]
y_test = all_instance_targets[train_len:]

n_clusters = 300
print 'Clustering', len(X_train_surf_features), 'features'
estimator = MiniBatchKMeans(n_clusters=n_clusters)
estimator.fit_transform(X_train_surf_features)

X_train = []
for instance in surf_features[:train_len]:
    clusters = estimator.predict(instance)
    features = np.bincount(clusters)
    if len(features) < n_clusters:
        features = np.append(features, np.zeros((1, n_clusters - len(features))))
    X_train.append(features)
X_test = []
for instance in surf_features[train_len:]:
    clusters = estimator.predict(instance)
    features = np.bincount(clusters)
Example 13: MiniBatchKMeans
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
#min_max_scaler = preprocessing.MinMaxScaler()
#patched = min_max_scaler.fit_transform(patched)
###############################################################################
# Constants and Set Values
###############################################################################
X_train = patched
X_test = patched2
###############################################################################
# Main Functions
###############################################################################
estimator = MiniBatchKMeans(n_clusters=5)
estimator.fit_transform(X_train)
y_train = estimator.labels_
print len(y_train)
clf = LogisticRegression(C=0.001, penalty='l2')
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
print len(predictions)
print classification_report(y_train, predictions)
print 'Precision: ', precision_score(y_train, predictions)
print 'Recall: ', recall_score(y_train, predictions)
print 'Accuracy: ', accuracy_score(y_train, predictions)
Example 14: range
# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
    # (tail of the analyze() helper shown in Example 8)
    plt.clf()
    plt.plot(x[:len(y)], y)
    plt.title(category)
    plt.savefig('{}/kmeans_distances_from_centroids_{}.png'.format(path, category), dpi=300)

files = glob.glob('{}/*.h5'.format(args.data_folder))
if args.all:
    # Pool the states from every category and run one global elbow analysis
    data = []
    for h5_file in files:
        _data = pd.read_hdf(h5_file, 'data')  #.state[:args.n]
        data.extend(_data.state)
    y = []
    x = range(1, args.k)
    for k in x:
        kmeans = MiniBatchKMeans(n_clusters=k)
        distances = kmeans.fit_transform(np.vstack(data))
        y.append(np.mean(np.min(distances, axis=1)))
    plt.plot(x, y)
    plt.savefig('{}/kmeans_distances_from_centroids_global.png'.format(args.png_folder), dpi=300)
else:
    # Analyze each category file in parallel
    par_analyze = partial(analyze, args.png_folder, args.k)
    pool = mp.Pool()
    pool.map(par_analyze, files)