

Python MiniBatchKMeans.fit_transform Method Code Examples

This article collects typical usage examples of the Python method sklearn.cluster.MiniBatchKMeans.fit_transform. If you have been wondering what this method does, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also explore the broader usage of sklearn.cluster.MiniBatchKMeans, the class this method belongs to.


Fourteen code examples of MiniBatchKMeans.fit_transform are shown below, ordered by popularity by default. Each snippet is an excerpt from a larger source file, so names and imports beyond the ones noted at the top of each example are assumed to be defined elsewhere in the original project.
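
Before reading the examples, a minimal self-contained sketch of what fit_transform returns may help (the data X, the cluster count, and random_state below are illustrative assumptions, not taken from any example): the method fits the model and maps each sample to its Euclidean distance from every learned cluster center.

import numpy as np
from sklearn.cluster import MiniBatchKMeans

# Illustrative data: 200 samples with 4 features (hypothetical values).
X = np.random.RandomState(0).rand(200, 4)

# fit_transform fits the estimator on X and returns an array of shape
# (n_samples, n_clusters); each row holds that sample's Euclidean
# distance to every cluster center.
km = MiniBatchKMeans(n_clusters=3, random_state=0)
distances = km.fit_transform(X)

print(distances.shape)  # (200, 3)
# A sample's assigned label is the index of its nearest center:
print((distances.argmin(axis=1) == km.labels_).all())  # True

Several examples below rely on exactly this cluster-distance output, e.g. for elbow plots (Example 8) and for distance thresholds (Examples 4 and 11).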

Example 1: processAttributes_surf

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def processAttributes_surf(filePattern):
	targets_data = []
	surf_features = []
	counter = 0
	for f in glob.glob(filePattern):
		counter+=1
		print 'Reading image: ', counter, f

		target = 1 if 'cat' in f else 0
		targets_data.append(target)
		
		image = mh.imread(f, as_grey=True)
		surf_features.append(surf.surf(image)[:, 5:])

	X_train_surf_features = np.concatenate(surf_features)
	
	# Clusters
	n_clusters = 300
	print 'Clustering', len(X_train_surf_features), 'features'
	estimator = MiniBatchKMeans(n_clusters=n_clusters)
	estimator.fit_transform(X_train_surf_features)

	x_data = []
	for instance in surf_features:
		clusters = estimator.predict(instance)
		features = np.bincount(clusters)
		if len(features) < n_clusters:
			features = np.append(features, np.zeros((1, n_clusters-len(features))))

		x_data.append(features)

	return x_data, targets_data
Author: mbonaventura, Project: aa2015, Lines: 34, Source: attribute_extraction.py

Example 2: clusterSurfFeatures

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def clusterSurfFeatures(surf_all_hist, n_clusters):
	#
	all_hists = []
	for imagename in surf_all_hist:		
		all_hists.append(surf_all_hist[imagename])
	#
	X_train_surf_features = np.concatenate(all_hists)
	#		
	print 'Clustering', len(X_train_surf_features), 'features (k=' + str(n_clusters) + ')'
	estimator = MiniBatchKMeans(n_clusters=n_clusters)
	estimator.fit_transform(X_train_surf_features)
	#	
	final_features = {}
	for imagename in surf_all_hist:
		instance = surf_all_hist[imagename]
		#
		clusters = estimator.predict(instance)
		features = np.bincount(clusters)
		#
		if len(features) < n_clusters:
			features = np.append(features, np.zeros((1, n_clusters-len(features))))
		#print features
		#		
		final_features[imagename] = features		
	return final_features
Author: mbonaventura, Project: aa2015, Lines: 27, Source: cats-and-dogs-finder.py

Example 3: catsAnddogs

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def catsAnddogs():
  import numpy as np
  import mahotas as mh
  from mahotas.features import surf
  from sklearn.linear_model import LogisticRegression
  from sklearn.metrics import classification_report, precision_score, recall_score, accuracy_score
  import glob
  from sklearn.cluster import MiniBatchKMeans


  all_instance_filenames = []
  all_instance_targets = []

  for f in glob.glob('./data/train/*.jpg'):
    target = 1 if 'cat' in f else 0
    all_instance_filenames.append(f)
    all_instance_targets.append(target)

  surf_features = []
  counter = 0
  for f in all_instance_filenames:
    print 'reading image:',f
    image = mh.imread(f,as_grey=True)
    surf_features.append(surf.surf(image)[:,5:])

  train_len = int(len(all_instance_filenames)*.6)
  X_train_surf_features = np.concatenate(surf_features[:train_len])
  X_test_surf_features = np.concatenate(surf_features[train_len:])
  y_train = all_instance_targets[:train_len]
  y_test = all_instance_targets[train_len:]

  n_clusters = 300
  print 'Clustering', len(X_train_surf_features), 'features'
  estimator = MiniBatchKMeans(n_clusters=n_clusters)
  estimator.fit_transform(X_train_surf_features)

  X_train = []
  for instance in surf_features[:train_len]:
    clusters = estimator.predict(instance)
    features = np.bincount(clusters)
    if len(features) < n_clusters:
      features = np.append(features,np.zeros((1,n_clusters-len(features))))
    X_train.append(features)

  X_test = []
  for instance in surf_features[train_len:]:
    clusters = estimator.predict(instance)
    features = np.bincount(clusters)
    if len(features) < n_clusters:
      features = np.append(features,np.zeros((1,n_clusters-len(features))))
    X_test.append(features)    

  clf = LogisticRegression(C=0.001, penalty='l2')
  clf.fit(X_train, y_train)
  predictions = clf.predict(X_test)
  print classification_report(y_test,predictions)
  print 'precision:', precision_score(y_test,predictions)
  print 'recall:', recall_score(y_test,predictions)
  print 'accuracy:', accuracy_score(y_test,predictions)
Author: marcinwal, Project: ThoughtfulMachineLearning, Lines: 60, Source: clusteringKmeans.py

Example 4: correct_y

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def correct_y(X,Y):
	# Correct wrongly assigned ZIP codes
	print "Correcting wrong ZIP codes..."
	[N, Nfeats]=X.shape
	NZIP=857
	# use K-means clustering to make it faster
	cluster=MiniBatchKMeans(NZIP,init_size=2000,max_iter=500)
	cluster_distance = cluster.fit_transform(X)
	cluster_values = cluster.predict(X)
	clstr=np.zeros((N,2))
	min_dist=1000*np.ones(NZIP)
	Y_min=np.zeros(NZIP)
	# clstr contains for each line cluster and cluster distance to center
	for i in xrange(N):
		idx = int(cluster_values[i])	
		clstr[i][0]=idx
		clstr[i][1]=cluster_distance[i][idx]
		if (clstr[i][1]<min_dist[idx]) :
			min_dist[idx]=clstr[i][1]
			Y_min[idx]=Y[i]
	counter=0
	for i in xrange(N):
		idx = int(clstr[i][0])
		if ((clstr[i][1]<1.5) & (int(Y[i]/1000)==int(Y_min[idx]/1000))) :	
			Y[i]= Y_min[idx]
			counter+=1
	print "%s ZIP codes corrected.", counter
	return(Y)
Author: swook, Project: KungFuLearning, Lines: 30, Source: train.py

Example 5: clusterSurfFeatures

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def clusterSurfFeatures(X_train_surf_features, n_clusters, all_instance_filenames):
    print "Clustering", len(X_train_surf_features), "features (k=" + str(n_clusters) + ")"
    estimator = MiniBatchKMeans(n_clusters=n_clusters)
    estimator.fit_transform(X_train_surf_features)
    #
    x_data = []
    instance_no = 0
    saved_features = {}
    # NOTE: `surf_features` (the per-image list of SURF descriptor arrays) is
    # assumed to be defined at module level in the original source file.
    for instance in surf_features:
        clusters = estimator.predict(instance)
        features = np.bincount(clusters)
        imagename = all_instance_filenames[instance_no]
        if len(features) < n_clusters:
            features = np.append(features, np.zeros((1, n_clusters - len(features))))
            # print features
            #
        imagename = os.path.basename(imagename)
        saved_features[imagename] = ";".join(str(x) for x in features)
        instance_no += 1
    return saved_features
Author: mbonaventura, Project: aa2015, Lines: 22, Source: cats-and-dogs-surf-train2json.py

Example 6: clusterSurfFeatures

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def clusterSurfFeatures(definitive_surf_features, n_clusters, all_instance_filenames):
	X_train_surf_features = np.concatenate(definitive_surf_features)
	print 'Clustering', len(X_train_surf_features), 'features (k=' + str(n_clusters) + ')'
	#
	estimator = MiniBatchKMeans(n_clusters=n_clusters)
	estimator.fit_transform(X_train_surf_features)
	#
	x_data = []
	instance_no = 0
	saved_features = {}
	for instance in definitive_surf_features:
		clusters = estimator.predict(instance)
		features = np.bincount(clusters)		
		imagename = all_instance_filenames[instance_no]
		if len(features) < n_clusters:
			features = np.append(features, np.zeros((1, n_clusters-len(features))))
		#print features
		#
		imagename = os.path.basename(imagename)
		saved_features[imagename] = ';'.join(str(x) for x in features)
		instance_no += 1
	return saved_features
Author: mbonaventura, Project: aa2015, Lines: 24, Source: cats-and-dogs-surf-cluster.py

Example 7: do_kmeans

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def do_kmeans(X, X_wrds):
	k_means = MiniBatchKMeans(n_clusters=K, n_init=2, batch_size=10000, init='random', max_iter=10000, verbose=1)
	print 'starting kmeans'
	Y = k_means.fit_transform(X)
	print 'done K-means'

	# K and kmeans_out_file are module-level settings in the source file.
	fw = codecs.open(kmeans_out_file, 'w', encoding='utf-8')
	cl = {}
	for i in range(len(X)):
		# predict expects a 2D array, so wrap the single row and take the
		# first (and only) element of the returned label array.
		cl_id = int(k_means.predict(X[i].reshape(1, -1))[0])
		if cl_id not in cl:
			cl[cl_id] = []
		cl[cl_id].append(X_wrds[i])
		if i % 10000 == 0:
			print 'done-', i
	for cl_id in cl:
		line = ','.join(cl[cl_id])
		fw.write(line)
		fw.write('\n')
	fw.close()
Author: iesl, Project: fuse_ttl, Lines: 23, Source: kmeans.py

Example 8: analyze

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
def analyze(path, max_k, h5_file):
    category = os.path.basename(h5_file.replace('.h5', ''))
    print('Processing category {}'.format(category))
    data = pd.read_hdf(h5_file, 'data')
    
    x = range(1,max_k)
    y = []
    for k in x:
        kmeans = MiniBatchKMeans(n_clusters=k)
        try:
            distances = kmeans.fit_transform(np.vstack(data.state))
            # transform() returns euclidean distance. The cost function of kmeans is the sum of all
            # squared distances.
            y.append(np.sum(np.min(distances, axis=1)**2)) 
        except ValueError:
            # Raised when the category has fewer samples than n_clusters.
            print('Category {} has only {} samples, skipping rest of kmeans.'.format(category, len(data)))
            break

    plt.clf()
    plt.plot(x[:len(y)],y)
    plt.title(category)
    plt.savefig('{}/kmeans_distances_from_centroids_{}.png'.format(path, category), dpi=300)
Author: axeltidemann, Project: propeller, Lines: 24, Source: states_kmeans.py

Example 9: int

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
        int(explained_variance * 100)))

    print()
dimReFileName = './%s/%s_%sData_dimRe.txt' % (person,person,dataType)
json.dump(X.tolist(),open(dimReFileName,'w'))
quit()
##################################################
# Do the actual clustering

if opts.minibatch:
    km = MiniBatchKMeans(n_clusters=true_k, init='k-means++', n_init=1,
                         init_size=1000, batch_size=1000, verbose=opts.verbose)
else:
    km = KMeans(n_clusters=true_k, init='k-means++', max_iter=100, n_init=1,
                verbose=opts.verbose)

print("Clustering sparse data with %s" % km)
t0 = time()
km.fit_transform(X)

if not opts.use_hashing:
    print("Top terms per cluster:")

    if opts.n_components:
        original_space_centroids = svd.inverse_transform(km.cluster_centers_)
        order_centroids = original_space_centroids.argsort()[:, ::-1]
    else:
        order_centroids = km.cluster_centers_.argsort()[:, ::-1]
    #print str(order_centroids[:, :10])
    global terms
    for i in range(true_k):
Author: b01502102, Project: Data-Science-Final-Project, Lines: 33, Source: debug.py

Example 10: str

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
import sys

import matplotlib.pyplot as plt
import matplotlib.colors as colors

from sklearn.cluster import MiniBatchKMeans
from numpy import *
from itertools import cycle

dataFn1 = str(sys.argv[1])
data = loadtxt(dataFn1)

noOfClusters1 = int(sys.argv[2])
labelFn1 = str(sys.argv[3])

mbk = MiniBatchKMeans(init='k-means++', n_clusters=noOfClusters1, batch_size=1000,n_init=10,max_no_improvement=10, verbose=0, random_state=0)

mbk.fit_transform(data) 
#print mbk.labels_
#print type(mbk.labels_)

f = open(labelFn1, 'w')
for item in mbk.labels_:
	f.write('%s\n' % item)
f.close()


#mbk_means_labels_unique = unique(mbk.labels_)

#fig = plt.figure(figsize=(12, 4))
#fig.subplots_adjust(left=0.04, right=0.98, bottom=0.1, top=0.9)
#ax = fig.add_subplot(1,1,1)

# Use all colors that matplotlib provides by default.
Author: abhisheknkar, Project: IISc_MIComputations, Lines: 33, Source: Cluster.py

Example 11: TruncatedSVD

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
if args.lsa:
    lsa = TruncatedSVD(args.lsa)
    data = lsa.fit_transform(data)
    data = Normalizer(copy=False).fit_transform(data)

print("Finished building matrix ({}x{}, {} elements). Time taken: {}".format(data.shape[0], data.shape[1], data.nnz, time.time() - t0))
t0 = time.time()

clustering_complete = False

n_clusters = args.n_clusters

while not clustering_complete:

    km = MiniBatchKMeans(n_clusters=n_clusters)
    res = km.fit_transform(data)

    clustering_complete = True

    if args.max_dist:
        for i in xrange(res.shape[0]):
            m = res.item(i, 0)
            for j in xrange(1, res.shape[1]):
                if res.item(i, j) < m:
                    m = res.item(i, j)
            if m > args.max_dist:
                clustering_complete = False
                n_clusters += 1
                print('Distance too big ({}). Increasing cluster number to {}'.format(m, n_clusters))
                break
print("Clustering complete. Time taken: {}".format(time.time() - t0))
Author: usakey, Project: Any, Lines: 33, Source: clusterer.py

Example 12: int

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
a = np.asarray(surf_features)
a.tofile('surf_features.csv', sep=',', format='%10.5f')

#

train_len = int(len(all_instance_filenames) * .70)
X_train_surf_features = np.concatenate(surf_features[:train_len])
X_test_surf_features = np.concatenate(surf_features[train_len:])
y_train = all_instance_targets[:train_len]
y_test = all_instance_targets[train_len:]

#
n_clusters = 300
print 'Clustering', len(X_train_surf_features), 'features'
estimator = MiniBatchKMeans(n_clusters=n_clusters)
estimator.fit_transform(X_train_surf_features)

#
X_train = []
for instance in surf_features[:train_len]:
	clusters = estimator.predict(instance)
	features = np.bincount(clusters)
	if len(features) < n_clusters:
		features = np.append(features, np.zeros((1, n_clusters-len(features))))

	X_train.append(features)

X_test = []
for instance in surf_features[train_len:]:
	clusters = estimator.predict(instance)
	features = np.bincount(clusters)
Author: mbonaventura, Project: aa2015, Lines: 33, Source: cats-and-dogs.py

Example 13: MiniBatchKMeans

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
#min_max_scaler = preprocessing.MinMaxScaler()
#patched = min_max_scaler.fit_transform(patched)

###############################################################################
# Constants and Set Values
###############################################################################

X_train = patched
X_test = patched2

###############################################################################
# Main Functions
###############################################################################

estimator = MiniBatchKMeans(n_clusters = 5)
estimator.fit_transform(X_train)
y_train = estimator.labels_

print len(y_train)

clf = LogisticRegression(C=0.001, penalty='l2')
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)

print len(predictions)

# Note: test-set predictions are scored against y_train; this only makes
# sense if the rows of X_test correspond one-to-one with those of X_train.
print classification_report(y_train, predictions)
print 'Precision: ', precision_score(y_train, predictions)
print 'Recall: ', recall_score(y_train, predictions)
print 'Accuracy: ', accuracy_score(y_train, predictions)
Author: jra32, Project: skidmarks, Lines: 32, Source: semisupcluster.py

Example 14: range

# Required import: from sklearn.cluster import MiniBatchKMeans [as alias]
# Or: from sklearn.cluster.MiniBatchKMeans import fit_transform [as alias]
    plt.clf()
    plt.plot(x[:len(y)],y)
    plt.title(category)
    plt.savefig('{}/kmeans_distances_from_centroids_{}.png'.format(path, category), dpi=300)

files = glob.glob('{}/*.h5'.format(args.data_folder))

if args.all:
    data = []
    for h5_file in files:
        _data = pd.read_hdf(h5_file, 'data')#.state[:args.n]
        data.extend(_data.state)

    y = []
    x = range(1, args.k)
    for k in x:
        kmeans = MiniBatchKMeans(n_clusters=k)
        distances = kmeans.fit_transform(np.vstack(data))
        y.append(np.mean(np.min(distances, axis=1)))

    plt.plot(x,y)
    plt.savefig('{}/kmeans_distances_from_centroids_global.png'.format(args.png_folder), dpi=300)
    
else:
    par_analyze = partial(analyze, args.png_folder, args.k)
    pool = mp.Pool()
    pool.map(par_analyze, files)


Author: axeltidemann, Project: propeller, Lines: 29, Source: states_kmeans.py


Note: The sklearn.cluster.MiniBatchKMeans.fit_transform examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright in the source code remains with the original authors. Consult the corresponding project's license before distributing or reusing the code, and do not reproduce this compilation without permission.