This article collects typical usage examples of the vq function from Python's scipy.cluster.vq module. If you have been wondering exactly how vq is used, or what calling it looks like in practice, the hand-picked code samples below should help.
The following 15 code examples of the vq function are presented, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
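Before the individual examples, here is a minimal, self-contained sketch of the usual kmeans + vq workflow (the data and the cluster count are made up purely for illustration):

import numpy as np
from scipy.cluster.vq import whiten, kmeans, vq

# Toy observations: two separated blobs of 2-D points (illustrative data only)
rng = np.random.default_rng(0)
obs = np.vstack([rng.normal(0, 1, (50, 2)), rng.normal(5, 1, (50, 2))])

obs_w = whiten(obs)                        # scale each column to unit variance
centroids, distortion = kmeans(obs_w, 2)   # learn a 2-entry code book
codes, dists = vq(obs_w, centroids)        # nearest code-book index and distance per observation
print(codes[:5], dists[:5])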
Example 1: _get_centroid_mask
def _get_centroid_mask(overtones):
flat = overtones.reshape((len(overtones) * 48, overtones.shape[2]))
f0flat = flat[np.argmax(flat, 1) == 0]
f0flat = f0flat[np.max(f0flat, 1) > 0]
f0flat = (f0flat.T / np.max(f0flat, 1)).T
centroids, distortion = kmeans(f0flat, 24)
codes, dists = vq(f0flat, centroids)
#centroids = centroids[np.bincount(codes) > np.median(np.bincount(codes))]
flat_norm = (flat.T / np.max(flat, 1)).T
codes, dists = vq(flat_norm, centroids)
flat_filtered = np.copy(flat)
for i, (s, c) in enumerate(zip(flat, codes)):
        if c < 0 or c >= len(centroids):
continue
centroid = centroids[c]
centroid_denorm = centroid * np.max(s)
flat_filtered[i, 1:] -= centroid_denorm[1:]
flat_filtered[i, 1:] = np.maximum(flat_filtered[i, 1:], 0)
overtones_filtered = flat_filtered.reshape(overtones.shape)
return overtones_filtered
Example 2: getImageDescriptor
def getImageDescriptor(model, im, conf):
im = standardizeImage(im)
height, width = im.shape[:2]
numWords = model.vocab.shape[1]
frames, descrs = getPhowFeatures(im, conf.phowOpts)
# quantize appearance
if model.quantizer == 'vq':
binsa, _ = vq(descrs.T, model.vocab.T)
elif model.quantizer == 'kdtree':
raise ValueError('quantizer kdtree not implemented')
else:
raise ValueError('quantizer {0} not known or understood'.format(model.quantizer))
hist = []
    for n_spatial_bins_x, n_spatial_bins_y in zip(model.numSpatialX, model.numSpatialY):
binsx, distsx = vq(frames[0, :], linspace(0, width, n_spatial_bins_x))
binsy, distsy = vq(frames[1, :], linspace(0, height, n_spatial_bins_y))
        # binsx and binsy record which spatial bin each feature point falls into
if (numpy.any(distsx < 0)) | (numpy.any(distsx > (width/n_spatial_bins_x+0.5))):
print ("something went wrong")
import pdb; pdb.set_trace()
if (numpy.any(distsy < 0)) | (numpy.any(distsy > (height/n_spatial_bins_y+0.5))):
print ("something went wrong")
import pdb; pdb.set_trace()
# combined quantization
number_of_bins = n_spatial_bins_x * n_spatial_bins_y * numWords
temp = arange(number_of_bins)
# update using this: http://stackoverflow.com/questions/15230179/how-to-get-the-linear-index-for-a-numpy-array-sub2ind
temp = temp.reshape([n_spatial_bins_x, n_spatial_bins_y, numWords])
bin_comb = temp[binsx, binsy, binsa]
hist_temp, _ = histogram(bin_comb, bins=range(number_of_bins+1), density=True)
hist.append(hist_temp)
hist = hstack(hist)
hist = array(hist, 'float32') / sum(hist)
return hist
Example 3: read_unclustered_data
def read_unclustered_data(filename, num_clusters, cl_type="kMeans"):
"""Return dictionary of cluster id to array of points.
    Given a filename whose rows are "lat,lng" pairs, generate k clusters
    based on the arguments. Outputs a dictionary mapping each cluster id
    to a list of (lat, lng) points.
    """
request_points = []
with open(filename, 'rb') as input_file:
input_file.next() # Skip the header row
for line in input_file:
lat, lng = line.split(',')
request_points.append((float(lat), float(lng)))
request_points = array(request_points)
if cl_type == "kMeans":
# computing K-Means with K = num_clusters
centroids, _ = kmeans(request_points, int(num_clusters))
# assign each sample to a cluster
idx, _ = vq(request_points, centroids)
else:
        # computing k-medoids using the distance matrix
centroids = get_kmedoids(request_points, int(num_clusters))
# assign each sample to a cluster
idx, _ = vq(request_points, centroids)
# map cluster lat, lng to cluster index
cluster_points = defaultdict(list)
for i in xrange(len(request_points)):
lat, lng = request_points[i]
cluster_points[idx[i]].append((lat, lng))
return cluster_points
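A hypothetical call, assuming 'requests.csv' starts with a header row followed by "lat,lng" lines (note the function body is written in Python 2 style, e.g. input_file.next() and xrange):

cluster_points = read_unclustered_data('requests.csv', 5)
for cluster_id, pts in cluster_points.items():
    print("%s: %d points" % (cluster_id, len(pts)))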
Example 4: bic_kmeans
def bic_kmeans(features, **kwargs):
'''
Run kmeans on features with **kwargs given to scipy.cluster.vq.kmeans for
different numbers of clusters, k. Choose, finally, the clustering that
    minimizes the Bayesian Information Criterion (BIC).
'''
max_k = int(2*numpy.log(len(features)))
base_distances = vq(features,
numpy.array([numpy.average(features, axis=0)]))[1]
base_std = numpy.std(base_distances)
centers_list = []
bic_list = []
distances_list = []
for k in range(1, max_k+1):
centers = kmeans(features, k, **kwargs)[0]
clusters, distances = vq(features, centers)
bic = calculate_bic(clusters, distances, base_std)
centers_list.append(centers)
distances_list.append(distances)
bic_list.append(bic)
best_index = numpy.argmin(bic_list)
return centers_list[best_index], distances_list[best_index]
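A usage sketch with made-up data; it assumes bic_kmeans and the calculate_bic helper it calls are defined in the same module, and that vq is imported as in the example above:

import numpy
features = numpy.random.rand(200, 4)                 # illustrative feature matrix
centers, distances = bic_kmeans(features, iter=20)   # BIC picks the number of clusters
clusters, _ = vq(features, centers)                  # assignments for the selected centers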
Example 5: _get_larger_chroms
def _get_larger_chroms(ref_file):
"""Retrieve larger chromosomes, avoiding the smaller ones for plotting.
"""
from scipy.cluster.vq import kmeans, vq
all_sizes = []
for c in ref.file_contigs(ref_file):
all_sizes.append(float(c.size))
all_sizes.sort()
# separate out smaller chromosomes and haplotypes with kmeans
centroids, _ = kmeans(np.array(all_sizes), 2)
idx, _ = vq(np.array(all_sizes), centroids)
little_sizes = tz.first(tz.partitionby(lambda xs: xs[0], zip(idx, all_sizes)))
little_sizes = [x[1] for x in little_sizes]
# create one more cluster with the smaller, removing the haplotypes
centroids2, _ = kmeans(np.array(little_sizes), 2)
idx2, _ = vq(np.array(little_sizes), centroids2)
little_sizes2 = tz.first(tz.partitionby(lambda xs: xs[0], zip(idx2, little_sizes)))
little_sizes2 = [x[1] for x in little_sizes2]
# get any chromosomes not in haplotype/random bin
thresh = max(little_sizes2)
larger_chroms = []
for c in ref.file_contigs(ref_file):
if c.size > thresh:
larger_chroms.append(c.name)
return larger_chroms
Example 6: run
def run(self, features, number_of_clusters='3', restarts=10):
if number_of_clusters != 'Use BIC':
k = int(number_of_clusters)
if k == 1:
result = numpy.zeros(len(features), dtype=numpy.int32)
return [result]
return [vq(features, kmeans(features, k, iter=restarts)[0])[0]]
else:
return [vq(features, bic_kmeans(features, iter=restarts)[0])[0]]
Example 7: sphere_tissue_image
def sphere_tissue_image(size=100, n_points=12):
center = np.array([size/2,size/2,size/2],float)
radius = size/4.
points = {}
for p in range(n_points):
theta = np.random.rand()*2.*np.pi
phi = np.random.rand()*np.pi - np.pi/2.
points[p+3] = center + radius*np.array([np.cos(theta)*np.cos(phi),np.sin(theta)*np.cos(phi),np.sin(phi)])
points = array_dict(points)
point_target_area = 4.*np.pi*np.power(radius,2.)/float(n_points)
point_target_distance = np.power(point_target_area/np.pi,0.5)
sigma_deformation = (size/100.)*(20./n_points)
omega_forces = dict(distance=0.1*size/100., repulsion=100.0*np.power(size/100.,2))
for iterations in xrange(100):
point_vectors = np.array([points[p]- points.values() for p in points.keys()])
point_distances = np.array([vq(points.values(),np.array([points[p]]))[1] for p in points.keys()])
point_vectors = point_vectors/(point_distances[...,np.newaxis]+1e-7)
point_distance_forces = omega_forces['distance']*((point_distances-point_target_distance)[...,np.newaxis]*point_vectors/point_target_distance).sum(axis=1)
point_repulsion_forces = omega_forces['repulsion']*np.power(point_target_distance,2)*(point_vectors/(np.power(point_distances,2)+1e-7)[...,np.newaxis]).sum(axis=1)
point_forces = np.zeros((len(points),3))
point_forces += point_distance_forces
point_forces += point_repulsion_forces
point_forces = np.minimum(1.0,sigma_deformation/np.linalg.norm(point_forces,axis=1))[:,np.newaxis] * point_forces
new_points = points.values() + point_forces
new_points = center+ radius*((new_points-center)/np.linalg.norm((new_points-center),axis=1)[:,np.newaxis])
points = array_dict(new_points,points.keys())
points[2] = center
coords = np.transpose(np.mgrid[0:size,0:size,0:size],(1,2,3,0)).reshape((np.power(size,3),3)).astype(int)
labels = points.keys()[vq(coords,points.values())[0]]
ext_coords = coords[vq(coords,np.array([center]))[1]>size/3.]
img = np.ones((size,size,size),np.uint8)
img[tuple(np.transpose(coords))] = labels
img[tuple(np.transpose(ext_coords))] = 1
img = SpatialImage(img,resolution=(60./size,60./size,60./size))
return img
Example 8: performance_measure
def performance_measure(reference_set,experimental_set,measure='jaccard_index'):
VP = (vq(experimental_set,reference_set)[1]==0).sum()
FP = (vq(experimental_set,reference_set)[1]>0).sum()
FN = (vq(reference_set,experimental_set)[1]>0).sum()
if measure == 'true_positive':
return VP
elif measure == 'precision':
return VP/float(VP+FP)
elif measure == 'recall':
return VP/float(VP+FN)
elif measure == 'dice_index':
return 2*VP / float(2*VP+FP+FN)
elif measure == 'jaccard_index':
return VP/float(VP+FP+FN)
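The counting above works because vq reports a distance of exactly 0 only when a point in one set coincides with a point in the other; a small sketch with made-up point sets:

import numpy as np
from scipy.cluster.vq import vq

reference_set = np.array([[0., 0.], [1., 1.], [2., 2.]])
experimental_set = np.array([[0., 0.], [2., 2.], [5., 5.]])

VP = (vq(experimental_set, reference_set)[1] == 0).sum()  # 2 exact matches (true positives)
FP = (vq(experimental_set, reference_set)[1] > 0).sum()   # 1 unmatched experimental point
FN = (vq(reference_set, experimental_set)[1] > 0).sum()   # 1 unmatched reference point
print(VP / float(VP + FP + FN))                           # Jaccard index: 0.5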
Example 9: vectorQuantization
def vectorQuantization (features, bits, debug=False):
from scipy.cluster.vq import vq
D = len(features[0])
np_features = np.array(features)
    nom_features = np.empty(np_features.shape, dtype=object)  # object dtype so multi-character labels are not truncated
for i in range(D):
column = np_features[:,i]
max_val = np.max(column)
min_val = np.min(column)
        denom = bits
step = (max_val - min_val)/denom
partition = [0]*(denom+1)
codebook = [0]*(denom+1)
for j in range(denom+1):
partition[j] = (min_val+(step*j))
codebook[j] = j
column = np.array(column)
partition = np.array(partition)
if debug:
print('****')
print(column)
print(partition)
tmp = vq(column,partition)
nom_col = [str(int(x)+1) for x in tmp[0]]
if debug:
            print(tmp[0])
            print(nom_col)
            print('****')
nom_features[:,i] = nom_col
return nom_features
Example 10: classify_kmeans
def classify_kmeans(infile, clusternumber):
'''
apply kmeans
'''
#Load infile in data array
driver = gdal.GetDriverByName('GTiff')
driver.Register()
ds = gdal.Open(infile, gdal.GA_Update)
databand = ds.GetRasterBand(1)
#Read input raster into array
data = ds.ReadAsArray()
#replace no data value with numpy.nan
#data[data==-999.0]=numpy.nan
pixel = numpy.reshape(data,(data.shape[0]*data.shape[1]))
centroids, variance = kmeans(pixel, clusternumber)
code, distance = vq(pixel,centroids)
centers_idx = numpy.reshape(code,(data.shape[0],data.shape[1]))
clustered = centroids[centers_idx]
# Write outraster to file
databand.WriteArray(clustered)
databand.FlushCache()
#Close file
databand = None
clustered = None
ds = None
Example 11: select
def select(file, output, clusters=None):
"""
Select clusters containing real motifs and discard the rest
Parameters
----------
file : An hdf5 file containing clustered motif matches as generated by birdwerdz.hdf.classify
output : Name of output file which will contain only motifs from selected
clusters. If same as input file, will delete motifs from the file
clusters : Clusters to select
"""
if file == output:
mode = 'r+'
else:
mode = 'w-'
with h5py.File(output, mode) as out:
if file != output:
with h5py.File(file, 'r+') as src:
for entry in src.values():
out['/'].copy(entry,entry.name)
for entry in out.values():
if not isinstance(entry,h5py.Group) or 'motifs' not in entry.keys():
continue
amp_vecs= entry['motifs']['spectrogram'].sum(1)
cluster_path = 'cluster_mean_spectrograms'
id,_ = vq(amp_vecs, out[cluster_path][:].sum(1))
new_motifs=np.delete(entry['motifs'], np.where(
[i not in clusters for i in id])[0])
del entry['motifs']
entry.create_dataset('motifs',data=new_motifs)
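A hypothetical call, assuming 'matches.h5' was produced by birdwerdz.hdf.classify and that clusters 0 and 2 are the ones containing real motifs:

select('matches.h5', 'selected_motifs.h5', clusters=[0, 2])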
Example 12: clustering2
def clustering2(img,clusters):
"another clustering method - no major differences"
#Reshaping image in list of pixels to allow kmean Algorithm
#From 1792x1792x3 to 1792^2x3
pixels = np.reshape(img,(img.shape[0]*img.shape[1],3))
    centroids,_ = kmeans2(pixels, clusters, iter=3, minit='random')
#print ("Centroids : ",centroids.dtype,centroids.shape,type(centroids))
#print centroids
# quantization
#Assigns a code from a code book to each observation
#code : A length N array holding the code book index for each observation.
#dist : The distortion (distance) between the observation and its nearest code.
code,_ = vq(pixels,centroids)
#print ("Code : ",code.dtype,code.shape,type(code))
#print code
# reshaping the result of the quantization
reshaped = np.reshape(code,(img.shape[0],img.shape[1]))
#print ("reshaped : ",reshaped.dtype,reshaped.shape,type(reshaped))
clustered = centroids[reshaped]
#print ("clustered : ",clustered.dtype,clustered.shape,type(clustered))
#scatter3D(centroids)
return clustered
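An illustrative call, assuming img is an RGB image held in a NumPy array of shape (height, width, 3):

import numpy as np
img = np.random.rand(64, 64, 3)     # stand-in for a real image
quantized = clustering2(img, 3)     # every pixel replaced by its cluster's centroid colour
print(quantized.shape)              # (64, 64, 3)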
Example 13: clustering_scipy_kmeans
def clustering_scipy_kmeans(features, n_clust = 8):
"""
"""
whitened = whiten(features)
    print(whitened.shape)
initial = [kmeans(whitened,i) for i in np.arange(1,12)]
plt.plot([var for (cent,var) in initial])
plt.show()
#cent, var = initial[3]
##use vq() to get as assignment for each obs.
#assignment,cdist = vq(whitened,cent)
#plt.scatter(whitened[:,0], whitened[:,1], c=assignment)
#plt.show()
codebook, distortion = kmeans(whitened, n_clust)
    print(codebook, distortion)
assigned_label, dist = vq(whitened, codebook)
for ii in range(8):
plt.subplot(4,2,ii+1)
plt.plot(codebook[ii])
plt.show()
centroid, label = kmeans2(whitened, n_clust, minit = 'points')
    print(centroid, label)
for ii in range(8):
        plt.subplot(4,2,ii+1)
plt.plot(centroid[ii])
plt.show()
Example 14: kmeans
def kmeans(features, projection, ite = 50, k = 4, threshold = 1e-5):
""" perform k_keamns clustering and return a the result as a subsapce clustering object """
from scipy.cluster.vq import kmeans, vq
import datetime
from measures import spatial_coherence
centroids, distance = kmeans(features, k, iter=ite, thresh=threshold)
code, _ = vq(features, centroids)
run_ = datetime.datetime.now().strftime("%y_%m_%d_%H_%M")
params = "projection_size=%d, k=%d" %(len(projection), k)
clusters = clusters_from_code(code, k, projection)
clustering_id = "(%s)_(%s)_(%s)_(%s)" %("exhaustive_kmeans", params, run_, projection)
#print clustering_id
km_clt = KMClustering(algorithm ="exhaustive_kmeans", parameters = params, run = run_,
clustering_id = clustering_id, clusters = clusters, ccontains_noise = False, cclustering_on_dimension = True)
measures = {'spatial_coherence': spatial_coherence(km_clt, len(features))[0], 'distortion': distance}
km_clt.update_measures(measures)
return km_clt
Example 15: new_labelled_page
def new_labelled_page(no_of_samples:int, window_size:int, page_scale:int or tuple, labelled_centroids:[tuple], page_paths:[str]):
### Duplication from above
weighter = gaussian_weighter(window_size)
windower = f.partial(win_centred_on, window=window_size)
shifter = f.partial(point_shift, window=window_size)
scaler = img_scaler(page_scale)
make_observations = compose(prepare_features, real_fft)
img, label = open_image_label(*page_paths)
img, label = scaler(img, label)
f_img = prepare_fft_image(img, window_size)
access_img = img_accessor(img, identity)
access_label = img_accessor(label, identity)
access_f_img = img_accessor(f_img, compose(windower, shifter))
### End of duplication
labels = [a[0] for a in labelled_centroids]
centroids = np.asarray([a[1] for a in labelled_centroids])
new_label = np.zeros_like(label)
for s in img_slices(new_label.shape, 80):
unlabelled_samples = sample_all_in_area(s, applier(identity, compose(make_observations, access_f_img)))
coords = [a[0] for a in unlabelled_samples]
observations = np.asarray([a[1] for a in unlabelled_samples])
codes, dist = vq.vq(observations, centroids)
for i, code in zip(coords, codes):
new_label[i] = labels[code]
return new_label