This page collects typical usage examples of scipy.cluster in Python. If you have been wondering how exactly to use Python's scipy.cluster, or what it looks like in real code, the curated samples below may help. You can also explore further usage examples from the containing package, scipy.
Fifteen code examples of scipy.cluster are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
Example 1: _signal_recompose_sum
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import numpy as np

def _signal_recompose_sum(components, clusters):
    # Reorient components so rows are time points and columns are components
    components = components.T
    # Group component indices by cluster label
    clusters = [np.where(clusters == cluster)[0] for cluster in np.unique(clusters)]
    if len(clusters) == 0:
        raise ValueError("Not enough clusters of components detected. Please decrease the `threshold`.")
    # Reconstruct one time series per cluster by summing its correlated components
    recomposed = np.zeros((len(components), len(clusters)))
    for i, indices in enumerate(clusters):
        recomposed[:, i] = components[:, indices].sum(axis=1)
    return recomposed.T
# =============================================================================
# Clustering Methods
# =============================================================================
# Weighted Correlation
# ----------------------------------------------------------------------------
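A minimal sketch of how _signal_recompose_sum might be called, assuming a components array of shape (n_components, n_samples) and one integer cluster label per component (the data and label values here are invented for illustration):

import numpy as np

components = np.random.randn(4, 100)   # 4 decomposed components, 100 samples each
labels = np.array([1, 1, 2, 2])        # hypothetical cluster assignment per component
recomposed = _signal_recompose_sum(components, labels)
print(recomposed.shape)                # (2, 100): one summed series per cluster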
Example 2: process_options
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import logging
import scipy.cluster.vq
# `argparser` and `wvlib` are defined elsewhere in the surrounding module (wvlib project)

def process_options(args):
    options = argparser().parse_args(args)
    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.eps <= 0.0:
        raise ValueError('eps must be > 0')
    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)
    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()
    words, vectors = wv.words(), wv.vectors()
    if options.whiten:
        logging.info('normalize features to unit variance')
        vectors = scipy.cluster.vq.whiten(vectors)
    return words, vectors, options
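The scipy.cluster.vq.whiten call above rescales each feature column to unit variance before clustering. A self-contained sketch (the data below is invented):

import numpy as np
import scipy.cluster.vq

vectors = np.random.randn(100, 3) * np.array([1.0, 10.0, 100.0])  # wildly different scales
whitened = scipy.cluster.vq.whiten(vectors)
print(whitened.std(axis=0))  # each column now has approximately unit standard deviation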
Example 3: compute_group_overlap_score
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import numpy as np

def compute_group_overlap_score(ref_labels, pred_labels,
                                threshold_overlap_pred=0.5,
                                threshold_overlap_ref=0.5):
    """How well do the pred_labels explain the ref_labels?

    A predicted cluster explains a reference cluster if it is contained within the
    reference cluster with at least 50% (threshold_overlap_pred) of its points and
    these correspond to at least 50% (threshold_overlap_ref) of the reference cluster.
    """
    ref_unique, ref_counts = np.unique(ref_labels, return_counts=True)
    ref_dict = dict(zip(ref_unique, ref_counts))
    pred_unique, pred_counts = np.unique(pred_labels, return_counts=True)
    pred_dict = dict(zip(pred_unique, pred_counts))
    summary = []
    for true in ref_unique:
        sub_pred_unique, sub_pred_counts = np.unique(pred_labels[true == ref_labels], return_counts=True)
        relative_overlaps_pred = [sub_pred_counts[i] / pred_dict[n] for i, n in enumerate(sub_pred_unique)]
        relative_overlaps_ref = [sub_pred_counts[i] / ref_dict[true] for i, n in enumerate(sub_pred_unique)]
        pred_best_index = np.argmax(relative_overlaps_pred)
        summary.append(1 if (relative_overlaps_pred[pred_best_index] >= threshold_overlap_pred and
                             relative_overlaps_ref[pred_best_index] >= threshold_overlap_ref)
                       else 0)
        # print(true, sub_pred_unique[pred_best_index], relative_overlaps_pred[pred_best_index],
        #       relative_overlaps_ref[pred_best_index], summary[-1])
    return sum(summary) / len(summary)
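A toy call, assuming labels are numpy arrays of equal length (the values below are invented to show the overlap logic):

import numpy as np

ref = np.array(['A', 'A', 'A', 'B', 'B', 'B'])
pred = np.array([0, 0, 0, 1, 1, 2])
score = compute_group_overlap_score(ref, pred)
print(score)  # 1.0: cluster 0 explains 'A'; cluster 1 explains 'B' (2/2 of its points, 2/3 of 'B')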
Example 4: hierarch_cluster
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import numpy as np

def hierarch_cluster(M):
    """Cluster matrix using hierarchical clustering.

    Parameters
    ----------
    M : np.ndarray
        Matrix, for example, distance matrix.

    Returns
    -------
    Mclus : np.ndarray
        Clustered matrix.
    indices : np.ndarray
        Indices used to cluster the matrix.
    """
    import scipy as sp
    import scipy.cluster
    link = sp.cluster.hierarchy.linkage(M)
    indices = sp.cluster.hierarchy.leaves_list(link)
    Mclus = np.array(M[:, indices])
    Mclus = Mclus[indices, :]
    if False:  # debug plot (`pl` is matplotlib.pyplot in the original module)
        pl.matshow(Mclus)
        pl.colorbar()
    return Mclus, indices
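A minimal sketch of reordering a matrix with the function above (random data, purely illustrative):

import numpy as np

rng = np.random.default_rng(0)
M = rng.random((6, 6))            # any square matrix whose rows/columns we want reordered
Mclus, order = hierarch_cluster(M)
print(order)                      # leaf order that groups similar rows/columns together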
Example 5: performClusteringLinkage
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import numpy as np

def performClusteringLinkage(segmentBKTable, segmentCVTable, N_init, linkageCriterion, linkageMetric):
    from scipy.cluster.hierarchy import linkage
    from scipy import cluster
    if linkageMetric == 'jaccard':
        observations = segmentBKTable
    elif linkageMetric == 'cosine':
        observations = segmentCVTable
    else:
        observations = segmentCVTable
    clusteringTable = np.zeros([np.size(segmentCVTable, 0), N_init])
    Z = linkage(observations, method=linkageCriterion, metric=linkageMetric)
    for i in np.arange(N_init):
        # cut_tree returns labels starting at 0; shift to 1-based cluster ids
        clusteringTable[:, i] = cluster.hierarchy.cut_tree(Z, N_init - i).T + 1
    k = N_init
    print('done')
    return clusteringTable, k
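The core of the loop above is scipy.cluster.hierarchy.cut_tree, which slices a single linkage tree at several cluster counts. A standalone sketch with made-up data:

import numpy as np
from scipy.cluster.hierarchy import linkage, cut_tree

rng = np.random.default_rng(1)
obs = rng.standard_normal((8, 3))
Z = linkage(obs, method='average', metric='euclidean')
for n in (4, 3, 2):
    labels = cut_tree(Z, n).ravel()   # one flat labelling per requested cluster count
    print(n, labels)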
Example 6: dendrogram
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.cluster import hierarchy

def dendrogram(data, threshold, layer_directory):
    colnames = data.columns
    data = np.array(data)
    Z = hierarchy.linkage(data.T, 'single', 'cosine')
    plt.figure(figsize=(15, 9))
    dn = hierarchy.dendrogram(Z, labels=colnames, color_threshold=threshold)
    plt.title("Clustering of Samples Based on Mutational Signatures")
    plt.ylabel("Cosine Distance")
    plt.xlabel("Sample IDs")
    # plt.ylim((0, 1))
    plt.savefig(layer_directory + '/dendrogram.pdf', dpi=300)  # figure size is set on plt.figure above, not in savefig
    # Which data points go to which cluster:
    # the indices of the data points will be displayed as the ids
    Y = hierarchy.fcluster(Z, threshold, criterion='distance', R=None, monocrit=None)
    dataframe = pd.DataFrame({"Cluster": Y, "Sample Names": list(colnames)})
    dataframe = dataframe.set_index("Sample Names")
    # print(dataframe)
    dictionary = {"clusters": Y, "informations": dn}  # dendrogram structure, kept for reference
    return dataframe
######################################## Plot the reconstruction error vs stabilities and select the optimum number of signatures ####################################################
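A hedged toy invocation of the dendrogram function above, assuming a samples-in-columns DataFrame and a writable output directory (all values invented):

import numpy as np
import pandas as pd

data = pd.DataFrame(np.random.rand(96, 4), columns=['S1', 'S2', 'S3', 'S4'])
clusters = dendrogram(data, threshold=0.5, layer_directory='.')
print(clusters)   # one row per sample, with its assigned cluster id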
Example 7: sigma_bin_walls
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import numpy as np
# `pimms` (immutable data structures) is a dependency of the surrounding module (neuropythy)

def sigma_bin_walls(sigma, bins):
    import scipy, scipy.cluster, scipy.cluster.vq as vq
    std = np.std(sigma)
    if np.isclose(std, 0):
        return pimms.imm_array([0, np.max(sigma)])
    # k-means on the standardized values gives bin centers; walls are midpoints between them
    cl = sorted(std * vq.kmeans(sigma / std, bins)[0])
    cl = np.mean([cl[:-1], cl[1:]], axis=0)
    return pimms.imm_array(np.concatenate(([0], cl, [np.max(sigma)])))
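The same centers-to-walls idea, sketched without the pimms dependency (synthetic 1-D data; all names are mine, not from the source):

import numpy as np
import scipy.cluster.vq as vq

sigma = np.abs(np.random.randn(500))          # synthetic positive values to bin
std = sigma.std()
obs = (sigma / std).reshape(-1, 1)            # scipy's vq expects 2-D observations
centers = np.sort(std * vq.kmeans(obs, 4)[0].ravel())
walls = (centers[:-1] + centers[1:]) / 2      # midpoints between adjacent k-means centers
print(np.concatenate(([0], walls, [sigma.max()])))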
Example 8: _signal_recompose_wcorr
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import scipy.cluster.hierarchy

def _signal_recompose_wcorr(components, threshold=0.5, metric="chebyshev"):
    """Cluster components by the weighted correlation (w-correlation) between them."""
    # Calculate the w-correlation matrix (helper defined elsewhere in the module)
    wcorr = _signal_recompose_get_wcorr(components, show=False)
    # Find clusters in the correlation matrix
    # (scipy.cluster.hierarchy re-exports scipy.spatial.distance as `distance`)
    pairwise_distances = scipy.cluster.hierarchy.distance.pdist(wcorr, metric=metric)
    linkage = scipy.cluster.hierarchy.linkage(pairwise_distances, method="complete")
    threshold = threshold * pairwise_distances.max()
    clusters = scipy.cluster.hierarchy.fcluster(linkage, threshold, "distance")
    return clusters
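The pdist → linkage → fcluster pipeline used above, shown standalone on a random matrix (the w-correlation helper is not reproduced here):

import numpy as np
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, fcluster

rng = np.random.default_rng(2)
mat = rng.random((5, 5))
d = pdist(mat, metric="chebyshev")            # pairwise distances between rows
Z = linkage(d, method="complete")
labels = fcluster(Z, 0.5 * d.max(), criterion="distance")
print(labels)                                 # one cluster id per row of `mat`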
Example 9: process_options
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import logging
import math
import scipy.cluster.vq
# `argparser`, `wvlib`, `with_sklearn`, `KMEANS` and `MINIBATCH_KMEANS` are
# defined elsewhere in the surrounding module (wvlib project)

def process_options(args):
    options = argparser().parse_args(args)
    if options.max_rank is not None and options.max_rank < 1:
        raise ValueError('max-rank must be >= 1')
    if options.k is not None and options.k < 2:
        raise ValueError('cluster number must be >= 2')
    if options.method == MINIBATCH_KMEANS and not with_sklearn:
        logging.warning('minibatch kmeans not available, using kmeans (slow)')
        options.method = KMEANS
    if options.jobs != 1 and (options.method != KMEANS or not with_sklearn):
        logging.warning('jobs > 1 only supported for scikit-learn %s' % KMEANS)
        options.jobs = 1
    wv = wvlib.load(options.vectors[0], max_rank=options.max_rank)
    if options.k is None:
        options.k = int(math.ceil((len(wv.words()) / 2) ** 0.5))
        logging.info('set k=%d (%d words)' % (options.k, len(wv.words())))
    if options.normalize:
        logging.info('normalize vectors to unit length')
        wv.normalize()
    words, vectors = wv.words(), wv.vectors()
    if options.whiten:
        logging.info('normalize features to unit variance')
        vectors = scipy.cluster.vq.whiten(vectors)
    return words, vectors, options
Example 10: minibatch_kmeans
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import sklearn.cluster  # in the original module sklearn is imported conditionally; `with_sklearn` records success

def minibatch_kmeans(vectors, k):
    if not with_sklearn:
        raise NotImplementedError
    # Sculley (http://www.eecs.tufts.edu/~dsculley/papers/fastkmeans.pdf)
    # uses batch size 1000. sklearn KMeans defaults to n_init 10.
    kmeans = sklearn.cluster.MiniBatchKMeans(k, batch_size=1000, n_init=10)
    kmeans.fit(vectors)
    return kmeans.labels_
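If scikit-learn is installed, the call above reduces to the following (toy data, invented for illustration):

import numpy as np
import sklearn.cluster

X = np.random.randn(500, 8)
mbk = sklearn.cluster.MiniBatchKMeans(n_clusters=5, batch_size=1000, n_init=10)
labels = mbk.fit(X).labels_
print(np.unique(labels))   # the 5 cluster ids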
Example 11: kmeans
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import numpy
import scipy.cluster.vq
# `with_sklearn` and the conditional `sklearn.cluster` import live elsewhere in the module

def kmeans(vectors, k, jobs=1):
    vectors = numpy.array(vectors)
    if with_sklearn:
        if jobs == 1:
            kmeans = sklearn.cluster.KMeans(k)
        else:
            kmeans = sklearn.cluster.KMeans(k, n_jobs=jobs)  # sklearn > 0.10 (n_jobs was later removed in scikit-learn 1.0)
        kmeans.fit(vectors)
        return kmeans.labels_
    else:
        # Pure-scipy fallback: build a codebook, then assign each vector to it
        codebook, distortion = scipy.cluster.vq.kmeans(vectors, k)
        cluster_ids, dist = scipy.cluster.vq.vq(vectors, codebook)
        return cluster_ids
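A quick look at the scipy fallback path on its own, with random vectors (data invented for illustration):

import numpy as np
import scipy.cluster.vq

vectors = np.random.randn(200, 5)
codebook, distortion = scipy.cluster.vq.kmeans(vectors, 3)    # 3 centroids
cluster_ids, dists = scipy.cluster.vq.vq(vectors, codebook)   # nearest centroid per vector
print(np.bincount(cluster_ids))                               # cluster sizes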
Example 12: write_cluster_ids
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import sys

def write_cluster_ids(words, cluster_ids, out=None):
    """Write given list of words and their corresponding cluster ids to out."""
    assert len(words) == len(cluster_ids), 'word/cluster ids number mismatch'
    if out is None:
        out = sys.stdout
    for word, cid in zip(words, cluster_ids):      # original used Python 2 izip
        print('%s\t%d' % (word, cid), file=out)    # original used Python 2 `print >> out`
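Called with matching-length sequences, it writes one tab-separated word/id pair per line:

write_cluster_ids(['cat', 'dog', 'car'], [0, 0, 1])
# cat   0
# dog   0
# car   1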
Example 13: _auto_color
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import random
import operator
import aiohttp
import scipy
import scipy.cluster
import scipy.misc  # note: scipy.misc.fromimage, scipy.product and scipy.histogram are deprecated/removed in modern SciPy
from PIL import Image

async def _auto_color(self, url: str, ranks):
    phrases = ["Calculating colors..."]  # in case I want more
    # try:
    await self.bot.say("**{}**".format(random.choice(phrases)))
    clusters = 10
    async with aiohttp.get(url) as r:  # old aiohttp API; modern aiohttp uses a ClientSession
        image = await r.content.read()
    with open('data/leveler/temp_auto.png', 'wb') as f:
        f.write(image)
    im = Image.open('data/leveler/temp_auto.png').convert('RGBA')
    im = im.resize((290, 290))  # resized to reduce time
    ar = scipy.misc.fromimage(im)
    shape = ar.shape
    ar = ar.reshape(scipy.product(shape[:2]), shape[2])
    codes, dist = scipy.cluster.vq.kmeans(ar.astype(float), clusters)
    vecs, dist = scipy.cluster.vq.vq(ar, codes)        # assign codes
    counts, bins = scipy.histogram(vecs, len(codes))   # count occurrences
    # sort cluster indices by frequency, most common first
    freq_index = []
    index = 0
    for count in counts:
        freq_index.append((index, count))
        index += 1
    sorted_list = sorted(freq_index, key=operator.itemgetter(1), reverse=True)
    colors = []
    for rank in ranks:
        color_index = min(rank, len(codes) - 1)  # clamp to a valid index (original used len(codes), which can overflow)
        peak = codes[sorted_list[color_index][0]]  # gets the original index
        peak = peak.astype(int)
        colors.append(''.join(format(c, '02x') for c in peak))
    return colors  # returns array of hex strings
    # except:
    #     await self.bot.say("```Error or no scipy. Install scipy doing 'pip3 install numpy' and "
    #                        "'pip3 install scipy' or read here: "
    #                        "https://github.com/AznStevy/Maybe-Useful-Cogs/blob/master/README.md```")
# converts hex to rgb
Example 14: plot_rank_order_dendrogram
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
from typing import Dict, List, Optional, Union

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats
import scipy.cluster.hierarchy as hc
# `PlotSettings` comes from the surrounding package (lumin.plotting.plot_settings)

def plot_rank_order_dendrogram(df: pd.DataFrame, threshold: float = 0.8, savename: Optional[str] = None,
                               settings: PlotSettings = PlotSettings()) -> Dict[str, Union[List[str], float]]:
    r'''
    Plots a dendrogram of features in df clustered via Spearman's rank correlation coefficient.
    Also returns the sets of features with correlation coefficients greater than the threshold.

    Arguments:
        df: Pandas DataFrame containing data
        threshold: threshold on correlation coefficient
        savename: optional name of file to which to save the plot of feature importances
        settings: :class:`~lumin.plotting.plot_settings.PlotSettings` class to control figure appearance

    Returns:
        Dict of sets of features with correlation coefficients greater than the threshold, and cluster distance
    '''
    corr = np.round(scipy.stats.spearmanr(df).correlation, 4)
    corr_condensed = hc.distance.squareform(1 - np.abs(corr))  # Abs because the negative of a feature is a trivial transformation: information unaffected
    z = hc.linkage(corr_condensed, method='average', optimal_ordering=True)
    with sns.axes_style('white'), sns.color_palette(settings.cat_palette):
        plt.figure(figsize=(settings.w_large, 0.5 * len(df.columns)))
        hc.dendrogram(z, labels=df.columns, orientation='left', leaf_font_size=settings.lbl_sz, color_threshold=1 - threshold)
        plt.xlabel("Distance (1 - |Spearman's Rank Correlation Coefficient|)", fontsize=settings.lbl_sz, color=settings.lbl_col)
        plt.xticks(fontsize=settings.tk_sz, color=settings.tk_col)
        if savename is not None:
            plt.savefig(settings.savepath / f'{savename}{settings.format}', bbox_inches='tight')
        plt.show()

    # Walk the linkage matrix and collect the feature groups that merge below the distance threshold
    feats = df.columns
    sets = {}
    for i, merge in enumerate(z):
        if merge[2] > 1 - threshold:
            continue
        if merge[0] <= len(z):
            a = [feats[int(merge[0])]]
        else:
            a = sets.pop(int(merge[0]))['children']
        if merge[1] <= len(z):
            b = [feats[int(merge[1])]]
        else:
            b = sets.pop(int(merge[1]))['children']
        sets[1 + i + len(z)] = {'children': [*a, *b], 'distance': merge[2]}
    return sets
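The grouping logic can also be approximated with plain scipy, cutting the same linkage tree with fcluster instead of walking the merges by hand (a minimal sketch on invented data):

import numpy as np
import pandas as pd
import scipy.stats
import scipy.cluster.hierarchy as hc
from scipy.spatial.distance import squareform

rng = np.random.default_rng(3)
df = pd.DataFrame({'a': rng.random(100)})
df['b'] = df['a'] + 0.01 * rng.random(100)   # nearly a duplicate of 'a'
df['c'] = rng.random(100)                    # independent feature
corr = np.round(scipy.stats.spearmanr(df).correlation, 4)
z = hc.linkage(squareform(1 - np.abs(corr)), method='average')
labels = hc.fcluster(z, 1 - 0.8, criterion='distance')
print(dict(zip(df.columns, labels)))         # 'a' and 'b' share a cluster id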
Example 15: _get_covariance_from_clusters
# Required import: import scipy [as alias]
# Or: from scipy import cluster [as alias]
import numpy as np
from scipy import cluster, spatial

def _get_covariance_from_clusters(self, points):
    """Compute covariance from re-centered clusters."""
    # Compute pairwise Mahalanobis distances between points.
    distances = spatial.distance.pdist(points, metric='mahalanobis',
                                       VI=self.am)
    # Identify conglomerates of points by constructing a linkage matrix.
    linkages = cluster.hierarchy.single(distances)
    # Cut when linkage between clusters exceeds the radius.
    clusteridxs = cluster.hierarchy.fcluster(linkages, 1.0,
                                             criterion='distance')
    nclusters = np.max(clusteridxs)
    if nclusters == 1:
        return self._get_covariance_from_all_points(points)
    else:
        i = 0
        overlapped_points = np.empty_like(points)
        for idx in np.unique(clusteridxs):
            # Re-center each cluster on its own mean, then pool all points
            group_points = points[clusteridxs == idx, :]
            group_mean = group_points.mean(axis=0).reshape((1, -1))
            j = i + len(group_points)
            overlapped_points[i:j, :] = group_points - group_mean
            i = j
        return self._get_covariance_from_all_points(overlapped_points)
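The single-linkage cut used above, isolated on two synthetic blobs (self.am and the covariance helper are not reproduced; plain Euclidean distance stands in for Mahalanobis):

import numpy as np
from scipy import cluster, spatial

rng = np.random.default_rng(4)
blob_a = rng.normal(0.0, 0.1, size=(20, 2))
blob_b = rng.normal(5.0, 0.1, size=(20, 2))
points = np.vstack([blob_a, blob_b])
d = spatial.distance.pdist(points)                      # Euclidean stand-in
linkages = cluster.hierarchy.single(d)
idxs = cluster.hierarchy.fcluster(linkages, 1.0, criterion='distance')
print(np.max(idxs))                                     # 2: the blobs separate cleanly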