This page collects typical usage examples of the Python function scipy.cluster.hierarchy.linkage. If you are wondering what hierarchy.linkage does, how to call it, or what real code that uses it looks like, the curated examples below should help. You can also browse further examples from the module it belongs to, scipy.cluster.hierarchy.
The following sections show 15 code examples of the hierarchy.linkage method, sorted by popularity by default.
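Before the individual examples, here is a minimal, self-contained sketch of the typical call pattern (the toy data and parameter values are purely illustrative): condense the pairwise distances with pdist, build the linkage matrix, then cut the tree into flat clusters with fcluster.

import numpy as np
from scipy.cluster.hierarchy import linkage, fcluster
from scipy.spatial.distance import pdist

# Hypothetical toy data: six 2-D points forming two loose groups.
X = np.array([[0.0, 0.0], [0.1, 0.2], [0.0, 0.3],
              [5.0, 5.0], [5.1, 5.2], [4.9, 5.1]])

# linkage accepts either raw observations or a condensed distance vector;
# passing the pdist output makes the distance metric explicit.
Z = linkage(pdist(X, metric='euclidean'), method='ward')

# Each row of Z records (cluster_i, cluster_j, merge_distance, sample_count).
labels = fcluster(Z, t=2, criterion='maxclust')   # cut into at most 2 flat clusters
print(labels)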
Example 1: test_linkage_misc
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def test_linkage_misc():
# Misc tests on linkage
rng = np.random.RandomState(42)
X = rng.normal(size=(5, 5))
assert_raises(ValueError, AgglomerativeClustering(linkage='foo').fit, X)
assert_raises(ValueError, linkage_tree, X, linkage='foo')
assert_raises(ValueError, linkage_tree, X, connectivity=np.ones((4, 4)))
# Smoke test FeatureAgglomeration
FeatureAgglomeration().fit(X)
# test hierarchical clustering on a precomputed distances matrix
dis = cosine_distances(X)
res = linkage_tree(dis, affinity="precomputed")
assert_array_equal(res[0], linkage_tree(X, affinity="cosine")[0])
# test hierarchical clustering on a precomputed distances matrix
res = linkage_tree(X, affinity=manhattan_distances)
assert_array_equal(res[0], linkage_tree(X, affinity="manhattan")[0])
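The test above exercises scikit-learn internals (linkage_tree and the private tree builders). As a hedged sketch of the corresponding public API, with made-up data and parameter choices, the same ideas look like this:

import numpy as np
from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration

rng = np.random.RandomState(42)
X = rng.normal(size=(5, 5))

# Cluster samples; 'foo' above is rejected because only 'ward', 'complete',
# 'average' and 'single' are valid linkage values.
labels = AgglomerativeClustering(n_clusters=2, linkage='average').fit_predict(X)

# FeatureAgglomeration clusters columns instead of rows: 5 features -> 2.
X_reduced = FeatureAgglomeration(n_clusters=2).fit_transform(X)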
Example 2: test_structured_linkage_tree
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def test_structured_linkage_tree():
# Check that we obtain the correct solution for structured linkage trees.
rng = np.random.RandomState(0)
mask = np.ones([10, 10], dtype=bool)
# Avoiding a mask with only 'True' entries
mask[4:7, 4:7] = 0
X = rng.randn(50, 100)
connectivity = grid_to_graph(*mask.shape)
for tree_builder in _TREE_BUILDERS.values():
children, n_components, n_leaves, parent = \
tree_builder(X.T, connectivity)
n_nodes = 2 * X.shape[1] - 1
assert len(children) + n_leaves == n_nodes
# Check that ward_tree raises a ValueError with a connectivity matrix
# of the wrong shape
assert_raises(ValueError,
tree_builder, X.T, np.ones((4, 4)))
# Check that fitting with no samples raises an error
assert_raises(ValueError,
tree_builder, X.T[:0], connectivity)
Example 3: test_unstructured_linkage_tree
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def test_unstructured_linkage_tree():
# Check that we obtain the correct solution for unstructured linkage trees.
rng = np.random.RandomState(0)
X = rng.randn(50, 100)
for this_X in (X, X[0]):
# With a specified number of clusters just for the sake of
# raising a warning and testing the warning code
with ignore_warnings():
children, n_nodes, n_leaves, parent = assert_warns(
UserWarning, ward_tree, this_X.T, n_clusters=10)
n_nodes = 2 * X.shape[1] - 1
assert_equal(len(children) + n_leaves, n_nodes)
for tree_builder in _TREE_BUILDERS.values():
for this_X in (X, X[0]):
with ignore_warnings():
children, n_nodes, n_leaves, parent = assert_warns(
UserWarning, tree_builder, this_X.T, n_clusters=10)
n_nodes = 2 * X.shape[1] - 1
assert_equal(len(children) + n_leaves, n_nodes)
Example 4: test_identical_points
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def test_identical_points():
# Ensure identical points are handled correctly when using mst with
# a sparse connectivity matrix
X = np.array([[0, 0, 0], [0, 0, 0],
[1, 1, 1], [1, 1, 1],
[2, 2, 2], [2, 2, 2]])
true_labels = np.array([0, 0, 1, 1, 2, 2])
connectivity = kneighbors_graph(X, n_neighbors=3, include_self=False)
connectivity = 0.5 * (connectivity + connectivity.T)
connectivity, n_components = _fix_connectivity(X,
connectivity,
'euclidean')
for linkage in ('single', 'average', 'complete', 'ward'):
clustering = AgglomerativeClustering(n_clusters=3,
linkage=linkage,
connectivity=connectivity)
clustering.fit(X)
assert_almost_equal(normalized_mutual_info_score(clustering.labels_,
true_labels), 1)
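The public-API portion of this test can be run on its own; the sketch below reuses the same data layout, while the cluster count and neighbour count mirror the test rather than any recommended defaults.

import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.neighbors import kneighbors_graph

X = np.array([[0, 0, 0], [0, 0, 0],
              [1, 1, 1], [1, 1, 1],
              [2, 2, 2], [2, 2, 2]], dtype=float)

# A sparse connectivity graph restricts which merges are considered;
# symmetrizing it keeps the graph undirected, as in the test.
connectivity = kneighbors_graph(X, n_neighbors=3, include_self=False)
connectivity = 0.5 * (connectivity + connectivity.T)

labels = AgglomerativeClustering(n_clusters=3, linkage='single',
                                 connectivity=connectivity).fit_predict(X)
print(labels)   # duplicate points land in the same cluster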
Example 5: test_cluster_distances_with_distance_threshold
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def test_cluster_distances_with_distance_threshold():
rng = np.random.RandomState(0)
n_samples = 100
X = rng.randint(-10, 10, size=(n_samples, 3))
# check the distances within the clusters and with other clusters
distance_threshold = 4
clustering = AgglomerativeClustering(
n_clusters=None,
distance_threshold=distance_threshold,
linkage="single").fit(X)
labels = clustering.labels_
D = pairwise_distances(X, metric="minkowski", p=2)
# to avoid taking the 0 diagonal in min()
np.fill_diagonal(D, np.inf)
for label in np.unique(labels):
in_cluster_mask = labels == label
max_in_cluster_distance = (D[in_cluster_mask][:, in_cluster_mask]
.min(axis=0).max())
min_out_cluster_distance = (D[in_cluster_mask][:, ~in_cluster_mask]
.min(axis=0).min())
# single data point clusters only have that inf diagonal here
if in_cluster_mask.sum() > 1:
assert max_in_cluster_distance < distance_threshold
assert min_out_cluster_distance >= distance_threshold
Example 6: create_newick_file_from_matrix_file
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def create_newick_file_from_matrix_file(observation_matrix_path, output_file_path, linkage=constants.linkage_method_default,
distance=constants.distance_metric_default, norm='l1', progress=progress, transpose=False,
items_order_file_path=None):
is_distance_and_linkage_compatible(distance, linkage)
filesnpaths.is_file_exists(observation_matrix_path)
filesnpaths.is_file_tab_delimited(observation_matrix_path)
filesnpaths.is_output_file_writable(output_file_path)
if items_order_file_path:
filesnpaths.is_output_file_writable(items_order_file_path)
id_to_sample_dict, sample_to_id_dict, header, vectors = utils.get_vectors_from_TAB_delim_matrix(observation_matrix_path, transpose=transpose)
vectors = np.array(vectors)
newick = get_newick_from_matrix(vectors, distance, linkage, norm, id_to_sample_dict)
if output_file_path:
open(output_file_path, 'w').write(newick.strip() + '\n')
if items_order_file_path:
open(items_order_file_path, 'w').write('\n'.join(utils.get_names_order_from_newick_tree(newick)) + '\n')
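get_newick_from_matrix and the other helpers used here are anvi'o utilities that are not shown in this snippet. Purely as an assumption about what such a conversion can look like, a linkage matrix can be turned into a Newick string with scipy's to_tree, for example:

from scipy.cluster.hierarchy import to_tree

def linkage_to_newick(Z, leaf_names):
    # Hypothetical helper: walk the ClusterNode tree returned by to_tree and
    # emit a Newick string whose branch lengths come from the merge heights.
    root = to_tree(Z, rd=False)

    def build(node, parent_dist):
        length = parent_dist - node.dist
        if node.is_leaf():
            return '%s:%.6f' % (leaf_names[node.id], length)
        left = build(node.get_left(), node.dist)
        right = build(node.get_right(), node.dist)
        return '(%s,%s):%.6f' % (left, right, length)

    return build(root, root.dist) + ';'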
Example 7: _get_clusters
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def _get_clusters(self):
"""Cluster the data according to the specified dimensions.
Returns:
- tuple: The linkage matrices for the columns and/or rows.
"""
Zcol = None
Zrow = None
# cluster along columns
if self._cluster in ["col", "all"]:
tmp = np.transpose(self._data)
dcol = self._dist_fun(tmp, metric=self._col_dist)
Zcol = self._link_fun(dcol, optimal_ordering=self._optimal_leaf_order)
# cluster along rows only if 'all' is selected
if self._cluster in ["row", "all"]:
drow = self._dist_fun(self._data, metric=self._row_dist)
Zrow = self._link_fun(drow, optimal_ordering=self._optimal_leaf_order)
return (Zcol, Zrow)
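self._dist_fun and self._link_fun are injected elsewhere in this class; assuming they are bound to scipy's pdist and linkage (an assumption, not something the snippet shows), the column branch reduces to something like:

import numpy as np
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage

rng = np.random.RandomState(0)
data = rng.rand(6, 10)        # illustrative matrix: 6 rows, 10 columns

# Cluster along columns: transpose, condense the distances, then link.
# optimal_ordering (SciPy >= 1.0) reorders leaves to minimize neighbouring distances.
dcol = pdist(data.T, metric='euclidean')
Zcol = linkage(dcol, method='complete', optimal_ordering=True)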
Example 8: get_col_linkage
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def get_col_linkage(combined_df, method='ward', metric='cosine'):
CACHE_DIR = os.path.expanduser('~/cache/alt_splice_heatmap/sqtl')
if not os.path.exists(CACHE_DIR): os.makedirs(CACHE_DIR)
col_linkage_cache_path = os.path.join(CACHE_DIR, 'col_linkage_%s_%s.npy' %(method, metric))
idx_linkage_cache_path = os.path.join(CACHE_DIR, 'idx.npy')
col_name_cache_path = os.path.join(CACHE_DIR, 'col_names.npy')
if os.path.exists(col_linkage_cache_path):
print "Loading linkage from %s" %col_linkage_cache_path
col_linkage = np.load(col_linkage_cache_path)
assert np.array_equal(np.load(idx_linkage_cache_path), combined_df.index)
assert np.array_equal(np.load(col_name_cache_path), combined_df.columns)
else:
print "Calculating linkage"
col_linkage = hc.linkage(sp.distance.pdist(combined_df.values.T), method=method, metric=metric)
np.save(col_linkage_cache_path, col_linkage)
np.save(idx_linkage_cache_path, combined_df.index)
np.save(col_name_cache_path, combined_df.columns)
return col_linkage
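One detail worth noting about the hc.linkage call in this example: when linkage receives a condensed distance vector from pdist, its metric argument is ignored (it only applies to raw observation input), so the metric is effectively chosen at the pdist step. A small self-contained sketch with made-up data:

import numpy as np
import scipy.cluster.hierarchy as hc
from scipy.spatial import distance

rng = np.random.RandomState(0)
values = rng.rand(20, 8)      # illustrative stand-in for combined_df.values

# Choose the metric when condensing; linkage then only needs the method.
condensed = distance.pdist(values.T, metric='cosine')
col_linkage = hc.linkage(condensed, method='ward')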
Example 9: create_cluster_map
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def create_cluster_map(self, bow, srcids):
cluster_map = {}
z = linkage(bow, metric='cityblock', method='complete')
dists = sorted(set(z[:, 2]))
thresh = (dists[1] + dists[2]) / 2
self.logger.info('Threshold: {0}'.format(thresh))
b = hier.fcluster(z, thresh, criterion='distance')
assert bow.shape[0] == len(b)
assert len(b) == len(srcids)
for cid, srcid in zip(b, srcids):
cluster_map[cid] = cluster_map.get(cid, []) + [srcid]
self.logger.info('# of clusters: {0}'.format(len(b)))
self.logger.info('sizes of clusters: {0}'.format(sorted(map(len, cluster_map.values()))))
return cluster_map
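A self-contained sketch of the same cluster-then-cut pattern, with a synthetic bag-of-words matrix and an illustrative way of deriving the distance threshold from the sorted merge heights:

import numpy as np
from scipy.cluster.hierarchy import linkage, fcluster

rng = np.random.RandomState(0)
bow = rng.poisson(1.0, size=(20, 50)).astype(float)   # illustrative bag-of-words counts

Z = linkage(bow, metric='cityblock', method='complete')
merge_dists = sorted(set(Z[:, 2]))                    # sorted unique merge heights
thresh = (merge_dists[1] + merge_dists[2]) / 2
labels = fcluster(Z, thresh, criterion='distance')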
Example 10: hier_clustering
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def hier_clustering(d, threshold=3):
srcids = d.keys()
tokenizer = lambda x: x.split()
vectorizer = TfidfVectorizer(tokenizer=tokenizer)
assert isinstance(d, dict)
assert isinstance(list(d.values())[0], list)
assert isinstance(list(d.values())[0][0], str)
doc = [' '.join(d[srcid]) for srcid in srcids]
vect = vectorizer.fit_transform(doc)
#TODO: Make vect aligned to the required format
z = linkage(vect.toarray(), metric='cityblock', method='complete')
dists = list(set(z[:,2]))
# threshold = 3
#threshold = (dists[2] + dists[3]) / 2
b = hier.fcluster(z, threshold, criterion='distance')
cluster_dict = defaultdict(list)
for srcid, cluster_id in zip(srcids, b):
cluster_dict[str(cluster_id)].append(srcid)
value_lengther = lambda x: len(x[1])
return OrderedDict(\
sorted(cluster_dict.items(), key=value_lengther, reverse=True))
Example 11: generate_graphs
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def generate_graphs(clusters_list, output, size, linkage, cutoff, distances, traj):
"""
DESCRIPTION
Create a linear cluster mapping graph where every frame is printed as a
colored barplot
Args:
clusters_list (list): list of cluster
output (string): output name for graph
size (int): number of frames
linkage (numpy array): matrix linkage
cutoff (float): cutoff distance value for clustering (in the dendrogram)
distances(numpy array): distance matrix
traj (Trajectory): trajectory for time usage in axis barplot
Return:
colors_list (list) to be used with 2D distance projection graph
"""
colors_list = plot_barplot(clusters_list, output, size, traj)
plot_dendro(linkage, output, cutoff, colors_list, clusters_list)
plot_hist(clusters_list, output, colors_list)
if (distances.shape[0] < 10000):
plot_distmat(distances, output)
else:
printScreenLogfile("Too many frames! The RMSD distance matrix will not be generated")
return colors_list
Example 12: get_hrp
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def get_hrp(cov, corr):
"""Construct a hierarchical portfolio
Params
------
cov: pd.DataFrame
corr: pd.DataFrame
Returns
-------
pd.Series
"""
dist = get_corr_dist(corr)
link = sch.linkage(dist, 'single')
sort_idx = get_quasi_diag(link)
# Recover label
sort_idx = corr.index[sort_idx].tolist()
hrp = get_rec_bipart(cov, sort_idx)
return hrp.sort_index()
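get_corr_dist, get_quasi_diag and get_rec_bipart are helpers from the surrounding HRP module and are not shown here. For the distance transform and the quasi-diagonal ordering alone, a sketch using only scipy (with an illustrative three-asset correlation matrix) could look like:

import numpy as np
import pandas as pd
import scipy.cluster.hierarchy as sch
from scipy.spatial.distance import squareform

corr = pd.DataFrame([[1.0, 0.7, 0.2],
                     [0.7, 1.0, 0.1],
                     [0.2, 0.1, 1.0]],
                    index=list('ABC'), columns=list('ABC'))

# Classic HRP distance transform, then single linkage on the condensed form.
dist = np.sqrt(0.5 * (1.0 - corr))
link = sch.linkage(squareform(dist.values, checks=False), method='single')

# leaves_list returns the dendrogram leaf order, i.e. the quasi-diagonal ordering.
sort_idx = corr.index[sch.leaves_list(link)].tolist()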
Example 13: performClusteringLinkage
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def performClusteringLinkage(segmentBKTable, segmentCVTable, N_init, linkageCriterion, linkageMetric):
from scipy.cluster.hierarchy import linkage
from scipy import cluster
if linkageMetric == 'jaccard':
observations = segmentBKTable
elif linkageMetric == 'cosine':
observations = segmentCVTable
else:
observations = segmentCVTable
clusteringTable = np.zeros([np.size(segmentCVTable,0),N_init])
Z = linkage(observations,method=linkageCriterion,metric=linkageMetric)
for i in np.arange(N_init):
clusteringTable[:,i] = cluster.hierarchy.cut_tree(Z,N_init-i).T+1
k=N_init
print('done')
return clusteringTable, k
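Since cut_tree accepts a sequence of cluster counts, the per-column loop above can also be collapsed into a single call; the sketch below uses made-up observations and illustrative linkage settings.

import numpy as np
from scipy.cluster.hierarchy import linkage, cut_tree

rng = np.random.RandomState(0)
observations = rng.rand(12, 4)    # illustrative stand-in for segmentCVTable
N_init = 5

Z = linkage(observations, method='complete', metric='cosine')
# Columns follow the order of the requested counts, here N_init down to 1,
# matching the loop above; +1 shifts the labels to start at 1.
clusteringTable = cut_tree(Z, n_clusters=range(N_init, 0, -1)) + 1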
Example 14: dendrogram
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def dendrogram(data, threshold, layer_directory):
colnames = data.columns
data = np.array(data)
Z = hierarchy.linkage(data.T, 'single', 'cosine')
plt.figure(figsize=(15, 9))
dn = hierarchy.dendrogram(Z, labels = colnames, color_threshold=threshold)
plt.title("Clustering of Samples Based on Mutational Signatures" )
plt.ylabel("Cosine Distance")
plt.xlabel("Sample IDs")
#plt.ylim((0,1))
plt.savefig(layer_directory + '/dendrogram.pdf', dpi=300)
# which data points go to which cluster
# The indices of the datapoints will be displayed as the ids
Y = hierarchy.fcluster(Z, threshold, criterion='distance', R=None, monocrit=None)
dataframe = pd.DataFrame({"Cluster":Y, "Sample Names":list(colnames)})
dataframe = dataframe.set_index("Sample Names")
#print(dataframe)
dictionary = {"clusters":Y, "informations":dn}
return dataframe
Example 15: __agglomerative__
# Required imports: from scipy.cluster import hierarchy [as alias]
# Or: from scipy.cluster.hierarchy import linkage [as alias]
def __agglomerative__(self,markings):
"""
runs an initial agglomerative clustering over the given markings
:param markings:
:return:
"""
# this converts stuff into panda format - probably a better way to do this but the labels do seem
# necessary
labels = [str(i) for i in markings]
param_labels = [str(i) for i in range(len(markings[0]))]
df = pd.DataFrame(np.array(markings), columns=param_labels, index=labels)
row_dist = pd.DataFrame(squareform(pdist(df, metric='euclidean')), columns=labels, index=labels)
# use the Ward method to do the actual clustering
row_clusters = linkage(row_dist, method='ward')
return row_clusters
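One caveat about the call above: scipy's linkage treats any 2-D input as an observation matrix, so passing the square row_dist DataFrame means Euclidean distances are recomputed over rows of distances rather than over the markings themselves. A hedged sketch of the more direct form, with made-up coordinates:

import numpy as np
from scipy.cluster.hierarchy import linkage
from scipy.spatial.distance import pdist

rng = np.random.RandomState(0)
markings = rng.rand(10, 2)        # illustrative marker coordinates

# Pass the condensed distance vector (or the raw observations) directly,
# so Ward linkage works on the intended geometry.
row_clusters = linkage(pdist(markings, metric='euclidean'), method='ward')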