本文整理汇总了Python中sklearn.manifold.MDS类的典型用法代码示例。如果您正苦于以下问题：Python MDS类的具体用法？Python MDS怎么用？Python MDS使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了MDS类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: create_2dprojection
def create_2dprojection(distmat):
    """Embed a precomputed distance matrix with metric MDS.

    NOTE: despite the function name, the embedding has three components
    (``n_components=3``), and the algorithm is MDS, not Isomap.

    :param distmat: square matrix of pairwise dissimilarities; it is passed
        to MDS with ``dissimilarity='precomputed'``
    :return: the array returned by ``MDS.fit_transform`` (one row per input
        row, three columns)
    """
    # Parenthesised single-argument print works on both Python 2 and 3.
    print('map to 3d space')
    mapper = MDS(n_components=3, metric=True, n_init=4, max_iter=300,
                 verbose=0, eps=0.001, n_jobs=-1, random_state=0,
                 dissimilarity='precomputed')
    projmat = mapper.fit_transform(distmat)
    print('DONE')
    return projmat
示例2: project_in_2D
def project_in_2D(distance_mat, method='mds'):
    """
    Compute 2D coordinates for SDRs with a manifold learning algorithm.

    For 'mds', a metric MDS is fitted first and its embedding is used as the
    starting configuration of a single non-metric MDS run. For 'tSNE', the
    matrix is handed directly to ``TSNE.fit_transform``.

    :param distance_mat: A square matrix with pairwise distances
    :param method: Either 'mds' or 'tSNE'
    :return: an array with dimension (numSDRs, 2) holding the 2D projection
      of each SDR
    :raises NotImplementedError: for any other value of ``method``
    """
    # One RandomState instance is shared by both MDS stages.
    rng = np.random.RandomState(seed=3)

    if method not in ('mds', 'tSNE'):
        raise NotImplementedError

    if method == 'tSNE':
        embedder = TSNE(n_components=2, init='pca', random_state=0)
        return embedder.fit_transform(distance_mat)

    # Stage 1: metric MDS provides the initial layout.
    metric_mds = MDS(n_components=2, max_iter=3000, eps=1e-9,
                     random_state=rng,
                     dissimilarity="precomputed", n_jobs=1)
    initial_layout = metric_mds.fit(distance_mat).embedding_

    # Stage 2: non-metric MDS refines that layout (single init).
    nonmetric_mds = MDS(n_components=2, metric=False, max_iter=3000,
                        eps=1e-12, dissimilarity="precomputed",
                        random_state=rng, n_jobs=1, n_init=1)
    return nonmetric_mds.fit_transform(distance_mat, init=initial_layout)
示例3: main
def main():
    """Reduce the digits dataset to 2D (MDS or PCA) and show a scatter plot.

    The ``--mds`` command-line flag, parsed by docopt from the module
    docstring, selects MDS; otherwise PCA is used.
    """
    cli_args = docopt(__doc__)
    use_mds = cli_args['--mds']

    # load datasets
    dataset = load_digits()
    features = dataset.data
    targets = dataset.target
    class_names = dataset.target_names

    # dimension reduction
    reducer = MDS(n_components=2) if use_mds else PCA(n_components=2)
    embedded = reducer.fit_transform(features)

    # one scatter series per class index
    for idx in range(class_names.shape[0]):
        members = targets == idx
        plt.scatter(embedded[members, 0], embedded[members, 1],
                    color=COLORS[idx], label=str(idx))

    plt.legend(loc='upper left')
    plt.autoscale()
    plt.grid()
    plt.show()
示例4: main
def main():
    """Embed the digits dataset with a default MDS and plot the result."""
    dataset = load_digits()
    embedding = MDS().fit_transform(dataset.data)
    plot_embedding(embedding, dataset.target)
示例5: plotMap
def plotMap(maparr, freq, nest, seqs, dbfile, map2d, outfile, plotm='T'):
    """Run a 2D MDS on the rows of ``maparr``, optionally plot it, write a CSV.

    :param maparr: row vectors to embed; pairwise Euclidean distances between
        the rows are fed to MDS as a precomputed dissimilarity matrix
    :param freq, nest, seqs, dbfile, map2d: passed through unchanged to
        ``writePlotMDS``
    :param outfile: output path prefix; ``.png`` and ``.pdf`` are appended
        for the figures
    :param plotm: the figures are produced only when ``str(plotm) == 'T'``
    :return: the MDS embedding (``embedding_``), one row per input row
    """
    # multi-dimensional scaling
    # np.matrix is deprecated and rejected by recent scikit-learn; a plain
    # array promoted to 2-D yields the same rows.
    distances = euclidean_distances(np.atleast_2d(np.asarray(maparr)))
    mds = MDS(n_components=2, max_iter=3000, eps=1e-9,
              random_state=np.random.RandomState(seed=3),
              dissimilarity="precomputed", n_jobs=1)
    pos = mds.fit(distances).embedding_

    # plot attributes
    N = len(pos)
    size = 8000
    color = np.arange(N)

    if str(plotm) == 'T':
        # plot MDS
        fig, ax = plt.subplots(figsize=(10, 10))
        # Silence warnings only while plotting; catch_warnings restores the
        # caller's filters on exit instead of wiping them all with
        # warnings.resetwarnings().
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            ax.scatter(pos[:, 0], pos[:, 1], c=color, s=size, alpha=0.3,
                       cmap=plt.cm.viridis, marker='s')
            plt.xlabel('Dimension 1', fontsize=20, labelpad=20)
            plt.ylabel('Dimension 2', fontsize=20, labelpad=20)
            # Booleans, not 'off': the string is truthy on current matplotlib
            # and would switch the ticks on.
            plt.tick_params(labelsize=15, length=14, direction='out', pad=15,
                            top=False, right=False)
            # save figures
            fig.savefig(outfile + '.png', bbox_inches='tight', format='png')
            fig.savefig(outfile + '.pdf', bbox_inches='tight', format='pdf')
            plt.close(fig)

    # write csv file
    writePlotMDS(freq, nest, seqs, dbfile, pos, maparr, map2d, outfile)
    return pos
示例6: labtest_MDS
def labtest_MDS(PID):
    """Return a 2D metric-MDS embedding of the scaled lab tests of patients.

    :param PID: iterable of patient ids; each id is looked up in the
        ``patients`` mapping, which is not defined in this function and must
        exist in an outer scope
    :return: the fitted ``embedding_`` array
    """
    lab_tests = [patients[pid]['tests'] for pid in PID]
    scaled = pp.scale(lab_tests)
    embedder = MDS(n_components=2, metric=True, n_init=4, max_iter=300,
                   verbose=0, eps=0.001, n_jobs=1,
                   dissimilarity='euclidean')
    embedder.fit(scaled)
    return embedder.embedding_
示例7: scale_plot
def scale_plot(input_data, data_colors=None, cluster_colors=None,
               cluster_sizes=None, dissimilarity='euclidean', filey=None):
    """Scatter-plot an MDS embedding of data points and cluster rows.

    The first ``n_clusters`` rows of the embedding are drawn as numbered
    stars and the remaining rows as plain dots. ``n_clusters`` is not a
    parameter here; it has to be available from an outer scope.

    :param input_data: input handed to ``MDS.fit_transform``
    :param data_colors: colour of the data points ('r' when None)
    :param cluster_colors: colour of the cluster stars ('b' when None)
    :param cluster_sizes: marker size of the cluster stars (2200 when None)
    :param dissimilarity: forwarded to ``MDS``
    :param filey: when given, the figure is titled with the file's base name
        minus its last four characters, saved with ``save_figure``, and closed
    """
    data_colors = 'r' if data_colors is None else data_colors
    cluster_colors = 'b' if cluster_colors is None else cluster_colors
    cluster_sizes = 2200 if cluster_sizes is None else cluster_sizes

    # scale
    embedding = MDS(dissimilarity=dissimilarity).fit_transform(input_data)
    cluster_points = embedding[:n_clusters]
    data_points = embedding[n_clusters:]

    with sns.axes_style('white'):
        f = plt.figure(figsize=(14, 14))
        plt.scatter(data_points[:, 0], data_points[:, 1],
                    s=75, color=data_colors)
        plt.scatter(cluster_points[:, 0], cluster_points[:, 1],
                    marker='*', s=cluster_sizes, color=cluster_colors,
                    edgecolor='black', linewidth=2)
        # plot cluster number
        offset = .011
        font_dict = {'fontsize': 17, 'color': 'white'}
        for i, (x, y) in enumerate(embedding[:n_clusters]):
            # two-digit numbers are shifted further left
            x_shift = offset if i < 9 else offset*2
            plt.text(x-x_shift, y-offset, i+1, font_dict)

    if filey is None:
        return
    plt.title(path.basename(filey)[:-4], fontsize=20)
    save_figure(f, filey)
    plt.close()
示例8: main
def main():
    """Lay out image patches in 2D according to a precomputed distance matrix.

    Reads ``distmat799.txt`` (comma-delimited), normalises it by its maximum,
    embeds it with MDS, and draws every PNG from ``799_patch`` at the
    embedding coordinates of the row with the same index.
    """
    # load sample data
    data = np.loadtxt("distmat799.txt", delimiter=",")
    dists = data / np.amax(data)

    # load images
    # Match the extension at the end of the name only; the previous unanchored
    # regex also accepted names such as "a.png.bak".
    # NOTE(review): os.listdir returns names in arbitrary order, so image i is
    # not guaranteed to correspond to matrix row i -- confirm how the matrix
    # was generated.
    img_files = [name for name in os.listdir("799_patch")
                 if name.endswith(".png")]

    # mds
    mds = MDS(n_components=2, dissimilarity="precomputed")
    results = mds.fit(dists)

    # plot
    fig, ax = plt.subplots()
    for i, img_file in enumerate(img_files):
        img = read_png(os.path.join("799_patch", img_file))
        imagebox = OffsetImage(img, zoom=2.0)
        xy = tuple(results.embedding_[i, :])
        ax.add_artist(AnnotationBbox(imagebox, xy))
    ax.set_xlim(-1.0, 1.0)
    ax.set_ylim(-1.0, 1.0)
    plt.show()
示例9: plot_cities
def plot_cities():
    """Embed a hard-coded inter-city distance table with MDS and plot it.

    Prints every index pair at which the table is not symmetric, then the
    table itself and the fitted embedding, and finally shows a labelled
    scatter plot of the cities.
    """
    #distance_matrix = get_distances()
    cities = 'BOS CHI DC DEN LA MIA NY SEA SF'.split()
    distance_matrix = np.array([
        [0   , 963 , 429 , 1949, 2979, 1504, 206 , 2976, 3095],
        [963 , 0   , 671 , 996 , 2054, 1329, 802 , 2013, 2142],
        [429 , 671 , 0   , 1616, 2631, 1075, 233 , 2684, 2799],
        [1949, 996 , 1616, 0   , 1059, 2037, 1771, 1307, 1235],
        [2979, 2054, 2631, 1059, 0   , 2687, 2786, 1131, 379],
        [1504, 1329, 1075, 2037, 2687, 0   , 1308, 3273, 3053],
        [206 , 802 , 233 , 1771, 2786, 1308, 0   , 2815, 2934],
        [2976, 2013, 2684, 1307, 1131, 3273, 2815, 0   , 808],
        [3095, 2142, 2799, 1235, 379 , 3053, 2934, 808 , 0]
    ])

    # Report asymmetric entries. The check covers the whole matrix (the old
    # range(0, 8) skipped the last row and column) and no longer relies on
    # assert, which is stripped under -O.
    n = distance_matrix.shape[0]
    for i in range(n):
        for j in range(n):
            if distance_matrix[i][j] != distance_matrix[j][i]:
                print((i, j))
    print(distance_matrix)

    mds = MDS(dissimilarity='precomputed')
    mds.fit(distance_matrix)
    print(mds.embedding_)

    for idx, points in enumerate(mds.embedding_):
        plt.plot(points[0], points[1], 'r.')
        plt.text(points[0], points[1], cities[idx])
    plt.show()
示例10: plotFlatClusterGraph
def plotFlatClusterGraph(tf_idf_matrix, clusters, headlines_utf):
    """Plot documents in 2D, coloured by flat cluster, and save the figure.

    Cosine distances between the rows of ``tf_idf_matrix`` are embedded with
    MDS; each point is annotated with its headline and the figure is written
    to ``../plots/flat_clusters.png``.

    :param tf_idf_matrix: document-term matrix passed to ``cosine_similarity``
    :param clusters: cluster label per document; each label must be a key of
        the colour table below (0-9), otherwise a KeyError is raised
    :param headlines_utf: text label per document
    """
    dist = 1 - cosine_similarity(tf_idf_matrix)
    mds = MDS(n_components=2, dissimilarity="precomputed", random_state=1)
    pos = mds.fit_transform(dist)
    xs, ys = pos[:, 0], pos[:, 1]
    cluster_colors = {0: '#FE642E', 1: '#B40404', 2: '#D7DF01', 3: '#01DF01', 4: '#00FFBF', 5: '#2E64FE', 6:'#8904B1', 7:'#FA58F4', 8:'#FE2E9A', 9:'#A4A4A4'}

    # create data frame that has the result of the MDS plus the cluster
    # numbers and titles
    df = pandas.DataFrame(dict(x=xs, y=ys, label=clusters, title=headlines_utf))
    groups = df.groupby('label')

    # set up plots
    fig, ax = plt.subplots(figsize=(17, 9))  # set size

    # iterate through groups to layer the plots; each group is labelled so
    # that the legend below actually has entries
    for name, group in groups:
        ax.plot(group.x, group.y, marker='o', linestyle='', ms=12,
                label=name, color=cluster_colors[name], mec='none')
    ax.set_aspect('auto')
    # Booleans, not 'off': the string is truthy on current matplotlib.
    ax.tick_params(axis='x', which='both', bottom=False, top=False,
                   labelbottom=False)
    ax.tick_params(axis='y', which='both', left=False, right=False,
                   labelleft=False)
    ax.legend(numpoints=1)  # show legend with only 1 point

    # add the title text at each point's x,y position; plain column
    # iteration replaces DataFrame.ix, which pandas has removed
    for x, y, title in zip(df['x'], df['y'], df['title']):
        ax.text(x, y, title, size=8)
    plt.savefig('../plots/flat_clusters.png', dpi=400)
示例11: reorder_channels_by_xyz_coord
def reorder_channels_by_xyz_coord(data, channel_names=None):
    """
    Reorder channels by their position along a 1-d MDS projection of the
    electrode distance matrix. Prints the MDS stress and the resulting order.

    :param data: 2-d array in the format [n_samples, n_channels]
    :param channel_names: names of the EEG channels; forwarded as-is
        (including None) to ``reorder_channel_names``
    :return: data, channel_names permutated accordingly
    """
    # work on transposed view, i.e. [channel, samples]
    data = data.T

    # map channels to 1-d coordinates through MDS
    from sklearn.manifold import MDS
    distances = compute_electrode_distance_matrix()
    mds = MDS(n_components=1, dissimilarity='precomputed')
    projection = mds.fit_transform(distances).reshape(data.shape[0])
    order = np.argsort(projection)
    # Parenthesised single-argument print works on both Python 2 and 3.
    print(mds.stress_)
    print(order)

    # re-order channels
    data = data[order]

    # restore initial axes layout
    data = data.T

    # re-order channel_names
    channel_names = reorder_channel_names(channel_names, order)

    return data, channel_names
示例12: embed_two_dimensions
def embed_two_dimensions(data, vectorizer, size=10, n_components=5, colormap='YlOrRd'):
    """Vectorize graphs, embed them with MDS, reduce to 2D and plot.

    :param data: iterable of graphs; each item's ``graph`` dict is queried
        for an 'id' entry, and only truthy ids are kept as labels
    :param vectorizer: object whose ``transform`` turns the iterable into a
        data matrix
    :param size: side length of the square figure
    :param n_components: number of MDS components computed before the final
        reduction to two dimensions with TruncatedSVD
    :param colormap: forwarded as ``density_colormap`` to the plotting helper
    :raises Exception: when ``data`` has no ``__iter__`` attribute
    """
    if not hasattr(data, '__iter__'):
        raise Exception('ERROR: Input must be iterable')

    import itertools
    # one copy of the stream feeds the vectorizer, the other the labels
    graphs_for_vectors, graphs_for_labels = itertools.tee(data)

    # get labels
    candidate_ids = (graph.graph.get('id', None) for graph in graphs_for_labels)
    labels = [graph_id for graph_id in candidate_ids if graph_id]

    # transform iterable into sparse vectors
    data_matrix = vectorizer.transform(graphs_for_vectors)

    # embed high dimensional sparse vectors in 2D
    from sklearn import metrics
    distance_matrix = metrics.pairwise.pairwise_distances(data_matrix)

    from sklearn.manifold import MDS
    mds_embedder = MDS(n_components=n_components, dissimilarity='precomputed')
    explicit_data_matrix = mds_embedder.fit_transform(distance_matrix)

    from sklearn.decomposition import TruncatedSVD
    svd = TruncatedSVD(n_components=2)
    low_dimension_data_matrix = svd.fit_transform(explicit_data_matrix)

    plt.figure(figsize=(size, size))
    embed_dat_matrix_two_dimensions(low_dimension_data_matrix, labels=labels, density_colormap=colormap)
    plt.show()
示例13: visualize_clusters
def visualize_clusters(tfidf_matrix, vocabulary, km):
    """Show a 2D MDS scatter plot of documents coloured by cluster.

    :param tfidf_matrix: document-term matrix passed to ``cosine_similarity``
    :param vocabulary: unused by this function; kept for interface
        compatibility
    :param km: fitted clustering model exposing ``labels_``; every label must
        be one of 0-5, otherwise the colour/name lookup raises KeyError
    """
    # calculate the cosine distance between each document
    # this will be used for plotting on a euclidean (2-dimensional) plane.
    dist = 1 - cosine_similarity(tfidf_matrix)
    clusters = km.labels_.tolist()

    # two components as we are plotting points in a two-dimensional plane
    # 'precomputed' because we provide a distance matrix
    # 'random_state' is fixed so the plot is reproducible.
    mds = MDS(n_components=2, dissimilarity="precomputed", random_state=1)
    pos = mds.fit_transform(dist)  # shape (n_samples, n_components)
    xs, ys = pos[:, 0], pos[:, 1]

    # set up colors per clusters using a dict
    cluster_colors = {0: '#1b9e77', 1: '#d95f02', 2: '#7570b3', 3: '#e7298a', 4: '#66a61e', 5: '#99cc00'}

    # set up cluster names using a dict (perhaps using the top terms of each cluster)
    cluster_names = {0: '0',
                     1: '1',
                     2: '2',
                     3: '3',
                     4: '4',
                     5: '5'}

    # create data frame that has the result of the MDS plus the cluster numbers
    df = pd.DataFrame(dict(x=xs, y=ys, label=clusters))

    # group by cluster
    groups = df.groupby('label')

    # set up plot
    fig, ax = plt.subplots(figsize=(17, 9))  # set size
    ax.margins(0.05)  # Optional, just adds 5% padding to the autoscaling

    # iterate through groups to layer the plot; the group key looks up the
    # legend label and the colour
    for name, group in groups:
        ax.plot(group.x, group.y, marker='o', linestyle='', ms=12,
                label=cluster_names[name], color=cluster_colors[name],
                mec='none')
    ax.set_aspect('auto')
    # Booleans, not 'off': the string is truthy on current matplotlib and
    # would leave the ticks visible.
    ax.tick_params(
        axis='x',            # changes apply to the x-axis
        which='both',        # both major and minor ticks are affected
        bottom=False,        # ticks along the bottom edge are off
        top=False,           # ticks along the top edge are off
        labelbottom=False)
    ax.tick_params(
        axis='y',            # changes apply to the y-axis
        which='both',        # both major and minor ticks are affected
        left=False,          # ticks along the left edge are off
        right=False,         # ticks along the right edge are off
        labelleft=False)

    ax.legend(numpoints=1)  # show legend with only 1 point

    plt.show()  # show the plot
示例14: non_param_multi_dim_scaling
def non_param_multi_dim_scaling(dists, n_dims=3, n_threads=None, metric=True):
    """Fit MDS on a distance matrix and return its stress and projections.

    :param dists: distances in a form accepted by ``squareform``; the
        converted result is fitted as a precomputed dissimilarity matrix
    :param n_dims: number of embedding components
    :param n_threads: forwarded to MDS as ``n_jobs``
    :param metric: forwarded to MDS; note the default is True (metric MDS)
    :return: dict with keys 'stress' (``stress_``) and 'projections'
        (``embedding_``)
    """
    model = MDS(n_components=n_dims, metric=metric, n_jobs=n_threads,
                dissimilarity='precomputed')
    model.fit(squareform(dists))
    return {'stress': model.stress_, 'projections': model.embedding_}
示例15: generate_cluster_plot_frame
def generate_cluster_plot_frame(self):
    """Build ``self.cluster_plot_frame`` from a 2D MDS of cosine distances.

    Reads ``self.tfidf_matrix``, ``self.clusters``, ``self.chapter_list`` and
    ``self.book_list``; stores a DataFrame with columns x, y, label, chapter
    and book on the instance. Returns nothing.
    """
    # The discarded bare MDS() instantiation that used to precede this line
    # had no effect and has been removed.
    mds = MDS(n_components=2, dissimilarity="precomputed", random_state=1)
    dist = 1 - cosine_similarity(self.tfidf_matrix)
    pos = mds.fit_transform(dist)
    xs, ys = pos[:, 0], pos[:, 1]
    self.cluster_plot_frame = pd.DataFrame(dict(x=xs, y=ys, label=self.clusters, chapter=self.chapter_list, book=self.book_list))