本文整理汇总了Python中Pycluster.treecluster方法的典型用法代码示例。如果您正苦于以下问题：Python Pycluster.treecluster方法的具体用法？Python Pycluster.treecluster怎么用？Python Pycluster.treecluster使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类Pycluster的用法示例。
在下文中一共展示了Pycluster.treecluster方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: DoClustering
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
def DoClustering(self, nclusters=30, distance_matrix=None):
    """Build the hierarchical tree (first call only) and cut it into clusters.

    The feature matrix holds the scaled ``JuvenileMass`` and ``AdultMass``
    columns of every node whose ``ID`` is non-negative, optionally whitened,
    plus a third column containing ``100 * FunctionalGroup``.

    :param nclusters: number of clusters requested from ``Tree.cut``.
    :param distance_matrix: optional precomputed distance matrix; when a
        non-empty one is given it is clustered instead of the feature matrix.

    NOTE(review): the source listing lost its indentation; the final cut is
    assumed to run on every call, outside the "build once" guard -- confirm.
    """
    # Avoid working two times: the tree is only built on the first call.
    if not self._tree_done:
        df_nc = self._df_nodes[self._df_nodes['ID'] >= 0].copy()
        # ``.values`` instead of ``as_matrix()``: the latter was removed in
        # pandas 1.0, while ``.values`` exists in old and new versions alike.
        data = df_nc[['JuvenileMass', 'AdultMass']].values
        data = self._scale_function(data)
        if self._normalize_data == True:
            # presumably scipy.cluster.vq.whiten -- confirm against imports
            data = whiten(data)
        # The functional group is appended as an extra, heavily weighted
        # feature (presumably to keep different groups apart -- confirm).
        data = np.c_[data, 100. * df_nc.FunctionalGroup.values]
        # Explicit test: truth-testing a multi-element NumPy array raises
        # ValueError. Empty input still falls back to the feature matrix,
        # exactly as the previous truthiness check did.
        if distance_matrix is not None and len(distance_matrix) > 0:
            self._tree = pc.treecluster(distancematrix=distance_matrix)
        else:
            self._tree = pc.treecluster(data)
        self._data = data
        self._tree_done = True
    self.FillClusterIndividualData(self._tree.cut(nclusters))
示例2: DoClustering
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
def DoClustering(self, nclusters=30):
    """Main clustering function.

    Builds a feature matrix from the graph nodes (scaled juvenile and adult
    masses, optionally whitened, plus ``1000 * FunctionalGroup``), builds the
    hierarchical tree if it has not been built yet, cuts it into
    ``nclusters`` clusters and stores the cluster id of every node in the
    node attribute ``'cluster'``.

    :param nclusters: number of clusters requested from ``Tree.cut``.
    :raises Exception: if a cluster contains more than one functional group.

    NOTE(review): the source listing lost its indentation; everything after
    the tree construction is assumed to sit at function level -- confirm.
    """
    gx = self._gx
    func = self._scale_function
    nid, jm, am, fg = zip(*[
        (x,
         gx.node[x]['JuvenileMass'],
         gx.node[x]['AdultMass'],
         gx.node[x]['FunctionalGroup'])
        for x in gx.node.keys()
    ])
    data = np.c_[func(jm), func(am)]
    if self._normalize_data == True:
        # presumably scipy.cluster.vq.whiten -- confirm against imports
        data = whiten(data)
    data = np.c_[data, 1000 * np.array(fg)]

    if self._algorithm == Aggregation._HIERARCHICAL_CLUSTERING:
        if not self._tree_done:
            dist_matrix = self._distance_matrix
            # Truth-testing a multi-element NumPy array raises ValueError.
            # Such an array is necessarily non-empty, so treat it as "given";
            # every value that worked before keeps its old meaning.
            try:
                use_dist_matrix = bool(dist_matrix)
            except ValueError:
                use_dist_matrix = True
            if use_dist_matrix:
                self._tree = pc.treecluster(distancematrix=dist_matrix)
            else:
                self._tree = pc.treecluster(data)
            self._tree_done = True

    self._data = data
    self._nodes_ids = nid
    clusters_ids = self._tree.cut(nclusters)
    self._clusters_ids = clusters_ids
    self._nclusters = len(np.unique(self._clusters_ids))
    cluster_attrib = dict(zip(nid, clusters_ids))
    nx.set_node_attributes(gx, 'cluster', cluster_attrib)
    self._gx = gx

    # Sanity check: every cluster must contain exactly one functional group.
    # ``clusters_ids`` and ``fg`` are both aligned with ``nid``, so a single
    # pass groups them, instead of rescanning all nodes once per node.
    groups_by_cluster = {}
    for cid, group in zip(clusters_ids, fg):
        groups_by_cluster.setdefault(cid, []).append(group)
    for groups in groups_by_cluster.values():
        # ``!=`` rather than ``is not``: identity of ints is an
        # implementation detail and must not be used for value comparison.
        if len(np.unique(groups)) != 1:
            raise Exception('Many functional groups inside the same cluster!!!!!! A CRASH JUST HAPPENED, just joking!!!!')
示例3: generate_network_clusters
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
def generate_network_clusters(G):
    """Create cluster partitions by hierarchical clustering on graph distances.

    :param G: network to partition; it must consist of a single connected
        component.
    :return: dict mapping each cut size ``c`` (from 1 to ``len(hclus)``) to
        the list of cluster ids returned by ``hclus.cut(c)``.
    :raises NX.NetworkXError: if ``G`` has more than one connected component.
    """
    # First check to make sure the given network is a single fully
    # connected component.
    if len(NX.component.connected_component_subgraphs(G)) > 1:
        # Call form of ``raise``: valid on both Python 2 and Python 3,
        # unlike the Python-2-only ``raise Class, 'message'`` statement.
        raise NX.NetworkXError('G must be single component! Extract main component...')
    # Now generate the clusters from the distance matrix of the graph.
    dist_matrix = get_dist_matrix(G)
    # Default hierarchical clustering algorithm used
    # (method='m' is maximum/complete linkage per the Pycluster manual).
    hclus = PC.treecluster(data=None, distancematrix=dist_matrix, method='m')
    # Dictionary of the partitioning at each cut in the hierarchy.
    partitions = {}
    for c in range(1, len(hclus) + 1):  # treecluster cuts start at 1
        partitions[c] = hclus.cut(c).tolist()
    return partitions
示例4: tree_cluster_test
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
def tree_cluster_test(data, real_labels, outputfile=None):
    """Cluster ``data`` hierarchically and evaluate cuts for k = 25..49.

    :param data: input passed unchanged to ``Pycluster.treecluster``.
    :param real_labels: reference labels forwarded to ``out_result``.
    :param outputfile: optional path; when given, a header and one result
        entry per ``k`` are written to it.

    The timing printed at the end is the total elapsed time (tree
    construction included) divided by the number of ``k`` values.
    """
    start = time.time()
    tree = Pycluster.treecluster(data, method='m')
    ks = range(25, 50, 1)
    out = open(outputfile, 'w') if outputfile is not None else None
    try:
        if out is not None:
            out.write(out_result_header())
        for k in ks:
            # Single-argument parenthesised print behaves the same on
            # Python 2 and Python 3.
            print('hierachical clustering whn k=%d' % k)
            predicted = tree.cut(k).tolist()
            if out is not None:
                out.write(out_result(predicted, k, real_labels))
    finally:
        # The original never closed the file; close it even if a cut or
        # the result formatting raises.
        if out is not None:
            out.close()
    elapsed = time.time() - start
    print('hierarchical clustering time: %.3f' % (elapsed / float(len(ks))))
示例5: hierarchical
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
def hierarchical(flat_data, data, nclusters, method, distance):
    """Hierarchical clustering.

    Clusters the values of ``flat_data``, cuts the resulting tree into
    ``nclusters`` clusters and groups the entries of ``data`` by the cluster
    id found at the same position.

    :return: ``defaultdict`` mapping cluster id to the list of ``data``
        entries assigned to it.
    """
    hierarchy = pc.treecluster(
        data=flat_data.values(),
        mask=None,
        weight=None,
        transpose=0,
        method=method,
        dist=distance,
        distancematrix=None,
    )
    grouped = defaultdict(list)
    # Pair every cluster id with the entry of ``data`` at the same position.
    for label, member in zip(hierarchy.cut(nclusters), data):
        grouped[label].append(member)
    return grouped
示例6: _guide_tree
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
def _guide_tree(self, dist_matrix):
    """
    @summary: Build a guide tree from the distance matrix

    @param dist_matrix: The distance matrix
    @type dist_matrix: numpy.ndarray
    @return: Pycluster similarity tree
    @rtype: Pycluster.cluster.Tree

    @author: Woon Wai Keen
    @author: Vladimir Likic
    """
    import sys

    n = len(dist_matrix)
    # Write the progress message without a newline and flush it, so it is
    # visible while the clustering runs rather than only once "Done" is
    # printed. The emitted text is the same as the two print statements
    # produced before.
    sys.stdout.write(" -> Clustering %d pairwise alignments. " % (n*(n-1)))
    sys.stdout.flush()
    # method='a' is average linkage per the Pycluster manual.
    tree = Pycluster.treecluster(distancematrix=dist_matrix, method='a')
    sys.stdout.write("Done\n")
    return tree
示例7: heatmap
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
#.........这里部分代码省略.........
extend_dyn_up = 1000
extend_dyn_down = 1000
# load the data once to get the features which extend below 0
guard = check_data(featurefile, guard, dynam, extend_dyn_up, extend_dyn_down)
else:
amount_bins = bins
extend_dyn_up = extend_up
extend_dyn_down = extend_down
# Load data for clustering
data, regions, guard = load_data(featurefile, amount_bins, extend_dyn_up, extend_dyn_down, rmdup, rpkm, rmrepeats,
fragmentsize, dynam, guard)
# Normalize
norm_data = normalize_data(data, DEFAULT_PERCENTILE)
clus = hstack([norm_data[t] for i, t in enumerate(tracks) if (not pick or i in pick)])
# Clustering
if cluster_type == "k":
print "K-means clustering"
## K-means clustering
# PyCluster
labels, error, nfound = Pycluster.kcluster(clus, args.numclusters, dist=METRIC)
if not dynam and merge_mirrored:
(i, j) = mirror_clusters(data, labels)
while j:
for track in data.keys():
data[track][labels == j] = [row[::-1] for row in data[track][labels == j]]
for k in range(len(regions)):
if labels[k] == j:
(chrom, start, end, gene, strand) = regions[k]
if strand == "+":
strand = "-"
else:
strand = "+"
regions[k] = (chrom, start, end, gene, strand)
n = len(set(labels))
labels[labels == j] = i
for k in range(j + 1, n):
labels[labels == k] = k - 1
(i, j) = mirror_clusters(data, labels)
ind = labels.argsort()
# Hierarchical clustering
elif cluster_type == "h":
print "Hierarchical clustering"
tree = Pycluster.treecluster(clus, method="m", dist=METRIC)
labels = tree.cut(args.numclusters)
ind = sort_tree(tree, arange(len(regions)))
else:
ind = arange(len(regions))
labels = zeros(len(regions))
# Load data for visualization if -g option was used
if dynam:
data, regions, guard = load_data(featurefile, bins, extend_up, extend_down, rmdup, rpkm, rmrepeats,
fragmentsize, dynam, guard)
f = open("{0}_clusters.bed".format(outfile), "w")
for (chrom, start, end, gene, strand), cluster in zip(array(regions, dtype="object")[ind], array(labels)[ind]):
if not gene:
f.write("{0}\t{1}\t{2}\t.\t{3}\t{4}\n".format(chrom, start, end, cluster + 1, strand))
else:
f.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n".format(chrom, start, end, gene, cluster + 1, strand))
f.close()
# Save read counts
readcounts = {}
for i, track in enumerate(tracks):
readcounts[track] = {}
readcounts[track]['bins'] = []
for idx, row in enumerate(data[track]):
bins = ''
for bin in row:
if not bins:
bins = '{0}'.format(bin)
else:
bins = '{0};{1}'.format(bins, bin)
readcounts[track]['bins'].append(bins)
input_fileBins = open('{0}_readCounts.txt'.format(outfile), 'w')
input_fileBins.write('Regions\t'.format(track))
for i, track in enumerate(titles):
input_fileBins.write('{0}\t'.format(track))
input_fileBins.write('\n')
for i, track in enumerate(tracks):
for idx in ind:
input_fileBins.write('{0}:{1}-{2}\t'.format(regions[idx][0], regions[idx][1], regions[idx][2]))
for i, track in enumerate(tracks):
input_fileBins.write('{0}\t'.format(readcounts[track]['bins'][idx]))
input_fileBins.write('\n')
break
input_fileBins.close()
if not cluster_type == "k":
labels = None
scale = get_absolute_scale(args.scale, [data[track] for track in tracks])
heatmap_plot(data, ind[::-1], outfile, tracks, titles, colors, bgcolors, scale, tscale, labels, fontsize)
示例8:
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
data = eventutils.trim_data_to_events(data, events, trimming_range)
input_vecs = []
if treat_data_differentially:
input_vecs = utils.make_prices_diffs_vecs(data)
else:
input_vecs = utils.make_prices_vecs(data)
# Run clustering algorithm.
if algorithm_type == ClusterAlg.KMEANS:
labels, wcss, n = Pycluster.kcluster(input_vecs, number_of_clusters,
dist = dist_measure, npass = number_of_iters,
method = dist_method)
elif algorithm_type == ClusterAlg.HIERARCHICAL:
tree = Pycluster.treecluster(input_vecs, method = dist_method,
dist = dist_method)
labels = tree.cut(number_of_clusters)
elif algorithm_type == ClusterAlg.SELFORGMAPS:
labels, celldata = Pycluster.somcluster(input_vecs, nxgrid = xgrid,
nygrid = ygrid, niter = number_of_iters)
# If algorithm is self-organizing maps each item is assigned to
# a particular 2D point, so we need to create groups from 2D points.
# See implementation of making groups from labels for details.
if algorithm_type == ClusterAlg.SELFORGMAPS:
clusters = utils.make_groups_from_labels(labels, data, True)
else:
clusters = utils.make_groups_from_labels(labels, data)
# Check with which type of key we have to deal with.
示例9: clean
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
red1 = cv2.inRange(imgHSV,a([0,150,80]),a([3,255,255]))
red2 = cv2.inRange(imgHSV,a([150,150,80]),a([180,255,255]))
red = red1 | red2
red = clean(red)
stickers += detect(red)
for i in range(len(stickers)):
stickers[i].id = i
stickertime=clock()*1000
print "stickers detected in " + str(int(stickertime-starttime)) + " ms"
#CLUSTERING
D=[[distance(st1,st2) for st1 in stickers] for st2 in stickers]
tree = Pycluster.treecluster(distancematrix=D)
cluster_count = 1
while True:
clusters = tree.cut(cluster_count)
for i in range(len(stickers)): #Debug
if (stickers[i].weight>1 and cluster_count<7):
cv2.drawContours(eval("klastry"+str(cluster_count)), np.array([stickers[i].V]), 0, 255*(clusters[i]+1)/(cluster_count+1),-1)
weights=[0 for i in range(cluster_count+1)]
for i in range(len(stickers)):
weights[clusters[i]] += stickers[i].size + stickers[i].weight
maxcluster_weight = 0
maxcluster_id = 0
maximum = 0
for i in range(cluster_count):
if weights[i]>maximum:
示例10: diff_plot
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
def diff_plot(motifs, pwms, names, freq, counts, bgfreq, bgcounts, outfile, mindiff=0, minenr=3, minfreq=0.01):
w_ratio = np.array([14, len(names), len(names) + 1])
plot_order = [0, 1, 2]
nbar = 5
freq = np.array(freq)
counts = np.array(counts)
bgfreq = np.array([[x] for x in bgfreq])
enr = np.log2(np.divide(freq, bgfreq))
filt = np.ones(len(enr), dtype="bool")
filters = [
np.sum(enr > minenr, 1) > 0,
np.sum(freq > minfreq, 1) > 0,
(np.max(enr, 1) - np.min(enr, 1)) > mindiff,
np.sum(counts > 2, 1) > 0,
]
for f in filters:
filt = np.logical_and(filt, f)
print "Filter: ", sum(filt)
motifs = np.array(motifs)[filt]
freq = freq[filt]
bgfreq = bgfreq[filt]
enr = enr[filt]
for m, f, b, e in zip(motifs, freq, bgfreq, enr):
sys.stderr.write("{0}\t{1}\t{2}\t{3}\n".format(m, f, b, e))
if len(freq) == 0:
sys.stderr.write("No enriched and/or differential motifs found.\n")
return
elif len(freq) >= 3:
tree = Pycluster.treecluster(freq, method="m", dist="c")
ind = sort_tree(tree, np.arange(len(motifs)))
else:
ind = np.arange(len(freq))
fig = plt.figure(figsize=((5 + 0.75 * len(names)) * 3, (0.3 * len(motifs) + 1.5) * 3))
gs = GridSpec(
len(motifs) + 3 + nbar, 3, height_ratios=[1] * nbar + [3] * (len(motifs) + 3), width_ratios=w_ratio[plot_order]
)
# Colormaps
c1 = mpl.cm.RdBu
c2 = mpl.cm.Blues ##create_colormap("white", "blue")
### Frequency plot ###
# Create axis
ax = plt.subplot(gs[nbar:-3, plot_order[2]])
# Plot frequencies
vmin = 0
vmax = 0.3
pfreq = np.hstack((freq, bgfreq))
ax.pcolormesh(pfreq[ind], cmap=c2, vmin=vmin, vmax=vmax)
sm = plt.cm.ScalarMappable(cmap=c2, norm=mpl.colors.Normalize(vmin=vmin, vmax=vmax))
# Show percentages
for y, row in enumerate(pfreq[ind]):
for x, val in enumerate(row):
v = vmax
if val >= (vmin + ((vmax - vmin) / 2)):
v = vmin
plt.text(x + 0.5, y + 0.5, "{:.1%}".format(val), ha="center", va="center", color=sm.to_rgba(v))
# Hide most labels
plt.setp(ax.get_xticklines(), visible=False)
plt.setp(ax.get_yticklines(), visible=False)
plt.setp(ax.get_yticklabels(), visible=False)
# Set the X labels
ticks = np.arange(len(names) + 1) + 0.5
plt.xticks(ticks, names + ["background"], rotation=30, ha="right")
ax.set_ylim(0, len(motifs))
# Title
plt.title("Frequency")
# Colorbar
sm._A = []
cax = plt.subplot(gs[0, plot_order[2]])
cb = fig.colorbar(sm, cax=cax, ticks=[0, 0.3], orientation="horizontal")
cb.ax.set_xticklabels(["0%", "30%"])
#### Enrichment plot
ax = plt.subplot(gs[nbar:-3, plot_order[1]])
vmin = -10
vmax = 10
ax.pcolormesh(enr[ind], cmap=c1, vmin=vmin, vmax=vmax)
for y, row in enumerate(enr[ind]):
for x, val in enumerate(row):
#.........这里部分代码省略.........
示例11: len
# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
strand = "-"
else:
strand = "+"
regions[k] = (chrom, start, end, gene, strand)
n = len(set(labels))
labels[labels == j] = i
for k in range(j + 1, n):
labels[labels == k] = k - 1
(i,j) = mirror_clusters(data, labels)
ind = labels.argsort()
# Other cluster implementation
# centres, labels, dist = kmeanssample(clus, options.numclusters, len(clus) / 10, metric=cl, maxiter=200, verbose=1, delta=0.00001)
elif cluster_type == "h":
print "Hierarchical clustering"
tree = Pycluster.treecluster(clus, method="m", dist=METRIC)
labels = tree.cut(options.numclusters)
ind = sort_tree(tree, arange(len(regions)))
else:
ind = arange(len(regions))
labels = zeros(len(regions))
f = open("{0}_clusters.bed".format(outfile), "w")
for (chrom,start,end,gene,strand), cluster in zip(array(regions, dtype="object")[ind], array(labels)[ind]):
if not gene:
f.write("{0}\t{1}\t{2}\t.\t{3}\t{4}\n".format(chrom, start, end, cluster+1, strand))
else:
f.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n".format(chrom, start, end, gene, cluster+1, strand))
f.close()
if not cluster_type == "k":
labels = None