当前位置: 首页>>代码示例>>Python>>正文


Python Pycluster.treecluster方法代码示例

本文整理汇总了 Python 中 Pycluster.treecluster 方法的典型用法代码示例。如果您正苦于以下问题:Python Pycluster.treecluster 方法的具体用法?Python Pycluster.treecluster 怎么用?Python Pycluster.treecluster 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 Pycluster 的用法示例。


在下文中一共展示了Pycluster.treecluster方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: DoClustering

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
 def DoClustering(self,nclusters=30,distance_matrix=None):
     """Cluster the nodes hierarchically and publish the requested cut.

     The tree is built only while ``self._tree_done`` is false and is then
     cached in ``self._tree``; later calls merely re-cut the cached tree, so
     ``distance_matrix`` is ignored once a tree exists.

     Args:
         nclusters: Number of clusters passed to ``self._tree.cut``.
         distance_matrix: Optional precomputed distance matrix. When given
             and non-empty it is handed to ``pc.treecluster`` instead of the
             feature matrix built from the node table.

     The cluster ids returned by the cut are forwarded to
     ``self.FillClusterIndividualData``; nothing is returned.
     """
     # Avoid working two times: reuse the cached tree when it exists.
     if not self._tree_done:
         # Only rows with a non-negative ID take part in the clustering.
         df_nc = self._df_nodes[self._df_nodes['ID']>=0].copy()

         # ``.values`` is the supported spelling of the deprecated
         # ``DataFrame.as_matrix()`` and yields the same array.
         data = df_nc[['JuvenileMass', 'AdultMass']].values
         data = self._scale_function(data)

         if self._normalize_data == True:
             data = whiten(data)

         # Third feature column: the functional group scaled by 100.
         data = np.c_[data,100.*df_nc.FunctionalGroup.values]

         # A NumPy array has no unambiguous truth value, so test its size
         # rather than evaluating ``if distance_matrix:`` directly.
         if isinstance(distance_matrix, np.ndarray):
             use_distance_matrix = distance_matrix.size > 0
         else:
             use_distance_matrix = bool(distance_matrix)

         if use_distance_matrix:
             self._tree = pc.treecluster(distancematrix=distance_matrix)
         else:
             self._tree = pc.treecluster(data)

         self._data = data
         self._tree_done = True

     self.FillClusterIndividualData(self._tree.cut(nclusters))
开发者ID:cesar7f,项目名称:code,代码行数:25,代码来源:KMeansAggregation.py

示例2: DoClustering

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
    def DoClustering(self,nclusters=30):
        """Cluster the graph nodes and store the assignment on the graph.

        Builds a feature matrix from each node's ``JuvenileMass`` and
        ``AdultMass`` (passed through ``self._scale_function`` and optionally
        whitened) plus the ``FunctionalGroup`` scaled by 1000, builds the
        hierarchical tree once, cuts it into ``nclusters`` clusters and
        writes the cluster id of every node to the ``'cluster'`` node
        attribute.

        Args:
            nclusters: Number of clusters passed to ``self._tree.cut``.

        Raises:
            Exception: If any resulting cluster contains nodes from more
                than one functional group.
        """
        gx = self._gx
        func = self._scale_function

        # Gather the per-node attributes in one pass; the four tuples share
        # the same node order.
        nodes = gx.node
        records = [(x,
                    nodes[x]['JuvenileMass'],
                    nodes[x]['AdultMass'],
                    nodes[x]['FunctionalGroup']) for x in nodes.keys()]
        nid, jm, am, fg = zip(*records)
        data = np.c_[func(jm),func(am)]

        if self._normalize_data == True:
            data = whiten(data)
        # Presumably the large factor keeps different functional groups far
        # apart in feature space; the check at the end enforces that outcome.
        data = np.c_[data,1000*np.array(fg)]

        if self._algorithm == Aggregation._HIERARCHICAL_CLUSTERING:

            if not self._tree_done:
                # A NumPy array has no unambiguous truth value, so test its
                # size; every other value keeps the original truthiness test.
                dist = self._distance_matrix
                if isinstance(dist, np.ndarray):
                    use_distance_matrix = dist.size > 0
                else:
                    use_distance_matrix = bool(dist)

                if use_distance_matrix:
                    self._tree = pc.treecluster(distancematrix=dist)
                else:
                    self._tree = pc.treecluster(data)

                self._tree_done = True

        self._data = data
        self._nodes_ids = nid
        # NOTE(review): for any other algorithm ``self._tree`` must already
        # exist, otherwise this line raises AttributeError -- confirm callers.
        clusters_ids = self._tree.cut(nclusters)
        self._clusters_ids = clusters_ids
        self._nclusters = len(np.unique(self._clusters_ids))

        cluster_attrib = dict(zip(nid,clusters_ids))
        nx.set_node_attributes(gx,'cluster',cluster_attrib)
        self._gx = gx

        # Single pass: collect the distinct functional groups of each cluster
        # instead of rescanning every node once per cluster-id entry.
        groups_by_cluster = {}
        for cid, group in zip(clusters_ids, fg):
            groups_by_cluster.setdefault(cid, set()).add(group)

        for groups in groups_by_cluster.values():
            if len(groups) != 1:
                raise Exception('Many functional groups inside the same cluster!!!!!! A CRASH JUST HAPPENED, just joking!!!!')
开发者ID:cesar7f,项目名称:code,代码行数:39,代码来源:Aggregation.py

示例3: generate_network_clusters

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
def generate_network_clusters(G):
    """Create cluster partitions by hierarchical clustering on geodesic distances.

    Args:
        G: Network to partition; it must consist of a single connected
            component.

    Returns:
        dict mapping each cut size ``c`` to the list of cluster ids produced
        by ``hclus.cut(c)``, for ``c`` from 1 to ``len(hclus)``.

    Raises:
        NX.NetworkXError: If ``G`` has more than one connected component.
    """
    # First check to make sure the given network is a single fully
    # connected component.
    if len(NX.component.connected_component_subgraphs(G)) > 1:
        # Call form of raise: same behaviour on Python 2, valid on Python 3.
        raise NX.NetworkXError('G must be single component! Extract main component...')
    # Now generate clusters
    dist_matrix = get_dist_matrix(G)
    # Default hierarchical clustering algorithm used
    hclus = PC.treecluster(data=None, distancematrix=dist_matrix, method='m')
    # Dictionary of the partitioning at each cut in the hierarchy.
    # NOTE(review): len(hclus) is presumably the number of tree nodes, so the
    # finest cut (every item alone) may be left out -- confirm this is intended.
    partitions = {}
    for c in range(1, len(hclus) + 1):  # treecluster cuts start at 1
        partitions[c] = hclus.cut(c).tolist()
    return partitions
开发者ID:0svaldo,项目名称:ZIA,代码行数:17,代码来源:exploded_view_3d.py

示例4: tree_cluster_test

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
def tree_cluster_test(data,real_labels, outputfile = None):
    """Run hierarchical clustering and report results for k = 25..49.

    Builds one tree from ``data`` with ``Pycluster.treecluster`` (method
    ``'m'``), cuts it at every ``k`` in ``range(25, 50)`` and, when
    ``outputfile`` is given, writes ``out_result_header()`` followed by one
    ``out_result(predicted, k, real_labels)`` record per ``k``.

    Args:
        data: Input passed straight to ``Pycluster.treecluster``.
        real_labels: Reference labels forwarded to ``out_result``.
        outputfile: Optional path of the result file; nothing is written
            when it is ``None``.

    Prints a progress line per ``k`` and finally the elapsed time divided by
    the number of cuts (the tree construction time is included).
    """
    start = time.time()
    tree = Pycluster.treecluster(data, method='m')

    ks = range(25,50,1)
    f = None
    if outputfile is not None:
        f = open(outputfile,'w')
    try:
        if f is not None:
            f.write(out_result_header())
        for k in ks:
            print('hierachical clustering whn k=%d' % k)
            predicted = tree.cut(k).tolist()
            if f is not None:
                f.write(out_result(predicted,k, real_labels))
    finally:
        # Always release the file handle, even if a cut or a write fails.
        if f is not None:
            f.close()

    elapsed = time.time() - start
    print('hierarchical clustering time: %.3f' % (elapsed/float(len(ks))))
开发者ID:Calvin-he,项目名称:docclustering,代码行数:18,代码来源:kmeans.py

示例5: hierarchical

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
def hierarchical(flat_data, data, nclusters, method, distance):
    """Group the items of ``data`` by hierarchical clustering of ``flat_data``.

    A tree is built by ``pc.treecluster`` from ``flat_data.values()`` using
    the given linkage ``method`` and ``distance`` measure, then cut into
    ``nclusters`` clusters.

    Returns a ``defaultdict(list)`` mapping each cluster id to the items of
    ``data`` assigned to it, in iteration order.
    """
    # Same keyword arguments as before, gathered in one place.
    options = dict(
        mask=None,
        weight=None,
        transpose=0,
        method=method,
        dist=distance,
        distancematrix=None,
    )
    dendrogram = pc.treecluster(data=flat_data.values(), **options)
    assignments = dendrogram.cut(nclusters)

    # Assumes ``flat_data.values()`` and ``data`` iterate in matching order,
    # since labels and items are paired positionally -- verify against caller.
    grouped = defaultdict(list)
    for label, item in zip(assignments, data):
        grouped[label].append(item)

    return grouped
开发者ID:audy,项目名称:clump,代码行数:20,代码来源:clustering.py

示例6: _guide_tree

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
    def _guide_tree(self, dist_matrix):

        """
        @summary: Build a guide tree from the distance matrix

        @param dist_matrix: The distance matrix
        @type dist_matrix: numpy.ndarray
        @return: Pycluster similarity tree
        @rtype: Pycluster.cluster.Tree

        @author: Woon Wai Keen
        @author: Vladimir Likic
        """

        n = len(dist_matrix)

        print " -> Clustering %d pairwise alignments." % (n*(n-1)),
        tree = Pycluster.treecluster(distancematrix=dist_matrix, method='a')
        print "Done"

        return tree
开发者ID:DongElkan,项目名称:pyms,代码行数:23,代码来源:Class.py

示例7: heatmap

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]

#.........这里部分代码省略.........
        extend_dyn_up = 1000
        extend_dyn_down = 1000
        # load the data once to get the features which extend below 0
        guard = check_data(featurefile, guard, dynam, extend_dyn_up, extend_dyn_down)
    else:
        amount_bins = bins
        extend_dyn_up = extend_up
        extend_dyn_down = extend_down

    # Load data for clustering
    data, regions, guard = load_data(featurefile, amount_bins, extend_dyn_up, extend_dyn_down, rmdup, rpkm, rmrepeats,
                                     fragmentsize, dynam, guard)
    # Normalize
    norm_data = normalize_data(data, DEFAULT_PERCENTILE)

    clus = hstack([norm_data[t] for i, t in enumerate(tracks) if (not pick or i in pick)])

    # Clustering
    if cluster_type == "k":
        print "K-means clustering"
        ## K-means clustering
        # PyCluster
        labels, error, nfound = Pycluster.kcluster(clus, args.numclusters, dist=METRIC)
        if not dynam and merge_mirrored:
            (i, j) = mirror_clusters(data, labels)
            while j:
                for track in data.keys():
                    data[track][labels == j] = [row[::-1] for row in data[track][labels == j]]
                for k in range(len(regions)):
                    if labels[k] == j:
                        (chrom, start, end, gene, strand) = regions[k]
                        if strand == "+":
                            strand = "-"
                        else:
                            strand = "+"
                        regions[k] = (chrom, start, end, gene, strand)
                n = len(set(labels))
                labels[labels == j] = i
                for k in range(j + 1, n):
                    labels[labels == k] = k - 1
                (i, j) = mirror_clusters(data, labels)

        ind = labels.argsort()

        # Hierarchical clustering
    elif cluster_type == "h":
        print "Hierarchical clustering"
        tree = Pycluster.treecluster(clus, method="m", dist=METRIC)
        labels = tree.cut(args.numclusters)
        ind = sort_tree(tree, arange(len(regions)))
    else:
        ind = arange(len(regions))
        labels = zeros(len(regions))


    # Load data for visualization if -g option was used
    if dynam:
        data, regions, guard = load_data(featurefile, bins, extend_up, extend_down, rmdup, rpkm, rmrepeats,
                                         fragmentsize, dynam, guard)

    f = open("{0}_clusters.bed".format(outfile), "w")
    for (chrom, start, end, gene, strand), cluster in zip(array(regions, dtype="object")[ind], array(labels)[ind]):
        if not gene:
            f.write("{0}\t{1}\t{2}\t.\t{3}\t{4}\n".format(chrom, start, end, cluster + 1, strand))
        else:
            f.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n".format(chrom, start, end, gene, cluster + 1, strand))
    f.close()
    # Save read counts
    readcounts = {}
    for i, track in enumerate(tracks):
        readcounts[track] = {}
        readcounts[track]['bins'] = []
        for idx, row in enumerate(data[track]):
            bins = ''
            for bin in row:
                if not bins:
                    bins = '{0}'.format(bin)
                else:
                    bins = '{0};{1}'.format(bins, bin)
            readcounts[track]['bins'].append(bins)

    input_fileBins = open('{0}_readCounts.txt'.format(outfile), 'w')
    input_fileBins.write('Regions\t'.format(track))
    for i, track in enumerate(titles):
        input_fileBins.write('{0}\t'.format(track))
    input_fileBins.write('\n')
    for i, track in enumerate(tracks):
        for idx in ind:
            input_fileBins.write('{0}:{1}-{2}\t'.format(regions[idx][0], regions[idx][1], regions[idx][2]))
            for i, track in enumerate(tracks):
                input_fileBins.write('{0}\t'.format(readcounts[track]['bins'][idx]))
            input_fileBins.write('\n')
        break
    input_fileBins.close()

    if not cluster_type == "k":
        labels = None

    scale = get_absolute_scale(args.scale, [data[track] for track in tracks])
    heatmap_plot(data, ind[::-1], outfile, tracks, titles, colors, bgcolors, scale, tscale, labels, fontsize)
开发者ID:cauyrd,项目名称:fluff,代码行数:104,代码来源:heatmap.py

示例8:

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
		data = eventutils.trim_data_to_events(data, events, trimming_range)

	input_vecs = []
	if treat_data_differentially:
		input_vecs = utils.make_prices_diffs_vecs(data)
	else:
		input_vecs = utils.make_prices_vecs(data)

	# Run clustering algorithm.

	if algorithm_type == ClusterAlg.KMEANS:
		labels, wcss, n = Pycluster.kcluster(input_vecs, number_of_clusters, 
				dist = dist_measure, npass = number_of_iters, 
				method = dist_method)
	elif algorithm_type == ClusterAlg.HIERARCHICAL:
		tree = Pycluster.treecluster(input_vecs, method = dist_method,
				dist = dist_method)
		labels = tree.cut(number_of_clusters)
	elif algorithm_type == ClusterAlg.SELFORGMAPS:
		labels, celldata = Pycluster.somcluster(input_vecs, nxgrid = xgrid, 
				nygrid = ygrid, niter = number_of_iters)

	# If algorithm is self-organizing maps each item is assigned to
	# a particular 2D point, so we need to create groups from 2D points.
	# See implementation of making groups from labels for details.

	if algorithm_type == ClusterAlg.SELFORGMAPS:
		clusters = utils.make_groups_from_labels(labels, data, True)
	else:
		clusters = utils.make_groups_from_labels(labels, data)

	# Check with which type of key we have to deal with.
开发者ID:kstosiek,项目名称:HDiDM2010,代码行数:34,代码来源:cluster.py

示例9: clean

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
red1 = cv2.inRange(imgHSV,a([0,150,80]),a([3,255,255]))
red2 = cv2.inRange(imgHSV,a([150,150,80]),a([180,255,255]))
red = red1 | red2
red = clean(red)
stickers += detect(red)

for i in range(len(stickers)):
	stickers[i].id = i
stickertime=clock()*1000
print "stickers detected in " + str(int(stickertime-starttime)) + " ms"

#CLUSTERING

D=[[distance(st1,st2) for st1 in stickers] for st2 in stickers]
tree = Pycluster.treecluster(distancematrix=D)

cluster_count = 1
while True:
	clusters = tree.cut(cluster_count)
	for i in range(len(stickers)): #Debug
		if (stickers[i].weight>1 and cluster_count<7):
			cv2.drawContours(eval("klastry"+str(cluster_count)), np.array([stickers[i].V]), 0, 255*(clusters[i]+1)/(cluster_count+1),-1)
	weights=[0 for i in range(cluster_count+1)]
	for i in range(len(stickers)):
		weights[clusters[i]] += stickers[i].size + stickers[i].weight
	maxcluster_weight = 0
	maxcluster_id = 0
	maximum = 0
	for i in range(cluster_count):
		if weights[i]>maximum:
开发者ID:chocoman,项目名称:mistr-kostky,代码行数:32,代码来源:kostka.py

示例10: diff_plot

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
def diff_plot(motifs, pwms, names, freq, counts, bgfreq, bgcounts, outfile, mindiff=0, minenr=3, minfreq=0.01):
    w_ratio = np.array([14, len(names), len(names) + 1])
    plot_order = [0, 1, 2]

    nbar = 5

    freq = np.array(freq)
    counts = np.array(counts)
    bgfreq = np.array([[x] for x in bgfreq])

    enr = np.log2(np.divide(freq, bgfreq))

    filt = np.ones(len(enr), dtype="bool")
    filters = [
        np.sum(enr > minenr, 1) > 0,
        np.sum(freq > minfreq, 1) > 0,
        (np.max(enr, 1) - np.min(enr, 1)) > mindiff,
        np.sum(counts > 2, 1) > 0,
    ]
    for f in filters:
        filt = np.logical_and(filt, f)

        print "Filter: ", sum(filt)

    motifs = np.array(motifs)[filt]
    freq = freq[filt]
    bgfreq = bgfreq[filt]
    enr = enr[filt]

    for m, f, b, e in zip(motifs, freq, bgfreq, enr):
        sys.stderr.write("{0}\t{1}\t{2}\t{3}\n".format(m, f, b, e))

    if len(freq) == 0:
        sys.stderr.write("No enriched and/or differential motifs found.\n")
        return
    elif len(freq) >= 3:
        tree = Pycluster.treecluster(freq, method="m", dist="c")
        ind = sort_tree(tree, np.arange(len(motifs)))
    else:
        ind = np.arange(len(freq))

    fig = plt.figure(figsize=((5 + 0.75 * len(names)) * 3, (0.3 * len(motifs) + 1.5) * 3))

    gs = GridSpec(
        len(motifs) + 3 + nbar, 3, height_ratios=[1] * nbar + [3] * (len(motifs) + 3), width_ratios=w_ratio[plot_order]
    )

    # Colormaps
    c1 = mpl.cm.RdBu
    c2 = mpl.cm.Blues  ##create_colormap("white", "blue")

    ### Frequency plot ###

    # Create axis
    ax = plt.subplot(gs[nbar:-3, plot_order[2]])

    # Plot frequencies
    vmin = 0
    vmax = 0.3

    pfreq = np.hstack((freq, bgfreq))
    ax.pcolormesh(pfreq[ind], cmap=c2, vmin=vmin, vmax=vmax)

    sm = plt.cm.ScalarMappable(cmap=c2, norm=mpl.colors.Normalize(vmin=vmin, vmax=vmax))

    # Show percentages
    for y, row in enumerate(pfreq[ind]):
        for x, val in enumerate(row):
            v = vmax
            if val >= (vmin + ((vmax - vmin) / 2)):
                v = vmin
            plt.text(x + 0.5, y + 0.5, "{:.1%}".format(val), ha="center", va="center", color=sm.to_rgba(v))

    # Hide most labels
    plt.setp(ax.get_xticklines(), visible=False)
    plt.setp(ax.get_yticklines(), visible=False)
    plt.setp(ax.get_yticklabels(), visible=False)

    # Set the X labels
    ticks = np.arange(len(names) + 1) + 0.5
    plt.xticks(ticks, names + ["background"], rotation=30, ha="right")

    ax.set_ylim(0, len(motifs))

    # Title
    plt.title("Frequency")

    # Colorbar
    sm._A = []
    cax = plt.subplot(gs[0, plot_order[2]])
    cb = fig.colorbar(sm, cax=cax, ticks=[0, 0.3], orientation="horizontal")
    cb.ax.set_xticklabels(["0%", "30%"])

    #### Enrichment plot
    ax = plt.subplot(gs[nbar:-3, plot_order[1]])
    vmin = -10
    vmax = 10
    ax.pcolormesh(enr[ind], cmap=c1, vmin=vmin, vmax=vmax)
    for y, row in enumerate(enr[ind]):
        for x, val in enumerate(row):
#.........这里部分代码省略.........
开发者ID:georgeg9,项目名称:gimmemotifs,代码行数:103,代码来源:plot.py

示例11: len

# 需要导入模块: import Pycluster [as 别名]
# 或者: from Pycluster import treecluster [as 别名]
                        strand = "-"
                    else:
                        strand = "+"
                    regions[k] = (chrom, start, end, gene, strand)
            n = len(set(labels))
            labels[labels == j] = i
            for k in range(j + 1, n):
                labels[labels == k] = k - 1
            (i,j) = mirror_clusters(data, labels)
            
    ind = labels.argsort()
    # Other cluster implementation
    #    centres, labels, dist = kmeanssample(clus, options.numclusters, len(clus) / 10,  metric=cl, maxiter=200, verbose=1, delta=0.00001)
elif cluster_type == "h":
    print "Hierarchical clustering"
    tree = Pycluster.treecluster(clus, method="m", dist=METRIC)
    labels = tree.cut(options.numclusters)
    ind = sort_tree(tree, arange(len(regions)))
else:
    ind = arange(len(regions))
    labels = zeros(len(regions))
f = open("{0}_clusters.bed".format(outfile), "w")
for (chrom,start,end,gene,strand), cluster in zip(array(regions, dtype="object")[ind], array(labels)[ind]):
  if not gene:
    f.write("{0}\t{1}\t{2}\t.\t{3}\t{4}\n".format(chrom, start, end, cluster+1, strand))
  else: 
    f.write("{0}\t{1}\t{2}\t{3}\t{4}\t{5}\n".format(chrom, start, end, gene, cluster+1, strand))
f.close()

if not cluster_type == "k":
    labels = None
开发者ID:CnatureS,项目名称:fluff,代码行数:33,代码来源:fluff_heatmap.py


注:本文中的Pycluster.treecluster方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。