当前位置: 首页>>代码示例>>Python>>正文


Python hierarchy.fcluster函数代码示例

本文整理汇总了Python中scipy.cluster.hierarchy.fcluster函数的典型用法代码示例。如果您正苦于以下问题:Python fcluster函数的具体用法?Python fcluster怎么用?Python fcluster使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了fcluster函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: hcluster_cols

	def hcluster_cols(self, thresh):
		"""Merge the columns of ``self.X`` that cluster together.

		The columns (rows of ``self.X.T``) are clustered with complete
		linkage, first with the cosine metric and, if that is rejected,
		with the euclidean metric. The tree is cut at distance ``thresh``.
		Each flat cluster becomes one column of a new DataFrame: clusters
		with several members are averaged row-wise, singletons are copied
		unchanged. Column labels are built from ``self.crimes``.

		Side effects: ``self.df`` is replaced by the merged frame and
		``self.crimes`` by its column labels. Nothing is returned.
		"""
		columns = self.X.T
		try:
			link = linkage(columns, method='complete', metric='cosine')
		except ValueError:
			# Presumably triggered by non-finite cosine distances (e.g. an
			# all-zero column); only ValueError is caught so unrelated
			# failures and KeyboardInterrupt are no longer swallowed.
			link = linkage(columns, method='complete', metric='euclidean')
		assignments = fcluster(link, thresh, 'distance')

		df_new = pd.DataFrame(index=np.arange(len(self.names)))
		# Clusters are visited in ascending label order and their members in
		# ascending column order, matching the former groupby-based grouping.
		for label in np.unique(assignments):
			cols = np.flatnonzero(assignments == label)
			if len(cols) > 1:
				df_new[str(self.crimes[cols])] = np.mean(self.X[:, cols], axis=1)
			else:
				df_new[str(self.crimes[cols[0]])] = self.X[:, cols[0]]

		self.df = df_new
		self.crimes = df_new.columns.values
开发者ID:nhu2000,项目名称:hood_project,代码行数:28,代码来源:pca_class.py

示例2: cluster_fps

    def cluster_fps(self):
        """Cluster fingerprints hierarchically from ``self.dm``.

        A coarse average-linkage clustering of ``self.dm`` is cut at
        distance 0.3 and stored in ``self.coarse_r``. An optional SVD-based
        refinement exists but is currently switched off, so in practice the
        coarse labels are also stored in ``self.result``.

        Returns:
            A ``(linkage, coarse_linkage)`` tuple. With the refinement
            disabled both entries are the coarse linkage matrix.
        """
        clkg = hcluster.linkage(self.dm, method='average')
        coarse_r = hcluster.fcluster(clkg, 0.3, criterion='distance')
        self.coarse_r = coarse_r

        # Number of coarse clusters that contain more than one member.
        bcount = np.bincount(coarse_r)
        knum = len(np.nonzero(bcount > 1)[0])

        # The SVD refinement was hard-disabled in the original code
        # (``if False and ...``); the switch is kept explicit here.
        svd_refinement_enabled = False
        shape = self.density_matrix.shape
        use_svd = (
            svd_refinement_enabled
            and len(shape) > 1
            and shape[0] > 10
            and shape[1] > 10
            and knum < min(shape) / 2
        )
        if not use_svd:
            self.result = self.coarse_r
            return (clkg, clkg)

        (u, sing_vals, vt) = la.svds(self.sps_matrixs, k=knum)
        self.u = u
        print('============')

        # Re-cluster the columns in the reduced space by cosine distance.
        reduced = np.matrix(np.diag(sing_vals)) * np.matrix(vt)
        cos_dists = dist.pdist(np.array(reduced.T), 'cosine')
        # Snap numerical noise around zero to exactly zero before linkage.
        cos_dists[np.abs(cos_dists) < 1e-11] = 0
        lkg = hcluster.linkage(cos_dists, method='average')
        self.lkg = lkg

        self.result = hcluster.fcluster(lkg, self.svd_cluster_thr, criterion='distance')
        return (lkg, clkg)
开发者ID:lrpopeyou,项目名称:cluster_fp_user,代码行数:33,代码来源:cluster_by_grid.py

示例3: elbow

 def elbow(self, no_plot=False):
     """Plot within groups variance vs. number of clusters.
     Elbow criterion could be used to determine number of clusters.
     """
     from scipy.cluster.hierarchy import fcluster
     import matplotlib.pyplot as plt
     idx = fcluster(self.Z, len(self.data), criterion='maxclust')
     nclust = list(np.arange(1, np.sqrt(idx.max() / 2) + 1, dtype=int))
     within_grp_var = []
     mean_var = []
     for n in nclust:
         idx = fcluster(self.Z, n, criterion='maxclust')
         grp = [np.flatnonzero(idx == c) for c in np.unique(idx)]
         # between_grp_var = Group([self.data[ix].R.uv for ix in grp]).var
         var = [100*self.data[ix].var for ix in grp]
         within_grp_var.append(var)
         mean_var.append(np.mean(var))
     if not no_plot:
         plt.boxplot(within_grp_var, positions=nclust)
         plt.plot(nclust, mean_var, 'k')
         plt.xlabel('Number of clusters')
         plt.ylabel('Variance')
         plt.title('Within-groups variance vs. number of clusters')
         plt.show()
     else:
         return nclust, within_grp_var
开发者ID:drcoronel,项目名称:apsg,代码行数:26,代码来源:core.py

示例4: refineEnsemble

def refineEnsemble(ens, lower=.5, upper=10.):
    """Refine a PDB ensemble based on RMSD criteria.

    Two hierarchical clusterings of the pairwise RMSD table are combined:

    * complete linkage cut at ``upper``: only members of the most
      populated cluster are kept;
    * single linkage cut at ``lower``: each cluster is reduced to one
      representative, the conformation with the largest summed weights.
      Ties are broken by the smallest value of ``ens.getRMSDs()``.

    Args:
        ens: ensemble providing ``getRMSDs``, integer indexing that yields
            objects with ``getWeights``, and indexing by an index array.
        lower: distance threshold for the single-linkage clustering.
        upper: distance threshold for the complete-linkage clustering.

    Returns:
        ``ens`` indexed by the conformations that satisfy both criteria.
    """

    from scipy.cluster.hierarchy import linkage, fcluster
    from scipy.spatial.distance import squareform
    from collections import Counter

    # Pairwise RMSD table converted to the condensed form linkage expects.
    v = squareform(ens.getRMSDs(pairwise=True))

    # Upper threshold: keep the most populated complete-linkage cluster.
    labels = fcluster(linkage(v, method='complete'), upper, criterion='distance')
    most_common_label = Counter(labels).most_common(1)[0][0]
    I = np.where(labels == most_common_label)[0]

    # Lower threshold: one representative per single-linkage cluster.
    labels = fcluster(linkage(v, method='single'), lower, criterion='distance')
    clusters = [np.where(labels == label)[0] for label in np.unique(labels)]

    J = np.ones(len(clusters), dtype=int) * -1
    rmsd = None
    for i, cluster in enumerate(clusters):
        if len(cluster) > 1:
            # Find the conformations with the largest coverage
            # (the weight of the ref should be 1).
            weights = np.array([ens[j].getWeights().sum() for j in cluster])
            js = np.where(weights == weights.max())[0]

            if len(js) > 1:
                # rmsd is not calculated unless necessary, for efficiency.
                rmsd = ens.getRMSDs() if rmsd is None else rmsd
                # js are positions inside `cluster`; map them to ensemble
                # indices before looking up the per-conformation RMSD.
                j = js[np.argmin(rmsd[cluster[js]])]
            else:
                j = js[0]
            J[i] = cluster[j]
        else:
            # A singleton cluster is its own representative.
            J[i] = cluster[0]

    # Conformations that pass both criteria.
    K = np.intersect1d(I, J)
    return ens[K]
开发者ID:fongchun,项目名称:ProDy,代码行数:56,代码来源:legacy.py

示例5: cutTree

def cutTree(z, threshold, crit):
    """Cut the linkage matrix ``z`` into flat clusters.

    Values of ``z`` are clipped to ``[0, 9999999]`` before cutting. If
    ``fcluster`` rejects the request with a ``ValueError``, the error is
    printed and the tree is cut at distance 50 instead.

    Args:
        z: linkage matrix.
        threshold: threshold passed to ``fcluster``.
        crit: ``fcluster`` criterion name.

    Returns:
        The array of flat cluster labels returned by ``fcluster``.
    """
    try:
        z = np.clip(z, 0, 9999999)
        return hac.fcluster(z, threshold, criterion=crit)
    except ValueError as e:
        print("cutTree: %s" % str(e))
        # The fallback used criterion="euclidean", which fcluster does not
        # accept, so the handler itself always raised. "distance" is the
        # criterion that matches a fixed cut height.
        tree = hac.fcluster(z, 50, criterion="distance")
        print("negative values in matrix")
        return tree
开发者ID:helgejo,项目名称:codelibrary,代码行数:10,代码来源:clustering.py

示例6: process_stay

def process_stay(imei,traj):
    """Report the long-lasting segments of one device trajectory.

    Trajectories with fewer than two points are ignored. When the
    module-level ``sample_range`` is set, the trajectory is processed only
    if its first point lies strictly inside one of the ``(cx, cy, cr)``
    circles. The points are clustered by average linkage on the distances
    from ``get_pdist``; runs of consecutive points sharing the same coarse
    label (cut at 0.7) form segments, and every segment whose ``t`` span
    exceeds 480 is passed to ``print_merge_fp`` together with the finer
    labels (cut at 0.2).
    """
    min_duration = 60 * 8
    if len(traj.shape) < 1 or traj.shape[0] < 2:
        return

    x = traj['x']
    y = traj['y']

    if sample_range is not None:
        # Keep the trajectory only if it starts inside a sampled circle.
        starts_in_sample = any(
            math.sqrt(math.pow(cx - x[0], 2) + math.pow(cy - y[0], 2)) < cr
            for (cx, cy, cr) in sample_range
        )
        if not starts_in_sample:
            return

    ids = G.get_gridids_with_align(np.median(x), np.median(y))

    dm = get_pdist(traj, 100, convert_sig=True)
    # Snap near-zero distances to exactly zero.
    dm[np.abs(dm) < 1e-3] = 0

    lkg = hcluster.linkage(dm, method='average')
    coarse = hcluster.fcluster(lkg, 0.7, criterion='distance')  # rough dist
    fine = hcluster.fcluster(lkg, 0.2, criterion='distance')  # rough dist

    # Boundaries of the runs of equal consecutive coarse labels,
    # including both ends of the trajectory.
    total = len(coarse)
    bounds = [
        i for i in range(total + 1)
        if i == 0 or i == total or coarse[i] != coarse[i - 1]
    ]

    for start, stop in zip(bounds[:-1], bounds[1:]):
        piece = traj[start:stop]
        piece_ids = fine[start:stop]
        duration = piece[-1]['t'] - piece[0]['t']
        if duration > min_duration:
            print_merge_fp(ids, imei, piece, piece_ids, duration)
开发者ID:lrpopeyou,项目名称:cluster_fp_user,代码行数:49,代码来源:get_stayd.py

示例7: clusterTrajectories

def clusterTrajectories(
    trajectories, fname, path, metric_func=trajectoryDissimilarityL2, user_distance_matrix=None, criterion="distance"
):
    """Cluster trajectories hierarchically and plot the representatives.

    Args:
        trajectories: the trajectories; they need to be in XY coordinates.
        fname: name used for the saved distance matrix and in plot names.
        path: output directory; plots go to ``path + "/plots"``.
        metric_func: dissimilarity used to build the distance matrix when
            ``user_distance_matrix`` is not given. It also selects the cut
            threshold for the "distance" criterion.
        user_distance_matrix: optional precomputed square distance matrix
            with one row per trajectory.
        criterion: "distance" or "inconsistent".

    Returns:
        ``(labels, [labels], [])`` where ``labels`` are the flat cluster
        labels from ``fcluster``.

    Raises:
        ValueError: for a criterion or metric_func with no configured
            threshold. Previously this surfaced as UnboundLocalError.
    """
    plot_path = utils.queryPath(path + "/plots")
    if user_distance_matrix is None:
        distance_matrix = getTrajectoryDistanceMatrix(trajectories, metric_func)
        writeToCSV.saveData(distance_matrix, path + "/" + fname)  # save the distance_matrix
    else:
        distance_matrix = user_distance_matrix
        assert len(distance_matrix) == len(
            trajectories
        ), "distance_matrix (n, n) and trajectories(n) should have same number of samples"

    # Single formatted argument so the output is the same on Python 2 and 3.
    print("distance_matrix:\n%s" % (distance_matrix,))

    v = DIST.squareform(distance_matrix)
    cluster_result = HAC.linkage(v, method="average")
    HAC.dendrogram(cluster_result)
    plt.xlabel("cluster_dengrogram_{fname}".format(fname=fname))
    plt.savefig("{path}/cluster_dengrogram_{fname}.png".format(fname=fname, path=plot_path))
    plt.clf()

    if criterion == "distance":
        if metric_func == trajectoryDissimilarityL2:
            cutoff = 1 * 1000  # distance for l2 measure
        elif metric_func == trajectoryDissimilarityCenterMass:
            cutoff = 1.5  # distance for center of mass measure
        else:
            raise ValueError("no distance threshold configured for metric_func %r" % (metric_func,))
    elif criterion == "inconsistent":
        cutoff = 0.8
    else:
        raise ValueError("unsupported criterion: %r" % (criterion,))
    this_cluster_label = HAC.fcluster(Z=cluster_result, t=cutoff, criterion=criterion)

    n_clusters = len(set(this_cluster_label))
    print("this_cluster_label: %s number of clusters: %s" % (this_cluster_label, n_clusters))

    # Plot the representative trajectories.
    plotRepresentativeTrajectory(
        this_cluster_label,
        trajectories,
        fname="cluster_centroids_{n}_classes".format(n=n_clusters),
        path=plot_path,
        show=False,
    )

    return this_cluster_label, [this_cluster_label], []
开发者ID:Jim61C,项目名称:vessel-trajectory-modeller,代码行数:49,代码来源:clustering_worker.py

示例8: clusterize_hierarchical

def clusterize_hierarchical(peakels, matrix_dist, cut, clip=False):
    """Group peakels by complete-linkage clustering of their distances.

    :param peakels: items to group; ``peakels[i]`` corresponds to
        observation ``i`` of the clustering.
    :param matrix_dist: distances handed to ``linkage``. Modified in
        place when ``clip`` is true.
    :param cut: distance threshold at which the tree is cut.
    :param clip: clamp ``matrix_dist`` to ``[0, 1]`` first. Negative
        distances would otherwise lead to a ValueError.
    :return: the values of a dict mapping cluster label to its peakels.
    """
    if clip:
        # In-place clamp: the array serves as its own output buffer.
        np.clip(matrix_dist, 0, 1, out=matrix_dist)

    tree = linkage(matrix_dist, method='complete')
    flat_labels = fcluster(tree, cut, criterion='distance')

    clust_by_id = ddict(list)
    for position in range(len(flat_labels)):
        clust_by_id[flat_labels[position]].append(peakels[position])
    return clust_by_id.values()
开发者ID:jerkos,项目名称:eledelphe,代码行数:25,代码来源:clustering.py

示例9: main

def main(): #clustering and write output
    """Cluster the peptides in ``pep_array`` and write the labelled table.

    Reads the module-level ``pep_array``, ``metric``, ``method`` and ``t``.
    With more than one peptide, the comma-separated values in column 4 of
    each row are clustered hierarchically and cut at distance ``t``. The
    cluster label is appended as an extra column before the table is
    passed to ``formatoutput``. A single peptide gets label 0.

    Exits with status 1 when ``t`` is not numeric.
    """
    if len(pep_array) <= 1:
        p = numpy.array(pep_array)
        p = numpy.column_stack([p, [0]])
        formatoutput(p)
        return

    # Validate the cutoff before doing any expensive work.
    try:
        cutoff = float(t)
    except ValueError:
        print("please provide a numeric value for --t")
        sys.exit(1)

    matrix = [row[4].replace('"', "").split(',') for row in pep_array]
    dataMatrix = numpy.array(matrix, dtype=float)
    d = sch.distance.pdist(dataMatrix, metric)  # vector of pairwise distances
    if metric == "correlation":
        # when using correlation, all values in the distance matrix
        # should be in range [0, 2]
        D = numpy.clip(d, 0, 2)
    else:
        D = d

    L = sch.linkage(D, method, metric)
    ind = sch.fcluster(L, cutoff, 'distance')  # distance is dissimilarity (1 - correlation)
    p = numpy.array(pep_array)
    p = numpy.column_stack([p, ind])
    formatoutput(p)
开发者ID:Nausx,项目名称:SpliceVista,代码行数:25,代码来源:clusterpeptide.py

示例10: user_fp_group

def user_fp_group(data,key,user,filter = 'mid',merge = False,thr = 0.2):
    """Cluster one user's fingerprint records and print them tab-separated.

    A single record (0-d input or one row) is printed directly with a
    trailing ``1``. Otherwise the records are clustered by average linkage
    on ``get_pdist(data, 100)`` and cut at distance ``thr``.

    Args:
        data: structured records with ``wf_list``, ``x``, ``y`` and ``t``
            fields.
        key: first output column.
        user: second output column.
        filter: with ``'mid'``, clusters smaller than
            ``max(1.1, largest_cluster_size / 2)`` are skipped.
        merge: when true, print one summary line per cluster instead of
            one line per record.
        thr: distance threshold for the flat clustering.
    """
    if len(data.shape) == 0 or data.shape[0] == 1:
        print('\t'.join([key, user, '%s' % data['wf_list'], str(data['x']), str(data['y']), '1']))
        return

    dists = get_pdist(data, 100)
    clusters = hcluster.linkage(dists, method='average')
    r = hcluster.fcluster(clusters, thr, 'distance')

    # Members of every cluster, in ascending label order.
    groups = [(label, data[r == label]) for label in np.unique(r)]
    mid_size = max(1.1, max(members.shape[0] for _, members in groups) / 2.0)

    for label, d in groups:
        if filter == 'mid' and d.shape[0] < mid_size:
            continue
        if merge:
            print('\t'.join([key, user, wf_to_str(get_mean_wf(d)), str(np.median(d['x'])), str(np.median(d['y'])), str(get_largest_dur(d)), str(d.shape[0])]))
            continue
        for od in d:
            print('\t'.join([key, user, od['wf_list'], str(od['x']), str(od['y']), str(od['t']), str(label)]))
开发者ID:lrpopeyou,项目名称:cluster_fp_user,代码行数:25,代码来源:cluster_fps2.py

示例11: process

def process(tag,infos,wf_lists,count):
    """Cluster signal vectors by cosine distance and print cluster stats.

    Nothing is printed when either input is missing, when fewer than
    three distinct ``imei`` values are present, or when ``wf_lists`` is
    not a 2-D array with at least two columns.

    For each flat cluster (average linkage cut at distance 0.3) one
    tab-separated line is printed: ``tag``, the integer-formatted mean
    signal vector, the median x and y, the number of distinct users, the
    overall x/y standard deviations, the cluster's x/y standard
    deviations, and ``count``.

    Args:
        tag: label printed in the first column.
        infos: mapping or structured array with ``x``, ``y`` and ``imei``.
        wf_lists: 2-D array with one signal vector per record.
        count: value printed in the last column.
    """
    # Identity test: ``== None`` on a numpy array compares elementwise and
    # its truth value raises for arrays with more than one element.
    if wf_lists is None or infos is None:
        return

    x = infos['x']
    y = infos['y']
    imeis = infos['imei']

    if len(np.unique(imeis)) < 3:
        return
    if len(wf_lists.shape) < 2 or wf_lists.shape[1] < 2:
        return

    std_x = np.std(x)
    std_y = np.std(y)

    dists = sci_dist.pdist(wf_lists, 'cosine')
    # Snap numerical noise (including tiny negatives) to exactly zero.
    dists[(dists < 1e-10)] = 0
    clusters = hierarchy.linkage(dists, method='average')
    r = hierarchy.fcluster(clusters, 0.3, 'distance')

    for c in np.unique(r):
        idx = (r == c)
        c_x = np.median(x[idx])
        c_y = np.median(y[idx])
        c_std_x = np.std(x[idx])
        c_std_y = np.std(y[idx])
        c_user = len(np.unique(imeis[idx]))
        wfs = wf_lists[idx]
        mean_wf = np.sum(wfs, axis=0) / len(wfs)
        wf = ['%d' % sig for sig in mean_wf]
        print('%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d' % (tag, '\t'.join(wf), c_x, c_y, c_user, std_x, std_y, c_std_x, c_std_y, count))
开发者ID:lrpopeyou,项目名称:cluster_fp_user,代码行数:32,代码来源:count_users_var.py

示例12: run_entity_model

def run_entity_model(cdev, cprc):
    """Cluster documents by the entities they mention.

    The documents are reduced by ``process_entities``, vectorised over a
    vocabulary built from the entity corpus, scaled and L2-normalised,
    then clustered by average linkage and cut at distance 0.5. Cluster
    members are mapped back through ``emapping`` and ``hmapping``.

    Args:
        cdev: input documents; only its length is used directly here.
        cprc: companion input forwarded to ``process_entities``.

    Returns:
        ``(ecf, voc)``: the list of entity clusters and the vocabulary.
    """
    print('____________________________________________________')
    print('running entity model')
    hdev, hprc, hmapping, entcorp, er = process_entities(cdev, cprc)
    # Single formatted argument keeps the output identical on Python 2 and 3.
    print('removed %s documents %s left' % (len(cdev) - len(hdev), len(hdev)))
    voc = build_voc(entcorp, 2)

    ent_vectorizer = CountVectorizer(vocabulary=voc)
    E = ent_vectorizer.fit_transform(hdev)

    Eclean, emapping = filter_rare(E, 0)

    E_dense = np.matrix(Eclean).astype('float')
    E_scaled = preprocessing.scale(E_dense)
    E_normalized = preprocessing.normalize(E_scaled, norm='l2')

    EMatrix = pairwise_distances(E_normalized, metric='cosine')
    # NOTE(review): EMatrix is a square distance matrix; linkage presumably
    # treats 2-D input as observation vectors rather than distances.
    # Confirm whether a condensed matrix was intended before changing it,
    # since the 0.5 cut below was chosen against the current behaviour.
    EL = fastcluster.linkage(EMatrix, method='average')
    flat_eclust = hierarchy.fcluster(EL, 0.5, 'distance')
    ec = organize_clusters(flat_eclust, th=3)

    ecf = [[hmapping[emapping[t]] for t in cl] for cl in ec]
    print('detected %s entity clusters' % len(ecf))
    return ecf, voc
开发者ID:gaphex,项目名称:Oracle,代码行数:26,代码来源:tweet_proc.py

示例13: main

def main():
    """Cut the stored linkage into flat clusters and write the labels.

    Loads the linkage with ``loadLinkage``, saves a dendrogram image
    coloured at threshold 6.8, cuts the tree at distance ``k = 1.5`` and
    writes one label per row to ``labels.csv`` under ``datafolder``.
    Progress information is printed to stdout.
    """
    import sys

    # Named Z so the scipy ``linkage`` function name is not shadowed.
    Z = loadLinkage()
    print(len(Z))
    k = 1.5

    dendrogram(Z, color_threshold=6.8, show_contracted=True)
    pylab.savefig("/home/rojosewe/Dropbox/MAI90/tesis/images/wordClustering/dgram446.8.png")

    T = sch.fcluster(Z, k, 'distance')
    n = len(T)
    # Labels as an (n, 1) float column, as the element-wise copy produced.
    labels = np.asarray(T, dtype=float).reshape(n, 1)
    print(str(k) + ": " + str(max(T)))

    # The csv module wants a binary file on Python 2 and a text file
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(datafolder + 'labels.csv', 'wb')
    else:
        csvfile = open(datafolder + 'labels.csv', 'w', newline='')
    with csvfile:
        csv.writer(csvfile).writerows(labels)

    print('done writing')
开发者ID:rojosewe,项目名称:TesisScrips,代码行数:32,代码来源:dendragramBuilder.py

示例14: cluster_words

def cluster_words(k):
    """Count the description words of every cluster of types.

    The entries of the ``types`` directory, sorted with ``alphanum_key``,
    are assigned to at most ``k`` flat clusters cut from the module-level
    linkage ``Z``. For each cluster label ``1..k`` the ``description``
    field of every member's JSON file is lower-cased and split into
    words, and words found in ``stop_words`` are dropped. The size of
    each cluster is printed as it is processed.

    Args:
        k: maximum number of clusters.

    Returns:
        A list of ``k`` ``Counter`` objects, one per cluster label.
    """
    ts = os.listdir('types')
    ts.sort(key=alphanum_key)
    ts = np.array(ts)

    T = fcluster(Z, k, criterion='maxclust')

    def count_words(label):
        """Return the word counts for the cluster with this label."""
        cluster = ts[T == label]
        print(len(cluster))
        allwords = []

        for t in cluster:
            with open('types/{}'.format(t)) as handle:
                data = json.loads(handle.read())
            # Raw string: '\w' in a plain literal is an invalid escape.
            allwords.extend(re.findall(r'\w+', data['description'].lower()))

        return Counter(word for word in allwords if word not in stop_words)

    return [count_words(i + 1) for i in range(k)]
开发者ID:MareinK,项目名称:datamining,代码行数:27,代码来源:words.py

示例15: get_ROIs

def get_ROIs(df_sequence,x,limit_meters):
	"""Cluster transaction locations and return the selected centroids.

	The points from ``get_latlong_points`` are clustered by weighted
	linkage on the ``vincenty`` distance in meters and cut at
	``limit_meters``. The ``pi`` values and coordinates are accumulated
	per cluster, and ``get_upToX_pi_locations`` picks the clusters to keep
	for the given ``x``.

	Returns ``[centroids, pi_sum]`` with centroids as ``{"lat", "long"}``
	dicts, ``[[point], 1.0]`` when there is exactly one location, or
	``None`` when there are none.
	"""
	# find the origin transaction points
	X, locations, pi_locations = get_latlong_points(df_sequence)
	n_locations = len(locations)
	if n_locations == 1:
		return [[{"lat": X[0, 0], "long": X[0, 1]}], 1.0]
	if n_locations < 1:
		return None

	# build the dendrogram
	Z = linkage(X, 'weighted', lambda a, b: vincenty(a, b).meters)
	clusters = fcluster(Z, limit_meters, criterion='distance')

	seen_labels = []
	coord_sums = []
	member_counts = []
	pi_sums = []
	# accumulate the pi sums and coordinates of locations sharing a cluster
	for i, label in enumerate(clusters):
		# presumably a negative index means "label not seen yet" — TODO confirm
		slot = buscar_locacion(seen_labels, label)
		if slot < 0:
			slot = len(seen_labels)
			seen_labels.append(label)
			pi_sums.append(0)
			member_counts.append(0)
			coord_sums.append({"lat": 0, "long": 0})
		pi_sums[slot] += pi_locations[i]
		coord_sums[slot]["lat"] += X[i, 0]
		coord_sums[slot]["long"] += X[i, 1]
		member_counts[slot] += 1

	the_indexs, the_sum = get_upToX_pi_locations(np.asarray(pi_sums), x)
	# centroid = coordinate sums divided by the cluster's member count
	the_centroids = [
		{
			"lat": coord_sums[i]["lat"] / member_counts[i],
			"long": coord_sums[i]["long"] / member_counts[i],
		}
		for i in the_indexs
	]
	return [the_centroids, the_sum]
开发者ID:cinai,项目名称:ODtrips,代码行数:33,代码来源:tfe.py


注:本文中的scipy.cluster.hierarchy.fcluster函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。