当前位置: 首页>>代码示例>>Python>>正文


Python hierarchy.inconsistent函数代码示例

本文整理汇总了Python中scipy.cluster.hierarchy.inconsistent函数的典型用法代码示例。如果您正苦于以下问题：Python inconsistent函数的具体用法？Python inconsistent怎么用？Python inconsistent使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了inconsistent函数的14个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: flatcluster

def flatcluster(
    dRow, runLogs, interClusterDistance="complete", plotDendrogram=True, cMethod="inconsistent", cValue=2.5
):
    """Cluster the runs hierarchically and tag each run log with its flat cluster.

    Parameters
    ----------
    dRow
        Distance data passed unchanged to ``linkage``.
    runLogs
        Indexable collection of run logs. ``log[0]`` of every entry must be
        a mutable mapping; it receives a ``"Cluster"`` key holding the
        cluster label as a string.
    interClusterDistance
        Linkage method name forwarded to ``linkage``.
    plotDendrogram
        When true, ``plotdendrogram`` is called with the linkage matrix.
    cMethod
        Criterion forwarded to ``fcluster``.
    cValue
        Threshold forwarded to ``fcluster``.

    Returns
    -------
    tuple
        ``(z, clusters, runLogs)``: the linkage matrix, the flat cluster
        labels, and the (mutated in place) run logs.

    Side effects: prints a per-cluster size summary and the lengths of
    ``clusters`` and ``runLogs``, and stores the number of clusters in the
    module-level ``clusterCount``.
    """
    from collections import Counter

    global clusterCount

    z = linkage(dRow, interClusterDistance)

    if plotDendrogram:
        plotdendrogram(z)

    clusters = fcluster(z, cValue, cMethod)

    noClusters = max(clusters)
    print("Total number of clusters:", noClusters)

    # Tally every label in a single pass instead of rescanning the whole
    # label array once per cluster.
    sizes = Counter(int(label) for label in clusters)
    for label in range(1, noClusters + 1):
        print("Cluster", str(label), ":", str(sizes[label]))

    clusterCount = noClusters
    print(len(clusters))
    print(len(runLogs))
    # Index into ``clusters`` explicitly so that a run log without a
    # matching label still fails loudly rather than being skipped.
    for i, log in enumerate(runLogs):
        log[0]["Cluster"] = str(clusters[i])

    return z, clusters, runLogs
开发者ID:adrivsh,项目名称:EMAworkbench,代码行数:34,代码来源:clusterer.py

示例2: _run_hier_clust_on_centroids

    def _run_hier_clust_on_centroids(self,method='average'):
        '''
        Run hierarchical clustering on the per-label centroids (scipy linkage)
        and rank candidate cuts of the resulting dendrogram.

        method -- linkage method name forwarded to hierarchy.linkage

        Reads self.templateLabels, self.templateMat, self.templateData and
        self.noiseSample. Sets self.y (condensed centroid distances),
        self.z (linkage matrix), self.bestModeLabels (mode labels for up to
        25 ranked cuts), self.modeSilValues and self.modeSizes (one entry per
        ranked cut) and self.clusterSilValues (mean silhouette value of the
        original template labels).
        '''

        uniqueLabels = np.sort(np.unique(self.templateLabels))
        centroids = np.array([self.templateMat[np.where(self.templateLabels == i)[0],:].mean(axis=0) for i in uniqueLabels])

        self.y = pdist(centroids)
        self.z = hierarchy.linkage(self.y,method)
        r2 = hierarchy.inconsistent(self.z,2)

        ## rank the average of linkage heights by standard deviation then report the averages
        meanHeights = r2[:,0]
        stdHeights = r2[:,1]
        rankedInds = np.argsort(stdHeights)[::-1]
        bestCutPoints = meanHeights[rankedInds]

        ## save centroid labels for all cuts of the dendrogram; only the first
        ## (best ranked) cut that yields a given number of clusters is kept
        allCentroidLabels = {}
        rankedK = []
        for cp in bestCutPoints:
            centroidLabels = hierarchy.fcluster(self.z,t=cp,criterion='distance')
            k = len(np.unique(centroidLabels))
            if str(k) in allCentroidLabels:
                continue

            allCentroidLabels[str(k)] = centroidLabels
            rankedK.append(k)

        ## save the top xx modes
        self.bestModeLabels = []
        print('doing ranking...')

        for rk in rankedK[:25]:
            centroidLabels = allCentroidLabels[str(rk)]
            modeLabels = self._get_mode_labels(self.templateLabels,centroidLabels,uniqueLabels)
            self.bestModeLabels.append(modeLabels)

        ## provide silvalue ranks in case we wish to reorder the top xx modes by sil value
        self.modeSilValues = []
        self.modeSizes = []
        allEvents = [self.templateData]

        for modeLabels in self.bestModeLabels:
            numClusters = np.unique(modeLabels).size
            silValues = get_silhouette_values(allEvents,[modeLabels],subsample=self.noiseSample,
                                              minNumEvents=5000,resultsType='raw')
            self.modeSilValues.append(silValues['0'].mean())
            self.modeSizes.append(numClusters)

        silValues = get_silhouette_values(allEvents,[self.templateLabels],subsample=self.noiseSample,
                                          minNumEvents=5000,resultsType='raw')
        self.clusterSilValues = silValues['0'].mean()
        self.modeSilValues = np.array(self.modeSilValues)
        self.modeSizes = np.array(self.modeSizes)
开发者ID:ajrichards,项目名称:cytostream,代码行数:60,代码来源:TemplateFileCreator.py

示例3: check_maxRstat_Q_linkage

 def check_maxRstat_Q_linkage(self, method, i):
     """Check maxRstat(Z, R, i) against the reference on the Q data set.

     method -- linkage method used to build Z from the Q observations
     i      -- column of the inconsistency matrix R to maximise over
     """
     X = hierarchy_test_data.Q_X
     Z = linkage(X, method)
     R = inconsistent(Z)
     # Use the requested column; it was previously hard-coded to 1, so every
     # parametrisation of ``i`` exercised the same case.
     MD = maxRstat(Z, R, i)
     expectedMD = calculate_maximum_inconsistencies(Z, R, i)
     assert_allclose(MD, expectedMD, atol=1e-15)
开发者ID:abudulemusa,项目名称:scipy,代码行数:8,代码来源:test_hierarchy.py

示例4: test_is_valid_im_4_and_up

 def test_is_valid_im_4_and_up(self):
     # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
     # (step size 3).
     for i in xrange(4, 15, 3):
         y = np.random.rand(i*(i-1)//2)
         Z = linkage(y)
         R = inconsistent(Z)
         assert_(is_valid_im(R) == True)
开发者ID:abudulemusa,项目名称:scipy,代码行数:8,代码来源:test_hierarchy.py

示例5: check_maxRstat_Q_linkage

 def check_maxRstat_Q_linkage(self, method, i):
     """Check maxRstat(Z, R, i) against the reference on the Q data set.

     method -- linkage method used to build Z from the Q observations
     i      -- column of the inconsistency matrix R to maximise over
     """
     X = eo['Q-X']
     Z = linkage(X, method)
     R = inconsistent(Z)
     # Use the requested column; it was previously hard-coded to 1, so every
     # parametrisation of ``i`` exercised the same case.
     MD = maxRstat(Z, R, i)
     expectedMD = calculate_maximum_inconsistencies(Z, R, i)
     assert_allclose(MD, expectedMD, atol=1e-15)
开发者ID:FrankZhao66,项目名称:scipy,代码行数:9,代码来源:test_hierarchy.py

示例6: test_is_valid_im_4_and_up_neg_dist

 def test_is_valid_im_4_and_up_neg_dist(self):
     # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
     # (step size 3) with negative link counts.
     for i in xrange(4, 15, 3):
         y = np.random.rand(i*(i-1)//2)
         Z = linkage(y)
         R = inconsistent(Z)
         R[i//2,2] = -0.5
         assert_(is_valid_im(R) == False)
         assert_raises(ValueError, is_valid_im, R, throw=True)
开发者ID:abudulemusa,项目名称:scipy,代码行数:10,代码来源:test_hierarchy.py

示例7: flatcluster

def flatcluster(dMatrix, clusterSetup):
    """Cluster the runs hierarchically and tag the global run logs.

    dMatrix      -- distance matrix, converted by ``prepareDRow`` into the
                    form handed to ``linkage``
    clusterSetup -- mapping of optional settings; missing keys fall back to
                    the defaults shown:
                      'inter-cluster distance' -> 'complete'
                      'plotDendrogram?'        -> plot (True)
                      'cutoff criteria'        -> 'inconsistent'
                      'cutoff criteria value'  -> 2.5

    Returns the flat cluster labels produced by ``fcluster``.

    Side effects: prints the prepared distances, the inconsistency matrix
    and a per-cluster size summary; writes each label (as a string) into
    ``runLogs[index][0]['Cluster']``; raises the module-level
    ``clusterCount`` to the largest label seen.
    """
    from collections import Counter

    # ``runLogs`` is only mutated in place, so just ``clusterCount`` (which
    # is rebound below) needs a ``global`` declaration.
    global clusterCount

    dRow = prepareDRow(dMatrix)
    print(dRow)

    # User-specified options win; otherwise the defaults apply.
    method = clusterSetup.get('inter-cluster distance', 'complete')
    z = linkage(dRow, method)
    inc = inconsistent(z)
    print(inc)

    if clusterSetup.get('plotDendrogram?', True):
        plotdendrogram(z)

    cmethod = clusterSetup.get('cutoff criteria', 'inconsistent')
    cvalue = clusterSetup.get('cutoff criteria value', 2.5)

    clusters = fcluster(z, cvalue, cmethod)

    noClusters = max(clusters)
    # Single-argument print calls emit the same text on Python 2 and 3.
    print('Total number of clusters: %s' % (noClusters,))

    # Tally every label in a single pass instead of rescanning the whole
    # label array once per cluster.
    sizes = Counter(int(label) for label in clusters)
    for label in range(1, noClusters + 1):
        print("Cluster %s : %s" % (label, sizes[label]))

    for runIndex, label in enumerate(clusters):
        runLogs[runIndex][0]['Cluster'] = str(label)
        if label > clusterCount:
            clusterCount = label
    return clusters
开发者ID:canerhamarat,项目名称:EMAworkbench,代码行数:50,代码来源:x_clustererV1.py

示例8: Hierarchy

def Hierarchy(V, **kwargs):
    """Performs hierarchical clustering on *V*. The function essentially uses two scipy functions: ``linkage`` and 
    ``fcluster``. See :func:`scipy.cluster.hierarchy.linkage` and :func:`scipy.cluster.hierarchy.fcluster` for the 
    explanation of the arguments. Listed here are the arguments that differ from those of scipy.

    :arg V: row-normalized eigenvectors for the purpose of clustering.
    :type V: :class:`numpy.ndarray`

    :arg inconsistent_percentile: if the clustering *criterion* for :func:`scipy.cluster.hierarchy.fcluster`
    is ``inconsistent`` and threshold *t* is not given (default), then the function will use the percentile specified 
    by this argument as the threshold. Default is 99.9.
    :type inconsistent_percentile: double

    :arg n_clusters: specifies the maximal number of clusters. If this argument is given, then the function will 
    automatically set *criterion* to ``maxclust`` and *t* equal to *n_clusters*.
    :type n_clusters: int

    :returns: flat cluster labels, one per row of *V*, as a 1-D array.

    :raises ValueError: if no threshold can be determined, i.e. *t* and *n_clusters* are both omitted and 
    *criterion* is not ``inconsistent``.
    """

    from scipy.cluster.hierarchy import linkage, fcluster, inconsistent

    method = kwargs.pop('method', 'single')
    metric = kwargs.pop('metric', 'euclidean')
    Z = linkage(V, method=method, metric=metric)

    criterion = kwargs.pop('criterion', 'inconsistent')
    t = kwargs.pop('t', None)
    ip = kwargs.pop('inconsistent_percentile', 99.9)
    depth = kwargs.pop('depth', 2)
    R = kwargs.pop('R', None)
    monocrit = kwargs.pop('monocrit', None)
    n_clusters = kwargs.pop('n_clusters', None)

    if n_clusters is not None:
        # An explicit cluster count overrides both the criterion and any threshold.
        criterion = 'maxclust'
        t = n_clusters
    elif t is None:
        if criterion != 'inconsistent':
            raise ValueError('threshold t must be given when criterion is %r' % (criterion,))
        # Derive the threshold only when it is actually needed. The fallback
        # used to be evaluated eagerly as the default of ``kwargs.pop('t', i)``,
        # which raised UnboundLocalError whenever it had not been computed.
        I = inconsistent(Z)
        t = np.percentile(I[:,3], ip)

    labels = fcluster(Z, t, criterion=criterion, depth=depth, R=R, monocrit=monocrit)
    return labels.flatten()
开发者ID:fongchun,项目名称:ProDy,代码行数:42,代码来源:cluster.py

示例9: silhouette_score

def silhouette_score(dendroMatrix, distance_metric, linkage_method, labels):
    """
    Generate silhouette score based on hierarchical clustering.

    The clustering criterion and threshold are read from ``request.form``
    ('criterion' and 'threshold'). An empty or out-of-range threshold is
    replaced by the upper bound computed for the chosen criterion.

    NOTE(review): the end of this function is not visible in this excerpt,
    so the Returns section below is taken from the original docstring and
    has not been checked against the return statement.

    Args:
        dendroMatrix: list, occurrence of words in different files
        distance_metric: string, style of distance metric in the dendrogram
        linkage_method: string, style of linkage method in the dendrogram
        labels: list, file names

    Returns:
        silhouetteScore: string, containing the result of silhouette score
        silhouetteAnnotation: string, annotation of the silhouette score
        score: float, silhouette score
        inconsistentMax: float, upper bound of threshold to calculate silhouette score if using Inconsistent criterion
        maxclustMax: integer, upper bound of threshold to calculate silhouette score if using Maxclust criterion
        distanceMax: float, upper bound of threshold to calculate silhouette score if using Distance criterion
        distanceMin: float, lower bound of threshold to calculate silhouette score if using Distance criterion
        monocritMax: float, upper bound of threshold to calculate silhouette score if using Monocrit criterion
        monocritMin: float, lower bound of threshold to calculate silhouette score if using Monocrit criterion
        threshold: float/integer/string, threshold (t) value that users entered, equals to 'N/A' if users leave the field blank
    """
    activeFiles = len(labels) - 1
    if (activeFiles > 2):  # since "number of labels should be more than 2 and less than n_samples - 1"
        # NOTE(review): Y is a square pairwise-distance matrix; scipy's linkage
        # documents 2-D input as observation vectors, not distances -- confirm
        # this is intended.
        Y = metrics.pairwise.pairwise_distances(dendroMatrix, metric=distance_metric)
        Z = hierarchy.linkage(Y, method=linkage_method)

        monocrit = None

        # 'maxclust' range
        maxclustMax = len(labels) - 1

        # 'inconsistent' range
        # Each bound below is cut to the character length of its 2-decimal
        # rendering by slicing the string form (truncation, not rounding).
        R = hierarchy.inconsistent(Z, 2)
        inconsistentMax = R[-1][-1]
        slen = len('%.*f' % (2, inconsistentMax))
        inconsistentMax = float(str(inconsistentMax)[:slen])

        # 'distance' range
        d = hierarchy.cophenet(Z)
        distanceMax = d.max()
        slen = len('%.*f' % (2, distanceMax))
        distanceMax = float(str(distanceMax)[:slen])
        distanceMin = d.min() + 0.01
        slen = len('%.*f' % (2, distanceMin))
        distanceMin = float(str(distanceMin)[:slen])

        # 'monocrit' range
        MR = hierarchy.maxRstat(Z, R, 0)
        monocritMax = MR.max()
        slen = len('%.*f' % (2, monocritMax))
        monocritMax = float(str(monocritMax)[:slen])
        monocritMin = MR.min() + 0.01
        slen = len('%.*f' % (2, monocritMin))
        monocritMin = float(str(monocritMin)[:slen])

        # A blank threshold stays the empty string; anything else must parse
        # as a float.
        threshold = request.form['threshold']
        if threshold == '':
            threshold = str(threshold)
        else:
            threshold = float(threshold)

        # Pick the criterion and fall back to that criterion's upper bound
        # when the threshold is blank or outside the computed range.
        if request.form['criterion'] == 'maxclust':
            criterion = 'maxclust'
            if (threshold == '') or (threshold > maxclustMax):
                threshold = len(labels) - 1
            else:
                threshold = round(float(threshold))
        elif request.form['criterion'] == 'distance':
            criterion = 'distance'
            if (threshold == '') or (threshold > distanceMax) or (threshold < distanceMin):
                threshold = distanceMax
        elif request.form['criterion'] == 'inconsistent':
            criterion = 'inconsistent'
            if (threshold == '') or (threshold > inconsistentMax):
                threshold = inconsistentMax
        elif request.form['criterion'] == 'monocrit':
            criterion = 'monocrit'
            monocrit = MR
            if (threshold == '') or (threshold > monocritMax) or (threshold < monocritMin):
                threshold = monocritMax
        # NOTE(review): `criterion` is unbound here if the form value matches
        # none of the four branches above.
        scoreLabel = hierarchy.fcluster(Z, t=threshold, criterion=criterion, monocrit=monocrit)

        if len(set(scoreLabel)) <= 1:  # this means all the files are divided into only 1 or less cluster
            silhouetteScore = "Silhouette Score: invalid for only 1 cluster."
            silhouetteAnnotation = "because your file are too similar to each other, program classify all of them in the same cluster"
            score = 'invalid for only 1 cluster'
            inconsistentMax = maxclustMax = distanceMax = distanceMin = monocritMax = monocritMin = threshold = 'N/A'
        else:
            score = metrics.silhouette_score(Y, labels=scoreLabel, metric='precomputed')
            score = round(score, constants.ROUND_DIGIT)
            # NOTE(review): str.decode exists only on Python 2.
            inequality = '≤'.decode('utf-8')
            silhouetteScore = "Silhouette Score: " + str(
                score) + "\n(-1 " + inequality + " Silhouette Score " + inequality + " 1)"
            silhouetteAnnotation = "The best value is 1 and the worst value is -1. Values near 0 indicate overlapping clusters. Negative values generally indicate that a sample has been assigned to the wrong cluster, as a different cluster is more similar."

    else:
        # Too few files to compute a silhouette score.
        silhouetteScore = "Silhouette Score: invalid for less than or equal to 2 files."
        silhouetteAnnotation = ""
        score = 'invalid for less than or equal to 2 files.'
#.........这里部分代码省略.........
开发者ID:chantisnake,项目名称:Lexos,代码行数:101,代码来源:dendrogrammer.py

示例10: check_inconsistent_tdist

 def check_inconsistent_tdist(self, depth):
     """Compare inconsistent() at *depth* with the stored ytdist reference.

     The input is the precomputed single-linkage matrix of the ytdist data.
     """
     single_linkage = hierarchy_test_data.linkage_ytdist_single
     computed = inconsistent(single_linkage, depth)
     reference = hierarchy_test_data.inconsistent_ytdist[depth]
     assert_allclose(computed, reference)
开发者ID:abudulemusa,项目名称:scipy,代码行数:4,代码来源:test_hierarchy.py

示例11: check_inconsistent_q_single

 def check_inconsistent_q_single(self, depth):
     """Compare inconsistent() at *depth* on the Q data set with its reference.

     The linkage is built with the 'single' method and 'euclidean' metric;
     the comparison uses an absolute tolerance of 1e-05.
     """
     tree = linkage(eo['Q-X'], 'single', 'euclidean')
     computed = inconsistent(tree, depth)
     reference_key = 'inconsistent-Q-single-%d' % depth
     assert_allclose(computed, eo[reference_key], atol=1e-05)
开发者ID:FrankZhao66,项目名称:scipy,代码行数:6,代码来源:test_hierarchy.py

示例12: check_inconsistent_tdist

 def check_inconsistent_tdist(self, method, depth, atol):
     """Compare inconsistent() on the tdist data with the stored reference.

     method -- linkage method used to build the hierarchy
     depth  -- depth passed to inconsistent()
     atol   -- absolute tolerance for the comparison
     """
     tree = linkage(squareform(_tdist), method)
     computed = inconsistent(tree, depth)
     reference_key = 'inconsistent-%s-tdist-depth-%d' % (method, depth)
     assert_allclose(computed, eo[reference_key], atol=atol)
开发者ID:FrankZhao66,项目名称:scipy,代码行数:6,代码来源:test_hierarchy.py

示例13: pdist

    # One centroid per unique label: the mean of the rows of case1 carrying
    # that label.
    centroids = np.array([case1[np.where(case1Labels == i)[0],:].mean(axis=0) for i in uniqueLabels])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ncluster = 27
    # Condensed pairwise distances between centroids, then a linkage on them.
    y = pdist(centroids)
    # NOTE(review): `method` is assigned here, but the linkage call below
    # passes the literal 'average' instead of using it.
    method = 'centroid'#'average'
    z = hierarchy.linkage(y,'average')
    #t = hierarchy.fcluster(27,criterion='maxclust')

    ## computes the max distance between any cluster and each non-singleton cluster
    print 'max dists', hierarchy.maxdists(z)
    

    ## inconsistency
    r = hierarchy.inconsistent(z)
    print 'r',r
    #print 'max inconsts', hierarchy.maxinconsts(z,r,i)
    print 'z',z
    #print 'blah', z[:,2] - np.array(z[1:,2].tolist()+[0])
    print z[:,2]
    print np.hstack([z[1:,2],[0]])
    # Absolute difference between each value in column 2 of z and the next
    # one; the last row is compared against 0.
    levelDiffs = np.abs(z[:,2] - np.hstack([z[1:,2],[0]]))
    levelDiffMeans = z[:,2]# - 0.001 #np.hstack([z[1:,2],[0]]) / 2.0#z[:,2] + np.hstack([z[1:,2],[0]]) / 2.0

    print 'diffs',levelDiffs*100
    # Indices of the differences, largest first.
    diffInds = np.argsort(levelDiffs)
    diffInds = diffInds[::-1]
    # NOTE(review): 'a' shows the top 6 entries while 'b' shows the top 5
    # indices -- confirm the mismatch is intended.
    print 'a',levelDiffMeans[diffInds[:6]] #,levelDiffMeans[diffInds[1]],levelDiffMeans[diffInds[2]],levelDiffMeans[diffInds[3]]
    print 'b', diffInds[:5]
    hierarchy.dendrogram(z)
开发者ID:ajrichards,项目名称:cytostream,代码行数:31,代码来源:SimulatedData3.py

示例14: linkage

# Tutorial script: build two Gaussian blobs, cluster them with Ward linkage,
# inspect the inconsistency matrix, and estimate the number of clusters from
# the "elbow" in the last merge distances.
# Every print below is a single-argument call so the script emits the same
# text on Python 2 and Python 3.

# generate two clusters: a with 100 points, b with 50:
np.random.seed(4711)  # for repeatability of this tutorial
a = np.random.multivariate_normal([10, 0], [[3, 1], [1, 4]], size=[100,])
b = np.random.multivariate_normal([0, 20], [[3, 1], [1, 4]], size=[50,])
X = np.concatenate((a, b),)
print(X.shape)  # 150 samples with 2 dimensions
#plt.scatter(X[:,0], X[:,1])
#plt.show()

# generate the linkage matrix
Z = linkage(X, 'ward')

print(Z.shape)

# inconsistency statistics of the last 10 merges, looking 5 levels deep
depth = 5
incons = inconsistent(Z, depth)
print(incons[-10:])

# distances of the last 10 merges, plotted from the final merge backwards
last = Z[-10:, 2]
last_rev = last[::-1]
idxs = np.arange(1, len(last) + 1)
plt.plot(idxs, last_rev)

acceleration = np.diff(last, 2)  # 2nd derivative of the distances
acceleration_rev = acceleration[::-1]
plt.plot(idxs[:-2] + 1, acceleration_rev)
plt.show()
k = acceleration_rev.argmax() + 2  # if idx 0 is the max of this we want 2 clusters
print("clusters: %s" % (k,))

开发者ID:ronanki,项目名称:Hybrid_prosody_model,代码行数:29,代码来源:hierarchical_clustering.py


注:本文中的scipy.cluster.hierarchy.inconsistent函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。