当前位置: 首页>>代码示例>>Python>>正文


Python cluster.HierarchicalClustering类代码示例

本文整理汇总了Python中cluster.HierarchicalClustering的典型用法代码示例。如果您正苦于以下问题:Python HierarchicalClustering类的具体用法?Python HierarchicalClustering怎么用?Python HierarchicalClustering使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。


在下文中一共展示了HierarchicalClustering类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: testClusterLen1

 def testClusterLen1(self):
     """
     Clustering a one-element input must give back a list holding just
     that element.
     """
     def absolute_difference(a, b):
         return abs(a - b)

     clustering = HierarchicalClustering([876], absolute_difference)
     level = clustering.getlevel(40)
     self.assertEqual([876], level)
开发者ID:Trekky12,项目名称:python-cluster,代码行数:7,代码来源:test.py

示例2: testCompleteLinkage

    def testCompleteLinkage(self):
        """Cluster the integer fixture with linkage='complete' at level 40.

        Both the clusters and the members inside each cluster are sorted
        before comparing, so the assertion does not depend on the order in
        which the algorithm happens to emit them.
        """
        def absolute_difference(a, b):
            return abs(a - b)

        clustering = HierarchicalClustering(
            self.__data, absolute_difference, linkage='complete')
        clusters = clustering.getlevel(40)
        normalised = sorted(sorted(members) for members in clusters)

        self.assertEqual(normalised, [
            [24],
            [84],
            [124, 131, 134],
            [336, 365, 365],
            [391, 398],
            [518],
            [542, 564],
            [594],
            [676],
            [791],
            [835],
            [940, 956, 971],
        ])
开发者ID:bwall,项目名称:python-cluster,代码行数:25,代码来源:test_hierarchical.py

示例3: testDataTypes

 def testDataTypes(self):
     """Regression test (bug number unknown): getlevel(0.5) must yield
     only values whose exact type is the built-in list."""
     clustering = HierarchicalClustering(self.__data, self.sim)
     list_type = type([])
     for cluster in clustering.getlevel(0.5):
         self.assertEqual(type(cluster), list_type,
                          "Every item should be a list!")
开发者ID:Trekky12,项目名称:python-cluster,代码行数:7,代码来源:test.py

示例4: buildHcluster

def buildHcluster(data, threshold):
    """
    Build hierarchical clusters, name them, and log the run.

    Input:
        data: items to cluster, e.g.
              [[12, 12], [34, 34], [23, 23], [32, 32], [46, 46],
               [96, 96], [13, 13], [1, 1], [4, 4], [9, 9]]
              (original author's note: the first variable is the key and
              is not counted for clustering)
        threshold: threshold distance to break clusters; passed to
              getlevel() and also embedded in the log file name.
    Output:
        Returns None.  The clusters are handed to writeCluster() (the
        original note names /searchc/save/H.cluster as the record file --
        presumably written there; confirm against writeCluster) and a run
        log is written to path + '/log/H_<threshold>.log'.
    """
    # Single-argument print(...) produces the same output on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    print("Clustering...")
    started = datetime.datetime.now()
    cl = HierarchicalClustering(data, distance_function, 'complete')
    clusterH = cl.getlevel(threshold)
    finished = datetime.datetime.now()

    print("Naming...")
    featureAll = readFeature('all')
    named = nameCluster(clusterH, featureAll)
    name, centroid = named[0], named[1]
    writeCluster('H', clusterH, name, centroid, threshold)

    print("Writing...")
    header = (
        "Hierahical Clustering Log\nDate:\t" + str(started.date())
        + "\nStart:\t" + str(started.time())
        + "\nEnd:\t" + str(finished.time())
        + "\nDuration:\t" + str(finished - started)
        + "\nH:\t" + str(threshold)
        + "\nMethod:\tComplete"
        + "\nNo. cluster:\t" + str(len(clusterH)) + "\n\n"
    )
    with open(path + '/log/H_' + str(threshold) + '.log', 'w') as outfile:
        outfile.write(header)
        for cluster in clusterH:
            # NOTE(review): the "- 2" offset is kept from the original; its
            # meaning is not visible here -- confirm against the data layout.
            outfile.write(str(len(cluster) - 2) + "\n")

    return
开发者ID:asif080199,项目名称:ExamPapers,代码行数:31,代码来源:oldmethod.py

示例5: clustertitle

def clustertitle( request ):
    """Cluster the last day's live articles by n-gram title similarity.

    Up to 1000 articles with status "live" published within the last day
    are clustered on their titles.  Clusters with more than one article
    become 'cluster' nodes (with a topic from common_terms() over the
    titles); single articles become 'article' nodes.  The node list is
    rendered with the "clusters.html" template under the key "clusters".
    """

    from cluster import HierarchicalClustering

    def sim( a, b ):
        # The clusterer is given 1 - NGram.compare(...) as its distance
        # (presumably compare() is a similarity score -- confirm).
        return 1 - NGram.compare( a.title, b.title, warp=WARP, iconv=enrich )

    since = datetime.datetime.now() - datetime.timedelta(1)
    articles = Article.objects.filter(
        status="live", date_published__gte=since
    ).order_by("date_published")[:1000]

    cl = HierarchicalClustering(articles, sim)
    # 0.7 chosen pretty much through trial and error :)
    res = cl.getlevel(0.7)

    clusters = []
    for cluster in res:
        if len(cluster) > 1:
            node = {
                'type': 'cluster',
                'topic': common_terms([a.title for a in cluster]),
                'articles': cluster,
            }
        else:
            node = {
                'type': 'article',
                'article': cluster[0],
            }
        clusters.append(node)

    # Pass the context positionally: the third positional parameter of
    # render() is the template context in every Django version, while the
    # `dictionary=` keyword only exists in older releases.
    return render(request, "clusters.html", {"clusters": clusters})
开发者ID:mrmonkington,项目名称:channelfunnel,代码行数:32,代码来源:views.py

示例6: test

def test(data, expected):
    """Cluster `data` at level 5 and assert the result equals `expected`.

    The distance between two items is their absolute difference.  The
    sorted input, the actual result and the expected result are printed
    before the assertion, and 'ok' afterwards.

    Raises:
        AssertionError: if the clustering result differs from `expected`.
    """
    cl = HierarchicalClustering(data, lambda x, y: abs(x - y))
    result = cl.getlevel(5)
    # Use the call form throughout: the original mixed print(...) with
    # Python 2 print statements, which do not parse on Python 3.
    print(sorted(data))
    print(result)
    print(expected)
    assert result == expected
    print('ok')
开发者ID:dpallagolla,项目名称:sequoia_justdial_analytics,代码行数:8,代码来源:regression.py

示例7: testSingleLinkage

    def testSingleLinkage(self):
        """Cluster three 2-D points without an explicit linkage argument
        and check that getlevel(40) does not return None."""

        def euclidian_distance(a, b):
            squared = [pow(p - q, 2) for p, q in zip(a, b)]
            return sqrt(sum(squared))

        points = [(1, 1), (1, 2), (1, 3)]
        self.__data = points
        clustering = HierarchicalClustering(points, euclidian_distance)
        self.assertIsNotNone(clustering.getlevel(40))
开发者ID:Denvar94,项目名称:python-cluster,代码行数:10,代码来源:test_hierarchical.py

示例8: testIssue28

    def testIssue28(self):
        """Issue28 (Hierarchical Clustering)

        The clustered items are dictionary keys; the distance between two
        keys is the absolute difference of the values they map to.  The
        test only checks that getlevel(20) returns something.
        """
        points1D = {
            'p4': 5, 'p2': 6, 'p7': 10,
            'p9': 120, 'p10': 121, 'p11': 119,
        }

        def distance_func(a, b):
            return abs(points1D[a] - points1D[b])

        labels = list(points1D.keys())
        cl = HierarchicalClustering(labels, distance_func)
        self.assertIsNotNone(cl.getlevel(20))
开发者ID:exhuma,项目名称:python-cluster,代码行数:12,代码来源:test_hierarchical.py

示例9: testCluster

 def testCluster(self):
     """Cluster the integer fixture explicitly via cluster() and compare
     the clusters reported at level 40 with the known result."""
     clustering = HierarchicalClustering(
         self.__data, lambda a, b: abs(a - b))
     clustering.cluster()
     expected = [
         [24],
         [84, 124, 131, 134],
         [336, 365, 365, 365, 398, 391],
         [940, 956, 971],
         [791],
         [835],
         [676],
         [518, 564, 542],
     ]
     self.assertEqual(expected, clustering.getlevel(40))
开发者ID:Trekky12,项目名称:python-cluster,代码行数:14,代码来源:test.py

示例10: hierarchical_clustering_by_title

def _split_title(title, separators):
    """Split `title` on the first separator it contains, then split each
    non-empty, stripped piece again; a title with no separator is returned
    unchanged as a one-element list."""
    for separator in separators:
        if separator in title:
            pieces = []
            for part in title.split(separator):
                part = part.strip()
                if part:
                    pieces.extend(_split_title(part, separators))
            return pieces
    return [title]


def hierarchical_clustering_by_title(csv_file):
    """Group contacts whose job titles fall into the same title cluster.

    Reads a tab-delimited, UTF-16 encoded CSV file, splits each contact's
    'Current Position' into individual job titles (using the module-level
    `separators` and `transforms`), clusters the distinct titles with
    HierarchicalClustering at DISTANCE_THRESHOLD, and drops clusters that
    hold a single title.

    Args:
        csv_file: path of the CSV export; rows need the columns
            'Current Position', 'First Name' and 'Last Name'.

    Returns:
        A dict mapping each cluster (as a tuple of titles) to a list of
        'First Last' names.  A contact is listed once per title of theirs
        that belongs to the cluster.
    """
    # Close the file deterministically instead of leaking the handle.
    with codecs.open(csv_file, "rb", "utf-16") as handle:
        csvReader = csv.DictReader(handle, delimiter='\t', quotechar='"')
        # The original discarded the first record via the Python-2-only
        # csvReader.next(); next() works on both major versions.
        # NOTE(review): DictReader already consumes the header row, so this
        # drops the first data row -- confirm that is intended.
        next(csvReader, None)
        contacts = list(csvReader)

    all_titles = set()
    for contact in contacts:
        position = contact['Current Position']
        if position == '':
            contact['Job Titles'] = ['']
            continue

        # The original removed from and extended `titles` while iterating
        # over it (and rebound the loop variable inside a comprehension),
        # which could leave some titles unsplit.  Splitting through a
        # helper avoids mutating a list that is being iterated.
        titles = _split_title(position, separators)

        for transform in transforms:
            titles = [title.replace(*transform) for title in titles]
        contact['Job Titles'] = titles
        all_titles.update(titles)

    all_titles = list(all_titles)

    # Define a scoring function
    def score(title1, title2):
        return DISTANCE(set(title1.split()), set(title2.split()))

    # Feed the class your data and the scoring function
    hc = HierarchicalClustering(all_titles, score)

    # Cluster the data according to a distance threshold
    clusters = hc.getlevel(DISTANCE_THRESHOLD)

    # Remove singleton clusters
    clusters = [c for c in clusters if len(c) > 1]

    # Round up contacts who are in these clusters and group them together
    clustered_contacts = {}
    for cluster in clusters:
        members = []
        clustered_contacts[tuple(cluster)] = members
        for contact in contacts:
            for title in contact['Job Titles']:
                if title in cluster:
                    members.append('%s %s' % (contact['First Name'],
                                              contact['Last Name']))

    return clustered_contacts
开发者ID:paudan,项目名称:python-scripts,代码行数:50,代码来源:connections_analysis.py

示例11: breakToPeriods

def breakToPeriods(arg, maximaOrder=20, clusteringGranularity = 0.5, file=False):
    """Cut a 1-D signal into periods delimited by similar local maxima.

    Local maxima are found with argrelextrema (order `maximaOrder`) and
    their values are clustered by absolute difference at a level of
    int(amplitude * clusteringGranularity).  The largest cluster of maxima
    (or all maxima, when each one ends up in its own cluster) supplies the
    cut points.  A slice between two consecutive cut points is kept only
    if its length lies strictly between 0.5 and 1.8 times the average
    length len(signal) / number_of_cut_points.

    Args:
        arg: the signal (a list, or an object with tolist()), or, when
            `file` is true, the path of a text file with one float per
            line.
        maximaOrder: `order` argument passed to argrelextrema.
        clusteringGranularity: fraction of the signal amplitude used as
            the clustering level.
        file: treat `arg` as a file path when true.

    Returns:
        A list of periods (each a list slice of the signal); empty when
        the signal is empty, nothing clusters, or fewer than two cut
        points are found.
    """
    if file:
        # The original assigned to a local named `open` further down, which
        # made this call raise UnboundLocalError; that local is now called
        # `start`.  The `with` block also closes the file.
        with open(arg, 'r') as handle:
            inputAsList = [float(line) for line in handle]
    else:
        inputAsList = arg
    if not isinstance(inputAsList, list):
        inputAsList = inputAsList.tolist()

    a = np.array(inputAsList)
    localMax = argrelextrema(a, np.greater, 0, maximaOrder)[0].tolist()
    try:
        amplitude = np.max(a) - np.min(a)
    except ValueError:
        # np.max / np.min raise ValueError on an empty array.
        return []

    cl = HierarchicalClustering(a.take(localMax).tolist(),
                                lambda x, y: abs(x - y))
    clusters = cl.getlevel(int(amplitude * clusteringGranularity))
    if len(clusters) == 0:
        return []

    if len(clusters) == len(localMax):
        # Every maximum was clustered separately: use all of them.  Flatten
        # the clusters to plain values, since the index lookup below is by
        # value (the original passed the clusters themselves, which
        # list.index cannot find).
        longestSeq = []
        for cluster in clusters:
            if isinstance(cluster, list):
                longestSeq.extend(cluster)
            else:
                longestSeq.append(cluster)
    else:
        # First cluster of maximal size, as in the original strict ">" scan.
        longestSeq = max(clusters, key=len)

    if len(longestSeq) < 2:
        return []

    # NOTE(review): "/" is floor division on Python 2 and true division on
    # Python 3 for these ints; left unchanged to avoid moving thresholds.
    averageLength = len(inputAsList) / len(longestSeq)

    # NOTE(review): index() finds the first occurrence of a value, so
    # repeated values map to the same position.
    indices = sorted(inputAsList.index(x) for x in longestSeq)

    periods = []
    start = indices[0]
    for close in indices[1:]:
        strideLen = close - start
        if 0.5 * averageLength < strideLen < 1.8 * averageLength:
            periods.append(inputAsList[start:close])
        start = close
    return periods
开发者ID:ranBernstein,项目名称:GaitKinect,代码行数:49,代码来源:partitionizing.py

示例12: main

def main():
    """Cluster the PhamCluster score matrix at a fixed cutoff and print
    the resulting clusters together with their count."""
    pC = PhamCluster()
    pC.initialize_matrix()

    # The bound method is passed directly; the original wrapped it in a
    # lambda that only forwarded its two arguments.
    cl = HierarchicalClustering(pC.scoreMatrix, pC.get_distance)

    cutoff = 1
    # Each message is formatted into one string so that print(...) emits
    # the same text on Python 2 and Python 3.
    print('using cutoff of %s' % cutoff)
    clusters = cl.getlevel(float(cutoff))
    print('there are %d clusters' % len(clusters))
    print(clusters)
    print('there are %d clusters' % len(clusters))
开发者ID:byuphamerator,项目名称:phamerator-dev,代码行数:15,代码来源:phamCluster.py

示例13: testCluster

 def testCluster(self):
     """Cluster the string fixture with self.sim and compare the
     clusters reported at level 0.5 with the known result."""
     clustering = HierarchicalClustering(self.__data, self.sim)
     expected = [
         ['ultricies'],
         ['Sed'],
         ['Phasellus'],
         ['mi'],
         ['Nullam'],
         ['sit', 'elit', 'elit', 'Ut', 'amet', 'at'],
         ['leo', 'Lorem', 'dolor'],
         ['congue', 'neque', 'consectetuer', 'consequat'],
         ['adipiscing'],
         ['ipsum'],
     ]
     self.assertEqual(expected, clustering.getlevel(0.5))
开发者ID:dpallagolla,项目名称:sequoia_justdial_analytics,代码行数:15,代码来源:test.py

示例14: getCorners

def getCorners(intersections):
    """Reduce a cloud of intersection points to four corner points.

    The points are clustered at level 25 using length([p1, p2]) as the
    distance.  The four clusters with the most members are each reduced
    with averageCoords(), split into the two with the smaller first
    coordinate and the two with the larger one, and each pair is ordered
    by second coordinate.

    Returns a 4-tuple which the original author labelled as
    (top-left, bottom-left, top-right, bottom-right).
    """
    def distance(p1, p2):
        return length([p1, p2])

    clusters = HierarchicalClustering(intersections, distance).getlevel(25)

    # No check that these really are the corners: the four most populated
    # clusters are simply taken.
    biggest = sorted(clusters, key=len, reverse=True)[:4]

    by_first = sorted(map(averageCoords, biggest), key=lambda p: p[0])
    left = sorted(by_first[:2], key=lambda p: p[1])
    right = sorted(by_first[2:], key=lambda p: p[1])
    return left[0], left[1], right[0], right[1]
开发者ID:dferrer,项目名称:Pool,代码行数:15,代码来源:poolcv.py

示例15: testCluster

 def testCluster(self):
     """String clustering test; it is skipped as its first statement,
     for the reason given in the skip message."""
     self.skipTest('These values lead to non-deterministic results. '
                   'This makes it untestable!')
     clustering = HierarchicalClustering(self.__data, self.sim)
     expected = [
         ['ultricies'],
         ['Sed'],
         ['Phasellus'],
         ['mi'],
         ['Nullam'],
         ['sit', 'elit', 'elit', 'Ut', 'amet', 'at'],
         ['leo', 'Lorem', 'dolor'],
         ['congue', 'neque', 'consectetuer', 'consequat'],
         ['adipiscing'],
         ['ipsum'],
     ]
     self.assertEqual(expected, clustering.getlevel(0.5))
开发者ID:bwall,项目名称:python-cluster,代码行数:17,代码来源:test_hierarchical.py


注:本文中的cluster.HierarchicalClustering类示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。