本文整理汇总了Python中cluster.HierarchicalClustering类的典型用法代码示例。如果您正苦于以下问题：Python HierarchicalClustering类的具体用法？Python HierarchicalClustering怎么用？Python HierarchicalClustering使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了HierarchicalClustering类的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: testClusterLen1
def testClusterLen1(self):
    """
    A one-element input must come back as that same one-element list,
    whatever level is requested.
    """
    clustering = HierarchicalClustering(
        [876],
        lambda x, y: abs(x - y),
    )
    level = clustering.getlevel(40)
    self.assertEqual([876], level)
示例2: testCompleteLinkage
def testCompleteLinkage(self):
    """
    Cluster the integer fixture with ``linkage='complete'`` and compare the
    groups found at level 40 against the known-good partition.
    """
    def absolute_difference(x, y):
        return abs(x - y)

    clustering = HierarchicalClustering(
        self.__data, absolute_difference, linkage='complete')

    # Ordering inside and across clusters is an implementation detail, so
    # both are normalised before comparing.
    normalised = sorted(
        sorted(members) for members in clustering.getlevel(40))

    self.assertEqual(normalised, [
        [24],
        [84],
        [124, 131, 134],
        [336, 365, 365],
        [391, 398],
        [518],
        [542, 564],
        [594],
        [676],
        [791],
        [835],
        [940, 956, 971],
    ])
示例3: testDataTypes
def testDataTypes(self):
    """
    Regression check (bug number not recorded): every cluster returned by
    ``getlevel`` must be exactly a ``list``.
    """
    list_type = type([])
    clustering = HierarchicalClustering(self.__data, self.sim)
    clusters = clustering.getlevel(0.5)
    for cluster in clusters:
        self.assertEqual(
            type(cluster), list_type, "Every item should be a list!")
示例4: buildHcluster
def buildHcluster(data, threshold):
    """
    Build a complete-linkage hierarchical clustering, name the clusters,
    persist them and write a run log.

    Input:
        data: e.g. data = [ [12,12],[34,34],
                            [23,23],[32,32],
                            [46,46],[96,96],
                            [13,13],[1,1],
                            [4,4],[9,9]]
              The first variable is key, not counted for clustering.
        threshold: threshold distance to break cluster
    Output:
        Cluster records are written through ``writeCluster`` (the original
        note names /searchc/save/H.cluster as the target -- TODO confirm),
        and a log goes to ``<path>/log/H_<threshold>.log``.

    The ``print`` calls use the single-argument parenthesised form, which
    prints the same text on Python 2 and Python 3.
    """
    print("Clustering...")
    started = datetime.datetime.now()
    cl = HierarchicalClustering(data, distance_function, 'complete')
    clusterH = cl.getlevel(threshold)  # get h clusters
    finished = datetime.datetime.now()

    print("Naming...")
    featureAll = readFeature('all')
    named = nameCluster(clusterH, featureAll)
    name = named[0]
    centroid = named[1]
    writeCluster('H', clusterH, name, centroid, threshold)

    print("Writing...")
    # The log text is runtime output and is kept byte-for-byte, including
    # its original spelling.
    header = (
        "Hierahical Clustering Log\n"
        "Date:\t%s\n"
        "Start:\t%s\n"
        "End:\t%s\n"
        "Duration:\t%s\n"
        "H:\t%s\n"
        "Method:\tComplete\n"
        "No. cluster:\t%s\n\n"
    ) % (started.date(), started.time(), finished.time(),
         finished - started, threshold, len(clusterH))
    with open(path + '/log/H_' + str(threshold) + '.log', 'w') as outfile:
        outfile.write(header)
        for cluster in clusterH:
            # NOTE(review): the "- 2" presumably discounts two non-member
            # entries per cluster record -- confirm against writeCluster.
            outfile.write(str(len(cluster) - 2) + "\n")
示例5: clustertitle
def clustertitle(request):
    """
    Cluster recent live articles by n-gram similarity of their titles.

    Looks at up to 1000 live articles published in the last day, oldest
    first, and renders ``clusters.html`` with one node per cluster:
    multi-article clusters carry a topic and their articles, singletons
    carry just the article.
    """
    from cluster import HierarchicalClustering

    def sim(a, b):
        # Distance is the complement of the n-gram similarity score.
        similarity = NGram.compare(a.title, b.title, warp=WARP, iconv=enrich)
        return 1 - similarity

    def as_node(group):
        if len(group) <= 1:
            return {
                'type': 'article',
                'article': group[0]
            }
        return {
            'type': 'cluster',
            'topic': common_terms([member.title for member in group]),
            'articles': group
        }

    one_day_ago = datetime.datetime.now() - datetime.timedelta(1)
    recent = Article.objects.filter(
        status="live", date_published__gte=one_day_ago)
    articles = recent.order_by("date_published")[:1000]

    cl = HierarchicalClustering(articles, sim)
    # 0.7 chosen pretty much through trial and error :)
    clusters = [as_node(group) for group in cl.getlevel(0.7)]

    return render(request, "clusters.html", dictionary={"clusters": clusters})
示例6: test
def test(data, expected):
    """
    Cluster ``data`` by absolute difference at level 5 and assert that the
    result equals ``expected``.

    Prints the sorted input, the actual result and the expected result
    before the check, then ``ok`` on success. All prints use the
    single-argument parenthesised form so the helper runs on both
    Python 2 and Python 3 (the original mixed it with ``print x``
    statements, which Python 3 rejects).

    Raises:
        AssertionError: if the clustering result differs from ``expected``.
    """
    cl = HierarchicalClustering(data, lambda x, y: abs(x - y))
    result = cl.getlevel(5)
    print(sorted(data))
    print(result)
    print(expected)
    assert result == expected, "got %r, expected %r" % (result, expected)
    print('ok')
示例7: testSingleLinkage
def testSingleLinkage(self):
    """
    Smoke test: clustering 2-D points with a Euclidean distance and the
    default linkage must yield a result at level 40.
    """
    def euclidian_distance(a, b):
        squared_deltas = [(p - q) ** 2 for p, q in zip(a, b)]
        return sqrt(sum(squared_deltas))

    self.__data = [(1, 1), (1, 2), (1, 3)]
    clustering = HierarchicalClustering(self.__data, euclidian_distance)
    self.assertIsNotNone(clustering.getlevel(40))
示例8: testIssue28
def testIssue28(self):
    """
    Issue28 (Hierarchical Clustering): items are string keys whose
    distance is looked up through a side table; clustering must still
    produce a result.
    """
    points1D = {
        'p4': 5, 'p2': 6, 'p7': 10,
        'p9': 120, 'p10': 121, 'p11': 119,
    }

    def distance_func(a, b):
        return abs(points1D[a] - points1D[b])

    labels = list(points1D)
    clustering = HierarchicalClustering(labels, distance_func)
    self.assertIsNotNone(clustering.getlevel(20))
示例9: testCluster
def testCluster(self):
    """
    Cluster the integer fixture by absolute difference (explicitly calling
    ``cluster()`` first) and check the exact grouping at level 40.
    """
    expected = [
        [24],
        [84, 124, 131, 134],
        [336, 365, 365, 365, 398, 391],
        [940, 956, 971],
        [791],
        [835],
        [676],
        [518, 564, 542],
    ]
    clustering = HierarchicalClustering(self.__data, lambda x, y: abs(x - y))
    clustering.cluster()
    self.assertEqual(expected, clustering.getlevel(40))
示例10: hierarchical_clustering_by_title
def _split_job_title(title, separators):
    """
    Split ``title`` on every separator it contains.

    Parts produced by a split are stripped and empty parts are dropped; a
    title that contains none of the separators is returned unchanged.
    """
    parts = [title]
    for separator in separators:
        split_parts = []
        for part in parts:
            if part.find(separator) >= 0:
                split_parts.extend(
                    piece.strip() for piece in part.split(separator)
                    if piece.strip() != '')
            else:
                split_parts.append(part)
        parts = split_parts
    return parts


def hierarchical_clustering_by_title(csv_file):
    """
    Cluster contacts by similarity of their job titles.

    Reads a tab-delimited UTF-16 CSV, normalises each contact's
    'Current Position' into a list of job titles (split on the module's
    ``separators`` and rewritten by its ``transforms``), clusters the
    distinct titles with ``DISTANCE`` over their word sets, and groups
    contact names under every non-singleton cluster.

    Compared with the earlier version this no longer mutates the title
    list while iterating over it (which skipped pieces and could raise
    ValueError when a title held two separators), closes the input file,
    and uses the ``next()`` builtin so it runs on Python 2.6+ and 3.

    Args:
        csv_file: path of the CSV export; rows need 'Current Position',
            'First Name' and 'Last Name' columns.

    Returns:
        dict mapping ``tuple(cluster)`` to a list of "First Last" strings,
        one entry per matching title of each contact.
    """
    with codecs.open(csv_file, "rb", "utf-16") as handle:
        csvReader = csv.DictReader(handle, delimiter='\t', quotechar='"')
        # The first data row is skipped, as before; the default stops an
        # empty file from raising StopIteration.
        next(csvReader, None)
        contacts = list(csvReader)

    all_titles = set()
    for contact in contacts:
        position = contact['Current Position']
        if position == '':
            contact['Job Titles'] = ['']
            continue
        titles = _split_job_title(position, separators)
        for transform in transforms:
            titles = [title.replace(*transform) for title in titles]
        contact['Job Titles'] = titles
        all_titles.update(titles)
    all_titles = list(all_titles)

    # Define a scoring function
    def score(title1, title2):
        return DISTANCE(set(title1.split()), set(title2.split()))

    # Feed the class your data and the scoring function
    hc = HierarchicalClustering(all_titles, score)
    # Cluster the data according to a distance threshold
    clusters = hc.getlevel(DISTANCE_THRESHOLD)
    # Remove singleton clusters
    clusters = [c for c in clusters if len(c) > 1]

    # Round up contacts who are in these clusters and group them together
    clustered_contacts = {}
    for cluster in clusters:
        members = []
        clustered_contacts[tuple(cluster)] = members
        for contact in contacts:
            for title in contact['Job Titles']:
                if title in cluster:
                    members.append(
                        '%s %s' % (contact['First Name'], contact['Last Name']))
    return clustered_contacts
示例11: breakToPeriods
def breakToPeriods(arg, maximaOrder=20, clusteringGranularity = 0.5, file=False):
    """
    Cut a signal into periods delimited by its dominant local maxima.

    Local maxima are found with ``argrelextrema`` and their values are
    clustered by absolute difference. The largest cluster supplies the
    period boundaries, and each stretch between consecutive boundaries is
    kept when its length is strictly between 0.5x and 1.8x the average
    period length.

    Args:
        arg: the samples (a list, or an object with ``tolist()`` such as a
            NumPy array), or a file path when ``file`` is true.
        maximaOrder: ``order`` passed to ``argrelextrema``.
        clusteringGranularity: fraction of the signal amplitude used, after
            truncation to int, as the clustering level.
        file: when true, ``arg`` is the path of a text file holding one
            float per line.

    Returns:
        A list of sample lists, one per accepted period; ``[]`` when the
        input is empty or fewer than two boundaries are found.
    """
    if file:
        # The original bound a local named ``open`` further down, which
        # made this call raise UnboundLocalError; the handle was also
        # never closed.
        with open(arg, 'r') as handle:
            inputAsList = [float(line) for line in handle]
    elif isinstance(arg, list):
        inputAsList = arg
    elif hasattr(arg, 'tolist'):
        inputAsList = arg.tolist()
    else:
        inputAsList = list(arg)

    samples = np.array(inputAsList)
    if samples.size == 0:
        return []

    localMax = argrelextrema(samples, np.greater, 0, maximaOrder)[0].tolist()
    try:
        amplitude = np.max(samples) - np.min(samples)
    except (TypeError, ValueError):
        return []

    cl = HierarchicalClustering(
        samples.take(localMax).tolist(), lambda x, y: abs(x - y))
    clusters = cl.getlevel(int(amplitude * clusteringGranularity))
    if not clusters:
        return []

    if len(clusters) == len(localMax):
        # It clustered every maxima differently.
        # NOTE(review): if getlevel returns a list of one-element lists
        # here, the index lookup below receives lists rather than sample
        # values -- confirm against the clustering library.
        longestSeq = clusters
    else:
        # The first cluster of maximal size wins, as before.
        longestSeq = max(clusters, key=len)

    if len(longestSeq) < 2:
        return []

    averageLength = len(inputAsList) / len(longestSeq)
    # NOTE(review): list.index returns the first occurrence, so repeated
    # sample values all map to the same boundary.
    indices = sorted(inputAsList.index(x) for x in longestSeq)

    periods = []
    start = indices[0]
    for end in indices[1:]:
        strideLen = end - start
        if 0.5 * averageLength < strideLen < 1.8 * averageLength:
            periods.append(inputAsList[start:end])
        start = end
    return periods
示例12: main
def main():
    """
    Build a PhamCluster score matrix, cluster it hierarchically at a fixed
    cutoff of 1 and print the resulting clusters with their count.

    Output uses %-formatted single-argument ``print`` calls, which emit
    the same text on Python 2 and Python 3 (the original multi-argument
    ``print`` statements are a syntax error on Python 3).
    """
    pC = PhamCluster()
    pC.initialize_matrix()
    cl = HierarchicalClustering(
        pC.scoreMatrix, lambda x, y: pC.get_distance(x, y))
    cutoff = 1
    print('using cutoff of %s' % cutoff)
    clusters = cl.getlevel(float(cutoff))
    summary = 'there are %d clusters' % len(clusters)
    # The count is printed before and after the (possibly long) listing.
    print(summary)
    print(clusters)
    print(summary)
示例13: testCluster
def testCluster(self):
    """
    Cluster the word fixture with ``self.sim`` and check the exact
    grouping at level 0.5.
    """
    expected = [
        ['ultricies'],
        ['Sed'],
        ['Phasellus'],
        ['mi'],
        ['Nullam'],
        ['sit', 'elit', 'elit', 'Ut', 'amet', 'at'],
        ['leo', 'Lorem', 'dolor'],
        ['congue', 'neque', 'consectetuer', 'consequat'],
        ['adipiscing'],
        ['ipsum'],
    ]
    clustering = HierarchicalClustering(self.__data, self.sim)
    self.assertEqual(expected, clustering.getlevel(0.5))
示例14: getCorners
def getCorners(intersections):
    """
    Reduce a cloud of intersection points to four corner points.

    Points are clustered at level 25 using ``length([p1, p2])`` as the
    distance; the four largest clusters are averaged with
    ``averageCoords`` and returned as
    (top-left, bottom-left, top-right, bottom-right), where left/right is
    decided by the first coordinate and top/bottom by the second.
    """
    def point_distance(p1, p2):
        return length([p1, p2])

    clusters = HierarchicalClustering(intersections, point_distance).getlevel(25)

    # probably want to make sure we actually have the corners at this point.
    # For now, I'm taking the 4 biggest clusters.
    biggest = sorted(clusters, key=len, reverse=True)[:4]
    by_x = sorted(
        (averageCoords(group) for group in biggest),
        key=lambda point: point[0])

    left = sorted(by_x[:2], key=lambda point: point[1])
    right = sorted(by_x[2:], key=lambda point: point[1])
    top_left = left[0]
    bottom_left = left[1]
    top_right = right[0]
    bottom_right = right[1]
    return top_left, bottom_left, top_right, bottom_right
示例15: testCluster
def testCluster(self):
    """
    String clustering check, currently skipped up front because the
    expected grouping is not reproducible; the assertion is kept below
    the skip for reference.
    """
    reason = ('These values lead to non-deterministic results. '
              'This makes it untestable!')
    self.skipTest(reason)

    expected = [
        ['ultricies'],
        ['Sed'],
        ['Phasellus'],
        ['mi'],
        ['Nullam'],
        ['sit', 'elit', 'elit', 'Ut', 'amet', 'at'],
        ['leo', 'Lorem', 'dolor'],
        ['congue', 'neque', 'consectetuer', 'consequat'],
        ['adipiscing'],
        ['ipsum'],
    ]
    clustering = HierarchicalClustering(self.__data, self.sim)
    self.assertEqual(expected, clustering.getlevel(0.5))