This article collects typical usage examples of the Python method sklearn.cluster.AffinityPropagation. If you are wondering what exactly cluster.AffinityPropagation does and how to use it, the curated code examples below may help. You can also explore the containing module, sklearn.cluster, further.
Below are 13 code examples of cluster.AffinityPropagation, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
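As a quick orientation before the collected examples, here is a minimal, self-contained sketch of the basic API. The data and the preference value are illustrative only, and the random_state argument requires scikit-learn 0.23 or later:

from sklearn.cluster import AffinityPropagation
from sklearn.datasets import make_blobs

# toy data: three well-separated blobs (illustrative, not from the projects below)
X, _ = make_blobs(n_samples=60, centers=3, cluster_std=0.5, random_state=0)
ap = AffinityPropagation(preference=-50, random_state=0).fit(X)
print(ap.labels_)                   # cluster assignment of each sample
print(ap.cluster_centers_indices_)  # indices of the exemplar samples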
Example 1: test_objectmapper
# Required import: from sklearn import cluster [as alias]
# Or: from sklearn.cluster import AffinityPropagation [as alias]
def test_objectmapper(self):
    df = pdml.ModelFrame([])
    self.assertIs(df.cluster.AffinityPropagation, cluster.AffinityPropagation)
    self.assertIs(df.cluster.AgglomerativeClustering, cluster.AgglomerativeClustering)
    self.assertIs(df.cluster.Birch, cluster.Birch)
    self.assertIs(df.cluster.DBSCAN, cluster.DBSCAN)
    self.assertIs(df.cluster.FeatureAgglomeration, cluster.FeatureAgglomeration)
    self.assertIs(df.cluster.KMeans, cluster.KMeans)
    self.assertIs(df.cluster.MiniBatchKMeans, cluster.MiniBatchKMeans)
    self.assertIs(df.cluster.MeanShift, cluster.MeanShift)
    self.assertIs(df.cluster.SpectralClustering, cluster.SpectralClustering)
    self.assertIs(df.cluster.bicluster.SpectralBiclustering,
                  cluster.bicluster.SpectralBiclustering)
    self.assertIs(df.cluster.bicluster.SpectralCoclustering,
                  cluster.bicluster.SpectralCoclustering)
Example 2: test_affinity_propagation_class
# Required import: from sklearn import cluster [as alias]
# Or: from sklearn.cluster import AffinityPropagation [as alias]
def test_affinity_propagation_class(self):
    # samples_generator was removed in scikit-learn 0.24; import from sklearn.datasets
    from sklearn.datasets import make_blobs
    centers = [[1, 1], [-1, -1], [1, -1]]
    X, labels_true = make_blobs(n_samples=300, centers=centers,
                                cluster_std=0.5, random_state=0)
    df = pdml.ModelFrame(data=X, target=labels_true)
    af = df.cluster.AffinityPropagation(preference=-50)
    df.fit(af)
    af2 = cluster.AffinityPropagation(preference=-50).fit(X)
    tm.assert_numpy_array_equal(af.cluster_centers_indices_,
                                af2.cluster_centers_indices_)
    tm.assert_numpy_array_equal(af.labels_, af2.labels_)
Example 3: test_Classifications
# Required import: from sklearn import cluster [as alias]
# Or: from sklearn.cluster import AffinityPropagation [as alias]
def test_Classifications(self):
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)
    models = ['AffinityPropagation', 'MeanShift']
    for model in models:
        mod1 = getattr(df.cluster, model)()
        mod2 = getattr(cluster, model)()
        df.fit(mod1)
        mod2.fit(iris.data)
        result = df.predict(mod1)
        expected = mod2.predict(iris.data)
        self.assertIsInstance(result, pdml.ModelSeries)
        self.assert_numpy_array_almost_equal(result.values, expected)
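This test works because AffinityPropagation, like MeanShift, is inductive: after fitting, predict() assigns each sample to the nearest exemplar. A minimal sketch of that property on its own:

from sklearn import datasets
from sklearn.cluster import AffinityPropagation

iris = datasets.load_iris()
ap = AffinityPropagation().fit(iris.data)  # exemplars learned from the data
print(ap.predict(iris.data[:5]))           # exemplar-based labels for samples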
Example 4: cluster
# Required import: from sklearn import cluster [as alias]
# Or: from sklearn.cluster import AffinityPropagation [as alias]
def cluster(X):
    X = preprocessing.normalize(X, norm='l2')
    # despite the name, this is a cosine-similarity matrix (dot products of
    # unit-length rows), which is what affinity="precomputed" expects
    distance = X.dot(X.transpose())
    c = AffinityPropagation(affinity="precomputed")
    y = c.fit_predict(distance)
    return y
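Since the rows of X are L2-normalized first, X.dot(X.transpose()) holds dot products of unit vectors, i.e. cosine similarities, and affinity="precomputed" expects exactly such similarities (not distances). A self-contained sketch with made-up random data:

import numpy as np
from sklearn import preprocessing
from sklearn.cluster import AffinityPropagation

rng = np.random.RandomState(0)
X = preprocessing.normalize(rng.rand(20, 5), norm='l2')  # unit-length rows
similarity = X.dot(X.T)                                  # cosine similarities
print(AffinityPropagation(affinity="precomputed").fit_predict(similarity))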
Example 5: cluster_affinity_propagation
# Required import: from sklearn import cluster [as alias]
# Or: from sklearn.cluster import AffinityPropagation [as alias]
def cluster_affinity_propagation(similarity_matrix, desired_keys=None):
    numpy_matrix = similarity_matrix_to_numpy(similarity_matrix, desired_keys)
    clusterer = AffinityPropagation()
    return clusterer.fit_predict(numpy_matrix)
Example 6: affinity_propagation
# Required import: from sklearn import cluster [as alias]
# Or: from sklearn.cluster import AffinityPropagation [as alias]
def affinity_propagation(feature_matrix):
    # for a scipy sparse matrix, * is a matrix product, giving pairwise similarities
    sim = feature_matrix * feature_matrix.T
    sim = sim.toarray()  # was sim.todense(); newer scikit-learn rejects np.matrix
    ap = AffinityPropagation()
    ap.fit(sim)
    clusters = ap.labels_
    return ap, clusters

# get clusters using affinity propagation
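The function above appears to assume a scipy sparse feature matrix (hence the dense conversion), in which case * is a matrix product yielding pairwise document similarities. A usage sketch under that assumption, with a made-up corpus, assuming the function and its AffinityPropagation import are in scope:

from sklearn.feature_extraction.text import TfidfVectorizer

docs = ["the cat sat", "the cat ran", "a dog barked",
        "stocks fell today", "stocks rose today", "markets were flat"]
feature_matrix = TfidfVectorizer().fit_transform(docs)  # sparse TF-IDF rows
ap, clusters = affinity_propagation(feature_matrix)
print(clusters)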
Example 7: compare
# Required import: from sklearn import cluster [as alias]
# Or: from sklearn.cluster import AffinityPropagation [as alias]
def compare(data, n_groups, output_fol):
    # plot_clusters(data.astype(np.float), scipy.cluster.vq.kmeans, 'scipy.cluster.vq.kmeans', output_fol, (n_groups,), {})
    plot_clusters(data, cluster.KMeans, 'KMeans', output_fol, (), {'n_clusters': n_groups})
    for ct in ['spherical', 'tied', 'diag', 'full']:
        plot_clusters(data, mixture.GaussianMixture, 'GMM_{}'.format(ct), output_fol, (),
                      {'n_components': n_groups, 'covariance_type': ct})
    plot_clusters(data, cluster.AffinityPropagation, 'AffinityPropagation', output_fol, (),
                  {'preference': -5.0, 'damping': 0.95})
    plot_clusters(data, cluster.MeanShift, 'MeanShift', output_fol, (0.175,), {'cluster_all': False})
    plot_clusters(data, cluster.SpectralClustering, 'SpectralClustering', output_fol, (),
                  {'n_clusters': n_groups})
    plot_clusters(data, cluster.AgglomerativeClustering, 'AgglomerativeClustering', output_fol, (),
                  {'n_clusters': n_groups, 'linkage': 'ward'})
    plot_clusters(data, cluster.DBSCAN, 'DBSCAN', output_fol, (), {'eps': 0.025})
    # plot_clusters(data, hdbscan.HDBSCAN, 'HDBSCAN', output_fol, (), {'min_cluster_size': 15})
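For the AffinityPropagation call above, a lower preference yields fewer clusters and a higher damping slows the message updates to aid convergence. plot_clusters is a helper from the source project and is not shown here; this is a hedged sketch of what such a helper might look like, with the signature inferred from the calls above (a sketch, not the project's implementation):

import matplotlib.pyplot as plt

def plot_clusters(data, algorithm, name, output_fol, args, kwds):
    # fit the given estimator and save a 2-D scatter plot colored by label
    labels = algorithm(*args, **kwds).fit_predict(data)
    plt.scatter(data[:, 0], data[:, 1], c=labels, s=10, cmap='tab10')
    plt.title(name)
    plt.savefig('{}/{}.png'.format(output_fol, name))
    plt.close()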
Example 8: __init__
# Required import: from sklearn import cluster [as alias]
# Or: from sklearn.cluster import AffinityPropagation [as alias]
def __init__(self, similarity='cosine', decay_window=20, decay_alpha=0.25,
             clustering='dbscan', tagger='twitter',
             useful_tags=['Noun', 'Verb', 'Adjective', 'Determiner', 'Adverb',
                          'Conjunction', 'Josa', 'PreEomi', 'Eomi', 'Suffix',
                          'Alpha', 'Number'],
             delimiters=['. ', '\n', '.\n'], min_token_length=2,
             stopwords=stopwords_ko, no_below_word_count=2,
             no_above_word_portion=0.85, max_dictionary_size=None,
             min_cluster_size=2, similarity_threshold=0.85,
             matrix_smoothing=False, n_clusters=None, compactify=True, **kwargs):
    self.decay_window = decay_window
    self.decay_alpha = decay_alpha
    if similarity == 'cosine':  # very, very slow :(
        self.vectorizer = DictVectorizer()
        self.uniform_sim = self._sim_cosine
    elif similarity == 'jaccard':
        self.uniform_sim = self._sim_jaccard
    elif similarity == 'normalized_cooccurrence':
        self.uniform_sim = self._sim_normalized_cooccurrence
    else:
        raise LexRankError("available similarity functions are: cosine, jaccard, normalized_cooccurrence")
    self.sim = lambda sentence1, sentence2: self.decay(sentence1, sentence2) * self.uniform_sim(sentence1, sentence2)
    self.factory = SentenceFactory(tagger=tagger, useful_tags=useful_tags,
                                   delimiters=delimiters, min_token_length=min_token_length,
                                   stopwords=stopwords, **kwargs)
    if clustering == 'birch':
        self._birch = Birch(threshold=0.99, n_clusters=n_clusters)
        self._clusterer = lambda matrix: self._birch.fit_predict(1 - matrix)
    elif clustering == 'dbscan':
        self._dbscan = DBSCAN()
        self._clusterer = lambda matrix: self._dbscan.fit_predict(1 - matrix)
    elif clustering == 'affinity':
        self._affinity = AffinityPropagation()
        self._clusterer = lambda matrix: self._affinity.fit_predict(1 - matrix)
    elif clustering is None:
        self._clusterer = lambda matrix: [0 for index in range(matrix.shape[0])]
    else:
        raise LexRankError("available clustering algorithms are: birch, dbscan, affinity, no-clustering (use `None`)")
    self.no_below_word_count = no_below_word_count
    self.no_above_word_portion = no_above_word_portion
    self.max_dictionary_size = max_dictionary_size
    self.similarity_threshold = similarity_threshold
    self.min_cluster_size = min_cluster_size
    self.matrix_smoothing = matrix_smoothing
    self.compactify = compactify
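A detail worth noting: none of the three clusterers here is configured with a precomputed metric, so 1 - matrix (which turns similarities into dissimilarities) is consumed as an ordinary feature matrix with one row of dissimilarities per sentence. A small sketch of the 'affinity' branch in isolation, on synthetic data:

import numpy as np
from sklearn.cluster import AffinityPropagation
from sklearn.datasets import make_blobs
from sklearn.metrics.pairwise import cosine_similarity

X, _ = make_blobs(n_samples=30, centers=2, random_state=0)
matrix = cosine_similarity(X)                  # pairwise similarities
clusterer = lambda m: AffinityPropagation().fit_predict(1 - m)
print(clusterer(matrix))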
Example 9: cluster_song_in_playlist
# Required import: from sklearn import cluster [as alias]
# Or: from sklearn.cluster import AffinityPropagation [as alias]
def cluster_song_in_playlist(self, playlist_id, cluster_n=5, is_detailed=False):
    """
    Get the song clustering for a single playlist.
    Args:
        playlist_id: playlist id
        cluster_n: number of clusters
        is_detailed: whether the result includes song details
    Returns:
        the clustered list
    """
    playlist_obj = playlist_detail(playlist_id)
    song_list = []
    vec_list = []
    song_info_dict = {}
    ap_cluster = AffinityPropagation()
    data_process_logger.info('clustering playlist: %s' % playlist_obj['name'])
    for item in playlist_obj['tracks']:
        song = item['name'].lower()
        song_info_dict[song] = {
            'name': song,
            'artist': item['artists'][0]['name'],
            'id': item['id'],
            'album_img_url': item['album']['picUrl'],
            'site_url': 'http://music.163.com/#/song?id=%s' % item['id']
        }
        if song not in song_list:
            song_list.append(song)
            if self.song2vec_model.vocab.get(song) and len(self.song2vec_model.syn0norm):
                song_vec = self.song2vec_model.syn0norm[self.song2vec_model.vocab[song].index]
            else:
                data_process_logger.warn(
                    'The song %s of playlist-%s is not in dataset' % (song, playlist_obj['name']))
                song_vec = [0 for i in range(self.song2vec_model.vector_size)]
            vec_list.append(song_vec)
    if len(vec_list) > 1:
        # note: sklearn clusterers accept but ignore the second argument to fit()
        cluster_result = ap_cluster.fit(vec_list, song_list)
        cluster_array = [[] for i in range(len(cluster_result.cluster_centers_indices_))]
        for i in range(len(cluster_result.labels_)):
            label = cluster_result.labels_[i]
            cluster_array[label].append(song_list[i])
        return cluster_array, playlist_obj['name'], song_info_dict
    else:
        return [song_list], playlist_obj['name'], song_info_dict
Example 10: cluster_artist_in_playlist
# Required import: from sklearn import cluster [as alias]
# Or: from sklearn.cluster import AffinityPropagation [as alias]
def cluster_artist_in_playlist(self, playlist_id, cluster_n=5, is_detailed=False):
    """
    Get the artist clustering for a single playlist.
    Args:
        playlist_id: playlist id
        cluster_n: number of clusters
        is_detailed: whether the result includes detailed info
    Returns:
        the clustered list
    """
    playlist_obj = playlist_detail(playlist_id)
    artist_list = []
    vec_list = []
    ap_cluster = AffinityPropagation()
    data_process_logger.info('clustering playlist: %s' % playlist_obj['name'])
    for item in playlist_obj['tracks']:
        artist = item['artists'][0]['name'].lower()
        if artist not in artist_list:
            artist_list.append(artist)
            if self.artist2vec_model.vocab.get(artist) and len(self.artist2vec_model.syn0norm):
                artist_vec = self.artist2vec_model.syn0norm[self.artist2vec_model.vocab[artist].index]
            else:
                data_process_logger.warn(
                    'The artist %s of playlist-%s is not in dataset' % (artist, playlist_obj['name']))
                artist_vec = [0 for i in range(self.artist2vec_model.vector_size)]
            vec_list.append(artist_vec)
    if len(vec_list) > 1:
        # note: sklearn clusterers accept but ignore the second argument to fit()
        cluster_result = ap_cluster.fit(vec_list, artist_list)
        cluster_array = [[] for i in range(len(cluster_result.cluster_centers_indices_))]
        for i in range(len(cluster_result.labels_)):
            label = cluster_result.labels_[i]
            cluster_array[label].append(artist_list[i])
        return cluster_array, playlist_obj['name'], {}
    else:
        return [artist_list], playlist_obj['name'], {}
Example 11: build_groups2
# Required import: from sklearn import cluster [as alias]
# Or: from sklearn.cluster import AffinityPropagation [as alias]
def build_groups2(nouns):
    print('building groups', time.strftime("%H:%M:%S"))
    all_senses = set()
    sense_word_map = {}
    for noun in nouns:
        senses = wordnet.synsets(noun, pos='n')
        all_senses.update(senses)
        for sense in senses:
            if sense.name() not in sense_word_map:
                sense_word_map[sense.name()] = []
            sense_word_map[sense.name()].append(noun)
    all_senses = list(all_senses)
    all_senses_names = [sense.name() for sense in all_senses]
    print('number of senses:', len(all_senses))
    sense_similarity_matrix, sense_similarity_matrix_columns =\
        get_sense_similarity_submatrix(all_senses_names)
    print('submatrix ready', time.strftime("%H:%M:%S"))
    # affinity_propagation = AffinityPropagation()
    # labels1 = affinity_propagation.fit_predict(sense_similarity_matrix)
    # print('affinity propagation ready', time.strftime("%H:%M:%S"))
    grouper = BaumanSensesGrouper(sense_similarity_matrix, 0.7)
    groups = grouper.group_senses()
    print('groups')
    new_groups = []
    for group in groups:
        new_group = set()
        for element in group:
            sense_name = sense_similarity_matrix_columns[element]
            new_group.add(sense_name)
        new_groups.append(new_group)
    print('finished groups', time.strftime("%H:%M:%S"))
    print('num groups: %d' % len(groups))
    sense_groups = []
    for group in new_groups:
        sense_group = SenseGroup(group)
        for sense in sense_group.senses:
            sense_group.nouns |= set(sense_word_map[sense])
        sense_groups.append(sense_group)
    return sense_groups
Example 12: evaluate_clustering
# Required import: from sklearn import cluster [as alias]
# Or: from sklearn.cluster import AffinityPropagation [as alias]
def evaluate_clustering():
    similarity_matrix = get_sense_similarity_submatrix(range(10000))
    matrix_size = len(similarity_matrix)
    print('got matrix')
    affinity_propagation = AffinityPropagation()
    labels1 = affinity_propagation.fit_predict(similarity_matrix)
    print('affinity propagation')
    dbscan = DBSCAN(min_samples=1)
    labels2 = dbscan.fit_predict(similarity_matrix)
    print('dbscan')
    distance_matrix = np.ndarray((matrix_size, matrix_size))
    for i in range(matrix_size):
        for j in range(matrix_size):
            distance_matrix[i, j] = 1 - similarity_matrix[i, j]
    print(distance_matrix[1, 2])
    print(distance_matrix[1, 1])
    print('created distance matrix')
    cluster_map1 = cluster_evaluation.fpena_get_clusters(labels1)
    cluster_map2 = cluster_evaluation.fpena_get_clusters(labels2)
    print(cluster_map1)
    print(cluster_map2)
    sc1 = sklearn.metrics.silhouette_score(distance_matrix, labels1, metric='euclidean')
    sc2 = sklearn.metrics.silhouette_score(distance_matrix, labels2, metric='euclidean')
    sc5 = cluster_evaluation.fpena_evaluate(cluster_map1, distance_matrix)
    sc6 = cluster_evaluation.fpena_evaluate(cluster_map2, distance_matrix)
    num_elements1 = [len(values) for values in cluster_map1.values()]
    num_elements2 = [len(values) for values in cluster_map2.values()]
    print(num_elements1)
    print(num_elements2)
    print('Number of clusters Affinity Propagation: %f' % len(cluster_map1))
    print('Number of clusters DBSCAN: %f' % len(cluster_map2))
    print('Average elements per cluster Affinity Propagation: %f' % np.mean(num_elements1))
    print('Average elements per cluster DBSCAN: %f' % np.mean(num_elements2))
    print('Standard deviation per cluster Affinity Propagation: %f' % np.std(num_elements1))
    print('Standard deviation per cluster DBSCAN: %f' % np.std(num_elements2))
    print('Silhouette score Affinity Propagation (distance matrix): %f' % sc1)
    print('Silhouette score DBSCAN (distance matrix): %f' % sc2)
    print('Dunn index Affinity Propagation (distance matrix): %f' % sc5)
    print('Dunn index DBSCAN (distance matrix): %f' % sc6)

# start = time.time()
# main()
# evaluate_clustering()
# get_similarity_submatrix()
# end = time.time()
# total_time = end - start
# print("Total time = %f seconds" % total_time)
Example 13: exposons_from_sasas
# Required import: from sklearn import cluster [as alias]
# Or: from sklearn.cluster import AffinityPropagation [as alias]
def exposons_from_sasas(sasas, damping, weights, threshold):
    """Compute exposons from precomputed sidechain SASAs.

    This function is a convenience wrapper to compute exposons using other
    functions already existing in MDTraj, sklearn, and elsewhere in enspara.

    Parameters
    ----------
    sasas : np.ndarray, shape=(n_conformations, n_sidechains)
        SASAs to use in the calculations.
    damping : float
        Damping parameter to use for affinity propagation. Goes from 0.5
        to <1.0. Empirically, values between 0.85 and 0.95 tend to work best.
    weights : np.ndarray, shape=(len(trj),), default=None
        Weight of each frame in the simulation for the mutual information
        calculation. Useful if `trj` represents cluster centers of an MSM
        rather than a full trajectory. If None, frames will be weighted
        equally.
    threshold : float, default=0.02
        Sidechains with greater than this amount of total SASA will count
        as exposed for the purposes of the exposed/buried dichotomy used
        in mutual information calculations.

    Returns
    -------
    sasa_mi : np.ndarray, shape=(n_res, n_res)
        Mutual information of each sidechain with each other sidechain,
        computed for the purposes of clustering exposons.
    exposons : np.ndarray, shape=(n_res,)
        Assignment of residues to exposons. Residues in the same exposon
        share the same number in this array.
    """
    sasa_mi = weighted_mi(sasas > threshold, weights)
    c = AffinityPropagation(
        damping=damping,
        affinity='precomputed',
        preference=0,
        max_iter=10000)
    c.fit(sasa_mi)
    return sasa_mi, c.labels_
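A usage sketch with synthetic data, assuming the function above and its dependency weighted_mi (from enspara) are importable; the shapes and parameter values here are made up:

import numpy as np

rng = np.random.RandomState(0)
sasas = rng.rand(100, 20)          # 100 conformations x 20 sidechains
weights = np.full(100, 1.0 / 100)  # uniform frame weights
sasa_mi, exposons = exposons_from_sasas(sasas, damping=0.9,
                                        weights=weights, threshold=0.02)
print(exposons)                    # exposon index per sidechain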