本文整理汇总了 Python 中 sklearn.cluster.WardAgglomeration 类的典型用法代码示例。如果您正苦于以下问题：Python WardAgglomeration 类的具体用法？Python WardAgglomeration 怎么用？Python WardAgglomeration 使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了 WardAgglomeration 类的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: test_ward_agglomeration
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case.

    The deprecated ``WardAgglomeration`` and ``FeatureAgglomeration`` are
    fitted on the same random data with the same 10x10 grid connectivity.
    The test checks that constructing/fitting ``WardAgglomeration`` emits a
    deprecation warning, that both estimators yield identical labels, and
    that ``transform`` / ``inverse_transform`` behave consistently with a
    5-cluster solution.
    """
    rnd = np.random.RandomState(0)
    # Only the shape of the mask is used below.  The builtin ``bool`` is
    # used instead of the ``np.bool`` alias, which recent NumPy releases no
    # longer provide.
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    assert_warns(DeprecationWarning, WardAgglomeration)

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always", DeprecationWarning)
        if hasattr(np, 'VisibleDeprecationWarning'):
            # Let's not catch the numpy internal DeprecationWarnings
            warnings.simplefilter('ignore', np.VisibleDeprecationWarning)
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)
        # Exactly one warning is expected to have been recorded.
        assert_equal(len(warning_list), 1)

    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    # After the round trip a sample holds only 5 distinct values.
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
示例2: test_ward_agglomeration
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case.

    ``WardAgglomeration`` (expected to raise a ``DeprecationWarning`` when
    called) and ``FeatureAgglomeration`` are fitted on the same random data
    with the same 10x10 grid connectivity and must produce identical
    labels; ``transform`` / ``inverse_transform`` are then checked against
    the 5-cluster solution.
    """
    rnd = np.random.RandomState(0)
    # Only the shape of the mask is used below.  The builtin ``bool`` is
    # used instead of the ``np.bool`` alias, which recent NumPy releases no
    # longer provide.
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    assert_warns(DeprecationWarning, WardAgglomeration)

    # The deprecation warning was already asserted above; silence it here.
    with ignore_warnings():
        ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
        ward.fit(X)

    agglo = FeatureAgglomeration(n_clusters=5, connectivity=connectivity)
    agglo.fit(X)
    assert_array_equal(agglo.labels_, ward.labels_)
    assert_true(np.size(np.unique(agglo.labels_)) == 5)

    X_red = agglo.transform(X)
    assert_true(X_red.shape[1] == 5)
    X_full = agglo.inverse_transform(X_red)
    # After the round trip a sample holds only 5 distinct values.
    assert_true(np.unique(X_full[0]).size == 5)
    assert_array_almost_equal(agglo.transform(X_full), X_red)

    # Check that fitting with no samples raises a ValueError
    assert_raises(ValueError, agglo.fit, X[:0])
示例3: prepare_data
def prepare_data(imgs, connectivity, mask, n_clusters=5000, n_components=100):
    """
    Mask the images, learn a Ward parcellation and group parcel signals
    by subject.

    The parcellation is fitted on a PCA-reduced copy of the masked data and
    then applied to the unreduced masked data.  Besides its arguments the
    function reads the module-level names ``nifti_masker``,
    ``subject_label`` and ``n_subjects`` and calls
    ``do_parcel_connectivity``.

    Parameters
    ----------
    imgs : images passed to ``nifti_masker.fit_transform``
    connectivity : connectivity structure handed to ``WardAgglomeration``
    mask : forwarded unchanged to ``do_parcel_connectivity``
    n_clusters : int, number of Ward clusters (default 5000)
    n_components : int, number of PCA components (default 100)

    Returns
    -------
    cube, ward, parcel_connectivity : tuple
        The per-subject array of parcel signals, the fitted Ward estimator
        and the result of ``do_parcel_connectivity``.
    """
    # data preparation
    masked_signals = nifti_masker.fit_transform(imgs)

    # PCA is run on the transposed data and the result is transposed back.
    reducer = RandomizedPCA(n_components=n_components)
    reduced_signals = reducer.fit_transform(masked_signals.T).T

    ward = WardAgglomeration(n_clusters=n_clusters,
                             connectivity=connectivity,
                             memory='nilearn_cache')
    ward = ward.fit(reduced_signals)

    parcel_signals = ward.transform(masked_signals)
    del masked_signals  # no longer needed once the parcel signals exist

    # data cube is a more convenient representation: one entry per subject
    per_subject = [parcel_signals[subject_label == subject]
                   for subject in np.arange(n_subjects)]
    cube = np.array(per_subject)

    # parcel connectivity
    parcel_connectivity = do_parcel_connectivity(mask, n_clusters, ward)
    return cube, ward, parcel_connectivity
示例4: test_ward_agglomeration
def test_ward_agglomeration():
    """
    Check that we obtain the correct solution in a simplistic case.

    Fits ``WardAgglomeration`` with 5 clusters on random data using a 10x10
    grid connectivity, then checks the number of labels and the shapes /
    value counts produced by ``transform`` and ``inverse_transform``.
    """
    rnd = np.random.RandomState(0)
    # Only the shape of the mask is used below.  The builtin ``bool`` is
    # used instead of the ``np.bool`` alias, which recent NumPy releases no
    # longer provide.
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)

    ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
    ward.fit(X)
    assert_true(np.size(np.unique(ward.labels_)) == 5)

    Xred = ward.transform(X)
    assert_true(Xred.shape[1] == 5)
    Xfull = ward.inverse_transform(Xred)
    # After the round trip a sample holds only 5 distinct values.
    assert_true(np.unique(Xfull[0]).size == 5)
示例5: NiftiMasker
# Fit a masker (mask_strategy='epi') on the PET files and mask every image.
masker = NiftiMasker(mask_strategy='epi',
                     mask_args=dict(opening=8))
masker.fit(pet_files)
pet_masked = masker.transform_niimgs(pet_files, n_jobs=2)
#pet_masked = np.vstack(pet_masked)

# Boolean version of the fitted mask.  The builtin ``bool`` replaces the
# ``np.bool`` alias, which recent NumPy releases no longer provide.
mask = masker.mask_img_.get_data().astype(bool)
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)

# Computing the ward for the first time, this is long...
start = time.time()
ward = WardAgglomeration(n_clusters=1000, connectivity=connectivity,
                         memory='nilearn_cache')
ward.fit(pet_masked[0])
# Single-argument call form: valid on both Python 2 and Python 3.
print("Ward agglomeration 1000 clusters: %.2fs" % (time.time() - start))

# Labels are shifted by one before being mapped back to image space
# (presumably so that 0 stays free for the background -- TODO confirm).
labels = ward.labels_ + 1
labels_img = masker.inverse_transform(labels)
first_plot = plot_roi(labels_img, pet_img[0], title="Ward parcellation",
                      display_mode='xz')

# labels_img is a Nifti1Image object, it can be saved to file with the
# following code:
labels_img.to_filename('parcellation.nii')
"""
##################################################################
示例6: BMA_consensus_cluster_parallel
def BMA_consensus_cluster_parallel(cfg, remote_path, remote_BOLD_fn, remote_mask_fn, Y, nifti_masker, \
num_vox, K_clus, K_clusters, \
parc, alpha, prop, nbItRFIR, onsets, durations,\
output_sub_parc, rescale=True, averg_bold=False):
'''
Performs all steps for one clustering case (Kclus given, number l of the parcellation given)
remote_path: path on the cluster, where results will be stored
'''
import os
import sys
sys.path.append("/home/pc174679/pyhrf/pyhrf-tree_trunk/script/WIP/Scripts_IRMf_BB/Parcellations/")
sys.path.append("/home/pc174679/pyhrf/pyhrf-tree_trunk/script/WIP/Scripts_IRMf_Adultes_Solv/")
sys.path.append("/home/pc174679/pyhrf/pyhrf-tree_trunk/script/WIP/Scripts_IRMf_Adultes_Solv/Scripts_divers_utiles/Scripts_utiles/")
sys.path.append('/home/pc174679/local/installations/consensus-cluster-0.6')
from Random_parcellations import random_parcellations, subsample_data_on_time
from Divers_parcellations_test import *
from RFIR_evaluation_parcellations import JDE_estim, RFIR_estim, clustering_from_RFIR
from Random_parcellations import hrf_roi_to_vox
from pyhrf.tools._io import remote_copy, remote_mkdir
from nisl import io
#nifti_masker.mask=remote_mask_fn
# Creation of the necessary paths --> do not do here
parc_name = 'Subsampled_data_with_' + str(K_clus) + 'clusters'
parc_name_clus = parc_name + 'rnd_number_' + str(parc+1)
remote_sub = os.sep.join((remote_path, parc_name))
#if not os.path.exists(remote_sub):
#os.path.exists(remote_sub)
#print 'remote_sub:', remote_sub
#os.makedirs(remote_sub)
remote_sub_parc = os.sep.join((remote_sub,parc_name_clus))
#if not os.path.exists(remote_sub_parc):
#os.makedirs(remote_sub_parc)
output_RFIR_parc = os.sep.join((output_sub_parc,'RFIR_estim'))
###################################
## 1st STEP: SUBSAMPLING
print '--- Subsample data ---'
Ysub = subsample_data_on_time(Y, remote_mask_fn, K_clus, alpha, prop, \
nifti_masker, rescale=rescale)
print 'Ysub:', Ysub
print 'remote_sub_prc:', remote_sub_parc
Ysub_name = 'Y_sub_'+ str(K_clus) + 'clusters_' + 'rnd_number_' + str(parc+1) +'.nii'
Ysub_fn = os.sep.join((remote_sub_parc, Ysub_name))
Ysub_masked = nifti_masker.inverse_transform(Ysub).get_data()
write_volume(Ysub_masked, Ysub_fn)
###################################
## 2D STEP: RFIR
print '--- Performs RFIR estimation ---'
remote_RFIR_parc_clus = os.sep.join((remote_sub_parc, 'RFIR_estim'))
#if not os.path.exists(remote_RFIR_parc):os.makedirs(remote_RFIR_parc)
#remote_RFIR_parc_clus = os.sep.join((remote_RFIR_parc, parc_name_clus))
#if not os.path.exists(remote_RFIR_parc_clus):os.makedirs(remote_RFIR_parc_clus)
print ' * output path for RFIR ', remote_RFIR_parc_clus
print ' * RFIR for subsampling nb ', str(parc+1), ' with ', K_clus, ' clusters'
RFIR_estim(nbItRFIR, onsets, durations, Ysub_fn, remote_mask_fn, \
remote_RFIR_parc, avg_bold=averg_bold)
hrf_fn = os.sep.join((remote_RFIR_parc_clus, 'rfir_ehrf.nii'))
#remote_copy([hrf_fn], remote_host,
#remote_user, remote_path)[0]
###################################
## 3D STEP: CLUSTERING FROM RFIR RESULTS
name_hrf = 'rfir_ehrf.nii'
from pyhrf.tools._io import write_volume, read_volume
from pyhrf.tools._io import read_volume, write_volume
import nisl.io as ionisl
from sklearn.feature_extraction import image
from sklearn.cluster import WardAgglomeration
from scipy.spatial.distance import cdist, pdist
hrf_fn = os.sep.join((remote_RFIR_parc_clus,name_hrf))
hrf=read_volume(hrf_fn)[0]
hrf_t_fn = add_suffix(hrf_fn, 'transpose')
#taking only 1st condition to parcellate
write_volume(hrf[:,:,:,:,0], hrf_t_fn)
nifti_masker = ionisl.NiftiMasker(remote_mask_fn)
Nm = nifti_masker.fit(hrf_t_fn)
#features: coeff of the HRF
HRF = Nm.fit_transform(hrf_t_fn)
mask, meta_data = read_volume(remote_mask_fn)
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
#.........这里部分代码省略.........
示例7: WardAgglomeration
### Mask ######################################################################
fmri_data = dataset.func[0]

# Compute a brain mask
from nisl import masking
mask = masking.compute_mask(fmri_data)

# Mask data: go from a 4D dataset to a 2D dataset with only the voxels
# in the mask
fmri_masked = fmri_data[mask]

### Ward ######################################################################
# Compute connectivity matrix: which voxel is connected to which
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1], n_z=shape[2], mask=mask)

# Computing the ward for the first time, this is long...
from sklearn.cluster import WardAgglomeration
import time
start = time.time()
ward = WardAgglomeration(n_clusters=5000, connectivity=connectivity)  # memory='nisl_cache')
ward.fit(fmri_masked.T)
# The message now reports the cluster count actually used above (5000; it
# previously said 500).  Single-argument call form works on Python 2 and 3.
print("Ward agglomeration 5000 clusters: %.2fs" % (time.time() - start))
示例8: Memory
# Perform parcellation on smoothed PCA-ed timecourses for each ROI
mem = Memory(cachedir=".", verbose=1)
n_clust = np.zeros(n_rois)  # Different #clusters for different ROI
template = np.zeros((dim[0], dim[1], dim[2]))
print("Performing Ward Clustering")
for i in np.arange(n_rois):
    # Determine the number of clusters to divide each ROI into
    # NOTE(review): if n_parcels and n_vox are both ints this division
    # truncates on Python 2 -- confirm that is intended.
    roi_mask = brain == rois[i]
    n_clust[i] = np.round(np.sum(roi_mask) * n_parcels / n_vox)
    if n_clust[i] <= 1:
        # The whole ROI becomes one parcel; its label is the number of
        # distinct values currently present in the template.
        template[roi_mask] = np.shape(np.unique(template))[0]
    else:
        # Define connectivity based on brain mask
        A = grid_to_graph(n_x=dim[0], n_y=dim[1], n_z=dim[2], mask=roi_mask)
        # Create ward object.  n_clust is a float array, so the rounded
        # count is cast to int before being used as a cluster count.
        ward = WardAgglomeration(n_clusters=int(n_clust[i]),
                                 connectivity=A.tolil(), memory=mem)
        ward.fit(tc_group[roi_mask.ravel(), :].T)
        # Offset the new labels by the number of distinct values already
        # present in the template.
        template[roi_mask] = ward.labels_ + np.shape(np.unique(template))[0]

# Remove parcels with zero timecourses in any of the subjects
template = template.ravel()
template_refined = template.copy()
label = np.unique(template)
for sub in subList:
print str("Subject" + sub)
# Load preprocessed voxel timecourses
tc = io.loadmat(os.path.join(BASE_DIR, sub, "restfMRI/tc_vox.mat"))
tc = tc["tc"]
# Generate subject-specific tissue mask
gm_file = os.path.join(BASE_DIR, sub, "anat", "gmMask.nii")
示例9: gaussian_filter
# Spatial smoothing to encourage smooth parcels: every time point is
# reshaped back to a volume and smoothed on its own.
dim = np.shape(brain)
tc = tc.reshape((dim[0], dim[1], dim[2], -1))
n_tpts = tc.shape[-1]
for t in range(n_tpts):
    tc[..., t] = gaussian_filter(tc[..., t], sigma=1)
# Back to (voxels x time points), keeping only the voxels inside the mask
tc = tc.reshape((-1, n_tpts))
tc = tc[brain.ravel() == 1, :]

# Functional parcellation with Ward clustering
print("Performing Ward Clustering")
mem = Memory(cachedir=".", verbose=1)
# Define connectivity based on brain mask
A = grid_to_graph(n_x=brain.shape[0], n_y=brain.shape[1],
                  n_z=brain.shape[2], mask=brain)
# Create ward object and fit it on the transposed timecourses
ward = WardAgglomeration(n_clusters=n_parcels, connectivity=A.tolil(),
                         memory=mem)
ward.fit(tc.T)

# Write the labels into a volume of the original spatial size
template = np.zeros((dim[0], dim[1], dim[2]))
template[brain == 1] = ward.labels_ + 1  # labels start from 0, which is used for background

# Remove single voxels not connected to parcel
# for i in np.unique(template)[1:]:
# labels, n_labels = label(template == i, structure=np.ones((3,3,3)))
# if n_labels > 1:
# for j in np.arange(n_labels):
# if np.sum(labels == j + 1) < 10:
# template[labels == j + 1] = 0

# Saving the template
nii = nib.Nifti1Image(template, brain_img.affine)
nib.save(nii, PARCEL_PATH)
示例10: classify
def classify(x, y, classifier='naive_bayes', clustering=True, n_folds=10):
    """
    Cross-validate a single-label classifier, optionally on Ward-reduced
    features.

    The labels are integer-encoded with a ``LabelEncoder`` and the samples
    are split into ``n_folds`` folds.  For every fold the selected model is
    fitted on the training part and used to predict the held-out part.

    Parameters
    -----------
    x : `numpy.ndarray`
        (n_samples x n_features) array of features
    y : `numpy.ndarray`
        (1 x n_samples) array of labels
    classifier : str, optional
        One of 'naive_bayes' | 'svm' | 'logistic_regression' | 'ensemble'.
        With 'ensemble' the three models are all fitted, their predictions
        are summed, and a sample is predicted as 1 when the sum is at
        least 2.  Any other name raises ``KeyError``.  Defaults to
        'naive_bayes'.
    clustering : bool, optional
        Whether to reduce the features with Ward clustering
        (``N_CLUSTERS`` clusters, connectivity built from the mask stored
        in 'data/2mm_brain_mask.npy').  Ward is re-fitted on the training
        part of every fold.  Defaults to True.
    n_folds : int
        the number of fold of cv

    Returns
    -------
    accuracy : list
        One confusion matrix per fold, computed with ``labels=[0, 1]``.
    """
    encoder = preprocessing.LabelEncoder()
    y_new = encoder.fit(y).transform(y)

    # candidate models, keyed by the public classifier name
    classifier_dict = {
        'naive_bayes': MultinomialNB(),
        'logistic_regression': LogisticRegression(penalty='l2'),
        'svm': GridSearchCV(LinearSVC(), [{'C': [1, 10, 100, 1000]}]),
    }
    use_ensemble = classifier == 'ensemble'
    if use_ensemble:
        # listed in the order in which they are fitted for every fold
        voters = [classifier_dict['naive_bayes'],
                  classifier_dict['logistic_regression'],
                  classifier_dict['svm']]
    else:
        clf = classifier_dict[classifier]

    # the Ward estimator is only built when clustering is requested
    ward = None
    if clustering:
        mask = np.load('data/2mm_brain_mask.npy')
        dims = mask.shape
        connectivity = image.grid_to_graph(n_x=dims[0], n_y=dims[1],
                                           n_z=dims[2], mask=mask)
        ward = WardAgglomeration(n_clusters=N_CLUSTERS,
                                 connectivity=connectivity)

    # actual cross validation
    accuracy = []
    for train, test in cross_validation.KFold(len(y_new), n_folds=n_folds):
        x_train, y_train = x[train], y_new[train]
        x_test, y_test = x[test], y_new[test]

        if clustering:
            ward.fit(x_train)
            x_train = ward.transform(x_train)
            x_test = ward.transform(x_test)

        if use_ensemble:
            ballots = [member.fit(x_train, y_train).predict(x_test)
                       for member in voters]
            vote_sum = ballots[0] + ballots[1] + ballots[2]
            predicted = (vote_sum >= 2).astype(int)
        else:
            predicted = clf.fit(x_train, y_train).predict(x_test)

        accuracy.append(confusion_matrix(y_test, predicted, labels=[0, 1]))

    return accuracy
示例11: classify
def classify(x, y, classifier='naive_bayes', clustering=True, n_folds=10):
    """
    Given the predictors and labels, performs multi-label
    classification with the given classifier using n-fold
    c.v. Constructs a OvR classifier for multilabel prediction.

    Parameters
    -----------
    x : `numpy.ndarray`
        (n_samples x n_features) array of features
    y : `numpy.ndarray`
        (n_samples x n_labels) array of labels
    classifier : str, optional
        which classifier model to use. Must be one of 'naive_bayes'|
        'decision_tree' | 'logistic_regression'; any other name raises
        ``KeyError``. Defaults to the original naive_bayes.
    clustering : bool, optional
        whether to do Ward clustering or not. The number of clusters is
        taken from the global N_CLUSTERS. The brain mask is only loaded,
        and the connectivity graph only built, when this is True.
        Defaults to True.
    n_folds : int
        the number of fold of cv

    Returns
    -------
    score_per_class, score_per_label : tuple
        Two lists with one entry per fold, holding the results of
        ``utils.score_results`` and ``utils.label_scores`` respectively
        (in that order).
    """
    lb = preprocessing.LabelBinarizer()
    y_new = lb.fit_transform(y)

    # specify connectivity for clustering -- skipped entirely when no
    # clustering is requested, so the mask file is not needed in that case
    ward = None
    if clustering:
        mask = nb.load('data/MNI152_T1_2mm_brain.nii.gz').get_data().astype('bool')
        shape = mask.shape
        connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                           n_z=shape[2], mask=mask)
        ward = WardAgglomeration(n_clusters=N_CLUSTERS,
                                 connectivity=connectivity)

    # choose and assign appropriate classifier
    classifier_dict = {
        'naive_bayes': OneVsRestClassifier(MultinomialNB()),
        'logistic_regression': OneVsRestClassifier(LogisticRegression(penalty='l2')),
        'decision_tree': tree.DecisionTreeClassifier(),
    }
    clf = classifier_dict[classifier]

    kf = cross_validation.KFold(len(y_new), n_folds=n_folds)
    score_per_class = []
    score_per_label = []
    for train, test in kf:
        x_train = np.ascontiguousarray(x[train])
        y_train = np.ascontiguousarray(y_new[train])
        x_test = np.ascontiguousarray(x[test])
        y_test = np.ascontiguousarray(y_new[test])

        if clustering:
            # Ward is re-fitted on the training part of every fold
            ward.fit(x_train)
            x_train = ward.transform(x_train)
            x_test = ward.transform(x_test)

        model = clf.fit(x_train, y_train)
        predicted = model.predict(x_test)
        predict_prob = model.predict_proba(x_test)
        # some models return a list of arrays here; normalise to an array
        if isinstance(predict_prob, list):
            predict_prob = np.array(predict_prob)

        cls_scores = utils.score_results(y_test, predicted, predict_prob)
        label_scores = utils.label_scores(y_test, predicted, predict_prob)
        score_per_class.append(cls_scores)
        score_per_label.append(label_scores)

    return (score_per_class, score_per_label)
示例12: WardAgglomeration
# Mask data
epi_masked = epi_img[mask]

### Ward ######################################################################
# Compute connectivity matrix
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                   n_z=shape[2], mask=mask)

# Computing the ward for the first time, this is long...
from sklearn.cluster import WardAgglomeration
import time
start = time.time()
ward = WardAgglomeration(n_clusters=500, connectivity=connectivity,
                         memory='nisl_cache')
ward.fit(epi_masked.T)
# Single-argument call form: valid on both Python 2 and Python 3.
print("Ward agglomeration 500 clusters: %.2fs" % (time.time() - start))

# Compute the ward with more clusters, should be faster
# (the same 'nisl_cache' memory location is reused)
start = time.time()
ward = WardAgglomeration(n_clusters=1000, connectivity=connectivity,
                         memory='nisl_cache')
ward.fit(epi_masked.T)
print("Ward agglomeration 1000 clusters: %.2fs" % (time.time() - start))

### Prepare output ############################################################
### Show result ###############################################################
from matplotlib import pyplot as pl
示例13: print
else:
tc_group = np.hstack((tc_group, preprocessing.standardize(pca.transform(tc.T))))
print("Concatenating subject" + sub + "'s timecourses")
#io.savemat(os.path.join(BASE_DIR, "group/tc_rest_pca_vox.mat"), {"tc_group": tc_group})
# Perform parcellation on PCA-ed timecourses
brain_img = as_volume_img("/volatile/bernardng/templates/spm8/rgrey.nii")
brain = brain_img.get_data()
dim = np.shape(brain)
# Generate brain mask: threshold at 0.2, then pass through largest_cc
brain = mask_utils.largest_cc(brain > 0.2)

# Smooth every time point as a volume before clustering
tc_group = tc_group.reshape((dim[0], dim[1], dim[2], -1))
n_tpts = tc_group.shape[-1]
for t in range(n_tpts):
    tc_group[..., t] = gaussian_filter(tc_group[..., t], sigma=5)
# Back to (voxels x time points), keeping only the voxels inside the mask
tc_group = tc_group.reshape((-1, n_tpts))
tc_group = tc_group[brain.ravel() == 1, :]

mem = Memory(cachedir='.', verbose=1)
# Define connectivity based on brain mask
A = grid_to_graph(n_x=brain.shape[0], n_y=brain.shape[1],
                  n_z=brain.shape[2], mask=brain)
# Create ward object
ward = WardAgglomeration(n_clusters=500, connectivity=A, memory=mem)
print("Performing Ward Clustering")
ward.fit(tc_group.T)

# Write the labels into a volume of the original spatial size
template = np.zeros((dim[0], dim[1], dim[2]))
template[brain == 1] = ward.labels_ + 1  # Previously processed data did not include +1

# Remove parcels with zero timecourses in any of the subjects
template = template.ravel()
template_refined = template.copy()
label = np.unique(template)
示例14: WardAgglomeration
pl.figure(figsize=(3.8, 4.5))
pl.axes([0, 0, 1, 1])
pl.imshow(colors[np.rot90(cut)], interpolation='nearest')
pl.axis('off')
# Compute connectivity matrix: which voxel is connected to which
# The graph is built from the three dimensions of `mask` and is given the
# mask itself; it is later passed as `connectivity` to the clustering.
from sklearn.feature_extraction import image
shape = mask.shape
connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
n_z=shape[2], mask=mask)
for n_clusters in 100, 1000:
# Compute Ward clustering
from sklearn.cluster import WardAgglomeration
ward = WardAgglomeration(n_clusters=n_clusters, connectivity=connectivity,
memory='nilearn_cache', compute_full_tree=True)
ward.fit(X)
labels = ward.labels_ + 1
labels = masking.unmask(labels, adhd_mask)
# 0 is the background, putting it to -1
labels = labels - 1
# Display the labels
plot_labels(labels, 8)
pl.savefig(join('clustering', 'ward_%i.eps' % n_clusters))
pl.savefig(join('clustering', 'ward_%i.pdf' % n_clusters))
# Compute Kmeans clustering
from sklearn.cluster import MiniBatchKMeans
示例15: WardAgglomeration
"""
Test various n_clusters
"""
for N_CLUSTERS in N_CLUSTERS_SET:
    ##############################################################################
    # Ward
    ##############################################################################
    # Boolean version of the fitted mask.  The builtin ``bool`` replaces the
    # ``np.bool`` alias, which recent NumPy releases no longer provide.
    mask = masker.mask_img_.get_data().astype(bool)
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                       n_z=shape[2], mask=mask)

    # Computing the ward for the first time, this is long...
    ward = WardAgglomeration(n_clusters=N_CLUSTERS, connectivity=connectivity,
                             memory='nilearn_cache')
    ward.fit(pet_data_masked)
    ward_labels_unique = np.unique(ward.labels_)
    ward_labels = ward.labels_

    ##############################################################################
    # Generate cluster matrix
    ##############################################################################
    # x[idx, val] is the mean of sample idx over the features labelled val.
    x = np.zeros((len(data), N_CLUSTERS))
    for val in ward_labels_unique:
        # The membership mask depends only on the label, so it is computed
        # once per label instead of once per (sample, label) pair.
        ind = (ward_labels == val)
        for idx in np.arange(len(data)):
            x[idx, val] = np.mean(pet_data_masked[idx, ind])