本文整理汇总了Python中sklearn.cluster.WardAgglomeration.transform方法的典型用法代码示例。如果您正苦于以下问题：Python WardAgglomeration.transform方法的具体用法？Python WardAgglomeration.transform怎么用？Python WardAgglomeration.transform使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类sklearn.cluster.WardAgglomeration的用法示例。
在下文中一共展示了WardAgglomeration.transform方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_ward_agglomeration
# 需要导入模块: from sklearn.cluster import WardAgglomeration [as 别名]
# 或者: from sklearn.cluster.WardAgglomeration import transform [as 别名]
def test_ward_agglomeration():
    """Check that we obtain the correct solution in a simplistic case.

    Fits a 5-cluster ``WardAgglomeration`` on a (50, 100) random matrix whose
    100 features are connected as a 10 x 10 grid, then checks the
    ``transform`` / ``inverse_transform`` round trip.
    """
    rnd = np.random.RandomState(0)
    # Builtin ``bool`` is used instead of the ``np.bool`` alias: the alias was
    # deprecated in NumPy 1.20 and removed in 1.24, while ``bool`` yields the
    # same dtype on every NumPy version.
    mask = np.ones([10, 10], dtype=bool)
    X = rnd.randn(50, 100)
    connectivity = grid_to_graph(*mask.shape)
    ward = WardAgglomeration(n_clusters=5, connectivity=connectivity)
    ward.fit(X)
    # The fitted labelling must contain exactly the requested 5 clusters.
    assert_true(np.size(np.unique(ward.labels_)) == 5)
    Xred = ward.transform(X)
    # One reduced feature per cluster.
    assert_true(Xred.shape[1] == 5)
    Xfull = ward.inverse_transform(Xred)
    # A reconstructed sample holds exactly 5 distinct values.
    assert_true(np.unique(Xfull[0]).size == 5)
    # Reducing the reconstruction again must give back the reduced data.
    assert_array_almost_equal(ward.transform(Xfull), Xred)
示例2: prepare_data
# 需要导入模块: from sklearn.cluster import WardAgglomeration [as 别名]
# 或者: from sklearn.cluster.WardAgglomeration import transform [as 别名]
def prepare_data(imgs, connectivity, mask, n_clusters=5000, n_components=100):
    """Mask the images, learn a Ward parcellation and reduce the data with it.

    Relies on the names ``nifti_masker``, ``subject_label`` and ``n_subjects``
    being defined outside this function (they are not parameters).

    Parameters
    ----------
    imgs
        Input handed to ``nifti_masker.fit_transform``.
    connectivity
        Connectivity passed to ``WardAgglomeration``.
    mask
        Forwarded unchanged to ``do_parcel_connectivity``.
    n_clusters : int, optional
        Number of Ward clusters (parcels).
    n_components : int, optional
        Number of components kept by ``RandomizedPCA``.

    Returns
    -------
    cube : numpy.ndarray
        Array built from the reduced data, one entry per subject index in
        ``range(n_subjects)``.
    ward : the fitted ``WardAgglomeration`` estimator
    parcel_connectivity : result of ``do_parcel_connectivity``
    """
    masked = nifti_masker.fit_transform(imgs)

    # The parcellation is fitted on a PCA-compressed version of the masked
    # data (PCA is applied to the transpose and transposed back), while the
    # reduction below is applied to the uncompressed masked data.
    reducer = RandomizedPCA(n_components=n_components)
    compressed = reducer.fit_transform(masked.T).T
    agglomeration = WardAgglomeration(
        n_clusters=n_clusters,
        connectivity=connectivity,
        memory='nilearn_cache',
    )
    ward = agglomeration.fit(compressed)

    parcel_signals = ward.transform(masked)
    # Drop the reference to the masked array before building the cube.
    del masked

    # Group the rows of the reduced data by subject.
    per_subject = []
    for subject in np.arange(n_subjects):
        per_subject.append(parcel_signals[subject_label == subject])
    cube = np.array(per_subject)

    parcel_connectivity = do_parcel_connectivity(mask, n_clusters, ward)
    return cube, ward, parcel_connectivity
示例3: representation
# 需要导入模块: from sklearn.cluster import WardAgglomeration [as 别名]
# 或者: from sklearn.cluster.WardAgglomeration import transform [as 别名]
# Script fragment: plot one slice of the first masked sample, then the same
# slice after compressing the data with the Ward parcellation.
# `nifti_masker`, `fmri_masked`, `mask`, `ward`, `np` and `pl` are expected to
# be defined earlier in the script (not visible in this fragment).

# Bring the first masked sample back to image space and hide the zero entries.
first_epi = nifti_masker.inverse_transform(fmri_masked[0]).get_data()
first_epi = np.ma.masked_array(first_epi, first_epi == 0)
# Outside the mask: a uniform value, smaller than inside the mask
first_epi[np.logical_not(mask)] = 0.9 * first_epi[mask].min()
# Colour limits are taken from index 20 of the last axis and reused for the
# compressed plot below so that both figures share the same scale.
vmax = first_epi[..., 20].max()
vmin = first_epi[..., 20].min()
# NOTE(review): `pl.cm.spectral` may be missing in recent matplotlib releases
# (it appears to have been renamed `nipy_spectral`) -- confirm the target version.
pl.imshow(np.rot90(first_epi[..., 20]),
interpolation='nearest', cmap=pl.cm.spectral, vmin=vmin, vmax=vmax)
pl.axis('off')
pl.title('Original (%i voxels)' % fmri_masked.shape[1])
# Reduced data can be created by taking the parcel-level average.
# Note that, like many objects in scikit-learn, the ward object exposes
# a transform method that modifies input features. Here it reduces their
# dimension.
fmri_reduced = ward.transform(fmri_masked)
# Display the corresponding data compressed using the parcellation
fmri_compressed = ward.inverse_transform(fmri_reduced)
compressed = nifti_masker.inverse_transform(
fmri_compressed[0]).get_data()
compressed = np.ma.masked_equal(compressed, 0)
pl.figure()
pl.imshow(np.rot90(compressed[:, :, 20]),
interpolation='nearest', cmap=pl.cm.spectral, vmin=vmin, vmax=vmax)
# NOTE(review): the parcel count in the title is a literal, it is not read
# from `ward` -- keep it in sync with the number of clusters actually used.
pl.title('Compressed representation (2000 parcels)')
pl.axis('off')
pl.show()
示例4:
# 需要导入模块: from sklearn.cluster import WardAgglomeration [as 别名]
# 或者: from sklearn.cluster.WardAgglomeration import transform [as 别名]
# Script fragment: show the Ward parcellation, the original data and the data
# compressed with the parcellation, each on index 20 of the third axis.
# `labels`, `mask`, `ward`, `epi_img`, `epi_masked`, `np` and `pl` are expected
# to be defined earlier in the script (not visible in this fragment).

# Write the cluster label of every in-mask voxel into the label volume.
labels[mask] = ward.labels_
# Builtin ``int`` replaces the ``np.int`` alias, which was deprecated in
# NumPy 1.20 and removed in 1.24; it selects the same integer dtype.
cut = labels[:, :, 20].astype(int)
# One random RGB colour per cluster plus one extra row, forced to black.
# NOTE(review): presumably voxels outside the mask hold -1 in `labels`, so
# they index this last row -- confirm where `labels` is initialised.
colors = np.random.random(size=(ward.n_clusters + 1, 3))
colors[-1] = 0
pl.axis('off')
pl.imshow(colors[cut], interpolation='nearest')
pl.title('Ward parcellation')

# Display the original data
pl.figure()
first_epi_img = epi_img[..., 0].copy()
first_epi_img[np.logical_not(mask)] = 0
# NOTE(review): `pl.cm.spectral` may be missing in recent matplotlib releases
# (it appears to have been renamed `nipy_spectral`) -- confirm the target version.
pl.imshow(first_epi_img[..., 20], interpolation='nearest',
          cmap=pl.cm.spectral)
pl.axis('off')
pl.title('Original')

# Display the corresponding data compressed using the parcellation
X_r = ward.transform(epi_masked.T)
X_c = ward.inverse_transform(X_r)
# Put the first reconstructed sample back into a volume shaped like the mask.
compressed_img = np.zeros(mask.shape)
compressed_img[mask] = X_c[0]
pl.figure()
pl.imshow(compressed_img[:, :, 20], interpolation='nearest',
          cmap=pl.cm.spectral)
pl.title('Compressed representation')
pl.axis('off')
pl.show()
示例5: feature_extractor
# 需要导入模块: from sklearn.cluster import WardAgglomeration [as 别名]
# 或者: from sklearn.cluster.WardAgglomeration import transform [as 别名]
def feature_extractor(imgfile, maskfile, featurefile, maskerfile, wardfile,
                      nclusters=(1000,), selectfile=None, targetfile=None,
                      metafile=None, cachefile=None):
    """Mask an image, then build Ward and univariate-selected features per CV fold.

    The masked data, the mask and the connectivity graph are saved to
    ``featurefile + "_main.npz"``. For every cross-validation fold and every
    cluster count, a ``WardAgglomeration`` is fitted on the training rows and
    a ``SelectPercentile(f_classif, percentile=30)`` is fitted on the reduced
    training rows; both estimators are dumped with ``joblib`` and the reduced
    train/test arrays are saved to ``featurefile + "_cv<fold>.npz"``.

    Parameters
    ----------
    imgfile
        Image input handed to ``NiftiMasker.fit_transform``.
    maskfile
        Mask handed to ``NiftiMasker``.
    featurefile : str
        Prefix of the ``.npz`` output files.
    maskerfile
        Destination of the dumped ``NiftiMasker``.
    wardfile : str
        Prefix of the dumped Ward estimators
        (``<wardfile>_D<nclusters>_cv<fold>.pkl``).
    nclusters : iterable of int, optional
        Cluster counts to try.
    selectfile : str
        Prefix of the dumped selectors. Defaults to ``None`` but is used
        unconditionally, so it must be provided.
    targetfile
        File loaded with ``np.load``; its ``"ymap"`` entry holds the targets.
        Defaults to ``None`` but is used unconditionally.
    metafile
        File loaded with ``np.load``; its ``"train"``, ``"test"`` and
        ``"ycv"`` entries give the per-fold indices and the number of folds.
        Defaults to ``None`` but is used unconditionally.
    cachefile : optional
        Passed as ``memory`` to ``NiftiMasker`` and ``WardAgglomeration``.

    Returns
    -------
    None
        All results are written to disk.
    """
    resultdict = {"imgfile": imgfile, "maskfile": maskfile}

    # load data
    # Single-argument print(...) calls behave identically on Python 2 and 3,
    # unlike the print statement, which is a SyntaxError on Python 3.
    print("--loading data")
    nifti_masker = input_data.NiftiMasker(mask=maskfile, memory=cachefile,
                                          memory_level=1, standardize=False)
    fmri_masked = nifti_masker.fit_transform(imgfile)

    print("--getting mask")
    # Builtin ``bool`` instead of the removed ``np.bool`` alias (same dtype).
    mask = nifti_masker.mask_img_.get_data().astype(bool)

    # save the fitted masker
    joblib.dump(nifti_masker, maskerfile)
    resultdict["mask"] = mask
    resultdict["Xmask"] = fmri_masked
    resultdict["maskerfile"] = maskerfile

    # get connectivity
    print("--getting connectivity")
    shape = mask.shape
    connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                       n_z=shape[2], mask=mask)
    resultdict["connectivity"] = connectivity

    print("--save main file")
    np.savez(featurefile+"_main.npz", **resultdict)

    # run ward
    y = np.load(targetfile)["ymap"]
    meta = np.load(metafile)
    train = meta["train"]
    test = meta["test"]
    ncv = meta['ycv']

    # for each cv set
    for cvx in range(ncv):
        trainidx = train[cvx]
        testidx = test[cvx]
        # Fresh containers per fold: each fold gets its own .npz file.
        resultdict = {}
        wardfiles = []
        selectfiles = []
        print("--Running ward %d" % (cvx, ))
        for nc in nclusters:
            # The parcellation is learnt on the training rows only and then
            # applied to both splits.
            ward = WardAgglomeration(n_clusters=nc, connectivity=connectivity,
                                     memory=cachefile)
            ward.fit(fmri_masked[trainidx])
            fmri_reduced_train = ward.transform(fmri_masked[trainidx])
            fmri_reduced_test = ward.transform(fmri_masked[testidx])

            # save the fitted ward estimator and the reduced data
            subwardfile = wardfile+"_D%d_cv%d.pkl" % (nc, cvx,)
            joblib.dump(ward, subwardfile)
            resultdict["Xward_%d_train" % (nc,)] = fmri_reduced_train
            resultdict["Xward_%d_test" % (nc,)] = fmri_reduced_test
            wardfiles.append(subwardfile)

            # additional feature selection, fitted on the training rows only
            selector = SelectPercentile(f_classif, percentile=30)
            selector.fit(fmri_reduced_train, y[trainidx])
            fmri_select_train = selector.transform(fmri_reduced_train)
            fmri_select_test = selector.transform(fmri_reduced_test)

            # save the fitted selector and the selected data
            subselectfile = selectfile+"_D%d_cv%d.pkl" % (nc, cvx,)
            joblib.dump(selector, subselectfile)
            resultdict["Xselect_%d_train" % (nc,)] = fmri_select_train
            resultdict["Xselect_%d_test" % (nc,)] = fmri_select_test
            selectfiles.append(subselectfile)

        resultdict["wardfiles"] = wardfiles
        resultdict["selectfiles"] = selectfiles

        # save results
        print("--save cv result")
        np.savez(featurefile+"_cv%d.npz" % (cvx, ), **resultdict)
示例6: classify
# 需要导入模块: from sklearn.cluster import WardAgglomeration [as 别名]
# 或者: from sklearn.cluster.WardAgglomeration import transform [as 别名]
def classify(x, y, classifier='naive_bayes', clustering=True, n_folds=10):
    """
    Run n-fold cross-validated single-label classification.

    Labels are integer-encoded with a ``LabelEncoder``. When ``clustering`` is
    true, every fold's features are reduced with a Ward agglomeration fitted
    on that fold's training split. One confusion matrix, computed over the
    encoded labels 0 and 1, is collected per fold.

    Parameters
    -----------
    x : `numpy.ndarray`
        (n_samples x n_features) array of features
    y : `numpy.ndarray`
        array of labels, one per sample
    classifier : str, optional
        which classifier model to use. Must be one of
        'naive_bayes' | 'svm' | 'logistic_regression' | 'ensemble'.
        'ensemble' fits all three models and predicts 1 where at least two of
        them predict 1. Defaults to 'naive_bayes'.
    clustering : bool, optional
        whether to do Ward clustering or not. The number of clusters is read
        from the global N_CLUSTERS and the brain mask is loaded from
        'data/2mm_brain_mask.npy'. Defaults to True.
    n_folds : int
        the number of folds of cv

    Returns
    -------
    accuracy : list
        One confusion matrix per fold.

    Raises
    ------
    KeyError
        If ``classifier`` is neither 'ensemble' nor a known model name.
    """
    encoder = preprocessing.LabelEncoder()
    encoder.fit(y)
    y_encoded = encoder.transform(y)

    # All candidate models are instantiated up front; only the requested
    # one(s) are fitted later.
    available = {
        'naive_bayes': MultinomialNB(),
        'logistic_regression': LogisticRegression(penalty='l2'),
        'svm': GridSearchCV(LinearSVC(), [{'C': [1, 10, 100, 1000]}]),
    }
    use_ensemble = classifier == 'ensemble'
    clf = None
    if use_ensemble:
        clf_nb = available['naive_bayes']
        clf_svm = available['svm']
        clf_lr = available['logistic_regression']
    else:
        clf = available[classifier]

    # The Ward model is only built when clustering was requested.
    ward = None
    if clustering:
        brain_mask = np.load('data/2mm_brain_mask.npy')
        dims = brain_mask.shape
        connectivity = image.grid_to_graph(n_x=dims[0], n_y=dims[1],
                                           n_z=dims[2], mask=brain_mask)
        ward = WardAgglomeration(n_clusters=N_CLUSTERS,
                                 connectivity=connectivity)

    # actual cross validation
    folds = cross_validation.KFold(len(y_encoded), n_folds=n_folds)
    fold_matrices = []
    for train_idx, test_idx in folds:
        x_train, y_train = x[train_idx], y_encoded[train_idx]
        x_test, y_test = x[test_idx], y_encoded[test_idx]

        if clustering:
            # Fit on the training split only, then reduce both splits.
            ward.fit(x_train)
            x_train = ward.transform(x_train)
            x_test = ward.transform(x_test)

        if use_ensemble:
            # Majority vote: sum the three predictions, keep 1 where the
            # total reaches 2.
            votes = clf_nb.fit(x_train, y_train).predict(x_test)
            votes = votes + clf_lr.fit(x_train, y_train).predict(x_test)
            votes = votes + clf_svm.fit(x_train, y_train).predict(x_test)
            predicted = np.array(votes >= 2, dtype=int)
        else:
            predicted = clf.fit(x_train, y_train).predict(x_test)

        fold_matrices.append(
            confusion_matrix(y_test, predicted, labels=[0,1]))
    return fold_matrices
示例7: classify
# 需要导入模块: from sklearn.cluster import WardAgglomeration [as 别名]
# 或者: from sklearn.cluster.WardAgglomeration import transform [as 别名]
def classify(x, y, classifier='naive_bayes', clustering=True, n_folds=10):
    """
    Run n-fold cross-validated multi-label classification.

    Labels are binarized with a ``LabelBinarizer``. When ``clustering`` is
    true, every fold's features are reduced with a Ward agglomeration fitted
    on that fold's training split. Each fold is scored with
    ``utils.score_results`` and ``utils.label_scores``.

    Parameters
    -----------
    x : `numpy.ndarray`
        (n_samples x n_features) array of features
    y : `numpy.ndarray`
        (n_samples x n_labels) array of labels
    classifier : str, optional
        which classifier model to use. Must be one of
        'naive_bayes' | 'decision_tree' | 'logistic_regression'.
        Defaults to 'naive_bayes'.
    clustering : bool, optional
        whether to do Ward clustering or not. The number of clusters is read
        from the global N_CLUSTERS and the brain mask is loaded from
        'data/MNI152_T1_2mm_brain.nii.gz'. Defaults to True.
    n_folds : int
        the number of folds of cv

    Returns
    -------
    score_per_class, score_per_label : tuple of lists
        Two lists with one entry per fold: the results of
        ``utils.score_results`` and of ``utils.label_scores``, in that order.

    Raises
    ------
    KeyError
        If ``classifier`` is not one of the known model names.
    """
    lb = preprocessing.LabelBinarizer()
    y_new = lb.fit_transform(y)

    # Specify connectivity for clustering. The mask is only loaded and the
    # Ward model only built when clustering is requested, so that
    # ``clustering=False`` neither needs the mask file nor pays for the graph.
    ward = None
    if clustering:
        mask = nb.load('data/MNI152_T1_2mm_brain.nii.gz').get_data().astype('bool')
        shape = mask.shape
        connectivity = image.grid_to_graph(n_x=shape[0], n_y=shape[1],
                                           n_z=shape[2], mask=mask)
        ward = WardAgglomeration(n_clusters=N_CLUSTERS,
                                 connectivity=connectivity)

    # choose and assign appropriate classifier
    classifier_dict = {
        'naive_bayes': OneVsRestClassifier(MultinomialNB()),
        'logistic_regression': OneVsRestClassifier(LogisticRegression(penalty='l2')),
        'decision_tree': tree.DecisionTreeClassifier(),
    }
    clf = classifier_dict[classifier]

    kf = cross_validation.KFold(len(y_new), n_folds=n_folds)
    score_per_class = []
    score_per_label = []
    for train, test in kf:
        x_train = np.ascontiguousarray(x[train])
        y_train = np.ascontiguousarray(y_new[train])
        x_test = np.ascontiguousarray(x[test])
        y_test = np.ascontiguousarray(y_new[test])

        if clustering:
            # Fit on the training split only, then reduce both splits.
            ward.fit(x_train)
            x_train = ward.transform(x_train)
            x_test = ward.transform(x_test)

        model = clf.fit(x_train, y_train)
        predicted = model.predict(x_test)
        predict_prob = model.predict_proba(x_test)
        # Some models return the probabilities as a list; normalise it to an
        # array before scoring.
        if isinstance(predict_prob, list):
            predict_prob = np.array(predict_prob)

        cls_scores = utils.score_results(y_test, predicted, predict_prob)
        label_scores = utils.label_scores(y_test, predicted, predict_prob)
        score_per_class.append(cls_scores)
        score_per_label.append(label_scores)

    return (score_per_class, score_per_label)